Skip to content

Commit ee89c62

Browse files
committed
negative strand handled
1 parent f7b012d commit ee89c62

2 files changed

Lines changed: 20 additions & 11 deletions

File tree

kipoiseq/extractors/vcf_seq.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pybedtools import Interval
2-
from pyfaidx import Sequence
2+
from pyfaidx import Sequence, complement
33
from kipoiseq.extractors import BaseExtractor, FastaStringExtractor
44
try:
55
from cyvcf2 import VCF
@@ -77,7 +77,7 @@ def __init__(self, fasta_file):
7777
Args:
7878
fasta_file: path to the fasta file (can be gzipped)
7979
"""
80-
self.fasta = FastaStringExtractor(fasta_file)
80+
self.fasta = FastaStringExtractor(fasta_file, use_strand=True)
8181

8282
def extract(self, interval, variants, anchor, fixed_len=True):
8383
"""
@@ -147,7 +147,12 @@ def extract(self, interval, variants, anchor, fixed_len=True):
147147
down_str, up_str = self._cut_to_fix_len(
148148
down_str, up_str, interval, anchor)
149149

150-
return down_str + up_str
150+
seq = down_str + up_str
151+
152+
if interval.strand == '-':
153+
seq = complement(seq)[::-1]
154+
155+
return seq
151156

152157
def _variant_to_sequence(self, variants):
153158
"""
@@ -197,11 +202,10 @@ def _downstream_builder(self, down_variants, interval, anchor, istart):
197202
for ref, alt in down_variants:
198203
if ref.end <= istart:
199204
break
200-
down_sb.append(Interval(interval.chrom, ref.end,
201-
prev, interval.strand))
205+
down_sb.append(Interval(interval.chrom, ref.end, prev))
202206
down_sb.append(alt)
203207
prev = ref.start
204-
down_sb.append(Interval(interval.chrom, istart, prev, interval.strand))
208+
down_sb.append(Interval(interval.chrom, istart, prev))
205209
down_sb.reverse()
206210

207211
return down_sb
@@ -213,17 +217,15 @@ def _upstream_builder(self, up_variants, interval, anchor, iend):
213217
for ref, alt in up_variants:
214218
if ref.start > iend:
215219
break
216-
up_sb.append(Interval(interval.chrom, prev,
217-
ref.start, interval.strand))
220+
up_sb.append(Interval(interval.chrom, prev, ref.start))
218221
up_sb.append(alt)
219222
prev = ref.end
220-
up_sb.append(Interval(interval.chrom, prev, iend, interval.strand))
223+
up_sb.append(Interval(interval.chrom, prev, iend))
221224

222225
return up_sb
223226

224227
def _fetch(self, interval, istart, iend):
225-
t_interval = Interval(interval.chrom, istart, iend, interval.strand)
226-
seq = self.fasta.extract(t_interval)
228+
seq = self.fasta.extract(Interval(interval.chrom, istart, iend))
227229
seq = Sequence(name=interval.chrom, seq=seq, start=istart, end=iend)
228230
return seq
229231

tests/extractors/test_vcf_seq_extractor.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,18 @@ def test__split_overlapping(variant_seq_extractor):
9292

9393
def test_extract(variant_seq_extractor):
9494
interval = Interval('chr1', 2, 9)
95+
9596
variants = list(VCF(vcf_file)())
9697
seq = variant_seq_extractor.extract(interval, variants, anchor=5)
9798
assert len(seq) == interval.end - interval.start
9899
assert seq == 'GCGAACG'
99100

101+
interval = Interval('chr1', 2, 9, strand='-')
102+
variants = list(VCF(vcf_file)())
103+
seq = variant_seq_extractor.extract(interval, variants, anchor=5)
104+
assert len(seq) == interval.end - interval.start
105+
assert seq == 'CGTTCGC'
106+
100107
interval = Interval('chr1', 4, 14)
101108
seq = variant_seq_extractor.extract(interval, variants, anchor=7)
102109
assert len(seq) == interval.end - interval.start

0 commit comments

Comments
 (0)