Skip to content

Commit 1cc380e

Browse files
Merge pull request #36 from MuhammedHasan/master
critical bug fix, insertion behavior changed
2 parents d30eb1b + 278b591 commit 1cc380e

2 files changed

Lines changed: 18 additions & 12 deletions

File tree

kipoiseq/extractors/vcf_seq.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,7 @@ def _split_overlapping(self, variant_pairs, anchor, which='both'):
178178
Split the variants hitting the anchor into two
179179
"""
180180
for ref, alt in variant_pairs:
181-
if ref.start < anchor < ref.end or alt.start < anchor < alt.end:
181+
if ref.start < anchor < ref.end:
182182
mid = anchor - ref.start
183183
if which == 'left' or which == 'both':
184184
yield ref[:mid], alt[:mid]
@@ -208,7 +208,7 @@ def _downstream_builder(self, down_variants, interval, anchor, istart):
208208

209209
prev = anchor
210210
for ref, alt in down_variants:
211-
if ref.end < istart:
211+
if ref.end <= istart:
212212
break
213213
down_sb.append(Interval(interval.chrom, ref.end, prev))
214214
down_sb.append(alt)
@@ -223,7 +223,7 @@ def _upstream_builder(self, up_variants, interval, anchor, iend):
223223

224224
prev = anchor
225225
for ref, alt in up_variants:
226-
if ref.start > iend:
226+
if ref.start >= iend:
227227
break
228228
up_sb.append(Interval(interval.chrom, prev, ref.start))
229229
up_sb.append(alt)
@@ -241,7 +241,7 @@ def _cut_to_fix_len(self, down_str, up_str, interval, anchor):
241241
down_len = anchor - interval.start
242242
up_len = interval.end - anchor
243243
down_str = down_str[-down_len:] if down_len else ''
244-
up_str = up_str[:up_len] if up_len else ''
244+
up_str = up_str[: up_len] if up_len else ''
245245
return down_str, up_str
246246

247247

tests/extractors/test_vcf_seq_extractor.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -81,28 +81,29 @@ def test__split_overlapping(variant_seq_extractor):
8181
assert splited_pairs[1][0].seq == 'A'
8282
assert splited_pairs[1][1].seq == ''
8383

84-
pair = (Sequence(seq='T', start=3, end=4),
84+
pair = (Sequence(seq='TT', start=3, end=5),
8585
Sequence(seq='AAA', start=3, end=6))
86-
splited_pairs = list(variant_seq_extractor._split_overlapping([pair], 5))
86+
splited_pairs = list(variant_seq_extractor._split_overlapping([pair], 4))
8787

8888
assert splited_pairs[0][0].seq == 'T'
89-
assert splited_pairs[0][1].seq == 'AA'
90-
assert splited_pairs[1][0].seq == ''
91-
assert splited_pairs[1][1].seq == 'A'
89+
assert splited_pairs[0][1].seq == 'A'
90+
assert splited_pairs[1][0].seq == 'T'
91+
assert splited_pairs[1][1].seq == 'AA'
9292

9393

9494
def test_extract(variant_seq_extractor):
9595
variants = list(VCF(vcf_file)())
9696

9797
interval = Interval('chr1', 2, 9)
98+
9899
seq = variant_seq_extractor.extract(interval, variants, anchor=5)
99100
assert len(seq) == interval.end - interval.start
100-
assert seq == 'GCGAACG'
101+
assert seq == 'CGAACGT'
101102

102103
interval = Interval('chr1', 2, 9, strand='-')
103104
seq = variant_seq_extractor.extract(interval, variants, anchor=5)
104105
assert len(seq) == interval.end - interval.start
105-
assert seq == 'CGTTCGC'
106+
assert seq == 'ACGTTCG'
106107

107108
interval = Interval('chr1', 4, 14)
108109
seq = variant_seq_extractor.extract(interval, variants, anchor=7)
@@ -142,7 +143,12 @@ def test_extract(variant_seq_extractor):
142143
interval = Interval('chr1', 5, 11, strand='+')
143144
seq = variant_seq_extractor.extract(
144145
interval, variants, anchor=10, fixed_len=False)
145-
assert seq == 'AACGTAA'
146+
assert seq == 'ACGTAA'
147+
148+
interval = Interval('chr1', 0, 3, strand='+')
149+
seq = variant_seq_extractor.extract(
150+
interval, variants, anchor=10, fixed_len=False)
151+
assert seq == 'ACG'
146152

147153

148154
@pytest.fixture

0 commit comments

Comments
 (0)