Skip to content

Commit 19cce93

Browse files
Merge pull request #49 from kipoi/migrate-extractor-to-kipoiseq-dataclasses
migrate-extractor-to-kipoiseq-dataclasses
2 parents 570193b + 1f0f180 commit 19cce93

2 files changed

Lines changed: 12 additions & 11 deletions

File tree

kipoiseq/extractors/vcf_seq.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
1-
from pybedtools import Interval
21
from pyfaidx import Sequence, complement
32
from kipoiseq.extractors import BaseExtractor, FastaStringExtractor
3+
from kipoiseq.dataclasses import Variant, Interval
4+
45
try:
56
from cyvcf2 import VCF
67
except ImportError:
@@ -23,14 +24,15 @@ def __init__(self, *args, **kwargs):
2324

2425
def fetch_variants(self, interval, sample_id=None):
2526
for v in self(self._region(interval)):
26-
if sample_id is None or self._has_variant(v, sample_id):
27+
v = Variant.from_cyvcf(v)
28+
if sample_id is None or self.has_variant(v, sample_id):
2729
yield v
2830

2931
def _region(self, interval):
3032
return '%s:%d-%d' % (interval.chrom, interval.start, interval.end)
3133

32-
def _has_variant(self, variant, sample_id):
33-
gt_type = variant.gt_types[self.sample_mapping[sample_id]]
34+
def has_variant(self, variant, sample_id):
35+
gt_type = variant.source.gt_types[self.sample_mapping[sample_id]]
3436
return gt_type != 0 and gt_type != 2
3537

3638

@@ -166,11 +168,10 @@ def _variant_to_sequence(self, variants):
166168
for reference and variants.
167169
"""
168170
for v in variants:
169-
ref = Sequence(name=v.CHROM, seq=v.REF,
170-
start=v.start, end=v.start + len(v.REF))
171-
# TO DO: consider alternative alleles.
172-
alt = Sequence(name=v.CHROM, seq=v.ALT[0],
173-
start=v.start, end=v.start + len(v.ALT[0]))
171+
ref = Sequence(name=v.chrom, seq=v.ref,
172+
start=v.start, end=v.start + len(v.ref))
173+
alt = Sequence(name=v.chrom, seq=v.alt,
174+
start=v.start, end=v.start + len(v.alt))
174175
yield ref, alt
175176

176177
def _split_overlapping(self, variant_pairs, anchor, which='both'):

tests/extractors/test_vcf_seq_extractor.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
import pytest
22
from cyvcf2 import VCF
33
from pyfaidx import Sequence
4-
from pybedtools import Interval
54
from kipoiseq.extractors.vcf_seq import IntervalSeqBuilder
5+
from kipoiseq.dataclasses import Variant, Interval
66
from kipoiseq.extractors import *
77

88
fasta_file = 'tests/data/sample.5kb.fa'
@@ -92,7 +92,7 @@ def test__split_overlapping(variant_seq_extractor):
9292

9393

9494
def test_extract(variant_seq_extractor):
95-
variants = list(VCF(vcf_file)())
95+
variants = [Variant.from_cyvcf(v) for v in VCF(vcf_file)]
9696

9797
interval = Interval('chr1', 2, 9)
9898

0 commit comments

Comments
 (0)