11from pybedtools import Interval
2- from pyfaidx import Sequence
2+ from pyfaidx import Sequence , complement
33from kipoiseq .extractors import BaseExtractor , FastaStringExtractor
44try :
55 from cyvcf2 import VCF
@@ -77,7 +77,7 @@ def __init__(self, fasta_file):
7777 Args:
7878 fasta_file: path to the fasta file (can be gzipped)
7979 """
80- self .fasta = FastaStringExtractor (fasta_file )
80+ self .fasta = FastaStringExtractor (fasta_file , use_strand = True )
8181
8282 def extract (self , interval , variants , anchor , fixed_len = True ):
8383 """
@@ -147,7 +147,12 @@ def extract(self, interval, variants, anchor, fixed_len=True):
147147 down_str , up_str = self ._cut_to_fix_len (
148148 down_str , up_str , interval , anchor )
149149
150- return down_str + up_str
150+ seq = down_str + up_str
151+
152+ if interval .strand == '-' :
153+ seq = complement (seq )[::- 1 ]
154+
155+ return seq
151156
152157 def _variant_to_sequence (self , variants ):
153158 """
@@ -197,11 +202,10 @@ def _downstream_builder(self, down_variants, interval, anchor, istart):
197202 for ref , alt in down_variants :
198203 if ref .end <= istart :
199204 break
200- down_sb .append (Interval (interval .chrom , ref .end ,
201- prev , interval .strand ))
205+ down_sb .append (Interval (interval .chrom , ref .end , prev ))
202206 down_sb .append (alt )
203207 prev = ref .start
204- down_sb .append (Interval (interval .chrom , istart , prev , interval . strand ))
208+ down_sb .append (Interval (interval .chrom , istart , prev ))
205209 down_sb .reverse ()
206210
207211 return down_sb
@@ -213,17 +217,15 @@ def _upstream_builder(self, up_variants, interval, anchor, iend):
213217 for ref , alt in up_variants :
214218 if ref .start > iend :
215219 break
216- up_sb .append (Interval (interval .chrom , prev ,
217- ref .start , interval .strand ))
220+ up_sb .append (Interval (interval .chrom , prev , ref .start ))
218221 up_sb .append (alt )
219222 prev = ref .end
220- up_sb .append (Interval (interval .chrom , prev , iend , interval . strand ))
223+ up_sb .append (Interval (interval .chrom , prev , iend ))
221224
222225 return up_sb
223226
224227 def _fetch (self , interval , istart , iend ):
225- t_interval = Interval (interval .chrom , istart , iend , interval .strand )
226- seq = self .fasta .extract (t_interval )
228+ seq = self .fasta .extract (Interval (interval .chrom , istart , iend ))
227229 seq = Sequence (name = interval .chrom , seq = seq , start = istart , end = iend )
228230 return seq
229231
0 commit comments