Skip to content

Commit 9d98f97

Browse files
authored
Add an example of pyfaidx usage
1 parent 9d5148d commit 9d98f97

1 file changed

Lines changed: 8 additions & 11 deletions

File tree

bin/02_FilterFasta.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from glob import glob
1010
from collections import OrderedDict
1111
import os
12+
import pyfaidx
1213

1314
def openFiles(indir, outdir, ref, percent, retainStops):
1415
'''Opens all input files in the directory, performs filtering steps,
@@ -76,17 +77,13 @@ def seqDict(fasta, n):
7677
'''Convert fasta into separate sequence objects, determine sequence names
7778
and create dictionary entries for each set of codons'''
7879
seqs = OrderedDict()
79-
with open(fasta, "r") as infile:
80-
for line in infile:
81-
if line[0] == ">":
82-
species = line[1:].strip()
83-
if line[0] != ">":
84-
codons = []
85-
seq = line.strip()
86-
for i in range(0, len(seq), 3):
87-
codons.append(seq[i:i +3])
88-
i += 3
89-
seqs[species] = codons
80+
with pyfaidx.Fasta(fasta) as infile: # you can treat infile like a dictionary too
81+
for seq in infile: # or just iterate over the entries
82+
codons = []
83+
for i in range(0, len(seq), 3): # so far no sequence has actually been read
84+
codons.append(seq[i:i +3].seq) # slicing reads just this codon from the file; the .seq attribute is its plain string
85+
i += 3
86+
seqs[seq.name] = codons # the sequence name is stored without '>' or '\n'
9087
return True, seqs
9188

9289
#-----------------------------------------------------------------------------

0 commit comments

Comments
 (0)