1- # Copyright 2022 Vicky Hunt Lab Members
1+ #!/usr/bin/env python3
2+ # Copyright 2022 - 2025 Vicky Hunt Lab Members
23#
34# Licensed under the Apache License, Version 2.0 (the "License");
45# you may not use this file except in compliance with the License.
1617
1718from sys import argv
1819from collections import defaultdict
19-
20- from .config import do_log
20+ from argparse import ArgumentParser
2121
2222def get_pairs (array ):
2323 '''
@@ -115,14 +115,14 @@ def merge_cds(coding_reigon):
115115
116116 return n_merged
117117
118- def extract_noncoding (genome , gff_path , quiet = 0 , output = 'result.fasta' ):
118+ def extract_noncoding (genome , gff_path , output = 'result.fasta' ):
119119 '''
120120 Extract the noncoding region from the genome, based on a GFF file
121121 '''
122122 gff_iter = DataIterator (gff_path )
123123 genome_data = SeqIO .parse (genome , 'fasta' )
124124
125- do_log ( quiet , '====> Calculating coordinates' )
125+ print ( '====> Calculating coordinates' )
126126
127127 coordinates = defaultdict (lambda : [])
128128 mRNAs = defaultdict (lambda : [])
@@ -141,13 +141,13 @@ def extract_noncoding(genome, gff_path, quiet=0, output='result.fasta'):
141141 if item [2 ] == 'CDS' :
142142 coding_reigon [item [0 ] + item [6 ]].append ([int (item [3 ]), int (item [4 ])])
143143
144- do_log ( quiet , '====> Merging and validating coordinates' )
144+ print ( '====> Merging and validating coordinates' )
145145
146146 cds_merged = merge_cds (coding_reigon )
147147 mRNA_merged = merge_cds (mRNAs )
148148 validate_gff (mRNAs , coding_reigon )
149149
150- do_log ( quiet , f'Merged { cds_merged } coding reigons and { mRNA_merged } mRNAs' )
150+ print ( f'Merged { cds_merged } coding reigons and { mRNA_merged } mRNAs' )
151151
152152 for key in mRNAs .keys ():
153153 for item in mRNAs [key ]:
@@ -159,12 +159,21 @@ def extract_noncoding(genome, gff_path, quiet=0, output='result.fasta'):
159159 coordinates [key ].append (item [0 ])
160160 coordinates [key ].append (item [1 ])
161161
162- do_log ( quiet , '====> Extarcting fragments' )
162+ print ( '====> Extarcting fragments' )
163163
164164 for key in coordinates .keys ():
165165 coordinates [key ].sort ()
166166
167167 SeqIO .write (extract_fragments (genome_data , coordinates , mRNA_start , mRNA_end ), output , 'fasta' )
168168
if __name__ == '__main__':
    # Command-line entry point: read the genome and GFF paths from the
    # command line and write the extracted fragments to the output FASTA.
    #
    # ArgumentParser's first positional parameter is `prog`, so passing ''
    # blanks the program name in usage and error messages. Leave `prog`
    # unset (argparse then derives it from argv[0]) and describe the tool
    # through `description` instead.
    parser = ArgumentParser(
        description='Extract the noncoding regions of a genome, based on a GFF file'
    )

    # Required positional inputs.
    parser.add_argument('genome', help='FASTA containing the genome to extract from')
    parser.add_argument('gff_file', help='GFF file containing annotations of CDS and mRNA regions')

    # Optional output path; the default matches extract_noncoding's own default.
    parser.add_argument('-o', '--output', help='FASTA file to write output to', default='result.fasta')

    args = parser.parse_args()

    extract_noncoding(args.genome, args.gff_file, output=args.output)
0 commit comments