@@ -132,7 +132,7 @@ def make_flank(coords, L, d):
132132 .apply (lambda s : pd .Series ([s ["chrom" ], int (s ["midpoint" ]- L / 2 ), int (s ["midpoint" ]+ L / 2 )],
133133 index = ["chrom" , "start" , "end" ]), axis = 1 ))
134134
135- def make_gc_match (coords , genome_fa , n , l = 500 , seed = 1 , gc_diff_max = 0.05 , max_attemps = 1000 , incl = None , excl = None ):
135+ def make_gc_match (coords , genome_fa , n , l = 500 , seed = 1 , gc_diff_max = 0.05 , max_attempts = 1000 , incl = None , excl = None ):
136136 """
137137 Make GC match regions by:
138138 1. Randomly shuffle genomic regions by bedtools
@@ -150,8 +150,8 @@ def make_gc_match(coords, genome_fa, n, l=500, seed=1, gc_diff_max=0.05, max_att
150150 :type seed: int
151151 :param gc_diff_max: allowed gc percentage difference between input and returned regions
152152 :type gc_diff_max: float
153- :param max_attemps : maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
154- :type max_attemps : int
153+ :param max_attempts : maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
154+ :type max_attempts : int
155155 :param excl: regions that chopped regions shouldn't overlap with
156156 :type excl: BedTool object
157157 :param incl: regions that chopped regions should overlap with
@@ -168,10 +168,10 @@ def make_gc_match(coords, genome_fa, n, l=500, seed=1, gc_diff_max=0.05, max_att
168168 gc_percent_global = gc_total / nuc_total
169169 logger .info (f"Global GC content of input regions is { gc_percent_global } " )
170170
171- return make_gc_match_given_ratio (gc_percent_global , n , genome_fa , l = l , seed = seed , gc_diff_max = gc_diff_max , max_attemps = max_attemps , incl = incl , excl = excl )
171+ return make_gc_match_given_ratio (gc_percent_global , n , genome_fa , l = l , seed = seed , gc_diff_max = gc_diff_max , max_attempts = max_attempts , incl = incl , excl = excl )
172172
173173
174- def make_gc_match_given_ratio (gc_ratio , n , genome_fa , l = 500 , seed = 1 , gc_diff_max = 0.05 , max_attemps = 1000 , incl = None , excl = None ):
174+ def make_gc_match_given_ratio (gc_ratio , n , genome_fa , l = 500 , seed = 1 , gc_diff_max = 0.05 , max_attempts = 1000 , incl = None , excl = None ):
175175 """
176176 Make GC match regions by:
177177 1. Randomly shuffle genomic regions by bedtools
@@ -187,8 +187,8 @@ def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max
187187 :type seed: int
188188 :param gc_diff_max: allowed gc percentage difference between input and returned regions
189189 :type gc_diff_max: float
190- :param max_attemps : maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
191- :type max_attemps : int
190+ :param max_attempts : maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
191+ :type max_attempts : int
192192 :param excl: regions that chopped regions shouldn't overlap with
193193 :type excl: BedTool object
194194 :param incl: regions that chopped regions should overlap with
@@ -199,7 +199,7 @@ def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max
199199 # shuffle regions and keep those of similar gc percentage
200200 rng = np .random .RandomState (seed ) # create a random number generator by given seed to get different shuffled regions per loop
201201 return_regions = []
202- for i in range (max_attemps ):
202+ for i in range (max_attempts ):
203203 regions_shuffle = random_coords (gs = f'{ genome_fa } .fai' , incl = incl , excl = excl , l = l , n = n , seed = rng .randint (1e5 ))
204204 for item in regions_shuffle .itertuples ():
205205 subseq = genome_pyfaidx [item .chrom ][item .start :item .end ]
@@ -208,10 +208,10 @@ def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max
208208 if len (return_regions ) >= n :
209209 return pd .DataFrame (return_regions )[['chrom' , 'start' , 'end' ]]
210210
211- logger .warning ("Reach max attemps, return currently found GC matched regions, increase max_attemps if you need more regions" )
211+ logger .warning ("Reach max attemps, return currently found GC matched regions, increase max_attempts if you need more regions" )
212212 return pd .DataFrame (return_regions )[['chrom' , 'start' , 'end' ]]
213213
214- def make_motif_match (motif : motifs .Motif , genome_fa , l = 500 , n = 1000 , gc_content = 0.4 , threshold = 1.0 , seed = 1 , max_attemps = 1000 , incl = None , excl = None ):
214+ def make_motif_match (motif : motifs .Motif , genome_fa , l = 500 , n = 1000 , gc_content = 0.4 , threshold = 1.0 , seed = 1 , max_attempts = 1000 , incl = None , excl = None ):
215215 """
216216 Make regions containing the sub-sequence that matches the given motif above a threshold
217217
@@ -229,8 +229,8 @@ def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0
229229 :type gc_content: float
230230 :param threshold: threshold to filter regions containing sub-sequence match the given motif by pssm score
231231 :type threshold: float
232- :param max_attemps : maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
233- :type max_attemps : int
232+ :param max_attempts : maximum number of attempts to shuffle the input dataframe for extracting motif matched regions
233+ :type max_attempts : int
234234 :param excl: regions that chopped regions shouldn't overlap with
235235 :type excl: BedTool object
236236 :param incl: regions that chopped regions should overlap with
@@ -246,7 +246,7 @@ def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0
246246 genome_pyfaidx = Fasta (genome_fa )
247247 rng = np .random .RandomState (seed ) # create a random number generator by given seed to get different shuffled regions per loop
248248 return_regions = []
249- for i in range (max_attemps ):
249+ for i in range (max_attempts ):
250250 regions_shuffle = random_coords (gs = f'{ genome_fa } .fai' , incl = incl , excl = excl , l = l , n = n , seed = rng .randint (1e5 ))
251251 for item in regions_shuffle .itertuples ():
252252 subseq = genome_pyfaidx [item .chrom ][item .start :item .end ]
@@ -269,7 +269,7 @@ def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0
269269 if len (return_regions ) >= n :
270270 return pd .DataFrame (return_regions )[['chrom' , 'start' , 'end' ]]
271271
272- logger .warning ("Reach max attemps , return currently found motif matched regions, increase max_attemps if you need more regions" )
272+ logger .warning ("Reach max attempts , return currently found motif matched regions, increase max_attempts if you need more regions" )
273273 return pd .DataFrame (return_regions )[['chrom' , 'start' , 'end' ]]
274274
275275def random_coords (gs :str = None , genome :str = None , incl :BedTool = None , excl :BedTool = None ,
0 commit comments