Skip to content

Commit 5cfff78

Browse files
committed
minor fix
1 parent a168e33 commit 5cfff78

1 file changed

Lines changed: 14 additions & 14 deletions

File tree

seqchromloader/utils.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def make_flank(coords, L, d):
132132
.apply(lambda s: pd.Series([s["chrom"], int(s["midpoint"]-L/2), int(s["midpoint"]+L/2)],
133133
index=["chrom", "start", "end"]), axis=1))
134134

135-
def make_gc_match(coords, genome_fa, n, l=500, seed=1, gc_diff_max=0.05, max_attemps=1000, incl=None, excl=None):
135+
def make_gc_match(coords, genome_fa, n, l=500, seed=1, gc_diff_max=0.05, max_attempts=1000, incl=None, excl=None):
136136
"""
137137
Make GC-matched regions by:
138138
1. Randomly shuffle genomic regions by bedtools
@@ -150,8 +150,8 @@ def make_gc_match(coords, genome_fa, n, l=500, seed=1, gc_diff_max=0.05, max_att
150150
:type seed: int
151151
:param gc_diff_max: allowed gc percentage difference between input and returned regions
152152
:type gc_diff_max: float
153-
:param max_attemps: maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
154-
:type max_attemps: int
153+
:param max_attempts: maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
154+
:type max_attempts: int
155155
:param excl: regions that chopped regions shouldn't overlap with
156156
:type excl: BedTool object
157157
:param incl: regions that chopped regions should overlap with
@@ -168,10 +168,10 @@ def make_gc_match(coords, genome_fa, n, l=500, seed=1, gc_diff_max=0.05, max_att
168168
gc_percent_global = gc_total / nuc_total
169169
logger.info(f"Global GC content of input regions is {gc_percent_global}")
170170

171-
return make_gc_match_given_ratio(gc_percent_global, n, genome_fa, l=l, seed=seed, gc_diff_max=gc_diff_max, max_attemps=max_attemps, incl=incl, excl=excl)
171+
return make_gc_match_given_ratio(gc_percent_global, n, genome_fa, l=l, seed=seed, gc_diff_max=gc_diff_max, max_attempts=max_attempts, incl=incl, excl=excl)
172172

173173

174-
def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max=0.05, max_attemps=1000, incl=None, excl=None):
174+
def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max=0.05, max_attempts=1000, incl=None, excl=None):
175175
"""
176176
Make GC-matched regions by:
177177
1. Randomly shuffle genomic regions by bedtools
@@ -187,8 +187,8 @@ def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max
187187
:type seed: int
188188
:param gc_diff_max: allowed gc percentage difference between input and returned regions
189189
:type gc_diff_max: float
190-
:param max_attemps: maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
191-
:type max_attemps: int
190+
:param max_attempts: maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
191+
:type max_attempts: int
192192
:param excl: regions that chopped regions shouldn't overlap with
193193
:type excl: BedTool object
194194
:param incl: regions that chopped regions should overlap with
@@ -199,7 +199,7 @@ def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max
199199
# shuffle regions and keep those of similar gc percentage
200200
rng = np.random.RandomState(seed) # create a random number generator by given seed to get different shuffled regions per loop
201201
return_regions = []
202-
for i in range(max_attemps):
202+
for i in range(max_attempts):
203203
regions_shuffle = random_coords(gs=f'{genome_fa}.fai', incl=incl, excl=excl, l=l, n=n, seed=rng.randint(1e5))
204204
for item in regions_shuffle.itertuples():
205205
subseq = genome_pyfaidx[item.chrom][item.start:item.end]
@@ -208,10 +208,10 @@ def make_gc_match_given_ratio(gc_ratio, n, genome_fa, l=500, seed=1, gc_diff_max
208208
if len(return_regions) >= n:
209209
return pd.DataFrame(return_regions)[['chrom', 'start', 'end']]
210210

211-
logger.warning("Reach max attemps, return currently found GC matched regions, increase max_attemps if you need more regions")
211+
logger.warning("Reach max attemps, return currently found GC matched regions, increase max_attempts if you need more regions")
212212
return pd.DataFrame(return_regions)[['chrom', 'start', 'end']]
213213

214-
def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0.4, threshold=1.0, seed=1, max_attemps=1000, incl=None, excl=None):
214+
def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0.4, threshold=1.0, seed=1, max_attempts=1000, incl=None, excl=None):
215215
"""
216216
Make regions containing the sub-sequence that matches the given motif above a threshold
217217
@@ -229,8 +229,8 @@ def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0
229229
:type gc_content: float
230230
:param threshold: threshold to filter regions containing sub-sequence match the given motif by pssm score
231231
:type threshold: float
232-
:param max_attemps: maximum number of attempts to shuffle the input dataframe for extracting GC matched regions
233-
:type max_attemps: int
232+
:param max_attempts: maximum number of attempts to shuffle genomic regions for extracting motif matched regions
233+
:type max_attempts: int
234234
:param excl: regions that chopped regions shouldn't overlap with
235235
:type excl: BedTool object
236236
:param incl: regions that chopped regions should overlap with
@@ -246,7 +246,7 @@ def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0
246246
genome_pyfaidx = Fasta(genome_fa)
247247
rng = np.random.RandomState(seed) # create a random number generator by given seed to get different shuffled regions per loop
248248
return_regions = []
249-
for i in range(max_attemps):
249+
for i in range(max_attempts):
250250
regions_shuffle = random_coords(gs=f'{genome_fa}.fai', incl=incl, excl=excl, l=l, n=n, seed=rng.randint(1e5))
251251
for item in regions_shuffle.itertuples():
252252
subseq = genome_pyfaidx[item.chrom][item.start:item.end]
@@ -269,7 +269,7 @@ def make_motif_match(motif: motifs.Motif, genome_fa, l=500, n=1000, gc_content=0
269269
if len(return_regions) >= n:
270270
return pd.DataFrame(return_regions)[['chrom', 'start', 'end']]
271271

272-
logger.warning("Reach max attemps, return currently found motif matched regions, increase max_attemps if you need more regions")
272+
logger.warning("Reach max attempts, return currently found motif matched regions, increase max_attempts if you need more regions")
273273
return pd.DataFrame(return_regions)[['chrom', 'start', 'end']]
274274

275275
def random_coords(gs:str=None, genome:str=None, incl:BedTool=None, excl:BedTool=None,

0 commit comments

Comments
 (0)