88import logging
99import pysam
1010import pyBigWig
11+ import pybigtools
1112from Bio import motifs
1213from pyfaidx import Fasta
1314from multiprocessing import Pool
1415from pybedtools import Interval , BedTool
1516from pybedtools .helpers import chromsizes
1617
18+ from seqchromloader import config
19+
1720logger = logging .getLogger (__name__ )
1821
class BigWig():
    """Thin wrapper giving pyBigWig and pybigtools handles one common interface.

    The wrapped handle is stored in ``self.bw`` and the selected backend name
    in ``self.backend``. Every method dispatches on ``self.backend``.
    """

    def __init__(self, bw_path, backend='pyBigWig'):
        """Open ``bw_path`` with the requested backend.

        :param bw_path: path passed unchanged to the backend's ``open``.
        :param backend: ``'pyBigWig'`` selects pyBigWig; any other value
            selects pybigtools.
        """
        if backend == 'pyBigWig':
            self.bw = pyBigWig.open(bw_path)
        else:
            self.bw = pybigtools.open(bw_path)
        self.backend = backend

    def intervals(self, chrom):
        """Return the backend's intervals (pyBigWig) or records (pybigtools) for ``chrom``."""
        if self.backend == 'pyBigWig':
            return self.bw.intervals(chrom)
        return self.bw.records(chrom)

    def stats(self, chrom, type='mean'):
        """Return a single summary statistic computed over ``chrom``.

        :param chrom: chromosome name.
        :param type: name of the statistic, forwarded to the backend
            (``type=`` for pyBigWig, ``summary=`` for pybigtools).
            NOTE(review): pybigtools presumably accepts fewer statistics than
            pyBigWig (e.g. no ``'std'``) and should reject unsupported
            names -- confirm against the installed pybigtools version.
        """
        if self.backend == 'pyBigWig':
            return self.bw.stats(chrom, type=type, exact=True)[0]
        # Forward the requested statistic instead of always computing the
        # mean, so a caller asking for another statistic never silently
        # receives the mean.
        summary = self.bw.values(
            chrom, missing=np.nan, bins=1, exact=True, summary=type
        )
        return summary[0].item()

    def values(self, chrom, start, end, missing=0):
        """Return the signal over ``chrom:start-end`` as a float32 array.

        :param chrom: chromosome name.
        :param start: interval start, forwarded to the backend.
        :param end: interval end, forwarded to the backend.
        :param missing: value substituted for positions without data
            (NaN in the pyBigWig result). Defaults to 0.
        """
        if self.backend == 'pyBigWig':
            raw = self.bw.values(chrom, start, end)
            # nan_to_num with nan=0 matches the previous behaviour exactly;
            # other values of `missing` are now honoured.
            return np.nan_to_num(raw, nan=missing).astype(np.float32)
        filled = self.bw.values(chrom, start, end, missing=float(missing))
        return filled.astype(np.float32)

    def chroms(self):
        """Return the chromosome listing reported by the underlying handle."""
        # Delegate to the wrapped handle; calling self.chroms() here would
        # recurse until RecursionError.
        return self.bw.chroms()

    def close(self):
        """Close the underlying file handle."""
        self.bw.close()
53+
1954def get_genome_sizes (gs = None , genome = None , to_filter = None , to_keep = None ):
2055 """
2156 Loads the genome sizes file, filter or keep chromosomes
@@ -372,7 +407,7 @@ def compute_mean_std_bigwig(bigwig):
372407 :type bigwig: str
373408 :rtype: (mean, stddev)
374409 """
375- bw = pyBigWig . open (bigwig )
410+ bw = BigWig (bigwig )
376411
377412 # get chrom length list
378413 chroms = bw .chroms ()
@@ -485,7 +520,7 @@ def extract_bw(chrom, start, end, strand, bigwigs):
485520 chroms_array = []
486521 try :
487522 for idx , bigwig in enumerate (bigwigs ):
488- c = ( np . nan_to_num ( bigwig .values (chrom , start , end ))). astype ( np . float32 )
523+ c = bigwig .values (chrom , start , end )
489524 if strand == "-" :
490525 c = c [::- 1 ]
491526 chroms_array .append (c )
@@ -508,9 +543,9 @@ def extract_dnaOneHot(chrom, start, end, strand, genome_pyfaidx):
508543def extract_single_target (chrom , start , end , strand , target ):
509544 if isinstance (target , pysam .AlignmentFile ):
510545 target_array = np .array (target .count (chrom , start , end ), dtype = np .float32 )[np .newaxis ]
511- elif isinstance (target , pyBigWig . pyBigWig ):
546+ elif isinstance (target , BigWig ):
512547 try :
513- target_array = np . nan_to_num ( target .values (chrom , start , end )). astype ( np . float32 )
548+ target_array = target .values (chrom , start , end )
514549 if strand == "-" :
515550 target_array = target_array [::- 1 ]
516551 except RuntimeError as e :
0 commit comments