99import logging
1010import pysam
1111import pyBigWig
12- import pybigtools
1312from Bio import motifs
1413from pyfaidx import Fasta
1514from multiprocessing import Pool
@@ -25,9 +24,6 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
2524 if backend == 'pyBigWig' :
2625 self .bw = pyBigWig .open (bw_path )
2726 self ._chroms = self .bw .chroms ()
28- elif backend == 'pybigtools' :
29- self .bw = pybigtools .open (bw_path )
30- self ._chroms = self .bw .chroms ()
3127 elif backend == 'memmap' :
3228 # load all chromosomes using np.memmap
3329 if memmap_dir is None :
@@ -36,8 +32,8 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
3632 self .memmap_dir = memmap_dir
3733 os .makedirs (self .memmap_dir , exist_ok = True )
3834
39- # open bigwig via pybigtools
40- self .bw = pybigtools .open (bw_path )
35+ # open bigwig via pyBigWig
36+ self .bw = pyBigWig .open (bw_path )
4137
4238 self .arrays = {}
4339 self ._chroms = {}
@@ -50,7 +46,7 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
5046 logger .info (f"[memmap] dumping { chrom } ..." )
5147
5248 # load full chromosome
53- data = self .bw .values (chrom , 0 , length , missing = 0.0 )
49+ data = np . nan_to_num ( self .bw .values (chrom , 0 , length , numpy = True ), nan = 0.0 ). astype ( np . float32 )
5450
5551 # save
5652 np .save (npy_path , data )
@@ -65,34 +61,18 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
6561 self .backend = backend
6662
def intervals(self, chrom):
    """Return the bigWig intervals recorded for ``chrom``.

    The call is forwarded unchanged to ``self.bw.intervals``; no
    per-backend dispatch is needed because the handle stored in
    ``self.bw`` exposes ``intervals`` directly.

    Args:
        chrom: Chromosome name to query.

    Returns:
        Whatever ``self.bw.intervals(chrom)`` returns.
    """
    return self.bw.intervals(chrom)
7265
def stats(self, chrom, type='mean', exact=True):
    """Return a single summary statistic for ``chrom`` as ``np.float32``.

    Both the ``'pyBigWig'`` and ``'memmap'`` backends delegate to
    ``self.bw.stats``: for the memmap backend this avoids loading full
    chromosome arrays into memory just to compute a summary.

    Args:
        chrom: Chromosome name to summarise.
        type: Name of the statistic, forwarded to ``self.bw.stats``
            (the parameter name shadows the builtin but is kept for
            backward compatibility with keyword callers).
        exact: Forwarded to ``self.bw.stats``.

    Returns:
        The first element of the ``self.bw.stats`` result converted to
        ``np.float32``, or ``None`` when ``self.backend`` is neither
        ``'pyBigWig'`` nor ``'memmap'``.
    """
    if self.backend in ('pyBigWig', 'memmap'):
        summary = self.bw.stats(chrom, type=type, exact=exact)
        return np.float32(summary[0])
    # Unknown backend: keep the historical fall-through result.
    return None
9072
9173 def values (self , chrom , start , end , missing = 0 ):
9274 if self .backend == 'pyBigWig' :
93- return np .nan_to_num (self .bw .values (chrom , start , end ), nan = missing ).astype (np .float32 )
94- elif self .backend == 'pybigtools' :
95- return self .bw .values (chrom , start , end , missing = missing ).astype (np .float32 )
75+ return np .nan_to_num (self .bw .values (chrom , start , end , numpy = True ), nan = missing ).astype (np .float32 )
9676 elif self .backend == 'memmap' :
9777 arr = self .arrays [chrom ]
9878
0 commit comments