Skip to content

Commit 0996659

Browse files
committed
remove pybigtools backend & bug fixes
1 parent 9649b3e commit 0996659

8 files changed

Lines changed: 36 additions & 42 deletions

File tree

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ dependencies = [
1919
'pybigwig>=0.3.0',
2020
'torch>=1.10.0',
2121
'webdataset>=0.2.0',
22-
'pybigtools>=0.2.5, <0.3'
2322
]
2423
license = "MIT AND (Apache-2.0 OR BSD-2-Clause)"
2524
keywords = ["interpretation", "attribution", "concept", "genomics", "deep learning"]

seqchromloader/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@
88

99
import logging
1010
def mute_warning():
11-
loader_logger.setLevel(logging.ERROR)
12-
writer_logger.setLevel(logging.ERROR)
13-
utils_logger.setLevel(logging.ERROR)
11+
loader_logger.setLevel(logging.INFO)
12+
writer_logger.setLevel(logging.INFO)
13+
utils_logger.setLevel(logging.INFO)

seqchromloader/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
def set_bigwig_backend(backend):
44
global BIGWIG_BACKEND
55

6-
assert backend in ['pyBigWig', 'pybigtools', 'memmap'], "Backend options are pyBigWig, memmap or pybigtools!"
6+
assert backend in ['pyBigWig', 'memmap'], "Backend options are pyBigWig or memmap!"
77

88
BIGWIG_BACKEND = backend

seqchromloader/loader.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,13 @@
77
import math
88
import logging
99
import torch
10-
import random
1110
import pysam
1211
import pyfaidx
13-
import pybigtools
1412
import numpy as np
1513
import pandas as pd
1614
import webdataset as wds
17-
from math import sqrt, ceil
1815
from itertools import islice
19-
from torch.utils.data import Dataset, IterableDataset, DataLoader
16+
from torch.utils.data import IterableDataset, DataLoader
2017
from pybedtools import BedTool
2118

2219
from seqchromloader import utils, config

seqchromloader/utils.py

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import logging
1010
import pysam
1111
import pyBigWig
12-
import pybigtools
1312
from Bio import motifs
1413
from pyfaidx import Fasta
1514
from multiprocessing import Pool
@@ -25,9 +24,6 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
2524
if backend == 'pyBigWig':
2625
self.bw = pyBigWig.open(bw_path)
2726
self._chroms = self.bw.chroms()
28-
elif backend == 'pybigtools':
29-
self.bw = pybigtools.open(bw_path)
30-
self._chroms = self.bw.chroms()
3127
elif backend == 'memmap':
3228
# load all chromosomes using np.memmap
3329
if memmap_dir is None:
@@ -36,8 +32,8 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
3632
self.memmap_dir = memmap_dir
3733
os.makedirs(self.memmap_dir, exist_ok=True)
3834

39-
# open bigwig via pybigtools
40-
self.bw = pybigtools.open(bw_path)
35+
# open bigwig via pyBigWig
36+
self.bw = pyBigWig.open(bw_path)
4137

4238
self.arrays = {}
4339
self._chroms = {}
@@ -50,7 +46,7 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
5046
logger.info(f"[memmap] dumping {chrom}...")
5147

5248
# load full chromosome
53-
data = self.bw.values(chrom, 0, length, missing=0.0)
49+
data = np.nan_to_num(self.bw.values(chrom, 0, length, numpy=True), nan=0.0).astype(np.float32)
5450

5551
# save
5652
np.save(npy_path, data)
@@ -65,34 +61,18 @@ def __init__(self, bw_path, memmap_dir=None, backend='pyBigWig'):
6561
self.backend = backend
6662

6763
def intervals(self, chrom):
68-
if self.backend == 'pyBigWig':
69-
return self.bw.intervals(chrom)
70-
else:
71-
return self.bw.records(chrom)
64+
return self.bw.intervals(chrom)
7265

7366
def stats(self, chrom, type='mean', exact=True):
7467
if self.backend == 'pyBigWig':
75-
return self.bw.stats(chrom, type=type, exact=exact)[0]
76-
elif self.backend == 'pybigtools':
77-
return self.bw.values(chrom, missing=np.nan, bins=1, exact=exact, summary=type)[0].item()
68+
return np.float32(self.bw.stats(chrom, type=type, exact=exact)[0])
7869
elif self.backend == 'memmap':
79-
arr = self.arrays[chrom]
80-
if type == 'mean':
81-
return float(np.mean(arr))
82-
elif type == 'max':
83-
return float(np.max(arr))
84-
elif type == 'min':
85-
return float(np.min(arr))
86-
elif type == 'std':
87-
return float(np.std(arr))
88-
else:
89-
raise NotImplementedError(f"stat {type} not implemented for memmap backend")
70+
# use pyBigWig to obtain stats to avoid load full chromosome arrays into memory
71+
return np.float32(self.bw.stats(chrom, type=type, exact=exact)[0])
9072

9173
def values(self, chrom, start, end, missing=0):
9274
if self.backend == 'pyBigWig':
93-
return np.nan_to_num(self.bw.values(chrom, start, end), nan=missing).astype(np.float32)
94-
elif self.backend == 'pybigtools':
95-
return self.bw.values(chrom, start, end, missing=missing).astype(np.float32)
75+
return np.nan_to_num(self.bw.values(chrom, start, end, numpy=True), nan=missing).astype(np.float32)
9676
elif self.backend == 'memmap':
9777
arr = self.arrays[chrom]
9878

tests/test_backend_memmap.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from test_writer_loader import Test
2+
import unittest, seqchromloader
3+
4+
class TestBackendMemmap(Test, unittest.TestCase):
5+
seqchromloader.config.set_bigwig_backend("memmap")
6+
7+
if __name__ == "__main__":
8+
unittest.main(verbosity=2)

tests/test_backend_pybigwig.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from test_writer_loader import Test
2+
import unittest, seqchromloader
3+
4+
class TestBackendPyBigWig(Test, unittest.TestCase):
5+
seqchromloader.config.set_bigwig_backend("pyBigWig")
6+
7+
if __name__ == "__main__":
8+
unittest.main(verbosity=2)
9+

tests/test_writer_loader.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020

2121
import seqchromloader
2222

23-
seqchromloader.config.set_bigwig_backend('pybigtools')
23+
#seqchromloader.config.set_bigwig_backend('memmap')
2424

25-
class Test(unittest.TestCase):
25+
class Test:
2626
def setUp(self) -> None:
2727
pass
2828

@@ -40,6 +40,10 @@ def tearDownClass(cls) -> None:
4040
def assertIsFile(self, path):
4141
if not pl.Path(path).resolve().is_file():
4242
raise AssertionError("File does not exist: %s" % str(path))
43+
44+
def test_compute_mean_std_bw(self):
45+
bw_f = "data/sample.bw"
46+
mean, std = seqchromloader.utils.compute_mean_std_bigwig(bw_f)
4347

4448
def test_get_genome_sizes(self):
4549
genome_sizes_nochr10=get_genome_sizes(genome="mm10", to_filter=["chr10"])
@@ -389,6 +393,3 @@ def test_chrom_transform(chrom):
389393

390394
def test_target_transform(target):
391395
return target * 3
392-
393-
if __name__ == "__main__":
394-
unittest.main(verbosity=2)

0 commit comments

Comments
 (0)