-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathutils.py
More file actions
97 lines (79 loc) · 2.83 KB
/
utils.py
File metadata and controls
97 lines (79 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from six import string_types
import numpy as np
import pyranges
from kipoiseq.extractors import MultiSampleVCF, FastaStringExtractor
# Alphabets: ordered symbol lists, one single-character string per symbol.
DNA = list("ACGT")
RNA = list("ACGU")
# 22 one-letter codes; note that "B" and "Z" are part of this list.
AMINO_ACIDS = list("ARNDBCEQZGHILKMFPSTWYV")

# Lookup table from alphabet name to the list objects defined above.
alphabets = dict(DNA=DNA, RNA=RNA, AMINO_ACIDS=AMINO_ACIDS)
def to_scalar(obj):
    """Convert a numpy scalar to the equivalent native Python scalar.

    # Arguments:
        obj: any object.

    # Returns:
        `obj.item()` if `obj` is an instance of `np.generic`,
        otherwise `obj` itself, unchanged.
    """
    if isinstance(obj, np.generic):
        # `np.asscalar` was deprecated in NumPy 1.16 and removed in 1.23;
        # `.item()` is the operation it wrapped and works on all versions.
        return obj.item()
    return obj
def parse_alphabet(alphabet):
    """Normalize an alphabet specification.

    # Arguments:
        alphabet: either a `str` or any other object.

    # Returns:
        A list with one element per character when `alphabet` is a `str`;
        otherwise `alphabet` itself, unchanged.
    """
    return list(alphabet) if isinstance(alphabet, str) else alphabet
def parse_dtype(dtype):
    """Resolve a dtype given as a string into the object it names.

    # Arguments:
        dtype: a string expression (for example `"np.float32"`) or an
            already-resolved dtype object.

    # Returns:
        The result of evaluating `dtype` when it is a string; otherwise
        `dtype` itself, unchanged.

    # Raises:
        ValueError: if evaluating the string raises any `Exception`.
    """
    # Anything that is not a string is handed back untouched.
    if not isinstance(dtype, string_types):
        return dtype

    # NOTE(review): `eval` executes arbitrary code. This is only safe while
    # `dtype` strings come from trusted configuration - confirm with callers.
    try:
        return eval(dtype)
    except Exception as err:
        raise ValueError(
            "Unable to parse dtype: {}. \nException: {}".format(dtype, err))
def _get_chrom_annotation(source):
    """Return the set of chromosome names annotated in `source`.

    # Arguments:
        source: a `FastaStringExtractor`, `MultiSampleVCF` or
            `pyranges.PyRanges` object. Instances of subclasses are
            accepted as well.

    # Returns:
        set built from `source.fasta.keys()` (fasta), `source.seqnames`
        (vcf) or `source.Chromosome` (pyranges).

    # Raises:
        ValueError: if the type of `source` is not one of the supported ones.
    """
    # `isinstance` (rather than an exact `type(...) ==` comparison) so that
    # subclasses of the supported types are not rejected.
    if isinstance(source, FastaStringExtractor):
        return set(source.fasta.keys())
    if isinstance(source, MultiSampleVCF):
        return set(source.seqnames)
    if isinstance(source, pyranges.PyRanges):
        return set(source.Chromosome)

    raise ValueError('source `%s` is not valid because '
                     'source type `%s` is not supported.'
                     % (repr(source), type(source)))
def compare_chrom_annotation(sources, strategy='some', core_chroms=None):
    """Compares chromosome annotations from different sources.

    Throws an exception if and only if the annotations are not compatible.

    # Arguments:
        sources: list of different objects. vcf, fasta, pyranges are valid.
        strategy: comparison strategy. `some` means some intersection is
            expected; `all` means all chromosomes should be the same.
        core_chroms: chromosomes that must exist. NOTE: this argument is
            accepted but currently not used by the implementation.

    # Returns:
        chroms common across files (a set).

    # Raises:
        ValueError: if fewer than two sources are given, or if `strategy`
            is neither `'all'` nor `'some'`.
        AssertionError: if the annotations are not compatible under the
            chosen strategy.

    # Example:
        ```python
          >>> sources = [
                MultiSampleVCF(...),
                FastaStringExtractor(...),
                pyranges,
                pyranges,
                MultiSampleVCF(...)
              ]
          >>> compare_chrom_annotation(sources, strategy='all')
        ```
    """
    if len(sources) < 2:
        raise ValueError(
            'At least two item should gived as sources to compare')

    # Reject unknown strategies up front instead of silently returning None.
    if strategy not in ('all', 'some'):
        raise ValueError(
            "Unknown strategy `%s`: expected 'all' or 'some'." % (strategy,))

    chroms = [_get_chrom_annotation(source) for source in sources]

    # The checks below raise AssertionError explicitly (instead of using the
    # `assert` statement) so they are still enforced under `python -O`.
    if strategy == 'all':
        if any(chroms[0] != other for other in chroms[1:]):
            raise AssertionError('chroms annotations are not all same.')
        return chroms[0]

    chrom_intersect = set.intersection(*chroms)
    if not chrom_intersect:
        raise AssertionError(
            'there is not intersection between chromosomes.')
    return chrom_intersect