Skip to content

Commit 50b01b1

Browse files
author
Alicia Schep
committed
Merge branch 'gamma' to base the NFR fit on a gamma distribution rather than an exponential distribution. This is more robust and corresponds better to the observed distributions.
2 parents 225a51e + 19937ed commit 50b01b1

7 files changed

Lines changed: 243212 additions & 21 deletions

File tree

example/README.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11

2-
Example files to test out functions
2+
#This folder contains example files to test out functions
3+
4+
#example.bam was created by intersecting a bam file with example.slopped.bed (which has a 1000bp flank on each side of example.bed)
5+
6+
7+
#To test out nucleoatac installation, try:
8+
9+
nucleoatac run --bed example.bed --bam example.bam --fasta sacCer3.fa --out test_example
10+
311

4-
example.bam created by intersecting a bam file with example.slopped.bed (which has 1000bp flank on each side of example.bed)
512

6-
example.Scores.bw has scores for ranges in example.slopped.bed
713

814

example/sacCer3.fa

Lines changed: 243167 additions & 0 deletions
Large diffs are not rendered by default.

example/sacCer3.fa.fai

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
chrI 230218 6 50 51
2+
chrII 813184 234836 50 51
3+
chrIII 316620 1064292 50 51
4+
chrIV 1531933 1387252 50 51
5+
chrV 576874 2949830 50 51
6+
chrVI 270161 3538249 50 51
7+
chrVII 1090940 3813822 50 51
8+
chrVIII 562643 4926590 50 51
9+
chrIX 439888 5500493 50 51
10+
chrX 745751 5949185 50 51
11+
chrXI 666816 6709859 50 51
12+
chrXII 1078177 7390020 50 51
13+
chrXIII 924431 8489770 50 51
14+
chrXIV 784333 9432698 50 51
15+
chrXV 1091291 10232725 50 51
16+
chrXVI 948066 11345850 50 51
17+
chrM 85779 12312884 50 51

nucleoatac/Occupancy.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pyatac.utils import smooth, call_peaks, read_chrom_sizes_from_fasta
1616
from pyatac.chunkmat2d import FragmentMat2D, BiasMat2D
1717
from pyatac.bias import InsertionBiasTrack, PWM
18+
from scipy.special import gamma
1819

1920

2021
class FragmentMixDistribution:
@@ -25,24 +26,24 @@ def __init__(self, lower = 0, upper =2000):
2526
def getFragmentSizes(self, bamfile, chunklist = None):
2627
self.fragmentsizes = FragmentSizes(self.lower, self.upper)
2728
self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist)
28-
def modelNFR(self, boundary = 115):
29-
"""Model NFR distribution with exponential distribution"""
30-
b = np.where(self.fragmentsizes.get(self.lower,boundary) == max(self.fragmentsizes.get(self.lower,boundary)))[0][0]+10 + self.lower
31-
def exp_pdf(x,*p): #defines the PDF
32-
k=p[0]
33-
a=p[1]
34-
x=x-b
35-
return a*k*np.exp(-k*x)
36-
x = np.array(range(b,boundary))
37-
p0 = (.1,1)
38-
coeff, var_matrix = optimize.curve_fit(exp_pdf,x, self.fragmentsizes.get(b,boundary),
29+
def modelNFR(self, boundaries = (45,115)):
30+
"""Model NFR distribution with gamma distribution"""
31+
smoothed = smooth(self.fragmentsizes.get(),19, window = "gaussian",sd = 3, mode='same')
32+
def gamma_pdf(x,*p): #defines the PDF
33+
k = p[0]
34+
theta =p[1]
35+
a = p[2]
36+
return a * x**(k-1) * np.exp(-x/theta) / (theta**k * gamma(k))
37+
x = np.arange(boundaries[0],boundaries[1])
38+
p0 = (3, 25, 1)
39+
coeff, var_matrix = optimize.curve_fit(gamma_pdf,x, self.fragmentsizes.get(boundaries[0],boundaries[1]),
3940
p0=p0)
40-
nfr = np.concatenate((self.fragmentsizes.get(self.lower,boundary), exp_pdf(np.array(range(boundary,self.upper)),*coeff)))
41+
nfr = np.concatenate((self.fragmentsizes.get(self.lower,boundaries[1]), gamma_pdf(np.array(range(boundaries[1],self.upper)),*coeff)))
4142
nfr[nfr==0] = min(nfr[nfr!=0])*0.01
4243
self.nfr_fit = FragmentSizes(self.lower,self.upper, vals = nfr)
43-
nuc = np.concatenate((np.zeros(boundary-self.lower),
44-
self.fragmentsizes.get(boundary,self.upper) -
45-
self.nfr_fit.get(boundary,self.upper)))
44+
nuc = np.concatenate((np.zeros(boundaries[1]-self.lower),
45+
self.fragmentsizes.get(boundaries[1],self.upper) -
46+
self.nfr_fit.get(boundaries[1],self.upper)))
4647
nuc[nuc<=0]=min(min(nfr)*0.1,min(nuc[nuc>0])*0.001)
4748
self.nuc_fit = FragmentSizes(self.lower, self.upper, vals = nuc)
4849
def plotFits(self,filename=None):

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
setup(name='NucleoATAC',
11-
version='0.2.3',
11+
version='0.3.0',
1212
description='python package for calling nucleosomes with ATAC-Seq',
1313
classifiers=[
1414
'Development Status :: 3 - Alpha',

tests/test_var.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from unittest import TestCase
22
import numpy as np
33
import nucleoatac.NucleosomeCalling as Nuc
4-
import nucleoatac.VMat as V
4+
import pyatac.VMat as V
55
from pyatac.chunkmat2d import BiasMat2D
66
from pyatac.chunk import ChunkList
77
from pyatac.bias import InsertionBiasTrack

tests/test_xcor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33

44
import nucleoatac.NucleosomeCalling as Nuc
5-
import nucleoatac.VMat as V
5+
import pyatac.VMat as V
66
from pyatac.chunkmat2d import FragmentMat2D
77
from pyatac.chunk import ChunkList
88

0 commit comments

Comments
 (0)