Skip to content

Commit e717c01

Browse files
authored
Merge pull request #108 from RBL-NCI/activeDev
Active dev
2 parents 04d5260 + d18bc31 commit e717c01

29 files changed

Lines changed: 6463 additions & 3695 deletions

config/cluster_config.yaml

100755100644
Lines changed: 98 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
__default__:
33
gres: lscratch:96
44
mem: 40g
5-
partition: norm
6-
time: 00-02:00:00
5+
partition: ccr,norm
6+
time: 00-08:00:00
77
threads: 32
88
output: .%j.{wildcards}.out
99
error: .%j.{wildcards}.err
1010

1111
qc_barcode:
12-
threads: 3
13-
mem: 3g
12+
threads: 4
13+
mem: 100g
1414
time: 00-04:00:00
1515

1616
demultiplex:
@@ -20,16 +20,16 @@ demultiplex:
2020

2121
remove_adaptors:
2222
threads: 16
23-
time: 1-00:00:00
23+
time: 01-00:00:00
2424
mem: 32g
2525

2626
qc_fastq_pre:
27-
threads: 3
27+
threads: 4
2828
mem: 3g
2929
time: 00-03:00:00
3030

3131
qc_fastq_post:
32-
threads: 3
32+
threads: 4
3333
mem: 3g
3434
time: 00-03:00:00
3535

@@ -38,90 +38,147 @@ qc_screen_validator:
3838
time: 00-03:00:00
3939

4040
split_files:
41-
threads: 3
41+
threads: 4
4242
mem: 3g
4343
time: 00-03:00:00
4444

4545
novoalign:
4646
mem: 50g
47+
threads: 32
4748
time: 10-00:00:00
4849

4950
cleanup_conversion:
50-
threads: 5
51+
threads: 8
5152
gres: lscratch:256
52-
mem: 30g
53-
time: 00-3:00:00
53+
mem: 200g
54+
time: 01-00:00:00
5455

5556
merge_unmapped_splits:
57+
threads: 8
5658
time: 01-00:00:00
57-
mem: 75g
59+
mem: 200g
5860

5961
create_bam_mm_unique:
60-
threads: 6
62+
threads: 8
6163
gres: lscratch:256
62-
mem: 30g
63-
time: 09-00:00:00
64+
mem: 200g
65+
time: 01-00:00:00
6466

6567
merge_splits_unique_mm:
66-
mem: 512g
67-
time: 03-00:00:00
68-
partition: largemem
68+
threads: 32
69+
mem: 200g
70+
time: 01-00:00:00
6971

7072
merge_mm_and_unique:
71-
threads: 2
73+
threads: 8
7274
gres: lscratch:256
73-
mem: 5g
74-
time: 02-00:00:00
75+
mem: 200g
76+
time: 01-00:00:00
7577

7678
qc_alignment:
77-
mem: 10g
79+
threads: 4
80+
mem: 200g
7881

7982
qc_troubleshoot:
80-
threads: 3
83+
threads: 4
8184
mem: 3g
8285

8386
dedup:
84-
threads: 2
85-
mem: 64g
87+
threads: 8
88+
mem: 200g
8689
gres: lscratch:256
87-
time: 01-00:00:00
90+
time: 02-00:00:00
8891

8992
create_beds_safs:
90-
mem: 350g
91-
gres: lscratch:256
93+
mem: 200g
94+
gres: lscratch:512
95+
threads: 8
96+
97+
bgzip_beds:
98+
mem: 100g
9299
threads: 4
93-
partition: largemem
94100

95101
project_annotations:
96102
threads: 2
97103
mem: 10g
98104
time: 00-01:00:00
99105

100-
peak_annotations:
101-
threads: 3
106+
peak_junctions:
107+
threads: 4
102108
gres: lscratch:128
103109
mem: 30g
104-
time: 00-12:00:00
110+
time: 04-00:00:00
111+
112+
peak_Transcripts:
113+
threads: 4
114+
gres: lscratch:128
115+
mem: 30g
116+
time: 04-00:00:00
117+
118+
peak_ExonIntron:
119+
threads: 4
120+
gres: lscratch:128
121+
mem: 30g
122+
time: 04-00:00:00
105123

124+
peak_RMSK:
125+
threads: 4
126+
gres: lscratch:128
127+
mem: 30g
128+
time: 04-00:00:00
129+
106130
annotation_report:
107-
mem: 10g
131+
threads: 4
132+
gres: lscratch:128
133+
mem: 30g
134+
time: 00-12:00:00
108135

136+
MANORM_beds:
137+
threads: 4
138+
mem: 30g
139+
140+
DIFFBIND_beds:
141+
threads: 4
142+
mem: 30g
143+
109144
MANORM_analysis:
110145
threads: 4
111146
mem: 30g
147+
time: 04-00:00:00
148+
149+
DIFFBIND_preprocess:
150+
threads: 4
151+
mem: 30g
152+
153+
DIFFBIND_analysis:
154+
threads: 4
155+
mem: 30g
112156

113157
MANORM_post_processing:
114158
threads: 2
115-
mem: 2g
116-
time: 00-01:00:00
159+
mem: 30g
160+
time: 00-12:00:00
161+
162+
DIFFBIND_report:
163+
threads: 4
164+
mem: 30g
117165

118166
MANORM_RMD:
119167
threads: 2
120-
mem: 3g
121-
time: 00-01:00:00
168+
mem: 30g
169+
time: 00-02:00:00
122170

123171
mapq_recalc:
124-
mem: 1TB
125-
gres: lscratch:256
126-
partition: largemem
127-
time: 00-06:00:00
172+
threads: 8
173+
mem: 200g
174+
gres: lscratch:512
175+
time: 00-12:00:00
176+
177+
mapq_stats:
178+
threads: 8
179+
mem: 200g
180+
181+
feature_counts:
182+
threads: 8
183+
mem: 200g
184+

config/snakemake_config.yaml

100755100644
Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,33 @@
1+
#########################################################################################
12
# Global configuration file for the pipeline
3+
#########################################################################################
4+
5+
#########################################################################################
6+
#Folders and Paths
7+
#########################################################################################
28
#path to iCLIP directory
39
sourceDir: "PIPELINE_HOME"
4-
510
#path to output directory
611
outputDir: "OUTPUT_DIR"
7-
812
#path to manifest files
9-
sampleManifest: "OUTPUT_DIR/manifest/samples.tsv"
10-
multiplexManifest: "OUTPUT_DIR/manifest/multiplex.tsv"
11-
contrastManifest: "OUTPUT_DIR/manifest/contrasts.tsv"
12-
13+
sampleManifest: "OUTPUT_DIR/manifests/samples.tsv"
14+
multiplexManifest: "OUTPUT_DIR/manifests/multiplex.tsv"
15+
contrastManifest: "OUTPUT_DIR/manifests/contrasts.tsv"
1316
#path to fastq files
1417
fastqDir: "/path/to/fastq/files/"
1518

19+
########################################################################################
1620
#user parameters
17-
filterlength: 20 #minimum read length to include in analysis [any int >20]
18-
multiplexflag: "Y" #whether samples are multiplexed ["Y","N"]
21+
#########################################################################################
22+
filterlength: 20 #minimum read length to include in analysis [any int >=20]; this is also the novoalign l parameter
23+
multiplexflag: "N" #whether samples are multiplexed ["Y","N"]
1924
mismatch: 1 #if multiplexed, number of bp mismatches allowed in demultiplexing [1,2,3]
20-
reference: "hg38" #reference organism ["hg38","mm10"]
25+
reference: "mm10" #reference organism ["hg38","mm10"]
2126
spliceaware: "Y" #whether to include the splice-aware feature for alignment ["Y","N"]
2227
includerRNA: "Y" #if spliceaware, include RefSeq rRNAs in annotations ["Y", "N"]
2328
spliceBPlength: 75 #if spliceaware Y, length of splice index to use [50, 75, 150]
2429
splicejunction: "Y" #if spliceaware Y, include splice junctions in peak calls for DE_METHOD MANORM or DIFFBIND ["Y", "N"]
25-
condenseexon: "Y" #if spliceaware Y, if there are multiple peaks in the same transcript, whether to combine into one feature ["Y", "N"]
30+
AnnoAnchor: "max_total" #annotations for spliced peaks are based on either the 5'-most region or the region with the most reads ["max_total","5prime"]
2631
mincount: 3 #minimum number of reads to count as a peak [1,2,3]
2732
ntmerge: 10 #minimum distance of nucleotides to merge peaks [any integer >=1, default 10]
2833
peakid: "ALL" #report peaks for unique peaks only or unique and fractional mm ["unique","all"]
@@ -32,7 +37,37 @@ pval: 0.005 #if DEmethod, pval cutoff for significance
3237
fc: 1 #if DEmethod, fold change cut off for significance
3338
splitSampleNChunks: 96 #split samples into N chunks to speed up compute heavy tasks like alignment [1 - 99]
3439

40+
#########################################################################################
41+
# novoalign parameters "test5" --> new iCLIP pipeline defaults
42+
#########################################################################################
43+
novoalign_x: 6
44+
novoalign_g: 40
45+
novoalign_s: 2
46+
novoalign_t: "20,3"
47+
novoalign_R: 5
48+
novoalign_r_mode: "EXHAUSTIVE"
49+
novoalign_r_int: 999 # number of alignments to report
50+
51+
#########################################################################################
52+
# novoalign parameters old iCLIP pipeline defaults
53+
#########################################################################################
54+
#novoalign_x: 4
55+
#novoalign_g: 20
56+
#novoalign_s: 1
57+
#novoalign_t: "15,3"
58+
#novoalign_R: 0
59+
#novoalign_r_mode: "EXHAUSTIVE"
60+
#novoalign_r_int: 999 # integer represents number of alignments to be reported out
61+
62+
#########################################################################################
63+
# Useq parameters
64+
#########################################################################################
65+
useq_a: 50000
66+
useq_n: 999 # set to match novoalign_r_int (number of alignments reported); was previously 25
67+
68+
#########################################################################################
3569
#modules, container parameters
70+
#########################################################################################
3671
containerDir: "/data/CCBR_Pipeliner/iCLIP/container"
3772
fastq_val: "/data/CCBR_Pipeliner/iCLIP/bin/fastQValidator"
3873
bedtools: "bedtools/2.29.2"
@@ -44,7 +79,7 @@ manorm: "manorm/1.1.4"
4479
multiqc: "multiqc/1.9"
4580
novocraft: "novocraft/4.03.01"
4681
perl: "perl/5.24.3"
47-
python: "python/3.7"
82+
python: "python/3.8"
4883
Qt: "Qt/5.13.2"
4984
singularity: "singularity"
5085
samtools: "samtools/1.11"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
group,background
2+
Ro_Clip,Control_Clip

manifests/multiplex_example.tsv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
file_name multiplex
2-
test_1.fastq.gz test_1
1+
file_name,multiplex
2+
test_1.fastq.gz,test_1

manifests/samples_example.tsv

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
multiplex sample group barcode adaptor
2-
test_1 Ro_Clip CLIP NNNTGGCNN AGATCGGAAGAGCGGTTCAG
3-
test_1 Control_Clip CNTRL NNNCGGANN AGATCGGAAGAGCGGTTCAG
1+
multiplex,sample,group,barcode,adaptor
2+
test_1,Ro_Clip,CLIP,NNNTGGCNN,AGATCGGAAGAGCGGTTCAG
3+
test_1,Control_Clip,CNTRL,NNNCGGANN,AGATCGGAAGAGCGGTTCAG

0 commit comments

Comments
 (0)