Skip to content

Commit db8aa6f

Browse files
authored
Merge pull request #6 from CEGRcode/validation
Validation DelID using Simulated Synthetic Deletion data
2 parents f8b9c64 + 4e7eb30 commit db8aa6f

66 files changed

Lines changed: 40131 additions & 134 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

paper/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ SyntheticEpitope/logs/*.err-*
1313
SyntheticEpitope/logs/*.out-*
1414
SyntheticStrain/logs/*.err-*
1515
SyntheticStrain/logs/*.out-*
16+
YKOC-wgs/logs/*.err-*
17+
YKOC-wgs/logs/*.out-*
1618
ENCODEdata-eGFP/logs/*.out-*
1719
ENCODEdata-eGFP/logs/*.err-*
1820
ENCODEdata-eGFP/results/FASTQ
Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,12 @@
1-
sacCer3_Reb1 10K 0
2-
sacCer3_Reb1 100K 1000
3-
sacCer3_Reb1 1M 2000
4-
sacCer3_Reb1 10M 3000
5-
sacCer3_Rap1 10K 4000
6-
sacCer3_Rap1 100K 5000
7-
sacCer3_Rap1 1M 6000
8-
sacCer3_Rap1 10M 7000
9-
sacCer3_Reb1 2M 8000
10-
sacCer3_Rap1 2M 9000
11-
#hg19_CTCF 100K 8000
12-
#hg19_CTCF 1M 9000
13-
#hg19_CTCF 10M 10000
14-
#hg19_CTCF 50M 11000
15-
#hg19_POLR2H 100K 12000
16-
#hg19_POLR2H 1M 13000
17-
#hg19_POLR2H 10M 14000
18-
#hg19_POLR2H 50M 15000
1+
sacCer3_Rap1 500K 0
2+
sacCer3_Rap1 1M 1000
3+
sacCer3_Rap1 2M 2000
4+
sacCer3_Rap1 3M 3000
5+
sacCer3_Rap1 4M 4000
6+
sacCer3_Rap1 5M 5000
7+
sacCer3_Reb1 500K 6000
8+
sacCer3_Reb1 1M 7000
9+
sacCer3_Reb1 2M 8000
10+
sacCer3_Reb1 3M 9000
11+
sacCer3_Reb1 4M 10000
12+
sacCer3_Reb1 5M 11000
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
DEPTH_ORDER 500K 1M 2M 3M 4M 5M
2+
STRAIN_COLOR Rap1-del darkorange
3+
STRAIN_COLOR Reb1-del purple
4+
results/RAP1_500K_tally.txt Rap1-del 500K
5+
results/RAP1_1M_tally.txt Rap1-del 1M
6+
results/RAP1_2M_tally.txt Rap1-del 2M
7+
results/RAP1_3M_tally.txt Rap1-del 3M
8+
results/RAP1_4M_tally.txt Rap1-del 4M
9+
results/RAP1_5M_tally.txt Rap1-del 5M
10+
results/REB1_500K_tally.txt Reb1-del 500K
11+
results/REB1_1M_tally.txt Reb1-del 1M
12+
results/REB1_2M_tally.txt Reb1-del 2M
13+
results/REB1_3M_tally.txt Reb1-del 3M
14+
results/REB1_4M_tally.txt Reb1-del 4M
15+
results/REB1_5M_tally.txt Reb1-del 5M
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
DEPTH_ORDER 500K 1M 2M 3M 4M 5M
2+
STRAIN_COLOR Rap1-del darkorange
3+
STRAIN_COLOR Reb1-del purple
4+
RAP1_500K_runtime.txt Rap1-del 500K
5+
RAP1_1M_runtime.txt Rap1-del 1M
6+
RAP1_2M_runtime.txt Rap1-del 2M
7+
RAP1_3M_runtime.txt Rap1-del 3M
8+
RAP1_4M_runtime.txt Rap1-del 4M
9+
RAP1_5M_runtime.txt Rap1-del 5M
10+
REB1_500K_runtime.txt Reb1-del 500K
11+
REB1_1M_runtime.txt Reb1-del 1M
12+
REB1_2M_runtime.txt Reb1-del 2M
13+
REB1_3M_runtime.txt Reb1-del 3M
14+
REB1_4M_runtime.txt Reb1-del 4M
15+
REB1_5M_runtime.txt Reb1-del 5M
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Run scripts/make_barplot.py with the settings in fig3a_config.txt
# (-t Figure3A: presumably the plot title/output label -- confirm in the script).
python scripts/make_barplot.py -c fig3a_config.txt -t Figure3A
2+
# Run scripts/make_boxplot.py with the settings in fig3b_config.txt
# (-t Figure3B: presumably the plot title/output label -- confirm in the script).
python scripts/make_boxplot.py -c fig3b_config.txt -t Figure3B
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/bash
#PBS -l nodes=1:ppn=4
#PBS -l pmem=16gb
#PBS -l walltime=00:10:00
#PBS -A open
#PBS -o logs/depth.did.Rap1.1M.log.out
#PBS -e logs/depth.did.Rap1.1M.log.err
#PBS -t 1-1000

# PBS array job (tasks 1-1000). Each task confirms that its own simulated BAM
# and BAM index exist, links them into a private scratch directory, runs
# DeletionID on that single BAM, and reports the elapsed time.
# The locus/depth pair is read from line 2 of depth_simulations.txt.
# Exit status: non-zero when an input is missing or set-up fails; otherwise
# the exit status of identify-Deletion.sh.

module load gcc/8.3.1
module load bedtools/2.27.1
module load bwa/0.7.15
module load samtools/1.5
module load anaconda3
source activate genopipe

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/SyntheticDeletion
cd "$WRK" || { echo "Cannot change to working directory $WRK. Exiting." >&2; exit 1; }

# The task index names both the input BAM and the scratch directory, so refuse
# to run when it is missing (i.e. outside of an array job).
: "${PBS_ARRAYID:?PBS_ARRAYID is not set; submit this script as a PBS array job}"

# Columns 1 and 2 of the selected line are the locus and the sequencing depth.
INFO=$(sed "2q;d" depth_simulations.txt)
read -r LOCUS DEPTH _ <<< "$INFO"
if [ -z "$LOCUS" ] || [ -z "$DEPTH" ]; then
	echo "Could not read locus and depth from line 2 of depth_simulations.txt. Exiting." >&2
	exit 1
fi

OUTPUT=$WRK/results/${LOCUS}_${DEPTH}
BAM=$OUTPUT/BAM
ID=$OUTPUT/ID
TEMP=$WRK/temp2-$PBS_ARRAYID
# BAM is a directory; the file this task works on lives inside it.
SAMPLE=$BAM/Simulation_$PBS_ARRAYID.bam

#Check that BAM file was generated first
if [ ! -f "$SAMPLE" ]; then
	echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting." >&2
	exit 1
fi
#Check that BAM Index file exists
if [ ! -f "$SAMPLE.bai" ]; then
	echo "BAI missing for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting." >&2
	exit 1
fi
#Check if ID file already generated
#if [[ -f $ID/Simulation_$PBS_ARRAYID\_R1-ID.tab ]]; then
#	echo "ID already generated ($PBS_ARRAYID). Exiting.."
#	exit
#fi

# Remove the scratch directory on every exit path, including failures below.
trap 'rm -rf -- "$TEMP"' EXIT

mkdir -p logs "$ID" "$TEMP" || { echo "Cannot create working directories. Exiting." >&2; exit 1; }

# Set-up Temp directory: DeletionID is given a directory holding only this
# task's BAM and index (as symlinks named after the originals).
cd "$TEMP" || exit 1
echo "$BAM"
ln -s "$SAMPLE" || exit 1
ln -s "$SAMPLE.bai" || exit 1

DATABASE=$WRK/../db/sacCer3_Del
GENOPIPE=$WRK/../..

## Execute Single DeletionID and record time
cd "$GENOPIPE/DeletionID" || { echo "Cannot find $GENOPIPE/DeletionID. Exiting." >&2; exit 1; }
echo "**Begin executing DeletionID for ${LOCUS}_${DEPTH}..."
time bash identify-Deletion.sh -i "$TEMP" -o "$ID" -d "$DATABASE"
STATUS=$?
echo "...single DeletionID for ${LOCUS} ${DEPTH} finished."
cd "$WRK"

## Clean-up of $TEMP is performed by the EXIT trap; report DeletionID's status.
exit "$STATUS"
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/bash
#PBS -l nodes=1:ppn=4
#PBS -l pmem=16gb
#PBS -l walltime=00:10:00
#PBS -A open
#PBS -o logs/depth.did.Rap1.2M.log.out
#PBS -e logs/depth.did.Rap1.2M.log.err
#PBS -t 1-1000

# PBS array job (tasks 1-1000). Each task confirms that its own simulated BAM
# and BAM index exist, links them into a private scratch directory, runs
# DeletionID on that single BAM, and reports the elapsed time.
# The locus/depth pair is read from line 3 of depth_simulations.txt.
# Exit status: non-zero when an input is missing or set-up fails; otherwise
# the exit status of identify-Deletion.sh.

module load gcc/8.3.1
module load bedtools/2.27.1
module load bwa/0.7.15
module load samtools/1.5
module load anaconda3
source activate genopipe

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/SyntheticDeletion
cd "$WRK" || { echo "Cannot change to working directory $WRK. Exiting." >&2; exit 1; }

# The task index names both the input BAM and the scratch directory, so refuse
# to run when it is missing (i.e. outside of an array job).
: "${PBS_ARRAYID:?PBS_ARRAYID is not set; submit this script as a PBS array job}"

# Columns 1 and 2 of the selected line are the locus and the sequencing depth.
INFO=$(sed "3q;d" depth_simulations.txt)
read -r LOCUS DEPTH _ <<< "$INFO"
if [ -z "$LOCUS" ] || [ -z "$DEPTH" ]; then
	echo "Could not read locus and depth from line 3 of depth_simulations.txt. Exiting." >&2
	exit 1
fi

OUTPUT=$WRK/results/${LOCUS}_${DEPTH}
BAM=$OUTPUT/BAM
ID=$OUTPUT/ID
TEMP=$WRK/temp3-$PBS_ARRAYID
# BAM is a directory; the file this task works on lives inside it.
SAMPLE=$BAM/Simulation_$PBS_ARRAYID.bam

#Check that BAM file was generated first
if [ ! -f "$SAMPLE" ]; then
	echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting." >&2
	exit 1
fi
#Check that BAM Index file exists
if [ ! -f "$SAMPLE.bai" ]; then
	echo "BAI missing for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting." >&2
	exit 1
fi
#Check if ID file already generated
#if [[ -f $ID/Simulation_$PBS_ARRAYID\_R1-ID.tab ]]; then
#	echo "ID already generated ($PBS_ARRAYID). Exiting.."
#	exit
#fi

# Remove the scratch directory on every exit path, including failures below.
trap 'rm -rf -- "$TEMP"' EXIT

mkdir -p logs "$ID" "$TEMP" || { echo "Cannot create working directories. Exiting." >&2; exit 1; }

# Set-up Temp directory: DeletionID is given a directory holding only this
# task's BAM and index (as symlinks named after the originals).
cd "$TEMP" || exit 1
echo "$BAM"
ln -s "$SAMPLE" || exit 1
ln -s "$SAMPLE.bai" || exit 1

DATABASE=$WRK/../db/sacCer3_Del
GENOPIPE=$WRK/../..

## Execute Single DeletionID and record time
cd "$GENOPIPE/DeletionID" || { echo "Cannot find $GENOPIPE/DeletionID. Exiting." >&2; exit 1; }
echo "**Begin executing DeletionID for ${LOCUS}_${DEPTH}..."
time bash identify-Deletion.sh -i "$TEMP" -o "$ID" -d "$DATABASE"
STATUS=$?
echo "...single DeletionID for ${LOCUS} ${DEPTH} finished."
cd "$WRK"

## Clean-up of $TEMP is performed by the EXIT trap; report DeletionID's status.
exit "$STATUS"
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/bash
#PBS -l nodes=1:ppn=4
#PBS -l pmem=16gb
#PBS -l walltime=00:20:00
#PBS -A open
#PBS -o logs/depth.did.Rap1.3M.log.out
#PBS -e logs/depth.did.Rap1.3M.log.err
#PBS -t 1-1000

# PBS array job (tasks 1-1000). Each task confirms that its own simulated BAM
# and BAM index exist, links them into a private scratch directory, runs
# DeletionID on that single BAM, and reports the elapsed time.
# The locus/depth pair is read from line 4 of depth_simulations.txt.
# Exit status: non-zero when an input is missing or set-up fails; otherwise
# the exit status of identify-Deletion.sh.

module load gcc/8.3.1
module load bedtools/2.27.1
module load bwa/0.7.15
module load samtools/1.5
module load anaconda3
source activate genopipe

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/SyntheticDeletion
cd "$WRK" || { echo "Cannot change to working directory $WRK. Exiting." >&2; exit 1; }

# The task index names both the input BAM and the scratch directory, so refuse
# to run when it is missing (i.e. outside of an array job).
: "${PBS_ARRAYID:?PBS_ARRAYID is not set; submit this script as a PBS array job}"

# Columns 1 and 2 of the selected line are the locus and the sequencing depth.
INFO=$(sed "4q;d" depth_simulations.txt)
read -r LOCUS DEPTH _ <<< "$INFO"
if [ -z "$LOCUS" ] || [ -z "$DEPTH" ]; then
	echo "Could not read locus and depth from line 4 of depth_simulations.txt. Exiting." >&2
	exit 1
fi

OUTPUT=$WRK/results/${LOCUS}_${DEPTH}
BAM=$OUTPUT/BAM
ID=$OUTPUT/ID
TEMP=$WRK/temp4-$PBS_ARRAYID
# BAM is a directory; the file this task works on lives inside it.
SAMPLE=$BAM/Simulation_$PBS_ARRAYID.bam

#Check that BAM file was generated first
if [ ! -f "$SAMPLE" ]; then
	echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting." >&2
	exit 1
fi
#Check that BAM Index file exists
if [ ! -f "$SAMPLE.bai" ]; then
	echo "BAI missing for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting." >&2
	exit 1
fi
#Check if ID file already generated
#if [[ -f $ID/Simulation_$PBS_ARRAYID\_R1-ID.tab ]]; then
#	echo "ID already generated ($PBS_ARRAYID). Exiting.."
#	exit
#fi

# Remove the scratch directory on every exit path, including failures below.
trap 'rm -rf -- "$TEMP"' EXIT

mkdir -p logs "$ID" "$TEMP" || { echo "Cannot create working directories. Exiting." >&2; exit 1; }

# Set-up Temp directory: DeletionID is given a directory holding only this
# task's BAM and index (as symlinks named after the originals).
cd "$TEMP" || exit 1
echo "$BAM"
ln -s "$SAMPLE" || exit 1
ln -s "$SAMPLE.bai" || exit 1

DATABASE=$WRK/../db/sacCer3_Del
GENOPIPE=$WRK/../..

## Execute Single DeletionID and record time
cd "$GENOPIPE/DeletionID" || { echo "Cannot find $GENOPIPE/DeletionID. Exiting." >&2; exit 1; }
echo "**Begin executing DeletionID for ${LOCUS}_${DEPTH}..."
time bash identify-Deletion.sh -i "$TEMP" -o "$ID" -d "$DATABASE"
STATUS=$?
echo "...single DeletionID for ${LOCUS} ${DEPTH} finished."
cd "$WRK"

## Clean-up of $TEMP is performed by the EXIT trap; report DeletionID's status.
exit "$STATUS"
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/bash
#PBS -l nodes=1:ppn=4
#PBS -l pmem=16gb
#PBS -l walltime=00:20:00
#PBS -A open
#PBS -o logs/depth.did.Rap1.4M.log.out
#PBS -e logs/depth.did.Rap1.4M.log.err
#PBS -t 1-1000

# PBS array job (tasks 1-1000). Each task confirms that its own simulated BAM
# and BAM index exist, links them into a private scratch directory, runs
# DeletionID on that single BAM, and reports the elapsed time.
# The locus/depth pair is read from line 5 of depth_simulations.txt.
# Exit status: non-zero when an input is missing or set-up fails; otherwise
# the exit status of identify-Deletion.sh.

module load gcc/8.3.1
module load bedtools/2.27.1
module load bwa/0.7.15
module load samtools/1.5
module load anaconda3
source activate genopipe

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/SyntheticDeletion
cd "$WRK" || { echo "Cannot change to working directory $WRK. Exiting." >&2; exit 1; }

# The task index names both the input BAM and the scratch directory, so refuse
# to run when it is missing (i.e. outside of an array job).
: "${PBS_ARRAYID:?PBS_ARRAYID is not set; submit this script as a PBS array job}"

# Columns 1 and 2 of the selected line are the locus and the sequencing depth.
INFO=$(sed "5q;d" depth_simulations.txt)
read -r LOCUS DEPTH _ <<< "$INFO"
if [ -z "$LOCUS" ] || [ -z "$DEPTH" ]; then
	echo "Could not read locus and depth from line 5 of depth_simulations.txt. Exiting." >&2
	exit 1
fi

OUTPUT=$WRK/results/${LOCUS}_${DEPTH}
BAM=$OUTPUT/BAM
ID=$OUTPUT/ID
TEMP=$WRK/temp5-$PBS_ARRAYID
# BAM is a directory; the file this task works on lives inside it.
SAMPLE=$BAM/Simulation_$PBS_ARRAYID.bam

#Check that BAM file was generated first
if [ ! -f "$SAMPLE" ]; then
	echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting." >&2
	exit 1
fi
#Check that BAM Index file exists
if [ ! -f "$SAMPLE.bai" ]; then
	echo "BAI missing for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting." >&2
	exit 1
fi
#Check if ID file already generated
#if [[ -f $ID/Simulation_$PBS_ARRAYID\_R1-ID.tab ]]; then
#	echo "ID already generated ($PBS_ARRAYID). Exiting.."
#	exit
#fi

# Remove the scratch directory on every exit path, including failures below.
trap 'rm -rf -- "$TEMP"' EXIT

mkdir -p logs "$ID" "$TEMP" || { echo "Cannot create working directories. Exiting." >&2; exit 1; }

# Set-up Temp directory: DeletionID is given a directory holding only this
# task's BAM and index (as symlinks named after the originals).
cd "$TEMP" || exit 1
echo "$BAM"
ln -s "$SAMPLE" || exit 1
ln -s "$SAMPLE.bai" || exit 1

DATABASE=$WRK/../db/sacCer3_Del
GENOPIPE=$WRK/../..

## Execute Single DeletionID and record time
cd "$GENOPIPE/DeletionID" || { echo "Cannot find $GENOPIPE/DeletionID. Exiting." >&2; exit 1; }
echo "**Begin executing DeletionID for ${LOCUS}_${DEPTH}..."
time bash identify-Deletion.sh -i "$TEMP" -o "$ID" -d "$DATABASE"
STATUS=$?
echo "...single DeletionID for ${LOCUS} ${DEPTH} finished."
cd "$WRK"

## Clean-up of $TEMP is performed by the EXIT trap; report DeletionID's status.
exit "$STATUS"

0 commit comments

Comments
 (0)