Skip to content

Commit db11665

Browse files
authored
Merge pull request #12 from CEGRcode/validation
Validate StrainID on human simulations
2 parents d0149a0 + 38dad0d commit db11665

11 files changed

Lines changed: 700 additions & 0 deletions

.DS_Store

6 KB
Binary file not shown.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.10M.log.out
7+
#PBS -e logs/depth.sid.HELA.10M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "21q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp21-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.1M.log.out
7+
#PBS -e logs/depth.sid.HELA.1M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "18q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp18-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.20M.log.out
7+
#PBS -e logs/depth.sid.HELA.20M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "22q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp22-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.2M.log.out
7+
#PBS -e logs/depth.sid.HELA.2M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "19q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp19-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.5M.log.out
7+
#PBS -e logs/depth.sid.HELA.5M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "20q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp20-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP

0 commit comments

Comments
 (0)