Skip to content

Commit 38dad0d

Browse files
committed
add scripts to run StrainID on human simulations
These PBS scripts run StrainID on the human data simulated from K562 and HELA at the varying sequencing depths
1 parent 0ce484b commit 38dad0d

10 files changed

Lines changed: 630 additions & 0 deletions

.DS_Store

6 KB
Binary file not shown.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.10M.log.out
7+
#PBS -e logs/depth.sid.HELA.10M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "21q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp21-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.1M.log.out
7+
#PBS -e logs/depth.sid.HELA.1M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "18q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp18-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.20M.log.out
7+
#PBS -e logs/depth.sid.HELA.20M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "22q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp22-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.2M.log.out
7+
#PBS -e logs/depth.sid.HELA.2M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "19q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp19-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.HELA.5M.log.out
7+
#PBS -e logs/depth.sid.HELA.5M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "20q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp20-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP

0 commit comments

Comments
 (0)