Skip to content

Commit 0ce484b

Browse files
committed
PBS script to run SID on K562_1M
A seeded run of the StrainID algorithm is run on each simulated dataset of 1M reads from the K562 synthetic genome. This is wrapped up in a PBS script.
1 parent 661a974 commit 0ce484b

1 file changed

Lines changed: 70 additions & 0 deletions

File tree

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=00:10:00
5+
#PBS -A open
6+
#PBS -o logs/depth.sid.K562.1M.log.out
7+
#PBS -e logs/depth.sid.K562.1M.log.err
8+
#PBS -t 1-1000
9+
10+
# This script will check that 1000 BAM files have been generated before executing StrainID.
11+
12+
module load gcc/8.3.1
13+
module load bedtools/2.27.1
14+
module load bwa/0.7.15
15+
module load samtools/1.5
16+
module load anaconda3
17+
source activate genopipe
18+
19+
# FIRST CHANGE PATH TO EXECUTE
20+
WRK=/path/to/GenoPipe/paper/SyntheticStrain
21+
cd $WRK
22+
23+
INFO=`sed "13q;d" depth_simulations.txt`
24+
LOCUS=`awk '{print $1}' <(echo $INFO)`
25+
DEPTH=`awk '{print $2}' <(echo $INFO)`
26+
BASE=`awk '{print $3}' <(echo $INFO)`
27+
28+
REF=`echo $LOCUS | awk -F'_' '{print $1}'`
29+
SEED=$(($BASE+$PBS_ARRAYID))
30+
31+
OUTPUT=$WRK/results/$LOCUS\_$DEPTH
32+
BAM=$WRK/results/$LOCUS\_$DEPTH/BAM/Simulation_$PBS_ARRAYID.bam
33+
TEMP=$WRK/temp13-$PBS_ARRAYID
34+
35+
[ -d $OUTPUT/ID ] || mkdir $OUTPUT/ID
36+
[ -d logs ] || mkdir logs
37+
[ -d $TEMP ] || mkdir $TEMP
38+
39+
#Check that BAM file was generated first
40+
if [ ! -f $BAM ];
41+
then
42+
echo "BAM input for ${LOCUS}_${DEPTH}_${PBS_ARRAYID} does not exist. Exiting."
43+
exit
44+
fi
45+
#Check that BAM Index file exists
46+
if [ ! -f $BAM.bai ];
47+
then
48+
echo "BAI missing for for ${LOCUS}_${DEPTH}_${PBS_ARRAYID}. Exiting."
49+
exit
50+
fi
51+
52+
# Set-up Temp directory
53+
cd $TEMP
54+
echo $BAM
55+
ln -s $BAM
56+
ln -s $BAM.bai
57+
58+
GENOME=$WRK/../input/$REF.fa
59+
DATABASE=$WRK/../db/$REF\_VCF
60+
GENOPIPE=$WRK/../..
61+
62+
## Execute Single StrainID and record time
63+
cd $GENOPIPE/StrainID
64+
echo "**Begin executing StrainID for ${LOCUS}_${DEPTH}..."
65+
{ time bash identify-Strain.sh -i $TEMP -g $GENOME -v $DATABASE -o $OUTPUT/ID -s $SEED > $OUTPUT/ID/Simulation_$PBS_ARRAYID.std ; } 2> $OUTPUT/ID/Simulation_$PBS_ARRAYID.time
66+
echo "...single StrainID for ${LOCUS} ${DEPTH} finished."
67+
cd $WRK
68+
69+
## Clean-up
70+
rm -r $TEMP

0 commit comments

Comments
 (0)