Skip to content

Commit df17e26

Browse files
committed
Add PBS script to run StrainID on ENCODE data
This PBS script runs StrainID, one sample at a time, on the ENCODE BAM files from which spike-in reads have been filtered out. The execution time and the stdout output of each run are collected alongside the StrainID report itself.
1 parent d46b19e commit df17e26

2 files changed

Lines changed: 68 additions & 0 deletions

File tree

paper/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ SyntheticStrain/results/sacCer3*
2828
SyntheticStrain/results/hg19*
2929
ENCODE_CellLines/results/BAM
3030
ENCODE_CellLines/results/BAM-nospike
31+
ENCODE_CellLines/results/ID
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/bin/bash
#PBS -l nodes=1:ppn=6
#PBS -l pmem=24gb
#PBS -l walltime=01:00:00
#PBS -A open
#PBS -o logs/sid.log.out
#PBS -e logs/sid.log.err
#PBS -t 1-14260

# Run StrainID on a single BAM file from results/BAM-nospike.
#
# Each array task uses $PBS_ARRAYID as a 1-based line number into the sample
# metadata file; the first whitespace-delimited field of that line is the
# sample identifier (ENCFF). For that sample the task writes, under results/ID:
#   <ENCFF>.std   stdout of identify-Strain.sh
#   <ENCFF>.time  the shell's `time` report, plus anything identify-Strain.sh
#                 writes to stderr (both go to the brace group's stderr)
# identify-Strain.sh is also given results/ID as its own output directory (-o).
#
# Any failure (missing input, failed cd, non-zero StrainID exit) terminates the
# task with a non-zero status and a message on stderr.

module load gcc/8.3.1
module load bedtools/2.27.1
module load bwa/0.7.15
module load samtools/1.5
module load anaconda3
source activate genopipe

# Print a message to stderr and abort the task with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# The array index selects the metadata line and names the temp directory, so
# refuse to run without a usable one (e.g. when launched outside an array job).
case "${PBS_ARRAYID:-}" in
  '' | *[!0-9]*) die "PBS_ARRAYID ('${PBS_ARRAYID:-}') is not a positive integer. Exiting." ;;
esac

# NOTE(review): the .gitignore entries added with this script spell the
# directory ENCODE_CellLines (underscore) -- confirm the actual directory name
# when replacing this placeholder path.
WRK=/path/to/GenoPipe/paper/ENCODE-CellLines
cd "$WRK" || die "Cannot change directory to $WRK. Exiting."

# Store directory paths
DATABASE="$WRK/../db/hg19_VCF"
GENOME="$WRK/../input/hg19.fa"
SEED="$PBS_ARRAYID"
GENOPIPE="$WRK/../.."
BAM="$WRK/results/BAM-nospike"
ID="$WRK/results/ID"

mkdir -p logs || die "Cannot create logs directory in $WRK. Exiting."
mkdir -p "$ID" || die "Cannot create output directory $ID. Exiting."

# Parse metadata: first field of line number $PBS_ARRAYID
METADATA=210512_sample_metadata.txt
[ -f "$METADATA" ] || die "Metadata file $WRK/$METADATA does not exist. Exiting."
ENCFF=$(awk -v n="$PBS_ARRAYID" 'NR == n { print $1; exit }' "$METADATA")
[ -n "$ENCFF" ] || die "No sample found on line $PBS_ARRAYID of $METADATA. Exiting."

# Check that BAM file was generated first
if [ ! -f "$BAM/$ENCFF.bam" ]; then
  die "BAM input for $BAM/$ENCFF does not exist. Exiting."
fi

# Check that BAM Index file exists
if [ ! -f "$BAM/$ENCFF.bam.bai" ]; then
  die "BAI missing for $ENCFF. Exiting."
fi

# Set-up Temp directory holding only this sample's BAM and index, so that
# identify-Strain.sh (-i <dir>) sees a single sample.
TEMP="$WRK/temp-$PBS_ARRAYID"
mkdir -p "$TEMP" || die "Cannot create temp directory $TEMP. Exiting."
# Clean-up happens on every exit path, including the failure branches below.
trap 'rm -rf -- "${TEMP:?}"' EXIT
echo "$BAM"
# -f replaces links left behind by an earlier, interrupted run of this task.
ln -sf "$BAM/$ENCFF.bam" "$TEMP/" || die "Cannot link $ENCFF.bam into $TEMP. Exiting."
ln -sf "$BAM/$ENCFF.bam.bai" "$TEMP/" || die "Cannot link $ENCFF.bam.bai into $TEMP. Exiting."

## Execute Single StrainID and record time
cd "$GENOPIPE/StrainID" || die "Cannot change directory to $GENOPIPE/StrainID. Exiting."
echo "**Begin executing StrainID for ${ENCFF}..."
{ time bash identify-Strain.sh -i "$TEMP" -g "$GENOME" -v "$DATABASE" -s "$SEED" -o "$ID" > "$ID/$ENCFF.std" ; } 2> "$ID/$ENCFF.time"
STATUS=$?
if [ "$STATUS" -ne 0 ]; then
  printf '%s\n' "StrainID for ($PBS_ARRAYID) ${ENCFF} failed with exit status $STATUS; see $ID/$ENCFF.time." >&2
  exit "$STATUS"
fi
echo "...single StrainID for ($PBS_ARRAYID) ${ENCFF} finished."

0 commit comments

Comments
 (0)