|
#!/bin/bash
#PBS -l nodes=1:ppn=4
#PBS -l pmem=16gb
#PBS -l walltime=01:00:00
#PBS -A open
#PBS -o logs/align-picard.log.out
#PBS -e logs/align-picard.log.err
#PBS -t 1-9010

# Align one paired-end sample per PBS array task, then sort, mark duplicates,
# filter, and index the result.
#
# Inputs (relative to $WRK):
#   $METADATA                          whitespace-delimited table; column 2 is
#                                      used as the output name (ERS) and column
#                                      4 as the FASTQ accession (ERR)
#   results/FASTQ/<ERR>/<ERR>_{1,2}.fastq.gz
# Outputs:
#   results/BAM/<ERS>.bam, its index, and results/BAM/<ERS>.metrics.txt
#
# NOTE(review): the -o/-e directives above point into logs/, which this script
# only creates after the job has started -- confirm that logs/ already exists
# in the submission directory.

# Print a message to stderr and abort the task with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

module load gcc/8.3.1
module load samtools/1.5
module load bwa/0.7.15
module load picard/2.20.8

WRK=/path/to/GenoPipe/paper/YKOC-wgs
cd "$WRK" || die "Cannot cd to $WRK"

# -p tolerates a missing parent directory and concurrent creation by other
# array tasks.
mkdir -p logs results/BAM || die "Cannot create logs/ or results/BAM/"

GENOME=../input/sacCer3.fa
METADATA=210403_PRJEB27160_accessions.txt

[[ "${PBS_ARRAYID:-}" =~ ^[0-9]+$ ]] \
  || die "PBS_ARRAYID must be a non-negative integer (got '${PBS_ARRAYID:-}')"
[[ -r "$METADATA" ]] || die "Cannot read metadata file $METADATA"

# Array id N selects line N+1 of the metadata file
# (presumably line 1 is a header row -- TODO confirm).
INDEX=$((PBS_ARRAYID + 1))

INFO=$(sed "${INDEX}q;d" "$METADATA")
ERR=$(awk '{print $4}' <<<"$INFO")
ERS=$(awk '{print $2}' <<<"$INFO")
[[ -n "$ERR" && -n "$ERS" ]] \
  || die "(${PBS_ARRAYID}) No usable record on line $INDEX of $METADATA"

FQ1=results/FASTQ/$ERR/${ERR}_1.fastq.gz
FQ2=results/FASTQ/$ERR/${ERR}_2.fastq.gz
BAM=$WRK/results/BAM/$ERS
#echo $INFO

[[ -r "$FQ1" && -r "$FQ2" ]] \
  || die "(${PBS_ARRAYID}) Missing FASTQ input: $FQ1 and/or $FQ2"

# Remove intermediate files on every exit path so that a failed task does not
# leave large partial files behind.
cleanup() {
  rm -f -- "$BAM.unsorted.bam" "$BAM.unmarked.bam" "$BAM.marked.bam"
}
trap cleanup EXIT

start=$(date +%s)
# Align with Pugh Lab standard alignment pipeline parameters
#   -T INT  Don't output alignments with score lower than INT. This option
#           affects output and occasionally SAM flag 2.
#   -h INT  If query has not more than INT hits with score higher than 80% of
#           the best hit, output them all in the XA tag.
#   -M      Mark shorter split hits as secondary (for Picard compatibility).
# bwa mem writes SAM text to stdout; the ".unsorted.bam" name is kept for
# compatibility and samtools sort auto-detects the input format.
echo "(${PBS_ARRAYID}) Aligning $ERR fastq files..."
bwa mem -v 1 -T '30' -h '5' -t 4 -M "$GENOME" "$FQ1" "$FQ2" \
  > "$BAM.unsorted.bam" \
  || die "(${PBS_ARRAYID}) bwa mem failed for $ERR"

# Sort
echo "(${PBS_ARRAYID}) Sorting $ERR ..."
samtools sort "$BAM.unsorted.bam" > "$BAM.unmarked.bam" \
  || die "(${PBS_ARRAYID}) samtools sort failed for $ERR"

# Mark duplicates
# Options left disabled on purpose (kept for reference):
#   READ_NAME_REGEX='[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.'
#   OPTICAL_DUPLICATE_PIXEL_DISTANCE='100'
#   VALIDATION_STRINGENCY='LENIENT' VERBOSITY=ERROR
echo "(${PBS_ARRAYID}) Marking duplicates $ERR ..."
picard MarkDuplicates \
  INPUT="$BAM.unmarked.bam" \
  OUTPUT="$BAM.marked.bam" \
  METRICS_FILE="$BAM.metrics.txt" \
  REMOVE_DUPLICATES='false' ASSUME_SORTED='true' \
  DUPLICATE_SCORING_STRATEGY='SUM_OF_BASE_QUALITIES' \
  || die "(${PBS_ARRAYID}) picard MarkDuplicates failed for $ERR"

# Filter duplicates: keep paired reads (-f 0x1) and drop reads flagged as
# unmapped (0x4) or duplicate (0x400).
echo "(${PBS_ARRAYID}) Filter duplicates $ERR ..."
samtools view -h -b -f 0x1 -F 0x404 \
  -o "$BAM.bam" \
  "$BAM.marked.bam" \
  || die "(${PBS_ARRAYID}) samtools view failed for $ERR"

# Index
echo "(${PBS_ARRAYID}) Index $ERR ..."
samtools index "$BAM.bam" \
  || die "(${PBS_ARRAYID}) samtools index failed for $ERR"

end=$(date +%s)
runtime=$((end - start))
echo "...alignment and indexing for ($PBS_ARRAYID) $ERR.fq > $ERS.bam finished in ${runtime}"
# Clean-up of the intermediate files is done by the EXIT trap above.
0 commit comments