Skip to content

Commit da96f87

Browse files
committed
scripts to align BY4742 data
This commit includes the scripts and setup needed to align the BY4742 ChIPseq data, which includes some ABI SOLiD data. `paper/setup.sh` was updated to create a bowtie colorspace index of sacCer3 for the ABI samples. `paper/.gitignore` was updated to include the BAM alignment output. `paper/BY4742-chipseq/job01_align_data.pbs` is the PBS script that calls either BWA or bowtie (as appropriate for the sequencing platform used) to align the raw FASTQ data.
1 parent ccc43a6 commit da96f87

3 files changed

Lines changed: 65 additions & 0 deletions

File tree

paper/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ ENCODE_CellLines/results/ID
3838
BY4742-chipseq/logs/*.out
3939
BY4742-chipseq/logs/*.err
4040
BY4742-chipseq/results/FASTQ
41+
BY4742-chipseq/results/BAM
4142
CENPK-chipseq/logs/*.out
4243
CENPK-chipseq/logs/*.err
4344
CENPK-chipseq/results/FASTQ
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/bin/bash
2+
#PBS -l nodes=1:ppn=4
3+
#PBS -l pmem=16gb
4+
#PBS -l walltime=02:00:00
5+
#PBS -A open
6+
#PBS -o logs/align.data.log.out
7+
#PBS -e logs/align.data.log.err
8+
#PBS -t 1-10
9+
10+
module load gcc
11+
module load samtools
12+
module load bwa
13+
module load anaconda3
14+
source activate ~/work/myconda/genopipe/
15+
16+
# FIRST CHANGE PATH TO EXECUTE
17+
WRK=/path/to/GenoPipe/paper/BY4742-chipseq
18+
# Stop immediately if the working directory cannot be entered; otherwise the
# relative mkdir calls and the SraRunInfo.csv lookup below would run in
# whatever directory the job happened to start in.
cd "$WRK" || { echo "Cannot cd to $WRK" >&2; exit 1; }
19+
20+
[ -d logs ] || mkdir logs
21+
[ -d results/BAM ] || mkdir -p results/BAM
22+
[ -d results/uniq-BAM ] || mkdir -p results/uniq-BAM
23+
24+
YGENOME=$WRK/../input/sacCer3.fa
25+
CSGENOME=$WRK/../input/sacCer3_index
26+
27+
INDEX=$(($PBS_ARRAYID+1))
28+
29+
METADATA=SraRunInfo.csv
30+
INFO=`sed "${INDEX}q;d" $METADATA`
31+
SRR=`echo $INFO | cut -d"," -f1`
32+
SAMPLE=`echo $INFO | cut -d"," -f12`
33+
PLATFORM=`echo $INFO | cut -d"," -f19`
34+
#PAIR=`echo $INFO | cut -d"," -f16`
35+
#echo $INFO
36+
37+
FQ=$WRK/results/FASTQ/$SRR
38+
BAM=$WRK/results/BAM/$SAMPLE
39+
40+
echo "($PBS_ARRAYID) Aligned $SRR $PLATFORM reads > $BAM"
41+
if [[ " $PLATFORM " =~ " ABI_SOLID " ]]; then
42+
# Stream only the gzipped reads into bowtie. The reference is already supplied
# through the $CSGENOME index, so the reference FASTA in $YGENOME must not be
# passed to gzip -dc alongside the read file.
bowtie -C -S $CSGENOME <(gzip -dc $FQ\_1.fastq.gz) \
43+
| samtools sort \
44+
> $BAM.bam
45+
# Prefix the status line with the array task ID, as the other log messages do.
echo "($PBS_ARRAYID) $BAM single aligned (bowtie color space)"
46+
elif [[ " $PLATFORM " =~ " ILLUMINA " ]]; then
47+
bwa mem $YGENOME $FQ\_1.fastq.gz $FQ\_2.fastq.gz -t 4 \
48+
| samtools sort \
49+
> $BAM.bam
50+
echo "($PBS_ARRAYID) $BAM pair aligned (BWA)"
51+
fi
52+
53+
#samtools view -b -F4 $BAM > $WRK/results/uniq-BAM/$SAMPLE.bam
54+
55+
echo "($PBS_ARRAYID) Indexing..."
56+
samtools index $BAM.bam
57+
echo "($PBS_ARRAYID) Complete!"

paper/setup.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313

1414
# Required software:
1515
# wget
16+
# Python 3
1617
# Perl 5.18+
1718
# bwa v0.7.14+
19+
# bowtie v1.2.3
1820
#
1921
# Optional software:
2022
# twoBitToFa
@@ -149,3 +151,8 @@ cd $WRK/db
149151
ln -s ../../StrainID/sacCer3_VCF
150152
ln -s ../../StrainID/hg19_VCF
151153
cd $WRK
154+
155+
# Setup color-space index for yeast genome
156+
# (used by BY4742-chipseq)
157+
bowtie-build -C input/sacCer3.fa input/sacCer3_index
158+

0 commit comments

Comments
 (0)