File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ run.setup.err
2+ run.setup.out
13input/hg19.fa*
24input/sacCer3.fa*
35db/
@@ -11,6 +13,10 @@ ENCODEdata-eGFP/logs/*.out-*
1113ENCODEdata-eGFP/logs/*.err-*
1214ENCODEdata-eGFP/results/FASTQ
1315ENCODEdata-eGFP/results/ID
16+ HIV_samples/logs/*.err-*
17+ HIV_samples/logs/*.out-*
18+ HIV_samples/results/FASTQ
19+ HIV_samples/results/ID
1420SyntheticDeletion/synthetic_genome/
1521SyntheticDeletion/logs/*.err-*
1622SyntheticDeletion/logs/*.out-*
@@ -29,3 +35,8 @@ SyntheticStrain/results/hg19*
2935ENCODE_CellLines/results/BAM
3036ENCODE_CellLines/results/BAM-nospike
3137ENCODE_CellLines/results/ID
38+ CENPK-chipseq/logs/*.out
39+ CENPK-chipseq/logs/*.err
40+ CENPK-chipseq/results/FASTQ
41+ CENPK-chipseq/results/BAM
42+ CENPK-chipseq/results/ID
Original file line number Diff line number Diff line change 1+ # Run StrainID on CENPK datasets to evaluate StrainID's ability to detect the variant-based strain background
2+
3+ # "Integration of multiple nutrient cues and regulation of lifespan by ribosomal transcription factor Ifh1"
4+ # (Cai et al, 2013)
5+
6+ # GEO accession: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE39147
7+
8+ # The default sacCer3 StrainID database is used
9+ # Download the data by SRA accession with `job/00_download_data.pbs`
10+ # Align FASTQ files and process using `job/01_align_data.pbs`
11+ # Run StrainID on BAM inputs using `job/02_run_StrainID.pbs` to determine if StrainID can successfully identify the strain background
#!/bin/bash
#PBS -l nodes=1:ppn=8
#PBS -l pmem=32gb
#PBS -l walltime=06:00:00
#PBS -A open
#PBS -o logs/download.data.log.out
#PBS -e logs/download.data.log.err

# Download the CENPK ChIP-seq reads (SRR518875-SRR518878) as gzipped,
# split FASTQ files into results/FASTQ using parallel-fastq-dump.

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/CENPK-chipseq
# Abort if the working directory is wrong so nothing is written elsewhere.
cd "$WRK" || exit 1

module load anaconda3
source activate ~/work/myconda/genopipe/

# Output directories (relative to $WRK); -p makes this safe to re-run.
mkdir -p logs results/FASTQ

# One download per SRA accession; -t 8 matches the ppn=8 requested above.
for SRR in SRR518875 SRR518876 SRR518877 SRR518878; do
  parallel-fastq-dump --gzip --split-files -t 8 -O results/FASTQ -s "$SRR"
done
#!/bin/bash
#PBS -l nodes=1:ppn=8
#PBS -l pmem=32gb
#PBS -l walltime=06:00:00
#PBS -A open
#PBS -o logs/align.data.log.out
#PBS -e logs/align.data.log.err

# Align the downloaded CENPK FASTQ files to sacCer3 with bwa mem, then
# coordinate-sort and index the alignments into results/BAM.

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/CENPK-chipseq
# Abort if the working directory is wrong so nothing is written elsewhere.
cd "$WRK" || exit 1

module load gcc
module load samtools
module load bwa
module load anaconda3
source activate ~/work/myconda/genopipe/

# Output directories (relative to $WRK); -p makes this safe to re-run.
mkdir -p logs results/BAM

# Reference genome FASTA passed to bwa mem.
YGENOME="$WRK/../input/sacCer3.fa"

# Make the align|sort pipeline report a bwa failure, not just samtools' status.
set -o pipefail

for SRR in SRR518875 SRR518876 SRR518877 SRR518878; do
  FQ="$WRK/results/FASTQ/$SRR"
  BAM="$WRK/results/BAM/$SRR"
  # align: only the first-read file (<SRR>_1.fastq.gz) is used
  if ! bwa mem -t 8 "$YGENOME" "${FQ}_1.fastq.gz" \
    | samtools sort \
      > "$BAM.bam"; then
    echo "ERROR: alignment failed for $SRR" >&2
    exit 1
  fi
  # index
  samtools index "$BAM.bam"
done
#!/bin/bash
#PBS -l nodes=1:ppn=4
#PBS -l pmem=16gb
#PBS -l walltime=03:00:00
#PBS -A open
#PBS -o logs/sid.cenpk.chip.log.out
#PBS -e logs/sid.cenpk.chip.log.err

# Run StrainID (identify-Strain.sh) on the BAM files in results/BAM and
# write its output to results/ID.

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/CENPK-chipseq
# Abort if the working directory is wrong so nothing is written elsewhere.
cd "$WRK" || exit 1

module load gcc
module load samtools
module load bwa
module load anaconda3
source activate ~/work/myconda/genopipe

# Output directories (relative to $WRK); -p makes this safe to re-run.
mkdir -p logs results/ID

# VCF database directory and reference genome handed to StrainID.
DB="$WRK/../db/sacCer3_VCF"
GENOME="$WRK/../input/sacCer3.fa"

# identify-Strain.sh is launched from inside the StrainID directory, so every
# path passed to it is absolute (built from $WRK).
STRAINID="$WRK/../../StrainID"
cd "$STRAINID" || exit 1
bash identify-Strain.sh -i "$WRK/results/BAM" -g "$GENOME" -o "$WRK/results/ID/" -v "$DB"
Original file line number Diff line number Diff line change 1+ # logfiles from STDERR and STDOUT of running job files go here
Original file line number Diff line number Diff line change 1+ # Downloaded FASTQ files, aligned BAM files, and StrainID results go here
Original file line number Diff line number Diff line change 1+ # Run EpitopeID on HIV datasets to evaluate EpitopeID's ability to detect HIV genome insertions
2+
3+ # "Benzotriazoles Reactivate Latent HIV-1 through Inactivation of STAT5 SUMOylation"
4+ # (Bosque et al, 2017)
5+
6+ # GEO accession: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE84199
7+ # HIV genome: https://www.ncbi.nlm.nih.gov/nuccore/AF324493
8+
9+ # EpitopeID database with HIV genome as a tag and hg19 genome as genomic sequence is setup with `../setup.sh`
10+ # Download the data by SRA accession with `job/00_download_data.pbs`
11+ # Run EpitopeID on FASTQ inputs using `job/01_run_EpitopeID.pbs` to determine if EpitopeID can localize HIV genome insertions
#!/bin/bash
#PBS -l nodes=1:ppn=8
#PBS -l pmem=32gb
#PBS -l walltime=03:00:00
#PBS -A open
#PBS -o logs/download.data.log.out
#PBS -e logs/download.data.log.err

# Download the HIV sample reads (SRR3812124-SRR3812129) as gzipped, split
# FASTQ files into results/FASTQ.
#
# Requires
#   parallel-fastq-dump v2.8.0

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/HIV_samples
# Abort if the working directory is wrong so nothing is written elsewhere.
cd "$WRK" || exit 1

module load anaconda3
source activate ~/work/myconda/genopipe/

# Output directories (relative to $WRK); -p makes this safe to re-run.
mkdir -p results/FASTQ logs

# NOTE(review): the job requests ppn=8 but each download uses only 4 threads;
# confirm which value is intended.
for SRR in SRR3812124 SRR3812125 SRR3812126 SRR3812127 SRR3812128 SRR3812129; do
  parallel-fastq-dump --gzip --split-files -t 4 -O results/FASTQ -s "$SRR"
done

# Collect any FASTQ files left in $WRK itself. The existence check skips the
# unexpanded pattern when nothing matches, so no other file is ever moved.
for fq in ./*.fastq.gz; do
  if [ -e "$fq" ]; then
    mv -- "$fq" results/FASTQ/
  fi
done
#!/bin/bash
#PBS -l nodes=1:ppn=4
#PBS -l pmem=16gb
#PBS -l walltime=03:00:00
#PBS -A open
#PBS -o logs/eid.hiv.log.out
#PBS -e logs/eid.hiv.log.err

# Run EpitopeID (identify-Epitope.sh) on the FASTQ files in results/FASTQ
# using the hiv_EpiDB database and write its output to results/ID.

module load gcc
module load samtools
module load bwa
module load bedtools
module load anaconda3
# NOTE(review): the other job scripts activate the environment by path
# (~/work/myconda/genopipe); confirm that the bare name resolves to the same one.
source activate genopipe

# FIRST CHANGE PATH TO EXECUTE
WRK=/path/to/GenoPipe/paper/HIV_samples
# Abort if the working directory is wrong so nothing is written elsewhere.
cd "$WRK" || exit 1

# Output directories (relative to $WRK); -p makes this safe to re-run.
mkdir -p logs results/ID

# EpitopeID database directory.
DB="$WRK/../db/hiv_EpiDB"

# identify-Epitope.sh is launched from inside the EpitopeID directory, so every
# path passed to it is absolute (built from $WRK). -t 4 matches ppn=4 above.
EPITOPEID="$WRK/../../EpitopeID"
cd "$EPITOPEID" || exit 1
bash identify-Epitope.sh -i "$WRK/results/FASTQ/" -o "$WRK/results/ID/" -d "$DB" -t 4
You can’t perform that action at this time.
0 commit comments