Skip to content

Commit eba2c2b

Browse files
committed
refactor general scripts for Bowtie2 and update
Fix straggling updates, including ones from the EpitopeID switch to Bowtie2 for the aligner - refactor update_tagDB.sh utility to generate Bowtie2 indexes - update dependency notes in all of the identify-*.sh scripts - minor fix to identify-Strain.sh report name generation - adjust genome index file check in identify-Epitope.sh for Bowtie2-named index files - update gitignore with Bowtie2 index filenames - update paper README with extra dependencies
1 parent 236cd49 commit eba2c2b

6 files changed

Lines changed: 26 additions & 12 deletions

File tree

.gitignore

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,21 @@ EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.ann
55
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.bwt
66
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.pac
77
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.sa
8+
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.1.bt2
9+
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.2.bt2
10+
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.3.bt2
11+
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.4.bt2
12+
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.rev.1.bt2
13+
EpitopeID/sacCer3_EpiID/FASTA_genome/genome.fa.rev.2.bt2
814
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa
915
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.amb
1016
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.ann
1117
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.bwt
1218
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.pac
1319
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.sa
20+
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.1.bt2
21+
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.2.bt2
22+
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.3.bt2
23+
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.4.bt2
24+
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.rev.1.bt2
25+
EpitopeID/hg19_EpiID/FASTA_genome/genome.fa.rev.2.bt2

DeletionID/identify-Deletion.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
# Required software:
4-
# python v2.15 with scipy
4+
# python3 with scipy
55

66
usage()
77
{

EpitopeID/identify-Epitope.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# samtools v1.7+
66
# bedtools v2.26+
77
# perl5
8-
# python v2.15 with scipy
8+
# python3 with scipy
99
# GNU grep (BSD grep on MacOSX is >10X slower)
1010

1111
# Unused Software (no longer required)
@@ -73,15 +73,15 @@ fi
7373

7474
# Check if ALL_TAG.fa aligner index exists, creates if it doesn't
7575
#if [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.amb ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.ann ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.bwt ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.pac ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.sa ]; then
76-
if [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.1.bt2 ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.rev.1.bt2 ]; then
76+
if [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.1.bt2 ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.2.bt2 ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.3.bt2 ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.4.bt2 ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.rev.1.bt2 ] || [ ! -f $DATABASE/FASTA_tag/ALL_TAG.fa.rev.2.bt2 ]; then
7777
echo "Building TAG index..."
7878
#bwa index $DATABASE/FASTA_tag/ALL_TAG.fa
7979
bowtie2-build $DATABASE/FASTA_tag/ALL_TAG.fa $DATABASE/FASTA_tag/ALL_TAG.fa
8080
fi
8181

8282
# Check if genome.fa aligner index exists, creates if it doesn't
8383
#if [ ! -f $DATABASE/FASTA_genome/genome.fa.amb ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.ann ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.bwt ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.pac ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.sa ]; then
84-
if [ ! -f $DATABASE/FASTA_genome/genome.fa.1.bt2 ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.rev.1.bt2 ]; then
84+
if [ ! -f $DATABASE/FASTA_genome/genome.fa.1.bt2 ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.2.bt2 ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.3.bt2 ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.4.bt2 ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.rev.1.bt2 ] || [ ! -f $DATABASE/FASTA_genome/genome.fa.rev.2.bt2 ]; then
8585
echo "Building ORF index..."
8686
#bwa index $DATABASE/FASTA_genome/genome.fa
8787
bowtie2-build $DATABASE/FASTA_genome/genome.fa $DATABASE/FASTA_genome/genome.fa

EpitopeID/utility_scripts/update_tagDB.sh

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@
44
# and creates a Bowtie2 index. The resulting files should be moved into the appropriate /pwd/FASTA_tag/ folder
55

66
# Required software:
7-
# BWA v0.7.14+
7+
# Bowtie2 v2.2.5+
88

99
# Remove existing master tag file and any aligner indexes (BWA or Bowtie2) if they exist
1010
rm -f ALL_TAG.fa*
1111
# Concatenate various possible FASTA files into master index
1212
cat *.fa *.fna *.ffn *.fasta > ALL_TAG.fa
13-
# BWA index command
14-
bwa index ALL_TAG.fa
15-
16-
13+
# Bowtie2 index command
14+
bowtie2-build ALL_TAG.fa ALL_TAG.fa

StrainID/identify-Strain.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
# Required software:
4-
# python v2.15 with pysam
4+
# python3 with pysam
55

66
usage()
77
{
@@ -55,7 +55,7 @@ cd $INPUT
5555
for BAM in *.bam
5656
do
5757

58-
SAMPLE=`basename $BAM`
58+
SAMPLE=`basename $BAM ".bam"`
5959
echo $SAMPLE
6060

6161
if [[ $SEED -eq "" ]]; then

paper/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ paper
2323

2424
These scripts were built to run on a Linux server with a PBS job scheduler, with some of the dependencies installed through environment modules and the remaining dependencies through a conda environment. You may need to modify these scripts to account for different server setups and configurations.
2525

26-
See the [GenoPipe documentation](https://pughlab.mbg.cornell.edu/GenoPipe-docs/) for a list of dependencies needed to run these publication-associated scripts. In addition to these dependencies, you will also need to instal [seqtk](https://github.com/lh3/seqtk).
26+
See the [GenoPipe documentation](https://pughlab.mbg.cornell.edu/GenoPipe-docs/) for a list of dependencies needed to run these publication-associated scripts. In addition to these dependencies, you will also need to install the following:
27+
28+
* [seqtk](https://github.com/lh3/seqtk)
29+
* sra-toolkit (fastq-dump)
30+
* wget
2731

2832
## setup.sh
2933
Runs the scripts to download and format the yeast and human genomes and other reference files for aligning the data

0 commit comments

Comments
 (0)