-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_genome_index.sh
More file actions
executable file
·53 lines (45 loc) · 2.28 KB
/
create_genome_index.sh
File metadata and controls
executable file
·53 lines (45 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/bash
# This script generates genome index according to the STAR manual.
# Please note that genome index creation (especially for large genomes, such as human)
# requires a lot of RAM - this task, together with the annotations file, was not able to finish
# on a standard laptop with 30 GB RAM and 16 cores.
# Input file recommendations:
# FASTA: Ensembl "Primary Assembly" FASTA (e.g., Homo_sapiens.GRCh38.dna.primary_assembly.fa)
# GTF: Matching Ensembl GTF (e.g., Homo_sapiens.GRCh38.113.gtf)
# Set the parameters GENOME, ANNOTATION, and OVERHANG below
# according to your genome, annotations file, and the length of your reads.
# --sjdbOverhang should equal the maximum read length you plan to map, minus 1.
# Usage: <script> local|hpc
#   local - run STAR inside Docker
#   hpc   - run STAR inside Singularity
# Must be started from the directory that contains ./data/genome/ with the
# FASTA and GTF files named below; the index is written to
# ./data/genome/genome_index.
# Exits non-zero on a bad argument, a missing input file, a missing container
# runtime, or a STAR failure.
set -euo pipefail

# Requested execution mode; defaults to empty so the check below reports a
# usage error instead of tripping 'set -u' when no argument is given.
MODE="${1:-}"

if [[ "${MODE}" != "local" && "${MODE}" != "hpc" ]]; then
  echo "Specify either 'local' or 'hpc' as an argument to the script, ex., $0 local" >&2
  exit 1
fi

# Paths as seen inside the container. The host copies live at the same paths
# relative to the current working directory (./data, ./data/genome).
DATA_DIR=/data
GENOME_DIR=/data/genome
GENOME=genome.fa
ANNOTATION=genome.gtf
OVERHANG=149
THREADS=8
STAR_IMAGE=community.wave.seqera.io/library/star:2.7.10b--90133b03b1960405

# Absolute host paths: older Docker releases reject a relative bind-mount
# source such as ./data, so anchor the mount to the current directory.
HOST_DATA_DIR="${PWD}${DATA_DIR}"
HOST_GENOME_DIR="${PWD}${GENOME_DIR}"

# Fail early with a clear message rather than after the container has started.
for input_file in "${HOST_GENOME_DIR}/${GENOME}" "${HOST_GENOME_DIR}/${ANNOTATION}"; do
  if [[ ! -f "${input_file}" ]]; then
    echo "Required input file not found: ${input_file}" >&2
    exit 1
  fi
done

mkdir -p -- "${HOST_GENOME_DIR}/genome_index"

# STAR arguments shared by both container runtimes (container-side paths).
star_args=(
  --runThreadN "${THREADS}"
  --runMode genomeGenerate
  --genomeDir "${GENOME_DIR}/genome_index"
  --genomeFastaFiles "${GENOME_DIR}/${GENOME}"
  --sjdbGTFfile "${GENOME_DIR}/${ANNOTATION}"
  --sjdbOverhang "${OVERHANG}"
)

echo "Creating genome STAR index ..."
# The working directory is set to the index directory (-w / --pwd) to ensure
# STAR writes logs and other temporary files within the mounted volume.
if [[ "${MODE}" == "local" ]] && command -v docker &> /dev/null; then
  echo "Docker found. Running with Docker..."
  docker run --rm \
    -w "${GENOME_DIR}/genome_index" \
    -v "${HOST_DATA_DIR}:${DATA_DIR}" \
    "${STAR_IMAGE}" STAR "${star_args[@]}"
elif [[ "${MODE}" == "hpc" ]] && command -v singularity &> /dev/null; then
  echo "Singularity found. Running with Singularity..."
  singularity exec \
    --pwd "${GENOME_DIR}/genome_index" \
    --bind "${HOST_DATA_DIR}:${DATA_DIR}" \
    "docker://${STAR_IMAGE}" STAR "${star_args[@]}"
else
  # The requested runtime is not installed: report it and fail so that
  # callers do not mistake this for a successful index build.
  echo "No supported containerization tool matching argument '${MODE}' was found in the environment." >&2
  exit 1
fi