-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathSnakefile
More file actions
112 lines (94 loc) · 3.99 KB
/
Snakefile
File metadata and controls
112 lines (94 loc) · 3.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Workflow configuration: the values in config.yaml are read throughout this file via the global `config` dict
# (e.g. config["output_directory"], config["download_directory"], config["tmp_directory"]).
configfile: "config.yaml"
# Rule definitions are split into one snakefile per topic; each include below adds that file's rules to this workflow.
# NOTE(review): the includes are kept in their existing order; confirm before reordering, since rule definition
# order may matter to the included files.
include: "src/snakefiles/datacollect.snakefile"
include: "src/snakefiles/anatomy.snakefile"
include: "src/snakefiles/cell_line.snakefile"
include: "src/snakefiles/gene.snakefile"
include: "src/snakefiles/protein.snakefile"
include: "src/snakefiles/geneprotein.snakefile"
include: "src/snakefiles/diseasephenotype.snakefile"
include: "src/snakefiles/process.snakefile"
include: "src/snakefiles/chemical.snakefile"
include: "src/snakefiles/drugchemical.snakefile"
include: "src/snakefiles/taxon.snakefile"
include: "src/snakefiles/genefamily.snakefile"
include: "src/snakefiles/leftover_umls.snakefile"
include: "src/snakefiles/macromolecular_complex.snakefile"
include: "src/snakefiles/publications.snakefile"
include: "src/snakefiles/duckdb.snakefile"
include: "src/snakefiles/reports.snakefile"
include: "src/snakefiles/exports.snakefile"
# Some general imports.
# shutil is used by rule all to copy config.yaml into the output directory;
# write_done is the project helper the done-marker rules below call with their marker path.
import shutil
from src.snakefiles.util import write_done
# Some global settings.
import os
# Set TMPDIR in this process's environment from config["tmp_directory"]; the key must exist in the
# configuration, otherwise this line raises KeyError while the Snakefile is being parsed.
# NOTE(review): presumably this steers temporary files of the jobs onto a chosen disk; this assignment runs
# after the include statements above, so confirm nothing in the included files needs TMPDIR at parse time.
os.environ["TMPDIR"] = config["tmp_directory"]
# The rules listed here either just write a done-marker or delete files, so they are executed
# locally rather than being submitted as cluster jobs that would occupy a SLURM slot.
localrules: all, all_outputs, clean_compendia, clean_downloads
# Top-level rules.
rule all:
    # Default target: requires every pipeline-stage done-marker, then archives the configuration
    # file next to the results and writes the overall reports/all_done marker.
    input:
        # Each entry is a done-marker path relative to config["output_directory"].
        [
            config["output_directory"] + marker
            for marker in (
                # Written by rule all_outputs (defined later in this file) once every compendium marker exists.
                "/reports/outputs_done",
                # reports_done are generated by the rules in src/snakefiles/
                "/reports/reports_done",
                # Build all the Parquet files.
                "/duckdb/done",
                # Build all the DuckDB (index-wide) reports.
                "/reports/duckdb/done",
                # Build all the exports.
                "/kgx/done",
                "/sapbert-training-data/done",
            )
        ],
        # Store the config.yaml file used to produce the output.
        config_file="config.yaml",
    output:
        x=config["output_directory"] + "/reports/all_done",
        output_config_file=config["output_directory"] + "/config.yaml",
    run:
        # Copy the configuration first so the done-marker is only written after the copy succeeded.
        shutil.copyfile(input.config_file, output.output_config_file)
        write_done(output.x)
rule all_outputs:
    # Aggregation rule: once the done-marker of every compendium listed below exists,
    # write the single reports/outputs_done marker that rule all depends on.
    input:
        [
            config["output_directory"] + marker
            for marker in (
                "/reports/anatomy_done",
                "/reports/chemicals_done",
                "/reports/disease_done",
                "/reports/gene_done",
                "/reports/genefamily_done",
                "/reports/geneprotein_done",
                "/reports/process_done",
                "/reports/protein_done",
                "/reports/taxon_done",
                "/reports/cell_line_done",
                "/reports/umls_done",
                "/reports/macromolecular_complex_done",
                "/reports/drugchemical_done",
                "/reports/publications_done",
            )
        ],
    output:
        x=config["output_directory"] + "/reports/outputs_done",
    run:
        write_done(output.x)
rule clean_compendia:
    # Delete the files directly inside <output_directory>/compendia and <output_directory>/synonyms.
    # The directories themselves (and any subdirectories or dotfiles) are left in place.
    #
    # A single `rm -f` is used on purpose: Snakemake runs shell commands in bash strict mode, so with
    # two separate `rm` calls an empty compendia/ directory (unmatched glob) made the first call fail
    # and the synonyms/ directory was never cleaned. `-f` makes missing/unmatched paths a no-op.
    params:
        dir=config["output_directory"],
    shell:
        # `:q` shell-quotes the directory so paths with spaces work; `--` stops option parsing.
        "rm -f -- {params.dir:q}/compendia/* {params.dir:q}/synonyms/*"
rule clean_downloads:
    # Recursively delete everything (except dotfiles, which `*` does not match) inside the
    # configured download directory. The directory itself is kept.
    #
    # Guard: if download_directory is configured as an empty string the command would expand to
    # `rm -rf /*`, so the rule refuses to run and fails instead.
    params:
        dir=config["download_directory"],
    shell:
        # `:q` shell-quotes the directory so paths with spaces work; `--` stops option parsing.
        """
        if [ -z {params.dir:q} ]; then
            echo "clean_downloads: download_directory is empty; refusing to delete." >&2
            exit 1
        fi
        rm -rf -- {params.dir:q}/*
        """
# Sometimes a synonyms file is available as a .gz file, but not as the .txt file itself.
# This rule is here so that Snakemake knows how to uncompress it if needed.
rule uncompress_synonym_file:
    # Produce synonyms/<synonym_file>.txt by decompressing synonyms/<synonym_file>.txt.gz.
    # The .gz input is left untouched because gunzip writes to stdout (-c).
    input:
        config["output_directory"] + "/synonyms/{synonym_file}.txt.gz",
    output:
        config["output_directory"] + "/synonyms/{synonym_file}.txt",
    benchmark:
        config["output_directory"] + "/benchmarks/uncompress_synonym_file_{synonym_file}.tsv"
    shell:
        # Option goes before the operand so this also works where getopt does not permute arguments
        # (e.g. POSIXLY_CORRECT); `:q` shell-quotes the paths since the wildcard may contain spaces.
        "gunzip -c {input:q} > {output:q}"