Skip to content

Commit 2fdf72f

Browse files
bschilder and claude
committed
Add SpliceAI and DeepLearning modules
SpliceAI (6 functions): run model, query precomputed VCF/TSV files, post-process predictions, multi-panel visualization. DeepLearning (5 functions): query Dey et al. deep learning annotations (Basenji, DeepSEA, etc.), melt by SNP groups, violin/boxplot viz. Note: original Alkes Price lab LDSCORE hosting is offline (404). Data now at Google Cloud requester-pays bucket: https://console.cloud.google.com/storage/browser/broad-alkesgroup-public-requester-pays R CMD check: 0 errors, 0 non-boilerplate notes. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5d96643 commit 2fdf72f

20 files changed

Lines changed: 1786 additions & 5 deletions

DESCRIPTION

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ Authors@R:
2121
comment = c(ORCID = "0000-0002-9355-5704"))
2222
)
2323
Description: Echoverse module: API access to variant-level AI/ML predictions
24-
including IMPACT (Inference and Modeling of Phenotype-related ACtive
25-
Transcription) for immune cell annotations. Part of the echoverse suite
26-
for genomic fine-mapping.
24+
including IMPACT (immune cell TF binding), SpliceAI (splice site
25+
predictions), and deep learning annotations (Basenji, DeepSEA).
26+
Part of the echoverse suite for genomic fine-mapping.
2727
URL: https://github.com/RajLabMSSM/echoAI
2828
BugReports: https://github.com/RajLabMSSM/echoAI/issues
2929
Encoding: UTF-8
@@ -38,6 +38,8 @@ Imports:
3838
stats,
3939
utils,
4040
methods,
41+
parallel,
42+
tidyr,
4143
ggplot2
4244
Suggests:
4345
markdown,
@@ -57,8 +59,7 @@ Suggests:
5759
shades,
5860
reshape2,
5961
stringr,
60-
R.utils,
61-
tidyr
62+
R.utils
6263
Remotes:
6364
github::RajLabMSSM/echodata,
6465
github::RajLabMSSM/echotabix,

NAMESPACE

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(DEEPLEARNING_melt)
4+
export(DEEPLEARNING_plot)
5+
export(DEEPLEARNING_query)
6+
export(DEEPLEARNING_query_multi_chr)
7+
export(DEEPLEARNING_query_one_chr)
38
export(IMPACT_compute_enrichment)
49
export(IMPACT_get_annotation_key)
510
export(IMPACT_get_annotations)
@@ -13,6 +18,12 @@ export(IMPACT_plot_impact_score)
1318
export(IMPACT_postprocess_annotations)
1419
export(IMPACT_query)
1520
export(IMPACT_snp_group_boxplot)
21+
export(SPLICEAI_plot)
22+
export(SPLICEAI_query_tsv)
23+
export(SPLICEAI_query_tsv_iterate)
24+
export(SPLICEAI_query_vcf)
25+
export(SPLICEAI_run)
26+
export(SPLICEAI_snp_probs)
1627
importFrom(data.table,":=")
1728
importFrom(data.table,data.table)
1829
importFrom(data.table,dcast)
@@ -22,17 +33,20 @@ importFrom(data.table,melt.data.table)
2233
importFrom(data.table,merge.data.table)
2334
importFrom(data.table,rbindlist)
2435
importFrom(downloadR,zenodo_upload)
36+
importFrom(dplyr,any_of)
2537
importFrom(dplyr,arrange)
2638
importFrom(dplyr,group_by)
2739
importFrom(dplyr,mutate)
2840
importFrom(dplyr,mutate_at)
2941
importFrom(dplyr,n_distinct)
42+
importFrom(dplyr,rename)
3043
importFrom(dplyr,select)
3144
importFrom(dplyr,slice)
3245
importFrom(dplyr,slice_head)
3346
importFrom(dplyr,summarise)
3447
importFrom(dplyr,summarise_at)
3548
importFrom(dplyr,top_n)
49+
importFrom(dplyr,vars)
3650
importFrom(echodata,find_consensus_snps_no_polyfun)
3751
importFrom(echodata,snp_group_colorDict)
3852
importFrom(echodata,snp_group_filters)
@@ -41,6 +55,7 @@ importFrom(echotabix,convert)
4155
importFrom(echotabix,query)
4256
importFrom(ggplot2,aes)
4357
importFrom(ggplot2,element_blank)
58+
importFrom(ggplot2,element_rect)
4459
importFrom(ggplot2,element_text)
4560
importFrom(ggplot2,facet_grid)
4661
importFrom(ggplot2,geom_boxplot)
@@ -58,8 +73,13 @@ importFrom(ggplot2,scale_color_viridis_c)
5873
importFrom(ggplot2,scale_fill_manual)
5974
importFrom(ggplot2,theme)
6075
importFrom(ggplot2,theme_bw)
76+
importFrom(ggplot2,theme_classic)
6177
importFrom(ggplot2,ylim)
78+
importFrom(parallel,mclapply)
79+
importFrom(stats,as.formula)
6280
importFrom(stats,median)
6381
importFrom(stats,setNames)
82+
importFrom(tidyr,replace_na)
83+
importFrom(tidyr,separate)
6484
importFrom(utils,combn)
6585
importFrom(utils,data)

R/DEEPLEARNING_melt.R

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#' Melt deep learning annotations into long format
#'
#' Aggregates deep learning annotation values by SNP group within each
#' locus, then melts the result into long format and parses annotation
#' column names into component fields (\code{Model}, \code{Tissue},
#' \code{Assay}, \code{Type}, \code{Metric}, \code{SNP_group}).
#'
#' @details
#' Annotation columns are selected by matching \code{model} (joined with
#' \code{"|"} into one regular expression) against \code{colnames(ANNOT)}.
#' For every selected column and every locus, one aggregated value is
#' computed per SNP group. The groups are defined by the columns
#' \code{P}, \code{leadSNP}, \code{ABF.CS}, \code{SUSIE.CS},
#' \code{POLYFUN_SUSIE.CS}, \code{FINEMAP.CS}, \code{Support},
#' \code{Support_noPF}, \code{Consensus_SNP} and
#' \code{Consensus_SNP_noPF} of \code{ANNOT}.
#'
#' If one of these columns is absent from \code{ANNOT}, the corresponding
#' SNP group is computed on an empty subset (e.g. \code{NaN} for
#' \code{"mean"}) rather than raising an error.
#'
#' The \code{"Random"} group aggregates three values drawn with
#' replacement from each locus, so it is only reproducible if the caller
#' sets a seed beforehand.
#'
#' @param ANNOT A \code{data.table} of deep learning annotations as
#'  returned by \code{\link{DEEPLEARNING_query}}. Must contain a
#'  \code{Locus} column.
#' @param model Character vector of model names used to identify
#'  annotation columns (matched as a regular expression against the
#'  column names of \code{ANNOT}).
#' @param aggregate_func Name of the aggregation function
#'  (e.g. \code{"mean"}, \code{"median"}). The function is called with
#'  \code{na.rm = TRUE}.
#' @param replace_NA Value to substitute for \code{NA} before aggregation.
#' @param replace_negInf Value to substitute for \code{-Inf} (currently
#'  unused but reserved for future use).
#' @param save_path File path to save the melted result, or \code{FALSE}
#'  to skip saving.
#' @param verbose Print messages.
#'
#' @returns A \code{data.table} in long format with columns \code{Locus},
#'  \code{Annotation}, \code{value}, \code{Model}, \code{Tissue},
#'  \code{Assay}, \code{Type}, \code{Metric}, and \code{SNP_group}.
#'
#' @export
#' @family DEEPLEARNING
#' @source
#' \url{https://alkesgroup.broadinstitute.org/LDSCORE/DeepLearning/Dey_DeepLearning.tgz}
#' (original hosting reported offline; the data are reported to be
#' available from the Google Cloud requester-pays bucket
#' \url{https://console.cloud.google.com/storage/browser/broad-alkesgroup-public-requester-pays}).
#' @importFrom data.table data.table melt.data.table
#' @importFrom dplyr group_by summarise_at vars mutate
#' @importFrom tidyr replace_na separate
#' @importFrom echodata snp_group_filters
#' @examples
#' \dontrun{
#' annot_melt <- DEEPLEARNING_melt(
#'     ANNOT = ANNOT,
#'     aggregate_func = "mean",
#'     save_path = "results/deeplearning_snp_groups_mean.csv.gz"
#' )
#' }
DEEPLEARNING_melt <- function(
    ANNOT,
    model = c("Basenji", "BiClassCNN", "DeepSEA",
              "ChromHMM", "Roadmap", "Others"),
    aggregate_func = "mean",
    replace_NA = NA,
    replace_negInf = NA,
    save_path = FALSE,
    verbose = TRUE) {

    # Bind column names used via non-standard evaluation so that
    # R CMD check does not flag them as undefined globals. Side effect:
    # a column missing from ANNOT resolves to NULL inside the lambdas
    # below, which produces an empty subset instead of an error.
    Locus <- P <- leadSNP <- ABF.CS <- SUSIE.CS <- NULL
    POLYFUN_SUSIE.CS <- FINEMAP.CS <- NULL
    Support <- Support_noPF <- NULL
    Consensus_SNP <- Consensus_SNP_noPF <- NULL
    Annotation <- SNP_group <- NULL

    # The names of this list define the ordered levels of SNP_group.
    snp_groups_list <- echodata::snp_group_filters()

    # mode = "function" prevents a same-named non-function object
    # from shadowing the aggregation function.
    agg_func <- get(aggregate_func, mode = "function")

    # Shared aggregation step: substitute NAs, then aggregate.
    agg_clean <- function(values) {
        agg_func(tidyr::replace_na(values, replace_NA), na.rm = TRUE)
    }

    annot_melt <- ANNOT |>
        dplyr::group_by(Locus) |>
        dplyr::summarise_at(
            .vars = dplyr::vars(
                grep(paste(model, collapse = "|"),
                     colnames(ANNOT), value = TRUE)
            ),
            .funs = list(
                # Index with sample.int() rather than calling
                # sample(.x, ...) directly: for a single numeric value
                # >= 1, sample() would draw from 1:.x instead of
                # returning that value.
                "Random" = ~ agg_clean(
                    .x[sample.int(length(.x), size = 3, replace = TRUE)]
                ),
                "All" = ~ agg_clean(.x),
                "GWAS nom. sig." = ~ agg_clean(.x[P < .05]),
                "GWAS sig." = ~ agg_clean(.x[P < 5e-8]),
                "GWAS lead" = ~ agg_clean(.x[leadSNP]),
                "ABF CS" = ~ agg_clean(.x[ABF.CS > 0]),
                "SUSIE CS" = ~ agg_clean(.x[SUSIE.CS > 0]),
                "POLYFUN-SUSIE CS" = ~ agg_clean(
                    .x[POLYFUN_SUSIE.CS > 0]
                ),
                "FINEMAP CS" = ~ agg_clean(.x[FINEMAP.CS > 0]),
                "UCS (-PolyFun)" = ~ agg_clean(.x[Support_noPF > 0]),
                "UCS" = ~ agg_clean(.x[Support > 0]),
                "Support==0" = ~ agg_clean(.x[Support == 0]),
                "Support==1" = ~ agg_clean(.x[Support == 1]),
                "Support==2" = ~ agg_clean(.x[Support == 2]),
                "Support==3" = ~ agg_clean(.x[Support == 3]),
                "Support==4" = ~ agg_clean(.x[Support == 4]),
                "Consensus (-PolyFun)" = ~ agg_clean(
                    .x[Consensus_SNP_noPF]
                ),
                "Consensus" = ~ agg_clean(.x[Consensus_SNP])
            )
        ) |>
        data.table::data.table() |>
        data.table::melt.data.table(
            id.vars = "Locus",
            variable.name = "Annotation"
        ) |>
        # Summary columns are named "<annotation column>_<SNP group>";
        # split them on "_" into their component fields.
        tidyr::separate(
            col = "Annotation",
            sep = "_",
            into = c("Model", "Tissue", "Assay", "Type",
                     "Metric", "SNP_group"),
            remove = FALSE
        ) |>
        dplyr::mutate(
            Annotation = gsub("^_+|_+$", "",
                              trimws(as.character(Annotation))),
            SNP_group = factor(
                SNP_group,
                levels = names(snp_groups_list),
                ordered = TRUE
            )
        )

    if (!isFALSE(save_path)) {
        messager("DEEPLEARNING:: Saving aggregated SNP_group values",
                 aggregate_func, "==>", save_path, v = verbose)
        dir.create(dirname(save_path),
                   showWarnings = FALSE, recursive = TRUE)
        data.table::fwrite(annot_melt, save_path)
    }
    return(annot_melt)
}

0 commit comments

Comments
 (0)