Skip to content

Commit 2c770b7

Browse files
bschilder and claude committed
Add SpliceAI API, comprehensive tests, fix ggplot2 facet_grid
- New SPLICEAI_query_api(): query Broad SpliceAI Lookup REST API for on-the-fly splice predictions without local precomputed files
- Fix deprecated facet_grid(facets=) -> rows/cols/vars() across all IMPACT and DEEPLEARNING plot functions
- Fix IMPACT_get_annotations CHR type mismatch (char vs int)
- Fix IMPACT_iterate_get_annotations to handle errors gracefully
- Fix DEEPLEARNING_plot combn() crash with < 2 SNP groups
- Fix DEEPLEARNING_plot outlier removal edge case
- 57 tests passing, covering all 25 exported functions with real data integration tests (Zenodo, GitHub, SpliceAI API)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2fdf72f commit 2c770b7

24 files changed

Lines changed: 634 additions & 29 deletions

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ Suggests:
5959
shades,
6060
reshape2,
6161
stringr,
62-
R.utils
62+
R.utils,
63+
jsonlite
6364
Remotes:
6465
github::RajLabMSSM/echodata,
6566
github::RajLabMSSM/echotabix,

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ export(IMPACT_postprocess_annotations)
1919
export(IMPACT_query)
2020
export(IMPACT_snp_group_boxplot)
2121
export(SPLICEAI_plot)
22+
export(SPLICEAI_query_api)
2223
export(SPLICEAI_query_tsv)
2324
export(SPLICEAI_query_tsv_iterate)
2425
export(SPLICEAI_query_vcf)

R/DEEPLEARNING_plot.R

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,10 @@ DEEPLEARNING_plot <- function(
7272
## Suppress the side-effect plot device from boxplot()
7373
outliers <- graphics::boxplot(annot_melt$value,
7474
plot = FALSE)$out
75-
annot_melt <- annot_melt[-which(annot_melt$value %in% outliers), ]
75+
if (length(outliers) > 0) {
76+
idx <- which(annot_melt$value %in% outliers)
77+
if (length(idx) > 0) annot_melt <- annot_melt[-idx, ]
78+
}
7679
}
7780
colorDict <- echodata::snp_group_colorDict()
7881
dat_plot <- subset(
@@ -84,14 +87,19 @@ DEEPLEARNING_plot <- function(
8487
levels = names(colorDict),
8588
ordered = TRUE)
8689
)
87-
snp_groups_present <- unique(dat_plot$SNP_group)
88-
comparisons <- utils::combn(
89-
x = as.character(snp_groups_present),
90-
m = 2,
91-
FUN = comparisons_filter,
92-
simplify = FALSE
93-
)
94-
comparisons <- Filter(Negate(is.null), comparisons)
90+
snp_groups_present <- unique(as.character(
91+
dat_plot$SNP_group[!is.na(dat_plot$SNP_group)]))
92+
if (length(snp_groups_present) >= 2) {
93+
comparisons <- utils::combn(
94+
x = snp_groups_present,
95+
m = 2,
96+
FUN = comparisons_filter,
97+
simplify = FALSE
98+
)
99+
comparisons <- Filter(Negate(is.null), comparisons)
100+
} else {
101+
comparisons <- list()
102+
}
95103

96104
method <- "wilcox.test"
97105
gp <- ggplot2::ggplot(
@@ -102,7 +110,7 @@ DEEPLEARNING_plot <- function(
102110
ggplot2::geom_violin(alpha = 0.6) +
103111
ggplot2::geom_boxplot(alpha = 0.6, color = "black") +
104112
ggplot2::facet_grid(
105-
facets = stats::as.formula(facet_formula),
113+
rows = stats::as.formula(facet_formula),
106114
scales = "free"
107115
) +
108116
ggplot2::labs(

R/IMPACT_get_annotations.R

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ IMPACT_get_annotations <- function(
3939
Annot <- IMPACT_score <- POS <- NULL;
4040

4141
if (!is.null(dat)) {
42-
dat$CHR <- gsub("chr", "", dat$CHR)
42+
dat$CHR <- as.integer(gsub("chr", "", dat$CHR))
4343
chrom <- dat$CHR[1]
4444
}
4545
URL <- file.path(baseURL,
@@ -126,7 +126,7 @@ IMPACT_iterate_get_annotations <- function(
126126
function(chrom, .merged_DT = merged_DT) {
127127
messager("+ IMPACT:: Gathering annotations for chrom =",
128128
chrom, v = verbose)
129-
try({
129+
tryCatch({
130130
dat <- subset(.merged_DT, CHR == chrom)
131131
annot_melt <- IMPACT_get_annotations(
132132
baseURL = baseURL,
@@ -153,8 +153,13 @@ IMPACT_iterate_get_annotations <- function(
153153
"annotations found at IMPACT_score >=",
154154
IMPACT_score_thresh, v = verbose)
155155
return(annot_melt)
156+
}, error = function(e) {
157+
messager("+ IMPACT:: Error for chrom", chrom, ":",
158+
conditionMessage(e), v = verbose)
159+
return(NULL)
156160
})
157161
})
158-
ANNOT_MELT <- data.table::rbindlist(ANNOT_MELT)
162+
ANNOT_MELT <- Filter(Negate(is.null), ANNOT_MELT)
163+
ANNOT_MELT <- data.table::rbindlist(ANNOT_MELT, fill = TRUE)
159164
return(ANNOT_MELT)
160165
}

R/IMPACT_plot.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ IMPACT_plot_enrichment <- function(ENRICH,
182182
height = 0) +
183183
ggplot2::geom_hline(yintercept = 1, linetype = "dashed",
184184
alpha = 0.8) +
185-
ggplot2::facet_grid(facets = SNP.group ~ .,
185+
ggplot2::facet_grid(rows = ggplot2::vars(SNP.group),
186186
switch = "y", space = "free_x",
187187
scales = "free_x") +
188188
ggplot2::theme_bw() +
@@ -196,7 +196,7 @@ IMPACT_plot_enrichment <- function(ENRICH,
196196
ggplot2::geom_col(position = "dodge") +
197197
ggplot2::geom_hline(yintercept = 1, linetype = "dashed",
198198
alpha = 0.8) +
199-
ggplot2::facet_grid(facets = . ~ Tissue,
199+
ggplot2::facet_grid(cols = ggplot2::vars(Tissue),
200200
switch = "y", space = "free_x",
201201
scales = "free_x") +
202202
ggplot2::theme_bw() +
@@ -298,7 +298,7 @@ IMPACT_plot_impact_score <- function(annot_melt,
298298
subset(annot_melt, IMPACT_score > 0.5),
299299
ggplot2::aes(x = Mb, y = IMPACT_score, color = TF)) +
300300
ggplot2::geom_point(show.legend = FALSE) +
301-
ggplot2::facet_grid(facets = Tissue ~ ., switch = "y") +
301+
ggplot2::facet_grid(rows = ggplot2::vars(Tissue), switch = "y") +
302302
ggplot2::theme_bw() +
303303
ggplot2::labs(y = "IMPACT score per tissue") +
304304
ggplot2::theme(

R/SPLICEAI_query_api.R

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
#' Query SpliceAI Lookup API
#'
#' Query the public
#' \href{https://spliceailookup.broadinstitute.org/}{SpliceAI Lookup}
#' web API hosted by the Broad Institute. This service runs the SpliceAI
#' model on-the-fly for individual variants without requiring local
#' installation or precomputed files.
#'
#' @param variants Character vector of variants in
#'   \code{"chrom-pos-ref-alt"} format (e.g. \code{"2-179415988-C-CA"}).
#'   Each variant is URL-encoded before being placed in the query string.
#' @param genome Genome build: \code{"37"} (GRCh37/hg19) or
#'   \code{"38"} (GRCh38/hg38).
#' @param distance Maximum distance between the variant and gained/lost
#'   splice site. A single whole number (default 50, max 10000).
#' @param mask Mask scores for annotated splice sites
#'   (0 = raw, 1 = masked). Default 0.
#' @param verbose Print messages.
#'
#' @returns A \code{data.table} with one row per score record returned by
#'   the API for each variant, containing the columns \code{variant},
#'   \code{gene}, \code{transcript}, \code{DS_AG}, \code{DS_AL},
#'   \code{DS_DG}, \code{DS_DL} (delta scores), and the corresponding
#'   \code{DP_*} (delta positions). Variants for which the request fails
#'   or the API reports no scores are dropped (with a message when
#'   \code{verbose = TRUE}). Returns \code{NULL} if no variant yields
#'   scores, including when \code{variants} is empty.
#'
#' @details
#' \strong{Rate limit:} The public API supports only a handful of
#' queries per user per minute. For batch processing, use
#' \code{\link{SPLICEAI_query_tsv}} with local precomputed files or
#' set up a local instance via
#' \url{https://github.com/broadinstitute/SpliceAI-lookup}.
#'
#' \strong{Dependencies:} Parsing the response requires the suggested
#' package \pkg{jsonlite}; an error is raised up front if it is not
#' installed.
#'
#' @export
#' @family SPLICEAI
#' @source
#' \url{https://spliceailookup.broadinstitute.org/}
#' \doi{10.1016/j.cell.2018.12.015}
#' @importFrom data.table data.table rbindlist
#' @examples
#' \dontrun{
#' result <- SPLICEAI_query_api(
#'     variants = "2-179415988-C-CA",
#'     genome = "37"
#' )
#' }
SPLICEAI_query_api <- function(variants,
                               genome = c("37", "38"),
                               distance = 50,
                               mask = 0,
                               verbose = TRUE) {

    genome <- match.arg(genome)

    #### Validate scalar query parameters up front ####
    ## Invalid values would otherwise only surface as an opaque
    ## per-variant API error.
    distance_ok <- is.numeric(distance) && length(distance) == 1L &&
        !is.na(distance) && distance >= 0 && distance <= 10000 &&
        distance == round(distance)
    if (!distance_ok) {
        stop("`distance` must be a single whole number between 0 and 10000.",
             call. = FALSE)
    }
    mask_ok <- length(mask) == 1L && !is.na(mask) &&
        (is.numeric(mask) || is.logical(mask)) &&
        as.integer(mask) %in% c(0L, 1L)
    if (!mask_ok) {
        stop("`mask` must be 0 (raw) or 1 (masked).", call. = FALSE)
    }
    ## Format as plain integers: paste0() may render a double in
    ## scientific notation (e.g. "1e+05"), which corrupts the query string.
    distance_chr <- format(as.integer(distance), scientific = FALSE)
    mask_chr <- format(as.integer(mask), scientific = FALSE)

    variants <- as.character(variants)
    if (length(variants) == 0) return(NULL)

    ## jsonlite is only listed under Suggests, so check for it explicitly
    ## instead of letting every request fail inside the tryCatch below.
    if (!requireNamespace("jsonlite", quietly = TRUE)) {
        stop("Package 'jsonlite' is required for SPLICEAI_query_api(). ",
             "Install it with install.packages(\"jsonlite\").",
             call. = FALSE)
    }

    base_url <- paste0("https://spliceai-", genome,
                       "-xwkwwwxdwq-uc.a.run.app/spliceai/")

    ## Download and parse one response. The connection is opened by url()
    ## and always closed on exit, including when reading or parsing fails.
    fetch_json <- function(query_url) {
        con <- url(query_url, open = "rt")
        on.exit(close(con), add = TRUE)
        txt <- readLines(con, warn = FALSE)
        jsonlite::fromJSON(paste(txt, collapse = "\n"), flatten = TRUE)
    }

    results <- lapply(variants, function(v) {
        messager("SPLICEAI:: Querying API for:", v, v = verbose)
        query_url <- paste0(
            base_url,
            "?hg=", genome,
            "&distance=", distance_chr,
            "&mask=", mask_chr,
            ## Percent-encode so unusual variant strings cannot break
            ## (or inject parameters into) the query string.
            "&variant=", utils::URLencode(v, reserved = TRUE)
        )
        tryCatch({
            json <- fetch_json(query_url)
            if (!is.null(json$error)) {
                messager("SPLICEAI:: No scores for", v, ":", json$error,
                         v = verbose)
                return(NULL)
            }
            scores <- json$scores
            ## NROW() also catches a zero-row table, whose length()
            ## (its column count) would be non-zero.
            if (is.null(scores) || length(scores) == 0 ||
                NROW(scores) == 0) {
                return(NULL)
            }
            data.table::data.table(
                variant = v,
                gene = scores$g_name,
                transcript = scores$t_id,
                DS_AG = as.numeric(scores$DS_AG),
                DS_AL = as.numeric(scores$DS_AL),
                DS_DG = as.numeric(scores$DS_DG),
                DS_DL = as.numeric(scores$DS_DL),
                DP_AG = as.integer(scores$DP_AG),
                DP_AL = as.integer(scores$DP_AL),
                DP_DG = as.integer(scores$DP_DG),
                DP_DL = as.integer(scores$DP_DL)
            )
        }, error = function(e) {
            ## Best effort: one failing variant must not abort the batch.
            messager("SPLICEAI:: API error for", v, ":",
                     conditionMessage(e), v = verbose)
            NULL
        })
    })
    results <- Filter(Negate(is.null), results)
    if (length(results) == 0) return(NULL)
    data.table::rbindlist(results, fill = TRUE)
}

man/SPLICEAI_plot.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/SPLICEAI_query_api.Rd

Lines changed: 72 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/SPLICEAI_query_tsv.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/SPLICEAI_query_tsv_iterate.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)