Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 156 additions & 92 deletions R/annotateProteinInfoFromIndra.R

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions R/cytoscapeNetwork.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
#' overlaps are surfaced as hover tooltips.
#'
#' @param nodes Data frame with at minimum an \code{id} column. Optional
#' columns: \code{logFC} (numeric), \code{hgncName}
#' (character), \code{Site} (character, underscore-separated
#' PTM site list).
#' columns: \code{logFC} (numeric), \code{entityName}
#' (character; may be semicolon-joined for multi-grounded
#' rows), \code{entityId} (character), \code{Site}
#' (character, underscore-separated PTM site list).
#' @param edges Data frame with columns \code{source}, \code{target},
#' \code{interaction}. Optional: \code{site},
#' \code{evidenceLink}.
#' @param displayLabelType \code{"id"} (default) or \code{"hgncName"} –
#' @param displayLabelType \code{"id"} (default) or \code{"entityName"} –
#' controls which column is used as the visible node label.
#' @param nodeFontSize Font size (px) for node labels. Default \code{12}.
#' @param layoutOptions Named list of dagre layout options to override the
Expand Down
13 changes: 8 additions & 5 deletions R/getSubnetworkFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
#' Using differential abundance results from MSstats, this function retrieves
#' a subnetwork of protein interactions from INDRA database.
#'
#' @param input output of \code{\link[MSstats]{groupComparison}} function's
#' comparisionResult table, which contains a list of proteins and their
#' corresponding p-values, logFCs, along with additional HGNC ID and HGNC
#' name columns
#' @param input output of \code{\link[MSstats]{groupComparison}} function's
#' \code{ComparisonResult} table, annotated by
#' \code{\link{annotateProteinInfoFromIndra}}. Must contain \code{Protein},
#' \code{EntityNamespace}, and \code{EntityId} columns (and typically also
#' \code{EntityName}, \code{log2FC}, \code{adj.pvalue}). When an analyte
#' grounds to multiple candidates the three \code{Entity*} columns are
#' semicolon-joined and positionally aligned.
#' @param protein_level_data output of the \code{\link[MSstats]{dataProcess}}
#' function's ProteinLevelData table, which contains a list of proteins and
#' their corresponding abundances. Used for annotating correlation information
Expand Down Expand Up @@ -72,7 +75,7 @@ getSubnetworkFromIndra <- function(input,
direction = match.arg(direction)
input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other, include_infinite_fc, direction)
Comment thread
tonywu1999 marked this conversation as resolved.
.validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other)
res <- .callIndraCogexApi(input$HgncId, force_include_other)
res <- .callIndraCogexApi(input$EntityNamespace, input$EntityId, force_include_other)
res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter)
edges <- .constructEdgesDataFrame(res, input, protein_level_data)
edges <- .filterEdgesDataFrame(edges, paper_count_cutoff, correlation_cutoff)
Expand Down
101 changes: 68 additions & 33 deletions R/utils_annotateProteinInfoFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,34 +254,51 @@ INDRA_API_URL = "https://discovery.indra.bio"
return(res)
}

#' Call gilda API to get HGNC IDs from HGNC names
#' @param hgncNames list of hgnc names
#' @return named character vector mapping HGNC names to HGNC IDs
#' Call Gilda API to ground entity text against any namespace
#'
#' Posts each input text to Gilda's `ground_multi` endpoint and returns
#' every grounding candidate per input (in Gilda's ranking order). When
#' `keep_only` is set, candidates whose `term$db` does not match are
#' filtered out. The canonical entity name is taken from `term$entry_name`
#' when present and non-empty, falling back to `term$text` (the input string).
#' @param textInputs list of character strings to ground
#' @param keep_only optional character; if non-NULL, only candidates whose
#' `term$db == keep_only` are retained
#' @param organisms optional list of NCBI taxonomy ids (e.g.
#' \code{list("9606")} for human) to constrain Gilda's grounding.
#' When \code{NULL}, the organisms filter is omitted from the
#' request body and Gilda may return groundings from any
#' organism / non-organism namespace (e.g. CHEBI for metabolites).
#' @return Named list keyed by input text. Each value is a list with
#' three equal-length character vectors: `ns`, `id`, `name`,
#' positionally aligned across Gilda's returned candidates.
#' Texts with no surviving grounding are omitted from the result.
#' Returns \code{NULL} (after emitting a message) if the API call
#' raises an error.
#' @importFrom jsonlite toJSON
#' @importFrom httr POST add_headers content
#' @keywords internal
#' @noRd
.callGetHgncIdsFromGildaApi <- function(hgncNames) {
if (!is.list(hgncNames)) {
.callGroundEntitiesFromGildaApi <- function(textInputs, keep_only = NULL, organisms = NULL) {

if (!is.list(textInputs)) {
stop("Input must be a list.")
}
if (any(!sapply(hgncNames, is.character))) {
stop("All elements in the list must be character strings representing hgnc names.")

if (any(!sapply(textInputs, is.character))) {
stop("All elements in the list must be character strings.")
}
if (length(hgncNames) == 0) {

if (length(textInputs) == 0) {
stop("Input list must not be empty.")
}

apiUrl <- file.path("https://grounding.indra.bio/", "ground_multi")

requestBody <- lapply(hgncNames, function(hgnc_name) {
list(
text = hgnc_name,
organisms = list("9606")
)

requestBody <- lapply(textInputs, function(text_input) {
entry <- list(text = text_input)
if (!is.null(organisms)) {
entry$organisms <- organisms
}
entry
})
requestBody <- jsonlite::toJSON(requestBody, auto_unbox = TRUE)
res <- tryCatch({
Expand All @@ -296,27 +313,45 @@ INDRA_API_URL = "https://discovery.indra.bio"
message("Error in API call: ", e)
NULL
})

if (is.null(res)) {
return(NULL)
}

hgnc_mapping <- character(0)

for (item in res) {
# Find the term where db == "HGNC"
hgnc_term <- NULL

grounding_map <- list()

for (i in seq_along(res)) {
item <- res[[i]]
input_text <- as.character(textInputs[[i]])

ns_vec <- character(0)
id_vec <- character(0)
name_vec <- character(0)

for (entry in item) {
if (!is.null(entry$term$db) && entry$term$db == "HGNC") {
hgnc_term <- entry$term
break
term <- entry$term
if (is.null(term) || is.null(term$db) || is.null(term$id)) next
if (!is.null(keep_only) && term$db != keep_only) next

entry_name <- if (!is.null(term$entry_name) && nzchar(term$entry_name)) {
term$entry_name
} else {
term$text
}

ns_vec <- c(ns_vec, term$db)
id_vec <- c(id_vec, term$id)
name_vec <- c(name_vec, entry_name)
}

# Only add to mapping if HGNC term was found
if (!is.null(hgnc_term)) {
hgnc_mapping[hgnc_term$text] <- hgnc_term$id

if (length(ns_vec) > 0) {
grounding_map[[input_text]] <- list(
ns = ns_vec,
id = id_vec,
name = name_vec
)
}
}
return(hgnc_mapping)

return(grounding_map)
}
22 changes: 11 additions & 11 deletions R/utils_cytoscapeNetwork.R
Original file line number Diff line number Diff line change
Expand Up @@ -238,34 +238,34 @@
rep("#D3D3D3", nrow(nodes))
}

label_col <- if (display_label_type == "hgncName" &&
"hgncName" %in% names(nodes)) "hgncName" else "id"
label_col <- if (display_label_type == "entityName" &&
"entityName" %in% names(nodes)) "entityName" else "id"

has_ptm_sites <- if ("Site" %in% names(nodes)) {
unique(nodes$id[!is.na(nodes$Site) & trimws(nodes$Site) != ""])
} else {
character(0)
}

elements <- list()
emitted_prots <- character(0)
emitted_cpds <- character(0)
emitted_ptm_n <- character(0)
emitted_ptm_e <- character(0)

for (i in seq_len(nrow(nodes))) {
row <- nodes[i, , drop = FALSE]
color <- node_colors[i]
has_site <- "Site" %in% names(nodes) &&
!is.na(row$Site) && trimws(row$Site) != ""
display_label <- if (label_col == "hgncName" &&
!is.na(row$hgncName) && row$hgncName != "")
row$hgncName else row$id

display_label <- if (label_col == "entityName" &&
!is.na(row$entityName) && row$entityName != "")
row$entityName else row$id

needs_compound <- row$id %in% has_ptm_sites
compound_id <- paste0(row$id, "__compound__")

# Compound container
if (needs_compound && !(compound_id %in% emitted_cpds)) {
elements <- c(elements, list(
Expand Down
Loading
Loading