diff --git a/R/module-loadpage-server.R b/R/module-loadpage-server.R index fd1d3c3..68aabc9 100644 --- a/R/module-loadpage-server.R +++ b/R/module-loadpage-server.R @@ -22,31 +22,48 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa # Define volumes for the file selection. if (!is_web_server) { volumes <- shinyFiles::getVolumes()() - - # Server-side logic for the shinyFiles button + + # Server-side logic for the shinyFiles buttons (Spectronaut + DIANN) shinyFiles::shinyFileChoose(input, "big_file_browse", roots = volumes, session = session) - + shinyFiles::shinyFileChoose(input, "big_diann_browse", roots = volumes, session = session) + # Reactive to parse and store the full file information (path, name, etc.) # This is efficient because parseFilePaths is only called once. local_file_info <- reactive({ req(is.list(input$big_file_browse)) shinyFiles::parseFilePaths(volumes, input$big_file_browse) }) - + + local_diann_file_info <- reactive({ + req(is.list(input$big_diann_browse)) + shinyFiles::parseFilePaths(volumes, input$big_diann_browse) + }) + # Reactive to get just the full datapath, for use in backend processing. local_big_file_path <- reactive({ path_info <- local_file_info() if (nrow(path_info) > 0) path_info$datapath else NULL }) - + + local_big_diann_path <- reactive({ + path_info <- local_diann_file_info() + if (nrow(path_info) > 0) path_info$datapath else NULL + }) + # Render just the filename for user feedback in the UI. output$specdata_big_path <- renderPrint({ req(nrow(local_file_info()) > 0) cat(local_file_info()$name) }) - } + + output$dianndata_big_path <- renderPrint({ + req(nrow(local_diann_file_info()) > 0) + cat(local_diann_file_info()$name) + }) + } else { local_big_file_path <- reactive({ NULL }) + local_big_diann_path <- reactive({ NULL }) } # ============ PREVIEW DATA: Read first 100 rows on file upload ============ @@ -190,11 +207,6 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa ui_elements }) - # Spectronaut intensity column input — universal across both the - # regular (in-memory) and large-file paths, regardless of analysis - # template. Default tracks the template: turnover analyses want the - # MS1-only quantity, normal analyses want the normalized peak area - # (which is also `bigSpectronauttoMSstatsFormat`'s default). output$spectronaut_intensity_ui <- renderUI({ req(input$filetype == 'spec', input$BIO != 'PTM') @@ -243,9 +255,69 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa value = "K") }) + output$diann_header_ui <- renderUI({ + req(input$filetype == 'diann', input$BIO != 'PTM') + create_diann_header() + }) + + output$diann_file_selection_ui <- renderUI({ + req(input$filetype == 'diann', input$BIO != 'PTM') + + ui_elements <- tagList() + + if (!is_web_server) { + ui_elements <- tagList(ui_elements, create_diann_mode_selector(session$ns, isTRUE(input$big_file_diann))) + + if (isTRUE(input$big_file_diann)) { + ui_elements <- tagList(ui_elements, create_diann_large_file_ui(session$ns)) + } else { + ui_elements <- tagList(ui_elements, create_diann_standard_ui(session$ns)) + } + } else { + ui_elements <- tagList(ui_elements, create_diann_standard_ui(session$ns)) + } + + ui_elements + }) + + output$diann_options_ui <- renderUI({ + req(input$filetype == 'diann', input$BIO != 'PTM') + + if (!is_web_server && isTRUE(input$big_file_diann)) { + mbr_def <- if (is.null(input$big_diann_MBR)) TRUE else input$big_diann_MBR + quantcol_def <- if (is.null(input$big_diann_quantification_column) || + !nzchar(input$big_diann_quantification_column)) { + "FragmentQuantCorrected" + } else { + input$big_diann_quantification_column + } + global_qv_def <- if (is.null(input$big_diann_global_qvalue_cutoff)) 0.01 else input$big_diann_global_qvalue_cutoff + qv_def <- if (is.null(input$big_diann_qvalue_cutoff)) 0.01 else input$big_diann_qvalue_cutoff + pg_qv_def <- if (is.null(input$big_diann_pg_qvalue_cutoff)) 0.01 else input$big_diann_pg_qvalue_cutoff + + max_feature_def <- if (is.null(input$big_diann_max_feature_count)) 100 else input$big_diann_max_feature_count + unique_peps_def <- if (is.null(input$big_diann_filter_unique_peptides)) FALSE else input$big_diann_filter_unique_peptides + agg_psms_def <- if (is.null(input$big_diann_aggregate_psms)) FALSE else input$big_diann_aggregate_psms + few_obs_def <- if (is.null(input$big_diann_filter_few_obs)) FALSE else input$big_diann_filter_few_obs + backend_def <- if (is.null(input$big_diann_backend) || !nzchar(input$big_diann_backend)) "arrow" else input$big_diann_backend + calculate_anomaly_def <- if (is.null(input$big_diann_calculate_anomaly_scores)) FALSE else input$big_diann_calculate_anomaly_scores + + tagList( + create_diann_large_filter_options(session$ns, mbr_def, quantcol_def, + global_qv_def, qv_def, pg_qv_def), + create_diann_large_bottom_ui(session$ns, max_feature_def, + unique_peps_def, agg_psms_def, few_obs_def, + backend_def), + create_diann_large_annotation_ui(session$ns, calculate_anomaly_def) + ) + } else { + NULL + } + }) + output$spectronaut_options_ui <- renderUI({ req(input$filetype == 'spec', input$BIO != 'PTM') - + if (!is_web_server && isTRUE(input$big_file_spec)) { qval_def <- if (is.null(input$filter_by_qvalue)) TRUE else input$filter_by_qvalue excluded_def <- if (is.null(input$filter_by_excluded)) FALSE else input$filter_by_excluded @@ -368,7 +440,9 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa enable("proceed1") } } else if (input$filetype == "diann") { - if(!is.null(input$dianndata)) { + diann_regular_file_ok <- !isTRUE(input$big_file_diann) && !is.null(input$dianndata) + diann_big_file_ok <- isTRUE(input$big_file_diann) && length(local_big_diann_path()) > 0 + if(diann_regular_file_ok || diann_big_file_ok) { enable("proceed1") } } diff --git a/R/module-loadpage-ui.R b/R/module-loadpage-ui.R index 44e5ea3..8f359e3 100644 --- a/R/module-loadpage-ui.R +++ b/R/module-loadpage-ui.R @@ -76,7 +76,7 @@ create_header_content <- function() { a("documentation", href="https://www.bioconductor.org/packages/release/bioc/vignettes/MSstatsPTM/inst/doc/MSstatsPTM_LabelFree_Workflow.html", target="_blank")), - p("Note: files must be in CSV/TSV format, or Parquet (.parquet/.pq) for DIANN 2.0+ inputs, and under 250 MB when using msstatsshiny.com (no limit when running locally)."), + p("Note: files must be in CSV/TSV format, or Parquet (.parquet/.pq) for DIANN 2.0+ inputs, and under 250 MB when using msstatsshiny.com. When running the app locally, Spectronaut and DIANN reports above this limit can be processed via 'Large file mode' (out-of-memory streaming through MSstatsBig)."), p("Some users may have trouble uploading files while using the application via Google Chrome. If the 'Browse...' button does not work please try a different web browser.") ) } @@ -223,7 +223,7 @@ create_standard_uploads <- function(ns) { #' @noRd create_standard_annotation_uploads <- function(ns) { conditionalPanel( - condition = "(input['loadpage-filetype'] == 'sky' || input['loadpage-filetype'] == 'prog' || input['loadpage-filetype'] == 'PD' || (input['loadpage-filetype'] == 'spec' && !input['loadpage-big_file_spec']) || input['loadpage-filetype'] == 'open'|| input['loadpage-filetype'] =='spmin' || input['loadpage-filetype'] == 'phil' || input['loadpage-filetype'] == 'diann' || input['loadpage-filetype'] == 'meta') && input['loadpage-BIO'] != 'PTM'", + condition = "(input['loadpage-filetype'] == 'sky' || input['loadpage-filetype'] == 'prog' || input['loadpage-filetype'] == 'PD' || (input['loadpage-filetype'] == 'spec' && !input['loadpage-big_file_spec']) || input['loadpage-filetype'] == 'open'|| input['loadpage-filetype'] =='spmin' || input['loadpage-filetype'] == 'phil' || (input['loadpage-filetype'] == 'diann' && !input['loadpage-big_file_diann']) || input['loadpage-filetype'] == 'meta') && input['loadpage-BIO'] != 'PTM'", h4("5. Upload annotation File", class = "icon-wrapper", icon("question-circle", lib = "font-awesome"), div("Upload manually created annotation file. This file maps MS runs to experiment metadata (i.e. conditions, bioreplicates). Please see Help tab for information on creating this file.", class = "icon-tooltip")), @@ -266,12 +266,150 @@ create_skyline_uploads <- function(ns) { } #' Create DIANN file uploads +#' +#' Mirrors the Spectronaut layout (`create_spectronaut_uploads`): a stack +#' of `uiOutput()` slots that the server renders conditionally based on +#' `input$filetype == 'diann'` and the `big_file_diann` mode toggle. #' @noRd create_diann_uploads <- function(ns) { - conditionalPanel( - condition = "input['loadpage-filetype'] == 'diann' && input['loadpage-BIO'] != 'PTM'", - h4("4. Upload MSstats report from DIANN"), - fileInput(ns('dianndata'), "", multiple = FALSE, accept = NULL) + tagList( + uiOutput(ns("diann_header_ui")), + uiOutput(ns("diann_file_selection_ui")), + uiOutput(ns("diann_options_ui")) + ) +} + +#' Create DIANN header +#' @noRd +create_diann_header <- function() { + h4("4. Upload MSstats report from DIANN") +} + +#' Create DIANN mode selector (Local only) +#' @noRd +create_diann_mode_selector <- function(ns, selected = FALSE) { + checkboxInput(ns("big_file_diann"), "Large file mode", value = selected) +} + +#' Create DIANN standard file input +#' @noRd +create_diann_standard_ui <- function(ns) { + fileInput(ns('dianndata'), "", multiple = FALSE, accept = NULL) +} + +#' Create DIANN large file selection UI +#' @noRd +create_diann_large_file_ui <- function(ns) { + tagList( + shinyFiles::shinyFilesButton(ns("big_diann_browse"), "Browse for local file...", "Please select a file", multiple = FALSE), + verbatimTextOutput(ns("dianndata_big_path")) + ) +} + +#' Create DIANN large file filter / cutoff options +#' +#' Exposes `bigDIANNtoMSstatsFormat`'s converter knobs: MBR, three q-value +#' cutoffs (global / precursor / protein-group), and the quantification +#' column (reuses the same default the regular DIANN 1.x path's +#' `intensity_column` defaults to). +#' @noRd +create_diann_large_filter_options <- function(ns, + mbr_def = TRUE, + quantcol_def = "FragmentQuantCorrected", + global_qv_def = 0.01, + qv_def = 0.01, + pg_qv_def = 0.01) { + tagList( + tags$hr(), + h4("Options for large file processing"), + checkboxInput(ns("big_diann_MBR"), "MBR Enabled", value = mbr_def), + textInput(ns("big_diann_quantification_column"), + h5("Quantification column", class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("Column in the DIANN report to use as the intensity measure. Use 'auto' for DIANN 2.0+ (per-fragment columns); otherwise the legacy column name (default: FragmentQuantCorrected).", + class = "icon-tooltip")), + value = quantcol_def), + numericInput(ns("big_diann_global_qvalue_cutoff"), + "Global Q-value cutoff", value = global_qv_def, min = 0, max = 1, step = 0.01), + numericInput(ns("big_diann_qvalue_cutoff"), + "Q-value cutoff", value = qv_def, min = 0, max = 1, step = 0.01), + numericInput(ns("big_diann_pg_qvalue_cutoff"), + "Protein group Q-value cutoff", value = pg_qv_def, min = 0, max = 1, step = 0.01) + ) +} + +#' Create DIANN large file options (feature processing) +#' @noRd +create_diann_large_bottom_ui <- function(ns, + max_feature_def = 100, + unique_peps_def = FALSE, + agg_psms_def = FALSE, + few_obs_def = FALSE, + backend_def = "arrow") { + tagList( + numericInput(ns("big_diann_max_feature_count"), "Max feature count", + value = max_feature_def, min = 1), + checkboxInput(ns("big_diann_filter_unique_peptides"), "Use unique peptides", + value = unique_peps_def), + checkboxInput(ns("big_diann_aggregate_psms"), "Aggregate PSMs to peptides", + value = agg_psms_def), + checkboxInput(ns("big_diann_filter_few_obs"), "Filter features with few observations", + value = few_obs_def), + selectInput(ns("big_diann_backend"), "Backend", + choices = c("arrow", "sparklyr"), + selected = backend_def) + ) +} + +#' Create DIANN large file annotation override + anomaly UI +#' +#' Renders an optional annotation upload that overrides DIANN's embedded +#' Run / Condition / BioReplicate columns, plus the "Calculate Anomaly +#' Scores" controls. `bigDIANNtoMSstatsFormat` accepts the annotation +#' data frame directly via its `annotation` argument. +#' +#' Anomaly scoring is a two-step pipeline in the large-file path: +#' (1) `bigDIANNtoMSstatsFormat` runs with `calculateAnomalyScores = TRUE` +#' and `anomalyModelFeatures = c("Ms1ProfileCorr", "Evidence", "RT", +#' "Predicted.RT")`, which carries those columns through the +#' out-of-memory reduce/preprocess steps. +#' (2) After `dplyr::collect`, `DeltaRT = RT - Predicted.RT` is +#' engineered in-memory and `MSstatsConvert::MSstatsAnomalyScores` +#' fits the isolation-forest model on +#' `c("Ms1ProfileCorr", "Evidence", "DeltaRT")` to produce the +#' `AnomalyScores` column. +#' +#' A run-order CSV is required (Run + Order columns) — `MSstatsAnomalyScores` +#' uses it for temporal feature engineering. +#' @noRd +create_diann_large_annotation_ui <- function(ns, calculate_anomaly_def = FALSE) { + tagList( + tags$hr(), + h5("Annotation file (optional)", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("Upload a CSV/TSV with columns Run, BioReplicate, Condition (and any extras). When supplied, the converter merges it on Run and overrides any Condition / BioReplicate values from DIANN's embedded annotation. Required for paired designs and other layouts the report itself cannot express.", + class = "icon-tooltip")), + fileInput(ns("big_diann_annotation"), label = NULL, + multiple = FALSE, accept = c(".csv", ".tsv", ".txt")), + checkboxInput(ns("big_diann_calculate_anomaly_scores"), + label = tags$span( + "Calculate Anomaly Scores", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("Carries Ms1ProfileCorr, Evidence, RT, and Predicted.RT through the out-of-memory steps, then engineers DeltaRT = RT - Predicted.RT in-memory after collect and fits MSstatsConvert::MSstatsAnomalyScores on c(Ms1ProfileCorr, Evidence, DeltaRT). Requires a run order CSV.", + class = "icon-tooltip")), + value = calculate_anomaly_def), + conditionalPanel( + condition = sprintf("input['%s']", ns("big_diann_calculate_anomaly_scores")), + fileInput(ns("big_diann_run_order_file"), + label = h5("Upload Run Order File", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("CSV with two columns: 'Run' (sequence name matching the converter output) and 'Order' (chronological run number, e.g. 1, 2, 3...).", + class = "icon-tooltip")), + multiple = FALSE, accept = c(".csv")) + ) ) } @@ -345,27 +483,6 @@ create_spectronaut_large_bottom_ui <- function(ns, max_feature_def = 20, unique_ #' Create Spectronaut large file annotation override + anomaly UI #' -#' Renders an optional annotation upload that overrides Spectronaut's embedded -#' R.Condition / R.Replicate columns on Run, plus the "Calculate Anomaly -#' Scores" controls. End-to-end anomaly scoring is a two-step pipeline in -#' the large-file path: -#' (1) `bigSpectronauttoMSstatsFormat` runs with -#' `calculateAnomalyScores = TRUE` + the model feature column list, -#' which carries those feature columns through the out-of-memory -#' reduce/preprocess steps. -#' (2) After `dplyr::collect`, `MSstatsConvert::MSstatsAnomalyScores` -#' is called on the in-memory result to fit the isolation-forest -#' model and produce the `AnomalyScores` column. -#' Input IDs `calculate_anomaly_scores` and `run_order_file` are deliberately -#' the same as the regular Spectronaut path's so downstream pages -#' (module-qc-server's MSstats+ summarization gate, getDataCode's -#' reproducibility script, etc.) read a single source of truth regardless -#' of which path the user took. The two UI checkboxes never coexist — -#' the regular path's `create_label_free_options` is hidden when -#' `big_file_spec` is on, and this helper only renders when it is — so -#' there is no Shiny namespace collision. -#' A run-order CSV is required (Run + Order columns) — `MSstatsAnomalyScores` -#' uses it for temporal feature engineering. #' @noRd create_spectronaut_large_annotation_ui <- function(ns, calculate_anomaly_def = FALSE) { tagList( @@ -642,7 +759,7 @@ create_tmt_options <- function(ns) { create_label_free_options <- function(ns) { tagList( conditionalPanel( - condition = "input['loadpage-filetype'] && input['loadpage-DDA_DIA'] == 'LType' && input['loadpage-filetype'] != 'sample' && (input['loadpage-filetype'] != 'spec' || !input['loadpage-big_file_spec'])", + condition = "input['loadpage-filetype'] && input['loadpage-DDA_DIA'] == 'LType' && input['loadpage-filetype'] != 'sample' && (input['loadpage-filetype'] != 'spec' || !input['loadpage-big_file_spec']) && (input['loadpage-filetype'] != 'diann' || !input['loadpage-big_file_diann'])", h4("Select the options for pre-processing"), checkboxInput(ns("unique_peptides"), "Use unique peptides", value = TRUE), checkboxInput(ns("remove"), "Remove proteins with 1 feature", value = FALSE), @@ -652,7 +769,7 @@ create_label_free_options <- function(ns) { # DIANN specific options conditionalPanel( - condition = "input['loadpage-filetype'] == 'diann' && input['loadpage-DDA_DIA'] == 'LType'", + condition = "input['loadpage-filetype'] == 'diann' && input['loadpage-DDA_DIA'] == 'LType' && !input['loadpage-big_file_diann']", checkboxInput(ns("diann_2plus"), "DIANN 2.0+", value = FALSE), conditionalPanel( condition = "!input['loadpage-diann_2plus']", @@ -672,7 +789,7 @@ create_label_free_options <- function(ns) { create_quality_filtering_options <- function(ns) { tagList( conditionalPanel( - condition = "input['loadpage-filetype'] == 'sky' || input['loadpage-filetype'] == 'spec'|| input['loadpage-filetype'] == 'diann'", + condition = "input['loadpage-filetype'] == 'sky' || input['loadpage-filetype'] == 'spec'|| (input['loadpage-filetype'] == 'diann' && !input['loadpage-big_file_diann'])", checkboxInput(ns("q_val"), "Filter with Q-value"), conditionalPanel( condition = "input['loadpage-q_val']", @@ -686,24 +803,54 @@ create_quality_filtering_options <- function(ns) { conditionalPanel( condition = "input['loadpage-filetype'] == 'spec'", - checkboxInput(ns("calculate_anomaly_scores"), + checkboxInput(ns("calculate_anomaly_scores"), label = tags$span( "Calculate Anomaly Scores", class = "icon-wrapper", icon("question-circle", lib = "font-awesome"), - div("Calculate anomaly scores for each feature based on a random forest model. This requires a CSV file containing the order of your MS runs.", + div("Calculate anomaly scores for each feature based on a random forest model. This requires a CSV file containing the order of your MS runs.", class = "icon-tooltip") - ), + ), value = FALSE), conditionalPanel( condition = "input['loadpage-calculate_anomaly_scores']", - fileInput(ns("run_order_file"), + fileInput(ns("run_order_file"), label = h5("Upload Run Order File", class = "icon-wrapper", icon("question-circle", lib = "font-awesome"), div("The run order file should be a CSV with two columns: 'Run' and 'Order'. 'Run' contains the sequence name, and 'Order' contains the chronological run number (e.g., 1, 2, 3...).", class = "icon-tooltip")), multiple = FALSE, accept = c(".csv")) ) ), + + # DIANN anomaly scoring (regular path). + # + # DIANN reports do not ship a DeltaRT column; it's engineered as + # RT - Predicted.RT before the converter runs. The user supplies a + # run-order CSV (same shape as the Spectronaut path) so + # MSstatsConvert::MSstatsAnomalyScores (invoked internally by + # DIANNtoMSstatsFormat when calculateAnomalyScores = TRUE) can do + # temporal feature engineering on Ms1ProfileCorr, Evidence, and + # DeltaRT. + conditionalPanel( + condition = "input['loadpage-filetype'] == 'diann' && !input['loadpage-big_file_diann']", + checkboxInput(ns("diann_calculate_anomaly_scores"), + label = tags$span( + "Calculate Anomaly Scores", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("Engineers DeltaRT = RT - Predicted.RT in the raw DIANN report, then calls MSstatsConvert::MSstatsAnomalyScores via DIANNtoMSstatsFormat with quality_metrics c(Ms1ProfileCorr, Evidence, DeltaRT) and temporal directions c(mean_decrease, mean_decrease, dispersion_increase). Requires a run order CSV.", + class = "icon-tooltip") + ), + value = FALSE), + conditionalPanel( + condition = "input['loadpage-diann_calculate_anomaly_scores']", + fileInput(ns("diann_run_order_file"), + label = h5("Upload Run Order File", class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("CSV with two columns: 'Run' (sequence name matching the DIANN report's Run column) and 'Order' (chronological run number, e.g. 1, 2, 3...).", class = "icon-tooltip")), + multiple = FALSE, accept = c(".csv")) + ) + ), conditionalPanel( condition = "input['loadpage-filetype'] == 'open'", diff --git a/R/module-qc-server.R b/R/module-qc-server.R index 28a7d8d..6611e22 100644 --- a/R/module-qc-server.R +++ b/R/module-qc-server.R @@ -154,7 +154,7 @@ qcServer <- function(input, output, session, parent_session, loadpage_input, get choices <- c("Quality Control Plots" = "QCPlot", "Profile Plots" = "ProfilePlot") - if (isTRUE(loadpage_input()$calculate_anomaly_scores)) { + if (.anomaly_scores_enabled(loadpage_input())) { choices <- c(choices, "Quality Metrics Plots" = "QualityMetricsPlot") } @@ -209,7 +209,8 @@ qcServer <- function(input, output, session, parent_session, loadpage_input, get selected <- "TMP" # Conditionally add MSstats+ if anomaly score calculation is checked - if (isTRUE(loadpage_input()$calculate_anomaly_scores)) { + # (Spectronaut, regular DIANN, or big-file DIANN). + if (.anomaly_scores_enabled(loadpage_input())) { choices <- c(choices, "MSstats+" = "linear") tooltip_text <- paste0(tooltip_text, "MSstats+ uses a weighted linear model.") selected = "linear" diff --git a/R/utils.R b/R/utils.R index 8b41a8d..5fde734 100644 --- a/R/utils.R +++ b/R/utils.R @@ -276,6 +276,24 @@ getFileExtension <- function(filename) { tolower(file_ext(basename(filename))) } +#' TRUE if any loadpage anomaly-score checkbox is on +#' +#' Three checkboxes (Spectronaut, regular DIANN, big-file DIANN) all +#' feed the same downstream surface: the QC page's MSstats+ +#' summarization option and Quality Metrics Plot type. Centralizing the +#' OR-test here keeps those gates in sync as new inputs are added. +#' +#' @param loadpage_input the resolved loadpage input list (NOT the +#' reactive — call it before passing in). +#' @return logical(1) +#' @keywords internal +#' @noRd +.anomaly_scores_enabled <- function(loadpage_input) { + isTRUE(loadpage_input$calculate_anomaly_scores) || + isTRUE(loadpage_input$diann_calculate_anomaly_scores) || + isTRUE(loadpage_input$big_diann_calculate_anomaly_scores) +} + #' @importFrom arrow read_parquet getData <- function(input) { show_modal_spinner() @@ -651,10 +669,6 @@ getData <- function(input) { shinybusy::update_modal_spinner(text = "Processing large Spectronaut file...") - # Base arguments shared by every large-file Spectronaut run. - # Optional args (annotation override, anomaly-feature - # carry-through) are spliced in below so callers that don't - # supply them aren't forced to pass NULL / FALSE explicitly. big_spec_args <- list( input_file = local_big_file_path, output_file_name = "output_file.csv", @@ -773,42 +787,228 @@ getData <- function(input) { } } else if(input$filetype == 'diann') { - if (getFileExtension(input$dianndata$name) %in% c("parquet", "pq")) { - data = read_parquet(input$dianndata$datapath) - } else { - data = data.table::fread(input$dianndata$datapath) - } - - qvalue_cutoff = 0.01 - MBR = FALSE - if (isTRUE(input$q_val)) { - if (is.numeric(input$q_cutoff) && length(input$q_cutoff) == 1L && - !is.na(input$q_cutoff) && input$q_cutoff >= 0 && input$q_cutoff <= 1) { - qvalue_cutoff = input$q_cutoff + + if (isTRUE(input$big_file_diann)) { + volumes <- shinyFiles::getVolumes()() + path_info <- shinyFiles::parseFilePaths(volumes, input$big_diann_browse) + local_big_diann_path <- if (nrow(path_info) > 0) path_info$datapath else NULL + + for (cutoff_name in c("big_diann_global_qvalue_cutoff", + "big_diann_qvalue_cutoff", + "big_diann_pg_qvalue_cutoff")) { + val <- input[[cutoff_name]] + if (!is.numeric(val) || is.na(val) || val < 0 || val > 1) { + showNotification( + paste0("Error: ", cutoff_name, " must be between 0 and 1."), + type = "error") + shinybusy::remove_modal_spinner() + return(NULL) + } } - MBR = isTRUE(input$MBR) - } - quantificationColumn = if (isTRUE(input$diann_2plus)) "auto" else { - if (!is.null(input$intensity_column) && nzchar(input$intensity_column)) input$intensity_column else "auto" - } - labeled_aa <- if (!is.null(input$diann_labeled_aa) && nzchar(input$diann_labeled_aa)) { - trimws(strsplit(input$diann_labeled_aa, ",")[[1]]) + + if (!is.numeric(input$big_diann_max_feature_count) || + is.na(input$big_diann_max_feature_count) || + input$big_diann_max_feature_count <= 0) { + showNotification("Error: max_feature_count must be a positive number.", + type = "error") + shinybusy::remove_modal_spinner() + return(NULL) + } + + if (is.null(local_big_diann_path) || !file.exists(local_big_diann_path)) { + showNotification("Error: The selected DIANN file does not exist or is not readable.", + type = "error") + shinybusy::remove_modal_spinner() + return(NULL) + } + + if (isTRUE(input$big_diann_calculate_anomaly_scores) && + is.null(input$big_diann_run_order_file)) { + showNotification( + "Error: Run Order CSV is required when Calculate Anomaly Scores is enabled. Please upload a CSV with Run and Order columns.", + type = "error", + duration = NULL) + shinybusy::remove_modal_spinner() + return(NULL) + } + + shinybusy::update_modal_spinner(text = "Processing large DIANN file...") + + big_diann_args <- list( + input_file = local_big_diann_path, + output_file_name = "output_file.csv", + backend = if (!is.null(input$big_diann_backend) && nzchar(input$big_diann_backend)) input$big_diann_backend else "arrow", + MBR = isTRUE(input$big_diann_MBR), + quantificationColumn = if (!is.null(input$big_diann_quantification_column) && + nzchar(input$big_diann_quantification_column)) { + input$big_diann_quantification_column + } else { + "FragmentQuantCorrected" + }, + global_qvalue_cutoff = input$big_diann_global_qvalue_cutoff, + qvalue_cutoff = input$big_diann_qvalue_cutoff, + pg_qvalue_cutoff = input$big_diann_pg_qvalue_cutoff, + max_feature_count = input$big_diann_max_feature_count, + filter_unique_peptides = isTRUE(input$big_diann_filter_unique_peptides), + aggregate_psms = isTRUE(input$big_diann_aggregate_psms), + filter_few_obs = isTRUE(input$big_diann_filter_few_obs) + ) + + if (!is.null(input$big_diann_annotation)) { + big_diann_args$annotation <- data.table::fread( + input$big_diann_annotation$datapath) + } + + if (isTRUE(input$big_diann_calculate_anomaly_scores)) { + # Carry through the quality-metric columns the post-collect + # step needs. Names here are the standardized form + # (MSstatsImport strips dots during column cleaning, so the raw + # DIANN column `Predicted.RT` becomes `PredictedRT`). + # DeltaRT is engineered in-memory from RT and PredictedRT after + # dplyr::collect, then fed to + # MSstatsConvert::MSstatsAnomalyScores alongside Ms1ProfileCorr + # and Evidence. + big_diann_args$calculateAnomalyScores <- TRUE + big_diann_args$anomalyModelFeatures <- c( + "Ms1ProfileCorr", + "Evidence", + "RT", + "PredictedRT") + } + + converted_data <- do.call( + MSstatsBig::bigDIANNtoMSstatsFormat, big_diann_args) + + mydata <- tryCatch({ + dplyr::collect(converted_data) + }, error = function(e) { + showNotification( + paste("Memory Error: The dataset is too large to process in-memory.", e$message), + type = "error", + duration = NULL) + NULL + }) + + if (is.null(mydata)) { + shinybusy::remove_modal_spinner() + return(NULL) + } + + if (isTRUE(input$big_diann_calculate_anomaly_scores) && + !is.null(input$big_diann_run_order_file)) { + # Step 2 of the anomaly scoring pipeline. The converter carried + # RT and PredictedRT through (raw DIANN column `Predicted.RT` + # gets the dot stripped by MSstatsImport's column + # standardization); compute DeltaRT now that the data is in + # memory, then fit the isolation-forest model. + if (!all(c("RT", "PredictedRT") %in% colnames(mydata))) { + showNotification( + paste("Error: collected DIANN data is missing RT or PredictedRT columns", + "(found:", paste(colnames(mydata), collapse = ", "), ")."), + type = "error", duration = NULL) + shinybusy::remove_modal_spinner() + return(NULL) + } + mydata$DeltaRT <- mydata$RT - mydata$PredictedRT + run_order <- data.table::fread(input$big_diann_run_order_file$datapath) + mydata <- MSstatsConvert::MSstatsAnomalyScores( + input = mydata, + quality_metrics = c("Ms1ProfileCorr", "Evidence", "DeltaRT"), + temporal_direction = c("mean_decrease", + "mean_decrease", + "dispersion_increase"), + missing_run_count = 0.5, + n_feat = 100, + run_order = run_order, + n_trees = 100, + max_depth = "auto", + cores = 1) + } + } else { - NULL - } - mydata = DIANNtoMSstatsFormat(data, - annotation = getAnnot(input), - qvalue_cutoff = qvalue_cutoff, - MBR = MBR, - removeProtein_with1Feature = TRUE, - removeFewMeasurements = FALSE, - use_log_file = FALSE, - quantificationColumn = quantificationColumn, - labeledAminoAcids = labeled_aa - ) - print("Mydata from mstats") - print(mydata) + if (isTRUE(input$diann_calculate_anomaly_scores) && + is.null(input$diann_run_order_file)) { + showNotification( + "Error: Run Order CSV is required when Calculate Anomaly Scores is enabled. Please upload a CSV with Run and Order columns.", + type = "error", + duration = NULL) + remove_modal_spinner() + return(NULL) + } + + if (getFileExtension(input$dianndata$name) %in% c("parquet", "pq")) { + data = read_parquet(input$dianndata$datapath) + } else { + data = data.table::fread(input$dianndata$datapath) + } + + qvalue_cutoff = 0.01 + MBR = FALSE + if (isTRUE(input$q_val)) { + if (is.numeric(input$q_cutoff) && length(input$q_cutoff) == 1L && + !is.na(input$q_cutoff) && input$q_cutoff >= 0 && input$q_cutoff <= 1) { + qvalue_cutoff = input$q_cutoff + } + MBR = isTRUE(input$MBR) + } + quantificationColumn = if (isTRUE(input$diann_2plus)) "auto" else { + if (!is.null(input$intensity_column) && nzchar(input$intensity_column)) input$intensity_column else "auto" + } + labeled_aa <- if (!is.null(input$diann_labeled_aa) && nzchar(input$diann_labeled_aa)) { + trimws(strsplit(input$diann_labeled_aa, ",")[[1]]) + } else { + NULL + } + + diann_converter_args <- list( + input = data, + annotation = getAnnot(input), + qvalue_cutoff = qvalue_cutoff, + MBR = MBR, + removeProtein_with1Feature = TRUE, + removeFewMeasurements = FALSE, + use_log_file = FALSE, + quantificationColumn = quantificationColumn, + labeledAminoAcids = labeled_aa + ) + + if (isTRUE(input$diann_calculate_anomaly_scores) && + !is.null(input$diann_run_order_file)) { + # DIANN reports don't ship DeltaRT — engineer it from RT and + # Predicted.RT (raw DIANN column names, with dot) before the + # converter standardizes column names. DIANNtoMSstatsFormat + # then carries Ms1.Profile.Corr / Evidence / DeltaRT through + # cleaning and calls MSstatsAnomalyScores internally. + # Use base R `[[<-` (not data.table `:=`) so this works whether + # `data` is a tibble (from arrow::read_parquet) or a data.table + # (from data.table::fread). + if (all(c("RT", "Predicted.RT") %in% colnames(data))) { + data[["DeltaRT"]] <- data[["RT"]] - data[["Predicted.RT"]] + diann_converter_args$input <- data + } else { + showNotification( + "Error: DIANN report is missing RT or Predicted.RT columns, which are required to compute DeltaRT for anomaly scoring.", + type = "error", duration = NULL) + remove_modal_spinner() + return(NULL) + } + diann_converter_args$calculateAnomalyScores <- TRUE + diann_converter_args$anomalyModelFeatures <- c( + "Ms1ProfileCorr", "Evidence", "DeltaRT") + diann_converter_args$anomalyModelFeatureTemporal <- c( + "mean_decrease", "mean_decrease", "dispersion_increase") + diann_converter_args$runOrder <- data.table::fread( + input$diann_run_order_file$datapath) + diann_converter_args$n_trees <- 100 + diann_converter_args$max_depth <- "auto" + diann_converter_args$numberOfCores <- 1 + } + + mydata = do.call(DIANNtoMSstatsFormat, diann_converter_args) + print("Mydata from mstats") + print(mydata) + } } else if(input$filetype == 'meta') { cat(file=stderr(), "Reached in metamorpheus\n") @@ -1123,15 +1323,113 @@ library(MSstatsPTM)\n", sep = "") } } else if(input$filetype == 'diann') { - - codes = paste(codes, "data = data.table::fread(\"insert your MSstats scheme output from DIANN filepath\")\nannot_file = data.table::fread(\"insert your annotation filepath\")#Optional\n" - , sep = "") - - codes = paste(codes, "data = DIANNtoMSstatsFormat(data, + + if (isTRUE(input$big_file_diann)) { + codes = paste(codes, + "# Large-file (out-of-memory) DIANN path.\n", + "input_file = \"insert your raw DIANN report filepath\"\n", + sep = "") + + big_diann_extra <- "" + if (!is.null(input$big_diann_annotation)) { + codes = paste(codes, + "annot_file = data.table::fread(\"insert your annotation filepath (Run, BioReplicate, Condition)\")\n", + sep = "") + big_diann_extra <- paste0(big_diann_extra, + ",\n annotation = annot_file") + } + if (isTRUE(input$big_diann_calculate_anomaly_scores)) { + big_diann_extra <- paste0(big_diann_extra, + ",\n calculateAnomalyScores = TRUE", + ",\n anomalyModelFeatures = c(\"Ms1ProfileCorr\", \"Evidence\", \"RT\", \"PredictedRT\")") + } + + quantcol_arg <- if (!is.null(input$big_diann_quantification_column) && + nzchar(input$big_diann_quantification_column)) { + input$big_diann_quantification_column + } else { + "FragmentQuantCorrected" + } + backend_arg <- if (!is.null(input$big_diann_backend) && + nzchar(input$big_diann_backend)) { + input$big_diann_backend + } else { + "arrow" + } + + codes = paste(codes, + "converted = MSstatsBig::bigDIANNtoMSstatsFormat(input_file, + output_file_name = \"output_file.csv\", + backend = \"", backend_arg, "\", + MBR = ", isTRUE(input$big_diann_MBR), ", + quantificationColumn = \"", quantcol_arg, "\", + global_qvalue_cutoff = ", input$big_diann_global_qvalue_cutoff, ", + qvalue_cutoff = ", input$big_diann_qvalue_cutoff, ", + pg_qvalue_cutoff = ", input$big_diann_pg_qvalue_cutoff, ", + max_feature_count = ", input$big_diann_max_feature_count, ", + filter_unique_peptides = ", isTRUE(input$big_diann_filter_unique_peptides), ", + aggregate_psms = ", isTRUE(input$big_diann_aggregate_psms), ", + filter_few_obs = ", isTRUE(input$big_diann_filter_few_obs), + big_diann_extra, + ")\ndata = dplyr::collect(converted)\n", + sep = "") + + if (isTRUE(input$big_diann_calculate_anomaly_scores)) { + codes = paste(codes, + "# Step 2 of the anomaly scoring pipeline: engineer\n", + "# DeltaRT from the carried-through columns, then\n", + "# fit the isolation-forest model. The converter\n", + "# strips dots during column standardization, so\n", + "# Predicted.RT becomes PredictedRT post-collect.\n", + "data$DeltaRT = data$RT - data$PredictedRT\n", + "run_order = data.table::fread(\"insert your run order CSV filepath (Run, Order columns)\")\n", + "data = MSstatsConvert::MSstatsAnomalyScores(\n", + " input = data,\n", + " quality_metrics = c(\"Ms1ProfileCorr\", \"Evidence\", \"DeltaRT\"),\n", + " temporal_direction = c(\"mean_decrease\", \"mean_decrease\", \"dispersion_increase\"),\n", + " missing_run_count = 0.5,\n", + " n_feat = 100,\n", + " run_order = run_order,\n", + " n_trees = 100,\n", + " max_depth = \"auto\",\n", + " cores = 1)\n", + sep = "") + } + + } else { + + codes = paste(codes, "data = data.table::fread(\"insert your MSstats scheme output from DIANN filepath\")\nannot_file = data.table::fread(\"insert your annotation filepath\")#Optional\n" + , sep = "") + + if (isTRUE(input$diann_calculate_anomaly_scores)) { + codes = paste(codes, + "# DIANN does not ship a DeltaRT column — engineer\n", + "# it from RT and Predicted.RT before the converter\n", + "# runs, so it can be carried through cleaning into\n", + "# MSstatsConvert::MSstatsAnomalyScores.\n", + "data$DeltaRT = data$RT - data$Predicted.RT\n", + "run_order = data.table::fread(\"insert your run order CSV filepath (Run, Order columns)\")\n", + sep = "") + codes = paste(codes, "data = DIANNtoMSstatsFormat(data, + annotation = annot_file, #Optional + qvalue_cutoff = 0.01, ## same as default + removeProtein_with1Feature = TRUE, + use_log_file = FALSE, + calculateAnomalyScores = TRUE, + anomalyModelFeatures = c(\"Ms1ProfileCorr\", \"Evidence\", \"DeltaRT\"), + anomalyModelFeatureTemporal = c(\"mean_decrease\", \"mean_decrease\", \"dispersion_increase\"), + runOrder = run_order, + n_trees = 100, + max_depth = \"auto\", + numberOfCores = 1)\n", sep = "") + } else { + codes = paste(codes, "data = DIANNtoMSstatsFormat(data, annotation = annot_file, #Optional qvalue_cutoff = 0.01, ## same as default removeProtein_with1Feature = TRUE, use_log_file = FALSE)\n", sep = "") + } + } } else if(input$filetype == 'meta') { if (input$BIO == "PTM") { @@ -1564,7 +1862,7 @@ preprocessDataCode <- function(qc_input,loadpage_input) { summaryPlot = TRUE, address = FALSE,isPlotly=TRUE)\n", sep="") - if (isTRUE(loadpage_input$calculate_anomaly_scores)) { + if (.anomaly_scores_enabled(loadpage_input)) { codes = paste(codes, "\n# Plot per-feature quality metrics (e.g. AnomalyScores) carried through from the converter\n", sep = "") codes = paste(codes, "MSstats::MSstatsQualityMetricsPlot(data, metric = \"AnomalyScores\", diff --git a/tests/testthat/test-module-loadpage-ui.R b/tests/testthat/test-module-loadpage-ui.R index f1136a3..d606d82 100644 --- a/tests/testthat/test-module-loadpage-ui.R +++ b/tests/testthat/test-module-loadpage-ui.R @@ -324,12 +324,15 @@ test_that("create_processing_options creates TMT and label-free options", { test_that("create_quality_filtering_options creates filtering controls", { options <- create_quality_filtering_options(NS("test")) options_html <- as.character(options) - + expect_true(grepl("Filter with Q-value", options_html)) expect_true(grepl("Filter with M-score", options_html)) expect_true(grepl("Q-value cutoff", options_html)) expect_true(grepl("M-score cutoff", options_html)) expect_true(grepl("MBR Enabled", options_html)) + # Regular DIANN anomaly scoring controls (parallel to Spectronaut's). + expect_true(grepl("test-diann_calculate_anomaly_scores", options_html)) + expect_true(grepl("test-diann_run_order_file", options_html)) }) # Test order preservation in main selection controls @@ -385,6 +388,78 @@ test_that("create_spectronaut_uploads creates UI outputs", { expect_true(grepl("spectronaut_options_ui", uploads_html)) }) +test_that("create_diann_uploads exposes the diann renderUI slots", { + uploads <- create_diann_uploads(NS("test")) + uploads_html <- as.character(uploads) + + expect_true(grepl("diann_header_ui", uploads_html)) + expect_true(grepl("diann_file_selection_ui", uploads_html)) + expect_true(grepl("diann_options_ui", uploads_html)) +}) + +test_that("DIANN large-file helper functions create correct UI elements", { + # Header + header <- create_diann_header() + expect_true(grepl("Upload MSstats report from DIANN", as.character(header))) + + # Mode selector + mode_sel <- create_diann_mode_selector(NS("test")) + mode_html <- as.character(mode_sel) + expect_true(grepl("Large file mode", mode_html)) + expect_true(grepl("checkbox", mode_html)) + expect_true(grepl("test-big_file_diann", mode_html)) + + # Standard UI + std_ui <- create_diann_standard_ui(NS("test")) + std_html <- as.character(std_ui) + expect_true(grepl("file", std_html)) + expect_true(grepl("test-dianndata", std_html)) + + # Large file UI + large_ui <- create_diann_large_file_ui(NS("test")) + large_html <- as.character(large_ui) + expect_true(grepl("Browse for local file", large_html)) + expect_true(grepl("dianndata_big_path", large_html)) + expect_true(grepl("test-big_diann_browse", large_html)) + + # Filter options + filter_opts <- create_diann_large_filter_options(NS("test")) + opts_html <- as.character(filter_opts) + expect_true(grepl("MBR Enabled", opts_html)) + expect_true(grepl("Quantification column", opts_html)) + expect_true(grepl("Global Q-value cutoff", opts_html)) + expect_true(grepl("Protein group Q-value cutoff", opts_html)) + expect_true(grepl("FragmentQuantCorrected", opts_html)) + + # Bottom UI + bottom_ui <- create_diann_large_bottom_ui(NS("test")) + bottom_html <- as.character(bottom_ui) + expect_true(grepl("Max feature count", bottom_html)) + expect_true(grepl("Use unique peptides", bottom_html)) + expect_true(grepl("Aggregate PSMs", bottom_html)) + expect_true(grepl("Filter features with few observations", bottom_html)) + expect_true(grepl("Backend", bottom_html)) + expect_true(grepl("arrow", bottom_html)) + + # Annotation + anomaly UI + annot_ui <- create_diann_large_annotation_ui(NS("test")) + annot_html <- as.character(annot_ui) + expect_true(grepl("Annotation file", annot_html)) + expect_true(grepl("test-big_diann_annotation", annot_html)) + expect_true(grepl("Calculate Anomaly Scores", annot_html)) + expect_true(grepl("test-big_diann_calculate_anomaly_scores", annot_html)) + expect_true(grepl("test-big_diann_run_order_file", annot_html)) +}) + +test_that("DIANN regular-path condition strings hide controls in big-file mode", { + result <- loadpageUI("test") + html_output <- as.character(result) + + # Annotation upload is gated on !big_file_diann for DIANN + expect_true(grepl("loadpage-big_file_diann", html_output, fixed = TRUE), + info = "DIANN large-file gating condition is missing from rendered UI") +}) + test_that("Spectronaut helper functions create correct UI elements", { # Header header <- create_spectronaut_header() diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 3ed7d6c..a400644 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,6 +1,29 @@ library(testthat) library(mockery) +test_that(".anomaly_scores_enabled ORs all three loadpage checkboxes", { + # Spectronaut + expect_true(MSstatsShiny:::.anomaly_scores_enabled( + list(calculate_anomaly_scores = TRUE))) + # Regular DIANN + expect_true(MSstatsShiny:::.anomaly_scores_enabled( + list(diann_calculate_anomaly_scores = TRUE))) + # Big-file DIANN + expect_true(MSstatsShiny:::.anomaly_scores_enabled( + list(big_diann_calculate_anomaly_scores = TRUE))) + # None + expect_false(MSstatsShiny:::.anomaly_scores_enabled(list())) + expect_false(MSstatsShiny:::.anomaly_scores_enabled( + list(calculate_anomaly_scores = FALSE, + diann_calculate_anomaly_scores = FALSE, + big_diann_calculate_anomaly_scores = FALSE))) + # NULL-safety + expect_false(MSstatsShiny:::.anomaly_scores_enabled( + list(calculate_anomaly_scores = NULL, + diann_calculate_anomaly_scores = NULL))) +}) + + test_file_tsv <- tempfile(fileext = ".tsv") writeLines("a\tb\tcd", test_file_tsv) @@ -1508,13 +1531,9 @@ describe("getData for Spectronaut input with anomaly scores", { stub(getData, "showNotification", function(msg, ...) expect_match(msg, "Run Order CSV")) - # getData starts with show_modal_spinner() — the validation - # must call remove_modal_spinner() before returning NULL so - # the spinner doesn't get stuck. Track that it was called. spinner_removed <- FALSE stub(getData, "remove_modal_spinner", function(...) { spinner_removed <<- TRUE; NULL }) - # The converter should never run; if it does, fail the test. stub(getData, "data.table::fread", function(...) stop("fread reached despite missing run order")) stub(getData, "SpectronauttoMSstatsFormat", @@ -1615,11 +1634,6 @@ describe("getData for Big Spectronaut", { expect_null(res) }) - # Capturing converter (returns its args so we can inspect what - # got forwarded). Same idea as mock_spectro_converter above; the - # big-file caller uses do.call(), but mockery intercepts the - # MSstatsBig::bigSpectronauttoMSstatsFormat symbol resolution - # rather than the call form, so this still works. mock_big_spec_converter <- function(...) list(...) dummy_annot_df <- data.frame( Run = c("run1", "run2"), @@ -1640,9 +1654,6 @@ describe("getData for Big Spectronaut", { stub(getData, "data.table::fread", dummy_annot_df) stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", mock_big_spec_converter) - # Hijack dplyr::collect to read back what the (stubbed) - # converter received — getData passes its return value into - # collect, so the captured value IS the list of args. captured_args <- NULL stub(getData, "dplyr::collect", function(x) { captured_args <<- x @@ -1673,8 +1684,6 @@ describe("getData for Big Spectronaut", { captured_args <<- x mock_df }) - # Skip the post-collect scoring call for this test — it's - # exercised separately below. stub(getData, "data.table::fread", data.frame(Run = "run1", Order = 1L)) stub(getData, "MSstatsConvert::MSstatsAnomalyScores", @@ -1683,15 +1692,10 @@ describe("getData for Big Spectronaut", { getData(input_with_anomaly) expect_true(isTRUE(captured_args$calculateAnomalyScores)) - # Raw Spectronaut export names — the converter applies - # .standardizeColnames internally on the way out. expect_equal(captured_args$anomalyModelFeatures, c("FG.ShapeQualityScore (MS2)", "FG.ShapeQualityScore (MS1)", "EG.DeltaRT")) - # The big-file converter itself does NOT take a runOrder arg — - # that's consumed by the separate MSstatsAnomalyScores step - # post-collect (covered in the next test). expect_null(captured_args$runOrder) }) @@ -1726,9 +1730,6 @@ describe("getData for Big Spectronaut", { expect_false(is.null(captured_scoring_args)) expect_equal(captured_scoring_args$input, mock_df) - # Standardized column names — the in-memory data after collect - # has had .standardizeColnames applied during the converter - # step, so MSstatsAnomalyScores must look for these names. expect_equal(captured_scoring_args$quality_metrics, c("FGShapeQualityScore(MS2)", "FGShapeQualityScore(MS1)", @@ -1754,7 +1755,6 @@ describe("getData for Big Spectronaut", { stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) stub(getData, "showNotification", function(msg, ...) expect_match(msg, "Run Order CSV")) - # The converter should never run; if it does, fail the test. stub(getData, "shinybusy::update_modal_spinner", function(...) stop("converter step reached despite missing run order")) @@ -1867,6 +1867,329 @@ test_that("extract_mod_ids_from_preview handles consecutive modifications", { expect_true(all(c("[Mod1]", "[Mod2]", "[Mod3]") %in% result)) }) +describe("getData for Big DIANN", { + + mock_input_big_diann <- list( + filetype = "diann", + big_file_diann = TRUE, + big_diann_browse = list(files = list("file.parquet")), + big_diann_MBR = TRUE, + big_diann_quantification_column = "FragmentQuantCorrected", + big_diann_global_qvalue_cutoff = 0.01, + big_diann_qvalue_cutoff = 0.01, + big_diann_pg_qvalue_cutoff = 0.01, + big_diann_max_feature_count = 100, + big_diann_filter_unique_peptides = FALSE, + big_diann_aggregate_psms = FALSE, + big_diann_filter_few_obs = FALSE, + big_diann_backend = "arrow", + BIO = "Protein", + DDA_DIA = "DIA" + ) + + mock_arrow_obj <- list(dummy = "arrow") + mock_df <- data.frame(ProteinName = "P1", Intensity = 100) + + test_that("Valid input routes to bigDIANNtoMSstatsFormat and returns data", { + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "test.parquet")) + stub(getData, "file.exists", TRUE) + stub(getData, "MSstatsBig::bigDIANNtoMSstatsFormat", mock_arrow_obj) + stub(getData, "dplyr::collect", mock_df) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + + res <- getData(mock_input_big_diann) + expect_equal(res, mock_df) + }) + + test_that("Invalid qvalue_cutoff returns NULL", { + bad_input <- mock_input_big_diann + bad_input$big_diann_qvalue_cutoff <- 1.5 + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "test.parquet")) + stub(getData, "showNotification", + function(msg, ...) expect_match(msg, "big_diann_qvalue_cutoff")) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + + res <- getData(bad_input) + expect_null(res) + }) + + test_that("Invalid max_feature_count returns NULL", { + bad_input <- mock_input_big_diann + bad_input$big_diann_max_feature_count <- 0 + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "test.parquet")) + stub(getData, "showNotification", + function(msg, ...) expect_match(msg, "max_feature_count")) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + + res <- getData(bad_input) + expect_null(res) + }) + + test_that("File not found returns NULL", { + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "nonexistent.parquet")) + stub(getData, "file.exists", FALSE) + stub(getData, "showNotification", + function(msg, ...) expect_match(msg, "does not exist")) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + + res <- getData(mock_input_big_diann) + expect_null(res) + }) + + test_that("passes annotation to converter when big_diann_annotation is supplied", { + input_with_annot <- mock_input_big_diann + input_with_annot$big_diann_annotation <- list(datapath = "annot.csv") + + dummy_annot_df <- data.frame( + Run = c("run1", "run2"), + BioReplicate = c(7L, 8L), + Condition = c("ctrl", "treat"), + stringsAsFactors = FALSE) + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "test.parquet")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "data.table::fread", dummy_annot_df) + stub(getData, "MSstatsBig::bigDIANNtoMSstatsFormat", + function(...) list(...)) + captured_args <- NULL + stub(getData, "dplyr::collect", function(x) { + captured_args <<- x + mock_df + }) + + getData(input_with_annot) + + expect_true(!is.null(captured_args$annotation)) + expect_equal(captured_args$annotation, dummy_annot_df) + }) + + test_that("passes calculateAnomalyScores + anomalyModelFeatures to converter when big_diann_calculate_anomaly_scores = TRUE", { + input_with_anomaly <- mock_input_big_diann + input_with_anomaly$big_diann_calculate_anomaly_scores <- TRUE + input_with_anomaly$big_diann_run_order_file <- list(datapath = "run_order.csv") + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "test.parquet")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "MSstatsBig::bigDIANNtoMSstatsFormat", + function(...) list(...)) + captured_args <- NULL + captured_anomaly_input <- NULL + stub(getData, "dplyr::collect", function(x) { + captured_args <<- x + # MSstatsImport's column standardization (called inside + # bigDIANNtoMSstatsFormat) strips dots, so the carried-through + # Predicted.RT lands here as PredictedRT. + data.frame(ProteinName = "P1", Intensity = 100, + RT = 10.0, PredictedRT = 9.5, + Ms1ProfileCorr = 0.9, Evidence = 1.0, + stringsAsFactors = FALSE) + }) + stub(getData, "data.table::fread", + data.frame(Run = "run1", Order = 1L)) + stub(getData, "MSstatsConvert::MSstatsAnomalyScores", + function(input, ...) { + captured_anomaly_input <<- input + input + }) + + getData(input_with_anomaly) + + expect_true(isTRUE(captured_args$calculateAnomalyScores)) + expect_equal(captured_args$anomalyModelFeatures, + c("Ms1ProfileCorr", "Evidence", "RT", "PredictedRT")) + # DeltaRT is engineered in-memory after collect, using the + # standardized PredictedRT column name. + expect_true("DeltaRT" %in% colnames(captured_anomaly_input)) + expect_equal(captured_anomaly_input$DeltaRT, + captured_anomaly_input$RT - captured_anomaly_input$PredictedRT) + }) + + test_that("fails fast when big_diann_calculate_anomaly_scores is TRUE but run_order_file is missing", { + input_missing_runorder <- mock_input_big_diann + input_missing_runorder$big_diann_calculate_anomaly_scores <- TRUE + input_missing_runorder$big_diann_run_order_file <- NULL + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "test.parquet")) + stub(getData, "file.exists", TRUE) + stub(getData, "showNotification", + function(msg, ...) expect_match(msg, "Run Order CSV")) + spinner_removed <- FALSE + stub(getData, "shinybusy::remove_modal_spinner", + function(...) { spinner_removed <<- TRUE; NULL }) + stub(getData, "MSstatsBig::bigDIANNtoMSstatsFormat", + function(...) stop("converter reached despite missing run order")) + + res <- getData(input_missing_runorder) + expect_null(res) + expect_true(spinner_removed) + }) + + test_that("passes converter knobs through to bigDIANNtoMSstatsFormat", { + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", + function(...) data.frame(datapath = "test.parquet")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "MSstatsBig::bigDIANNtoMSstatsFormat", + function(...) list(...)) + captured_args <- NULL + stub(getData, "dplyr::collect", function(x) { + captured_args <<- x + mock_df + }) + + getData(mock_input_big_diann) + + expect_equal(captured_args$backend, "arrow") + expect_true(isTRUE(captured_args$MBR)) + expect_equal(captured_args$quantificationColumn, "FragmentQuantCorrected") + expect_equal(captured_args$global_qvalue_cutoff, 0.01) + expect_equal(captured_args$qvalue_cutoff, 0.01) + expect_equal(captured_args$pg_qvalue_cutoff, 0.01) + expect_equal(captured_args$max_feature_count, 100) + }) +}) + +describe("getData for regular DIANN with anomaly scoring", { + + mock_diann_raw <- data.frame( + ProteinNames = "P1", + StrippedSequence = "PEPTIDE", + ModifiedSequence = "PEPTIDE", + PrecursorCharge = 2, + FragmentQuantCorrected = 100, + QValue = 0.001, + GlobalQValue = 0.001, + GlobalPGQValue = 0.001, + LibQValue = 0.001, + LibPGQValue = 0.001, + Run = "run1", + RT = 10.0, + Predicted.RT = 9.5, + Ms1ProfileCorr = 0.9, + Evidence = 1.0, + stringsAsFactors = FALSE, + check.names = FALSE + ) + + mock_input_diann_anomaly <- list( + filetype = "diann", + BIO = "Protein", + DDA_DIA = "LType", + big_file_diann = FALSE, + dianndata = list(name = "report.tsv", datapath = "report.tsv"), + q_val = TRUE, + q_cutoff = 0.01, + MBR = FALSE, + diann_2plus = FALSE, + intensity_column = "FragmentQuantCorrected", + diann_calculate_anomaly_scores = TRUE, + diann_run_order_file = list(datapath = "run_order.csv") + ) + + test_that("fails fast when diann_calculate_anomaly_scores is TRUE but run_order_file is missing", { + bad_input <- mock_input_diann_anomaly + bad_input$diann_run_order_file <- NULL + + stub(getData, "showNotification", + function(msg, ...) expect_match(msg, "Run Order CSV")) + stub(getData, "remove_modal_spinner", function(...) NULL) + stub(getData, "DIANNtoMSstatsFormat", + function(...) stop("converter reached despite missing run order")) + + res <- getData(bad_input) + expect_null(res) + }) + + test_that("engineers DeltaRT and passes anomaly args to DIANNtoMSstatsFormat", { + captured_args <- NULL + stub(getData, "data.table::fread", function(...) { + args <- list(...) + # First fread is for the DIANN data, second is for the run-order file. + if (grepl("run_order", args[[1]] %||% "", fixed = TRUE)) { + data.frame(Run = "run1", Order = 1L) + } else { + data.table::as.data.table(mock_diann_raw) + } + }) + stub(getData, "getFileExtension", "tsv") + stub(getData, "getAnnot", + data.frame(Run = "run1", BioReplicate = 1L, Condition = "ctrl", + stringsAsFactors = FALSE)) + stub(getData, "DIANNtoMSstatsFormat", function(...) { + captured_args <<- list(...) + data.frame(ProteinName = "P1", Intensity = 100) + }) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "remove_modal_spinner", function(...) NULL) + stub(getData, "show_modal_spinner", function(...) NULL) + + res <- getData(mock_input_diann_anomaly) + + expect_true(!is.null(res)) + expect_true(isTRUE(captured_args$calculateAnomalyScores)) + expect_equal(captured_args$anomalyModelFeatures, + c("Ms1ProfileCorr", "Evidence", "DeltaRT")) + expect_equal(captured_args$anomalyModelFeatureTemporal, + c("mean_decrease", "mean_decrease", "dispersion_increase")) + expect_true("DeltaRT" %in% colnames(captured_args$input)) + expect_equal(captured_args$input$DeltaRT, + captured_args$input$RT - captured_args$input$Predicted.RT) + }) + + test_that("regular DIANN path without anomaly checkbox does NOT add anomaly args", { + plain_input <- mock_input_diann_anomaly + plain_input$diann_calculate_anomaly_scores <- FALSE + plain_input$diann_run_order_file <- NULL + + captured_args <- NULL + stub(getData, "data.table::fread", data.table::as.data.table(mock_diann_raw)) + stub(getData, "getFileExtension", "tsv") + stub(getData, "getAnnot", + data.frame(Run = "run1", BioReplicate = 1L, Condition = "ctrl", + stringsAsFactors = FALSE)) + stub(getData, "DIANNtoMSstatsFormat", function(...) { + captured_args <<- list(...) + data.frame(ProteinName = "P1", Intensity = 100) + }) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "remove_modal_spinner", function(...) NULL) + + getData(plain_input) + + expect_null(captured_args$calculateAnomalyScores) + expect_null(captured_args$anomalyModelFeatures) + expect_null(captured_args$runOrder) + expect_false("DeltaRT" %in% colnames(captured_args$input)) + }) +}) + # ============================================================================ # DIANN FORMAT DETECTION TESTS # ============================================================================