From d3f7ff49e00156aed803f2d290e6ae255e6f41cf Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 26 May 2026 09:23:10 -0400 Subject: [PATCH 1/7] feat(loadpage): Wire annotation upload + anomaly carry-through for big-file Spectronaut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added create_spectronaut_large_annotation_ui helper that renders an optional annotation file upload and a "Carry anomaly model features through pipeline" checkbox in the big-file Spectronaut options panel. The new input IDs (big_spec_annotation, carry_anomaly_features) deliberately do not reuse the regular path's annot / calculate_anomaly_scores IDs since the semantics differ — the big-file converter does feature carry-through only, with no temporal anomaly-model fit. * Refactored the bigSpectronauttoMSstatsFormat call site in getData (R/utils.R) to a big_spec_args list + do.call so the optional annotation and anomaly args splice in cleanly. * Extended getDataCode with a big-file Spectronaut branch that emits a reproducibility script reflecting the actual UI state (annotation arg + calculateAnomalyScores / anomalyModelFeatures when carry-through is on; no runOrder etc., which the big-file converter does not accept). * Added three unit tests under "getData for Big Spectronaut" that use a capture-args mock to verify annotation is forwarded when uploaded, anomaly args are forwarded when the checkbox is on, and both are omitted otherwise. Depends on MSstatsBig Phase 1 (PR pending) for the new annotation parameter to be accepted by bigSpectronauttoMSstatsFormat. Local testing requires devtools::install of MSstatsBig from the Phase 1 branch first; DESCRIPTION minimum-version bump deferred until MSstatsBig releases. 
See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md Co-Authored-By: Claude --- R/module-loadpage-server.R | 6 ++- R/module-loadpage-ui.R | 32 +++++++++++++ R/utils.R | 65 +++++++++++++++++++++++-- tests/testthat/test-utils.R | 94 ++++++++++++++++++++++++++++++++++++- 4 files changed, 191 insertions(+), 6 deletions(-) diff --git a/R/module-loadpage-server.R b/R/module-loadpage-server.R index 35f45f8..6611dd2 100644 --- a/R/module-loadpage-server.R +++ b/R/module-loadpage-server.R @@ -235,11 +235,13 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa unique_peps_def <- if (is.null(input$filter_unique_peptides)) FALSE else input$filter_unique_peptides agg_psms_def <- if (is.null(input$aggregate_psms)) FALSE else input$aggregate_psms few_obs_def <- if (is.null(input$filter_few_obs)) FALSE else input$filter_few_obs - + carry_anomaly_def <- if (is.null(input$carry_anomaly_features)) FALSE else input$carry_anomaly_features + tagList( create_spectronaut_large_filter_options(session$ns, excluded_def, identified_def, qval_def), if (qval_def) create_spectronaut_qvalue_cutoff_ui(session$ns, cutoff_def), - create_spectronaut_large_bottom_ui(session$ns, max_feature_def, unique_peps_def, agg_psms_def, few_obs_def) + create_spectronaut_large_bottom_ui(session$ns, max_feature_def, unique_peps_def, agg_psms_def, few_obs_def), + create_spectronaut_large_annotation_ui(session$ns, carry_anomaly_def) ) } else { NULL diff --git a/R/module-loadpage-ui.R b/R/module-loadpage-ui.R index 2de45d0..c30869f 100644 --- a/R/module-loadpage-ui.R +++ b/R/module-loadpage-ui.R @@ -342,6 +342,38 @@ create_spectronaut_large_bottom_ui <- function(ns, max_feature_def = 20, unique_ ) } +#' Create Spectronaut large file annotation override + anomaly carry-through UI +#' +#' Renders an optional annotation upload that overrides Spectronaut's embedded +#' R.Condition / R.Replicate columns on Run, and a checkbox that asks the +#' converter to carry 
the anomaly-model feature columns +#' (FG.ShapeQualityScore (MS2)/(MS1), EGDeltaRT) through the pipeline. The +#' big-file converter does not fit the temporal anomaly RF (MSstatsBig +#' provides no MSstatsAnomalyScores equivalent), so this is feature +#' carry-through only — distinct from the regular-Spectronaut +#' `calculate_anomaly_scores` checkbox which drives full model fitting. +#' @noRd +create_spectronaut_large_annotation_ui <- function(ns, carry_anomaly_def = FALSE) { + tagList( + tags$hr(), + h5("Annotation file (optional)", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("Upload a CSV/TSV with columns Run, BioReplicate, Condition (and any extras). When supplied, the converter merges it on Run and overrides any Condition / BioReplicate values from Spectronaut's R.Condition / R.Replicate. Required for paired designs and other layouts Spectronaut's own annotation cannot express.", + class = "icon-tooltip")), + fileInput(ns("big_spec_annotation"), label = NULL, + multiple = FALSE, accept = c(".csv", ".tsv", ".txt")), + checkboxInput(ns("carry_anomaly_features"), + label = tags$span( + "Carry anomaly model features through pipeline", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("Preserves the FG.ShapeQualityScore (MS2)/(MS1) and EGDeltaRT columns on the converted output so downstream tools can use them. 
Note: unlike the regular Spectronaut path, the large-file converter does not fit the temporal anomaly model itself — it only carries the columns through.", + class = "icon-tooltip")), + value = carry_anomaly_def) + ) +} + #' Create PTM FragPipe uploads #' @noRd create_ptm_fragpipe_uploads <- function(ns) { diff --git a/R/utils.R b/R/utils.R index cf9ea76..120e1bf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -641,9 +641,12 @@ getData <- function(input) { } shinybusy::update_modal_spinner(text = "Processing large Spectronaut file...") - - # Call the big file conversion function from MSstatsConvert - converted_data <- MSstatsBig::bigSpectronauttoMSstatsFormat( + + # Base arguments shared by every large-file Spectronaut run. + # Optional args (annotation override, anomaly-feature + # carry-through) are spliced in below so callers that don't + # supply them aren't forced to pass NULL / FALSE explicitly. + big_spec_args <- list( input_file = local_big_file_path, output_file_name = "output_file.csv", backend = "arrow", @@ -656,6 +659,22 @@ getData <- function(input) { aggregate_psms = input$aggregate_psms, filter_few_obs = input$filter_few_obs ) + + if (!is.null(input$big_spec_annotation)) { + big_spec_args$annotation <- data.table::fread( + input$big_spec_annotation$datapath) + } + + if (isTRUE(input$carry_anomaly_features)) { + big_spec_args$calculateAnomalyScores <- TRUE + big_spec_args$anomalyModelFeatures <- c( + "FG.ShapeQualityScore (MS2)", + "FG.ShapeQualityScore (MS1)", + "EGDeltaRT") + } + + converted_data <- do.call( + MSstatsBig::bigSpectronauttoMSstatsFormat, big_spec_args) # Attempt to load the data into memory. 
mydata <- tryCatch({ @@ -958,6 +977,44 @@ library(MSstatsPTM)\n", sep = "") } else if(input$filetype == 'spec') { + if (isTRUE(input$big_file_spec)) { + codes = paste(codes, + "# Large-file (out-of-memory) Spectronaut path.\n", + "input_file = \"insert your raw Spectronaut export filepath\"\n", + sep = "") + + big_spec_extra <- "" + if (!is.null(input$big_spec_annotation)) { + codes = paste(codes, + "annot_file = data.table::fread(\"insert your annotation filepath (Run, BioReplicate, Condition)\")\n", + sep = "") + big_spec_extra <- paste0(big_spec_extra, + ",\n annotation = annot_file") + } + if (isTRUE(input$carry_anomaly_features)) { + big_spec_extra <- paste0(big_spec_extra, + ",\n calculateAnomalyScores = TRUE", + ",\n anomalyModelFeatures = c(\"FG.ShapeQualityScore (MS2)\", \"FG.ShapeQualityScore (MS1)\", \"EGDeltaRT\")") + } + + codes = paste(codes, + "converted = MSstatsBig::bigSpectronauttoMSstatsFormat(input_file, + output_file_name = \"output_file.csv\", + backend = \"arrow\", + filter_by_excluded = ", input$filter_by_excluded, ", + filter_by_identified = ", input$filter_by_identified, ", + filter_by_qvalue = ", input$filter_by_qvalue, ", + qvalue_cutoff = ", input$qvalue_cutoff, ", + max_feature_count = ", input$max_feature_count, ", + filter_unique_peptides = ", input$filter_unique_peptides, ", + aggregate_psms = ", input$aggregate_psms, ", + filter_few_obs = ", input$filter_few_obs, + big_spec_extra, + ")\ndata = dplyr::collect(converted)\n", + sep = "") + + } else { + codes = paste(codes, "data = data.table::fread(\"insert your MSstats scheme output from Spectronaut filepath\")\nannot_file = data.table::fread(\"insert your annotation filepath\")#Optional\n" , sep = "") @@ -984,6 +1041,8 @@ library(MSstatsPTM)\n", sep = "") removeProtein_with1Feature = ", input$remove, ", use_log_file = FALSE)\n", sep = "") } + + } } else if(input$filetype == 'diann') { diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index cf0b13a..0444f28 
100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1577,10 +1577,102 @@ describe("getData for Big Spectronaut", { stub(getData, "showNotification", function(msg, ...) expect_match(msg, "Memory Error")) stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) - + res <- getData(mock_input_big) expect_null(res) }) + + # Capturing converter (returns its args so we can inspect what + # got forwarded). Same idea as mock_spectro_converter above; the + # big-file caller uses do.call(), but mockery intercepts the + # MSstatsBig::bigSpectronauttoMSstatsFormat symbol resolution + # rather than the call form, so this still works. + mock_big_spec_converter <- function(...) list(...) + dummy_annot_df <- data.frame( + Run = c("run1", "run2"), + BioReplicate = c(7L, 8L), + Condition = c("ctrl", "treat"), + stringsAsFactors = FALSE) + + test_that("passes annotation to converter when big_spec_annotation is supplied", { + input_with_annot <- mock_input_big + input_with_annot$big_spec_annotation <- list(datapath = "annot.csv") + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "data.table::fread", dummy_annot_df) + stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", + mock_big_spec_converter) + # Hijack dplyr::collect to read back what the (stubbed) + # converter received — getData passes its return value into + # collect, so the captured value IS the list of args. 
+ captured_args <- NULL + stub(getData, "dplyr::collect", function(x) { + captured_args <<- x + mock_df + }) + + getData(input_with_annot) + + expect_true(!is.null(captured_args$annotation)) + expect_equal(captured_args$annotation, dummy_annot_df) + }) + + test_that("passes calculateAnomalyScores + anomalyModelFeatures when carry_anomaly_features = TRUE", { + input_with_anomaly <- mock_input_big + input_with_anomaly$carry_anomaly_features <- TRUE + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", + mock_big_spec_converter) + captured_args <- NULL + stub(getData, "dplyr::collect", function(x) { + captured_args <<- x + mock_df + }) + + getData(input_with_anomaly) + + expect_true(isTRUE(captured_args$calculateAnomalyScores)) + expect_equal(captured_args$anomalyModelFeatures, + c("FG.ShapeQualityScore (MS2)", + "FG.ShapeQualityScore (MS1)", + "EGDeltaRT")) + # No runOrder for the big-file path — the converter doesn't + # accept one (MSstatsBig has no MSstatsAnomalyScores call). + expect_null(captured_args$runOrder) + }) + + test_that("omits annotation + anomaly args when neither is supplied", { + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) 
NULL) + stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", + mock_big_spec_converter) + captured_args <- NULL + stub(getData, "dplyr::collect", function(x) { + captured_args <<- x + mock_df + }) + + getData(mock_input_big) + + expect_null(captured_args$annotation) + expect_null(captured_args$calculateAnomalyScores) + expect_null(captured_args$anomalyModelFeatures) + }) }) # ============================================================================ From 7656f9fe7a54261e11d643e2b9698c8d42027f5a Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 26 May 2026 09:42:08 -0400 Subject: [PATCH 2/7] feat(loadpage): Add post-collect MSstatsAnomalyScores call to big-file Spectronaut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier commit on this branch only carried the anomaly feature columns through the converter; it never produced the AnomalyScores column. The actual anomaly scoring pipeline is two-step in the large-file path, mirroring what the regular Spectronaut path does internally: * Step 1 — bigSpectronauttoMSstatsFormat preserves the model feature columns (FG.ShapeQualityScore (MS2)/(MS1), EGDeltaRT) on the converted output when calculateAnomalyScores = TRUE. * Step 2 — after dplyr::collect, MSstatsConvert::MSstatsAnomalyScores fits the isolation-forest model on the in-memory result and adds the AnomalyScores column. Changes: * UI: relabeled the checkbox to "Calculate Anomaly Scores" (matching the regular path), added a conditional run-order file upload (big_run_order_file) since MSstatsAnomalyScores needs the Run / Order CSV for temporal feature engineering. Internal input ID stays carry_anomaly_features since it still drives step 1's converter flag. 
* getData: after dplyr::collect, when carry_anomaly_features && big_run_order_file are set, read the run-order and call MSstatsConvert::MSstatsAnomalyScores with the same defaults the regular path uses (missing_run_count = 0.5, n_feat = 100, n_trees = 100, max_depth = "auto", cores = 1). * getDataCode: emits the post-collect MSstatsAnomalyScores call too so the reproducibility script reflects the full pipeline. * Tests: rewrote the converter-arg test to no longer assert "no runOrder" (that argument now lives in the post-collect call, not the converter call), added two new tests covering the scoring call and its no-runorder-no-scoring guard. See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md Co-Authored-By: Claude --- R/module-loadpage-ui.R | 39 ++++++++++++----- R/utils.R | 47 +++++++++++++++++++- tests/testthat/test-utils.R | 86 +++++++++++++++++++++++++++++++++++-- 3 files changed, 157 insertions(+), 15 deletions(-) diff --git a/R/module-loadpage-ui.R b/R/module-loadpage-ui.R index c30869f..222b585 100644 --- a/R/module-loadpage-ui.R +++ b/R/module-loadpage-ui.R @@ -342,16 +342,23 @@ create_spectronaut_large_bottom_ui <- function(ns, max_feature_def = 20, unique_ ) } -#' Create Spectronaut large file annotation override + anomaly carry-through UI +#' Create Spectronaut large file annotation override + anomaly UI #' #' Renders an optional annotation upload that overrides Spectronaut's embedded -#' R.Condition / R.Replicate columns on Run, and a checkbox that asks the -#' converter to carry the anomaly-model feature columns -#' (FG.ShapeQualityScore (MS2)/(MS1), EGDeltaRT) through the pipeline. The -#' big-file converter does not fit the temporal anomaly RF (MSstatsBig -#' provides no MSstatsAnomalyScores equivalent), so this is feature -#' carry-through only — distinct from the regular-Spectronaut -#' `calculate_anomaly_scores` checkbox which drives full model fitting. 
+#' R.Condition / R.Replicate columns on Run, plus the "Calculate Anomaly +#' Scores" controls. End-to-end anomaly scoring is a two-step pipeline in +#' the large-file path: +#' (1) `bigSpectronauttoMSstatsFormat` runs with +#' `calculateAnomalyScores = TRUE` + the model feature column list, +#' which carries those feature columns through the out-of-memory +#' reduce/preprocess steps. +#' (2) After `dplyr::collect`, `MSstatsConvert::MSstatsAnomalyScores` +#' is called on the in-memory result to fit the isolation-forest +#' model and produce the `AnomalyScores` column. +#' The internal input ID `carry_anomaly_features` is named for step (1) +#' but gates both steps; the user-facing label reflects step (2)'s outcome. +#' A run-order CSV is required (Run + Order columns) — `MSstatsAnomalyScores` +#' uses it for temporal feature engineering. #' @noRd create_spectronaut_large_annotation_ui <- function(ns, carry_anomaly_def = FALSE) { tagList( @@ -365,12 +372,22 @@ create_spectronaut_large_annotation_ui <- function(ns, carry_anomaly_def = FALSE multiple = FALSE, accept = c(".csv", ".tsv", ".txt")), checkboxInput(ns("carry_anomaly_features"), label = tags$span( - "Carry anomaly model features through pipeline", + "Calculate Anomaly Scores", class = "icon-wrapper", icon("question-circle", lib = "font-awesome"), - div("Preserves the FG.ShapeQualityScore (MS2)/(MS1) and EGDeltaRT columns on the converted output so downstream tools can use them. Note: unlike the regular Spectronaut path, the large-file converter does not fit the temporal anomaly model itself — it only carries the columns through.", + div("Runs the same anomaly scoring pipeline as the regular Spectronaut path: the converter carries FG.ShapeQualityScore (MS2)/(MS1) and EGDeltaRT through the out-of-memory steps, then MSstatsConvert::MSstatsAnomalyScores fits the isolation-forest model on the collected data and adds an AnomalyScores column. 
Requires a run order CSV.", class = "icon-tooltip")), - value = carry_anomaly_def) + value = carry_anomaly_def), + conditionalPanel( + condition = sprintf("input['%s']", ns("carry_anomaly_features")), + fileInput(ns("big_run_order_file"), + label = h5("Upload Run Order File", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("CSV with two columns: 'Run' (sequence name matching the converter output) and 'Order' (chronological run number, e.g. 1, 2, 3...).", + class = "icon-tooltip")), + multiple = FALSE, accept = c(".csv")) + ) ) } diff --git a/R/utils.R b/R/utils.R index 120e1bf..ea8da01 100644 --- a/R/utils.R +++ b/R/utils.R @@ -692,7 +692,33 @@ getData <- function(input) { shinybusy::remove_modal_spinner() return(NULL) } - + + # Step 2 of the anomaly scoring pipeline: the converter only + # carries the model feature columns through the out-of-memory + # steps; MSstatsAnomalyScores fits the isolation-forest model + # on the in-memory result and adds the AnomalyScores column. + # Defaults mirror SpectronauttoMSstatsFormat's regular path + # (missing_run_count = 0.5, n_feat = 100, n_trees = 100, + # max_depth = "auto", cores = 1). 
+ if (isTRUE(input$carry_anomaly_features) && + !is.null(input$big_run_order_file)) { + run_order <- data.table::fread(input$big_run_order_file$datapath) + mydata <- MSstatsConvert::MSstatsAnomalyScores( + input = mydata, + quality_metrics = c("FG.ShapeQualityScore (MS2)", + "FG.ShapeQualityScore (MS1)", + "EGDeltaRT"), + temporal_direction = c("mean_decrease", + "mean_decrease", + "dispersion_increase"), + missing_run_count = 0.5, + n_feat = 100, + run_order = run_order, + n_trees = 100, + max_depth = "auto", + cores = 1) + } + } else { data = data.table::fread(input$specdata$datapath) # Base arguments for the Spectronaut converter @@ -1013,6 +1039,25 @@ library(MSstatsPTM)\n", sep = "") ")\ndata = dplyr::collect(converted)\n", sep = "") + if (isTRUE(input$carry_anomaly_features)) { + codes = paste(codes, + "# Step 2 of the anomaly scoring pipeline: fit the\n", + "# isolation-forest model on the collected data and\n", + "# add an AnomalyScores column.\n", + "run_order = data.table::fread(\"insert your run order CSV filepath (Run, Order columns)\")\n", + "data = MSstatsConvert::MSstatsAnomalyScores(\n", + " input = data,\n", + " quality_metrics = c(\"FG.ShapeQualityScore (MS2)\", \"FG.ShapeQualityScore (MS1)\", \"EGDeltaRT\"),\n", + " temporal_direction = c(\"mean_decrease\", \"mean_decrease\", \"dispersion_increase\"),\n", + " missing_run_count = 0.5,\n", + " n_feat = 100,\n", + " run_order = run_order,\n", + " n_trees = 100,\n", + " max_depth = \"auto\",\n", + " cores = 1)\n", + sep = "") + } + } else { codes = paste(codes, "data = data.table::fread(\"insert your MSstats scheme output from Spectronaut filepath\")\nannot_file = data.table::fread(\"insert your annotation filepath\")#Optional\n" diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 0444f28..318c0d3 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1622,9 +1622,10 @@ describe("getData for Big Spectronaut", { expect_equal(captured_args$annotation, 
dummy_annot_df) }) - test_that("passes calculateAnomalyScores + anomalyModelFeatures when carry_anomaly_features = TRUE", { + test_that("passes calculateAnomalyScores + anomalyModelFeatures to converter when carry_anomaly_features = TRUE", { input_with_anomaly <- mock_input_big input_with_anomaly$carry_anomaly_features <- TRUE + input_with_anomaly$big_run_order_file <- list(datapath = "run_order.csv") stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) @@ -1639,6 +1640,12 @@ describe("getData for Big Spectronaut", { captured_args <<- x mock_df }) + # Skip the post-collect scoring call for this test — it's + # exercised separately below. + stub(getData, "data.table::fread", + data.frame(Run = "run1", Order = 1L)) + stub(getData, "MSstatsConvert::MSstatsAnomalyScores", + function(...) mock_df) getData(input_with_anomaly) @@ -1647,11 +1654,84 @@ describe("getData for Big Spectronaut", { c("FG.ShapeQualityScore (MS2)", "FG.ShapeQualityScore (MS1)", "EGDeltaRT")) - # No runOrder for the big-file path — the converter doesn't - # accept one (MSstatsBig has no MSstatsAnomalyScores call). + # The big-file converter itself does NOT take a runOrder arg — + # that's consumed by the separate MSstatsAnomalyScores step + # post-collect (covered in the next test). expect_null(captured_args$runOrder) }) + test_that("calls MSstatsConvert::MSstatsAnomalyScores after collect when carry_anomaly_features && big_run_order_file are set", { + input_with_full_anomaly <- mock_input_big + input_with_full_anomaly$carry_anomaly_features <- TRUE + input_with_full_anomaly$big_run_order_file <- list(datapath = "run_order.csv") + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", function(...) 
data.frame(datapath = "test.csv")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", + mock_arrow_obj) + stub(getData, "dplyr::collect", mock_df) + + run_order_df <- data.frame(Run = c("run1", "run2"), + Order = c(1L, 2L), + stringsAsFactors = FALSE) + stub(getData, "data.table::fread", run_order_df) + + captured_scoring_args <- NULL + stub(getData, "MSstatsConvert::MSstatsAnomalyScores", + function(...) { + captured_scoring_args <<- list(...) + mock_df + }) + + getData(input_with_full_anomaly) + + expect_false(is.null(captured_scoring_args)) + expect_equal(captured_scoring_args$input, mock_df) + expect_equal(captured_scoring_args$quality_metrics, + c("FG.ShapeQualityScore (MS2)", + "FG.ShapeQualityScore (MS1)", + "EGDeltaRT")) + expect_equal(captured_scoring_args$temporal_direction, + c("mean_decrease", + "mean_decrease", + "dispersion_increase")) + expect_equal(captured_scoring_args$run_order, run_order_df) + expect_equal(captured_scoring_args$n_trees, 100) + expect_equal(captured_scoring_args$max_depth, "auto") + expect_equal(captured_scoring_args$cores, 1) + }) + + test_that("does NOT call MSstatsAnomalyScores when carry_anomaly_features is TRUE but big_run_order_file is missing", { + input_no_runorder <- mock_input_big + input_no_runorder$carry_anomaly_features <- TRUE + input_no_runorder$big_run_order_file <- NULL + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) 
NULL) + stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", + mock_arrow_obj) + stub(getData, "dplyr::collect", mock_df) + + scoring_called <- FALSE + stub(getData, "MSstatsConvert::MSstatsAnomalyScores", + function(...) { + scoring_called <<- TRUE + mock_df + }) + + getData(input_no_runorder) + + expect_false(scoring_called) + }) + test_that("omits annotation + anomaly args when neither is supplied", { stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) From fe8d2cf264f0832f8de15c298820821356f094d1 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 26 May 2026 10:18:55 -0400 Subject: [PATCH 3/7] feat(loadpage): Make Spectronaut intensity column a universal input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the spec_intensity_col textInput only rendered inside the protein-turnover-specific UI block, so users on the standard or chemoproteomics templates had no way to override the converter's default intensity column. Spectronaut export columns vary across vendor versions (F.NormalizedPeakArea, F.PeakArea, FG.MS1Quantity, etc.), so this is a useful universal option. * Added a new spectronaut_intensity_ui renderUI that always renders for filetype == 'spec'. Default tracks the active template: FG.MS1Quantity for protein turnover (preserving prior behavior) and F.NormalizedPeakArea otherwise (matches both the in-memory converter and bigSpectronauttoMSstatsFormat defaults). * Removed the duplicate spec_intensity_col textInput from spectronaut_turnover_ui — peptide_seq_col + heavy_labels remain there since they are turnover-specific. * Threaded the value through to bigSpectronauttoMSstatsFormat in the big-file getData path (was already wired for the regular path; the input was just never rendered outside turnover mode). * getDataCode now emits an intensity = "..." 
arg in both the regular and big-file Spectronaut reproducibility scripts whenever the intensity input is non-empty (including when it still holds the template default). Also aligned the anomaly column-name strings the user had fixed: the carry-through args passed to the converter use raw Spectronaut names ("EG.DeltaRT"), and the post-collect MSstatsAnomalyScores call uses MSstats-standardized names ("FGShapeQualityScore(MS2)" etc.) since .standardizeColnames has already been applied to the in-memory data by then. Updated getDataCode emissions and the two unit tests that asserted the old uniform strings. See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md Co-Authored-By: Claude --- R/module-loadpage-server.R | 27 +++++++++++++++++++++--- R/module-loadpage-ui.R | 1 + R/utils.R | 42 ++++++++++++++++++++++++------------- tests/testthat/test-utils.R | 34 +++++++++++++++++++++++++++--- 4 files changed, 84 insertions(+), 20 deletions(-) diff --git a/R/module-loadpage-server.R b/R/module-loadpage-server.R index 6611dd2..045f82d 100644 --- a/R/module-loadpage-server.R +++ b/R/module-loadpage-server.R @@ -190,6 +190,30 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa ui_elements }) + # Spectronaut intensity column input — universal across both the + # regular (in-memory) and large-file paths, regardless of analysis + # template. Default tracks the template: turnover analyses want the + # MS1-only quantity, normal analyses want the normalized peak area + # (which is also `bigSpectronauttoMSstatsFormat`'s default). 
+ output$spectronaut_intensity_ui <- renderUI({ + req(input$filetype == 'spec', input$BIO != 'PTM') + + default_intensity <- if (!is.null(app_template) && + app_template() == TEMPLATES$protein_turnover) { + "FG.MS1Quantity" + } else { + "F.NormalizedPeakArea" + } + + textInput(session$ns("spec_intensity_col"), + label = h5("Intensity column", + class = "icon-wrapper", + icon("question-circle", lib = "font-awesome"), + div("Spectronaut export column to use as the intensity measure (e.g. F.NormalizedPeakArea, F.PeakArea, FG.MS1Quantity). Leave at the default unless you have a specific reason to override it.", + class = "icon-tooltip")), + value = default_intensity) + }) + output$spectronaut_turnover_ui <- renderUI({ req(input$filetype == 'spec', input$BIO != 'PTM') req(!is.null(app_template) && app_template() == TEMPLATES$protein_turnover) @@ -198,9 +222,6 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa tagList( tags$hr(), h4("Protein Turnover Options"), - textInput(ns("spec_intensity_col"), - "Intensity column", - value = "FG.MS1Quantity"), textInput(ns("spec_peptide_seq_col"), "Peptide sequence column", value = "FG.LabeledSequence"), diff --git a/R/module-loadpage-ui.R b/R/module-loadpage-ui.R index 222b585..75a02ec 100644 --- a/R/module-loadpage-ui.R +++ b/R/module-loadpage-ui.R @@ -282,6 +282,7 @@ create_spectronaut_uploads <- function(ns) { uiOutput(ns("spectronaut_header_ui")), uiOutput(ns("spectronaut_file_selection_ui")), uiOutput(ns("spectronaut_options_ui")), + uiOutput(ns("spectronaut_intensity_ui")), uiOutput(ns("spectronaut_turnover_ui")) ) } diff --git a/R/utils.R b/R/utils.R index ea8da01..4902ac9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -660,6 +660,11 @@ getData <- function(input) { filter_few_obs = input$filter_few_obs ) + if (!is.null(input$spec_intensity_col) && + nchar(trimws(input$spec_intensity_col)) > 0) { + big_spec_args$intensity <- trimws(input$spec_intensity_col) + } + if 
(!is.null(input$big_spec_annotation)) { big_spec_args$annotation <- data.table::fread( input$big_spec_annotation$datapath) @@ -670,7 +675,7 @@ getData <- function(input) { big_spec_args$anomalyModelFeatures <- c( "FG.ShapeQualityScore (MS2)", "FG.ShapeQualityScore (MS1)", - "EGDeltaRT") + "EG.DeltaRT") } converted_data <- do.call( @@ -693,20 +698,13 @@ getData <- function(input) { return(NULL) } - # Step 2 of the anomaly scoring pipeline: the converter only - # carries the model feature columns through the out-of-memory - # steps; MSstatsAnomalyScores fits the isolation-forest model - # on the in-memory result and adds the AnomalyScores column. - # Defaults mirror SpectronauttoMSstatsFormat's regular path - # (missing_run_count = 0.5, n_feat = 100, n_trees = 100, - # max_depth = "auto", cores = 1). if (isTRUE(input$carry_anomaly_features) && !is.null(input$big_run_order_file)) { run_order <- data.table::fread(input$big_run_order_file$datapath) mydata <- MSstatsConvert::MSstatsAnomalyScores( input = mydata, - quality_metrics = c("FG.ShapeQualityScore (MS2)", - "FG.ShapeQualityScore (MS1)", + quality_metrics = c("FGShapeQualityScore(MS2)", + "FGShapeQualityScore(MS1)", "EGDeltaRT"), temporal_direction = c("mean_decrease", "mean_decrease", @@ -1010,6 +1008,12 @@ library(MSstatsPTM)\n", sep = "") sep = "") big_spec_extra <- "" + if (!is.null(input$spec_intensity_col) && + nchar(trimws(input$spec_intensity_col)) > 0) { + big_spec_extra <- paste0(big_spec_extra, + ",\n intensity = \"", + trimws(input$spec_intensity_col), "\"") + } if (!is.null(input$big_spec_annotation)) { codes = paste(codes, "annot_file = data.table::fread(\"insert your annotation filepath (Run, BioReplicate, Condition)\")\n", @@ -1020,7 +1024,7 @@ library(MSstatsPTM)\n", sep = "") if (isTRUE(input$carry_anomaly_features)) { big_spec_extra <- paste0(big_spec_extra, ",\n calculateAnomalyScores = TRUE", - ",\n anomalyModelFeatures = c(\"FG.ShapeQualityScore (MS2)\", \"FG.ShapeQualityScore (MS1)\", 
\"EGDeltaRT\")") + ",\n anomalyModelFeatures = c(\"FG.ShapeQualityScore (MS2)\", \"FG.ShapeQualityScore (MS1)\", \"EG.DeltaRT\")") } codes = paste(codes, @@ -1047,7 +1051,9 @@ library(MSstatsPTM)\n", sep = "") "run_order = data.table::fread(\"insert your run order CSV filepath (Run, Order columns)\")\n", "data = MSstatsConvert::MSstatsAnomalyScores(\n", " input = data,\n", - " quality_metrics = c(\"FG.ShapeQualityScore (MS2)\", \"FG.ShapeQualityScore (MS1)\", \"EGDeltaRT\"),\n", + " # Standardized column names (raw Spectronaut names\n", + " # had `.` and ` ` stripped during the converter step).\n", + " quality_metrics = c(\"FGShapeQualityScore(MS2)\", \"FGShapeQualityScore(MS1)\", \"EGDeltaRT\"),\n", " temporal_direction = c(\"mean_decrease\", \"mean_decrease\", \"dispersion_increase\"),\n", " missing_run_count = 0.5,\n", " n_feat = 100,\n", @@ -1063,11 +1069,19 @@ library(MSstatsPTM)\n", sep = "") codes = paste(codes, "data = data.table::fread(\"insert your MSstats scheme output from Spectronaut filepath\")\nannot_file = data.table::fread(\"insert your annotation filepath\")#Optional\n" , sep = "") + reg_spec_intensity_arg <- if (!is.null(input$spec_intensity_col) && + nchar(trimws(input$spec_intensity_col)) > 0) { + paste0(" intensity = \"", + trimws(input$spec_intensity_col), "\",\n") + } else { + "" + } + if (isTRUE(input$calculate_anomaly_scores)) { codes = paste(codes, "run_order = data.table::fread(\"insert your run order CSV filepath (Run, Order columns)\")\n", sep = "") codes = paste(codes, "data = SpectronauttoMSstatsFormat(data, annotation = annot_file, #Optional - filter_with_Qvalue = ", input$q_val, ", +", reg_spec_intensity_arg, " filter_with_Qvalue = ", input$q_val, ", qvalue_cutoff = ", input$q_cutoff, ", removeProtein_with1Feature = ", input$remove, ", use_log_file = FALSE, @@ -1081,7 +1095,7 @@ library(MSstatsPTM)\n", sep = "") } else { codes = paste(codes, "data = SpectronauttoMSstatsFormat(data, annotation = annot_file, #Optional - 
filter_with_Qvalue = ", input$q_val, ", +", reg_spec_intensity_arg, " filter_with_Qvalue = ", input$q_val, ", qvalue_cutoff = ", input$q_cutoff, ", removeProtein_with1Feature = ", input$remove, ", use_log_file = FALSE)\n", sep = "") diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 318c0d3..73181f8 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1650,10 +1650,12 @@ describe("getData for Big Spectronaut", { getData(input_with_anomaly) expect_true(isTRUE(captured_args$calculateAnomalyScores)) + # Raw Spectronaut export names — the converter applies + # .standardizeColnames internally on the way out. expect_equal(captured_args$anomalyModelFeatures, c("FG.ShapeQualityScore (MS2)", "FG.ShapeQualityScore (MS1)", - "EGDeltaRT")) + "EG.DeltaRT")) # The big-file converter itself does NOT take a runOrder arg — # that's consumed by the separate MSstatsAnomalyScores step # post-collect (covered in the next test). @@ -1691,9 +1693,12 @@ describe("getData for Big Spectronaut", { expect_false(is.null(captured_scoring_args)) expect_equal(captured_scoring_args$input, mock_df) + # Standardized column names — the in-memory data after collect + # has had .standardizeColnames applied during the converter + # step, so MSstatsAnomalyScores must look for these names. 
expect_equal(captured_scoring_args$quality_metrics, - c("FG.ShapeQualityScore (MS2)", - "FG.ShapeQualityScore (MS1)", + c("FGShapeQualityScore(MS2)", + "FGShapeQualityScore(MS1)", "EGDeltaRT")) expect_equal(captured_scoring_args$temporal_direction, c("mean_decrease", @@ -1732,6 +1737,29 @@ describe("getData for Big Spectronaut", { expect_false(scoring_called) }) + test_that("passes intensity to converter when spec_intensity_col is set", { + input_with_intensity <- mock_input_big + input_with_intensity$spec_intensity_col <- "FG.MS1Quantity" + + stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) + stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) + stub(getData, "file.exists", TRUE) + stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) + stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) + stub(getData, "showNotification", function(...) NULL) + stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", + mock_big_spec_converter) + captured_args <- NULL + stub(getData, "dplyr::collect", function(x) { + captured_args <<- x + mock_df + }) + + getData(input_with_intensity) + + expect_equal(captured_args$intensity, "FG.MS1Quantity") + }) + test_that("omits annotation + anomaly args when neither is supplied", { stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) From 5422a9eab585d8586e2e3d84b687766e0a1d7057 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 26 May 2026 10:28:03 -0400 Subject: [PATCH 4/7] fix(loadpage): Consolidate calculate_anomaly_scores namespace across paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The big-file Spectronaut anomaly checkbox used a dedicated input ID (carry_anomaly_features) on the theory that the two checkboxes might collide. 
They cannot — the regular path's create_label_free_options is hidden when big_file_spec is on, and the big-file helper only renders when it is — so they share the namespace cleanly. The dedicated ID broke the downstream QC page, which reads loadpage_input()$calculate_anomaly_scores to gate the MSstats+ summarization method (module-qc-server.R:212) and the Quality Metrics plot type (module-qc-server.R:157). Big-file users who enabled anomaly scoring saw neither. * Renamed input IDs: carry_anomaly_features -> calculate_anomaly_scores, big_run_order_file -> run_order_file in module-loadpage-ui.R, module-loadpage-server.R, R/utils.R (getData big-file branch + getDataCode big-file branch), and tests/testthat/test-utils.R. * Updated the helper's roxygen note to document the deliberate namespace sharing and the mutual-exclusion that prevents collision. See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md Co-Authored-By: Claude --- R/module-loadpage-server.R | 4 ++-- R/module-loadpage-ui.R | 20 +++++++++++++------- R/utils.R | 12 ++++++------ tests/testthat/test-utils.R | 18 +++++++++--------- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/R/module-loadpage-server.R b/R/module-loadpage-server.R index 045f82d..fd1d3c3 100644 --- a/R/module-loadpage-server.R +++ b/R/module-loadpage-server.R @@ -256,13 +256,13 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa unique_peps_def <- if (is.null(input$filter_unique_peptides)) FALSE else input$filter_unique_peptides agg_psms_def <- if (is.null(input$aggregate_psms)) FALSE else input$aggregate_psms few_obs_def <- if (is.null(input$filter_few_obs)) FALSE else input$filter_few_obs - carry_anomaly_def <- if (is.null(input$carry_anomaly_features)) FALSE else input$carry_anomaly_features + calculate_anomaly_def <- if (is.null(input$calculate_anomaly_scores)) FALSE else input$calculate_anomaly_scores tagList( create_spectronaut_large_filter_options(session$ns, 
excluded_def, identified_def, qval_def), if (qval_def) create_spectronaut_qvalue_cutoff_ui(session$ns, cutoff_def), create_spectronaut_large_bottom_ui(session$ns, max_feature_def, unique_peps_def, agg_psms_def, few_obs_def), - create_spectronaut_large_annotation_ui(session$ns, carry_anomaly_def) + create_spectronaut_large_annotation_ui(session$ns, calculate_anomaly_def) ) } else { NULL diff --git a/R/module-loadpage-ui.R b/R/module-loadpage-ui.R index 75a02ec..44e5ea3 100644 --- a/R/module-loadpage-ui.R +++ b/R/module-loadpage-ui.R @@ -356,12 +356,18 @@ create_spectronaut_large_bottom_ui <- function(ns, max_feature_def = 20, unique_ #' (2) After `dplyr::collect`, `MSstatsConvert::MSstatsAnomalyScores` #' is called on the in-memory result to fit the isolation-forest #' model and produce the `AnomalyScores` column. -#' The internal input ID `carry_anomaly_features` is named for step (1) -#' but gates both steps; the user-facing label reflects step (2)'s outcome. +#' Input IDs `calculate_anomaly_scores` and `run_order_file` are deliberately +#' the same as the regular Spectronaut path's so downstream pages +#' (module-qc-server's MSstats+ summarization gate, getDataCode's +#' reproducibility script, etc.) read a single source of truth regardless +#' of which path the user took. The two UI checkboxes never coexist — +#' the regular path's `create_label_free_options` is hidden when +#' `big_file_spec` is on, and this helper only renders when it is — so +#' there is no Shiny namespace collision. #' A run-order CSV is required (Run + Order columns) — `MSstatsAnomalyScores` #' uses it for temporal feature engineering. 
#' @noRd -create_spectronaut_large_annotation_ui <- function(ns, carry_anomaly_def = FALSE) { +create_spectronaut_large_annotation_ui <- function(ns, calculate_anomaly_def = FALSE) { tagList( tags$hr(), h5("Annotation file (optional)", @@ -371,17 +377,17 @@ create_spectronaut_large_annotation_ui <- function(ns, carry_anomaly_def = FALSE class = "icon-tooltip")), fileInput(ns("big_spec_annotation"), label = NULL, multiple = FALSE, accept = c(".csv", ".tsv", ".txt")), - checkboxInput(ns("carry_anomaly_features"), + checkboxInput(ns("calculate_anomaly_scores"), label = tags$span( "Calculate Anomaly Scores", class = "icon-wrapper", icon("question-circle", lib = "font-awesome"), div("Runs the same anomaly scoring pipeline as the regular Spectronaut path: the converter carries FG.ShapeQualityScore (MS2)/(MS1) and EGDeltaRT through the out-of-memory steps, then MSstatsConvert::MSstatsAnomalyScores fits the isolation-forest model on the collected data and adds an AnomalyScores column. Requires a run order CSV.", class = "icon-tooltip")), - value = carry_anomaly_def), + value = calculate_anomaly_def), conditionalPanel( - condition = sprintf("input['%s']", ns("carry_anomaly_features")), - fileInput(ns("big_run_order_file"), + condition = sprintf("input['%s']", ns("calculate_anomaly_scores")), + fileInput(ns("run_order_file"), label = h5("Upload Run Order File", class = "icon-wrapper", icon("question-circle", lib = "font-awesome"), diff --git a/R/utils.R b/R/utils.R index 4902ac9..cb0556d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -670,7 +670,7 @@ getData <- function(input) { input$big_spec_annotation$datapath) } - if (isTRUE(input$carry_anomaly_features)) { + if (isTRUE(input$calculate_anomaly_scores)) { big_spec_args$calculateAnomalyScores <- TRUE big_spec_args$anomalyModelFeatures <- c( "FG.ShapeQualityScore (MS2)", @@ -698,9 +698,9 @@ getData <- function(input) { return(NULL) } - if (isTRUE(input$carry_anomaly_features) && - !is.null(input$big_run_order_file)) { - 
run_order <- data.table::fread(input$big_run_order_file$datapath) + if (isTRUE(input$calculate_anomaly_scores) && + !is.null(input$run_order_file)) { + run_order <- data.table::fread(input$run_order_file$datapath) mydata <- MSstatsConvert::MSstatsAnomalyScores( input = mydata, quality_metrics = c("FGShapeQualityScore(MS2)", @@ -1021,7 +1021,7 @@ library(MSstatsPTM)\n", sep = "") big_spec_extra <- paste0(big_spec_extra, ",\n annotation = annot_file") } - if (isTRUE(input$carry_anomaly_features)) { + if (isTRUE(input$calculate_anomaly_scores)) { big_spec_extra <- paste0(big_spec_extra, ",\n calculateAnomalyScores = TRUE", ",\n anomalyModelFeatures = c(\"FG.ShapeQualityScore (MS2)\", \"FG.ShapeQualityScore (MS1)\", \"EG.DeltaRT\")") @@ -1043,7 +1043,7 @@ library(MSstatsPTM)\n", sep = "") ")\ndata = dplyr::collect(converted)\n", sep = "") - if (isTRUE(input$carry_anomaly_features)) { + if (isTRUE(input$calculate_anomaly_scores)) { codes = paste(codes, "# Step 2 of the anomaly scoring pipeline: fit the\n", "# isolation-forest model on the collected data and\n", diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 73181f8..c77ded6 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1622,10 +1622,10 @@ describe("getData for Big Spectronaut", { expect_equal(captured_args$annotation, dummy_annot_df) }) - test_that("passes calculateAnomalyScores + anomalyModelFeatures to converter when carry_anomaly_features = TRUE", { + test_that("passes calculateAnomalyScores + anomalyModelFeatures to converter when calculate_anomaly_scores = TRUE", { input_with_anomaly <- mock_input_big - input_with_anomaly$carry_anomaly_features <- TRUE - input_with_anomaly$big_run_order_file <- list(datapath = "run_order.csv") + input_with_anomaly$calculate_anomaly_scores <- TRUE + input_with_anomaly$run_order_file <- list(datapath = "run_order.csv") stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) stub(getData, 
"shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) @@ -1662,10 +1662,10 @@ describe("getData for Big Spectronaut", { expect_null(captured_args$runOrder) }) - test_that("calls MSstatsConvert::MSstatsAnomalyScores after collect when carry_anomaly_features && big_run_order_file are set", { + test_that("calls MSstatsConvert::MSstatsAnomalyScores after collect when calculate_anomaly_scores && run_order_file are set", { input_with_full_anomaly <- mock_input_big - input_with_full_anomaly$carry_anomaly_features <- TRUE - input_with_full_anomaly$big_run_order_file <- list(datapath = "run_order.csv") + input_with_full_anomaly$calculate_anomaly_scores <- TRUE + input_with_full_anomaly$run_order_file <- list(datapath = "run_order.csv") stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) @@ -1710,10 +1710,10 @@ describe("getData for Big Spectronaut", { expect_equal(captured_scoring_args$cores, 1) }) - test_that("does NOT call MSstatsAnomalyScores when carry_anomaly_features is TRUE but big_run_order_file is missing", { + test_that("does NOT call MSstatsAnomalyScores when calculate_anomaly_scores is TRUE but run_order_file is missing", { input_no_runorder <- mock_input_big - input_no_runorder$carry_anomaly_features <- TRUE - input_no_runorder$big_run_order_file <- NULL + input_no_runorder$calculate_anomaly_scores <- TRUE + input_no_runorder$run_order_file <- NULL stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) stub(getData, "shinyFiles::parseFilePaths", function(...) 
data.frame(datapath = "test.csv")) From f1a6142bc6ca81068d6b20cd4ce5e812814064cb Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 26 May 2026 10:56:50 -0400 Subject: [PATCH 5/7] fix(loadpage): Fail-fast when run order is missing in big-file mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, ticking Calculate Anomaly Scores without uploading a run-order CSV silently skipped the post-collect MSstatsAnomalyScores step — the converter ran, dplyr::collect ran, and the user saw no AnomalyScores column with no error message. Validate upfront alongside the other big-file pre-flight checks (qvalue_cutoff range, max_feature_count positive, file existence), matching their notification + spinner-removal + early-return shape. * Added the validation block between the existing file-existence check and the update_modal_spinner call, so the converter never starts when the input is incomplete. * New unit test `fails fast when calculate_anomaly_scores is TRUE but run_order_file is missing` stubs update_modal_spinner to throw — if the converter step is ever reached despite missing run order, the test fails loudly. * Removed the now-redundant `does NOT call MSstatsAnomalyScores when run_order_file is missing` test — its assertion was trivially true after the fail-fast change (getData returns NULL before the scoring stub could ever be invoked). 
See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md Co-Authored-By: Claude --- R/utils.R | 11 ++++++++++- tests/testthat/test-utils.R | 24 ++++++++---------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/R/utils.R b/R/utils.R index cb0556d..a53aebd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -639,7 +639,16 @@ getData <- function(input) { shinybusy::remove_modal_spinner() return(NULL) } - + + if (isTRUE(input$calculate_anomaly_scores) && is.null(input$run_order_file)) { + showNotification( + "Error: Run Order CSV is required when Calculate Anomaly Scores is enabled. Please upload a CSV with Run and Order columns.", + type = "error", + duration = NULL) + shinybusy::remove_modal_spinner() + return(NULL) + } + shinybusy::update_modal_spinner(text = "Processing large Spectronaut file...") # Base arguments shared by every large-file Spectronaut run. diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index c77ded6..fb102cf 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1710,7 +1710,7 @@ describe("getData for Big Spectronaut", { expect_equal(captured_scoring_args$cores, 1) }) - test_that("does NOT call MSstatsAnomalyScores when calculate_anomaly_scores is TRUE but run_order_file is missing", { + test_that("fails fast when calculate_anomaly_scores is TRUE but run_order_file is missing", { input_no_runorder <- mock_input_big input_no_runorder$calculate_anomaly_scores <- TRUE input_no_runorder$run_order_file <- NULL @@ -1718,23 +1718,15 @@ describe("getData for Big Spectronaut", { stub(getData, "shinyFiles::getVolumes", function() function() c(root = "/")) stub(getData, "shinyFiles::parseFilePaths", function(...) data.frame(datapath = "test.csv")) stub(getData, "file.exists", TRUE) - stub(getData, "shinybusy::update_modal_spinner", function(...) NULL) stub(getData, "shinybusy::remove_modal_spinner", function(...) NULL) - stub(getData, "showNotification", function(...) 
NULL) - stub(getData, "MSstatsBig::bigSpectronauttoMSstatsFormat", - mock_arrow_obj) - stub(getData, "dplyr::collect", mock_df) - - scoring_called <- FALSE - stub(getData, "MSstatsConvert::MSstatsAnomalyScores", - function(...) { - scoring_called <<- TRUE - mock_df - }) - - getData(input_no_runorder) + stub(getData, "showNotification", + function(msg, ...) expect_match(msg, "Run Order CSV")) + # The converter should never run; if it does, fail the test. + stub(getData, "shinybusy::update_modal_spinner", + function(...) stop("converter step reached despite missing run order")) - expect_false(scoring_called) + res <- getData(input_no_runorder) + expect_null(res) }) test_that("passes intensity to converter when spec_intensity_col is set", { From 732350dcabebd49c898e66340b7df9e1ef31cf83 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 26 May 2026 11:02:59 -0400 Subject: [PATCH 6/7] fix(loadpage): Fail-fast on missing run order in regular Spectronaut path too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The big-file path already validated this in commit f1a6142, but the regular Spectronaut path had the same silent-skip pattern: ticking Calculate Anomaly Scores without uploading a run-order CSV caused the converter to run without anomaly scoring, with no error shown to the user. Now that the calculate_anomaly_scores / run_order_file input IDs are shared across paths (2e), the validation should be symmetric too. * Added the same showNotification + early return guard at the top of the regular Spectronaut else branch, before the fread of the spec data so we truly fail fast. * New unit test stubs both data.table::fread and SpectronauttoMSstatsFormat to throw — if either is reached despite the missing run order, the test fails loudly. 
See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md Co-Authored-By: Claude --- R/utils.R | 9 +++++++++ tests/testthat/test-utils.R | 28 +++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index a53aebd..6ac8fd4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -727,6 +727,15 @@ getData <- function(input) { } } else { + + if (isTRUE(input$calculate_anomaly_scores) && is.null(input$run_order_file)) { + showNotification( + "Error: Run Order CSV is required when Calculate Anomaly Scores is enabled. Please upload a CSV with Run and Order columns.", + type = "error", + duration = NULL) + return(NULL) + } + data = data.table::fread(input$specdata$datapath) # Base arguments for the Spectronaut converter converter_args = list( diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index fb102cf..e94da03 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1485,12 +1485,38 @@ describe("getData for Spectronaut input with anomaly scores", { #EXECUTION result_args <- getData(mock_input_no_anomaly) - + #ASSERTION: Check that the anomaly arguments are NOT present expect_null(result_args$calculateAnomalyScores) expect_null(result_args$runOrder) expect_null(result_args$anomalyModelFeatures) }) + + test_that("fails fast when calculate_anomaly_scores is TRUE but run_order_file is missing (regular path)", { + mock_input_missing_runorder <- list( + BIO = "Protein", + DDA_DIA = "DIA", + filetype = "spec", + specdata = list(datapath = "dummy_spec.csv"), + annot = list(datapath = "dummy_annot.csv"), + q_val = TRUE, + q_cutoff = 0.01, + remove = TRUE, + calculate_anomaly_scores = TRUE, + run_order_file = NULL + ) + + stub(getData, "showNotification", + function(msg, ...) expect_match(msg, "Run Order CSV")) + # The converter should never run; if it does, fail the test. + stub(getData, "data.table::fread", + function(...) 
stop("fread reached despite missing run order")) + stub(getData, "SpectronauttoMSstatsFormat", + function(...) stop("converter reached despite missing run order")) + + res <- getData(mock_input_missing_runorder) + expect_null(res) + }) }) describe("getData for Big Spectronaut", { From e0aad5801b98efe22ef0ff6c27b44e11fc4622b0 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 26 May 2026 11:07:57 -0400 Subject: [PATCH 7/7] fix(loadpage): Remove modal spinner before regular-path validation NULL return getData calls show_modal_spinner() at the top, then dispatches by filetype. The big-file branch's run-order validation already called remove_modal_spinner() before returning NULL (commit f1a6142), but the regular Spectronaut branch's validation (commit 732350d) missed it, leaving the spinner stuck on screen when the user ticked Calculate Anomaly Scores without uploading a run order. * Added remove_modal_spinner() (unqualified, matching the other unqualified calls in this file at L457/486/504/887) before the return(NULL) in the regular-path validation block. * Extended the regular-path fail-fast test to stub remove_modal_spinner with a flag and assert it was called. See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md Co-Authored-By: Claude --- R/utils.R | 1 + tests/testthat/test-utils.R | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/R/utils.R b/R/utils.R index 6ac8fd4..8b41a8d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -733,6 +733,7 @@ getData <- function(input) { "Error: Run Order CSV is required when Calculate Anomaly Scores is enabled. 
Please upload a CSV with Run and Order columns.", type = "error", duration = NULL) + remove_modal_spinner() return(NULL) } diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index e94da03..3ed7d6c 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1508,6 +1508,12 @@ describe("getData for Spectronaut input with anomaly scores", { stub(getData, "showNotification", function(msg, ...) expect_match(msg, "Run Order CSV")) + # getData starts with show_modal_spinner() — the validation + # must call remove_modal_spinner() before returning NULL so + # the spinner doesn't get stuck. Track that it was called. + spinner_removed <- FALSE + stub(getData, "remove_modal_spinner", + function(...) { spinner_removed <<- TRUE; NULL }) # The converter should never run; if it does, fail the test. stub(getData, "data.table::fread", function(...) stop("fread reached despite missing run order")) @@ -1516,6 +1522,7 @@ describe("getData for Spectronaut input with anomaly scores", { res <- getData(mock_input_missing_runorder) expect_null(res) + expect_true(spinner_removed) }) })