From 11d4d101ac995c8cfd4489a1305034c3a6196862 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 26 May 2026 08:48:32 -0400
Subject: [PATCH 1/3] Fix(Spectronaut): Enable annotation to be added to input

* Added annotation = NULL parameter to bigSpectronauttoMSstatsFormat
  (positional arg #2, mirroring bigDIANNtoMSstatsFormat from #16).
* When supplied, the converter merges the annotation onto the
  output via MSstatsAddAnnotationBig, overriding any Condition /
  BioReplicate columns that came from R.Condition / R.Replicate.
* Required for paired designs and other experimental layouts that
  Spectronaut's own annotation cannot express.
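* Added an override test to tests/testthat/test-converters.R.
* Regenerated documentation: adds man/dot-prefixedPath.Rd, and
  DESCRIPTION now records Config/roxygen2/version 8.0.0 in place
  of RoxygenNote 7.3.3.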

See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md

Co-Authored-By: Claude <noreply@anthropic.com>
---
 DESCRIPTION                          |  2 +-
 R/converters.R                       | 61 +++++++++++++++++++++-------
 man/bigSpectronauttoMSstatsFormat.Rd | 31 ++++++++++++--
 man/dot-prefixedPath.Rd              | 24 +++++++++++
 tests/testthat/test-converters.R     | 47 +++++++++++++++++++++
 5 files changed, 147 insertions(+), 18 deletions(-)
 create mode 100644 man/dot-prefixedPath.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 83fdc00..1371566 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,7 +14,6 @@ Description: MSstats package provide tools for preprocessing, summarization and
     processing larger than memory data sets.
 License: Artistic-2.0
 Encoding: UTF-8
-RoxygenNote: 7.3.3
 Imports: 
     arrow,
     DBI,
@@ -31,3 +30,4 @@ Suggests:
     rmarkdown
 VignetteBuilder: knitr
 biocViews: MassSpectrometry, Proteomics, Software
+Config/roxygen2/version: 8.0.0
diff --git a/R/converters.R b/R/converters.R
index 13b4383..d961a2e 100644
--- a/R/converters.R
+++ b/R/converters.R
@@ -112,6 +112,15 @@ bigFragPipetoMSstatsFormat <-  function(input_file, output_file_name,
 #' Convert out-of-memory Spectronaut files to MSstats format.
 #'
 #' @inheritParams MSstatsPreprocessBig
+#' @param annotation Optional data.frame with columns `Run`,
+#'   `BioReplicate`, `Condition` (plus any additional annotation
+#'   columns). If supplied, the converter merges it onto the output
+#'   on `Run` and overrides any `Condition` / `BioReplicate` values
+#'   that came from Spectronaut's `R.Condition` / `R.Replicate`
+#'   columns. Required when the experimental design cannot be
+#'   expressed in Spectronaut's own annotation — most notably paired
+#'   designs, where `BioReplicate` must encode the pairing
+#'   structure rather than per-sample IDs.
 #' @param intensity Name of the intensity column to be used in Spectronaut
 #' @param filter_by_excluded if TRUE, will filter by the `F.ExcludedFromQuantification` column.
 #' @param filter_by_identified if TRUE, will filter by the `EG.Identified` column.
@@ -122,16 +131,32 @@ bigFragPipetoMSstatsFormat <-  function(input_file, output_file_name,
 #'
 #' @examples
 #' converted_data <- bigSpectronauttoMSstatsFormat(
-#'   system.file("extdata", "spectronaut_input.csv", package = "MSstatsBig"),
-#'   "output_file.csv",
-#'   backend="arrow")
+#'   input_file = system.file("extdata", "spectronaut_input.csv",
+#'                            package = "MSstatsBig"),
+#'   output_file_name = "output_file.csv",
+#'   backend = "arrow")
 #' converted_data <- dplyr::collect(converted_data)
 #' head(converted_data)
 #'
+#' # Override Spectronaut's embedded Condition / BioReplicate with
+#' # a custom annotation (e.g. for a paired design):
+#' annot <- data.frame(Run = unique(converted_data[["Run"]]))
+#' annot$BioReplicate <- seq_len(nrow(annot))
+#' annot$Condition <- rep(c("ctrl", "treat"), length.out = nrow(annot))
+#' overridden <- bigSpectronauttoMSstatsFormat(
+#'   input_file = system.file("extdata", "spectronaut_input.csv",
+#'                            package = "MSstatsBig"),
+#'   annotation = annot,
+#'   output_file_name = "output_file.csv",
+#'   backend = "arrow")
+#' head(dplyr::collect(overridden))
+#'
 #' @return either arrow object or sparklyr table that can be optionally collected
 #' into memory by using dplyr::collect function.
 #'
-bigSpectronauttoMSstatsFormat <-  function(input_file, output_file_name,
+bigSpectronauttoMSstatsFormat <-  function(input_file,
+                                          annotation = NULL,
+                                          output_file_name,
                                           backend,
                                           intensity = "F.NormalizedPeakArea",
                                           filter_by_excluded = FALSE,
@@ -143,7 +168,7 @@ bigSpectronauttoMSstatsFormat <-  function(input_file, output_file_name,
                                           aggregate_psms =  FALSE,
                                           filter_few_obs =  FALSE,
                                           remove_annotation =  FALSE,
-                                          calculateAnomalyScores=FALSE, 
+                                          calculateAnomalyScores=FALSE,
                                           anomalyModelFeatures=c(),
                                           connection =  NULL) {
   reduced_file <- .prefixedPath("reduce_output_", output_file_name)
@@ -153,19 +178,27 @@ bigSpectronauttoMSstatsFormat <-  function(input_file, output_file_name,
                        calculateAnomalyScores, anomalyModelFeatures)
   msstats_data <- MSstatsPreprocessBig(
     input_file = reduced_file,
-    output_file_name = output_file_name, 
-    backend = backend, 
+    output_file_name = output_file_name,
+    backend = backend,
     max_feature_count = max_feature_count,
     filter_unique_peptides = filter_unique_peptides,
-    aggregate_psms = aggregate_psms, 
-    filter_few_obs = filter_few_obs, 
-    remove_annotation = remove_annotation, 
-    calculateAnomalyScores = calculateAnomalyScores, 
-    anomalyModelFeatures = anomalyModelFeatures, 
+    aggregate_psms = aggregate_psms,
+    filter_few_obs = filter_few_obs,
+    remove_annotation = remove_annotation,
+    calculateAnomalyScores = calculateAnomalyScores,
+    anomalyModelFeatures = anomalyModelFeatures,
     connection = connection)
-  
+
+  if (!is.null(annotation)) {
+    msstats_data <- MSstatsAddAnnotationBig(msstats_data, annotation)
+    if (backend == "arrow") {
+      unlink(output_file_name, recursive = TRUE, force = TRUE)
+      arrow::write_dataset(msstats_data, output_file_name, format = "csv")
+    }
+  }
+
   return(msstats_data)
-  
+
 }
 
 
diff --git a/man/bigSpectronauttoMSstatsFormat.Rd b/man/bigSpectronauttoMSstatsFormat.Rd
index 01706ef..e1a3f73 100644
--- a/man/bigSpectronauttoMSstatsFormat.Rd
+++ b/man/bigSpectronauttoMSstatsFormat.Rd
@@ -6,6 +6,7 @@
 \usage{
 bigSpectronauttoMSstatsFormat(
   input_file,
+  annotation = NULL,
   output_file_name,
   backend,
   intensity = "F.NormalizedPeakArea",
@@ -26,6 +27,16 @@ bigSpectronauttoMSstatsFormat(
 \arguments{
 \item{input_file}{name of the input text file in 10-column MSstats format.}
 
+\item{annotation}{Optional data.frame with columns `Run`,
+`BioReplicate`, `Condition` (plus any additional annotation
+columns). If supplied, the converter merges it onto the output
+on `Run` and overrides any `Condition` / `BioReplicate` values
+that came from Spectronaut's `R.Condition` / `R.Replicate`
+columns. Required when the experimental design cannot be
+expressed in Spectronaut's own annotation — most notably paired
+designs, where `BioReplicate` must encode the pairing
+structure rather than per-sample IDs.}
+
 \item{output_file_name}{name of an output file which will be saved after pre-processing}
 
 \item{backend}{"arrow" or "sparklyr". Option "sparklyr" requires a spark installation
@@ -73,10 +84,24 @@ Convert out-of-memory Spectronaut files to MSstats format.
 }
 \examples{
 converted_data <- bigSpectronauttoMSstatsFormat(
-  system.file("extdata", "spectronaut_input.csv", package = "MSstatsBig"),
-  "output_file.csv",
-  backend="arrow")
+  input_file = system.file("extdata", "spectronaut_input.csv",
+                           package = "MSstatsBig"),
+  output_file_name = "output_file.csv",
+  backend = "arrow")
 converted_data <- dplyr::collect(converted_data)
 head(converted_data)
 
+# Override Spectronaut's embedded Condition / BioReplicate with
+# a custom annotation (e.g. for a paired design):
+annot <- data.frame(Run = unique(converted_data[["Run"]]))
+annot$BioReplicate <- seq_len(nrow(annot))
+annot$Condition <- rep(c("ctrl", "treat"), length.out = nrow(annot))
+overridden <- bigSpectronauttoMSstatsFormat(
+  input_file = system.file("extdata", "spectronaut_input.csv",
+                           package = "MSstatsBig"),
+  annotation = annot,
+  output_file_name = "output_file.csv",
+  backend = "arrow")
+head(dplyr::collect(overridden))
+
 }
diff --git a/man/dot-prefixedPath.Rd b/man/dot-prefixedPath.Rd
new file mode 100644
index 0000000..be036ac
--- /dev/null
+++ b/man/dot-prefixedPath.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{.prefixedPath}
+\alias{.prefixedPath}
+\title{Build an intermediate output path by prefixing only the basename.}
+\usage{
+.prefixedPath(prefix, path)
+}
+\arguments{
+\item{prefix}{Character scalar prepended to the basename.}
+
+\item{path}{Output file path supplied by the caller.}
+}
+\value{
+Character scalar.
+}
+\description{
+Naive `paste0(prefix, output_file_name)` corrupts paths that contain a
+directory (`subdir/out.csv` → `topN_subdir/out.csv`,
+`/tmp/out.csv` → `topN_/tmp/out.csv`). Splitting via dirname/basename keeps
+the directory component intact so intermediate files land beside the final
+output.
+}
+\keyword{internal}
diff --git a/tests/testthat/test-converters.R b/tests/testthat/test-converters.R
index 78f6da3..51395fa 100644
--- a/tests/testthat/test-converters.R
+++ b/tests/testthat/test-converters.R
@@ -94,6 +94,53 @@ test_that("bigSpectronauttoMSstatsFormat works correctly", {
   unlink(paste0("reduce_output_", output_file), recursive = TRUE, force = TRUE)
 })
 
+test_that("bigSpectronauttoMSstatsFormat overrides Condition/BioReplicate from annotation", {
+  # Mock reduce to emit rows tagged with values we can distinguish
+  # from the supplied annotation — if the override works,
+  # downstream Condition/BioReplicate must come from `annotation`,
+  # not from these mocked values.
+  stub(bigSpectronauttoMSstatsFormat, "reduceBigSpectronaut", function(input_file, output_path, ...) {
+    msstats_data <- data.frame(
+      ProteinName = "P1", PeptideSequence = "PEPTIDE", PrecursorCharge = 2,
+      FragmentIon = "frag1", ProductCharge = 1,
+      IsotopeLabelType = "L",
+      Condition = "FROM_SPECTRONAUT", BioReplicate = 999,
+      Run = rep(c("run1", "run2"), each = 1),
+      Intensity = c(1000, 2000)
+    )
+    readr::write_csv(msstats_data, output_path)
+  })
+
+  input_file <- "dummy_spectro_input.csv"
+  output_file <- "spectro_output_annot.csv"
+
+  annotation <- data.frame(
+    Run = c("run1", "run2"),
+    BioReplicate = c(7L, 8L),
+    Condition = c("ctrl", "treat"),
+    stringsAsFactors = FALSE
+  )
+
+  processed <- bigSpectronauttoMSstatsFormat(
+    input_file = input_file,
+    annotation = annotation,
+    output_file_name = output_file,
+    backend = "arrow",
+    max_feature_count = 1
+  )
+  result <- dplyr::collect(processed)
+  result <- result[order(result$Run), ]
+
+  expect_equal(result$Condition, c("ctrl", "treat"))
+  expect_equal(result$BioReplicate, c(7L, 8L))
+  expect_false(any(result$Condition == "FROM_SPECTRONAUT"))
+  expect_false(any(result$BioReplicate == 999))
+
+  # Cleanup
+  unlink(output_file, recursive = TRUE, force = TRUE)
+  unlink(paste0("reduce_output_", output_file), recursive = TRUE, force = TRUE)
+})
+
 # test_that("bigDIANNtoMSstatsFormat works with real MSstatsConvert tinytest data", {
 #   input_file <- "/Users/rudhikshah/NorthEasternContractWork/MSstatsConvert/inst/tinytest/raw_data/DIANN/diann_input.tsv"
 #   annotation_file <- "/Users/rudhikshah/NorthEasternContractWork/MSstatsConvert/inst/tinytest/raw_data/DIANN/annotation.csv"

From e78a1596d08459a827e545feb8b799d1f1911393 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 26 May 2026 09:00:01 -0400
Subject: [PATCH 2/3] Move annotation arg to end of
 bigSpectronauttoMSstatsFormat signature

* Slotted annotation = NULL just before connection = NULL instead
  of at position #2, so the pre-existing positional signature
  (input_file, output_file_name, backend, intensity, ...) keeps
  working for any external positional callers.
* This intentionally diverges from bigDIANNtoMSstatsFormat (#16),
  which puts annotation at position #2. Backward compatibility
  was prioritized for the Spectronaut converter because it
  existed for longer without an annotation argument. The DIANN
  signature can be reordered separately if consistency is needed
  later.
* Restored the simpler positional example call (it no longer needs
  the named-arg workaround that the position-#2 signature forced).

See MSstats-ai/todos/active/TODO-MSBig-20260526_bigspectronaut_annotation_param.md

Co-Authored-By: Claude <noreply@anthropic.com>
---
 R/converters.R                       | 19 +++++++--------
 man/bigSpectronauttoMSstatsFormat.Rd | 36 +++++++++++++---------------
 2 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/R/converters.R b/R/converters.R
index d961a2e..8d543b8 100644
--- a/R/converters.R
+++ b/R/converters.R
@@ -131,9 +131,8 @@ bigFragPipetoMSstatsFormat <-  function(input_file, output_file_name,
 #'
 #' @examples
 #' converted_data <- bigSpectronauttoMSstatsFormat(
-#'   input_file = system.file("extdata", "spectronaut_input.csv",
-#'                            package = "MSstatsBig"),
-#'   output_file_name = "output_file.csv",
+#'   system.file("extdata", "spectronaut_input.csv", package = "MSstatsBig"),
+#'   "output_file.csv",
 #'   backend = "arrow")
 #' converted_data <- dplyr::collect(converted_data)
 #' head(converted_data)
@@ -144,19 +143,16 @@ bigFragPipetoMSstatsFormat <-  function(input_file, output_file_name,
 #' annot$BioReplicate <- seq_len(nrow(annot))
 #' annot$Condition <- rep(c("ctrl", "treat"), length.out = nrow(annot))
 #' overridden <- bigSpectronauttoMSstatsFormat(
-#'   input_file = system.file("extdata", "spectronaut_input.csv",
-#'                            package = "MSstatsBig"),
-#'   annotation = annot,
-#'   output_file_name = "output_file.csv",
-#'   backend = "arrow")
+#'   system.file("extdata", "spectronaut_input.csv", package = "MSstatsBig"),
+#'   "output_file.csv",
+#'   backend = "arrow",
+#'   annotation = annot)
 #' head(dplyr::collect(overridden))
 #'
 #' @return either arrow object or sparklyr table that can be optionally collected
 #' into memory by using dplyr::collect function.
 #'
-bigSpectronauttoMSstatsFormat <-  function(input_file,
-                                          annotation = NULL,
-                                          output_file_name,
+bigSpectronauttoMSstatsFormat <-  function(input_file, output_file_name,
                                           backend,
                                           intensity = "F.NormalizedPeakArea",
                                           filter_by_excluded = FALSE,
@@ -170,6 +166,7 @@ bigSpectronauttoMSstatsFormat <-  function(input_file,
                                           remove_annotation =  FALSE,
                                           calculateAnomalyScores=FALSE,
                                           anomalyModelFeatures=c(),
+                                          annotation = NULL,
                                           connection =  NULL) {
   reduced_file <- .prefixedPath("reduce_output_", output_file_name)
   reduceBigSpectronaut(input_file, reduced_file,
diff --git a/man/bigSpectronauttoMSstatsFormat.Rd b/man/bigSpectronauttoMSstatsFormat.Rd
index e1a3f73..99379e8 100644
--- a/man/bigSpectronauttoMSstatsFormat.Rd
+++ b/man/bigSpectronauttoMSstatsFormat.Rd
@@ -6,7 +6,6 @@
 \usage{
 bigSpectronauttoMSstatsFormat(
   input_file,
-  annotation = NULL,
   output_file_name,
   backend,
   intensity = "F.NormalizedPeakArea",
@@ -21,22 +20,13 @@ bigSpectronauttoMSstatsFormat(
   remove_annotation = FALSE,
   calculateAnomalyScores = FALSE,
   anomalyModelFeatures = c(),
+  annotation = NULL,
   connection = NULL
 )
 }
 \arguments{
 \item{input_file}{name of the input text file in 10-column MSstats format.}
 
-\item{annotation}{Optional data.frame with columns `Run`,
-`BioReplicate`, `Condition` (plus any additional annotation
-columns). If supplied, the converter merges it onto the output
-on `Run` and overrides any `Condition` / `BioReplicate` values
-that came from Spectronaut's `R.Condition` / `R.Replicate`
-columns. Required when the experimental design cannot be
-expressed in Spectronaut's own annotation — most notably paired
-designs, where `BioReplicate` must encode the pairing
-structure rather than per-sample IDs.}
-
 \item{output_file_name}{name of an output file which will be saved after pre-processing}
 
 \item{backend}{"arrow" or "sparklyr". Option "sparklyr" requires a spark installation
@@ -72,6 +62,16 @@ using dataProcess function. Only applicable to sparklyr backend.}
 
 \item{anomalyModelFeatures}{Character vector of column names to be carried through the pipeline}
 
+\item{annotation}{Optional data.frame with columns `Run`,
+`BioReplicate`, `Condition` (plus any additional annotation
+columns). If supplied, the converter merges it onto the output
+on `Run` and overrides any `Condition` / `BioReplicate` values
+that came from Spectronaut's `R.Condition` / `R.Replicate`
+columns. Required when the experimental design cannot be
+expressed in Spectronaut's own annotation — most notably paired
+designs, where `BioReplicate` must encode the pairing
+structure rather than per-sample IDs.}
+
 \item{connection}{Connection to a spark instance created with the
 `spark_connect` function from `sparklyr` package.}
 }
@@ -84,9 +84,8 @@ Convert out-of-memory Spectronaut files to MSstats format.
 }
 \examples{
 converted_data <- bigSpectronauttoMSstatsFormat(
-  input_file = system.file("extdata", "spectronaut_input.csv",
-                           package = "MSstatsBig"),
-  output_file_name = "output_file.csv",
+  system.file("extdata", "spectronaut_input.csv", package = "MSstatsBig"),
+  "output_file.csv",
   backend = "arrow")
 converted_data <- dplyr::collect(converted_data)
 head(converted_data)
@@ -97,11 +96,10 @@ annot <- data.frame(Run = unique(converted_data[["Run"]]))
 annot$BioReplicate <- seq_len(nrow(annot))
 annot$Condition <- rep(c("ctrl", "treat"), length.out = nrow(annot))
 overridden <- bigSpectronauttoMSstatsFormat(
-  input_file = system.file("extdata", "spectronaut_input.csv",
-                           package = "MSstatsBig"),
-  annotation = annot,
-  output_file_name = "output_file.csv",
-  backend = "arrow")
+  system.file("extdata", "spectronaut_input.csv", package = "MSstatsBig"),
+  "output_file.csv",
+  backend = "arrow",
+  annotation = annot)
 head(dplyr::collect(overridden))
 
 }

From b95ef9305da76fc4b48f83897a35532561c45949 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 26 May 2026 09:12:41 -0400
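Subject: [PATCH 3/3] Remove dead DIANN tests; ignore local tool dirs

* Removed the two commented-out bigDIANNtoMSstatsFormat tests from
  tests/testthat/test-converters.R; they referenced hard-coded
  local file paths.
* Removed the explanatory comment from the annotation override
  test.
* Added .positai and .claude to .Rbuildignore, and .positai to
  .gitignore.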

---
 .Rbuildignore                    |  2 +
 .gitignore                       |  1 +
 tests/testthat/test-converters.R | 77 +-------------------------------
 3 files changed, 4 insertions(+), 76 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 91114bf..ea84996 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,2 +1,4 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
+^\.positai$
+^\.claude$
diff --git a/.gitignore b/.gitignore
index bf58646..c2a9478 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 .Ruserdata
 inst/doc
 *.Rproj
+.positai
diff --git a/tests/testthat/test-converters.R b/tests/testthat/test-converters.R
index 51395fa..1f9162d 100644
--- a/tests/testthat/test-converters.R
+++ b/tests/testthat/test-converters.R
@@ -95,10 +95,6 @@ test_that("bigSpectronauttoMSstatsFormat works correctly", {
 })
 
 test_that("bigSpectronauttoMSstatsFormat overrides Condition/BioReplicate from annotation", {
-  # Mock reduce to emit rows tagged with values we can distinguish
-  # from the supplied annotation — if the override works,
-  # downstream Condition/BioReplicate must come from `annotation`,
-  # not from these mocked values.
   stub(bigSpectronauttoMSstatsFormat, "reduceBigSpectronaut", function(input_file, output_path, ...) {
     msstats_data <- data.frame(
       ProteinName = "P1", PeptideSequence = "PEPTIDE", PrecursorCharge = 2,
@@ -139,75 +135,4 @@ test_that("bigSpectronauttoMSstatsFormat overrides Condition/BioReplicate from a
   # Cleanup
   unlink(output_file, recursive = TRUE, force = TRUE)
   unlink(paste0("reduce_output_", output_file), recursive = TRUE, force = TRUE)
-})
-
-# test_that("bigDIANNtoMSstatsFormat works with real MSstatsConvert tinytest data", {
-#   input_file <- "/Users/rudhikshah/NorthEasternContractWork/MSstatsConvert/inst/tinytest/raw_data/DIANN/diann_input.tsv"
-#   annotation_file <- "/Users/rudhikshah/NorthEasternContractWork/MSstatsConvert/inst/tinytest/raw_data/DIANN/annotation.csv"
-
-#   # Skip test if the local files are not found (e.g. on CI/CD)
-#   skip_if_not(file.exists(input_file), "Local DIANN input file not found")
-#   skip_if_not(file.exists(annotation_file), "Local annotation file not found")
-
-#   annot <- read.csv(annotation_file)
-#   output_file <- "real_diann_output.csv"
-
-#   processed <- bigDIANNtoMSstatsFormat(
-#     input_file = input_file,
-#     annotation = annot,
-#     output_file_name = output_file,
-#     backend = "arrow",
-#     MBR = FALSE,
-#     quantificationColumn = "FragmentQuantCorrected",
-#     max_feature_count = 100,
-#     filter_unique_peptides = FALSE,
-#     aggregate_psms = FALSE,
-#     filter_few_obs = FALSE
-#   )
-
-#   result <- dplyr::collect(processed)
-
-#   expect_true(!is.null(result))
-#   expect_true(nrow(result) > 0)
-
-#   # Cleanup — outputs may be directories when backend = "arrow"
-#   unlink(output_file, recursive = TRUE, force = TRUE)
-#   unlink(paste0("reduce_output_", output_file), recursive = TRUE, force = TRUE)
-#   unlink(paste0("topN_", output_file), recursive = TRUE, force = TRUE)
-# })
-
-# test_that("bigDIANNtoMSstatsFormat works with DIANN 2.0 parquet input", {
-#   input_file <- "/Users/rudhikshah/NorthEasternContractWork/MSstatsConvert/inst/tinytest/raw_data/DIANN/diann_2.0.parquet"
-#   annotation_file <- "/Users/rudhikshah/NorthEasternContractWork/MSstatsConvert/inst/tinytest/raw_data/DIANN/annotation_diann_2.0.csv"
-
-#   skip_if_not(file.exists(input_file), "Local DIANN 2.0 parquet file not found")
-#   skip_if_not(file.exists(annotation_file), "Local DIANN 2.0 annotation file not found")
-#   skip_if_not_installed("arrow")
-
-#   annot <- read.csv(annotation_file)
-#   output_file <- "diann_2_0_output.csv"
-
-#   processed <- bigDIANNtoMSstatsFormat(
-#     input_file = input_file,
-#     annotation = annot,
-#     output_file_name = output_file,
-#     backend = "arrow",
-#     MBR = FALSE,
-#     quantificationColumn = "auto",
-#     max_feature_count = 100,
-#     filter_unique_peptides = FALSE,
-#     aggregate_psms = FALSE,
-#     filter_few_obs = FALSE
-#   )
-
-#   result <- dplyr::collect(processed)
-
-#   expect_true(!is.null(result))
-#   expect_true(nrow(result) > 0)
-
-#   # Cleanup — outputs may be directories when backend = "arrow"
-#   unlink(output_file, recursive = TRUE, force = TRUE)
-#   unlink(paste0("reduce_output_", output_file), recursive = TRUE, force = TRUE)
-#   unlink(paste0("topN_", output_file), recursive = TRUE, force = TRUE)
-#   unlink(paste0("cleaned_", output_file), recursive = TRUE, force = TRUE)
-# })
\ No newline at end of file
+})
\ No newline at end of file