From 4dee0f22f8a00117d872da30588b1b9cbcd9b1fa Mon Sep 17 00:00:00 2001 From: anderstorstensson Date: Thu, 18 Jun 2026 08:59:24 +0200 Subject: [PATCH] feat: add import_png_folder_with_unclassified wrapper Combine import_png_folder_to_db() and fill_unclassified_db() into a single call. After importing the selected-taxa PNGs, backfill the remaining ROIs as "unclassified" for only the samples that were just imported, so each imported sample is fully represented without touching samples from earlier import sessions. Move the write_mock_adc test helper to helper-adc.R so it is shared across test files. Co-Authored-By: Claude Opus 4.8 --- NAMESPACE | 1 + NEWS.md | 1 + R/database.R | 62 ++++++++++++ _pkgdown.yml | 1 + man/import_png_folder_with_unclassified.Rd | 66 +++++++++++++ tests/testthat/helper-adc.R | 24 +++++ tests/testthat/test-database.R | 26 +---- tests/testthat/test-png-import.R | 108 +++++++++++++++++++++ 8 files changed, 264 insertions(+), 25 deletions(-) create mode 100644 man/import_png_folder_with_unclassified.Rd create mode 100644 tests/testthat/helper-adc.R diff --git a/NAMESPACE b/NAMESPACE index 564404f..fd14479 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(get_settings_path) export(import_all_mat_to_db) export(import_mat_to_db) export(import_png_folder_to_db) +export(import_png_folder_with_unclassified) export(init_python_env) export(is_valid_sample_name) export(list_annotated_samples_db) diff --git a/NEWS.md b/NEWS.md index 25226b0..ae0d52a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ## New features - New exported function `fill_unclassified_db()` backfills a partially imported sample. After importing only selected taxa with `import_png_folder_to_db()`, it reads each sample's complete ROI list from its `.adc` file and inserts the remaining ROIs as `unclassified` (marked as not yet reviewed), leaving existing annotations untouched. +- New exported function `import_png_folder_with_unclassified()` wraps `import_png_folder_to_db()` and `fill_unclassified_db()` into a single call. It imports the selected-taxa PNGs and then backfills the remaining ROIs as `unclassified` for **only the samples that were just imported**, so each imported sample is fully represented without touching samples from earlier import sessions. ## Bug fixes diff --git a/R/database.R b/R/database.R index e767e2a..75a2f0e 100644 --- a/R/database.R +++ b/R/database.R @@ -1070,6 +1070,68 @@ fill_unclassified_db <- function(db_path, roi_folder, samples = NULL, counts } +#' Import a PNG class folder and backfill the rest as "unclassified" +#' +#' Convenience wrapper that runs \code{\link{import_png_folder_to_db}} followed +#' by \code{\link{fill_unclassified_db}} in a single call. After importing the +#' selected-taxa PNGs, it backfills the remaining ROIs of \emph{only the samples +#' that were just imported} as \code{"unclassified"}, so each imported sample is +#' fully represented in the database without touching samples from earlier +#' import sessions. +#' +#' @param png_folder Path to the top-level folder containing class subfolders +#' @param db_path Path to the SQLite database file +#' @param class2use Character vector of class names (preserves index order for +#' .mat export) +#' @param roi_folder Base ROI folder path, following the standard IFCB folder +#' structure (\code{roi_folder/YYYY/DYYYYMMDD/sample_name.adc}). Used by the +#' backfill step to read each sample's complete ROI list. +#' @param class_mapping Optional named character vector mapping scanned class +#' names to target class names. Passed to \code{\link{import_png_folder_to_db}}. +#' @param annotator Annotator name (defaults to \code{"imported"}). Applied to +#' both the import and the backfill. +#' @param fill Logical. When \code{TRUE} (the default), backfill the imported +#' samples with \code{"unclassified"} ROIs. Set to \code{FALSE} to import only. +#' @return Named list with two elements: \code{import} (the result of +#' \code{\link{import_png_folder_to_db}}) and \code{filled} (the result of +#' \code{\link{fill_unclassified_db}}, or zero counts when \code{fill} is +#' \code{FALSE} or no samples were imported). +#' @seealso \code{\link{import_png_folder_to_db}}, \code{\link{fill_unclassified_db}} +#' @export +#' @examples +#' \dontrun{ +#' db_path <- get_db_path("/data/manual") +#' class2use <- c("Diatom", "Dinoflagellate", "Ciliate") +#' result <- import_png_folder_with_unclassified( +#' "/data/png_export", db_path, class2use, +#' roi_folder = "/data/ifcb/raw" +#' ) +#' cat(result$import$success, "imported,", +#' result$filled$added, "backfilled\n") +#' } +import_png_folder_with_unclassified <- function(png_folder, db_path, class2use, + roi_folder, + class_mapping = NULL, + annotator = "imported", + fill = TRUE) { + import <- import_png_folder_to_db(png_folder, db_path, class2use, + class_mapping = class_mapping, + annotator = annotator) + + filled <- list(added = 0L, samples = 0L, skipped = 0L) + + if (isTRUE(fill)) { + samples <- unique(scan_png_class_folder(png_folder)$annotations$sample_name) + if (length(samples) > 0) { + filled <- fill_unclassified_db(db_path, roi_folder, + samples = samples, + annotator = annotator) + } + } + + list(import = import, filled = filled) +} + #' Bulk export all annotated samples from SQLite to class-organized PNGs #' #' Exports every annotated sample in the database to PNG images organized diff --git a/_pkgdown.yml b/_pkgdown.yml index 63c7816..b6baab6 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -80,6 +80,7 @@ reference: - export_all_db_to_zip - import_png_folder_to_db - fill_unclassified_db + - import_png_folder_with_unclassified - list_classes_db - save_class_taxonomy_db - load_class_taxonomy_db diff --git a/man/import_png_folder_with_unclassified.Rd b/man/import_png_folder_with_unclassified.Rd new file mode 100644 index 0000000..ce97897 --- /dev/null +++ b/man/import_png_folder_with_unclassified.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{import_png_folder_with_unclassified} +\alias{import_png_folder_with_unclassified} +\title{Import a PNG class folder and backfill the rest as "unclassified"} +\usage{ +import_png_folder_with_unclassified( + png_folder, + db_path, + class2use, + roi_folder, + class_mapping = NULL, + annotator = "imported", + fill = TRUE +) +} +\arguments{ +\item{png_folder}{Path to the top-level folder containing class subfolders} + +\item{db_path}{Path to the SQLite database file} + +\item{class2use}{Character vector of class names (preserves index order for +.mat export)} + +\item{roi_folder}{Base ROI folder path, following the standard IFCB folder +structure (\code{roi_folder/YYYY/DYYYYMMDD/sample_name.adc}). Used by the +backfill step to read each sample's complete ROI list.} + +\item{class_mapping}{Optional named character vector mapping scanned class +names to target class names. Passed to \code{\link{import_png_folder_to_db}}.} + +\item{annotator}{Annotator name (defaults to \code{"imported"}). Applied to +both the import and the backfill.} + +\item{fill}{Logical. When \code{TRUE} (the default), backfill the imported +samples with \code{"unclassified"} ROIs. Set to \code{FALSE} to import only.} +} +\value{ +Named list with two elements: \code{import} (the result of + \code{\link{import_png_folder_to_db}}) and \code{filled} (the result of + \code{\link{fill_unclassified_db}}, or zero counts when \code{fill} is + \code{FALSE} or no samples were imported). +} +\description{ +Convenience wrapper that runs \code{\link{import_png_folder_to_db}} followed +by \code{\link{fill_unclassified_db}} in a single call. After importing the +selected-taxa PNGs, it backfills the remaining ROIs of \emph{only the samples +that were just imported} as \code{"unclassified"}, so each imported sample is +fully represented in the database without touching samples from earlier +import sessions. +} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +class2use <- c("Diatom", "Dinoflagellate", "Ciliate") +result <- import_png_folder_with_unclassified( + "/data/png_export", db_path, class2use, + roi_folder = "/data/ifcb/raw" +) +cat(result$import$success, "imported,", + result$filled$added, "backfilled\n") +} +} +\seealso{ +\code{\link{import_png_folder_to_db}}, \code{\link{fill_unclassified_db}} +} diff --git a/tests/testthat/helper-adc.R b/tests/testthat/helper-adc.R new file mode 100644 index 0000000..33d3237 --- /dev/null +++ b/tests/testthat/helper-adc.R @@ -0,0 +1,24 @@ +# Shared test helper: write a mock ADC file with n_roi rows in the standard +# IFCB folder structure under roi_folder. Columns 16/17 are width/height; ROIs +# listed in zero_dims get width/height 0 (no image). +write_mock_adc <- function(roi_folder, sample_name, n_roi, zero_dims = integer(0)) { + year <- substr(sample_name, 2, 5) + date_part <- substr(sample_name, 1, 9) + adc_dir <- file.path(roi_folder, year, date_part) + dir.create(adc_dir, recursive = TRUE, showWarnings = FALSE) + adc_path <- file.path(adc_dir, paste0(sample_name, ".adc")) + + width <- rep(100L, n_roi) + height <- rep(80L, n_roi) + width[zero_dims] <- 0L + height[zero_dims] <- 0L + + mock <- data.frame( + V1 = seq_len(n_roi), V2 = 0, V3 = 0, V4 = 0, V5 = 0, + V6 = 0, V7 = 0, V8 = 0, V9 = 0, V10 = 0, + V11 = 0, V12 = 0, V13 = 0, V14 = 0, V15 = 0, + V16 = width, V17 = height + ) + write.table(mock, adc_path, row.names = FALSE, col.names = FALSE, sep = ",") + adc_path +} diff --git a/tests/testthat/test-database.R b/tests/testthat/test-database.R index c2bd873..1a4e420 100644 --- a/tests/testthat/test-database.R +++ b/tests/testthat/test-database.R @@ -2042,31 +2042,7 @@ test_that("export_all_db_to_png filters by samples parameter", { }) # fill_unclassified_db tests - -# Helper: write a mock ADC file with n_roi rows in the standard IFCB folder -# structure under roi_folder. Columns 16/17 are width/height; ROIs listed in -# zero_dims get width/height 0 (no image). -write_mock_adc <- function(roi_folder, sample_name, n_roi, zero_dims = integer(0)) { - year <- substr(sample_name, 2, 5) - date_part <- substr(sample_name, 1, 9) - adc_dir <- file.path(roi_folder, year, date_part) - dir.create(adc_dir, recursive = TRUE, showWarnings = FALSE) - adc_path <- file.path(adc_dir, paste0(sample_name, ".adc")) - - width <- rep(100L, n_roi) - height <- rep(80L, n_roi) - width[zero_dims] <- 0L - height[zero_dims] <- 0L - - mock <- data.frame( - V1 = seq_len(n_roi), V2 = 0, V3 = 0, V4 = 0, V5 = 0, - V6 = 0, V7 = 0, V8 = 0, V9 = 0, V10 = 0, - V11 = 0, V12 = 0, V13 = 0, V14 = 0, V15 = 0, - V16 = width, V17 = height - ) - write.table(mock, adc_path, row.names = FALSE, col.names = FALSE, sep = ",") - adc_path -} +# (write_mock_adc helper lives in helper-adc.R) test_that("fill_unclassified_db backfills missing ROIs as unclassified", { db_dir <- tempfile("db_") diff --git a/tests/testthat/test-png-import.R b/tests/testthat/test-png-import.R index 9d351a0..b9e2514 100644 --- a/tests/testthat/test-png-import.R +++ b/tests/testthat/test-png-import.R @@ -298,3 +298,111 @@ test_that("scan_png_class_folder parses example_data/png correctly", { # Known classes from example data (without _NNN suffix) expect_true("Mesodinium_rubrum" %in% result$classes_found) }) + +# =========================================================================== +# import_png_folder_with_unclassified tests +# =========================================================================== + +test_that("import_png_folder_with_unclassified imports then backfills", { + png_folder <- create_test_png_folder() + db_dir <- tempfile("db_") + roi_dir <- tempfile("roi_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + on.exit({ + unlink(png_folder, recursive = TRUE) + unlink(c(db_dir, roi_dir), recursive = TRUE) + }) + + class2use <- c("unclassified", "Diatom", "Ciliate") + + # Sample 1 has ROIs 1,2,3 imported; the .adc says it really has 5 ROIs + write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5) + # Sample 2 has ROI 1 imported; the .adc says it really has 3 ROIs + write_mock_adc(roi_dir, "D20230202T080000_IFCB134", n_roi = 3) + + result <- import_png_folder_with_unclassified( + png_folder, db_path, class2use, roi_folder = roi_dir, + annotator = "TestUser" + ) + + expect_equal(result$import$success, 2L) + expect_equal(result$import$failed, 0L) + # Sample 1: ROIs 4,5 missing; Sample 2: ROIs 2,3 missing + expect_equal(result$filled$added, 4L) + expect_equal(result$filled$samples, 2L) + expect_equal(result$filled$skipped, 0L) + + con <- dbConnect(SQLite(), db_path) + on.exit(dbDisconnect(con), add = TRUE) + + s1 <- dbGetQuery(con, + "SELECT roi_number, class_name, is_manual FROM annotations + WHERE sample_name = ? ORDER BY roi_number", + params = list("D20230101T120000_IFCB134")) + expect_equal(s1$roi_number, 1:5) + expect_equal(s1$class_name[4:5], c("unclassified", "unclassified")) + expect_equal(s1$is_manual[4:5], c(0L, 0L)) +}) + +test_that("import_png_folder_with_unclassified only backfills imported samples", { + png_folder <- create_test_png_folder() + db_dir <- tempfile("db_") + roi_dir <- tempfile("roi_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + on.exit({ + unlink(png_folder, recursive = TRUE) + unlink(c(db_dir, roi_dir), recursive = TRUE) + }) + + class2use <- c("unclassified", "Diatom", "Ciliate") + + # A pre-existing sample from an earlier session, present in the DB but NOT + # in the PNG folder being imported now. + other_sample <- "D20221231T000000_IFCB134" + save_annotations_db(db_path, other_sample, + data.frame(file_name = paste0(other_sample, "_00001.png"), + class_name = "Diatom", stringsAsFactors = FALSE), + class2use, "Earlier") + write_mock_adc(roi_dir, other_sample, n_roi = 9) + + # ADCs for the samples actually in the PNG folder + write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5) + write_mock_adc(roi_dir, "D20230202T080000_IFCB134", n_roi = 3) + + import_png_folder_with_unclassified( + png_folder, db_path, class2use, roi_folder = roi_dir + ) + + # The earlier sample must be untouched (still just its 1 imported ROI) + con <- dbConnect(SQLite(), db_path) + on.exit(dbDisconnect(con), add = TRUE) + n_other <- dbGetQuery(con, + "SELECT COUNT(*) AS n FROM annotations WHERE sample_name = ?", + params = list(other_sample))$n + expect_equal(n_other, 1L) +}) + +test_that("import_png_folder_with_unclassified skips backfill when fill = FALSE", { + png_folder <- create_test_png_folder() + db_dir <- tempfile("db_") + roi_dir <- tempfile("roi_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + on.exit({ + unlink(png_folder, recursive = TRUE) + unlink(c(db_dir, roi_dir), recursive = TRUE) + }) + + class2use <- c("unclassified", "Diatom", "Ciliate") + write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5) + + result <- import_png_folder_with_unclassified( + png_folder, db_path, class2use, roi_folder = roi_dir, fill = FALSE + ) + + expect_equal(result$import$success, 2L) + expect_equal(result$filled$added, 0L) + expect_equal(result$filled$samples, 0L) +})