diff --git a/NAMESPACE b/NAMESPACE
index 564404f..fd14479 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -28,6 +28,7 @@ export(get_settings_path)
 export(import_all_mat_to_db)
 export(import_mat_to_db)
 export(import_png_folder_to_db)
+export(import_png_folder_with_unclassified)
 export(init_python_env)
 export(is_valid_sample_name)
 export(list_annotated_samples_db)
diff --git a/NEWS.md b/NEWS.md
index 25226b0..ae0d52a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
 ## New features
 
 - New exported function `fill_unclassified_db()` backfills a partially imported sample. After importing only selected taxa with `import_png_folder_to_db()`, it reads each sample's complete ROI list from its `.adc` file and inserts the remaining ROIs as `unclassified` (marked as not yet reviewed), leaving existing annotations untouched.
+- New exported function `import_png_folder_with_unclassified()` wraps `import_png_folder_to_db()` and `fill_unclassified_db()` into a single call. It imports the selected-taxa PNGs and then backfills the remaining ROIs as `unclassified` for **only the samples that were just imported**, so each imported sample is fully represented without touching samples from earlier import sessions.
 
 ## Bug fixes
 
diff --git a/R/database.R b/R/database.R
index e767e2a..75a2f0e 100644
--- a/R/database.R
+++ b/R/database.R
@@ -1070,6 +1070,68 @@ fill_unclassified_db <- function(db_path, roi_folder, samples = NULL,
   counts
 }
 
+#' Import a PNG class folder and backfill the rest as "unclassified"
+#'
+#' Convenience wrapper that runs \code{\link{import_png_folder_to_db}} followed
+#' by \code{\link{fill_unclassified_db}} in a single call. After importing the
+#' selected-taxa PNGs, it backfills the remaining ROIs of \emph{only the samples
+#' that were just imported} as \code{"unclassified"}, so each imported sample is
+#' fully represented in the database without touching samples from earlier
+#' import sessions.
+#'
+#' @param png_folder Path to the top-level folder containing class subfolders
+#' @param db_path Path to the SQLite database file
+#' @param class2use Character vector of class names (preserves index order for
+#'   .mat export)
+#' @param roi_folder Base ROI folder path, following the standard IFCB folder
+#'   structure (\code{roi_folder/YYYY/DYYYYMMDD/sample_name.adc}). Used by the
+#'   backfill step to read each sample's complete ROI list.
+#' @param class_mapping Optional named character vector mapping scanned class
+#'   names to target class names. Passed to \code{\link{import_png_folder_to_db}}.
+#' @param annotator Annotator name (defaults to \code{"imported"}). Applied to
+#'   both the import and the backfill.
+#' @param fill Logical. When \code{TRUE} (the default), backfill the imported
+#'   samples with \code{"unclassified"} ROIs. Set to \code{FALSE} to import only.
+#' @return Named list with two elements: \code{import} (the result of
+#'   \code{\link{import_png_folder_to_db}}) and \code{filled} (the result of
+#'   \code{\link{fill_unclassified_db}}, or zero counts when \code{fill} is
+#'   \code{FALSE} or no samples were imported).
+#' @seealso \code{\link{import_png_folder_to_db}}, \code{\link{fill_unclassified_db}}
+#' @export
+#' @examples
+#' \dontrun{
+#' db_path <- get_db_path("/data/manual")
+#' class2use <- c("Diatom", "Dinoflagellate", "Ciliate")
+#' result <- import_png_folder_with_unclassified(
+#'   "/data/png_export", db_path, class2use,
+#'   roi_folder = "/data/ifcb/raw"
+#' )
+#' cat(result$import$success, "imported,",
+#'     result$filled$added, "backfilled\n")
+#' }
+import_png_folder_with_unclassified <- function(png_folder, db_path, class2use,
+                                                roi_folder,
+                                                class_mapping = NULL,
+                                                annotator = "imported",
+                                                fill = TRUE) {
+  import <- import_png_folder_to_db(png_folder, db_path, class2use,
+                                    class_mapping = class_mapping,
+                                    annotator = annotator)
+
+  filled <- list(added = 0L, samples = 0L, skipped = 0L)
+
+  if (isTRUE(fill)) {
+    samples <- unique(scan_png_class_folder(png_folder)$annotations$sample_name)
+    if (length(samples) > 0) {
+      filled <- fill_unclassified_db(db_path, roi_folder,
+                                     samples = samples,
+                                     annotator = annotator)
+    }
+  }
+
+  list(import = import, filled = filled)
+}
+
 #' Bulk export all annotated samples from SQLite to class-organized PNGs
 #'
 #' Exports every annotated sample in the database to PNG images organized
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 63c7816..b6baab6 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -80,6 +80,7 @@ reference:
   - export_all_db_to_zip
   - import_png_folder_to_db
   - fill_unclassified_db
+  - import_png_folder_with_unclassified
   - list_classes_db
   - save_class_taxonomy_db
   - load_class_taxonomy_db
diff --git a/man/import_png_folder_with_unclassified.Rd b/man/import_png_folder_with_unclassified.Rd
new file mode 100644
index 0000000..ce97897
--- /dev/null
+++ b/man/import_png_folder_with_unclassified.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/database.R
+\name{import_png_folder_with_unclassified}
+\alias{import_png_folder_with_unclassified}
+\title{Import a PNG class folder and backfill the rest as "unclassified"}
+\usage{
+import_png_folder_with_unclassified(
+  png_folder,
+  db_path,
+  class2use,
+  roi_folder,
+  class_mapping = NULL,
+  annotator = "imported",
+  fill = TRUE
+)
+}
+\arguments{
+\item{png_folder}{Path to the top-level folder containing class subfolders}
+
+\item{db_path}{Path to the SQLite database file}
+
+\item{class2use}{Character vector of class names (preserves index order for
+.mat export)}
+
+\item{roi_folder}{Base ROI folder path, following the standard IFCB folder
+structure (\code{roi_folder/YYYY/DYYYYMMDD/sample_name.adc}). Used by the
+backfill step to read each sample's complete ROI list.}
+
+\item{class_mapping}{Optional named character vector mapping scanned class
+names to target class names. Passed to \code{\link{import_png_folder_to_db}}.}
+
+\item{annotator}{Annotator name (defaults to \code{"imported"}). Applied to
+both the import and the backfill.}
+
+\item{fill}{Logical. When \code{TRUE} (the default), backfill the imported
+samples with \code{"unclassified"} ROIs. Set to \code{FALSE} to import only.}
+}
+\value{
+Named list with two elements: \code{import} (the result of
+  \code{\link{import_png_folder_to_db}}) and \code{filled} (the result of
+  \code{\link{fill_unclassified_db}}, or zero counts when \code{fill} is
+  \code{FALSE} or no samples were imported).
+}
+\description{
+Convenience wrapper that runs \code{\link{import_png_folder_to_db}} followed
+by \code{\link{fill_unclassified_db}} in a single call. After importing the
+selected-taxa PNGs, it backfills the remaining ROIs of \emph{only the samples
+that were just imported} as \code{"unclassified"}, so each imported sample is
+fully represented in the database without touching samples from earlier
+import sessions.
+}
+\examples{
+\dontrun{
+db_path <- get_db_path("/data/manual")
+class2use <- c("Diatom", "Dinoflagellate", "Ciliate")
+result <- import_png_folder_with_unclassified(
+  "/data/png_export", db_path, class2use,
+  roi_folder = "/data/ifcb/raw"
+)
+cat(result$import$success, "imported,",
+    result$filled$added, "backfilled\n")
+}
+}
+\seealso{
+\code{\link{import_png_folder_to_db}}, \code{\link{fill_unclassified_db}}
+}
diff --git a/tests/testthat/helper-adc.R b/tests/testthat/helper-adc.R
new file mode 100644
index 0000000..33d3237
--- /dev/null
+++ b/tests/testthat/helper-adc.R
@@ -0,0 +1,24 @@
+# Shared test helper: write a mock ADC file with n_roi rows in the standard
+# IFCB folder structure under roi_folder. Columns 16/17 are width/height; ROIs
+# listed in zero_dims get width/height 0 (no image).
+write_mock_adc <- function(roi_folder, sample_name, n_roi, zero_dims = integer(0)) {
+  year <- substr(sample_name, 2, 5)
+  date_part <- substr(sample_name, 1, 9)
+  adc_dir <- file.path(roi_folder, year, date_part)
+  dir.create(adc_dir, recursive = TRUE, showWarnings = FALSE)
+  adc_path <- file.path(adc_dir, paste0(sample_name, ".adc"))
+
+  width <- rep(100L, n_roi)
+  height <- rep(80L, n_roi)
+  width[zero_dims] <- 0L
+  height[zero_dims] <- 0L
+
+  mock <- data.frame(
+    V1 = seq_len(n_roi), V2 = 0, V3 = 0, V4 = 0, V5 = 0,
+    V6 = 0, V7 = 0, V8 = 0, V9 = 0, V10 = 0,
+    V11 = 0, V12 = 0, V13 = 0, V14 = 0, V15 = 0,
+    V16 = width, V17 = height
+  )
+  write.table(mock, adc_path, row.names = FALSE, col.names = FALSE, sep = ",")
+  adc_path
+}
diff --git a/tests/testthat/test-database.R b/tests/testthat/test-database.R
index c2bd873..1a4e420 100644
--- a/tests/testthat/test-database.R
+++ b/tests/testthat/test-database.R
@@ -2042,31 +2042,7 @@ test_that("export_all_db_to_png filters by samples parameter", {
 })
 
 # fill_unclassified_db tests
-
-# Helper: write a mock ADC file with n_roi rows in the standard IFCB folder
-# structure under roi_folder. Columns 16/17 are width/height; ROIs listed in
-# zero_dims get width/height 0 (no image).
-write_mock_adc <- function(roi_folder, sample_name, n_roi, zero_dims = integer(0)) {
-  year <- substr(sample_name, 2, 5)
-  date_part <- substr(sample_name, 1, 9)
-  adc_dir <- file.path(roi_folder, year, date_part)
-  dir.create(adc_dir, recursive = TRUE, showWarnings = FALSE)
-  adc_path <- file.path(adc_dir, paste0(sample_name, ".adc"))
-
-  width <- rep(100L, n_roi)
-  height <- rep(80L, n_roi)
-  width[zero_dims] <- 0L
-  height[zero_dims] <- 0L
-
-  mock <- data.frame(
-    V1 = seq_len(n_roi), V2 = 0, V3 = 0, V4 = 0, V5 = 0,
-    V6 = 0, V7 = 0, V8 = 0, V9 = 0, V10 = 0,
-    V11 = 0, V12 = 0, V13 = 0, V14 = 0, V15 = 0,
-    V16 = width, V17 = height
-  )
-  write.table(mock, adc_path, row.names = FALSE, col.names = FALSE, sep = ",")
-  adc_path
-}
+# (write_mock_adc helper lives in helper-adc.R)
 
 test_that("fill_unclassified_db backfills missing ROIs as unclassified", {
   db_dir <- tempfile("db_")
diff --git a/tests/testthat/test-png-import.R b/tests/testthat/test-png-import.R
index 9d351a0..b9e2514 100644
--- a/tests/testthat/test-png-import.R
+++ b/tests/testthat/test-png-import.R
@@ -298,3 +298,111 @@ test_that("scan_png_class_folder parses example_data/png correctly", {
   # Known classes from example data (without _NNN suffix)
   expect_true("Mesodinium_rubrum" %in% result$classes_found)
 })
+
+# ===========================================================================
+# import_png_folder_with_unclassified tests
+# ===========================================================================
+
+test_that("import_png_folder_with_unclassified imports then backfills", {
+  png_folder <- create_test_png_folder()
+  db_dir <- tempfile("db_")
+  roi_dir <- tempfile("roi_")
+  dir.create(db_dir)
+  db_path <- get_db_path(db_dir)
+  on.exit({
+    unlink(png_folder, recursive = TRUE)
+    unlink(c(db_dir, roi_dir), recursive = TRUE)
+  })
+
+  class2use <- c("unclassified", "Diatom", "Ciliate")
+
+  # Sample 1 has ROIs 1,2,3 imported; the .adc says it really has 5 ROIs
+  write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5)
+  # Sample 2 has ROI 1 imported; the .adc says it really has 3 ROIs
+  write_mock_adc(roi_dir, "D20230202T080000_IFCB134", n_roi = 3)
+
+  result <- import_png_folder_with_unclassified(
+    png_folder, db_path, class2use, roi_folder = roi_dir,
+    annotator = "TestUser"
+  )
+
+  expect_equal(result$import$success, 2L)
+  expect_equal(result$import$failed, 0L)
+  # Sample 1: ROIs 4,5 missing; Sample 2: ROIs 2,3 missing
+  expect_equal(result$filled$added, 4L)
+  expect_equal(result$filled$samples, 2L)
+  expect_equal(result$filled$skipped, 0L)
+
+  con <- dbConnect(SQLite(), db_path)
+  on.exit(dbDisconnect(con), add = TRUE, after = FALSE)  # before unlink()
+
+  s1 <- dbGetQuery(con,
+                   "SELECT roi_number, class_name, is_manual FROM annotations
+                    WHERE sample_name = ? ORDER BY roi_number",
+                   params = list("D20230101T120000_IFCB134"))
+  expect_equal(s1$roi_number, 1:5)
+  expect_equal(s1$class_name[4:5], c("unclassified", "unclassified"))
+  expect_equal(s1$is_manual[4:5], c(0L, 0L))
+})
+
+test_that("import_png_folder_with_unclassified only backfills imported samples", {
+  png_folder <- create_test_png_folder()
+  db_dir <- tempfile("db_")
+  roi_dir <- tempfile("roi_")
+  dir.create(db_dir)
+  db_path <- get_db_path(db_dir)
+  on.exit({
+    unlink(png_folder, recursive = TRUE)
+    unlink(c(db_dir, roi_dir), recursive = TRUE)
+  })
+
+  class2use <- c("unclassified", "Diatom", "Ciliate")
+
+  # A pre-existing sample from an earlier session, present in the DB but NOT
+  # in the PNG folder being imported now.
+  other_sample <- "D20221231T000000_IFCB134"
+  save_annotations_db(db_path, other_sample,
+                      data.frame(file_name = paste0(other_sample, "_00001.png"),
+                                 class_name = "Diatom", stringsAsFactors = FALSE),
+                      class2use, "Earlier")
+  write_mock_adc(roi_dir, other_sample, n_roi = 9)
+
+  # ADCs for the samples actually in the PNG folder
+  write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5)
+  write_mock_adc(roi_dir, "D20230202T080000_IFCB134", n_roi = 3)
+
+  import_png_folder_with_unclassified(
+    png_folder, db_path, class2use, roi_folder = roi_dir
+  )
+
+  # The earlier sample must be untouched (still just its 1 imported ROI)
+  con <- dbConnect(SQLite(), db_path)
+  on.exit(dbDisconnect(con), add = TRUE, after = FALSE)  # before unlink()
+  n_other <- dbGetQuery(con,
+                        "SELECT COUNT(*) AS n FROM annotations WHERE sample_name = ?",
+                        params = list(other_sample))$n
+  expect_equal(n_other, 1L)
+})
+
+test_that("import_png_folder_with_unclassified skips backfill when fill = FALSE", {
+  png_folder <- create_test_png_folder()
+  db_dir <- tempfile("db_")
+  roi_dir <- tempfile("roi_")
+  dir.create(db_dir)
+  db_path <- get_db_path(db_dir)
+  on.exit({
+    unlink(png_folder, recursive = TRUE)
+    unlink(c(db_dir, roi_dir), recursive = TRUE)
+  })
+
+  class2use <- c("unclassified", "Diatom", "Ciliate")
+  write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5)
+
+  result <- import_png_folder_with_unclassified(
+    png_folder, db_path, class2use, roi_folder = roi_dir, fill = FALSE
+  )
+
+  expect_equal(result$import$success, 2L)
+  expect_equal(result$filled$added, 0L)
+  expect_equal(result$filled$samples, 0L)
+})