Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export(get_settings_path)
export(import_all_mat_to_db)
export(import_mat_to_db)
export(import_png_folder_to_db)
export(import_png_folder_with_unclassified)
export(init_python_env)
export(is_valid_sample_name)
export(list_annotated_samples_db)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## New features

- New exported function `fill_unclassified_db()` backfills a partially imported sample. After importing only selected taxa with `import_png_folder_to_db()`, it reads each sample's complete ROI list from its `.adc` file and inserts the remaining ROIs as `unclassified` (marked as not yet reviewed), leaving existing annotations untouched.
- New exported function `import_png_folder_with_unclassified()` wraps `import_png_folder_to_db()` and `fill_unclassified_db()` into a single call. It imports the selected-taxa PNGs and then backfills the remaining ROIs as `unclassified` for **only the samples that were just imported**, so each imported sample is fully represented without touching samples from earlier import sessions.

## Bug fixes

Expand Down
62 changes: 62 additions & 0 deletions R/database.R
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,68 @@ fill_unclassified_db <- function(db_path, roi_folder, samples = NULL,
counts
}

#' Import class-sorted PNGs and mark every other ROI as "unclassified"
#'
#' One-call wrapper around \code{\link{import_png_folder_to_db}} and
#' \code{\link{fill_unclassified_db}}. The PNG import runs first; then, unless
#' \code{fill} is switched off, the remaining ROIs are backfilled as
#' \code{"unclassified"}. The backfill is restricted to the samples found in
#' \code{png_folder}, so samples stored by earlier import sessions are not
#' passed to the backfill step.
#'
#' @param png_folder Path to the top-level folder containing class subfolders
#' @param db_path Path to the SQLite database file
#' @param class2use Character vector of class names (preserves index order for
#'   .mat export)
#' @param roi_folder Base ROI folder path, following the standard IFCB folder
#'   structure (\code{roi_folder/YYYY/DYYYYMMDD/sample_name.adc}). Only used by
#'   the backfill step, which reads each sample's complete ROI list from it.
#' @param class_mapping Optional named character vector mapping scanned class
#'   names to target class names. Forwarded unchanged to
#'   \code{\link{import_png_folder_to_db}}.
#' @param annotator Annotator name (defaults to \code{"imported"}). The same
#'   name is used for the import and for the backfill.
#' @param fill Logical. \code{TRUE} (the default) backfills the samples found
#'   in \code{png_folder} with \code{"unclassified"} ROIs; any other value
#'   performs the import only.
#' @return Named list with two elements: \code{import}, the value returned by
#'   \code{\link{import_png_folder_to_db}}, and \code{filled}, the value
#'   returned by \code{\link{fill_unclassified_db}}. When \code{fill} is not
#'   \code{TRUE}, or the folder contains no samples, \code{filled} is
#'   \code{list(added = 0L, samples = 0L, skipped = 0L)}.
#' @seealso \code{\link{import_png_folder_to_db}}, \code{\link{fill_unclassified_db}}
#' @export
#' @examples
#' \dontrun{
#' db_path <- get_db_path("/data/manual")
#' class2use <- c("Diatom", "Dinoflagellate", "Ciliate")
#' result <- import_png_folder_with_unclassified(
#'   "/data/png_export", db_path, class2use,
#'   roi_folder = "/data/ifcb/raw"
#' )
#' cat(result$import$success, "imported,",
#'     result$filled$added, "backfilled\n")
#' }
import_png_folder_with_unclassified <- function(png_folder, db_path, class2use,
                                                roi_folder,
                                                class_mapping = NULL,
                                                annotator = "imported",
                                                fill = TRUE) {
  import_result <- import_png_folder_to_db(
    png_folder, db_path, class2use,
    class_mapping = class_mapping,
    annotator = annotator
  )

  # Placeholder reported whenever the backfill step does not run.
  nothing_filled <- list(added = 0L, samples = 0L, skipped = 0L)

  if (!isTRUE(fill)) {
    return(list(import = import_result, filled = nothing_filled))
  }

  # The sample list comes from re-scanning the PNG folder rather than from the
  # import result.
  # NOTE(review): this presumably also includes samples whose import failed;
  # confirm whether those should be backfilled.
  scanned <- scan_png_class_folder(png_folder)
  folder_samples <- unique(scanned$annotations$sample_name)

  if (length(folder_samples) == 0) {
    return(list(import = import_result, filled = nothing_filled))
  }

  backfill_result <- fill_unclassified_db(
    db_path, roi_folder,
    samples = folder_samples,
    annotator = annotator
  )

  list(import = import_result, filled = backfill_result)
}

#' Bulk export all annotated samples from SQLite to class-organized PNGs
#'
#' Exports every annotated sample in the database to PNG images organized
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ reference:
- export_all_db_to_zip
- import_png_folder_to_db
- fill_unclassified_db
- import_png_folder_with_unclassified
- list_classes_db
- save_class_taxonomy_db
- load_class_taxonomy_db
Expand Down
66 changes: 66 additions & 0 deletions man/import_png_folder_with_unclassified.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions tests/testthat/helper-adc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Shared test helper: write a mock ADC file with n_roi rows in the standard
# IFCB folder structure under roi_folder
# (roi_folder/YYYY/DYYYYMMDD/sample_name.adc). Columns 16/17 are width/height;
# ROIs listed in zero_dims get width/height 0 (no image). n_roi may be 0, in
# which case an empty .adc file is written. Returns the path of the file.
write_mock_adc <- function(roi_folder, sample_name, n_roi,
                           zero_dims = integer(0)) {
  # For a name like "D20230101T120000_IFCB134", characters 2-5 give the year
  # folder ("2023") and characters 1-9 the day folder ("D20230101").
  year <- substr(sample_name, 2, 5)
  date_part <- substr(sample_name, 1, 9)
  adc_dir <- file.path(roi_folder, year, date_part)
  dir.create(adc_dir, recursive = TRUE, showWarnings = FALSE)
  adc_path <- file.path(adc_dir, paste0(sample_name, ".adc"))

  width <- rep(100L, n_roi)
  height <- rep(80L, n_roi)
  width[zero_dims] <- 0L
  height[zero_dims] <- 0L

  # Size the filler columns explicitly: a length-one 0 cannot be recycled to
  # zero rows, so data.frame() would error for n_roi = 0.
  filler <- rep(0, n_roi)
  mock <- data.frame(
    V1 = seq_len(n_roi), V2 = filler, V3 = filler, V4 = filler, V5 = filler,
    V6 = filler, V7 = filler, V8 = filler, V9 = filler, V10 = filler,
    V11 = filler, V12 = filler, V13 = filler, V14 = filler, V15 = filler,
    V16 = width, V17 = height
  )
  write.table(mock, adc_path, row.names = FALSE, col.names = FALSE, sep = ",")
  adc_path
}
26 changes: 1 addition & 25 deletions tests/testthat/test-database.R
Original file line number Diff line number Diff line change
Expand Up @@ -2042,31 +2042,7 @@ test_that("export_all_db_to_png filters by samples parameter", {
})

# fill_unclassified_db tests

# Helper: write a mock ADC file with n_roi rows in the standard IFCB folder
# structure under roi_folder. Columns 16/17 are width/height; ROIs listed in
# zero_dims get width/height 0 (no image). n_roi = 0 is allowed and produces
# an empty .adc file. Returns the path of the written file.
write_mock_adc <- function(roi_folder, sample_name, n_roi,
                           zero_dims = integer(0)) {
  # Year folder = characters 2-5 of the sample name, day folder = characters
  # 1-9 (e.g. "2023" and "D20230101" for "D20230101T120000_IFCB134").
  year <- substr(sample_name, 2, 5)
  date_part <- substr(sample_name, 1, 9)
  adc_dir <- file.path(roi_folder, year, date_part)
  dir.create(adc_dir, recursive = TRUE, showWarnings = FALSE)
  adc_path <- file.path(adc_dir, paste0(sample_name, ".adc"))

  width <- rep(100L, n_roi)
  height <- rep(80L, n_roi)
  width[zero_dims] <- 0L
  height[zero_dims] <- 0L

  # A scalar 0 cannot be recycled against zero-length columns, so build the
  # constant columns at full length to keep n_roi = 0 working.
  zeros <- rep(0, n_roi)
  mock <- data.frame(
    V1 = seq_len(n_roi), V2 = zeros, V3 = zeros, V4 = zeros, V5 = zeros,
    V6 = zeros, V7 = zeros, V8 = zeros, V9 = zeros, V10 = zeros,
    V11 = zeros, V12 = zeros, V13 = zeros, V14 = zeros, V15 = zeros,
    V16 = width, V17 = height
  )
  write.table(mock, adc_path, row.names = FALSE, col.names = FALSE, sep = ",")
  adc_path
}
# (write_mock_adc helper lives in helper-adc.R)

test_that("fill_unclassified_db backfills missing ROIs as unclassified", {
db_dir <- tempfile("db_")
Expand Down
108 changes: 108 additions & 0 deletions tests/testthat/test-png-import.R
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,111 @@ test_that("scan_png_class_folder parses example_data/png correctly", {
# Known classes from example data (without _NNN suffix)
expect_true("Mesodinium_rubrum" %in% result$classes_found)
})

# ===========================================================================
# import_png_folder_with_unclassified tests
# ===========================================================================

test_that("import_png_folder_with_unclassified imports then backfills", {
  png_folder <- create_test_png_folder()
  db_dir <- tempfile("db_")
  roi_dir <- tempfile("roi_")
  dir.create(db_dir)
  db_path <- get_db_path(db_dir)
  on.exit({
    unlink(png_folder, recursive = TRUE)
    unlink(c(db_dir, roi_dir), recursive = TRUE)
  }, add = TRUE)

  class2use <- c("unclassified", "Diatom", "Ciliate")

  # Sample 1 has ROIs 1,2,3 imported; the .adc says it really has 5 ROIs
  write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5)
  # Sample 2 has ROI 1 imported; the .adc says it really has 3 ROIs
  write_mock_adc(roi_dir, "D20230202T080000_IFCB134", n_roi = 3)

  result <- import_png_folder_with_unclassified(
    png_folder, db_path, class2use, roi_folder = roi_dir,
    annotator = "TestUser"
  )

  expect_equal(result$import$success, 2L)
  expect_equal(result$import$failed, 0L)
  # Sample 1: ROIs 4,5 missing; Sample 2: ROIs 2,3 missing
  expect_equal(result$filled$added, 4L)
  expect_equal(result$filled$samples, 2L)
  expect_equal(result$filled$skipped, 0L)

  con <- dbConnect(SQLite(), db_path)
  # after = FALSE puts the disconnect ahead of the unlink() handler, so the
  # connection is closed before the temp directories are deleted (an open
  # database file can block deletion on some platforms, e.g. Windows).
  on.exit(dbDisconnect(con), add = TRUE, after = FALSE)

  s1 <- dbGetQuery(con,
                   "SELECT roi_number, class_name, is_manual FROM annotations
WHERE sample_name = ? ORDER BY roi_number",
                   params = list("D20230101T120000_IFCB134"))
  expect_equal(s1$roi_number, 1:5)
  expect_equal(s1$class_name[4:5], c("unclassified", "unclassified"))
  expect_equal(s1$is_manual[4:5], c(0L, 0L))
})

test_that("import_png_folder_with_unclassified only backfills imported samples", {
  png_folder <- create_test_png_folder()
  db_dir <- tempfile("db_")
  roi_dir <- tempfile("roi_")
  dir.create(db_dir)
  db_path <- get_db_path(db_dir)
  on.exit({
    unlink(png_folder, recursive = TRUE)
    unlink(c(db_dir, roi_dir), recursive = TRUE)
  }, add = TRUE)

  class2use <- c("unclassified", "Diatom", "Ciliate")

  # A pre-existing sample from an earlier session, present in the DB but NOT
  # in the PNG folder being imported now.
  other_sample <- "D20221231T000000_IFCB134"
  save_annotations_db(db_path, other_sample,
                      data.frame(file_name = paste0(other_sample, "_00001.png"),
                                 class_name = "Diatom",
                                 stringsAsFactors = FALSE),
                      class2use, "Earlier")
  # Its .adc lists 9 ROIs, so an unwanted backfill would add 8 rows.
  write_mock_adc(roi_dir, other_sample, n_roi = 9)

  # ADCs for the samples actually in the PNG folder
  write_mock_adc(roi_dir, "D20230101T120000_IFCB134", n_roi = 5)
  write_mock_adc(roi_dir, "D20230202T080000_IFCB134", n_roi = 3)

  import_png_folder_with_unclassified(
    png_folder, db_path, class2use, roi_folder = roi_dir
  )

  # The earlier sample must be untouched (still just its 1 imported ROI)
  con <- dbConnect(SQLite(), db_path)
  # after = FALSE puts the disconnect ahead of the unlink() handler, so the
  # connection is closed before the temp directories are deleted (an open
  # database file can block deletion on some platforms, e.g. Windows).
  on.exit(dbDisconnect(con), add = TRUE, after = FALSE)
  n_other <- dbGetQuery(con,
                        "SELECT COUNT(*) AS n FROM annotations WHERE sample_name = ?",
                        params = list(other_sample))$n
  expect_equal(n_other, 1L)
})

test_that("import_png_folder_with_unclassified skips backfill when fill = FALSE", {
  # Fixture: PNG class folder plus fresh temp locations for the DB and ROIs.
  src_png <- create_test_png_folder()
  tmp_db_dir <- tempfile("db_")
  tmp_roi_dir <- tempfile("roi_")
  dir.create(tmp_db_dir)
  sqlite_file <- get_db_path(tmp_db_dir)
  on.exit(unlink(c(src_png, tmp_db_dir, tmp_roi_dir), recursive = TRUE))

  class_names <- c("unclassified", "Diatom", "Ciliate")

  # An ADC with extra ROIs exists, so a backfill would add rows if it ran.
  write_mock_adc(tmp_roi_dir, "D20230101T120000_IFCB134", n_roi = 5)

  outcome <- import_png_folder_with_unclassified(
    src_png, sqlite_file, class_names,
    roi_folder = tmp_roi_dir,
    fill = FALSE
  )

  # Import still happens; the backfill counters stay at zero.
  expect_equal(outcome$import$success, 2L)
  expect_equal(outcome$filled$added, 0L)
  expect_equal(outcome$filled$samples, 0L)
})
Loading