|
# Internal helper: read a chunk from a CSV with optional retry on smaller chunks
#
# Reads `chunk_size` rows beginning at `start_line` using data.table::fread()
# (no header, comma separated, short rows filled). When the read raises an
# error the error is reported with message() and, if `progress_status` is
# supplied, appended to that log file. With `error_handling = TRUE` the read
# is then retried with the chunk size halved, up to `max_retries` times.
# The halved size is always a whole number and never drops below 1, so the
# value handed to fread() and returned to the caller is a valid row count.
#
# @param input_file Path to CSV file
# @param start_line Line to start reading from (1-based; `start_line - 1`
#   lines are skipped)
# @param chunk_size Number of rows to read
# @param error_handling If TRUE, retry with halved chunk_size up to
#   max_retries times
# @param max_retries Maximum number of retries (each halves chunk_size)
# @param progress_status Path to progress status log file, or NULL to
#   disable file logging
# @return A list with components `data` (the fread() result, or NULL if every
#   attempt failed) and `chunk_size` (possibly reduced)
# @keywords internal
.safe_fread <- function(input_file,
                        start_line, chunk_size,
                        error_handling = FALSE,
                        max_retries = 3,
                        progress_status = NULL) {
  attempt <- 0
  result <- NULL

  repeat {
    result <- tryCatch(
      data.table::fread(input_file,
        skip = start_line - 1,
        nrows = chunk_size,
        header = FALSE,
        sep = ",",
        fill = TRUE
      ),
      error = function(e) {
        err_msg <- conditionMessage(e)
        message("Error reading file: ", err_msg)
        if (!is.null(progress_status)) {
          base::cat(
            paste0(
              "Error reading file at line ", start_line, ": ", err_msg, "\n"
            ),
            file = progress_status, append = TRUE
          )
        }
        # NULL marks the attempt as failed for the retry logic below.
        NULL
      }
    )

    # Stop on success, when retries are disabled, or when they are used up.
    if (!is.null(result) || !error_handling || attempt >= max_retries) {
      break
    }

    # Retry with a smaller chunk. Plain division would yield fractional row
    # counts (e.g. 5 -> 2.5 -> 1.25 -> 0.625), so round down and keep at
    # least one row.
    attempt <- attempt + 1
    chunk_size <- max(1, floor(chunk_size / 2))
    message(
      "Trying smaller chunk size (attempt ", attempt, "/", max_retries, "): ",
      chunk_size
    )
    # Release memory from the failed attempt before trying again.
    gc()
  }

  list(data = result, chunk_size = chunk_size)
}
| 55 | + |
# Internal helper: filter data by relatedness bin and mitRel value, then
# append to CSV
#
# Keeps the rows whose `addRel`, rounded to 6 decimal places, lies in the
# half-open interval [range_min, range_max) and whose `mitRel` equals
# `mit_val`. If any rows remain they are appended, without header, to
# `df_mt<mit_val>_r<range_min>-r<range_max>.csv` inside `data_directory`.
# Nothing is written when no row matches. Rows with a missing `addRel` or
# `mitRel` never match, whether `data` is a data.table or a plain data.frame.
#
# @param data A data.table with columns including addRel and mitRel
# @param range_min Minimum additive relatedness for this bin (inclusive)
# @param range_max Maximum additive relatedness for this bin (exclusive)
# @param mit_val mitochondrial relatedness value to filter on (0 or 1)
# @param data_directory Output directory path
# @param verbose Print file names if TRUE
# @keywords internal
.write_bin_data <- function(data, range_min, range_max, mit_val, data_directory, verbose = FALSE) {
  # Round once; rounding guards the bin edges against floating point noise.
  add_rel <- base::round(data$addRel, 6)

  # which() discards NA comparisons, so missing values cannot turn into
  # all-NA rows when `data` is a plain data.frame. A single-symbol index is
  # used so data.table looks it up in this function, not among the columns.
  bin_rows <- base::which(
    add_rel >= range_min &
      add_rel < range_max &
      data$mitRel == mit_val
  )
  range_data <- data[bin_rows, ]

  if (base::nrow(range_data) > 0) {
    file_name <- file.path(
      data_directory,
      paste0("df_mt", mit_val, "_r", range_min, "-r", range_max, ".csv")
    )
    if (verbose) {
      message(file_name)
    }
    data.table::fwrite(range_data,
      file = file_name,
      sep = ",",
      append = TRUE,
      row.names = FALSE,
      col.names = FALSE
    )
  }
}
0 commit comments