R-Computing-Lab
diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
Lines changed: 30 additions & 15 deletions
diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd
Lines changed: 3 additions & 0 deletions
diff --git a/man/simulatePedigrees.Rd b/man/simulatePedigrees.Rd
Lines changed: 8 additions & 3 deletions
diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R
Lines changed: 8 additions & 8 deletions
@@ -879,6 +879,7 @@ buildBtwnGenerations_opt <- function(df_Fam,
 #' @param ... Additional arguments to be passed to other functions.
 #' @inheritParams ped2fam
 #' @param spouseID The name of the column that will contain the spouse ID in the output data frame. Default is "spID".
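+#' @param remap_ids Logical. If TRUE, remap all ID columns (personID, momID, dadID, spouseID) to sequential integers (1, 2, 3, ...), numbered in order of first appearance across those columns. Default is FALSE.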
 #' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
 #' \itemize{
 #'   \item{fam: The family id of each simulated individual. It is 'fam1' in a single simulated pedigree.}
@@ -914,6 +915,7 @@ simulatePedigree <- function(kpc = 3,
                              code_male = "M",
                              code_female = "F",
                              fam_shift = 1L,
+                             remap_ids = FALSE,
                              beta = FALSE) {
   # SexRatio: ratio of male over female in the offspring setting; used in the between generation combinations
   # SexRatio <- sexR / (1 - sexR)
@@ -966,11 +968,24 @@ simulatePedigree <- function(kpc = 3,
   df_Fam <- df_Fam[, 1:7]
   df_Fam <- df_Fam[!(is.na(df_Fam$pat) & is.na(df_Fam$mat) & is.na(df_Fam$spID)), ]
 
-  colnames(df_Fam)[c(2, 4, 5)] <- c(personID, dadID, momID)
+  names(df_Fam) <-  c("fam", personID, "gen", dadID, momID, spouseID, "sex")
 
   # connect the detached members
   df_Fam[is.na(df_Fam[[momID]]) & is.na(df_Fam[[dadID]]) & df_Fam$gen > 1, ]
 
+
+  if(remap_ids) {
+    # Remap all ID columns to consecutive integers (1, 2, 3, ...) regardless of fam_shift offsets.
+    # IDs are numbered by first appearance, scanning person, mom, dad, spouse within each row.
+    old_ids <- rbind( df_Fam[[personID]],  df_Fam[[momID]],  df_Fam[[dadID]],  df_Fam[[spouseID]])
+    old_ids <- unique(old_ids[!is.na(old_ids)])
+    id_map <- setNames(seq_along(old_ids), as.character(old_ids))
+
+    df_Fam[[personID]] <- as.integer(id_map[as.character( df_Fam[[personID]])])
+    df_Fam[[momID]] <- as.integer(id_map[as.character( df_Fam[[momID]])])
+    df_Fam[[dadID]] <- as.integer(id_map[as.character( df_Fam[[dadID]])])
+    df_Fam[[spouseID]] <- as.integer(id_map[as.character( df_Fam[[spouseID]])])
+  }
   df_Fam
 }
 
@@ -989,6 +1004,7 @@ SimPed <- function(...) { # nolint: object_name_linter.
 #' at once, with unique IDs across all families.
 #'
 #' @param n_fam Integer. Number of families to simulate. Default is 2.
+#' @param remap_ids Logical. If TRUE (default), all ID columns (personID, momID, dadID, spouseID) are remapped to sequential integers starting at 1 across the combined data frame, giving consecutive IDs regardless of fam_shift offsets. If FALSE, IDs keep the values produced by each pedigree simulation, which may contain gaps due to fam_shift.
 #' @inheritParams simulatePedigree
 #' @return A \code{data.frame} containing all simulated individuals from all
 #'   families combined, with the same columns as \code{\link{simulatePedigree}}.
@@ -1022,7 +1038,9 @@ simulatePedigrees <- function(n_fam = 2,
                               spouseID = "spouseID",
                               code_male = "M",
                               code_female = "F",
-                              beta = FALSE) {
+                              remap_ids = TRUE,
+                              beta = FALSE
+                              ) {
   n_fam <- as.integer(n_fam)
   if (is.na(n_fam) || n_fam < 1L) {
     stop("'n_fam' must be a positive integer.")
@@ -1045,28 +1063,25 @@ simulatePedigrees <- function(n_fam = 2,
       code_male = code_male,
       code_female = code_female,
       fam_shift = i,
+      remap_ids = FALSE, # Keep original IDs for now; we'll remap after combining.
       beta = beta
     )
     ped_i$fam <- paste0("fam", i)
     ped_list[[i]] <- ped_i
   }
   combined <- data.table::rbindlist(ped_list) |> as.data.frame()
-
+  names(combined) <-  c("fam", personID, "gen", dadID, momID, spouseID, "sex")
+if(remap_ids) {
   # Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
   # so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
-  old_ids <- combined[[personID]]
+  old_ids <- rbind(combined[[personID]], combined[[momID]], combined[[dadID]], combined[[spouseID]])
+  old_ids <- unique(old_ids[!is.na(old_ids)])
   id_map <- setNames(seq_along(old_ids), as.character(old_ids))
 
-  remap_col <- function(x) {
-    out <- id_map[as.character(x)]
-    out[is.na(x)] <- NA_integer_
-    as.integer(out)
-  }
-
-  combined[[personID]] <- as.integer(id_map[as.character(old_ids)])
-  combined[[momID]] <- remap_col(combined[[momID]])
-  combined[[dadID]] <- remap_col(combined[[dadID]])
-  combined[[spouseID]] <- remap_col(combined[[spouseID]])
-
+  combined[[personID]] <- as.integer(id_map[as.character(combined[[personID]])])
+  combined[[momID]] <- as.integer(id_map[as.character(combined[[momID]])])
+  combined[[dadID]] <- as.integer(id_map[as.character(combined[[dadID]])])
+  combined[[spouseID]] <- as.integer(id_map[as.character(combined[[spouseID]])])
+}
   combined
 }
@@ -126,9 +126,9 @@ test_that("simulated pedigree generates expected data structure when sexR is imb
     expect_equal(max(results$gen), Ngen, tolerance = strict_tolerance)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results %>%
-      group_by(gen) %>%
-      summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
+    filter_parents <- results |>
+      dplyr::group_by(gen) |>
+      dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
     expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
@@ -199,9 +199,9 @@ test_that("simulated pedigree generates expected data structure but supply var n
     expect_lt(sex_mean_male, sex_mean_female)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results %>%
-      group_by(gen) %>%
-      summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
+    filter_parents <- results |>
+      dplyr::group_by(gen) |>
+      dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
     expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
@@ -325,8 +325,8 @@ test_that("simulatePedigrees returns sequential IDs starting at 1", {
   set.seed(5)
   results <- simulatePedigrees(n_fam = 3, kpc = 3, Ngen = 4, marR = 0.6)
 
-  # Person IDs should be exactly 1:nrow
-  expect_equal(results$ID, seq_len(nrow(results)))
+  # Person IDs need not be in row order (a spouse or parent can be numbered before its own row is reached), but once sorted they must be exactly 1:nrow(results), i.e. sequential and unique.
+  expect_equal(sort(results$ID), seq_len(nrow(results)))
 
   # All parent/spouse references should be within the ID range (or NA)
   valid_ids <- seq_len(nrow(results))