R-Computing-Lab · smasongarrison · Apr 15, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/NAMESPACE b/NAMESPACE
@@ -49,6 +49,7 @@ export(related_coef)
 export(repairIDs)
 export(repairSex)
 export(simulatePedigree)
+export(simulatePedigrees)
 export(sliceFamilies)
 export(summariseFamilies)
 export(summariseMatrilines)

diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,7 @@
 * Smarter string ID handling for ped2id
 * Fixed how different-sized matrices are handled by `com2links()`
 * Added alignPhenToMatrix function to align phenotypic data to the order of the relatedness matrix
+* Added `simulatePedigrees()` function to easily simulate multiple families at once and return them as a single combined data frame
 
 # BGmisc 1.7.0.0
 * Fixed bug in parList

diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
@@ -879,6 +879,7 @@
 #' @param ... Additional arguments to be passed to other functions.
 #' @inheritParams ped2fam
 #' @param spouseID The name of the column that will contain the spouse ID in the output data frame. Default is "spID".
+#' @param remap_ids logical. If TRUE, remap all ID columns to sequential integers (1, 2, 3, ...) in row order.
 #' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
 #' \itemize{
 #'   \item{fam: The family id of each simulated individual. It is 'fam1' in a single simulated pedigree.}
@@ -914,6 +915,7 @@
                              code_male = "M",
                              code_female = "F",
                              fam_shift = 1L,
+                             remap_ids = FALSE,
                              beta = FALSE) {
   # SexRatio: ratio of male over female in the offspring setting; used in the between generation combinations
   # SexRatio <- sexR / (1 - sexR)
@@ -966,11 +968,24 @@
   df_Fam <- df_Fam[, 1:7]
   df_Fam <- df_Fam[!(is.na(df_Fam$pat) & is.na(df_Fam$mat) & is.na(df_Fam$spID)), ]
 
-  colnames(df_Fam)[c(2, 4, 5)] <- c(personID, dadID, momID)
+  names(df_Fam) <-  c("fam", personID, "gen", dadID, momID, spouseID, "sex")
 
   # connect the detached members
   df_Fam[is.na(df_Fam[[momID]]) & is.na(df_Fam[[dadID]]) & df_Fam$gen > 1, ]
 
+
+  if(remap_ids) {
+    # Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
+    # so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
+    old_ids <- rbind( df_Fam[[personID]],  df_Fam[[momID]],  df_Fam[[dadID]],  df_Fam[[spouseID]])
+    old_ids <- unique(old_ids[!is.na(old_ids)])
+    id_map <- setNames(seq_along(old_ids), as.character(old_ids))
+
+    df_Fam[[personID]] <- as.integer(id_map[as.character( df_Fam[[personID]])])
+    df_Fam[[momID]] <- as.integer(id_map[as.character( df_Fam[[momID]])])
+    df_Fam[[dadID]] <- as.integer(id_map[as.character( df_Fam[[dadID]])])
+    df_Fam[[spouseID]] <- as.integer(id_map[as.character( df_Fam[[spouseID]])])
+  }
   df_Fam
 }
 
@@ -980,3 +995,93 @@
   warning("The 'SimPed' function is deprecated. Please use 'simulatePedigree' instead.")
   simulatePedigree(...)
 }
+
+#' Simulate Multiple Pedigrees
+#'
+#' This function simulates multiple "balanced" pedigrees and returns them
+#' combined into a single data frame. It is a convenience wrapper around
+#' \code{\link{simulatePedigree}} that makes it easy to simulate many families
+#' at once, with unique IDs across all families.
+#'
+#' @param n_fam Integer. Number of families to simulate. Default is 2.
+#' @param remap_ids Logical. If TRUE (default), all ID columns (personID, momID, dadID, spouseID) will be remapped to sequential integers starting at 1 across the combined data frame. This ensures tidy consecutive IDs regardless of fam_shift offsets. If FALSE, IDs will retain their original values from each pedigree simulation, which may include gaps or non-sequential values due to fam_shift.
+#' @inheritParams simulatePedigree
+#' @return A \code{data.frame} containing all simulated individuals from all
+#'   families combined, with the same columns as \code{\link{simulatePedigree}}.
+#'   The \code{fam} column uniquely identifies each family (e.g., "fam1",
+#'   "fam2", ...). Individual IDs are sequential integers starting at 1
+#'   (i.e., \code{1:nrow(result)}), and all parent/spouse ID references are
+#'   remapped to match.
+#' @export
+#' @examples
+#' set.seed(5)
+#' df_peds <- simulatePedigrees(
+#'   n_fam = 3,
+#'   kpc = 4,
+#'   Ngen = 4,
+#'   sexR = .5,
+#'   marR = .7
+#' )
+#' summary(df_peds)
+simulatePedigrees <- function(n_fam = 2,
+                              kpc = 3,
+                              Ngen = 4,
+                              sexR = .5,
+                              marR = 2 / 3,
+                              rd_kpc = FALSE,
+                              balancedSex = TRUE,
+                              balancedMar = TRUE,
+                              verbose = FALSE,
+                              personID = "ID",
+                              momID = "momID",
+                              dadID = "dadID",
+                              spouseID = "spouseID",
+                              code_male = "M",
+                              code_female = "F",
+                              remap_ids = TRUE,
+                              beta = FALSE
+                              ) {
+  n_fam <- as.integer(n_fam)
+  if (is.na(n_fam) || n_fam < 1L) {
+    stop("'n_fam' must be a positive integer.")
+  }
+  ped_list <- vector("list", n_fam)
+  for (i in seq_len(n_fam)) {
+    ped_i <- simulatePedigree(
+      kpc = kpc,
+      Ngen = Ngen,
+      sexR = sexR,
+      marR = marR,
+      rd_kpc = rd_kpc,
+      balancedSex = balancedSex,
+      balancedMar = balancedMar,
+      verbose = verbose,
+      personID = personID,
+      momID = momID,
+      dadID = dadID,
+      spouseID = spouseID,
+      code_male = code_male,
+      code_female = code_female,
+      fam_shift = i,
+      remap_ids = FALSE, # Keep original IDs for now; we'll remap after combining.
+      beta = beta
+    )
+    ped_i$fam <- paste0("fam", i)
+    ped_list[[i]] <- ped_i
+  }
+  combined <- data.table::rbindlist(ped_list) |> as.data.frame()
+  names(combined) <-  c("fam", personID, "gen", dadID, momID, spouseID, "sex")
+if(remap_ids) {
+  # Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
+  # so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
+  old_ids <- rbind(combined[[personID]], combined[[momID]], combined[[dadID]], combined[[spouseID]])
+  old_ids <- unique(old_ids[!is.na(old_ids)])
+  id_map <- setNames(seq_along(old_ids), as.character(old_ids))
+
+  combined[[personID]] <- as.integer(id_map[as.character(combined[[personID]])])
+  combined[[momID]] <- as.integer(id_map[as.character(combined[[momID]])])
+  combined[[dadID]] <- as.integer(id_map[as.character(combined[[dadID]])])
+  combined[[spouseID]] <- as.integer(id_map[as.character(combined[[spouseID]])])
+}
+  combined
+}
diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd
diff --git a/man/simulatePedigrees.Rd b/man/simulatePedigrees.Rd
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
@@ -355,6 +355,15 @@ test_that("com2links handles mismatched matrix dimensions by subsetting to small
   # Only IDs from the smaller matrix should appear
   all_output_ids <- unique(c(result_mismatch$ID1, result_mismatch$ID2))
   expect_true(all(all_output_ids %in% as.numeric(dimnames(ad_small)[[1]])))
+  expect_true(all(all_output_ids %in% as.numeric(dimnames(mit_ped_matrix)[[1]])))
+  expect_true(all(all_output_ids %in% as.numeric(dimnames(cn_ped_matrix)[[1]])))
+
+  # Check that the number of unique IDs in the output matches the number of IDs in the smallest matrix
+  expect_equal(length(all_output_ids), length(subset_ids))
+
+  # check that full matrix has more unique IDs than the smaller matrix
+  expect_true(length(unique(c(dimnames(mit_ped_matrix)[[1]],
+                              dimnames(cn_ped_matrix)[[1]]))) > length(subset_ids))
 })
 
 test_that("com2links mismatched dimensions with two matrices", {

diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R
@@ -126,9 +126,8 @@
     expect_equal(max(results$gen), Ngen, tolerance = strict_tolerance)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results %>%
-      group_by(gen) %>%
-      summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
+    filter_parents <- dplyr::group_by(results, gen) %>%
+      dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
     expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
@@ -199,9 +198,8 @@
     expect_lt(sex_mean_male, sex_mean_female)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results %>%
-      group_by(gen) %>%
-      summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
+    filter_parents <- dplyr::group_by(results, gen) %>%
+      dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
     expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
@@ -289,3 +287,64 @@
     "not yet implemented"
   )
 })
+
+test_that("simulatePedigrees returns combined data frame for multiple families", {
+  set.seed(5)
+  n_fam <- 3
+  results <- simulatePedigrees(n_fam = n_fam, kpc = 3, Ngen = 4, marR = 0.6)
+
+  # Should return a data frame
+  expect_s3_class(results, "data.frame")
+
+  # Should have exactly n_fam unique family IDs
+  fam_ids <- unique(results$fam)
+  expect_setequal(fam_ids, paste0("fam", seq_len(n_fam)))
+
+  # All person IDs should be unique across families
+  expect_equal(length(unique(results$ID)), nrow(results))
+
+  # Should have standard pedigree columns
+  expect_true(all(c("fam", "ID", "gen", "dadID", "momID", "spouseID", "sex") %in% colnames(results)))
+})
+
+test_that("simulatePedigrees with n_fam = 1 matches simulatePedigree structure", {
+  set.seed(42)
+  result_multi <- simulatePedigrees(n_fam = 1, kpc = 3, Ngen = 4, marR = 0.6)
+
+  set.seed(42)
+  result_single <- simulatePedigree(kpc = 3, Ngen = 4, marR = 0.6, fam_shift = 1L)
+
+  # Both should have the same number of rows and columns
+  expect_equal(nrow(result_multi), nrow(result_single))
+  expect_equal(ncol(result_multi), ncol(result_single))
+})
+
+test_that("simulatePedigrees returns sequential IDs starting at 1", {
+  set.seed(5)
+  results <- simulatePedigrees(n_fam = 3, kpc = 3, Ngen = 4, marR = 0.6)
+
+  # Person IDs should be close to 1:nrow(results) spouse might change this but they should still be sequential and unique
+  expect_equal(sort(results$ID), seq_len(nrow(results)))
+
+  # All parent/spouse references should be within the ID range (or NA)
+  valid_ids <- seq_len(nrow(results))
+  expect_true(all(is.na(results$momID) | results$momID %in% valid_ids))
+  expect_true(all(is.na(results$dadID) | results$dadID %in% valid_ids))
+  expect_true(all(is.na(results$spouseID) | results$spouseID %in% valid_ids))
+})
+
+test_that("simulatePedigrees works with beta = TRUE", {
+  set.seed(5)
+  n_fam <- 2
+  results <- simulatePedigrees(n_fam = n_fam, kpc = 3, Ngen = 4, marR = 0.6, beta = TRUE)
+
+  expect_s3_class(results, "data.frame")
+  expect_equal(length(unique(results$fam)), n_fam)
+  expect_equal(length(unique(results$ID)), nrow(results))
+})
+
+test_that("simulatePedigrees validates n_fam input", {
+  expect_error(simulatePedigrees(n_fam = 0), "'n_fam' must be a positive integer")
+  expect_error(simulatePedigrees(n_fam = -1), "'n_fam' must be a positive integer")
+  expect_error(simulatePedigrees(n_fam = NA), "'n_fam' must be a positive integer")
+})
diff --git a/vignettes/v2_pedigree.Rmd b/vignettes/v2_pedigree.Rmd
@@ -97,7 +97,7 @@ df_ped_3$fam <- NULL
 df_ped_3$ID <- df_ped_3$ID / 100
 df_ped_3$dadID <- df_ped_3$dadID / 100
 df_ped_3$momID <- df_ped_3$momID / 100
-df_ped_3$spID <- df_ped_3$spID / 100
+df_ped_3$spouseID <- df_ped_3$spouseID / 100
 df_ped_4$famID <- 2
 df_ped_4$fam <- NULL