From 8bf36d6c2ed260dd4be075342c3453d3397ca63c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:35:59 +0000
Subject: [PATCH 1/9] Initial plan


From b6de8f3c367feb6d80fac5b615341b947abaaf69 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:39:43 +0000
Subject: [PATCH 2/9] Add simulatePedigrees() function to simulate multiple
 families at once

Agent-Logs-Url: https://github.com/R-Computing-Lab/BGmisc/sessions/9367c84a-deb8-4f13-a855-367a204cc843

Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com>
---
 NAMESPACE                              |   1 +
 NEWS.md                                |   1 +
 R/simulatePedigree.R                   |  66 ++++++++++++++++
 man/simulatePedigrees.Rd               | 105 +++++++++++++++++++++++++
 tests/testthat/test-simulatePedigree.R |  45 +++++++++++
 5 files changed, 218 insertions(+)
 create mode 100644 man/simulatePedigrees.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 6a7305c5..cfdbe433 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -49,6 +49,7 @@ export(related_coef)
 export(repairIDs)
 export(repairSex)
 export(simulatePedigree)
+export(simulatePedigrees)
 export(sliceFamilies)
 export(summariseFamilies)
 export(summariseMatrilines)
diff --git a/NEWS.md b/NEWS.md
index 93e04c5f..f125a57d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,6 +7,7 @@
 * Smarter string ID handling for ped2id
 * Fixed how different-sized matrices are handled by `com2links()`
 * Added alignPhenToMatrix function to align phenotypic data to the order of the relatedness matrix
+* Added `simulatePedigrees()` function to easily simulate multiple families at once and return them as a single combined data frame
 
 # BGmisc 1.7.0.0
 * Fixed bug in parList
diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
index f3c170b6..33965c15 100644
--- a/R/simulatePedigree.R
+++ b/R/simulatePedigree.R
@@ -980,3 +980,69 @@ SimPed <- function(...) { # nolint: object_name_linter.
   warning("The 'SimPed' function is deprecated. Please use 'simulatePedigree' instead.")
   simulatePedigree(...)
 }
+
+#' Simulate Multiple Pedigrees
+#'
+#' This function simulates multiple "balanced" pedigrees and returns them
+#' combined into a single data frame. It is a convenience wrapper around
+#' \code{\link{simulatePedigree}} that makes it easy to simulate many families
+#' at once, with unique IDs across all families.
+#'
+#' @param n_fam Integer. Number of families to simulate. Default is 2.
+#' @inheritParams simulatePedigree
+#' @return A \code{data.frame} containing all simulated individuals from all
+#'   families combined, with the same columns as \code{\link{simulatePedigree}}.
+#'   The \code{fam} column uniquely identifies each family (e.g., "fam1",
+#'   "fam2", ...). Individual IDs are guaranteed to be unique across families.
+#' @export
+#' @examples
+#' set.seed(5)
+#' df_peds <- simulatePedigrees(
+#'   n_fam = 3,
+#'   kpc = 4,
+#'   Ngen = 4,
+#'   sexR = .5,
+#'   marR = .7
+#' )
+#' summary(df_peds)
+simulatePedigrees <- function(n_fam = 2,
+                              kpc = 3,
+                              Ngen = 4,
+                              sexR = .5,
+                              marR = 2 / 3,
+                              rd_kpc = FALSE,
+                              balancedSex = TRUE,
+                              balancedMar = TRUE,
+                              verbose = FALSE,
+                              personID = "ID",
+                              momID = "momID",
+                              dadID = "dadID",
+                              spouseID = "spouseID",
+                              code_male = "M",
+                              code_female = "F",
+                              beta = FALSE) {
+  ped_list <- vector("list", n_fam)
+  for (i in seq_len(n_fam)) {
+    ped_i <- simulatePedigree(
+      kpc = kpc,
+      Ngen = Ngen,
+      sexR = sexR,
+      marR = marR,
+      rd_kpc = rd_kpc,
+      balancedSex = balancedSex,
+      balancedMar = balancedMar,
+      verbose = verbose,
+      personID = personID,
+      momID = momID,
+      dadID = dadID,
+      spouseID = spouseID,
+      code_male = code_male,
+      code_female = code_female,
+      fam_shift = i,
+      beta = beta
+    )
+    ped_i$fam <- paste0("fam", i)
+    ped_list[[i]] <- ped_i
+  }
+  do.call(rbind, ped_list)
+}
diff --git a/man/simulatePedigrees.Rd b/man/simulatePedigrees.Rd
new file mode 100644
index 00000000..2f06ac50
--- /dev/null
+++ b/man/simulatePedigrees.Rd
@@ -0,0 +1,105 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simulatePedigree.R
+\name{simulatePedigrees}
+\alias{simulatePedigrees}
+\title{Simulate Multiple Pedigrees}
+\usage{
+simulatePedigrees(
+  n_fam = 2,
+  kpc = 3,
+  Ngen = 4,
+  sexR = 0.5,
+  marR = 2/3,
+  rd_kpc = FALSE,
+  balancedSex = TRUE,
+  balancedMar = TRUE,
+  verbose = FALSE,
+  personID = "ID",
+  momID = "momID",
+  dadID = "dadID",
+  spouseID = "spouseID",
+  code_male = "M",
+  code_female = "F",
+  beta = FALSE
+)
+}
+\arguments{
+\item{n_fam}{Integer. Number of families to simulate. Default is 2.}
+
+\item{kpc}{Number of kids per couple. An integer >= 2 that determines how
+many kids each fertilized mated couple will have in the pedigree. Default
+value is 3. Returns an error when kpc equals 1.}
+
+\item{Ngen}{Number of generations. An integer >= 2 that determines how many
+generations the simulated pedigree will have. The first generation is always
+a fertilized couple. The last generation has no mated individuals.}
+
+\item{sexR}{Sex ratio of offspring. A numeric value ranging from 0 to 1 that
+determines the proportion of males in all offspring in this pedigree. For
+instance, 0.4 means 40 percent of the offspring will be male.}
+
+\item{marR}{Mating rate. A numeric value ranging from 0 to 1 which determines
+the proportion of mated (fertilized) couples in the pedigree within each
+generation. For instance, marR  = 0.5 suggests 50 percent of the offspring in
+ a specific generation will be  mated and have their offspring.}
+
+\item{rd_kpc}{logical. If TRUE, the number of kids per mate will be randomly
+generated from a poisson distribution with mean kpc. If FALSE, the number of
+kids per mate will be fixed at kpc.}
+
+\item{balancedSex}{Not fully developed yet. Always \code{TRUE} in the
+current version.}
+
+\item{balancedMar}{Not fully developed yet. Always \code{TRUE} in the
+current version.}
+
+\item{verbose}{logical  If TRUE, message progress through stages of algorithm}
+
+\item{personID}{character.  Name of the column in ped for the person ID variable}
+
+\item{momID}{character.  Name of the column in ped for the mother ID variable}
+
+\item{dadID}{character.  Name of the column in ped for the father ID variable}
+
+\item{spouseID}{The name of the column that will contain the spouse ID in the output data frame. Default is "spouseID".}
+
+\item{code_male}{The value to use for males. Default is "M"}
+
+\item{code_female}{The value to use for females. Default is "F"}
+
+\item{beta}{logical or character. Controls which algorithm version to use:
+\itemize{
+  \item{\code{FALSE}, \code{"base"}, or \code{"original"} (default): Use the original algorithm.
+        Slower but ensures exact reproducibility with set.seed().}
+  \item{\code{TRUE} or \code{"optimized"}: Use the optimized algorithm with 4-5x speedup.
+        Produces statistically equivalent results but not identical to base version
+        due to different random number consumption. Recommended for large simulations
+        where speed matters more than exact reproducibility.}
+}
+Note: Both versions are mathematically correct and produce valid pedigrees with the
+same statistical properties (sex ratios, mating rates, etc.). The optimized version
+uses vectorized operations instead of loops, making it much faster for large pedigrees.}
+}
+\value{
+A \code{data.frame} containing all simulated individuals from all
+  families combined, with the same columns as \code{\link{simulatePedigree}}.
+  The \code{fam} column uniquely identifies each family (e.g., "fam1",
+  "fam2", ...). Individual IDs are guaranteed to be unique across families.
+}
+\description{
+This function simulates multiple "balanced" pedigrees and returns them
+combined into a single data frame. It is a convenience wrapper around
+\code{\link{simulatePedigree}} that makes it easy to simulate many families
+at once, with unique IDs across all families.
+}
+\examples{
+set.seed(5)
+df_peds <- simulatePedigrees(
+  n_fam = 3,
+  kpc = 4,
+  Ngen = 4,
+  sexR = .5,
+  marR = .7
+)
+summary(df_peds)
+}
diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R
index 7c88145c..81d3f2a9 100644
--- a/tests/testthat/test-simulatePedigree.R
+++ b/tests/testthat/test-simulatePedigree.R
@@ -289,3 +289,48 @@ test_that("simulatePedigree accepts string aliases for beta parameter", {
     "not yet implemented"
   )
 })
+
+test_that("simulatePedigrees returns combined data frame for multiple families", {
+  set.seed(5)
+  n_fam <- 3
+  results <- simulatePedigrees(n_fam = n_fam, kpc = 3, Ngen = 4, marR = 0.6)
+
+  # Should return a data frame
+  expect_s3_class(results, "data.frame")
+
+  # Should have exactly n_fam unique family IDs
+  fam_ids <- unique(results$fam)
+  expect_equal(length(fam_ids), n_fam)
+  expect_true(all(paste0("fam", seq_len(n_fam)) %in% fam_ids))
+
+  # All person IDs should be unique across families
+  expect_equal(length(unique(results$ID)), nrow(results))
+
+  # Should have standard pedigree columns
+  expect_true(all(c("fam", "ID", "gen", "dadID", "momID", "spouseID", "sex") %in% colnames(results)))
+})
+
+test_that("simulatePedigrees with n_fam = 1 matches simulatePedigree structure", {
+  set.seed(42)
+  result_multi <- simulatePedigrees(n_fam = 1, kpc = 3, Ngen = 4, marR = 0.6)
+
+  set.seed(42)
+  result_single <- simulatePedigree(kpc = 3, Ngen = 4, marR = 0.6, fam_shift = 1L)
+
+  # Both should have the same number of rows and columns
+  expect_equal(nrow(result_multi), nrow(result_single))
+  expect_equal(ncol(result_multi), ncol(result_single))
+
+  # The IDs should match
+  expect_equal(result_multi$ID, result_single$ID)
+})
+
+test_that("simulatePedigrees works with beta = TRUE", {
+  set.seed(5)
+  n_fam <- 2
+  results <- simulatePedigrees(n_fam = n_fam, kpc = 3, Ngen = 4, marR = 0.6, beta = TRUE)
+
+  expect_s3_class(results, "data.frame")
+  expect_equal(length(unique(results$fam)), n_fam)
+  expect_equal(length(unique(results$ID)), nrow(results))
+})

From 34f818b57e0f5673b3512d5fcc50ab276f706c06 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:40:45 +0000
Subject: [PATCH 3/9] Address code review: input validation, use rbindlist, fix
 test assertions

Agent-Logs-Url: https://github.com/R-Computing-Lab/BGmisc/sessions/9367c84a-deb8-4f13-a855-367a204cc843

Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com>
---
 R/simulatePedigree.R                   | 6 +++++-
 tests/testthat/test-simulatePedigree.R | 9 +++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
index 33965c15..d48c8ad4 100644
--- a/R/simulatePedigree.R
+++ b/R/simulatePedigree.R
@@ -1021,6 +1021,10 @@ simulatePedigrees <- function(n_fam = 2,
                               code_male = "M",
                               code_female = "F",
                               beta = FALSE) {
+  n_fam <- as.integer(n_fam)
+  if (is.na(n_fam) || n_fam < 1L) {
+    stop("'n_fam' must be a positive integer.")
+  }
   ped_list <- vector("list", n_fam)
   for (i in seq_len(n_fam)) {
     ped_i <- simulatePedigree(
@@ -1044,5 +1048,5 @@ simulatePedigrees <- function(n_fam = 2,
     ped_i$fam <- paste0("fam", i)
     ped_list[[i]] <- ped_i
   }
-  do.call(rbind, ped_list)
+  data.table::rbindlist(ped_list) |> as.data.frame()
 }
diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R
index 81d3f2a9..5f3e2873 100644
--- a/tests/testthat/test-simulatePedigree.R
+++ b/tests/testthat/test-simulatePedigree.R
@@ -300,8 +300,7 @@ test_that("simulatePedigrees returns combined data frame for multiple families",
 
   # Should have exactly n_fam unique family IDs
   fam_ids <- unique(results$fam)
-  expect_equal(length(fam_ids), n_fam)
-  expect_true(all(paste0("fam", seq_len(n_fam)) %in% fam_ids))
+  expect_setequal(fam_ids, paste0("fam", seq_len(n_fam)))
 
   # All person IDs should be unique across families
   expect_equal(length(unique(results$ID)), nrow(results))
@@ -334,3 +333,9 @@ test_that("simulatePedigrees works with beta = TRUE", {
   expect_equal(length(unique(results$fam)), n_fam)
   expect_equal(length(unique(results$ID)), nrow(results))
 })
+
+test_that("simulatePedigrees validates n_fam input", {
+  expect_error(simulatePedigrees(n_fam = 0), "'n_fam' must be a positive integer")
+  expect_error(simulatePedigrees(n_fam = -1), "'n_fam' must be a positive integer")
+  expect_error(simulatePedigrees(n_fam = NA), "'n_fam' must be a positive integer")
+})

From f3910a31231030f7bb31f6b3fe2a616213e270a4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:41:33 +0000
Subject: [PATCH 4/9] Fix minor punctuation in man/simulatePedigrees.Rd
 documentation

Agent-Logs-Url: https://github.com/R-Computing-Lab/BGmisc/sessions/9367c84a-deb8-4f13-a855-367a204cc843

Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com>
---
 man/simulatePedigrees.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/simulatePedigrees.Rd b/man/simulatePedigrees.Rd
index 2f06ac50..6397da55 100644
--- a/man/simulatePedigrees.Rd
+++ b/man/simulatePedigrees.Rd
@@ -53,7 +53,7 @@ current version.}
 \item{balancedMar}{Not fully developed yet. Always \code{TRUE} in the
 current version.}
 
-\item{verbose}{logical  If TRUE, message progress through stages of algorithm}
+\item{verbose}{logical. If TRUE, message progress through stages of algorithm}
 
 \item{personID}{character.  Name of the column in ped for the person ID variable}
 

From 1c193a46f13957383c1c786fa2ce645c927c0a83 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:48:41 +0000
Subject: [PATCH 5/9] feat: remap IDs to sequential integers in
 simulatePedigrees()

Agent-Logs-Url: https://github.com/R-Computing-Lab/BGmisc/sessions/44f2511b-4beb-420a-924c-bd42c5f1de1f

Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com>
---
 R/simulatePedigree.R                   | 24 ++++++++++++++++++++++--
 tests/testthat/test-simulatePedigree.R | 15 +++++++++++++--
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
index d48c8ad4..5e1fa4c0 100644
--- a/R/simulatePedigree.R
+++ b/R/simulatePedigree.R
@@ -993,7 +993,9 @@ SimPed <- function(...) { # nolint: object_name_linter.
 #' @return A \code{data.frame} containing all simulated individuals from all
 #'   families combined, with the same columns as \code{\link{simulatePedigree}}.
 #'   The \code{fam} column uniquely identifies each family (e.g., "fam1",
-#'   "fam2", ...). Individual IDs are guaranteed to be unique across families.
+#'   "fam2", ...). Individual IDs are sequential integers starting at 1
+#'   (i.e., \code{1:nrow(result)}), and all parent/spouse ID references are
+#'   remapped to match.
 #' @export
 #' @examples
 #' set.seed(5)
@@ -1048,5 +1050,23 @@ simulatePedigrees <- function(n_fam = 2,
     ped_i$fam <- paste0("fam", i)
     ped_list[[i]] <- ped_i
   }
-  data.table::rbindlist(ped_list) |> as.data.frame()
+  combined <- data.table::rbindlist(ped_list) |> as.data.frame()
+
+  # Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
+  # so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
+  old_ids <- combined[[personID]]
+  id_map <- setNames(seq_along(old_ids), as.character(old_ids))
+
+  remap_col <- function(x) {
+    out <- id_map[as.character(x)]
+    out[is.na(x)] <- NA_integer_
+    as.integer(out)
+  }
+
+  combined[[personID]] <- as.integer(id_map[as.character(old_ids)])
+  combined[[momID]] <- remap_col(combined[[momID]])
+  combined[[dadID]] <- remap_col(combined[[dadID]])
+  combined[[spouseID]] <- remap_col(combined[[spouseID]])
+
+  combined
 }
diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R
index 5f3e2873..c2b5d66b 100644
--- a/tests/testthat/test-simulatePedigree.R
+++ b/tests/testthat/test-simulatePedigree.R
@@ -319,9 +319,20 @@ test_that("simulatePedigrees with n_fam = 1 matches simulatePedigree structure",
   # Both should have the same number of rows and columns
   expect_equal(nrow(result_multi), nrow(result_single))
   expect_equal(ncol(result_multi), ncol(result_single))
+})
+
+test_that("simulatePedigrees returns sequential IDs starting at 1", {
+  set.seed(5)
+  results <- simulatePedigrees(n_fam = 3, kpc = 3, Ngen = 4, marR = 0.6)
+
+  # Person IDs should be exactly 1:nrow
+  expect_equal(results$ID, seq_len(nrow(results)))
 
-  # The IDs should match
-  expect_equal(result_multi$ID, result_single$ID)
+  # All parent/spouse references should be within the ID range (or NA)
+  valid_ids <- seq_len(nrow(results))
+  expect_true(all(is.na(results$momID) | results$momID %in% valid_ids))
+  expect_true(all(is.na(results$dadID) | results$dadID %in% valid_ids))
+  expect_true(all(is.na(results$spouseID) | results$spouseID %in% valid_ids))
 })
 
 test_that("simulatePedigrees works with beta = TRUE", {

From 32684f627bdf927174903d74ceb2d857b0bb2094 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 13 Apr 2026 17:07:15 -0400
Subject: [PATCH 6/9] Squashed commit of the following:

commit 42ae819f65bba25c79cbff86dfadca0a2b53f065
Author: Mason Garrison <garrissm@wfu.edu>
Date:   Mon Apr 13 16:08:31 2026 -0400

    Update test-makeLinks.R
---
 tests/testthat/test-makeLinks.R | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
index f2340485..59e0847a 100644
--- a/tests/testthat/test-makeLinks.R
+++ b/tests/testthat/test-makeLinks.R
@@ -355,6 +355,15 @@ test_that("com2links handles mismatched matrix dimensions by subsetting to small
   # Only IDs from the smaller matrix should appear
   all_output_ids <- unique(c(result_mismatch$ID1, result_mismatch$ID2))
   expect_true(all(all_output_ids %in% as.numeric(dimnames(ad_small)[[1]])))
+  expect_true(all(all_output_ids %in% as.numeric(dimnames(mit_ped_matrix)[[1]])))
+  expect_true(all(all_output_ids %in% as.numeric(dimnames(cn_ped_matrix)[[1]])))
+
+  # Check that the number of unique IDs in the output matches the number of IDs in the smallest matrix
+  expect_equal(length(all_output_ids), length(subset_ids))
+
+  # check that full matrix has more unique IDs than the smaller matrix
+  expect_true(length(unique(c(dimnames(mit_ped_matrix)[[1]],
+                              dimnames(cn_ped_matrix)[[1]]))) > length(subset_ids))
 })
 
 test_that("com2links mismatched dimensions with two matrices", {

From b33820a56cd766bb9e1aef3acc01dd420200a8a5 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 13 Apr 2026 17:35:57 -0400
Subject: [PATCH 7/9] smarter id handling

---
 R/simulatePedigree.R                   | 45 +++++++++++++++++---------
 man/simulatePedigree.Rd                |  3 ++
 man/simulatePedigrees.Rd               | 11 +++++--
 tests/testthat/test-simulatePedigree.R | 16 ++++-----
 4 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
index 5e1fa4c0..f48cd0d3 100644
--- a/R/simulatePedigree.R
+++ b/R/simulatePedigree.R
@@ -879,6 +879,7 @@ buildBtwnGenerations_opt <- function(df_Fam,
 #' @param ... Additional arguments to be passed to other functions.
 #' @inheritParams ped2fam
 #' @param spouseID The name of the column that will contain the spouse ID in the output data frame. Default is "spID".
+#' @param remap_ids logical. If TRUE, remap all ID columns to sequential integers (1, 2, 3, ...) in row order.
 #' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
 #' \itemize{
 #'   \item{fam: The family id of each simulated individual. It is 'fam1' in a single simulated pedigree.}
@@ -914,6 +915,7 @@ simulatePedigree <- function(kpc = 3,
                              code_male = "M",
                              code_female = "F",
                              fam_shift = 1L,
+                             remap_ids = FALSE,
                              beta = FALSE) {
   # SexRatio: ratio of male over female in the offspring setting; used in the between generation combinations
   # SexRatio <- sexR / (1 - sexR)
@@ -966,11 +968,24 @@ simulatePedigree <- function(kpc = 3,
   df_Fam <- df_Fam[, 1:7]
   df_Fam <- df_Fam[!(is.na(df_Fam$pat) & is.na(df_Fam$mat) & is.na(df_Fam$spID)), ]
 
-  colnames(df_Fam)[c(2, 4, 5)] <- c(personID, dadID, momID)
+  names(df_Fam) <-  c("fam", personID, "gen", dadID, momID, spouseID, "sex")
 
   # connect the detached members
   df_Fam[is.na(df_Fam[[momID]]) & is.na(df_Fam[[dadID]]) & df_Fam$gen > 1, ]
 
+
+  if(remap_ids) {
+    # Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
+    # so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
+    old_ids <- rbind( df_Fam[[personID]],  df_Fam[[momID]],  df_Fam[[dadID]],  df_Fam[[spouseID]])
+    old_ids <- unique(old_ids[!is.na(old_ids)])
+    id_map <- setNames(seq_along(old_ids), as.character(old_ids))
+
+    df_Fam[[personID]] <- as.integer(id_map[as.character( df_Fam[[personID]])])
+    df_Fam[[momID]] <- as.integer(id_map[as.character( df_Fam[[momID]])])
+    df_Fam[[dadID]] <- as.integer(id_map[as.character( df_Fam[[dadID]])])
+    df_Fam[[spouseID]] <- as.integer(id_map[as.character( df_Fam[[spouseID]])])
+  }
   df_Fam
 }
 
@@ -989,6 +1004,7 @@ SimPed <- function(...) { # nolint: object_name_linter.
 #' at once, with unique IDs across all families.
 #'
 #' @param n_fam Integer. Number of families to simulate. Default is 2.
+#' @param remap_ids Logical. If TRUE (default), all ID columns (personID, momID, dadID, spouseID) will be remapped to sequential integers starting at 1 across the combined data frame. This ensures tidy consecutive IDs regardless of fam_shift offsets. If FALSE, IDs will retain their original values from each pedigree simulation, which may include gaps or non-sequential values due to fam_shift.
 #' @inheritParams simulatePedigree
 #' @return A \code{data.frame} containing all simulated individuals from all
 #'   families combined, with the same columns as \code{\link{simulatePedigree}}.
@@ -1022,7 +1038,9 @@ simulatePedigrees <- function(n_fam = 2,
                               spouseID = "spouseID",
                               code_male = "M",
                               code_female = "F",
-                              beta = FALSE) {
+                              remap_ids = TRUE,
+                              beta = FALSE
+                              ) {
   n_fam <- as.integer(n_fam)
   if (is.na(n_fam) || n_fam < 1L) {
     stop("'n_fam' must be a positive integer.")
@@ -1045,28 +1063,25 @@ simulatePedigrees <- function(n_fam = 2,
       code_male = code_male,
       code_female = code_female,
       fam_shift = i,
+      remap_ids = FALSE, # Keep original IDs for now; we'll remap after combining.
       beta = beta
     )
     ped_i$fam <- paste0("fam", i)
     ped_list[[i]] <- ped_i
   }
   combined <- data.table::rbindlist(ped_list) |> as.data.frame()
-
+  names(combined) <-  c("fam", personID, "gen", dadID, momID, spouseID, "sex")
+if(remap_ids) {
   # Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
   # so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
-  old_ids <- combined[[personID]]
+  old_ids <- rbind(combined[[personID]], combined[[momID]], combined[[dadID]], combined[[spouseID]])
+  old_ids <- unique(old_ids[!is.na(old_ids)])
   id_map <- setNames(seq_along(old_ids), as.character(old_ids))
 
-  remap_col <- function(x) {
-    out <- id_map[as.character(x)]
-    out[is.na(x)] <- NA_integer_
-    as.integer(out)
-  }
-
-  combined[[personID]] <- as.integer(id_map[as.character(old_ids)])
-  combined[[momID]] <- remap_col(combined[[momID]])
-  combined[[dadID]] <- remap_col(combined[[dadID]])
-  combined[[spouseID]] <- remap_col(combined[[spouseID]])
-
+  combined[[personID]] <- as.integer(id_map[as.character(combined[[personID]])])
+  combined[[momID]] <- as.integer(id_map[as.character(combined[[momID]])])
+  combined[[dadID]] <- as.integer(id_map[as.character(combined[[dadID]])])
+  combined[[spouseID]] <- as.integer(id_map[as.character(combined[[spouseID]])])
+}
   combined
 }
diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd
index d56ab72c..df600970 100644
--- a/man/simulatePedigree.Rd
+++ b/man/simulatePedigree.Rd
@@ -26,6 +26,7 @@ simulatePedigree(
   code_male = "M",
   code_female = "F",
   fam_shift = 1L,
+  remap_ids = FALSE,
   beta = FALSE
 )
 
@@ -76,6 +77,8 @@ current version.}
 \item{fam_shift}{An integer to shift the person ID. Default is 1L.
 This is useful when simulating multiple pedigrees to avoid ID conflicts.}
 
+\item{remap_ids}{logical. If TRUE, remap all ID columns to sequential integers (1, 2, 3, ...) in row order.}
+
 \item{beta}{logical or character. Controls which algorithm version to use:
 \itemize{
   \item{\code{FALSE}, \code{"base"}, or \code{"original"} (default): Use the original algorithm.
diff --git a/man/simulatePedigrees.Rd b/man/simulatePedigrees.Rd
index 6397da55..0ae193d5 100644
--- a/man/simulatePedigrees.Rd
+++ b/man/simulatePedigrees.Rd
@@ -20,6 +20,7 @@ simulatePedigrees(
   spouseID = "spouseID",
   code_male = "M",
   code_female = "F",
+  remap_ids = TRUE,
   beta = FALSE
 )
 }
@@ -53,7 +54,7 @@ current version.}
 \item{balancedMar}{Not fully developed yet. Always \code{TRUE} in the
 current version.}
 
-\item{verbose}{logical. If TRUE, message progress through stages of algorithm}
+\item{verbose}{logical  If TRUE, message progress through stages of algorithm}
 
 \item{personID}{character.  Name of the column in ped for the person ID variable}
 
@@ -61,12 +62,14 @@ current version.}
 
 \item{dadID}{character.  Name of the column in ped for the father ID variable}
 
-\item{spouseID}{The name of the column that will contain the spouse ID in the output data frame. Default is "spouseID".}
+\item{spouseID}{The name of the column that will contain the spouse ID in the output data frame. Default is "spID".}
 
 \item{code_male}{The value to use for males. Default is "M"}
 
 \item{code_female}{The value to use for females. Default is "F"}
 
+\item{remap_ids}{Logical. If TRUE (default), all ID columns (personID, momID, dadID, spouseID) will be remapped to sequential integers starting at 1 across the combined data frame. This ensures tidy consecutive IDs regardless of fam_shift offsets. If FALSE, IDs will retain their original values from each pedigree simulation, which may include gaps or non-sequential values due to fam_shift.}
+
 \item{beta}{logical or character. Controls which algorithm version to use:
 \itemize{
   \item{\code{FALSE}, \code{"base"}, or \code{"original"} (default): Use the original algorithm.
@@ -84,7 +87,9 @@ uses vectorized operations instead of loops, making it much faster for large ped
 A \code{data.frame} containing all simulated individuals from all
   families combined, with the same columns as \code{\link{simulatePedigree}}.
   The \code{fam} column uniquely identifies each family (e.g., "fam1",
-  "fam2", ...). Individual IDs are guaranteed to be unique across families.
+  "fam2", ...). Individual IDs are sequential integers starting at 1
+  (i.e., \code{1:nrow(result)}), and all parent/spouse ID references are
+  remapped to match.
 }
 \description{
 This function simulates multiple "balanced" pedigrees and returns them
diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R
index c2b5d66b..86044c01 100644
--- a/tests/testthat/test-simulatePedigree.R
+++ b/tests/testthat/test-simulatePedigree.R
@@ -126,9 +126,9 @@ test_that("simulated pedigree generates expected data structure when sexR is imb
     expect_equal(max(results$gen), Ngen, tolerance = strict_tolerance)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results %>%
-      group_by(gen) %>%
-      summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
+    filter_parents <- results |>
+      dplyr::group_by(gen) |>
+      dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
     expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
@@ -199,9 +199,9 @@ test_that("simulated pedigree generates expected data structure but supply var n
     expect_lt(sex_mean_male, sex_mean_female)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results %>%
-      group_by(gen) %>%
-      summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
+    filter_parents <- results |>
+      dplyr::group_by(gen) |>
+      dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
     expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
@@ -325,8 +325,8 @@ test_that("simulatePedigrees returns sequential IDs starting at 1", {
   set.seed(5)
   results <- simulatePedigrees(n_fam = 3, kpc = 3, Ngen = 4, marR = 0.6)
 
-  # Person IDs should be exactly 1:nrow
-  expect_equal(results$ID, seq_len(nrow(results)))
+  # Person IDs should be close to 1:nrow(results) spouse might change this but they should still be sequential and unique
+  expect_equal(sort(results$ID), seq_len(nrow(results)))
 
   # All parent/spouse references should be within the ID range (or NA)
   valid_ids <- seq_len(nrow(results))

From 4455fc703e921da22f6841048ffcc79636cfe964 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 13 Apr 2026 17:47:47 -0400
Subject: [PATCH 8/9] pipes!

---
 tests/testthat/test-simulatePedigree.R | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R
index 86044c01..0ae04a5e 100644
--- a/tests/testthat/test-simulatePedigree.R
+++ b/tests/testthat/test-simulatePedigree.R
@@ -126,8 +126,7 @@ test_that("simulated pedigree generates expected data structure when sexR is imb
     expect_equal(max(results$gen), Ngen, tolerance = strict_tolerance)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results |>
-      dplyr::group_by(gen) |>
+    filter_parents <- dplyr::group_by(results, gen) %>%
       dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
@@ -199,8 +198,7 @@ test_that("simulated pedigree generates expected data structure but supply var n
     expect_lt(sex_mean_male, sex_mean_female)
 
     # expect there to be parents in each for all generations except the first one
-    filter_parents <- results |>
-      dplyr::group_by(gen) |>
+    filter_parents <- dplyr::group_by(results, gen) %>%
       dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
 
     expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))

From c2fa6057244bff2da054786ef0d0995d45835df1 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 13 Apr 2026 18:03:27 -0400
Subject: [PATCH 9/9] Update v2_pedigree.Rmd

---
 vignettes/v2_pedigree.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/v2_pedigree.Rmd b/vignettes/v2_pedigree.Rmd
index b68b21f2..e0d0e1ae 100644
--- a/vignettes/v2_pedigree.Rmd
+++ b/vignettes/v2_pedigree.Rmd
@@ -97,7 +97,7 @@ df_ped_3$fam <- NULL
 df_ped_3$ID <- df_ped_3$ID / 100
 df_ped_3$dadID <- df_ped_3$dadID / 100
 df_ped_3$momID <- df_ped_3$momID / 100
-df_ped_3$spID <- df_ped_3$spID / 100
+df_ped_3$spouseID <- df_ped_3$spouseID / 100
 df_ped_4$famID <- 2
 df_ped_4$fam <- NULL