Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export(related_coef)
export(repairIDs)
export(repairSex)
export(simulatePedigree)
export(simulatePedigrees)
export(sliceFamilies)
export(summariseFamilies)
export(summariseMatrilines)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* Smarter string ID handling for ped2id
* Fixed how different-sized matrices are handled by `com2links()`
* Added alignPhenToMatrix function to align phenotypic data to the order of the relatedness matrix
* Added `simulatePedigrees()` function to easily simulate multiple families at once and return them as a single combined data frame

# BGmisc 1.7.0.0
* Fixed bug in parList
Expand Down
107 changes: 106 additions & 1 deletion R/simulatePedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,7 @@
#' @param ... Additional arguments to be passed to other functions.
#' @inheritParams ped2fam
#' @param spouseID The name of the column that will contain the spouse ID in the output data frame. Default is "spID".
#' @param remap_ids logical. If TRUE, remap all ID columns to sequential integers (1, 2, 3, ...) in row order.
#' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
#' \itemize{
#' \item{fam: The family id of each simulated individual. It is 'fam1' in a single simulated pedigree.}
Expand Down Expand Up @@ -914,6 +915,7 @@
code_male = "M",
code_female = "F",
fam_shift = 1L,
remap_ids = FALSE,
beta = FALSE) {
# SexRatio: ratio of male over female in the offspring setting; used in the between generation combinations
# SexRatio <- sexR / (1 - sexR)
Expand Down Expand Up @@ -966,11 +968,24 @@
df_Fam <- df_Fam[, 1:7]
df_Fam <- df_Fam[!(is.na(df_Fam$pat) & is.na(df_Fam$mat) & is.na(df_Fam$spID)), ]

colnames(df_Fam)[c(2, 4, 5)] <- c(personID, dadID, momID)
names(df_Fam) <- c("fam", personID, "gen", dadID, momID, spouseID, "sex")

# connect the detached members
df_Fam[is.na(df_Fam[[momID]]) & is.na(df_Fam[[dadID]]) & df_Fam$gen > 1, ]


if(remap_ids) {

Check notice on line 977 in R/simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/simulatePedigree.R#L977

Place a space before left parenthesis, except in a function call. (spaces_left_parentheses_linter)
# Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
# so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
old_ids <- rbind( df_Fam[[personID]], df_Fam[[momID]], df_Fam[[dadID]], df_Fam[[spouseID]])

Check notice on line 980 in R/simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/simulatePedigree.R#L980

Do not place spaces after parentheses. (spaces_inside_linter)
old_ids <- unique(old_ids[!is.na(old_ids)])
id_map <- setNames(seq_along(old_ids), as.character(old_ids))

df_Fam[[personID]] <- as.integer(id_map[as.character( df_Fam[[personID]])])

Check notice on line 984 in R/simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/simulatePedigree.R#L984

Do not place spaces after parentheses. (spaces_inside_linter)
df_Fam[[momID]] <- as.integer(id_map[as.character( df_Fam[[momID]])])

Check notice on line 985 in R/simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/simulatePedigree.R#L985

Do not place spaces after parentheses. (spaces_inside_linter)
df_Fam[[dadID]] <- as.integer(id_map[as.character( df_Fam[[dadID]])])

Check notice on line 986 in R/simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/simulatePedigree.R#L986

Do not place spaces after parentheses. (spaces_inside_linter)
df_Fam[[spouseID]] <- as.integer(id_map[as.character( df_Fam[[spouseID]])])

Check notice on line 987 in R/simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/simulatePedigree.R#L987

Do not place spaces after parentheses. (spaces_inside_linter)
}
df_Fam
}

Expand All @@ -980,3 +995,93 @@
warning("The 'SimPed' function is deprecated. Please use 'simulatePedigree' instead.")
simulatePedigree(...)
}

#' Simulate Multiple Pedigrees
#'
#' This function simulates multiple "balanced" pedigrees and returns them
#' combined into a single data frame. It is a convenience wrapper around
#' \code{\link{simulatePedigree}} that makes it easy to simulate many families
#' at once, with unique IDs across all families.
#'
#' @param n_fam Integer. Number of families to simulate. Default is 2.
#' @param remap_ids Logical. If TRUE (default), all ID columns (personID, momID, dadID, spouseID) will be remapped to sequential integers starting at 1 across the combined data frame. This ensures tidy consecutive IDs regardless of fam_shift offsets. If FALSE, IDs will retain their original values from each pedigree simulation, which may include gaps or non-sequential values due to fam_shift.
#' @inheritParams simulatePedigree
#' @return A \code{data.frame} containing all simulated individuals from all
#' families combined, with the same columns as \code{\link{simulatePedigree}}.
#' The \code{fam} column uniquely identifies each family (e.g., "fam1",
#' "fam2", ...). Individual IDs are sequential integers starting at 1
#' (i.e., \code{1:nrow(result)}), and all parent/spouse ID references are
#' remapped to match.
#' @export
#' @examples
#' set.seed(5)
#' df_peds <- simulatePedigrees(
#' n_fam = 3,
#' kpc = 4,
#' Ngen = 4,
#' sexR = .5,
#' marR = .7
#' )
#' summary(df_peds)
simulatePedigrees <- function(n_fam = 2,
kpc = 3,
Ngen = 4,
sexR = .5,
marR = 2 / 3,
rd_kpc = FALSE,
balancedSex = TRUE,
balancedMar = TRUE,
verbose = FALSE,
personID = "ID",
momID = "momID",
dadID = "dadID",
spouseID = "spouseID",
code_male = "M",
code_female = "F",
remap_ids = TRUE,
beta = FALSE
) {
n_fam <- as.integer(n_fam)
if (is.na(n_fam) || n_fam < 1L) {
stop("'n_fam' must be a positive integer.")
}
ped_list <- vector("list", n_fam)
for (i in seq_len(n_fam)) {
ped_i <- simulatePedigree(
kpc = kpc,
Ngen = Ngen,
sexR = sexR,
marR = marR,
rd_kpc = rd_kpc,
balancedSex = balancedSex,
balancedMar = balancedMar,
verbose = verbose,
personID = personID,
momID = momID,
dadID = dadID,
spouseID = spouseID,
code_male = code_male,
code_female = code_female,
fam_shift = i,
remap_ids = FALSE, # Keep original IDs for now; we'll remap after combining.
beta = beta
)
ped_i$fam <- paste0("fam", i)
ped_list[[i]] <- ped_i
}
combined <- data.table::rbindlist(ped_list) |> as.data.frame()
names(combined) <- c("fam", personID, "gen", dadID, momID, spouseID, "sex")
if(remap_ids) {

Check notice on line 1074 in R/simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/simulatePedigree.R#L1074

Place a space before left parenthesis, except in a function call. (spaces_left_parentheses_linter)
# Remap all ID columns to sequential integers (1, 2, 3, ...) in row order,
# so the final data frame has tidy consecutive IDs regardless of fam_shift offsets.
old_ids <- rbind(combined[[personID]], combined[[momID]], combined[[dadID]], combined[[spouseID]])
old_ids <- unique(old_ids[!is.na(old_ids)])
id_map <- setNames(seq_along(old_ids), as.character(old_ids))

combined[[personID]] <- as.integer(id_map[as.character(combined[[personID]])])
combined[[momID]] <- as.integer(id_map[as.character(combined[[momID]])])
combined[[dadID]] <- as.integer(id_map[as.character(combined[[dadID]])])
combined[[spouseID]] <- as.integer(id_map[as.character(combined[[spouseID]])])
}
combined
}
3 changes: 3 additions & 0 deletions man/simulatePedigree.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

110 changes: 110 additions & 0 deletions man/simulatePedigrees.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/test-makeLinks.R
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,15 @@ test_that("com2links handles mismatched matrix dimensions by subsetting to small
# Only IDs from the smaller matrix should appear
all_output_ids <- unique(c(result_mismatch$ID1, result_mismatch$ID2))
expect_true(all(all_output_ids %in% as.numeric(dimnames(ad_small)[[1]])))
expect_true(all(all_output_ids %in% as.numeric(dimnames(mit_ped_matrix)[[1]])))
expect_true(all(all_output_ids %in% as.numeric(dimnames(cn_ped_matrix)[[1]])))

# Check that the number of unique IDs in the output matches the number of IDs in the smallest matrix
expect_equal(length(all_output_ids), length(subset_ids))

# check that full matrix has more unique IDs than the smaller matrix
expect_true(length(unique(c(dimnames(mit_ped_matrix)[[1]],
dimnames(cn_ped_matrix)[[1]]))) > length(subset_ids))
})

test_that("com2links mismatched dimensions with two matrices", {
Expand Down
71 changes: 65 additions & 6 deletions tests/testthat/test-simulatePedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,8 @@
expect_equal(max(results$gen), Ngen, tolerance = strict_tolerance)

# expect there to be parents in each for all generations except the first one
filter_parents <- results %>%
group_by(gen) %>%
summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
filter_parents <- dplyr::group_by(results, gen) %>%

Check notice on line 129 in tests/testthat/test-simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

tests/testthat/test-simulatePedigree.R#L129

Use the |> pipe operator instead of the %>% pipe operator. (pipe_consistency_linter)
dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))

expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
Expand Down Expand Up @@ -199,9 +198,8 @@
expect_lt(sex_mean_male, sex_mean_female)

# expect there to be parents in each for all generations except the first one
filter_parents <- results %>%
group_by(gen) %>%
summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))
filter_parents <- dplyr::group_by(results, gen) %>%

Check notice on line 201 in tests/testthat/test-simulatePedigree.R

View check run for this annotation

codefactor.io / CodeFactor

tests/testthat/test-simulatePedigree.R#L201

Use the |> pipe operator instead of the %>% pipe operator. (pipe_consistency_linter)
dplyr::summarize(num_parents = sum(!is.na(dadID), na.rm = TRUE) + sum(!is.na(momID), na.rm = TRUE))

expect_true(all(filter_parents$num_parents[filter_parents$gen > 1] > 0), info = paste0("Beta option: ", beta))
expect_true(all(filter_parents$num_parents[filter_parents$gen == 1] == 0), info = paste0("Beta option: ", beta))
Expand Down Expand Up @@ -289,3 +287,64 @@
"not yet implemented"
)
})

test_that("simulatePedigrees returns combined data frame for multiple families", {
set.seed(5)
n_fam <- 3
results <- simulatePedigrees(n_fam = n_fam, kpc = 3, Ngen = 4, marR = 0.6)

# Should return a data frame
expect_s3_class(results, "data.frame")

# Should have exactly n_fam unique family IDs
fam_ids <- unique(results$fam)
expect_setequal(fam_ids, paste0("fam", seq_len(n_fam)))

# All person IDs should be unique across families
expect_equal(length(unique(results$ID)), nrow(results))

# Should have standard pedigree columns
expect_true(all(c("fam", "ID", "gen", "dadID", "momID", "spouseID", "sex") %in% colnames(results)))
})

test_that("simulatePedigrees with n_fam = 1 matches simulatePedigree structure", {
set.seed(42)
result_multi <- simulatePedigrees(n_fam = 1, kpc = 3, Ngen = 4, marR = 0.6)

set.seed(42)
result_single <- simulatePedigree(kpc = 3, Ngen = 4, marR = 0.6, fam_shift = 1L)

# Both should have the same number of rows and columns
expect_equal(nrow(result_multi), nrow(result_single))
expect_equal(ncol(result_multi), ncol(result_single))
})

test_that("simulatePedigrees returns sequential IDs starting at 1", {
set.seed(5)
results <- simulatePedigrees(n_fam = 3, kpc = 3, Ngen = 4, marR = 0.6)

# Person IDs should be close to 1:nrow(results) spouse might change this but they should still be sequential and unique
expect_equal(sort(results$ID), seq_len(nrow(results)))

# All parent/spouse references should be within the ID range (or NA)
valid_ids <- seq_len(nrow(results))
expect_true(all(is.na(results$momID) | results$momID %in% valid_ids))
expect_true(all(is.na(results$dadID) | results$dadID %in% valid_ids))
expect_true(all(is.na(results$spouseID) | results$spouseID %in% valid_ids))
})

test_that("simulatePedigrees works with beta = TRUE", {
set.seed(5)
n_fam <- 2
results <- simulatePedigrees(n_fam = n_fam, kpc = 3, Ngen = 4, marR = 0.6, beta = TRUE)

expect_s3_class(results, "data.frame")
expect_equal(length(unique(results$fam)), n_fam)
expect_equal(length(unique(results$ID)), nrow(results))
})

test_that("simulatePedigrees validates n_fam input", {
expect_error(simulatePedigrees(n_fam = 0), "'n_fam' must be a positive integer")
expect_error(simulatePedigrees(n_fam = -1), "'n_fam' must be a positive integer")
expect_error(simulatePedigrees(n_fam = NA), "'n_fam' must be a positive integer")
})
2 changes: 1 addition & 1 deletion vignettes/v2_pedigree.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ df_ped_3$fam <- NULL
df_ped_3$ID <- df_ped_3$ID / 100
df_ped_3$dadID <- df_ped_3$dadID / 100
df_ped_3$momID <- df_ped_3$momID / 100
df_ped_3$spID <- df_ped_3$spID / 100
df_ped_3$spouseID <- df_ped_3$spouseID / 100
df_ped_4$famID <- 2
df_ped_4$fam <- NULL

Expand Down
Loading