Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BGmisc
Title: An R Package for Extended Behavior Genetics Analysis
Version: 1.7.0.1
Version: 1.7.0.1.1
Authors@R: c(
person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-4804-6003")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(SimPed)
export(addPersonToPed)
export(alignPhenToMatrix)
export(buildFamilyGroups)
export(buildOneFamilyGroup)
export(buildPedigreeModelCovariance)
Expand Down
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# BGmisc NEWS

# Development version:

## BGmisc 1.7.0.1.1
* Optimized sliceFamilies to be more abstract
* Created `.require_openmx()` to make it easier to use OpenMx functions without making OpenMx a dependency
* Smarter string ID handling for ped2id
* Fixed how different-sized matrices are handled by `com2links()`
* Added alignPhenToMatrix function to align phenotypic data to the order of the relatedness matrix

# BGmisc 1.7.0.0
* Fixed bug in parList
Expand Down
18 changes: 18 additions & 0 deletions R/buildmxPedigrees.R
Original file line number Diff line number Diff line change
Expand Up @@ -420,3 +420,21 @@ fitPedigreeModel <- function(
}
fitted_model
}


#' Align Phenotype Vector to Matrix Format for OpenMx
#'
#' This function takes a pedigree data frame, a specified phenotype column, and a vector of IDs to keep, and returns a matrix formatted for use in OpenMx models. The resulting matrix has one row and columns corresponding to the specified IDs, with values taken from the phenotype column of the pedigree.
#' @param ped A data frame representing the pedigree, containing at least the columns specified by \code{phenotype} and \code{personID}.
#' @param phenotype A character string specifying the column name in \code{ped} that
#' contains the phenotype values to be aligned.
#' @param keep_ids A vector of IDs for which the phenotype values should be extracted and aligned. These IDs should correspond to the values in the \code{personID} of \code{ped}.
#' @param personID A character string specifying the column name in \code{ped} that contains the individual IDs. Default is "ID".
#' @export


alignPhenToMatrix <- function(ped, phenotype, keep_ids, personID = "ID") {
obs_ids <- make.names(as.character(keep_ids))
pheno_vals <- ped[[phenotype]][match(as.character(keep_ids), as.character(ped[[personID]]))]
matrix(as.double(pheno_vals), nrow = 1, dimnames = list(NULL, obs_ids))
}
52 changes: 41 additions & 11 deletions R/makeLinks.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,22 +79,50 @@ com2links <- function(

# Extract individual IDs from the first available matrix.
ids <- NULL
# Find the smallest matrix by ncol (avoids extracting IDs from large matrices).
mat_refs <- list()
if (!is.null(ad_ped_matrix)) mat_refs[["ad"]] <- ncol(ad_ped_matrix)
if (!is.null(mit_ped_matrix)) mat_refs[["mt"]] <- ncol(mit_ped_matrix)
if (!is.null(cn_ped_matrix)) mat_refs[["cn"]] <- ncol(cn_ped_matrix)

if (length(mat_refs) == 0L) {
stop("At least one relationship matrix must be provided.")
}

smallest <- names(which.min(unlist(mat_refs)))
guide_mat <- switch(smallest,
ad = ad_ped_matrix,
mt = mit_ped_matrix,
cn = cn_ped_matrix
)

if (!is.null(cn_ped_matrix)) {
ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
nc <- ncol(cn_ped_matrix)
} else if (!is.null(ad_ped_matrix)) {
ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
nc <- ncol(ad_ped_matrix)
} else if (!is.null(mit_ped_matrix)) {
ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
nc <- ncol(mit_ped_matrix)
# Extract IDs only from the smallest matrix.
guide_ids <- dimnames(guide_mat)[[1]]
if (is.null(guide_ids) || length(guide_ids) == 0L) {
stop("Could not extract IDs from the smallest matrix.")
}
ids <- suppressWarnings(as.numeric(guide_ids))
if (anyNA(ids)) {
warning(
"Matrix dimnames used as IDs should be strictly numeric for 'com2links()'. Found non-numeric or non-coercible IDs in the smallest matrix."
)
}
nc <- length(ids)

if (is.null(ids)) {
stop("Could not extract IDs from the provided matrices.")
# Subset only the larger matrices to match the smallest matrix's IDs and ordering.
if (!is.null(ad_ped_matrix) && ncol(ad_ped_matrix) > nc) {
if (verbose) message("Subsetting ad_ped_matrix from ", ncol(ad_ped_matrix), " to ", nc, " IDs.")
ad_ped_matrix <- ad_ped_matrix[guide_ids, guide_ids, drop = FALSE]
}
if (!is.null(mit_ped_matrix) && ncol(mit_ped_matrix) > nc) {
if (verbose) message("Subsetting mit_ped_matrix from ", ncol(mit_ped_matrix), " to ", nc, " IDs.")
mit_ped_matrix <- mit_ped_matrix[guide_ids, guide_ids, drop = FALSE]
}
if (!is.null(cn_ped_matrix) && ncol(cn_ped_matrix) > nc) {
if (verbose) message("Subsetting cn_ped_matrix from ", ncol(cn_ped_matrix), " to ", nc, " IDs.")
cn_ped_matrix <- cn_ped_matrix[guide_ids, guide_ids, drop = FALSE]
Comment thread
smasongarrison marked this conversation as resolved.
}


# --- matrix_case construction and switch dispatch ---
matrix_case <- paste(sort(c(
Expand Down Expand Up @@ -311,6 +339,8 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk,
file = rel_pairs_file,
row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
)
} else {
if (verbose) cat("No related pairs to write.\n")
}
}
if (gc == TRUE) {
Expand Down
32 changes: 28 additions & 4 deletions R/segmentPedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @param dadID character. Name of the column in ped for the father ID variable
#' @param famID character. Name of the column to be created in ped for the family ID variable
#' @param twinID character. Name of the column in ped for the twin ID variable, if applicable
#' @param overwrite logical. If TRUE, will overwrite existing famID variable if it exists. Default is TRUE.
#' @param ... additional arguments to be passed to \code{\link{ped2com}}
#' @details
#' The general idea of this function is to use person ID, mother ID, and father ID to
Expand All @@ -31,18 +32,20 @@
ped2fam <- function(ped, personID = "ID",
momID = "momID", dadID = "dadID", famID = "famID",
twinID = "twinID",
overwrite = TRUE,
...) {
Comment thread
smasongarrison marked this conversation as resolved.
# Call to wrapper function
.ped2id(
ped = ped, personID = personID, momID = momID, dadID = dadID, famID = famID, twinID = twinID,
type = "parents"
type = "parents",
overwrite = overwrite
)
}

.ped2id <- function(ped,
personID = "ID", momID = "momID", dadID = "dadID",
famID = "famID", twinID = "twinID",
type,
type, overwrite = TRUE,
...) {
# Turn pedigree into family
pg <- ped2graph(
Expand All @@ -55,23 +58,44 @@

# Create famID data.frame
# Convert IDs to numeric, with warning if coercion collapses IDs

uniques <- suppressWarnings(unique(as.numeric(names(wcc$membership))))
keep_string <- FALSE

if (length(uniques) == 1L && is.na(uniques)) {
warning("After converting IDs to numeric, all IDs became NA. This indicates ID coercion collapsed IDs. Please ensure IDs aren't character or factor variables.")

keep_string <- TRUE
} else if (length(uniques) < length(wcc$membership)) {
warning("After converting IDs to numeric, some IDs became NA. This indicates ID coercion collapsed some IDs. Please ensure IDs aren't character or factor variables.")
Comment thread
smasongarrison marked this conversation as resolved.
keep_string <- TRUE
}
if(keep_string==TRUE) {

Check notice on line 72 in R/segmentPedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/segmentPedigree.R#L72

Place a space before left parenthesis, except in a function call. (spaces_left_parentheses_linter)

Check notice on line 72 in R/segmentPedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/segmentPedigree.R#L72

Put spaces around all infix operators. (infix_spaces_linter)
fam <- data.frame(
V1 = names(wcc$membership),
V2 = wcc$membership
)
} else {
} else {
fam <- data.frame(
V1 = as.numeric(names(wcc$membership)),
V2 = wcc$membership
)
}

names(fam) <- c(personID, famID)

if(famID %in% names(ped)) {

Check notice on line 86 in R/segmentPedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/segmentPedigree.R#L86

Place a space before left parenthesis, except in a function call. (spaces_left_parentheses_linter)
if(overwrite) {

Check notice on line 87 in R/segmentPedigree.R

View check run for this annotation

codefactor.io / CodeFactor

R/segmentPedigree.R#L87

Place a space before left parenthesis, except in a function call. (spaces_left_parentheses_linter)
overwrite_message <- "be overwritten."
ped[[famID]] <- NULL
} else {
overwrite_message <- "not be overwritten."
fam[[famID]] <- NULL
}

warning(sprintf("The famID variable '%s' already exists in the pedigree. The existing variable will %s", famID, overwrite_message))

}

ped2 <- merge(fam, ped,
by = personID, all.x = FALSE, all.y = TRUE
)
Expand Down
21 changes: 21 additions & 0 deletions man/alignPhenToMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/ped2fam.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 60 additions & 3 deletions tests/testthat/test-buildmxPedigrees.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,9 @@ test_that("fitPedigreeModel errors without OpenMx", {
regexp = "OpenMx"
)

expect_error(
.require_openmx()
)
expect_error(
.require_openmx()
)
})

test_that("fitPedigreeModel runs end-to-end with a trivial dataset", {
Expand Down Expand Up @@ -314,3 +314,60 @@ test_that("fitPedigreeModel errors when group_models and data are both NULL", {
regexp = "Either 'group_models' or 'data' must be provided"
)
})

# ─── alignPhenToMatrix ────────────────────────────────────────────────────────

test_that("alignPhenToMatrix returns a 1-row matrix with correct values", {
ped <- data.frame(ID = c(1L, 2L, 3L), pheno = c(1.1, 2.2, 3.3))
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 2L, 3L))
expect_true(is.matrix(result))
expect_equal(nrow(result), 1L)
expect_equal(ncol(result), 3L)
expect_equal(as.numeric(result), c(1.1, 2.2, 3.3))
})

test_that("alignPhenToMatrix subsets to only the requested IDs", {
ped <- data.frame(ID = c(1L, 2L, 3L, 4L), pheno = c(10.0, 20.0, 30.0, 40.0))
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(2L, 4L))
expect_equal(ncol(result), 2L)
expect_equal(as.numeric(result), c(20.0, 40.0))
})

test_that("alignPhenToMatrix preserves the order of keep_ids", {
ped <- data.frame(ID = c(1L, 2L, 3L), pheno = c(10.0, 20.0, 30.0))
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(3L, 1L, 2L))
expect_equal(as.numeric(result), c(30.0, 10.0, 20.0))
})

test_that("alignPhenToMatrix column names are valid R names", {
# IDs starting with a digit are not valid R names; make.names() should fix them
ped <- data.frame(ID = c("1a", "2b"), pheno = c(5.5, 6.6))
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c("1a", "2b"))
expect_true(all(make.names(colnames(result)) == colnames(result)))
})

test_that("alignPhenToMatrix returns NA for IDs not present in the pedigree", {
ped <- data.frame(ID = c(1L, 2L), pheno = c(1.0, 2.0))
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 99L))
expect_equal(ncol(result), 2L)
ref_mat <- matrix(c(1.0, NA), nrow = 1, dimnames = list(NULL, c("X1", "X99")))
expect_equal(result[1, 1], ref_mat[1, 1])
expect_true(is.na(result[1, 2]))
})

test_that("alignPhenToMatrix respects a custom personID column", {
ped <- data.frame(personID = c("A", "B", "C"), score = c(7.0, 8.0, 9.0))
result <- alignPhenToMatrix(ped,
phenotype = "score",
keep_ids = c("B", "C"),
personID = "personID"
)
expect_equal(ncol(result), 2L)
expect_equal(as.numeric(result), c(8.0, 9.0))
})

test_that("alignPhenToMatrix coerces phenotype values to double", {
ped <- data.frame(ID = c(1L, 2L), pheno = c(1L, 2L)) # integer phenotype
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 2L))
expect_true(is.double(result))
})
79 changes: 79 additions & 0 deletions tests/testthat/test-makeLinks.R
Original file line number Diff line number Diff line change
Expand Up @@ -324,3 +324,82 @@ test_that("com2links garbage collection does not affect output, using two compon

expect_equal(result_gc, result_no_gc)
})


test_that("com2links handles mismatched matrix dimensions by subsetting to smallest", {
data(hazard)
subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]

ad_small <- ped2add(hazard, sparse = TRUE, keep_ids = subset_ids)
mit_ped_matrix <- ped2mit(hazard, sparse = TRUE)
cn_ped_matrix <- ped2cn(hazard, sparse = TRUE)

# All three matrices, ad is smaller
result_mismatch <- com2links(
ad_ped_matrix = ad_small,
mit_ped_matrix = mit_ped_matrix,
cn_ped_matrix = cn_ped_matrix,
writetodisk = FALSE
)

# Reference: all three matrices built from the same subset
result_ref <- com2links(
ad_ped_matrix = ad_small,
mit_ped_matrix = ped2mit(hazard, sparse = TRUE, keep_ids = subset_ids),
cn_ped_matrix = ped2cn(hazard, sparse = TRUE, keep_ids = subset_ids),
writetodisk = FALSE
)

expect_equal(result_mismatch, result_ref)

# Only IDs from the smaller matrix should appear
all_output_ids <- unique(c(result_mismatch$ID1, result_mismatch$ID2))
expect_true(all(all_output_ids %in% as.numeric(dimnames(ad_small)[[1]])))
})

test_that("com2links mismatched dimensions with two matrices", {
data(hazard)
subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]

ad_ped_matrix <- ped2add(hazard, sparse = TRUE)
cn_small <- ped2cn(hazard, sparse = TRUE, keep_ids = subset_ids)

# cn is smaller than ad
result_mismatch <- com2links(
ad_ped_matrix = ad_ped_matrix,
cn_ped_matrix = cn_small,
writetodisk = FALSE
)

result_ref <- com2links(
ad_ped_matrix = ped2add(hazard, sparse = TRUE, keep_ids = subset_ids),
cn_ped_matrix = cn_small,
writetodisk = FALSE
)

expect_equal(result_mismatch, result_ref)
})

test_that("com2links mismatched dimensions with mit smaller", {
data(hazard)
subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]

ad_ped_matrix <- ped2add(hazard, sparse = TRUE)
mit_small <- ped2mit(hazard, sparse = TRUE, keep_ids = subset_ids)

result_mismatch <- com2links(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_small,
writetodisk = FALSE
)

result_ref <- com2links(
ad_ped_matrix = ped2add(hazard, sparse = TRUE, keep_ids = subset_ids),
mit_ped_matrix = mit_small,
writetodisk = FALSE
)

expect_equal(result_mismatch, result_ref)
expect_true(all(unique(c(result_mismatch$ID1, result_mismatch$ID2)) %in% as.numeric(dimnames(mit_small)[[1]])))
expect_true(all(unique(c(result_mismatch$ID1, result_mismatch$ID2)) %in% as.numeric(dimnames(ad_ped_matrix)[[1]])))
})
Loading
Loading