Skip to content

Commit 8785cfa

Browse files
Merge pull request #148 from R-Computing-Lab/dev_main
Enhance com2links to subset matrices and improve error handling
2 parents 818aca5 + eaaa59b commit 8785cfa

14 files changed

Lines changed: 367 additions & 42 deletions

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: BGmisc
22
Title: An R Package for Extended Behavior Genetics Analysis
3-
Version: 1.7.0.1
3+
Version: 1.7.0.1.1
44
Authors@R: c(
55
person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0002-4804-6003")),

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
export(SimPed)
44
export(addPersonToPed)
5+
export(alignPhenToMatrix)
56
export(buildFamilyGroups)
67
export(buildOneFamilyGroup)
78
export(buildPedigreeModelCovariance)

NEWS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
# BGmisc NEWS
22

33
# Development version:
4-
4+
## BGmisc 1.7.0.1.1
55
* Optimized sliceFamilies to be more abstract
66
* Created `.require_openmx()` to make it easier to use OpenMx functions without making OpenMx a dependency
7+
* Smarter string ID handling for ped2id
8+
* Fixed how different-sized matrices are handled by `com2links()`
9+
* Added alignPhenToMatrix function to align phenotypic data to the order of the relatedness matrix
710

811
# BGmisc 1.7.0.0
912
* Fixed bug in parList

R/buildmxPedigrees.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,3 +420,21 @@ fitPedigreeModel <- function(
420420
}
421421
fitted_model
422422
}
423+
424+
425+
#' Align Phenotype Vector to Matrix Format for OpenMx
426+
#'
427+
#' This function takes a pedigree data frame, a specified phenotype column, and a vector of IDs to keep, and returns a matrix formatted for use in OpenMx models. The resulting matrix has one row and columns corresponding to the specified IDs, with values taken from the phenotype column of the pedigree.
428+
#' @param ped A data frame representing the pedigree, containing at least the columns specified by \code{phenotype} and \code{personID}.
429+
#' @param phenotype A character string specifying the column name in \code{ped} that
430+
#' contains the phenotype values to be aligned.
431+
#' @param keep_ids A vector of IDs for which the phenotype values should be extracted and aligned. These IDs should correspond to the values in the \code{personID} of \code{ped}.
432+
#' @param personID A character string specifying the column name in \code{ped} that contains the individual IDs. Default is "ID".
433+
#' @export
434+
435+
436+
alignPhenToMatrix <- function(ped, phenotype, keep_ids, personID = "ID") {
437+
obs_ids <- make.names(as.character(keep_ids))
438+
pheno_vals <- ped[[phenotype]][match(as.character(keep_ids), as.character(ped[[personID]]))]
439+
matrix(as.double(pheno_vals), nrow = 1, dimnames = list(NULL, obs_ids))
440+
}

R/makeLinks.R

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,22 +79,50 @@ com2links <- function(
7979

8080
# Extract individual IDs from the first available matrix.
8181
ids <- NULL
82+
# Find the smallest matrix by ncol (avoids extracting IDs from large matrices).
83+
mat_refs <- list()
84+
if (!is.null(ad_ped_matrix)) mat_refs[["ad"]] <- ncol(ad_ped_matrix)
85+
if (!is.null(mit_ped_matrix)) mat_refs[["mt"]] <- ncol(mit_ped_matrix)
86+
if (!is.null(cn_ped_matrix)) mat_refs[["cn"]] <- ncol(cn_ped_matrix)
87+
88+
if (length(mat_refs) == 0L) {
89+
stop("At least one relationship matrix must be provided.")
90+
}
8291

92+
smallest <- names(which.min(unlist(mat_refs)))
93+
guide_mat <- switch(smallest,
94+
ad = ad_ped_matrix,
95+
mt = mit_ped_matrix,
96+
cn = cn_ped_matrix
97+
)
8398

84-
if (!is.null(cn_ped_matrix)) {
85-
ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
86-
nc <- ncol(cn_ped_matrix)
87-
} else if (!is.null(ad_ped_matrix)) {
88-
ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
89-
nc <- ncol(ad_ped_matrix)
90-
} else if (!is.null(mit_ped_matrix)) {
91-
ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
92-
nc <- ncol(mit_ped_matrix)
99+
# Extract IDs only from the smallest matrix.
100+
guide_ids <- dimnames(guide_mat)[[1]]
101+
if (is.null(guide_ids) || length(guide_ids) == 0L) {
102+
stop("Could not extract IDs from the smallest matrix.")
103+
}
104+
ids <- suppressWarnings(as.numeric(guide_ids))
105+
if (anyNA(ids)) {
106+
warning(
107+
"Matrix dimnames used as IDs should be strictly numeric for 'com2links()'. Found non-numeric or non-coercible IDs in the smallest matrix."
108+
)
93109
}
110+
nc <- length(ids)
94111

95-
if (is.null(ids)) {
96-
stop("Could not extract IDs from the provided matrices.")
112+
# Subset only the larger matrices to match the smallest matrix's IDs and ordering.
113+
if (!is.null(ad_ped_matrix) && ncol(ad_ped_matrix) > nc) {
114+
if (verbose) message("Subsetting ad_ped_matrix from ", ncol(ad_ped_matrix), " to ", nc, " IDs.")
115+
ad_ped_matrix <- ad_ped_matrix[guide_ids, guide_ids, drop = FALSE]
116+
}
117+
if (!is.null(mit_ped_matrix) && ncol(mit_ped_matrix) > nc) {
118+
if (verbose) message("Subsetting mit_ped_matrix from ", ncol(mit_ped_matrix), " to ", nc, " IDs.")
119+
mit_ped_matrix <- mit_ped_matrix[guide_ids, guide_ids, drop = FALSE]
97120
}
121+
if (!is.null(cn_ped_matrix) && ncol(cn_ped_matrix) > nc) {
122+
if (verbose) message("Subsetting cn_ped_matrix from ", ncol(cn_ped_matrix), " to ", nc, " IDs.")
123+
cn_ped_matrix <- cn_ped_matrix[guide_ids, guide_ids, drop = FALSE]
124+
}
125+
98126

99127
# --- matrix_case construction and switch dispatch ---
100128
matrix_case <- paste(sort(c(
@@ -311,6 +339,8 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk,
311339
file = rel_pairs_file,
312340
row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
313341
)
342+
} else {
343+
if (verbose) cat("No related pairs to write.\n")
314344
}
315345
}
316346
if (gc == TRUE) {

R/segmentPedigree.R

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#' @param dadID character. Name of the column in ped for the father ID variable
1212
#' @param famID character. Name of the column to be created in ped for the family ID variable
1313
#' @param twinID character. Name of the column in ped for the twin ID variable, if applicable
14+
#' @param overwrite logical. If TRUE, will overwrite existing famID variable if it exists. Default is TRUE.
1415
#' @param ... additional arguments to be passed to \code{\link{ped2com}}
1516
#' @details
1617
#' The general idea of this function is to use person ID, mother ID, and father ID to
@@ -31,18 +32,20 @@
3132
ped2fam <- function(ped, personID = "ID",
3233
momID = "momID", dadID = "dadID", famID = "famID",
3334
twinID = "twinID",
35+
overwrite = TRUE,
3436
...) {
3537
# Call to wrapper function
3638
.ped2id(
3739
ped = ped, personID = personID, momID = momID, dadID = dadID, famID = famID, twinID = twinID,
38-
type = "parents"
40+
type = "parents",
41+
overwrite = overwrite
3942
)
4043
}
4144

4245
.ped2id <- function(ped,
4346
personID = "ID", momID = "momID", dadID = "dadID",
4447
famID = "famID", twinID = "twinID",
45-
type,
48+
type, overwrite = TRUE,
4649
...) {
4750
# Turn pedigree into family
4851
pg <- ped2graph(
@@ -55,23 +58,44 @@ ped2fam <- function(ped, personID = "ID",
5558

5659
# Create famID data.frame
5760
# Convert IDs to numeric, with warning if coercion collapses IDs
61+
5862
uniques <- suppressWarnings(unique(as.numeric(names(wcc$membership))))
63+
keep_string <- FALSE
5964

6065
if (length(uniques) == 1L && is.na(uniques)) {
6166
warning("After converting IDs to numeric, all IDs became NA. This indicates ID coercion collapsed IDs. Please ensure IDs aren't character or factor variables.")
62-
67+
keep_string <- TRUE
68+
} else if (length(uniques) < length(wcc$membership)) {
69+
warning("After converting IDs to numeric, some IDs became NA. This indicates ID coercion collapsed some IDs. Please ensure IDs aren't character or factor variables.")
70+
keep_string <- TRUE
71+
}
72+
if(keep_string==TRUE) {
6373
fam <- data.frame(
6474
V1 = names(wcc$membership),
6575
V2 = wcc$membership
6676
)
67-
} else {
77+
} else {
6878
fam <- data.frame(
6979
V1 = as.numeric(names(wcc$membership)),
7080
V2 = wcc$membership
7181
)
7282
}
7383

7484
names(fam) <- c(personID, famID)
85+
86+
if(famID %in% names(ped)) {
87+
if(overwrite) {
88+
overwrite_message <- "be overwritten."
89+
ped[[famID]] <- NULL
90+
} else {
91+
overwrite_message <- "not be overwritten."
92+
fam[[famID]] <- NULL
93+
}
94+
95+
warning(sprintf("The famID variable '%s' already exists in the pedigree. The existing variable will %s", famID, overwrite_message))
96+
97+
}
98+
7599
ped2 <- merge(fam, ped,
76100
by = personID, all.x = FALSE, all.y = TRUE
77101
)

man/alignPhenToMatrix.Rd

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ped2fam.Rd

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-buildmxPedigrees.R

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,9 @@ test_that("fitPedigreeModel errors without OpenMx", {
237237
regexp = "OpenMx"
238238
)
239239

240-
expect_error(
241-
.require_openmx()
242-
)
240+
expect_error(
241+
.require_openmx()
242+
)
243243
})
244244

245245
test_that("fitPedigreeModel runs end-to-end with a trivial dataset", {
@@ -314,3 +314,60 @@ test_that("fitPedigreeModel errors when group_models and data are both NULL", {
314314
regexp = "Either 'group_models' or 'data' must be provided"
315315
)
316316
})
317+
318+
# ─── alignPhenToMatrix ────────────────────────────────────────────────────────
319+
320+
test_that("alignPhenToMatrix returns a 1-row matrix with correct values", {
321+
ped <- data.frame(ID = c(1L, 2L, 3L), pheno = c(1.1, 2.2, 3.3))
322+
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 2L, 3L))
323+
expect_true(is.matrix(result))
324+
expect_equal(nrow(result), 1L)
325+
expect_equal(ncol(result), 3L)
326+
expect_equal(as.numeric(result), c(1.1, 2.2, 3.3))
327+
})
328+
329+
test_that("alignPhenToMatrix subsets to only the requested IDs", {
330+
ped <- data.frame(ID = c(1L, 2L, 3L, 4L), pheno = c(10.0, 20.0, 30.0, 40.0))
331+
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(2L, 4L))
332+
expect_equal(ncol(result), 2L)
333+
expect_equal(as.numeric(result), c(20.0, 40.0))
334+
})
335+
336+
test_that("alignPhenToMatrix preserves the order of keep_ids", {
337+
ped <- data.frame(ID = c(1L, 2L, 3L), pheno = c(10.0, 20.0, 30.0))
338+
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(3L, 1L, 2L))
339+
expect_equal(as.numeric(result), c(30.0, 10.0, 20.0))
340+
})
341+
342+
test_that("alignPhenToMatrix column names are valid R names", {
343+
# IDs starting with a digit are not valid R names; make.names() should fix them
344+
ped <- data.frame(ID = c("1a", "2b"), pheno = c(5.5, 6.6))
345+
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c("1a", "2b"))
346+
expect_true(all(make.names(colnames(result)) == colnames(result)))
347+
})
348+
349+
test_that("alignPhenToMatrix returns NA for IDs not present in the pedigree", {
350+
ped <- data.frame(ID = c(1L, 2L), pheno = c(1.0, 2.0))
351+
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 99L))
352+
expect_equal(ncol(result), 2L)
353+
ref_mat <- matrix(c(1.0, NA), nrow = 1, dimnames = list(NULL, c("X1", "X99")))
354+
expect_equal(result[1, 1], ref_mat[1, 1])
355+
expect_true(is.na(result[1, 2]))
356+
})
357+
358+
test_that("alignPhenToMatrix respects a custom personID column", {
359+
ped <- data.frame(personID = c("A", "B", "C"), score = c(7.0, 8.0, 9.0))
360+
result <- alignPhenToMatrix(ped,
361+
phenotype = "score",
362+
keep_ids = c("B", "C"),
363+
personID = "personID"
364+
)
365+
expect_equal(ncol(result), 2L)
366+
expect_equal(as.numeric(result), c(8.0, 9.0))
367+
})
368+
369+
test_that("alignPhenToMatrix coerces phenotype values to double", {
370+
ped <- data.frame(ID = c(1L, 2L), pheno = c(1L, 2L)) # integer phenotype
371+
result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 2L))
372+
expect_true(is.double(result))
373+
})

tests/testthat/test-makeLinks.R

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,3 +324,82 @@ test_that("com2links garbage collection does not affect output, using two compon
324324

325325
expect_equal(result_gc, result_no_gc)
326326
})
327+
328+
329+
test_that("com2links handles mismatched matrix dimensions by subsetting to smallest", {
330+
data(hazard)
331+
subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]
332+
333+
ad_small <- ped2add(hazard, sparse = TRUE, keep_ids = subset_ids)
334+
mit_ped_matrix <- ped2mit(hazard, sparse = TRUE)
335+
cn_ped_matrix <- ped2cn(hazard, sparse = TRUE)
336+
337+
# All three matrices, ad is smaller
338+
result_mismatch <- com2links(
339+
ad_ped_matrix = ad_small,
340+
mit_ped_matrix = mit_ped_matrix,
341+
cn_ped_matrix = cn_ped_matrix,
342+
writetodisk = FALSE
343+
)
344+
345+
# Reference: all three matrices built from the same subset
346+
result_ref <- com2links(
347+
ad_ped_matrix = ad_small,
348+
mit_ped_matrix = ped2mit(hazard, sparse = TRUE, keep_ids = subset_ids),
349+
cn_ped_matrix = ped2cn(hazard, sparse = TRUE, keep_ids = subset_ids),
350+
writetodisk = FALSE
351+
)
352+
353+
expect_equal(result_mismatch, result_ref)
354+
355+
# Only IDs from the smaller matrix should appear
356+
all_output_ids <- unique(c(result_mismatch$ID1, result_mismatch$ID2))
357+
expect_true(all(all_output_ids %in% as.numeric(dimnames(ad_small)[[1]])))
358+
})
359+
360+
test_that("com2links mismatched dimensions with two matrices", {
361+
data(hazard)
362+
subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]
363+
364+
ad_ped_matrix <- ped2add(hazard, sparse = TRUE)
365+
cn_small <- ped2cn(hazard, sparse = TRUE, keep_ids = subset_ids)
366+
367+
# cn is smaller than ad
368+
result_mismatch <- com2links(
369+
ad_ped_matrix = ad_ped_matrix,
370+
cn_ped_matrix = cn_small,
371+
writetodisk = FALSE
372+
)
373+
374+
result_ref <- com2links(
375+
ad_ped_matrix = ped2add(hazard, sparse = TRUE, keep_ids = subset_ids),
376+
cn_ped_matrix = cn_small,
377+
writetodisk = FALSE
378+
)
379+
380+
expect_equal(result_mismatch, result_ref)
381+
})
382+
383+
test_that("com2links mismatched dimensions with mit smaller", {
384+
data(hazard)
385+
subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]
386+
387+
ad_ped_matrix <- ped2add(hazard, sparse = TRUE)
388+
mit_small <- ped2mit(hazard, sparse = TRUE, keep_ids = subset_ids)
389+
390+
result_mismatch <- com2links(
391+
ad_ped_matrix = ad_ped_matrix,
392+
mit_ped_matrix = mit_small,
393+
writetodisk = FALSE
394+
)
395+
396+
result_ref <- com2links(
397+
ad_ped_matrix = ped2add(hazard, sparse = TRUE, keep_ids = subset_ids),
398+
mit_ped_matrix = mit_small,
399+
writetodisk = FALSE
400+
)
401+
402+
expect_equal(result_mismatch, result_ref)
403+
expect_true(all(unique(c(result_mismatch$ID1, result_mismatch$ID2)) %in% as.numeric(dimnames(mit_small)[[1]])))
404+
expect_true(all(unique(c(result_mismatch$ID1, result_mismatch$ID2)) %in% as.numeric(dimnames(ad_ped_matrix)[[1]])))
405+
})

0 commit comments

Comments
 (0)