R-Computing-Lab · smasongarrison · Apr 12, 2026 · Mar 28, 2026 · Apr 3, 2026 · Apr 8, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: BGmisc
 Title: An R Package for Extended Behavior Genetics Analysis
-Version: 1.7.0.1
+Version: 1.7.0.1.1
 Authors@R: c(
     person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-4804-6003")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(SimPed)
 export(addPersonToPed)
+export(alignPhenToMatrix)
 export(buildFamilyGroups)
 export(buildOneFamilyGroup)
 export(buildPedigreeModelCovariance)

diff --git a/NEWS.md b/NEWS.md
@@ -1,9 +1,12 @@
 # BGmisc NEWS
 
 # Development version:
-
+## BGmisc 1.7.0.1.1
 * Optimized sliceFamilies to be more abstract
 * Created `.require_openmx()` to make it easier to use OpenMx functions without making OpenMx a dependency
+* Smarter string ID handling for ped2id
+* Fixed how different-sized matrices are handled by `com2links()`
+* Added alignPhenToMatrix function to align phenotypic data to the order of the relatedness matrix
 
 # BGmisc 1.7.0.0
 * Fixed bug in parList

diff --git a/R/buildmxPedigrees.R b/R/buildmxPedigrees.R
@@ -420,3 +420,21 @@ fitPedigreeModel <- function(
   }
   fitted_model
 }
+
+
+#' Align Phenotype Vector to Matrix Format for OpenMx
+#'
+#' This function takes a pedigree data frame, a specified phenotype column, and a vector of IDs to keep, and returns a matrix formatted for use in OpenMx models. The resulting matrix has one row and columns corresponding to the specified IDs, with values taken from the phenotype column of the pedigree.
+#' @param ped A data frame representing the pedigree, containing at least the columns specified by \code{phenotype} and \code{personID}.
+#' @param phenotype A character string specifying the column name in \code{ped} that
+#'  contains the phenotype values to be aligned.
+#' @param keep_ids A vector of IDs for which the phenotype values should be extracted and aligned. These IDs should correspond to the values in the \code{personID} of \code{ped}.
+#' @param personID A character string specifying the column name in \code{ped} that contains the individual IDs. Default is "ID".
+#' @export
+
+
+alignPhenToMatrix <- function(ped, phenotype, keep_ids, personID = "ID") {
+  obs_ids <- make.names(as.character(keep_ids))
+  pheno_vals <- ped[[phenotype]][match(as.character(keep_ids), as.character(ped[[personID]]))]
+  matrix(as.double(pheno_vals), nrow = 1, dimnames = list(NULL, obs_ids))
+}
diff --git a/R/makeLinks.R b/R/makeLinks.R
@@ -79,22 +79,50 @@ com2links <- function(
 
   # Extract individual IDs from the first available matrix.
   ids <- NULL
+  # Find the smallest matrix by ncol (avoids extracting IDs from large matrices).
+  mat_refs <- list()
+  if (!is.null(ad_ped_matrix)) mat_refs[["ad"]] <- ncol(ad_ped_matrix)
+  if (!is.null(mit_ped_matrix)) mat_refs[["mt"]] <- ncol(mit_ped_matrix)
+  if (!is.null(cn_ped_matrix)) mat_refs[["cn"]] <- ncol(cn_ped_matrix)
+
+  if (length(mat_refs) == 0L) {
+    stop("At least one relationship matrix must be provided.")
+  }
 
+  smallest <- names(which.min(unlist(mat_refs)))
+  guide_mat <- switch(smallest,
+    ad = ad_ped_matrix,
+    mt = mit_ped_matrix,
+    cn = cn_ped_matrix
+  )
 
-  if (!is.null(cn_ped_matrix)) {
-    ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
-    nc <- ncol(cn_ped_matrix)
-  } else if (!is.null(ad_ped_matrix)) {
-    ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
-    nc <- ncol(ad_ped_matrix)
-  } else if (!is.null(mit_ped_matrix)) {
-    ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
-    nc <- ncol(mit_ped_matrix)
+  # Extract IDs only from the smallest matrix.
+  guide_ids <- dimnames(guide_mat)[[1]]
+  if (is.null(guide_ids) || length(guide_ids) == 0L) {
+    stop("Could not extract IDs from the smallest matrix.")
+  }
+  ids <- suppressWarnings(as.numeric(guide_ids))
+  if (anyNA(ids)) {
+    warning(
+      "Matrix dimnames used as IDs should be strictly numeric for 'com2links()'. Found non-numeric or non-coercible IDs in the smallest matrix."
+    )
   }
+  nc <- length(ids)
 
-  if (is.null(ids)) {
-    stop("Could not extract IDs from the provided matrices.")
+  # Subset only the larger matrices to match the smallest matrix's IDs and ordering.
+  if (!is.null(ad_ped_matrix) && ncol(ad_ped_matrix) > nc) {
+    if (verbose) message("Subsetting ad_ped_matrix from ", ncol(ad_ped_matrix), " to ", nc, " IDs.")
+    ad_ped_matrix <- ad_ped_matrix[guide_ids, guide_ids, drop = FALSE]
+  }
+  if (!is.null(mit_ped_matrix) && ncol(mit_ped_matrix) > nc) {
+    if (verbose) message("Subsetting mit_ped_matrix from ", ncol(mit_ped_matrix), " to ", nc, " IDs.")
+    mit_ped_matrix <- mit_ped_matrix[guide_ids, guide_ids, drop = FALSE]
   }
+  if (!is.null(cn_ped_matrix) && ncol(cn_ped_matrix) > nc) {
+    if (verbose) message("Subsetting cn_ped_matrix from ", ncol(cn_ped_matrix), " to ", nc, " IDs.")
+    cn_ped_matrix <- cn_ped_matrix[guide_ids, guide_ids, drop = FALSE]
+  }
+
 
   # --- matrix_case construction and switch dispatch ---
   matrix_case <- paste(sort(c(
@@ -311,6 +339,8 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk,
           file = rel_pairs_file,
           row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
         )
+      } else {
+        if (verbose) cat("No related pairs to write.\n")
       }
     }
     if (gc == TRUE) {

diff --git a/R/segmentPedigree.R b/R/segmentPedigree.R
@@ -11,6 +11,7 @@
 #' @param dadID character.  Name of the column in ped for the father ID variable
 #' @param famID character.  Name of the column to be created in ped for the family ID variable
 #' @param twinID character.  Name of the column in ped for the twin ID variable, if applicable
+#' @param overwrite logical.  If TRUE, will overwrite existing famID variable if it exists.  Default is TRUE.
 #' @param ... additional arguments to be passed to \code{\link{ped2com}}
 #' @details
 #' The general idea of this function is to use person ID, mother ID, and father ID to
@@ -31,18 +32,20 @@
 ped2fam <- function(ped, personID = "ID",
                     momID = "momID", dadID = "dadID", famID = "famID",
                     twinID = "twinID",
+                    overwrite = TRUE,
                     ...) {
   # Call to wrapper function
   .ped2id(
     ped = ped, personID = personID, momID = momID, dadID = dadID, famID = famID, twinID = twinID,
-    type = "parents"
+    type = "parents",
+    overwrite = overwrite
   )
 }
 
 .ped2id <- function(ped,
                     personID = "ID", momID = "momID", dadID = "dadID",
                     famID = "famID", twinID = "twinID",
-                    type,
+                    type, overwrite = TRUE,
                     ...) {
   # Turn pedigree into family
   pg <- ped2graph(
@@ -55,23 +58,44 @@
 
   # Create famID data.frame
   # Convert IDs to numeric, with warning if coercion collapses IDs
+
   uniques <- suppressWarnings(unique(as.numeric(names(wcc$membership))))
+  keep_string <- FALSE
 
   if (length(uniques) == 1L && is.na(uniques)) {
     warning("After converting IDs to numeric, all IDs became NA. This indicates ID coercion collapsed IDs. Please ensure IDs aren't character or factor variables.")
-
+ keep_string <- TRUE
+     } else if (length(uniques) < length(wcc$membership)) {
+     warning("After converting IDs to numeric, some IDs became NA. This indicates ID coercion collapsed some IDs. Please ensure IDs aren't character or factor variables.")
+  keep_string <- TRUE
+}
+if(keep_string==TRUE) {
     fam <- data.frame(
       V1 = names(wcc$membership),
       V2 = wcc$membership
     )
-  } else {
+    } else {
     fam <- data.frame(
       V1 = as.numeric(names(wcc$membership)),
       V2 = wcc$membership
     )
   }
 
   names(fam) <- c(personID, famID)
+
+  if(famID %in% names(ped)) {
+    if(overwrite) {
+    overwrite_message <-  "be overwritten."
+    ped[[famID]] <- NULL
+    } else {
+    overwrite_message <-  "not be overwritten."
+    fam[[famID]] <- NULL
+   }
+
+    warning(sprintf("The famID variable '%s' already exists in the pedigree. The existing variable will %s", famID, overwrite_message))
+
+   }
+
   ped2 <- merge(fam, ped,
     by = personID, all.x = FALSE, all.y = TRUE
   )

diff --git a/man/alignPhenToMatrix.Rd b/man/alignPhenToMatrix.Rd
diff --git a/man/ped2fam.Rd b/man/ped2fam.Rd
diff --git a/tests/testthat/test-buildmxPedigrees.R b/tests/testthat/test-buildmxPedigrees.R
@@ -237,9 +237,9 @@ test_that("fitPedigreeModel errors without OpenMx", {
     regexp = "OpenMx"
   )
 
-    expect_error(
-      .require_openmx()
-    )
+  expect_error(
+    .require_openmx()
+  )
 })
 
 test_that("fitPedigreeModel runs end-to-end with a trivial dataset", {
@@ -314,3 +314,60 @@ test_that("fitPedigreeModel errors when group_models and data are both NULL", {
     regexp = "Either 'group_models' or 'data' must be provided"
   )
 })
+
+# ─── alignPhenToMatrix ────────────────────────────────────────────────────────
+
+test_that("alignPhenToMatrix returns a 1-row matrix with correct values", {
+  ped <- data.frame(ID = c(1L, 2L, 3L), pheno = c(1.1, 2.2, 3.3))
+  result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 2L, 3L))
+  expect_true(is.matrix(result))
+  expect_equal(nrow(result), 1L)
+  expect_equal(ncol(result), 3L)
+  expect_equal(as.numeric(result), c(1.1, 2.2, 3.3))
+})
+
+test_that("alignPhenToMatrix subsets to only the requested IDs", {
+  ped <- data.frame(ID = c(1L, 2L, 3L, 4L), pheno = c(10.0, 20.0, 30.0, 40.0))
+  result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(2L, 4L))
+  expect_equal(ncol(result), 2L)
+  expect_equal(as.numeric(result), c(20.0, 40.0))
+})
+
+test_that("alignPhenToMatrix preserves the order of keep_ids", {
+  ped <- data.frame(ID = c(1L, 2L, 3L), pheno = c(10.0, 20.0, 30.0))
+  result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(3L, 1L, 2L))
+  expect_equal(as.numeric(result), c(30.0, 10.0, 20.0))
+})
+
+test_that("alignPhenToMatrix column names are valid R names", {
+  # IDs starting with a digit are not valid R names; make.names() should fix them
+  ped <- data.frame(ID = c("1a", "2b"), pheno = c(5.5, 6.6))
+  result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c("1a", "2b"))
+  expect_true(all(make.names(colnames(result)) == colnames(result)))
+})
+
+test_that("alignPhenToMatrix returns NA for IDs not present in the pedigree", {
+  ped <- data.frame(ID = c(1L, 2L), pheno = c(1.0, 2.0))
+  result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 99L))
+  expect_equal(ncol(result), 2L)
+  ref_mat <- matrix(c(1.0, NA), nrow = 1, dimnames = list(NULL, c("X1", "X99")))
+  expect_equal(result[1, 1], ref_mat[1, 1])
+  expect_true(is.na(result[1, 2]))
+})
+
+test_that("alignPhenToMatrix respects a custom personID column", {
+  ped <- data.frame(personID = c("A", "B", "C"), score = c(7.0, 8.0, 9.0))
+  result <- alignPhenToMatrix(ped,
+    phenotype = "score",
+    keep_ids = c("B", "C"),
+    personID = "personID"
+  )
+  expect_equal(ncol(result), 2L)
+  expect_equal(as.numeric(result), c(8.0, 9.0))
+})
+
+test_that("alignPhenToMatrix coerces phenotype values to double", {
+  ped <- data.frame(ID = c(1L, 2L), pheno = c(1L, 2L)) # integer phenotype
+  result <- alignPhenToMatrix(ped, phenotype = "pheno", keep_ids = c(1L, 2L))
+  expect_true(is.double(result))
+})
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
@@ -324,3 +324,82 @@ test_that("com2links garbage collection does not affect output, using two compon
 
   expect_equal(result_gc, result_no_gc)
 })
+
+
+test_that("com2links handles mismatched matrix dimensions by subsetting to smallest", {
+  data(hazard)
+  subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]
+
+  ad_small <- ped2add(hazard, sparse = TRUE, keep_ids = subset_ids)
+  mit_ped_matrix <- ped2mit(hazard, sparse = TRUE)
+  cn_ped_matrix <- ped2cn(hazard, sparse = TRUE)
+
+  # All three matrices, ad is smaller
+  result_mismatch <- com2links(
+    ad_ped_matrix = ad_small,
+    mit_ped_matrix = mit_ped_matrix,
+    cn_ped_matrix = cn_ped_matrix,
+    writetodisk = FALSE
+  )
+
+  # Reference: all three matrices built from the same subset
+  result_ref <- com2links(
+    ad_ped_matrix = ad_small,
+    mit_ped_matrix = ped2mit(hazard, sparse = TRUE, keep_ids = subset_ids),
+    cn_ped_matrix = ped2cn(hazard, sparse = TRUE, keep_ids = subset_ids),
+    writetodisk = FALSE
+  )
+
+  expect_equal(result_mismatch, result_ref)
+
+  # Only IDs from the smaller matrix should appear
+  all_output_ids <- unique(c(result_mismatch$ID1, result_mismatch$ID2))
+  expect_true(all(all_output_ids %in% as.numeric(dimnames(ad_small)[[1]])))
+})
+
+test_that("com2links mismatched dimensions with two matrices", {
+  data(hazard)
+  subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]
+
+  ad_ped_matrix <- ped2add(hazard, sparse = TRUE)
+  cn_small <- ped2cn(hazard, sparse = TRUE, keep_ids = subset_ids)
+
+  # cn is smaller than ad
+  result_mismatch <- com2links(
+    ad_ped_matrix = ad_ped_matrix,
+    cn_ped_matrix = cn_small,
+    writetodisk = FALSE
+  )
+
+  result_ref <- com2links(
+    ad_ped_matrix = ped2add(hazard, sparse = TRUE, keep_ids = subset_ids),
+    cn_ped_matrix = cn_small,
+    writetodisk = FALSE
+  )
+
+  expect_equal(result_mismatch, result_ref)
+})
+
+test_that("com2links mismatched dimensions with mit smaller", {
+  data(hazard)
+  subset_ids <- hazard$ID[seq_len(ceiling(nrow(hazard) / 2))]
+
+  ad_ped_matrix <- ped2add(hazard, sparse = TRUE)
+  mit_small <- ped2mit(hazard, sparse = TRUE, keep_ids = subset_ids)
+
+  result_mismatch <- com2links(
+    ad_ped_matrix = ad_ped_matrix,
+    mit_ped_matrix = mit_small,
+    writetodisk = FALSE
+  )
+
+  result_ref <- com2links(
+    ad_ped_matrix = ped2add(hazard, sparse = TRUE, keep_ids = subset_ids),
+    mit_ped_matrix = mit_small,
+    writetodisk = FALSE
+  )
+
+  expect_equal(result_mismatch, result_ref)
+  expect_true(all(unique(c(result_mismatch$ID1, result_mismatch$ID2)) %in% as.numeric(dimnames(mit_small)[[1]])))
+  expect_true(all(unique(c(result_mismatch$ID1, result_mismatch$ID2)) %in% as.numeric(dimnames(ad_ped_matrix)[[1]])))
+})