Skip to content

Commit 09fa620

Browse files
Merge pull request #112 from R-Computing-Lab/dev_main
Fixing the direction of the nodes and checking them
2 parents fd3b709 + a8ef38d commit 09fa620

15 files changed

Lines changed: 283 additions & 42 deletions

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: BGmisc
22
Title: An R Package for Extended Behavior Genetics Analysis
3-
Version: 1.6.0
3+
Version: 1.6.0.1
44
Authors@R: c(
55
person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0002-4804-6003")),

NAMESPACE

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ export(computeParentAdjacency)
1717
export(createGenDataFrame)
1818
export(determineSex)
1919
export(dropLink)
20-
export(evenInsert)
2120
export(fitComponentModel)
2221
export(getWikiTreeSummary)
2322
export(identifyComponentModel)
2423
export(inferRelatedness)
25-
export(insertEven)
2624
export(makeInbreeding)
2725
export(makeTwins)
2826
export(ped2add)

NEWS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# BGmisc NEWS
2+
# Development version: 1.6.0.9000
23

3-
# BGmisc 1.6
4+
# BGmisc 1.6.0.1
5+
* Add helper functions for checkParents etc
6+
* fixed incorrect direction so that parents are pointing to children in the graphs
47
* Optimize simulatePedigree and helpers for speed and memory usage
58
* Major gains in speed for deeper pedigrees
69
* Added more tests for simulatePedigree

R/checkIDs.R

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
4141
cat("Validation Results:\n")
4242
message(validation_results)
4343
}
44-
if (repair) {
44+
if (repair == TRUE) {
4545
if (verbose == TRUE) {
4646
cat("Attempting to repair:\n")
4747
cat("Step 1: Attempting to repair non-unique IDs...\n")
@@ -56,19 +56,12 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
5656
# if there are non-unique IDs
5757
if (length(validation_results$non_unique_ids) > 0) {
5858
# loop through each non-unique ID
59-
for (id in validation_results$non_unique_ids) {
60-
rows_with_id <- repaired_ped[repaired_ped$ID == id, ]
61-
# If all rows with the same ID are truly identical, keep only the first occurrence
62-
if (nrow(unique(rows_with_id)) == 1) {
63-
# Mark as removed in the changes list
64-
changes[[paste0("ID", id)]] <- "Removed duplicates"
65-
# Keep only the first row, remove the rest
66-
repaired_ped <- repaired_ped[-which(repaired_ped$ID == id)[-1], ] # Remove all but the first occurrence
67-
} else {
68-
# Mark as kept in the changes list
69-
changes[[paste0("ID", id)]] <- "Kept duplicates"
70-
}
71-
}
59+
60+
processed <- dropIdenticalDuplicateIDs(ped = repaired_ped,
61+
ids = validation_results$non_unique_ids,
62+
changes = changes)
63+
repaired_ped <- processed$ped
64+
changes <- processed$changes
7265
}
7366
if (verbose == TRUE) {
7467
cat("Step 2: No repair for parents who are their children at this time\n")

R/checkParents.R

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
5252
missing_mothers <- ped$ID[which(!is.na(ped$dadID) & is.na(ped$momID))]
5353

5454
# Update the validation_results list
55-
if (length(missing_fathers) > 0) {
56-
validation_results$missing_fathers <- missing_fathers
57-
}
58-
if (length(missing_mothers) > 0) {
59-
validation_results$missing_mothers <- missing_mothers
60-
}
6155

62-
validation_results$single_parents <- length(validation_results) > 0
56+
validation_results <- addIfAny(validation_results, "missing_fathers", missing_fathers)
57+
validation_results <- addIfAny(validation_results, "missing_mothers", missing_mothers)
58+
59+
validation_results$single_parents <- (length(missing_fathers) + length(missing_mothers)) > 0
60+
61+
62+
6363

6464

6565
if (verbose && validation_results$single_parents) cat("Missing single parents found.\n")
@@ -269,11 +269,12 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
269269
}
270270

271271
# restore orginal names that the user orginally provided
272-
names(ped)[names(ped) == "ID"] <- personID
273-
names(ped)[names(ped) == "momID"] <- momID
274-
names(ped)[names(ped) == "dadID"] <- dadID
275-
names(ped)[names(ped) == "famID"] <- famID
276-
return(ped)
272+
ped <- restorePedColnames(ped,
273+
famID = famID,
274+
personID = personID,
275+
momID = momID,
276+
dadID = dadID)
277+
277278
}
278279
#' Repair Parent IDs
279280
#'

R/helpChecks.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#' Drop Identical Duplicate IDs from Pedigree Data Frame
2+
#'
3+
#' #' This function identifies and removes duplicate entries in a pedigree data frame
4+
#' based on a list of specified IDs. If multiple rows share the same ID and are
5+
#' identical, only one instance is retained. The function returns the modified pedigree
6+
#' data frame along with a log of changes made.
7+
#' @param ped A data frame representing the pedigree.
8+
#' @param ids A vector of IDs to check for duplicates in the pedigree.
9+
#' @param changes An optional list to log changes made during the process.
10+
dropIdenticalDuplicateIDs <- function(ped, ids, changes = NULL
11+
) {
12+
if (!is.data.frame(ped)) {
13+
stop("ped must be a data frame")
14+
}
15+
if (is.null(changes)) {
16+
changes <- list()
17+
} else if (!is.list(changes)) {
18+
stop("changes must be a list or NULL")
19+
}
20+
21+
out <- ped
22+
23+
if (!is.null(ids) && length(ids) > 0) {
24+
for (id in ids) {
25+
rows_with_id <- out[out$ID == id, , drop = FALSE]
26+
if (nrow(unique(rows_with_id)) == 1) {
27+
changes[[paste0("ID", id)]] <- "Removed duplicates"
28+
out <- out[-which(out$ID == id)[-1], , drop = FALSE]
29+
} else {
30+
changes[[paste0("ID", id)]] <- "Kept duplicates"
31+
}
32+
}
33+
}
34+
35+
list(ped = out, changes = changes)
36+
}
37+
38+
#' Helper function to conditionally add elements to a list
39+
#' @param validation A list to which elements may be added.
40+
#' @param name A character string representing the name of the element to add.
41+
#' @param value The value to add to the list if it is not NULL or empty
42+
#' @return The updated list with the new element added if applicable.
43+
#' @keywords internal
44+
addIfAny <- function(validation, name, value) {
45+
if (!is.null(value) && length(value) > 0) validation[[name]] <- value
46+
validation
47+
}

R/insertEven.R renamed to R/helpInsertEven.R

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#' The function takes two vectors, m and n, and inserts the elements of m evenly into n.
1010
#' If the length of m is greater than the length of n, the vectors are swapped, and the insertion proceeds.
1111
#' The resulting vector is a combination of m and n, with the elements of m evenly distributed within n.
12-
#' @export
1312
#' @seealso \code{\link{SimPed}} for the main function that uses this supporting function.
1413

1514
insertEven <- function(m, n, verbose = FALSE) {
@@ -55,5 +54,4 @@ insertEven <- function(m, n, verbose = FALSE) {
5554
}
5655

5756
#' @rdname insertEven
58-
#' @export
5957
evenInsert <- insertEven

R/cleanPedigree.R renamed to R/helpNames.R

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,81 @@ standardizeColnames <- function(df, verbose = FALSE) {
4141

4242
return(df)
4343
}
44+
#' Restore Original Column Names in a Pedigree Dataframe
45+
#'
46+
#' This function restores the original column names of a pedigree dataframe
47+
#' based on user-specified names. It is useful for reverting standardized column
48+
#' names back to their original names after processing.
49+
#' @param ped A pedigree dataframe with standardized column names.
50+
#' @param famID The original name for the family ID column. Default is "fam
51+
#' ID".
52+
#' @param personID The original name for the person ID column. Default is "ID".
53+
#' @param momID The original name for the mother ID column. Default is "momID".
54+
#' @param dadID The original name for the father ID column. Default is "dadID".
55+
#' @param gen The original name for the generation column. Default is "gen".
56+
#' @param patID The original name for the paternal ID column. Default is "patID".
57+
#' @param matID The original name for the maternal ID column. Default is "matID".
58+
#' @param spID The original name for the spouse ID column. Default is "spID".
59+
#' @param twinID The original name for the twin ID column. Default is "twinID".
60+
#' @param zygosity The original name for the zygosity column. Default is "zygosity".
61+
#' @param sex The original name for the sex column. Default is "sex".
62+
#' @param verbose A logical indicating whether to print progress messages.
63+
#' @return A pedigree dataframe with restored original column names.
64+
restorePedColnames <- function(ped,
65+
famID = "famID",
66+
personID = "ID",
67+
momID = "momID",
68+
dadID = "dadID",
69+
gen = "gen",
70+
patID = "patID",
71+
matID = "matID",
72+
spID = "spID",
73+
twinID = "twinID",
74+
zygosity = "zygosity",
75+
sex = "sex",
76+
verbose = FALSE) {
77+
if (verbose == TRUE) {
78+
message("Restoring original column names...")
79+
}
80+
if (!inherits(ped, "data.frame")) {
81+
stop("ped should be a data.frame or inherit to a data.frame")
82+
}
83+
if (!is.null(personID) && !is.null(ped$ID)) {
84+
names(ped)[names(ped) == "ID"] <- personID
85+
}
86+
if (!is.null(momID) && !is.null(ped$momID)) {
87+
names(ped)[names(ped) == "momID"] <- momID
88+
}
89+
if (!is.null(dadID) && !is.null(ped$dadID)) {
90+
names(ped)[names(ped) == "dadID"] <- dadID
91+
}
92+
93+
if (!is.null(famID) && !is.null(ped$famID)) {
94+
names(ped)[names(ped) == "famID"] <- famID
95+
}
96+
if (!is.null(gen) && !is.null(ped$gen)) {
97+
names(ped)[names(ped) == "gen"] <- gen
98+
}
99+
if (!is.null(patID) && !is.null(ped$patID)) {
100+
names(ped)[names(ped) == "patID"] <- patID
101+
}
102+
if (!is.null(matID) && !is.null(ped$matID)) {
103+
names(ped)[names(ped) == "matID"] <- matID
104+
}
105+
if (!is.null(spID) && !is.null(ped$spID)) {
106+
names(ped)[names(ped) == "spID"] <- spID
107+
}
108+
if (!is.null(twinID) && !is.null(ped$twinID)) {
109+
names(ped)[names(ped) == "twinID"] <- twinID
110+
}
111+
if (!is.null(zygosity) && !is.null(ped$zygosity)) {
112+
names(ped)[names(ped) == "zygosity"] <- zygosity
113+
}
114+
if (!is.null(sex) && !is.null(ped$sex)) {
115+
names(ped)[names(ped) == "sex"] <- sex
116+
}
117+
ped
118+
}
44119

45120

46121
# Repair Pedigree

R/segmentPedigree.R

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,13 @@ ped2graph <- function(ped,
139139
)
140140
edges <- rbind(
141141
as.matrix(data.frame(
142-
personID = as.character(ped[[personID]]),
143-
momID = as.character(ped[[momID]])
142+
# need to be parent to child for igraph
143+
momID = as.character(ped[[momID]]),
144+
personID = as.character(ped[[personID]])
144145
)),
145146
as.matrix(data.frame(
146-
personID = as.character(ped[[personID]]),
147-
dadID = as.character(ped[[dadID]])
147+
dadID = as.character(ped[[dadID]]),
148+
personID = as.character(ped[[personID]])
148149
))
149150
)
150151
} else if (adjacent == "mothers") {
@@ -154,8 +155,8 @@ ped2graph <- function(ped,
154155
)
155156
)
156157
edges <- as.matrix(data.frame(
157-
personID = as.character(ped[[personID]]),
158-
momID = as.character(ped[[momID]])
158+
momID = as.character(ped[[momID]]),
159+
personID = as.character(ped[[personID]])
159160
))
160161
} else if (adjacent == "fathers") {
161162
nodes <- unique(
@@ -164,8 +165,8 @@ ped2graph <- function(ped,
164165
)
165166
)
166167
edges <- as.matrix(data.frame(
167-
personID = as.character(ped[[personID]]),
168-
dadID = as.character(ped[[dadID]])
168+
dadID = as.character(ped[[dadID]]),
169+
personID = as.character(ped[[personID]])
169170
))
170171
}
171172
edges <- edges[stats::complete.cases(edges), ]

man/addIfAny.Rd

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)