Skip to content

Commit cc87ab1

Browse files
Merge pull request #56 from R-Computing-Lab/povich
adding Povich Algorithm Optimization to Dev branch
2 parents 7005458 + 32c3d57 commit cc87ab1

4 files changed

Lines changed: 210 additions & 130 deletions

File tree

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# BGmisc 1.3.4
2-
* Added alternative methods to create the adjacency matrix
2+
* Added alternative (and faster) methods to create the adjacency matrix
33
* Add tests for comparison of adjacency matrix methods
44
* Added Royal Family pedigree
55

R/convertPedigree.R

Lines changed: 171 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ ped2com <- function(ped, component,
3535
resume = FALSE,
3636
save_rate = 5,
3737
save_rate_gen = save_rate,
38-
save_rate_parlist = 1000 * save_rate,
38+
save_rate_parlist = 100000 * save_rate,
3939
update_rate = 100,
4040
save_path = "checkpoint/",
4141
...) {
@@ -87,8 +87,8 @@ ped2com <- function(ped, component,
8787
if (!transpose_method %in% c("tcrossprod", "crossprod", "star", "tcross.alt.crossprod", "tcross.alt.star")) {
8888
stop("Invalid method specified. Choose from 'tcrossprod', 'crossprod', or 'star' or 'tcross.alt.crossprod' or 'tcross.alt.star'.")
8989
}
90-
if (!adjacency_method %in% c("indexed", "loop")) {
91-
stop("Invalid method specified. Choose from 'indexed' or 'loop'.")
90+
if (!adjacency_method %in% c("indexed", "loop", "direct")) {
91+
stop("Invalid method specified. Choose from 'indexed', 'loop', or 'direct'.")
9292
}
9393

9494
# standardize colnames
@@ -153,17 +153,15 @@ ped2com <- function(ped, component,
153153
nr = nr,
154154
parList = parList, lens = lens
155155
)
156-
parList <- list_of_adjacencies$parList
157-
lens <- list_of_adjacencies$lens
158156
# Construct sparse matrix
159157
if (resume && file.exists(checkpoint_files$isPar)) { # fix to check actual
160158
if (verbose) cat("Resuming: Constructed matrix...\n")
161159
jss <- readRDS(checkpoint_files$jss)
162160
iss <- readRDS(checkpoint_files$iss)
163161
} else {
164162
# Construct sparse matrix
165-
jss <- rep(1L:nr, times = lens)
166-
iss <- unlist(parList)
163+
iss <- list_of_adjacencies$iss
164+
jss <- list_of_adjacencies$jss
167165

168166
if (verbose) {
169167
cat("Constructed sparse matrix\n")
@@ -222,7 +220,7 @@ ped2com <- function(ped, component,
222220
} else {
223221
# isChild is the 'S' matrix from RAM
224222
isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) {
225-
2^(-!all(is.na(x)))
223+
.5 + .25*sum(is.na(x)) # 2 parents -> .5, 1 parent -> .75, 0 parents -> 1
226224
})
227225
if (saveable) {
228226
saveRDS(isChild, file = checkpoint_files$isChild)
@@ -355,6 +353,7 @@ ped2add <- function(ped, max.gen = 25, sparse = FALSE, verbose = FALSE,
355353
flatten.diag = flatten.diag,
356354
standardize.colnames = standardize.colnames,
357355
transpose_method = transpose_method,
356+
adjacency_method = adjacency_method,
358357
saveable = saveable,
359358
resume = resume,
360359
save_rate_gen = save_rate_gen,
@@ -393,6 +392,7 @@ ped2mit <- ped2mt <- function(ped, max.gen = 25,
393392
flatten.diag = flatten.diag,
394393
standardize.colnames = standardize.colnames,
395394
transpose_method = transpose_method,
395+
adjacency_method = adjacency_method,
396396
saveable = saveable,
397397
resume = resume,
398398
save_rate_gen = save_rate_gen,
@@ -466,153 +466,203 @@ compute_transpose <- function(r2, transpose_method = "tcrossprod", verbose = FAL
466466
}
467467
}
468468

469-
#' Compute Parent Adjacency Matrix with Multiple Approaches
470-
#' @inheritParams ped2com
471-
#' @inherit ped2com details
472-
#' @param nr the number of rows in the pedigree dataset
473-
#' @param lastComputed the last computed index
474-
#' @param parList a list of parent-child relationships
475-
#' @param lens a vector of the lengths of the parent-child relationships
476-
#' @param checkpoint_files a list of checkpoint files
477-
478-
compute_parent_adjacency <- function(ped, component,
479-
adjacency_method = "indexed",
480-
saveable, resume,
481-
save_path, verbose,
482-
lastComputed, nr,checkpoint_files,update_rate,
483-
parList, lens, save_rate_parlist,
484-
...) {
485-
if (adjacency_method == "loop") {
486-
# old
487-
if (lastComputed < nr) {
488-
# Loop through each individual in the pedigree build the adjacency matrix for parent-child relationships
489-
# Is person in column j the parent of the person in row i? .5 for yes, 0 for no.
490-
491-
ped$momID <- as.numeric(ped$momID)
492-
ped$dadID <- as.numeric(ped$dadID)
493-
ped$ID <- as.numeric(ped$ID)
494-
495-
for (i in (lastComputed + 1):nr) {
469+
.adjLoop <- function(ped, component, saveable, resume,
470+
save_path, verbose, lastComputed,
471+
nr, checkpoint_files, update_rate,
472+
parList, lens, save_rate_parlist,
473+
...){
474+
# Loop through each individual in the pedigree
475+
# Build the adjacency matrix for parent-child relationships
476+
# Is person in column j the parent of the person in row i? .5 for yes, 0 for no.
477+
ped$momID <- as.numeric(ped$momID)
478+
ped$dadID <- as.numeric(ped$dadID)
479+
ped$ID <- as.numeric(ped$ID)
480+
481+
for (i in (lastComputed + 1):nr) {
496482
x <- ped[i, , drop = FALSE]
497-
498-
# Handle parentage according to the 'component' specified # povitch algorymn
483+
# Handle parentage according to the 'component' specified
499484
if (component %in% c("generation", "additive")) {
500-
# Code for 'generation' and 'additive' components
501-
# Checks if is mom of ID or is dad of ID
502-
# do once
503-
# xID <- as.numeric(x["ID"])
504-
# sMom <- (xID == as.numeric(ped$momID))
505-
# sDad <- (xID == as.numeric(ped$dadID))
506-
# val <- sMom | sDad
507-
# val[is.na(val)] <- FALSE
508-
# reduces computations
509-
xID <- as.numeric(x["ID"])
510-
sMom <- (xID == ped$momID)
511-
sDad <- (xID == ped$dadID)
512-
val <- sMom | sDad
513-
val[is.na(val)] <- FALSE
485+
# Code for 'generation' and 'additive' components
486+
# Checks if is mom of ID or is dad of ID
487+
xID <- as.numeric(x["ID"])
488+
sMom <- (xID == ped$momID)
489+
sDad <- (xID == ped$dadID)
490+
val <- sMom | sDad
491+
val[is.na(val)] <- FALSE
514492
} else if (component %in% c("common nuclear")) {
515-
# Code for 'common nuclear' component
516-
# IDs have the Same mom and Same dad
517-
sMom <- (as.numeric(x["momID"]) == ped$momID)
518-
sMom[is.na(sMom)] <- FALSE
519-
sDad <- (as.numeric(x["dadID"]) == ped$dadID)
520-
sDad[is.na(sDad)] <- FALSE
521-
val <- sMom & sDad
493+
# Code for 'common nuclear' component
494+
# IDs have the Same mom and Same dad
495+
sMom <- (as.numeric(x["momID"]) == ped$momID)
496+
sMom[is.na(sMom)] <- FALSE
497+
sDad <- (as.numeric(x["dadID"]) == ped$dadID)
498+
sDad[is.na(sDad)] <- FALSE
499+
val <- sMom & sDad
522500
} else if (component %in% c("mitochondrial")) {
523-
# Code for 'mitochondrial' component
524-
# sMom <- (as.numeric(x["ID"]) == as.numeric(ped$momID))
525-
# sDad <- TRUE
526-
# val <- sMom & sDad
527-
# val[is.na(val)] <- FALSE
528-
529-
# reduces computations
530-
val <- (as.numeric(x["ID"]) == ped$momID)
531-
val[is.na(val)] <- FALSE
501+
# Code for 'mitochondrial' component
502+
val <- (as.numeric(x["ID"]) == ped$momID)
503+
val[is.na(val)] <- FALSE
532504
} else {
533-
stop("Unknown relatedness component requested")
505+
stop("Unknown relatedness component requested")
534506
}
535507
# Storing the indices of the parent-child relationships
536-
# keep track of indices only, and then initialize a single sparse matrix
508+
# Keep track of indices only, and then initialize a single sparse matrix
537509
wv <- which(val)
538510
parList[[i]] <- wv
539511
lens[i] <- length(wv)
540512
# Print progress if verbose is TRUE
541513
if (verbose && (i %% update_rate == 0)) {
542-
cat(paste0("Done with ", i, " of ", nr, "\n"))
514+
cat(paste0("Done with ", i, " of ", nr, "\n"))
543515
}
544516
# Checkpointing every save_rate iterations
545517
if (saveable && (i %% save_rate_parlist == 0)) {
546-
saveRDS(parList, file = checkpoint_files$parList)
547-
saveRDS(lens, file = checkpoint_files$lens)
548-
if (verbose) cat("Checkpointed parlist saved at iteration", i, "\n")
518+
saveRDS(parList, file = checkpoint_files$parList)
519+
saveRDS(lens, file = checkpoint_files$lens)
520+
if (verbose) cat("Checkpointed parlist saved at iteration", i, "\n")
549521
}
550-
}
551-
if (saveable) {
552-
saveRDS(parList, file = checkpoint_files$parList)
553-
saveRDS(lens, file = checkpoint_files$lens)
554-
if (verbose) cat("Final checkpoint saved for adjacency matrix.\n")
555-
}
556522
}
557-
} else if (adjacency_method == "indexed") {
558-
if (lastComputed < nr) {
559-
# Loop through each individual in the pedigree build the adjacency matrix for parent-child relationships
560-
# Is person in column j the parent of the person in row i? .5 for yes, 0 for no.
561-
# Convert IDs
562-
ped$ID <- as.numeric(ped$ID)
563-
ped$momID <- as.numeric(ped$momID)
564-
ped$dadID <- as.numeric(ped$dadID)
565-
566-
# parent-child lookup
567-
mom_index <- match(ped$momID, ped$ID, nomatch = 0)
568-
dad_index <- match(ped$dadID, ped$ID, nomatch = 0)
569-
570-
571-
for (i in (lastComputed + 1):nr) {
572-
if (component %in% c("generation", "additive")) {
523+
jss <- rep(1L:nr, times = lens)
524+
iss <- unlist(parList)
525+
list_of_adjacency <- list(iss=iss, jss=jss)
526+
return(list_of_adjacency)
527+
}
528+
529+
.adjIndexed <- function(ped, component, saveable, resume,
530+
save_path, verbose, lastComputed,
531+
nr, checkpoint_files, update_rate,
532+
parList, lens, save_rate_parlist,
533+
...){
534+
# Loop through each individual in the pedigree
535+
# Build the adjacency matrix for parent-child relationships
536+
# Is person in column j the parent of the person in row i? .5 for yes, 0 for no.
537+
538+
# Convert IDs
539+
ped$ID <- as.numeric(ped$ID)
540+
ped$momID <- as.numeric(ped$momID)
541+
ped$dadID <- as.numeric(ped$dadID)
542+
543+
# parent-child lookup
544+
mom_index <- match(ped$momID, ped$ID, nomatch = 0)
545+
dad_index <- match(ped$dadID, ped$ID, nomatch = 0)
546+
547+
for (i in (lastComputed + 1):nr) {
548+
if (component %in% c("generation", "additive")) {
573549
sMom <- (mom_index == i)
574550
sDad <- (dad_index == i)
575551
val <- sMom | sDad
576-
# val <- (mom_index == i) | (dad_index == i)
577-
} else if (component %in% c("common nuclear")) {
552+
} else if (component %in% c("common nuclear")) {
578553
# Code for 'common nuclear' component
579554
# IDs have the Same mom and Same dad
580555
sMom <- (ped$momID[i] == ped$momID)
581556
sMom[is.na(sMom)] <- FALSE
582557
sDad <- (ped$dadID[i] == ped$dadID)
583558
sDad[is.na(sDad)] <- FALSE
584559
val <- sMom & sDad
585-
#val <- (ped$momID[i] == ped$momID) & (ped$dadID[i] == ped$dadID)
586-
} else if (component %in% c("mitochondrial")) {
560+
} else if (component %in% c("mitochondrial")) {
587561
val <- (mom_index == i)
588-
} else {
562+
} else {
589563
stop("Unknown relatedness component requested")
590-
}
564+
}
591565

592-
val[is.na(val)] <- FALSE
593-
parList[[i]] <- which(val)
594-
lens[i] <- length(parList[[i]])
566+
val[is.na(val)] <- FALSE
567+
parList[[i]] <- which(val)
568+
lens[i] <- length(parList[[i]])
595569

596-
# Print progress if verbose is TRUE
597-
if (verbose && (i %% update_rate == 0)) {
570+
# Print progress if verbose is TRUE
571+
if (verbose && (i %% update_rate == 0)) {
598572
cat(paste0("Done with ", i, " of ", nr, "\n"))
599-
}
573+
}
600574

601-
# Checkpointing every save_rate iterations
602-
if (saveable && (i %% save_rate_parlist == 0)) {
575+
# Checkpointing every save_rate iterations
576+
if (saveable && (i %% save_rate_parlist == 0)) {
603577
saveRDS(parList, file = checkpoint_files$parList)
604578
saveRDS(lens, file = checkpoint_files$lens)
605579
if (verbose) cat("Checkpointed parlist saved at iteration", i, "\n")
606-
}
607-
}}
608-
if (saveable) {
609-
saveRDS(parList, file = checkpoint_files$parList)
610-
saveRDS(lens, file = checkpoint_files$lens)
611-
if (verbose) cat("Final checkpoint saved for adjacency matrix.\n")
612580
}
613-
} else {
614-
stop("Invalid method specified. Choose from 'loop' or 'indexed'.")
615-
}
616-
list_of_adjacency <- list(parList = parList, lens = lens)
617-
return(list_of_adjacency)
618-
}
581+
}
582+
jss <- rep(1L:nr, times = lens)
583+
iss <- unlist(parList)
584+
list_of_adjacency <- list(iss=iss, jss=jss)
585+
return(list_of_adjacency)
586+
}
587+
588+
.adjDirect <- function(ped, component, saveable, resume,
589+
save_path, verbose, lastComputed,
590+
nr, checkpoint_files, update_rate,
591+
parList, lens, save_rate_parlist,
592+
...){
593+
# Loop through each individual in the pedigree
594+
# Build the adjacency matrix for parent-child relationships
595+
# Is person in column j the parent of the person in row i? .5 for yes, 0 for no.
596+
uniID <- ped$ID # live dangerously without sort(unique(ped$ID))
597+
ped$ID <- as.numeric(factor(ped$ID, levels=uniID))
598+
ped$momID <- as.numeric(factor(ped$momID, levels=uniID))
599+
ped$dadID <- as.numeric(factor(ped$dadID, levels=uniID))
600+
601+
if (component %in% c("generation", "additive")) {
602+
mIDs <- na.omit(data.frame(rID=ped$ID, cID=ped$momID))
603+
dIDs <- na.omit(data.frame(rID=ped$ID, cID=ped$dadID))
604+
iss <- c(mIDs$rID, dIDs$rID)
605+
jss <- c(mIDs$cID, dIDs$cID)
606+
} else if (component %in% c("common nuclear")) {
607+
stop("Common Nuclear component is not yet implemented for direct method. Use index method.\n")
608+
} else if (component %in% c("mitochondrial")) {
609+
mIDs <- na.omit(data.frame(rID=ped$ID, cID=ped$momID))
610+
iss <- c(mIDs$rID)
611+
jss <- c(mIDs$cID)
612+
} else {
613+
stop("Unknown relatedness component requested")
614+
}
615+
list_of_adjacency <- list(iss=iss, jss=jss)
616+
return(list_of_adjacency)
617+
}
618+
619+
#' Compute Parent Adjacency Matrix with Multiple Approaches
620+
#' @inheritParams ped2com
621+
#' @inherit ped2com details
622+
#' @param nr the number of rows in the pedigree dataset
623+
#' @param lastComputed the last computed index
624+
#' @param parList a list of parent-child relationships
625+
#' @param lens a vector of the lengths of the parent-child relationships
626+
#' @param checkpoint_files a list of checkpoint files
627+
628+
compute_parent_adjacency <- function(ped, component,
629+
adjacency_method = "indexed",
630+
saveable, resume,
631+
save_path, verbose,
632+
lastComputed, nr, checkpoint_files, update_rate,
633+
parList, lens, save_rate_parlist,
634+
...) {
635+
if (adjacency_method == "loop") {
636+
if (lastComputed < nr) { # Original version
637+
list_of_adjacency <- .adjLoop(ped, component, saveable, resume,
638+
save_path, verbose, lastComputed,
639+
nr, checkpoint_files, update_rate,
640+
parList, lens, save_rate_parlist,
641+
...)
642+
}
643+
} else if (adjacency_method == "indexed") { # Garrison version
644+
if (lastComputed < nr) {
645+
list_of_adjacency <- .adjIndexed(ped, component, saveable, resume,
646+
save_path, verbose, lastComputed,
647+
nr, checkpoint_files, update_rate,
648+
parList, lens, save_rate_parlist,
649+
...)
650+
}
651+
} else if (adjacency_method == "direct"){ # Hunter version
652+
if (lastComputed < nr){
653+
list_of_adjacency <- .adjDirect(ped, component, saveable, resume,
654+
save_path, verbose, lastComputed,
655+
nr, checkpoint_files, update_rate,
656+
parList, lens, save_rate_parlist,
657+
...)
658+
}
659+
} else {
660+
stop("Invalid method specified. Choose from 'loop', 'direct', or 'indexed'.")
661+
}
662+
if (saveable) {
663+
saveRDS(parList, file = checkpoint_files$parList)
664+
saveRDS(lens, file = checkpoint_files$lens)
665+
if (verbose) cat("Final checkpoint saved for adjacency matrix.\n")
666+
}
667+
return(list_of_adjacency)
668+
}

man/ped2com.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)