Skip to content

Commit 08e4858

Browse files
Merge pull request #80 from R-Computing-Lab/dev_main
fixed globalVariables it's supposed to pass if the patch is within 10%, not 0%
2 parents d0c0613 + 3c0b4f8 commit 08e4858

8 files changed

Lines changed: 197 additions & 2034 deletions

File tree

R/summarizePedigree.R

Lines changed: 87 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
1+
utils::globalVariables(c(".N", ".SD"))
2+
13
#' Summarize Pedigree Data
24
#'
3-
#' This function summarizes pedigree data, by
4-
#' computing key summary statistics for all numeric variables and identifying the
5-
#' originating member (founder) for each family, maternal, and paternal lineage.
5+
#' This function summarizes pedigree data, by computing key summary statistics
6+
#' for all numeric variables and identifying the originating member (founder)
7+
#' for each family, maternal, and paternal lineage.
68
#'
7-
#' The function calculates standard descriptive statistics, including the count of individuals in
8-
#' each lineage, means, medians, minimum and maximum values, and standard deviations.
9-
#' Additionally, if `five_num_summary = TRUE`, the function includes the first and third quartiles (Q1, Q3)
10-
#' to provide a more detailed distributional summary. Users can also specify variables to exclude from
11-
#' the analysis via `skip_var`.
9+
#' The function calculates standard descriptive statistics, including the count
10+
#' of individuals in each lineage, means, medians, minimum and maximum values,
11+
#' and standard deviations.
12+
#' Additionally, if `five_num_summary = TRUE`, the function includes the first
13+
#' and third quartiles (Q1, Q3) to provide a more detailed distributional
14+
#' summary. Users can also specify variables to exclude from the analysis via
15+
#' `skip_var`.
1216
#'
13-
#' Beyond summary statistics, the function identifies the founding member of each lineage
14-
#' based on the specified sorting variable (`founder_sort_var`), defaulting to birth year (`byr`)
15-
#' when available or `personID` otherwise. Users can retrieve the largest and oldest
16-
#' lineages by setting `nbiggest` and `noldest`, respectively.
17+
#' Beyond summary statistics, the function identifies the founding member of
18+
#' each lineage based on the specified sorting variable (`founder_sort_var`),
19+
#' defaulting to birth year (`byr`) when available or `personID` otherwise.
20+
#' Users can retrieve the largest and oldest lineages by setting `nbiggest`
21+
#' and `noldest`, respectively.
1722
#'
1823
#' @inheritParams ped2fam
1924
#' @inheritParams ped2maternal
@@ -38,9 +43,14 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
3843
momID = "momID", dadID = "dadID",
3944
matID = "matID", patID = "patID",
4045
type = c("fathers", "mothers", "families"),
41-
byr = NULL, include_founder = FALSE, founder_sort_var = NULL,
42-
nbiggest = 5, noldest = nbiggest, skip_var = NULL,
43-
five_num_summary = FALSE, network_checks = FALSE,
46+
byr = NULL,
47+
include_founder = FALSE,
48+
founder_sort_var = NULL,
49+
nbiggest = 5,
50+
noldest = nbiggest,
51+
skip_var = NULL,
52+
five_num_summary = FALSE,
53+
network_checks = FALSE,
4454
verbose = FALSE) {
4555
# Fast Fails
4656

@@ -80,15 +90,24 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
8090
# Build the pedigree using the provided functions
8191
if ("families" %in% type && !famID %in% names(ped)) {
8292
if (verbose) message("Counting families...")
83-
ped <- ped2fam(ped, personID = personID, momID = momID, dadID = dadID, famID = famID)
93+
ped <- ped2fam(ped,
94+
personID = personID,
95+
momID = momID, dadID = dadID, famID = famID
96+
)
8497
}
8598
if ("mothers" %in% type && !matID %in% names(ped)) {
86-
if (verbose) message("Counting mothers...")
87-
ped <- ped2maternal(ped, personID = personID, momID = momID, dadID = dadID, matID = matID)
99+
if (verbose == TRUE) message("Counting mothers...")
100+
ped <- ped2maternal(ped,
101+
personID = personID,
102+
momID = momID, dadID = dadID, matID = matID
103+
)
88104
}
89105
if ("fathers" %in% type && !patID %in% names(ped)) {
90-
if (verbose) message("Counting fathers...")
91-
ped <- ped2paternal(ped, personID = personID, momID = momID, dadID = dadID, patID = patID)
106+
if (verbose == TRUE) message("Counting fathers...")
107+
ped <- ped2paternal(ped,
108+
personID = personID,
109+
momID = momID, dadID = dadID, patID = patID
110+
)
92111
}
93112

94113

@@ -103,7 +122,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
103122

104123

105124
if (network_checks) {
106-
if (verbose) message("Performing network validation checks...")
125+
if (verbose == TRUE) message("Performing network validation checks...")
107126
output$network_validation <- checkPedigreeNetwork(
108127
ped,
109128
personID = personID,
@@ -116,13 +135,13 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
116135
# Calculate summary statistics for families, maternal lines, and paternal lines
117136

118137
if ("families" %in% type) {
119-
if (verbose) message("Summarizing families...")
138+
if (verbose == TRUE) message("Summarizing families...")
120139
family_summary_dt <- calculateSummaryDT(ped_dt, famID,
121140
skip_var = skip_var,
122141
five_num_summary = five_num_summary
123142
)
124143
# Find the originating member for each line
125-
if (include_founder) {
144+
if (include_founder == TRUE) {
126145
family_summary_dt <- summarizeFounder(
127146
verbose = verbose, ped_dt = ped_dt,
128147
group_var = famID,
@@ -132,11 +151,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
132151
}
133152
output$family_summary <- family_summary_dt
134153
n_families <- nrow(family_summary_dt)
135-
if (verbose) message("Summarized ", n_families, " families.")
154+
if (verbose == TRUE) message("Summarized ", n_families, " families.")
136155
}
137156

138157
if ("mothers" %in% type) {
139-
if (verbose) message("Summarizing maternal lines...")
158+
if (verbose == TRUE) message("Summarizing maternal lines...")
140159
maternal_summary_dt <- calculateSummaryDT(ped_dt, matID,
141160
skip_var = skip_var,
142161
five_num_summary = five_num_summary
@@ -151,10 +170,10 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
151170
}
152171
output$maternal_summary <- maternal_summary_dt
153172
n_mothers <- nrow(maternal_summary_dt)
154-
if (verbose) message("Summarized ", n_mothers, " maternal lines.")
173+
if (verbose == TRUE) message("Summarized ", n_mothers, " maternal lines.")
155174
}
156175
if ("fathers" %in% type) {
157-
if (verbose) message("Summarizing paternal lines...")
176+
if (verbose == TRUE) message("Summarizing paternal lines...")
158177
paternal_summary_dt <- calculateSummaryDT(ped_dt, patID,
159178
skip_var = skip_var,
160179
five_num_summary = five_num_summary
@@ -170,7 +189,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
170189

171190
output$paternal_summary <- paternal_summary_dt
172191
n_fathers <- nrow(paternal_summary_dt)
173-
if (verbose) message("Summarized ", n_fathers, " paternal lines.")
192+
if (verbose == TRUE) message("Summarized ", n_fathers, " paternal lines.")
174193
}
175194

176195
## Check errors
@@ -183,7 +202,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
183202
## oldest
184203
if (!is.null(byr) && noldest > 0) {
185204
if (!is.null(n_families) && "families" %in% type) {
186-
if (verbose) message("Finding oldest families...")
205+
if (verbose == TRUE) message("Finding oldest families...")
187206
output$oldest_families <- findOldest(
188207
foo_summary_dt = family_summary_dt,
189208
byr = byr,
@@ -192,7 +211,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
192211
)
193212
}
194213
if (!is.null(n_mothers) && "mothers" %in% type) {
195-
if (verbose) message("Finding oldest maternal lines...")
214+
if (verbose == TRUE) message("Finding oldest maternal lines...")
196215
output$oldest_maternal <- findOldest(
197216
foo_summary_dt = maternal_summary_dt,
198217
byr = byr,
@@ -201,7 +220,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
201220
)
202221
}
203222
if (!is.null(n_fathers) && "fathers" %in% type) {
204-
if (verbose) message("Finding oldest paternal lines...")
223+
if (verbose == TRUE) message("Finding oldest paternal lines...")
205224
output$oldest_paternal <- findOldest(
206225
foo_summary_dt = paternal_summary_dt,
207226
byr = byr,
@@ -250,7 +269,10 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
250269
calculateSummaryDT <- function(data, group_var, skip_var,
251270
five_num_summary = FALSE) {
252271
# Identify numeric columns excluding the group_var and skip_var
253-
numeric_cols <- setdiff(names(data)[vapply(data, is.numeric, logical(1))], c(group_var, skip_var))
272+
numeric_cols <- setdiff(
273+
names(data)[vapply(data, is.numeric, logical(1))],
274+
c(group_var, skip_var)
275+
)
254276
summary_stats <- data[,
255277
{
256278
count <- .N # Calculate count once per group
@@ -261,8 +283,12 @@ calculateSummaryDT <- function(data, group_var, skip_var,
261283
mean = as.double(base::mean(x, na.rm = TRUE)),
262284
median = as.double(stats::median(x, na.rm = TRUE)),
263285
# mode = as.double(stats::mode(x, na.rm = TRUE)),
264-
min = ifelse(all(is.na(x)), as.double(NA), as.double(base::min(x, na.rm = TRUE))),
265-
max = ifelse(all(is.na(x)), as.double(NA), as.double(base::max(x, na.rm = TRUE))),
286+
min = ifelse(all(is.na(x)), as.double(NA),
287+
as.double(base::min(x, na.rm = TRUE))
288+
),
289+
max = ifelse(all(is.na(x)), as.double(NA),
290+
as.double(base::max(x, na.rm = TRUE))
291+
),
266292
sd = as.double(stats::sd(x, na.rm = TRUE))
267293
)
268294
if (five_num_summary) {
@@ -303,15 +329,22 @@ findFounder <- function(data, group_var, sort_var) {
303329

304330
#' Function to summarize the originating members for each line
305331
#'
306-
#' This function summarizes the originating members for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.
332+
#' This function summarizes the originating members for each line in a pedigree.
333+
#' It is supposed to be used internally by the \code{summarize_pedigree} function.
307334
#'
308335
#' @inheritParams summarizePedigrees
309336
#' @inheritParams findFounder
310337
#'
311338
#' @keywords internal
312339

313-
summarizeFounder <- function(ped_dt, group_var, sort_var, foo_summary_dt, verbose) {
314-
if (verbose) message(paste0("Finding originating members for ", "group_var"))
340+
summarizeFounder <- function(ped_dt, group_var, sort_var,
341+
foo_summary_dt, verbose) {
342+
if (verbose) {
343+
message(paste0(
344+
"Finding originating members for ",
345+
"group_var"
346+
))
347+
}
315348
originating_member_foo <- findFounder(
316349
data = ped_dt,
317350
group_var = group_var,
@@ -401,9 +434,15 @@ summarizeFamilies <- function(ped, famID = "famID", personID = "ID",
401434
noldest = noldest,
402435
byr = byr,
403436
include_founder = include_founder,
404-
momID = momID, dadID = dadID,
405-
famID = famID, matID = matID, patID = patID, skip_var = skip_var,
406-
type = "families", verbose = verbose, five_num_summary = five_num_summary,
437+
momID = momID,
438+
dadID = dadID,
439+
famID = famID,
440+
matID = matID,
441+
patID = patID,
442+
skip_var = skip_var,
443+
type = "families",
444+
verbose = verbose,
445+
five_num_summary = five_num_summary,
407446
founder_sort_var = founder_sort_var
408447
)
409448
}
@@ -422,16 +461,20 @@ findOldest <- function(foo_summary_dt, byr, noldest, n_foo) {
422461
}
423462

424463
# Function to find the biggest families in a pedigree
425-
#' This function finds the biggest families in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.
464+
465+
#' This function finds the biggest families in a pedigree. It is supposed to be
466+
#' used internally by the \code{summarize_pedigree} function.
426467
#' @inheritParams findOldest
427468
#' @inheritParams summarizePedigrees
428469
#' @returns a data.table containing the biggest families in the pedigree.
429470

430471

431472
findBiggest <- function(foo_summary_dt, nbiggest, n_foo) {
432-
biggest_foo <- try_na(foo_summary_dt[order(-get("count"))][1:min(c(nbiggest, n_foo),
433-
na.rm = TRUE
434-
)])
473+
biggest_foo <- try_na(
474+
foo_summary_dt[order(-get("count"))][1:min(c(nbiggest, n_foo),
475+
na.rm = TRUE
476+
)]
477+
)
435478
return(biggest_foo)
436479
}
437480

cran-comments.md

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,9 @@
11

22
# Description
33

4-
This update includes several enhancements and bug fixes to the BGmisc package, improving its functionality and usability for behavior genetics analysis. The key changes include:
5-
- **New Features**:
6-
- Added `calculateCIs` function for computing confidence intervals for correlation coefficients.
7-
- Introduced `addPhantomParents` function to handle phantom parents more efficiently.
8-
- Added aliases for mitochondrial-related terms (`mtdna`, `mitochondria`).
9-
- **Performance Improvements**:
10-
- Refactored `addPhantomParents` for better efficiency.
11-
- Optimized the `com2links`, `summarizePedigree`, and `checkIDs` functions to reduce complexity.
12-
- **Documentation and Testing**:
13-
- Updated documentation to reflect new features and improvements.
14-
- Added comprehensive tests for the `calculateCIs` function and other new features.
15-
- Reorganized unit tests for better structure and clarity.
4+
This update includes minor enhancements, a major update to the ASOIAF dataset, and rewritten vignettes to use ggpedigree.
165

6+
I am also moving the lone plotting function from BGmisc to ggpedigree package. I maintain both packages. As promised, now that the ggpedigree package update is on CRAN, I am uploading the new version of BGmisc that does not include the plotting function.
177

188
# Test Environments
199

man/findBiggest.Rd

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/summarizeFounder.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/summarizePedigrees.Rd

Lines changed: 16 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)