1+ utils :: globalVariables(c(" .N" , " .SD" ))
2+
13# ' Summarize Pedigree Data
24# '
3- # ' This function summarizes pedigree data, by
4- # ' computing key summary statistics for all numeric variables and identifying the
5- # ' originating member (founder) for each family, maternal, and paternal lineage.
5+ # ' This function summarizes pedigree data, by computing key summary statistics
6+ # ' for all numeric variables and identifying the originating member (founder)
7+ # ' for each family, maternal, and paternal lineage.
68# '
7- # ' The function calculates standard descriptive statistics, including the count of individuals in
8- # ' each lineage, means, medians, minimum and maximum values, and standard deviations.
9- # ' Additionally, if `five_num_summary = TRUE`, the function includes the first and third quartiles (Q1, Q3)
10- # ' to provide a more detailed distributional summary. Users can also specify variables to exclude from
11- # ' the analysis via `skip_var`.
9+ # ' The function calculates standard descriptive statistics, including the count
10+ # ' of individuals in each lineage, means, medians, minimum and maximum values,
11+ # ' and standard deviations.
12+ # ' Additionally, if `five_num_summary = TRUE`, the function includes the first
13+ # ' and third quartiles (Q1, Q3) to provide a more detailed distributional
14+ # ' summary. Users can also specify variables to exclude from the analysis via
15+ # ' `skip_var`.
1216# '
13- # ' Beyond summary statistics, the function identifies the founding member of each lineage
14- # ' based on the specified sorting variable (`founder_sort_var`), defaulting to birth year (`byr`)
15- # ' when available or `personID` otherwise. Users can retrieve the largest and oldest
16- # ' lineages by setting `nbiggest` and `noldest`, respectively.
17+ # ' Beyond summary statistics, the function identifies the founding member of
18+ # ' each lineage based on the specified sorting variable (`founder_sort_var`),
19+ # ' defaulting to birth year (`byr`) when available or `personID` otherwise.
20+ # ' Users can retrieve the largest and oldest lineages by setting `nbiggest`
21+ # ' and `noldest`, respectively.
1722# '
1823# ' @inheritParams ped2fam
1924# ' @inheritParams ped2maternal
@@ -38,9 +43,14 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
3843 momID = " momID" , dadID = " dadID" ,
3944 matID = " matID" , patID = " patID" ,
4045 type = c(" fathers" , " mothers" , " families" ),
41- byr = NULL , include_founder = FALSE , founder_sort_var = NULL ,
42- nbiggest = 5 , noldest = nbiggest , skip_var = NULL ,
43- five_num_summary = FALSE , network_checks = FALSE ,
46+ byr = NULL ,
47+ include_founder = FALSE ,
48+ founder_sort_var = NULL ,
49+ nbiggest = 5 ,
50+ noldest = nbiggest ,
51+ skip_var = NULL ,
52+ five_num_summary = FALSE ,
53+ network_checks = FALSE ,
4454 verbose = FALSE ) {
4555 # Fast Fails
4656
@@ -80,15 +90,24 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
8090 # Build the pedigree using the provided functions
8191 if (" families" %in% type && ! famID %in% names(ped )) {
8292 if (verbose ) message(" Counting families..." )
83- ped <- ped2fam(ped , personID = personID , momID = momID , dadID = dadID , famID = famID )
93+ ped <- ped2fam(ped ,
94+ personID = personID ,
95+ momID = momID , dadID = dadID , famID = famID
96+ )
8497 }
8598 if (" mothers" %in% type && ! matID %in% names(ped )) {
86- if (verbose ) message(" Counting mothers..." )
87- ped <- ped2maternal(ped , personID = personID , momID = momID , dadID = dadID , matID = matID )
99+ if (verbose == TRUE ) message(" Counting mothers..." )
100+ ped <- ped2maternal(ped ,
101+ personID = personID ,
102+ momID = momID , dadID = dadID , matID = matID
103+ )
88104 }
89105 if (" fathers" %in% type && ! patID %in% names(ped )) {
90- if (verbose ) message(" Counting fathers..." )
91- ped <- ped2paternal(ped , personID = personID , momID = momID , dadID = dadID , patID = patID )
106+ if (verbose == TRUE ) message(" Counting fathers..." )
107+ ped <- ped2paternal(ped ,
108+ personID = personID ,
109+ momID = momID , dadID = dadID , patID = patID
110+ )
92111 }
93112
94113
@@ -103,7 +122,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
103122
104123
105124 if (network_checks ) {
106- if (verbose ) message(" Performing network validation checks..." )
125+ if (verbose == TRUE ) message(" Performing network validation checks..." )
107126 output $ network_validation <- checkPedigreeNetwork(
108127 ped ,
109128 personID = personID ,
@@ -116,13 +135,13 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
116135 # Calculate summary statistics for families, maternal lines, and paternal lines
117136
118137 if (" families" %in% type ) {
119- if (verbose ) message(" Summarizing families..." )
138+ if (verbose == TRUE ) message(" Summarizing families..." )
120139 family_summary_dt <- calculateSummaryDT(ped_dt , famID ,
121140 skip_var = skip_var ,
122141 five_num_summary = five_num_summary
123142 )
124143 # Find the originating member for each line
125- if (include_founder ) {
144+ if (include_founder == TRUE ) {
126145 family_summary_dt <- summarizeFounder(
127146 verbose = verbose , ped_dt = ped_dt ,
128147 group_var = famID ,
@@ -132,11 +151,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
132151 }
133152 output $ family_summary <- family_summary_dt
134153 n_families <- nrow(family_summary_dt )
135- if (verbose ) message(" Summarized " , n_families , " families." )
154+ if (verbose == TRUE ) message(" Summarized " , n_families , " families." )
136155 }
137156
138157 if (" mothers" %in% type ) {
139- if (verbose ) message(" Summarizing maternal lines..." )
158+ if (verbose == TRUE ) message(" Summarizing maternal lines..." )
140159 maternal_summary_dt <- calculateSummaryDT(ped_dt , matID ,
141160 skip_var = skip_var ,
142161 five_num_summary = five_num_summary
@@ -151,10 +170,10 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
151170 }
152171 output $ maternal_summary <- maternal_summary_dt
153172 n_mothers <- nrow(maternal_summary_dt )
154- if (verbose ) message(" Summarized " , n_mothers , " maternal lines." )
173+ if (verbose == TRUE ) message(" Summarized " , n_mothers , " maternal lines." )
155174 }
156175 if (" fathers" %in% type ) {
157- if (verbose ) message(" Summarizing paternal lines..." )
176+ if (verbose == TRUE ) message(" Summarizing paternal lines..." )
158177 paternal_summary_dt <- calculateSummaryDT(ped_dt , patID ,
159178 skip_var = skip_var ,
160179 five_num_summary = five_num_summary
@@ -170,7 +189,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
170189
171190 output $ paternal_summary <- paternal_summary_dt
172191 n_fathers <- nrow(paternal_summary_dt )
173- if (verbose ) message(" Summarized " , n_fathers , " paternal lines." )
192+ if (verbose == TRUE ) message(" Summarized " , n_fathers , " paternal lines." )
174193 }
175194
176195 # # Check errors
@@ -183,7 +202,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
183202 # # oldest
184203 if (! is.null(byr ) && noldest > 0 ) {
185204 if (! is.null(n_families ) && " families" %in% type ) {
186- if (verbose ) message(" Finding oldest families..." )
205+ if (verbose == TRUE ) message(" Finding oldest families..." )
187206 output $ oldest_families <- findOldest(
188207 foo_summary_dt = family_summary_dt ,
189208 byr = byr ,
@@ -192,7 +211,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
192211 )
193212 }
194213 if (! is.null(n_mothers ) && " mothers" %in% type ) {
195- if (verbose ) message(" Finding oldest maternal lines..." )
214+ if (verbose == TRUE ) message(" Finding oldest maternal lines..." )
196215 output $ oldest_maternal <- findOldest(
197216 foo_summary_dt = maternal_summary_dt ,
198217 byr = byr ,
@@ -201,7 +220,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
201220 )
202221 }
203222 if (! is.null(n_fathers ) && " fathers" %in% type ) {
204- if (verbose ) message(" Finding oldest paternal lines..." )
223+ if (verbose == TRUE ) message(" Finding oldest paternal lines..." )
205224 output $ oldest_paternal <- findOldest(
206225 foo_summary_dt = paternal_summary_dt ,
207226 byr = byr ,
@@ -250,7 +269,10 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
250269calculateSummaryDT <- function (data , group_var , skip_var ,
251270 five_num_summary = FALSE ) {
252271 # Identify numeric columns excluding the group_var and skip_var
253- numeric_cols <- setdiff(names(data )[vapply(data , is.numeric , logical (1 ))], c(group_var , skip_var ))
272+ numeric_cols <- setdiff(
273+ names(data )[vapply(data , is.numeric , logical (1 ))],
274+ c(group_var , skip_var )
275+ )
254276 summary_stats <- data [,
255277 {
256278 count <- .N # Calculate count once per group
@@ -261,8 +283,12 @@ calculateSummaryDT <- function(data, group_var, skip_var,
261283 mean = as.double(base :: mean(x , na.rm = TRUE )),
262284 median = as.double(stats :: median(x , na.rm = TRUE )),
263285 # mode = as.double(stats::mode(x, na.rm = TRUE)),
264- min = ifelse(all(is.na(x )), as.double(NA ), as.double(base :: min(x , na.rm = TRUE ))),
265- max = ifelse(all(is.na(x )), as.double(NA ), as.double(base :: max(x , na.rm = TRUE ))),
286+ min = ifelse(all(is.na(x )), as.double(NA ),
287+ as.double(base :: min(x , na.rm = TRUE ))
288+ ),
289+ max = ifelse(all(is.na(x )), as.double(NA ),
290+ as.double(base :: max(x , na.rm = TRUE ))
291+ ),
266292 sd = as.double(stats :: sd(x , na.rm = TRUE ))
267293 )
268294 if (five_num_summary ) {
@@ -303,15 +329,22 @@ findFounder <- function(data, group_var, sort_var) {
303329
304330# ' Function to summarize the originating members for each line
305331# '
306- # ' This function summarizes the originating members for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.
332+ # ' This function summarizes the originating members for each line in a pedigree.
333+ # ' It is supposed to be used internally by the \code{summarize_pedigree} function.
307334# '
308335# ' @inheritParams summarizePedigrees
309336# ' @inheritParams findFounder
310337# '
311338# ' @keywords internal
312339
313- summarizeFounder <- function (ped_dt , group_var , sort_var , foo_summary_dt , verbose ) {
314- if (verbose ) message(paste0(" Finding originating members for " , " group_var" ))
340+ summarizeFounder <- function (ped_dt , group_var , sort_var ,
341+ foo_summary_dt , verbose ) {
342+ if (verbose ) {
343+ message(paste0(
344+ " Finding originating members for " ,
345+ " group_var"
346+ ))
347+ }
315348 originating_member_foo <- findFounder(
316349 data = ped_dt ,
317350 group_var = group_var ,
@@ -401,9 +434,15 @@ summarizeFamilies <- function(ped, famID = "famID", personID = "ID",
401434 noldest = noldest ,
402435 byr = byr ,
403436 include_founder = include_founder ,
404- momID = momID , dadID = dadID ,
405- famID = famID , matID = matID , patID = patID , skip_var = skip_var ,
406- type = " families" , verbose = verbose , five_num_summary = five_num_summary ,
437+ momID = momID ,
438+ dadID = dadID ,
439+ famID = famID ,
440+ matID = matID ,
441+ patID = patID ,
442+ skip_var = skip_var ,
443+ type = " families" ,
444+ verbose = verbose ,
445+ five_num_summary = five_num_summary ,
407446 founder_sort_var = founder_sort_var
408447 )
409448}
@@ -422,16 +461,20 @@ findOldest <- function(foo_summary_dt, byr, noldest, n_foo) {
422461}
423462
424463# Function to find the biggest families in a pedigree
425- # ' This function finds the biggest families in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.
464+
465+ # ' This function finds the biggest families in a pedigree. It is supposed to be
466+ # ' used internally by the \code{summarize_pedigree} function.
426467# ' @inheritParams findOldest
427468# ' @inheritParams summarizePedigrees
428469# ' @returns a data.table containing the biggest families in the pedigree.
429470
430471
431472findBiggest <- function (foo_summary_dt , nbiggest , n_foo ) {
432- biggest_foo <- try_na(foo_summary_dt [order(- get(" count" ))][1 : min(c(nbiggest , n_foo ),
433- na.rm = TRUE
434- )])
473+ biggest_foo <- try_na(
474+ foo_summary_dt [order(- get(" count" ))][1 : min(c(nbiggest , n_foo ),
475+ na.rm = TRUE
476+ )]
477+ )
435478 return (biggest_foo )
436479}
437480
0 commit comments