Skip to content

Commit 74f3eef

Browse files
Fix maintainability and style issues in R/helpers_regression.R
Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com>
1 parent 1ed700d commit 74f3eef

1 file changed

Lines changed: 57 additions & 26 deletions

File tree

R/helpers_regression.R

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
#'
33
#' @description This function determines the order of sibling pairs based on an outcome variable.
44
#' The function checks which of the two kinship pairs has more of a specified outcome variable.
5-
#' It adds a new column named `order` to the dataset, indicating which sibling (identified as "s1" or "s2") has more of the outcome.
5+
#' It adds a new column named `order` to the dataset, indicating which sibling
6+
#' (identified as "s1" or "s2") has more of the outcome.
67
#' If the two siblings have the same amount of the outcome, it randomly assigns one as having more.
78
#
89
#' @inheritParams discord_data
@@ -16,15 +17,16 @@ check_sibling_order <- function(..., fast = FALSE) {
1617
if (fast == TRUE) {
1718
check_sibling_order_fast(...)
1819
} else {
19-
check_sibling_order_ram_optimized(...)
20+
check_sibling_order_ram(...)
2021
}
2122
}
2223

2324
#' @title Check Sibling Order RAM Optimized
2425
#'
2526
#' @description This function determines the order of sibling pairs based on an outcome variable.
2627
#' The function checks which of the two kinship pairs has more of a specified outcome variable.
27-
#' It adds a new column named `order` to the dataset, indicating which sibling (identified as "s1" or "s2") has more of the outcome.
28+
#' It adds a new column named `order` to the dataset, indicating which sibling
29+
#' (identified as "s1" or "s2") has more of the outcome.
2830
#' If the two siblings have the same amount of the outcome, it randomly assigns one as having more.
2931
#'
3032
#' @inheritParams discord_data
@@ -36,7 +38,7 @@ check_sibling_order <- function(..., fast = FALSE) {
3638
#' neither) has more of the outcome.
3739
#' @keywords internal
3840

39-
check_sibling_order_ram_optimized <- function(data, outcome, pair_identifiers, row) {
41+
check_sibling_order_ram <- function(data, outcome, pair_identifiers, row) {
4042
# Select the row of interest from the data frame
4143
data <- data[row, ]
4244

@@ -46,7 +48,11 @@ check_sibling_order_ram_optimized <- function(data, outcome, pair_identifiers, r
4648

4749
# Check if either sibling has missing (NA) outcome data
4850
if (is.na(outcome1) || is.na(outcome2)) {
49-
stop(paste0("There are missing data, encoded as `NA`, for at least one kinship pair in the '", outcome, "' variable and data cannot be prepped properly.\n Please remove or impute missing data."))
51+
stop(paste0(
52+
"There are missing data, encoded as `NA`, for at least one kinship pair in the '",
53+
outcome, "' variable and data cannot be prepped properly.\n",
54+
" Please remove or impute missing data."
55+
))
5056
}
5157
# Determine sibling order
5258
if (outcome1 > outcome2) {
@@ -63,7 +69,7 @@ check_sibling_order_ram_optimized <- function(data, outcome, pair_identifiers, r
6369
}
6470
}
6571

66-
return(data)
72+
data
6773
}
6874

6975
check_sibling_order_fast <- function(data, outcome, pair_identifiers) {
@@ -75,7 +81,11 @@ check_sibling_order_fast <- function(data, outcome, pair_identifiers) {
7581

7682
# Check for missing outcome data
7783
if (any(is.na(outcome1) | is.na(outcome2))) {
78-
stop(paste0("There are missing data, encoded as `NA`, for at least one kinship pair in the '", outcome, "' variable and data cannot be prepped properly.\n Please remove or impute missing data."))
84+
stop(paste0(
85+
"There are missing data, encoded as `NA`, for at least one kinship pair in the '",
86+
outcome, "' variable and data cannot be prepped properly.\n",
87+
" Please remove or impute missing data."
88+
))
7989
}
8090

8191
order <- ifelse(outcome1 > outcome2, "s1",
@@ -90,14 +100,17 @@ check_sibling_order_fast <- function(data, outcome, pair_identifiers) {
90100
}
91101

92102
data$order <- order
93-
return(data)
103+
data
94104
}
95105

96106

97107
#' @title Make Mean Differences
98108
#'
99-
#' @description This function calculates differences and means of a given variable for each kinship pair. The order of subtraction and the variables' names in the output dataframe depend on the order column set by check_sibling_order().
100-
#' If the demographics parameter is set to "race", "sex", or "both", it also prepares demographic information accordingly,
109+
#' @description This function calculates differences and means of a given variable for each
110+
#' kinship pair. The order of subtraction and the variables' names in the output dataframe
111+
#' depend on the order column set by check_sibling_order().
112+
#' If the demographics parameter is set to "race", "sex", or "both", it also prepares
113+
#' demographic information accordingly,
101114
#' swapping the order of demographics as per the order column.
102115
#' @inheritParams discord_data
103116
#' @inheritParams check_sibling_order
@@ -177,7 +190,7 @@ make_mean_diffs_ram_optimized <- function(data, id, sex, race, demographics,
177190
)
178191

179192

180-
return(output)
193+
output
181194
}
182195

183196

@@ -270,28 +283,34 @@ recode_demographics <- function(demographics, data, raceS1, raceS2,
270283
if (demographics == "both" || demographics == "race") {
271284
race_1_name <- paste0(race, "_1")
272285
race_2_name <- paste0(race, "_2")
273-
output_demographics[[paste0(race, "_binarymatch")]] <- ifelse(output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
286+
output_demographics[[paste0(race, "_binarymatch")]] <- ifelse(
287+
output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
274288
1, 0
275289
)
276-
output_demographics[[paste0(race, "_multimatch")]] <- ifelse(output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
290+
output_demographics[[paste0(race, "_multimatch")]] <- ifelse(
291+
output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
277292
as.character(output_demographics[[race_2_name]]), "mixed"
278293
)
279294
}
280295
if (demographics == "both" || demographics == "sex") {
281296
sex_1_name <- paste0(sex, "_1")
282297
sex_2_name <- paste0(sex, "_2")
283-
output_demographics[[paste0(sex, "_binarymatch")]] <- ifelse(output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]],
298+
output_demographics[[paste0(sex, "_binarymatch")]] <- ifelse(
299+
output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]],
284300
1, 0
285301
)
286-
output_demographics[[paste0(sex, "_multimatch")]] <- ifelse(output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]], as.character(output_demographics[[sex_2_name]]), "mixed")
302+
output_demographics[[paste0(sex, "_multimatch")]] <- ifelse(
303+
output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]],
304+
as.character(output_demographics[[sex_2_name]]), "mixed"
305+
)
287306
}
288307
}
289308

290309
if (exists("output_demographics")) {
291310
output <- base::cbind(output, output_demographics)
292311
}
293312

294-
return(output)
313+
output
295314
}
296315

297316

@@ -346,12 +365,13 @@ make_mean_diffs_fast <- function(data, id, sex, race, demographics,
346365
)
347366
diff_list[[var]] <- tmp
348367
}
349-
return(diff_list)
368+
diff_list
350369
}
351370

352371
#' @title Check Discord Errors
353372
#'
354-
#' @description This function checks for common errors in the provided data, including the correct specification of identifiers (ID, sex, race) and their existence in the data.
373+
#' @description This function checks for common errors in the provided data, including
374+
#' the correct specification of identifiers (ID, sex, race) and their existence in the data.
355375
#'
356376
#
357377
#' @param data The data to perform a discord regression on.
@@ -370,15 +390,26 @@ check_discord_errors <- function(data, id, sex, race, pair_identifiers) {
370390
}
371391

372392
if (!base::is.null(sex) && base::sum(base::grepl(sex, base::names(data))) == 0) {
373-
stop(paste0("The kinship pair sex identifier \"", sex, "\" is not appropriately defined. Please check that you have the correct column name."))
393+
stop(paste0(
394+
"The kinship pair sex identifier \"", sex,
395+
"\" is not appropriately defined. Please check that you have the correct column name."
396+
))
374397
}
375398
if (!base::is.null(race) && base::sum(base::grepl(race, base::names(data))) == 0) {
376-
stop(paste0("The kinship pair race identifier \"", race, "\" is not appropriately defined. Please check that you have the correct column name."))
399+
stop(paste0(
400+
"The kinship pair race identifier \"", race,
401+
"\" is not appropriately defined. Please check that you have the correct column name."
402+
))
377403
}
378-
if (base::sum(base::grepl(pair_identifiers[1], base::names(data))) == 0 | base::sum(base::grepl(pair_identifiers[2], base::names(data))) == 0) {
379-
stop(paste0("Please check that the kinship pair identifiers \"", pair_identifiers[1], "\" and \"", pair_identifiers[2], "\" are valid, i.e. ensure that you have the correct labels for each kin."))
404+
if (base::sum(base::grepl(pair_identifiers[1], base::names(data))) == 0 ||
405+
base::sum(base::grepl(pair_identifiers[2], base::names(data))) == 0) {
406+
stop(paste0(
407+
"Please check that the kinship pair identifiers \"", pair_identifiers[1],
408+
"\" and \"", pair_identifiers[2],
409+
"\" are valid, i.e. ensure that you have the correct labels for each kin."
410+
))
380411
}
381-
if (!base::is.null(sex) & !base::is.null(race) && sex == race) {
412+
if (!base::is.null(sex) && !base::is.null(race) && sex == race) {
382413
stop("Please check that your sex and race variables are not equal.")
383414
}
384415
}
@@ -401,11 +432,11 @@ valid_ids <- function(data, id) {
401432
dwarn("Specified id column does not contain unique values for each kin-pair.
402433
Adding row-wise ID for restructuring data into paired format for analysis.
403434
For more details, see <https://github.com/R-Computing-Lab/discord/issues/6>.")
404-
return(FALSE)
435+
FALSE
405436
} else if (id_length == nrow(data)) {
406-
return(TRUE)
437+
TRUE
407438
}
408439
} else if (is.null(id)) {
409-
return(FALSE)
440+
FALSE
410441
}
411442
}

0 commit comments

Comments
 (0)