Skip to content

Commit da3696a

Browse files
Add example templates as pkg data
1 parent 7ef6a71 commit da3696a

10 files changed

Lines changed: 10478 additions & 32 deletions

R/eg_wpp.R

Lines changed: 0 additions & 16 deletions
This file was deleted.

R/example_data.R

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#' Example of UN-WPP time-series data
2+
#'
3+
#' An example of the population estimate data used by VIMC.
4+
#'
5+
#' @format ## `eg_wpp`
6+
#' A data frame with 65,448 rows and 5 columns:
7+
#' \describe{
8+
#' \item{country}{Country name; this is a placeholder name.}
9+
#' \item{year}{Year}
10+
#' \item{age}{Age}
11+
#' \item{gender}{Sex given as three categories, "Male", "Female", or "Both"}.
12+
#' \item{value}{Population size}
13+
#' }
14+
#'
15+
#' @keywords data
16+
#'
17+
#' @source Derived from data originally prepared by the United Nations as part
18+
#' of the World Population Prospects: <https://population.un.org/wpp/>.
19+
"eg_wpp"
20+
21+
#' Example of VIMC burden template provided to modellers
22+
#'
23+
#' An example of the central burden template provided by VIMC to modelling
24+
#' groups.
25+
#'
26+
#' @format ## `eg_burden_template`
27+
#' A data frame with 10,201 rows and 10 columns:
28+
#' \describe{
29+
#' \item{disease}
30+
#' \item{year}{Year}
31+
#' \item{age}{Age}
32+
#' \item{country}{Country name in short format; this is a placeholder name.}
33+
#' \item{country_name}{Country name in long format; this is a placeholder.}
34+
#' \item{cases}{Cases of the disease averted}.
35+
#' \item{dalys}{DALYs averted}.
36+
#' \item{deaths}{Deaths averted}.
37+
#' \item{yll}{Years of life-loss averted}.
38+
#' \item{cohort_size}{Population size of the country in a year}.
39+
#' }
40+
#'
41+
#' @keywords data
42+
#'
43+
#' @source Prepared by the VIMC secretariat.
44+
"eg_burden_template"

_pkgdown.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ reference:
2121
desc: Internal helper functions.
2222
contents:
2323
- has_keyword("internal")
24+
- title: Data
25+
desc: Package data.
26+
contents:
27+
- has_keyword("data")
2428
- title: Constants
2529
desc: Package constants.
2630
contents:

data-raw/eg_burden_template.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
## code to prepare `eg_burden_template` dataset goes here
2+
3+
library(readr)
4+
5+
eg_burden_template <- read_csv("inst/extdata/central-burden-template.csv")
6+
7+
usethis::use_data(eg_burden_template, overwrite = TRUE)

data/eg_burden_template.rda

1.69 KB
Binary file not shown.

inst/extdata/central-burden-template.csv

Lines changed: 10202 additions & 0 deletions
Large diffs are not rendered by default.

man/check_demography_alignment.Rd

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/eg_burden_template.Rd

Lines changed: 35 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/eg_wpp.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-burden_diagnostics.R

Lines changed: 179 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,41 +2,73 @@ test_that("`validate_file_dict_template()` works", {
22
# set up local tempdir to test missing bad file error msg
33
wd <- withr::local_tempdir()
44

5-
path_burden <- file.path(wd, "incoming_burden_estimates")
6-
dir.create(path_burden)
7-
file_dict <- file.path(path_burden, "file_dictionary.csv")
5+
tmp_path_burden <- file.path(wd, "incoming_burden_estimates")
6+
dir.create(tmp_path_burden)
7+
file_dict <- file.path(tmp_path_burden, "file_dictionary.csv")
88
dummy_data <- data.frame()
99
readr::write_csv(dummy_data, file_dict)
1010

1111
success_msg <- glue::glue(
1212
"(File dictionary found at)*({file_dict})*(no action needed)"
1313
)
14-
fail_msg <- "(File dictionary found)*(but it is not well formed)"
14+
fail_msg <- "(File dictionary)*(is not well formed)"
1515

1616
expect_error(
1717
validate_file_dict_template(
1818
disease,
19-
path_burden
19+
tmp_path_burden
2020
),
2121
fail_msg
2222
)
2323

2424
# check success segment
25-
path_burden <- test_path("incoming_burden_estimates")
25+
eg_path_burden <- test_path("incoming_burden_estimates")
2626
expect_message(
2727
validate_file_dict_template(
2828
disease,
29-
path_burden
29+
eg_path_burden
3030
),
3131
success_msg
3232
)
3333

34-
validate_file_dict_template(
35-
"dummy_disease",
36-
test_path("incoming_burden_estimates")
34+
# check ladder where file dict is missing
35+
file.remove(file_dict)
36+
expect_error(
37+
validate_file_dict_template(
38+
"dummy_disease",
39+
tmp_path_burden
40+
),
41+
"(model_inputs)*(does not exist)"
3742
)
3843

39-
# TODO: check other branch of if-else ladder
44+
# check no scenario.csv file found
45+
dir.create(file.path(tmp_path_burden, "model_inputs"))
46+
expect_error(
47+
validate_file_dict_template(
48+
"dummy_disease",
49+
tmp_path_burden
50+
),
51+
"(File does not exist)*(scenario\\.csv)"
52+
)
53+
54+
# check success when scenario.csv file found
55+
tmp_scenarios_file <- file.path(
56+
tmp_path_burden,
57+
"model_inputs",
58+
"scenario.csv"
59+
)
60+
file.copy(
61+
file.path(eg_path_burden, "model_inputs", "scenario.csv"),
62+
tmp_scenarios_file
63+
)
64+
65+
expect_message(
66+
validate_file_dict_template(
67+
"dummy_disease",
68+
tmp_path_burden
69+
),
70+
"(No file dictionary found)*(created a file dictionary)"
71+
)
4072
})
4173

4274
test_that("`validate_complete_incoming_files()`", {
@@ -68,7 +100,8 @@ test_that("`validate_complete_incoming_files()`", {
68100

69101
# check error on duplicated filenames
70102
dummy_data <- readr::read_csv(
71-
test_path("incoming_burden_estimates", "file_dictionary.csv")
103+
test_path("incoming_burden_estimates", "file_dictionary.csv"),
104+
show_col_types = FALSE
72105
)
73106
dummy_data$file <- "dummy_file.csv"
74107
readr::write_csv(dummy_data, file_dict)
@@ -81,12 +114,145 @@ test_that("`validate_complete_incoming_files()`", {
81114

82115
# check error on missing scenario files
83116
dummy_data <- readr::read_csv(
84-
test_path("incoming_burden_estimates", "file_dictionary.csv")
117+
test_path("incoming_burden_estimates", "file_dictionary.csv"),
118+
show_col_types = FALSE
85119
)
86120
readr::write_csv(dummy_data, file_dict)
87121
fail_msg <- "Expected as many scenario data files as scenarios"
88122
expect_error(
89123
validate_complete_incoming_files(path_burden),
90124
fail_msg
91125
)
126+
127+
# TODO: test success case
128+
})
129+
130+
# test for checking template alignment
131+
test_that("`check_template_alignment()` works", {
132+
template <- eg_burden_template
133+
burden_set <- template
134+
135+
expect_no_condition(
136+
validate_template_alignment(
137+
burden_set,
138+
template
139+
)
140+
)
141+
142+
diff_tibble <- tibble::tibble(
143+
disease = character(0),
144+
country = character(0),
145+
year = double(0),
146+
age = double(0)
147+
)
148+
149+
perfect_output <- list(
150+
missing_cols_in_burden = character(0),
151+
extra_cols_in_burden = character(0),
152+
burden_cols_matches_template = TRUE,
153+
missing_grid_in_burden = diff_tibble,
154+
extra_grid_in_burden = diff_tibble,
155+
burden_grid_matches_template = TRUE
156+
)
157+
158+
expect_identical(
159+
validate_template_alignment(
160+
burden_set,
161+
template
162+
),
163+
perfect_output,
164+
ignore_attr = TRUE
165+
)
166+
167+
# check when burden is missing cols
168+
burden_set_ <- burden_set
169+
missing_col <- "cohort_size"
170+
extra_col <- "extra_col"
171+
burden_set_[, missing_col] <- NULL
172+
burden_set_[, extra_col] <- NA_character_
173+
174+
extra_rows <- head(burden_set_)
175+
extra_rows$disease <- "dummy_disease_2"
176+
burden_set_ <- rbind(burden_set_, extra_rows)
177+
178+
output <- validate_template_alignment(
179+
burden_set_,
180+
template
181+
)
182+
expect_identical(
183+
output$missing_cols_in_burden,
184+
missing_col
185+
)
186+
expect_identical(
187+
output$extra_cols_in_burden,
188+
extra_col
189+
)
190+
expect_false(
191+
output$burden_cols_matches_template
192+
)
193+
expect_false(
194+
output$burden_grid_matches_template
195+
)
196+
197+
# check errors on inputs
198+
expect_error(
199+
validate_template_alignment(
200+
"burden_set",
201+
template
202+
),
203+
"Must be of type 'data.frame'"
204+
)
205+
expect_error(
206+
validate_template_alignment(
207+
burden_set,
208+
"template"
209+
),
210+
"Must be of type 'data.frame'"
211+
)
212+
})
213+
214+
test_that("`check_demography_alignment()` works", {
215+
# assume burden data are same as template
216+
burden <- eg_burden_template
217+
218+
# assign dummy cohort size value from random draw
219+
# 10e6 is the value in `eg_wpp`, allow negative differences
220+
burden$cohort_size <- withr::with_seed(
221+
1,
222+
rnorm(nrow(burden), 10e6, 1e4)
223+
)
224+
225+
expect_no_condition(
226+
check_demography_alignment(
227+
burden,
228+
eg_wpp
229+
)
230+
)
231+
232+
output <- check_demography_alignment(
233+
burden,
234+
eg_wpp
235+
)
236+
expected_names <- c(
237+
"country",
238+
"year",
239+
"age",
240+
"cohort_size",
241+
"provided",
242+
"expected",
243+
"difference",
244+
"abs_diff",
245+
"prop_diff"
246+
)
247+
248+
checkmate::expect_tibble(
249+
output,
250+
types = c("character", "numeric"), # allowed types, no order
251+
all.missing = FALSE,
252+
col.names = "unique"
253+
)
254+
checkmate::expect_names(
255+
colnames(output),
256+
permutation.of = expected_names
257+
)
92258
})

0 commit comments

Comments
 (0)