vimc
diff --git a/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions b/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 0 deletions b/‎DESCRIPTION‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 3 additions & 2 deletions b/‎NAMESPACE‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎R/burden_diagnositics.R‎
Lines changed: 26 additions & 17 deletions b/‎R/burden_diagnositics.R‎
Lines changed: 26 additions & 17 deletions
diff --git a/‎R/eg_wpp.R‎
Lines changed: 16 additions & 0 deletions b/‎R/eg_wpp.R‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎data-raw/eg_wpp.R‎
Lines changed: 33 additions & 0 deletions b/‎data-raw/eg_wpp.R‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎data/eg_wpp.rda‎
1.9 KB b/‎data/eg_wpp.rda‎
1.9 KB
diff --git a/‎man/check_template_alignment.Rd‎
Lines changed: 0 additions & 17 deletions b/‎man/check_template_alignment.Rd‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎man/constants.Rd‎
Lines changed: 8 additions & 2 deletions b/‎man/constants.Rd‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎man/eg_wpp.Rd‎
Lines changed: 30 additions & 0 deletions b/‎man/eg_wpp.Rd‎
Lines changed: 30 additions & 0 deletions
@@ -15,3 +15,4 @@
 ^tools$
 ^scratch\.R$
 ^scratch$
+^data-raw$
@@ -40,3 +40,6 @@ Imports:
     readr,
     rlang,
     tidyr
+Depends: 
+    R (>= 3.5)
+LazyData: true
@@ -2,8 +2,7 @@
 
 export(basic_burden_sanity)
 export(check_demography_alignment)
-export(check_template_alignment)
-export(const_data_colnames)
+export(file_dict_colnames)
 export(impact_check)
 export(plot_age_patterns)
 export(plot_compare_demography)
@@ -15,4 +14,6 @@ export(theme_vimc)
 export(transfrom_coverage_fvps)
 export(validate_complete_incoming_files)
 export(validate_file_dict_template)
+export(validate_template_alignment)
+importFrom(dplyr,.data)
 importFrom(ggplot2,ggplot)
@@ -207,21 +207,25 @@ validate_complete_incoming_files <- function(
 #'
 #' @description
 #'
-#' @param burden_set
+#' @param burden_set A `<data.frame>` of modeller-provided burden-set data.
 #'
-#' @param template
+#' @param template A `<data.frame>` of the burden template as provided to
+#' modelling groups by VIMC.
 #'
-#' @return
+#' @return A named list of checks carried out on `burden_set` to compare it
+#' against `template`, with information on missing and extra data.
 #'
 #' @examples
 #'
 #' @keywords diagnostics
 #'
 #' @export
-check_template_alignment <- function(burden_set, template) {
-  # TODO: figure out what the args are expected to be: dfs? lists, vecs?
-  expected <- names(template)
-  provided <- names(burden_set)
+validate_template_alignment <- function(burden_set, template) {
+  checkmate::assert_data_frame(burden_set)
+  checkmate::assert_data_frame(template)
+
+  expected <- colnames(template)
+  provided <- colnames(burden_set)
 
   missing_cols_in_burden <- setdiff(expected, provided)
   extra_cols_in_burden <- setdiff(provided, expected)
@@ -248,8 +252,8 @@ check_template_alignment <- function(burden_set, template) {
 
   # TODO: if these are data.frames, this might not be the best way to check
   # for differences
-  missing_grid_in_burden <- setdiff(template_grid, burden_grid)
-  extra_grid_in_burden <- setdiff(burden_grid, template_grid)
+  missing_grid_in_burden <- dplyr::setdiff(template_grid, burden_grid)
+  extra_grid_in_burden <- dplyr::setdiff(burden_grid, template_grid)
   burden_grid_matches_template <- all(
     c(
       nrow(missing_grid_in_burden),
@@ -283,30 +287,35 @@ check_template_alignment <- function(burden_set, template) {
 #' @keywords diagnostics
 #'
 #' @export
-check_demography_alignment <- function(burden_set, wpp, gender = "both") {
+check_demography_alignment <- function(
+  burden_set,
+  wpp,
+  gender = c("Both", "Male", "Female")
+) {
   # TODO: input checks
+  checkmate::assert_data_frame(burden_set)
+  checkmate::assert_data_frame(wpp)
+
+  gender <- rlang::arg_match(gender)
 
-  # TODO: check if these can be made constants
   cols_to_select <- c("country", "year", "age", "cohort_size")
   provided <- dplyr::select(
     burden_set,
     {{ cols_to_select }}
   )
   provided <- dplyr::mutate(
-    provided = cohort_size # check if this can be made a string const
+    provided,
+    provided = cohort_size
   )
 
   # TODO: explain what expected is
   # TODO: replace with a right-join?
   expected <- dplyr::filter(
     wpp,
-    country %in%
-      provided$country &
-      year %in% provided$year &
-      age %in% provided$age,
     gender == {{ gender }}
   )
 
+  # in case there are many extra cols
   cols_to_select <- c("country", "year", "age", "value")
   expected <- dplyr::select(
     expected,
@@ -315,7 +324,7 @@ check_demography_alignment <- function(burden_set, wpp, gender = "both") {
   expected <- dplyr::rename(
     expected,
     expected = value
-  ) # TODO: prefer not to use NSE
+  )
 
   # return left join
   alignment <- dplyr::left_join(
 
@@ -0,0 +1,16 @@
+#' Example of UN-WPP time-series data
+#'
+#' An example of the population estimate data used by VIMC.
+#'
+#' @format ## `eg_wpp`
+#' A data frame with 65,448 rows and 5 columns:
+#' \describe{
+#'   \item{country}{Country name; this is a placeholder name.}
+#'   \item{year}{Year}
+#'   \item{age}{Age}
+#'   \item{gender}{Sex given as three categories, "Male", "Female", or "Both".}
+#'   \item{value}{Population size}
+#' }
+#' @source Derived from data originally prepared by the United Nations as part
+#' of the World Population Prospects: <https://population.un.org/wpp/>.
+"eg_wpp"
@@ -0,0 +1,33 @@
+## code to prepare the `eg_wpp` example dataset
+
+# this code creates a template file that conforms to current VIMC-used
+# population estimate format from the UN-WPP https://population.un.org/wpp/
+
+library(tidyr)
+
+country <- "RFP"
+
+gender <- c("Male", "Female", "Both") # taken from existing reports
+
+year_start <- 1885
+year_end <- 2100
+year <- seq(year_start, year_end)
+
+value <- 10e6 # assuming a constant, medium-size pop. value
+
+age_min <- 0
+age_max <- 100
+age <- seq(age_min, age_max)
+
+eg_wpp <- crossing(
+  country = country,
+  year = year,
+  age = age,
+  gender = gender,
+  value = value
+)
+
+# NOTE that this table has more entries than seen in reports
+# as historical estimates are not available for all age groups
+
+usethis::use_data(eg_wpp, overwrite = TRUE)