Skip to content

Commit ba6c94f

Browse files
authored
Merge branch 'main' into sarah-dev
2 parents 3ef8ee7 + 6ab8153 commit ba6c94f

24 files changed

Lines changed: 320 additions & 57 deletions

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export(test_attribute_defs)
1010
export(test_content_units)
1111
export(test_creator)
1212
export(test_cui_dissemination)
13+
export(test_datatable_url_attributes)
1314
export(test_datatable_urls)
1415
export(test_datatable_urls_doi)
1516
export(test_date_range)

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
* Remove `test_public_points()` function. DataStore previously made GPS coordinates public, even when data files were restricted-access, so this function was written to alert users of that fact. DataStore now applies the same access rules to GPS coordinates and data files, so this check is no longer needed.
44

55
# DPchecker 1.0.1
6+
## 2025-04-24
7+
* Add function `test_datatable_url_attributes()` to test whether the `function` attribute of each data table `<url>` element in metadata is appropriately specified.
8+
* Added unit tests for `test_datatable_url_attributes`, `test_datatable_urls_doi`, and `test_datatable_urls`
9+
610
## 2025-04-16
711
* remove arcticdatautils as a dependency and replace functionality with equivalent functions from EMLeditor to reduce the total number of package dependencies.
812

R/run_checks.R

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,16 @@ run_congruence_checks <- function(directory = here::here(),
204204
warning = function(w) {
205205
warn_count <<- warn_count + 1
206206
cli::cli_bullets(c(w$message, w$body))
207-
}
208-
)
207+
})
208+
tryCatch(test_datatable_url_attributes(metadata),
209+
error = function(e) {
210+
err_count <<-err_count + 1
211+
cli::cli_bullets(c(e$message, e$body))
212+
},
213+
warning = function(w) {
214+
warn_count <<- warn_count + 1
215+
cli::cli_bullets(c(w$message, w$body))
216+
})
209217
tryCatch(test_publisher(metadata),
210218
error = function(e) {
211219
err_count <<- err_count + 1

R/tabular_data_congruence.R

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -452,12 +452,9 @@ test_datatable_urls_doi <- function(metadata = load_metadata(directory)) {
452452
return(invisible(metadata))
453453
}
454454

455-
# handle <url function="download> tag from ezEML:
455+
# ignore the attribute on <url function="download"> or <url function="information"> tags and keep only the URL itself:
456456
if (length(seq_along(url)) > 1) {
457-
bad_url <- bad_url + 1
458-
tbl_name <- data_tbl[[i]][["physical"]][["objectName"]]
459-
cli::cli_warn(c("!" = "Metadata URL for the data table {.var {tbl_name}} is incorrectly formatted. Use {.fn EMLeditor::set_data_urls} to update the URLs."))
460-
return(invisible(metadata))
457+
url <- url[[1]]
461458
}
462459

463460
prefix <- stringr::str_sub(url, 1, stringr::str_length(url) - 7)
@@ -483,6 +480,86 @@ test_datatable_urls_doi <- function(metadata = load_metadata(directory)) {
483480
return(invisible(metadata))
484481
}
485482

483+
#' Test for the appropriate attribute for data table URLs in metadata
#'
#' @description `test_datatable_url_attributes` tests whether the `function`
#'   attribute of the `<url>` element for each data table in metadata is
#'   properly specified. Every data table is inspected before any warning is
#'   issued, so a problem in a later table is not hidden by a warning raised
#'   for an earlier one.
#'
#' @details For each data table:
#'   * If there is no URL, the function throws an error.
#'   * If the URL has no attribute, it is assumed to be a direct download (as
#'     per the EML schema) and the user is warned to check that this is the
#'     case (data packages on DataStore will typically not have a direct
#'     download link to the data file).
#'   * If the attribute is anything other than "information" or "download",
#'     the function throws an error, as these are the only allowable values.
#'   * If the attribute is "download", the user is warned and asked to
#'     double check it.
#'   * If the attribute is "information" and the URL is not a DataStore
#'     reference profile, the user is warned and asked to check it.
#'   * If the attribute is "information" and the URL is a DataStore reference
#'     profile page, the data table passes.
#'
#'   Each distinct warning is issued at most once, in the order in which the
#'   underlying problem was first encountered.
#'
#' @inheritParams test_metadata_version
#'
#' @returns invisible(metadata)
#' @export
#'
#' @examples
#' \dontrun{
#' test_datatable_url_attributes(metadata)
#' }
test_datatable_url_attributes <- function(metadata = load_metadata(directory)) {
  is_eml(metadata)

  # get dataTable urls
  data_tbl <- EML::eml_get(metadata, "dataTable")
  data_tbl <- within(data_tbl, rm("@context"))

  # If there's only one csv, data_tbl ends up with one less level of nesting.
  # Re-nest it so that the rest of the code works consistently
  if ("attributeList" %in% names(data_tbl)) {
    data_tbl <- list(data_tbl)
  }

  datastore_prefix <- "https://irma.nps.gov/DataStore/Reference/Profile/"

  # Warnings are collected here and only signalled after every table has been
  # inspected; errors still abort immediately.
  pending_warnings <- character(0)

  for (i in seq_along(data_tbl)) {
    url <- data_tbl[[i]][["physical"]][["distribution"]][["online"]][["url"]]

    # data table has no URL (NULL or an empty element)
    if (length(url) == 0) {
      cli::cli_abort(c("x" = "One or more data files lack URLs. Could not test whether URL attributes are properly formatted or correctly correspond to the URL. Use {.fn EMLeditor::set_data_urls} to add URLs and attributes."))
    }

    # a bare URL (single element) carries no attribute
    if (length(url) == 1) {
      pending_warnings <- c(
        pending_warnings,
        "One or more of data file URLs elements in metadata lack attributes. Either use {.fn EMLeditor::set_data_urls} to add the appropriate attribute for DataStore or make sure the URL provided is a direct download link."
      )
      next
    }

    # URL element has an attribute.
    # NOTE(review): relies on the parsed element holding the URL text first
    # and the "function" attribute second -- confirm against EML output.
    tag <- url[[2]]
    url <- url[[1]]

    # error if tag is not either "information" or "download"
    # (isTRUE() also rejects NA or non-scalar tags instead of crashing)
    if (!isTRUE(tag %in% c("information", "download"))) {
      cli::cli_abort(c("x" = "The \"function = \" attribute in data table URLs must be either \"information\" or \"download\". Use {.fn EMLeditor::set_data_urls} to update the URL attribute."))
    }

    # warn if attribute is "download"
    if (tag == "download") {
      pending_warnings <- c(
        pending_warnings,
        "One or more data file URL attributes in metadata are set to \"download\". Please make sure this is actually a direct download link to the data file. Use {.fn EMLeditor::set_data_urls} to update this attribute."
      )
      next
    }

    # attribute is "information": everything before the trailing 7 characters
    # (the reference code) must be the DataStore reference profile prefix
    prefix <- stringr::str_sub(url, 1, stringr::str_length(url) - 7)
    if (!isTRUE(prefix == datastore_prefix)) {
      pending_warnings <- c(
        pending_warnings,
        "One or more data table URL attributes in metadata is set to \"information\" but the URL supplied is not a DataStore reference profile. Please check that the URL goes to the appropriate page and it is not a direct download link for the data file."
      )
    }
  }

  # signal each distinct warning once, in order of first occurrence
  pending_warnings <- unique(pending_warnings)
  if (length(pending_warnings) > 0) {
    for (msg in pending_warnings) {
      cli::cli_warn(c("!" = msg))
    }
    return(invisible(metadata))
  }

  # if you've gotten this far with no errors or warnings, it's probably OK:
  cli::cli_inform(c("v" = "Metadata datatable URLs and URL attributes are properly specified."))

  return(invisible(metadata))
}
561+
562+
486563

487564
#' File Name Match
488565
#'

docs/news/index.html

Lines changed: 12 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/pkgdown.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ pkgdown: 2.1.1
33
pkgdown_sha: ~
44
articles:
55
DPchecker: DPchecker.html
6-
last_built: 2025-04-16T19:17Z
6+
last_built: 2025-04-24T17:27Z

docs/reference/index.html

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)