|
13 | 13 | #' test_pii_data_emails() |
14 | 14 | #' } |
test_pii_data_emails <- function(directory = here::here()) {
  # Scans every .csv file directly inside `directory` for email addresses and
  # reports those that are not on a .gov domain.
  #
  # directory: path of the folder to scan (sub-folders are not searched).
  # Signals a cli warning listing each offending address next to the file it
  # was found in, or a cli message when nothing is found. Returns whatever the
  # cli call returns, invisibly.

  # Email pattern: dot-separated local part, "@", dot-separated domain labels
  # and a 2-14 letter top-level domain. It is applied case-insensitively
  # below so addresses such as "Jane.Doe@Example.COM" are matched in full.
  email_pattern <- paste0(
    "([_+a-z0-9-]+(\\.[_+a-z0-9-]+)*@[a-z0-9-]+(\\.[a-z0-9-]+)*",
    "(\\.[a-z]{2,14}))"
  )

  # get the full file names and paths for all files in directory
  files <- list.files(directory, full.names = TRUE)

  # Keep only real files whose *name* ends in ".csv". The extension test is
  # anchored and run on the base name so that a stray "csv" elsewhere in the
  # path (e.g. a folder called "my_csv_project") does not select every file.
  is_csv <- grepl("\\.csv$", basename(files), ignore.case = TRUE)
  files <- files[is_csv & !dir.exists(files)]

  # One character vector of offending (non-.gov) emails per csv file.
  emails_list <- lapply(files, function(file) {
    # Read the file line by line and join the lines with a space so the whole
    # file is searched as a single string. warn = FALSE silences the
    # "incomplete final line" warning, which is unrelated to PII.
    data_lines <- paste(readLines(file, warn = FALSE), collapse = " ")
    data_emails <- suppressWarnings(
      regmatches(
        data_lines,
        gregexpr(email_pattern, data_lines, ignore.case = TRUE)
      )
    )
    data_emails <- unlist(data_emails, recursive = FALSE)

    # Filter out .gov addresses. The pattern is anchored to the end of the
    # address so that "gov" inside a name or domain label (for example
    # "agovernor@example.org") is not mistaken for a .gov domain.
    data_emails[!grepl("\\.gov$", data_emails, ignore.case = TRUE)]
  })
  names(emails_list) <- basename(files)

  # Drop files without any offending email.
  emails_list <- emails_list[lengths(emails_list) > 0]

  # if there are offending emails, fail with warning:
  if (length(emails_list) > 0) {
    # Repeat each file name once per email found in it so every address is
    # reported next to the file it actually came from.
    file_labels <- rep(names(emails_list), lengths(emails_list))
    msg <- paste0(
      "--> {.file ", file_labels, "}: ",
      unlist(emails_list, use.names = FALSE)
    )
    names(msg) <- rep(" ", length(msg))
    err <- "The following data files contain emails with potential PII:"

    cli::cli_warn(c("!" = err, msg))
    # if no pii emails (non .gov), pass test:
  } else {
    cli::cli_inform(
      c("v" = "Data files do not appear to contain any personal emails.")
    )
  }
}
0 commit comments