@@ -10014,12 +10014,26 @@ postprocess_entire_dataset <- function(site_data,
1001410014 # legal_details_scrape(dataset_version = dataset_version)
1001510015
1001610016 warning('TEMPORARY: removing all remaining NEON data, flux data, and in-progress domains')
10017- remove_flux_neon_etc(where)
10017+ remove_flux_neon_etc(where = fs_dir )
1001810018
1001910019 log_with_indent(glue('Uploading dataset v{vv} to Figshare',
1002010020 vv = dataset_version),
1002110021 logger = logger_module)
1002210022 # upload_dataset_to_figshare(dataset_version = dataset_version)
10023+
10024+ warning('IMPROVE THE FOLLOWING system calls (fix these issues upstream)')
10025+ system(glue("find {fs_dir} -name '*.csv' | xargs sed -e 's/cloased_shrub/closed_shrub/g' -i"))
10026+ system(glue("find {fs_dir} -name '*.csv' | xargs sed -e 's/lg_lncd/lg_nlcd/g' -i"))
10027+ system(glue("find {fs_dir} -name '*.csv' | xargs sed -e 's/ci_mean_annual_et/ck_mean_annual_et/g' -i"))
10028+ system("find ../portal -name '*.csv' | xargs sed -e 's/cloased_shrub/closed_shrub/g' -i")
10029+ system("find ../portal -name '*.csv' | xargs sed -e 's/lg_lncd/lg_nlcd/g' -i")
10030+ system("find ../portal -name '*.csv' | xargs sed -e 's/ci_mean_annual_et/ck_mean_annual_et/g' -i")
10031+ system("find ../portal -name '*.csv' | xargs sed -e 's/idbp/igbp/g' -i")
10032+ read_feather('../portal/data/general/biplot/year.feather') %>%
10033+ mutate(var = ifelse(var == 'lb_igbp_cloased_shrub', 'lb_igbp_closed_shrub', var),
10034+ var = ifelse(var == 'lg_lncd_lichens', 'lg_nlcd_lichens', var)) %>%
10035+ write_feather('../portal/data/general/biplot/year.feather')
10036+
1002310037 upload_dataset_to_figshare_packageversion(dataset_version = dataset_version)
1002410038 } else {
1002510039 log_with_indent('NOT pushing data to Figshare.',
@@ -10894,7 +10908,8 @@ figshare_create_article <- function(title,
1089410908 keywords,
1089510909 category_ids,
1089610910 authors,
10897- token){
10911+ token,
10912+ verbose = FALSE){
1089810913
1089910914 if(is.character(keywords)) keywords <- as.list(keywords)
1090010915 if(is.numeric(category_ids)) category_ids <- as.list(category_ids)
@@ -10913,6 +10928,12 @@ figshare_create_article <- function(title,
1091310928 authors = authors),
1091410929 auto_unbox = TRUE)
1091510930
10931+ if(verbose){
10932+ print(request)
10933+ print(header)
10934+ print(meta)
10935+ }
10936+
1091610937 post <- expo_backoff(
1091710938 expr = {
1091810939 httr::POST(request,
@@ -11445,7 +11466,15 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1144511466 ### CREATE, UPLOAD, PUBLISH SITES, VARS, LEGAL STUFF, SPATIAL DATA, AND DOCUMENTATION
1144611467 other_uploadsA <- list.files('../portal/data/general/spatial_downloadables',
1144711468 full.names = TRUE)
11448- other_uploadsA = grep('spatial_timeseries', other_uploadsA, invert = TRUE, value = TRUE) #patch. see upload_dataset_to_figshare()
11469+ # other_uploadsA = grep('spatial_timeseries', other_uploadsA, invert = TRUE, value = TRUE) #patch. see upload_dataset_to_figshare()
11470+ # rmneon = grep('spatial_timeseries', other_uploadsA, value = T)
11471+ # for(dd in rmneon){
11472+ # read_csv(dd) %>% filter(domain != 'neon') %>% write_csv(dd)
11473+ # }
11474+ # fff <- list.files('macrosheds_figshare_v1/1_watershed_attribute_data/ws_attr_timeseries', full.names = TRUE)
11475+ # for(dd in fff){
11476+ # read_csv(dd) %>% filter(domain != 'neon') %>% write_csv(dd)
11477+ # }
1144911478 names(other_uploadsA) <- rep('watershed_attributes', length(other_uploadsA))
1145011479 titlesA <- str_match(other_uploadsA, '/([^/]+)\\.csv(?:\\.zip)?$')[, 2]
1145111480
@@ -11479,6 +11508,16 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1147911508
1148011509 print(paste('uploading extras'))
1148111510
11511+ #removing items that should now be accessed via EDI portal
11512+ rms <- (names(other_uploads) == 'metadata' | grepl('(?:columns|codes)\\.(?:txt|csv)$', other_uploads))
11513+ other_uploads <- other_uploads[! rms]
11514+ titles <- titles[! rms]
11515+
11516+ #variable catalog can be included with package data
11517+ ms_var_catalog <- paste0('macrosheds_figshare_v', dataset_version, '/macrosheds_documentation_packageformat/variable_catalog.csv')
11518+ save(ms_var_catalog, file = '../r_package/data/ms_var_catalog.RData')
11519+
11520+ file_ids_for_r_package2 <- tibble()
1148211521 for(i in seq_along(other_uploads)){
1148311522
1148411523 uf <- other_uploads[i]
@@ -11500,19 +11539,23 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1150011539 #if existing article, delete old version
1150111540 if(ut %in% existing_extras_deets$title){
1150211541
11503- fls <- figshare_list_article_files(fs_id,
11504- token = token)
11542+ for(fsid_ in fs_id){
1150511543
11544+ fls <- figshare_list_article_files(fsid_,
11545+ token = token)
1150611546
11507- if(length(fls) >= 1){
11508- for(j in seq_along(fls)){
11509- figshare_delete_article_file(fs_id,
11510- file_id = fls[[j]]$id,
11511- token = token)
11547+ if(length(fls) >= 1){
11548+ for(j in seq_along(fls)){
11549+ figshare_delete_article_file(fsid_,
11550+ file_id = fls[[j]]$id,
11551+ token = token)
11552+ }
1151211553 }
1151311554 }
1151411555 }
1151511556
11557+ fs_id <- fs_id[1]
11558+
1151611559 figshare_upload_article(fs_id,
1151711560 file = unname(uf),
1151811561 token = token)
@@ -11524,42 +11567,48 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1152411567 fls <- figshare_list_article_files(fs_id,
1152511568 token = token)
1152611569
11527- if(ut == 'site_metadata'){
11528- sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11529- fls[[1]]$id,
11530- "/g' ../r_package/R/ms_download_site_data.R -i"),
11531- intern = TRUE,
11532- ignore.stdout = FALSE,
11533- ignore.stderr = FALSE)
11534- if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_site_data.R. maybe your path is different?')
11535- }
11536-
11537- if(ut == 'variable_metadata'){
11538- sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11539- fls[[1]]$id,
11540- "/g' ../r_package/R/ms_download_variables.R -i"),
11541- intern = TRUE,
11542- ignore.stdout = FALSE,
11543- ignore.stderr = FALSE)
11544- if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_variables.R or ms_conversions.R. maybe your path is different?')
11545- sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11546- fls[[1]]$id,
11547- "/g' ../r_package/R/ms_conversions.R -i"),
11548- intern = TRUE,
11549- ignore.stdout = FALSE,
11550- ignore.stderr = FALSE)
11551- }
11552-
11553- if(ut == 'variable_catalog'){
11554- sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11555- fls[[1]]$id,
11556- "/g' ../r_package/R/ms_catalog.R -i"),
11557- intern = TRUE,
11558- ignore.stdout = FALSE,
11559- ignore.stderr = FALSE)
11560- if(length(sysout)) stop('cannot update file ID in r_package/R/ms_catalog.R maybe your path is different?')
11561- }
11562- }
11570+ file_ids_for_r_package2 <- bind_rows(
11571+ file_ids_for_r_package2,
11572+ tibble(ut, fig_code = fls[[1]]$id))
11573+ # if(ut == 'site_metadata'){
11574+ # sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11575+ # fls[[1]]$id,
11576+ # "/g' ../r_package/R/ms_download_site_data.R -i"),
11577+ # intern = TRUE,
11578+ # ignore.stdout = FALSE,
11579+ # ignore.stderr = FALSE)
11580+ # if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_site_data.R. maybe your path is different?')
11581+ # }
11582+
11583+ # if(ut == 'variable_metadata'){
11584+ # sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11585+ # fls[[1]]$id,
11586+ # "/g' ../r_package/R/ms_download_variables.R -i"),
11587+ # intern = TRUE,
11588+ # ignore.stdout = FALSE,
11589+ # ignore.stderr = FALSE)
11590+ # if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_variables.R or ms_conversions.R. maybe your path is different?')
11591+ # sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11592+ # fls[[1]]$id,
11593+ # "/g' ../r_package/R/ms_conversions.R -i"),
11594+ # intern = TRUE,
11595+ # ignore.stdout = FALSE,
11596+ # ignore.stderr = FALSE)
11597+ # }
11598+
11599+ # if(ut == 'variable_catalog'){
11600+ # sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11601+ # fls[[1]]$id,
11602+ # "/g' ../r_package/R/ms_catalog.R -i"),
11603+ # intern = TRUE,
11604+ # ignore.stdout = FALSE,
11605+ # ignore.stderr = FALSE)
11606+ # if(length(sysout)) stop('cannot update file ID in r_package/R/ms_catalog.R maybe your path is different?')
11607+ # }
11608+ }
11609+
11610+ save(file_ids_for_r_package2,
11611+ file = '../r_package/data/sysdata2.RData')
1156311612}
1156411613
1156511614detrmin_mean_record_length <- function(df){
@@ -14049,9 +14098,10 @@ generate_watershed_summaries <- function(){
1404914098
1405014099 precip <- map_dfr(precip_files, read_feather) %>%
1405114100 filter(year != substr(Sys.Date(), 0, 4),
14052- var == 'cc_cumulative_precip') %>%
14101+ var == 'cc_cumulative_precip',
14102+ val < 30000) %>%
1405314103 group_by(site_code) %>%
14054- summarize(cc_mean_annual_precip = mean(val, na.arm = TRUE)) %>%
14104+ summarize(cc_mean_annual_precip = mean(val, na.rm = TRUE)) %>%
1405514105 filter(!is.na(cc_mean_annual_precip))
1405614106
1405714107 # Prism temp
@@ -14062,7 +14112,7 @@ generate_watershed_summaries <- function(){
1406214112 filter(year != substr(Sys.Date(), 0, 4),
1406314113 var == 'cc_temp_mean') %>%
1406414114 group_by(site_code) %>%
14065- summarize(cc_mean_annual_temp = mean(val, na.arm = TRUE)) %>%
14115+ summarize(cc_mean_annual_temp = mean(val, na.rm = TRUE)) %>%
1406614116 filter(!is.na(cc_mean_annual_temp))
1406714117
1406814118 # start of season
@@ -14072,7 +14122,7 @@ generate_watershed_summaries <- function(){
1407214122 filter(year != substr(Sys.Date(), 0, 4),
1407314123 var == 'vd_sos_mean') %>%
1407414124 group_by(site_code) %>%
14075- summarize(vd_mean_sos = mean(val, na.arm = TRUE)) %>%
14125+ summarize(vd_mean_sos = mean(val, na.rm = TRUE)) %>%
1407614126 filter(!is.na(vd_mean_sos))
1407714127
1407814128 # end of season
@@ -14082,7 +14132,7 @@ generate_watershed_summaries <- function(){
1408214132 filter(year != substr(Sys.Date(), 0, 4),
1408314133 var == 'vd_eos_mean') %>%
1408414134 group_by(site_code) %>%
14085- summarize(vd_mean_eos = mean(val, na.arm = TRUE)) %>%
14135+ summarize(vd_mean_eos = mean(val, na.rm = TRUE)) %>%
1408614136 filter(!is.na(vd_mean_eos))
1408714137
1408814138 # length of season
@@ -14092,7 +14142,7 @@ generate_watershed_summaries <- function(){
1409214142 filter(year != substr(Sys.Date(), 0, 4),
1409314143 var == 'vd_los_mean') %>%
1409414144 group_by(site_code) %>%
14095- summarize(vd_mean_los = mean(val, na.arm = TRUE)) %>%
14145+ summarize(vd_mean_los = mean(val, na.rm = TRUE)) %>%
1409614146 filter(!is.na(vd_mean_los))
1409714147
1409814148 # maximum day of photosynthesis
@@ -14102,7 +14152,7 @@ generate_watershed_summaries <- function(){
1410214152 filter(year != substr(Sys.Date(), 0, 4),
1410314153 var == 'vd_mos_mean') %>%
1410414154 group_by(site_code) %>%
14105- summarize(vd_mean_mos = mean(val, na.arm = TRUE)) %>%
14155+ summarize(vd_mean_mos = mean(val, na.rm = TRUE)) %>%
1410614156 filter(!is.na(vd_mean_mos))
1410714157
1410814158 # gpp
@@ -14113,7 +14163,7 @@ generate_watershed_summaries <- function(){
1411314163 filter(year != substr(Sys.Date(), 0, 4),
1411414164 var == 'va_gpp_sum') %>%
1411514165 group_by(site_code) %>%
14116- summarize(va_mean_annual_gpp = mean(val, na.arm = TRUE)) %>%
14166+ summarize(va_mean_annual_gpp = mean(val, na.rm = TRUE)) %>%
1411714167 filter(!is.na(va_mean_annual_gpp))
1411814168
1411914169 # npp
@@ -14123,7 +14173,7 @@ generate_watershed_summaries <- function(){
1412314173 filter(year != substr(Sys.Date(), 0, 4),
1412414174 var == 'va_npp_median') %>%
1412514175 group_by(site_code) %>%
14126- summarize(va_mean_annual_npp = mean(val, na.arm = TRUE)) %>%
14176+ summarize(va_mean_annual_npp = mean(val, na.rm = TRUE)) %>%
1412714177 filter(! is.na(va_mean_annual_npp))
1412814178
1412914179 # terrain
@@ -14524,6 +14574,8 @@ compute_yearly_summary <- function(filter_ms_interp = FALSE,
1452414574 # this and compute_yearly_summary_ws should probably be combined at some point, but for now,
1452514575 # compute_yearly_summary_ws() appends compute_yearly_summary with ws_traits
1452614576
14577+ #does not affect published dataset, only portal data, so no worries about filter settings.
14578+
1452714579 #df = default sites for each domain
1452814580 df <- site_data %>%
1452914581 filter(site_type != 'rain_gauge') %>%
0 commit comments