Skip to content

Commit 55f98d7

Browse files
committed
small fixes
1 parent 8477d0b commit 55f98d7

4 files changed

Lines changed: 125 additions & 64 deletions

File tree

src/acquisition_master.R

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ ms_globals <- c(ls(all.names = TRUE), 'ms_globals')
320320
dir.create('logs', showWarnings = FALSE)
321321

322322
## change string in line below to find row index of your desired domain
323-
dmnrow <- which(network_domain$domain == 'calhoun')
323+
# dmnrow <- which(network_domain$domain == 'suef')
324324
for(dmnrow in 1:nrow(network_domain)){
325325

326326
# drop_automated_entries('.') #use with caution!
@@ -359,7 +359,7 @@ for(dmnrow in 1:nrow(network_domain)){
359359
get_all_local_helpers(network = network,
360360
domain = domain)
361361

362-
# stop here and go to processing_kernels.R to continue
362+
# stop here and go to processing_kernels.R to continue
363363
ms_retrieve(network = network,
364364
# prodname_filter = c('stream_chemistry'),
365365
domain = domain)
@@ -383,11 +383,8 @@ for(dmnrow in 1:nrow(network_domain)){
383383

384384
ms_general(network = network,
385385
domain = domain,
386-
get_missing_only = F,
387-
# general_prod_filter = NULL)
388-
general_prod_filter = c('prism_precip', 'prism_temp_mean', 'et_ref'))
389-
# general_prod_filter = c('prism_temp_mean', 'et_ref'))
390-
general_prod_filter = c('et_ref'))
386+
get_missing_only = FALSE,
387+
general_prod_filter = NULL)
391388
}
392389

393390
retain_ms_globals(ms_globals)

src/global/global_helpers.R

Lines changed: 107 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -10014,12 +10014,26 @@ postprocess_entire_dataset <- function(site_data,
1001410014
# legal_details_scrape(dataset_version = dataset_version)
1001510015

1001610016
warning('TEMPORARY: removing all remaining NEON data, flux data, and in-progress domains')
10017-
remove_flux_neon_etc(where)
10017+
remove_flux_neon_etc(where = fs_dir)
1001810018

1001910019
log_with_indent(glue('Uploading dataset v{vv} to Figshare',
1002010020
vv = dataset_version),
1002110021
logger = logger_module)
1002210022
# upload_dataset_to_figshare(dataset_version = dataset_version)
10023+
10024+
warning('IMPROVE THE FOLLOWING system calls (fix these issues upstream)')
10025+
system(glue("find {fs_dir} -name '*.csv' | xargs sed -e 's/cloased_shrub/closed_shrub/g' -i"))
10026+
system(glue("find {fs_dir} -name '*.csv' | xargs sed -e 's/lg_lncd/lg_nlcd/g' -i"))
10027+
system(glue("find {fs_dir} -name '*.csv' | xargs sed -e 's/ci_mean_annual_et/ck_mean_annual_et/g' -i"))
10028+
system("find ../portal -name '*.csv' | xargs sed -e 's/cloased_shrub/closed_shrub/g' -i")
10029+
system("find ../portal -name '*.csv' | xargs sed -e 's/lg_lncd/lg_nlcd/g' -i")
10030+
system("find ../portal -name '*.csv' | xargs sed -e 's/ci_mean_annual_et/ck_mean_annual_et/g' -i")
10031+
system("find ../portal -name '*.csv' | xargs sed -e 's/idbp/igbp/g' -i")
10032+
read_feather('../portal/data/general/biplot/year.feather') %>%
10033+
mutate(var = ifelse(var == 'lb_igbp_cloased_shrub', 'lb_igbp_closed_shrub', var),
10034+
var = ifelse(var == 'lg_lncd_lichens', 'lg_nlcd_lichens', var)) %>%
10035+
write_feather('../portal/data/general/biplot/year.feather')
10036+
1002310037
upload_dataset_to_figshare_packageversion(dataset_version = dataset_version)
1002410038
} else {
1002510039
log_with_indent('NOT pushing data to Figshare.',
@@ -10894,7 +10908,8 @@ figshare_create_article <- function(title,
1089410908
keywords,
1089510909
category_ids,
1089610910
authors,
10897-
token){
10911+
token,
10912+
verbose = FALSE){
1089810913

1089910914
if(is.character(keywords)) keywords <- as.list(keywords)
1090010915
if(is.numeric(category_ids)) category_ids <- as.list(category_ids)
@@ -10913,6 +10928,12 @@ figshare_create_article <- function(title,
1091310928
authors = authors),
1091410929
auto_unbox = TRUE)
1091510930

10931+
if(verbose){
10932+
print(request)
10933+
print(header)
10934+
print(meta)
10935+
}
10936+
1091610937
post <- expo_backoff(
1091710938
expr = {
1091810939
httr::POST(request,
@@ -11445,7 +11466,15 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1144511466
### CREATE, UPLOAD, PUBLISH SITES, VARS, LEGAL STUFF, SPATIAL DATA, AND DOCUMENTATION
1144611467
other_uploadsA <- list.files('../portal/data/general/spatial_downloadables',
1144711468
full.names = TRUE)
11448-
other_uploadsA = grep('spatial_timeseries', other_uploadsA, invert = TRUE, value = TRUE) #patch. see upload_dataset_to_figshare()
11469+
# other_uploadsA = grep('spatial_timeseries', other_uploadsA, invert = TRUE, value = TRUE) #patch. see upload_dataset_to_figshare()
11470+
# rmneon = grep('spatial_timeseries', other_uploadsA, value = T)
11471+
# for(dd in rmneon){
11472+
# read_csv(dd) %>% filter(domain != 'neon') %>% write_csv(dd)
11473+
# }
11474+
# fff <- list.files('macrosheds_figshare_v1/1_watershed_attribute_data/ws_attr_timeseries', full.names = TRUE)
11475+
# for(dd in fff){
11476+
# read_csv(dd) %>% filter(domain != 'neon') %>% write_csv(dd)
11477+
# }
1144911478
names(other_uploadsA) <- rep('watershed_attributes', length(other_uploadsA))
1145011479
titlesA <- str_match(other_uploadsA, '/([^/]+)\\.csv(?:\\.zip)?$')[, 2]
1145111480

@@ -11479,6 +11508,16 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1147911508

1148011509
print(paste('uploading extras'))
1148111510

11511+
#removing items that should now be accessed via EDI portal
11512+
rms <- (names(other_uploads) == 'metadata' | grepl('(?:columns|codes)\\.(?:txt|csv)$', other_uploads))
11513+
other_uploads <- other_uploads[! rms]
11514+
titles <- titles[! rms]
11515+
11516+
#variable catalog can be included with package data
11517+
ms_var_catalog <- paste0('macrosheds_figshare_v', dataset_version, '/macrosheds_documentation_packageformat/variable_catalog.csv')
11518+
save(ms_var_catalog, file = '../r_package/data/ms_var_catalog.RData')
11519+
11520+
file_ids_for_r_package2 <- tibble()
1148211521
for(i in seq_along(other_uploads)){
1148311522

1148411523
uf <- other_uploads[i]
@@ -11500,19 +11539,23 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1150011539
#if existing article, delete old version
1150111540
if(ut %in% existing_extras_deets$title){
1150211541

11503-
fls <- figshare_list_article_files(fs_id,
11504-
token = token)
11542+
for(fsid_ in fs_id){
1150511543

11544+
fls <- figshare_list_article_files(fsid_,
11545+
token = token)
1150611546

11507-
if(length(fls) >= 1){
11508-
for(j in seq_along(fls)){
11509-
figshare_delete_article_file(fs_id,
11510-
file_id = fls[[j]]$id,
11511-
token = token)
11547+
if(length(fls) >= 1){
11548+
for(j in seq_along(fls)){
11549+
figshare_delete_article_file(fsid_,
11550+
file_id = fls[[j]]$id,
11551+
token = token)
11552+
}
1151211553
}
1151311554
}
1151411555
}
1151511556

11557+
fs_id <- fs_id[1]
11558+
1151611559
figshare_upload_article(fs_id,
1151711560
file = unname(uf),
1151811561
token = token)
@@ -11524,42 +11567,48 @@ upload_dataset_to_figshare_packageversion <- function(dataset_version){
1152411567
fls <- figshare_list_article_files(fs_id,
1152511568
token = token)
1152611569

11527-
if(ut == 'site_metadata'){
11528-
sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11529-
fls[[1]]$id,
11530-
"/g' ../r_package/R/ms_download_site_data.R -i"),
11531-
intern = TRUE,
11532-
ignore.stdout = FALSE,
11533-
ignore.stderr = FALSE)
11534-
if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_site_data.R. maybe your path is different?')
11535-
}
11536-
11537-
if(ut == 'variable_metadata'){
11538-
sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11539-
fls[[1]]$id,
11540-
"/g' ../r_package/R/ms_download_variables.R -i"),
11541-
intern = TRUE,
11542-
ignore.stdout = FALSE,
11543-
ignore.stderr = FALSE)
11544-
if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_variables.R or ms_conversions.R. maybe your path is different?')
11545-
sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11546-
fls[[1]]$id,
11547-
"/g' ../r_package/R/ms_conversions.R -i"),
11548-
intern = TRUE,
11549-
ignore.stdout = FALSE,
11550-
ignore.stderr = FALSE)
11551-
}
11552-
11553-
if(ut == 'variable_catalog'){
11554-
sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11555-
fls[[1]]$id,
11556-
"/g' ../r_package/R/ms_catalog.R -i"),
11557-
intern = TRUE,
11558-
ignore.stdout = FALSE,
11559-
ignore.stderr = FALSE)
11560-
if(length(sysout)) stop('cannot update file ID in r_package/R/ms_catalog.R maybe your path is different?')
11561-
}
11562-
}
11570+
file_ids_for_r_package2 <- bind_rows(
11571+
file_ids_for_r_package2,
11572+
tibble(ut, fig_code = fls[[1]]$id))
11573+
# if(ut == 'site_metadata'){
11574+
# sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11575+
# fls[[1]]$id,
11576+
# "/g' ../r_package/R/ms_download_site_data.R -i"),
11577+
# intern = TRUE,
11578+
# ignore.stdout = FALSE,
11579+
# ignore.stderr = FALSE)
11580+
# if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_site_data.R. maybe your path is different?')
11581+
# }
11582+
11583+
# if(ut == 'variable_metadata'){
11584+
# sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11585+
# fls[[1]]$id,
11586+
# "/g' ../r_package/R/ms_download_variables.R -i"),
11587+
# intern = TRUE,
11588+
# ignore.stdout = FALSE,
11589+
# ignore.stderr = FALSE)
11590+
# if(length(sysout)) stop('cannot update file ID in r_package/R/ms_download_variables.R or ms_conversions.R. maybe your path is different?')
11591+
# sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11592+
# fls[[1]]$id,
11593+
# "/g' ../r_package/R/ms_conversions.R -i"),
11594+
# intern = TRUE,
11595+
# ignore.stdout = FALSE,
11596+
# ignore.stderr = FALSE)
11597+
# }
11598+
11599+
# if(ut == 'variable_catalog'){
11600+
# sysout <- system(paste0("sed -r 's/files\\/[0-9]+/files\\/",
11601+
# fls[[1]]$id,
11602+
# "/g' ../r_package/R/ms_catalog.R -i"),
11603+
# intern = TRUE,
11604+
# ignore.stdout = FALSE,
11605+
# ignore.stderr = FALSE)
11606+
# if(length(sysout)) stop('cannot update file ID in r_package/R/ms_catalog.R maybe your path is different?')
11607+
# }
11608+
}
11609+
11610+
save(file_ids_for_r_package2,
11611+
file = '../r_package/data/sysdata2.RData')
1156311612
}
1156411613

1156511614
detrmin_mean_record_length <- function(df){
@@ -14049,9 +14098,10 @@ generate_watershed_summaries <- function(){
1404914098

1405014099
precip <- map_dfr(precip_files, read_feather) %>%
1405114100
filter(year != substr(Sys.Date(), 0, 4),
14052-
var == 'cc_cumulative_precip') %>%
14101+
var == 'cc_cumulative_precip',
14102+
val < 30000) %>%
1405314103
group_by(site_code) %>%
14054-
summarize(cc_mean_annual_precip = mean(val, na.arm = TRUE)) %>%
14104+
summarize(cc_mean_annual_precip = mean(val, na.rm = TRUE)) %>%
1405514105
filter(!is.na(cc_mean_annual_precip))
1405614106

1405714107
# Prism temp
@@ -14062,7 +14112,7 @@ generate_watershed_summaries <- function(){
1406214112
filter(year != substr(Sys.Date(), 0, 4),
1406314113
var == 'cc_temp_mean') %>%
1406414114
group_by(site_code) %>%
14065-
summarize(cc_mean_annual_temp = mean(val, na.arm = TRUE)) %>%
14115+
summarize(cc_mean_annual_temp = mean(val, na.rm = TRUE)) %>%
1406614116
filter(!is.na(cc_mean_annual_temp))
1406714117

1406814118
# start of season
@@ -14072,7 +14122,7 @@ generate_watershed_summaries <- function(){
1407214122
filter(year != substr(Sys.Date(), 0, 4),
1407314123
var == 'vd_sos_mean') %>%
1407414124
group_by(site_code) %>%
14075-
summarize(vd_mean_sos = mean(val, na.arm = TRUE)) %>%
14125+
summarize(vd_mean_sos = mean(val, na.rm = TRUE)) %>%
1407614126
filter(!is.na(vd_mean_sos))
1407714127

1407814128
# end of season
@@ -14082,7 +14132,7 @@ generate_watershed_summaries <- function(){
1408214132
filter(year != substr(Sys.Date(), 0, 4),
1408314133
var == 'vd_eos_mean') %>%
1408414134
group_by(site_code) %>%
14085-
summarize(vd_mean_eos = mean(val, na.arm = TRUE)) %>%
14135+
summarize(vd_mean_eos = mean(val, na.rm = TRUE)) %>%
1408614136
filter(!is.na(vd_mean_eos))
1408714137

1408814138
# length of season
@@ -14092,7 +14142,7 @@ generate_watershed_summaries <- function(){
1409214142
filter(year != substr(Sys.Date(), 0, 4),
1409314143
var == 'vd_los_mean') %>%
1409414144
group_by(site_code) %>%
14095-
summarize(vd_mean_los = mean(val, na.arm = TRUE)) %>%
14145+
summarize(vd_mean_los = mean(val, na.rm = TRUE)) %>%
1409614146
filter(!is.na(vd_mean_los))
1409714147

1409814148
# maximum day of photosynthesis
@@ -14102,7 +14152,7 @@ generate_watershed_summaries <- function(){
1410214152
filter(year != substr(Sys.Date(), 0, 4),
1410314153
var == 'vd_mos_mean') %>%
1410414154
group_by(site_code) %>%
14105-
summarize(vd_mean_mos = mean(val, na.arm = TRUE)) %>%
14155+
summarize(vd_mean_mos = mean(val, na.rm = TRUE)) %>%
1410614156
filter(!is.na(vd_mean_mos))
1410714157

1410814158
# gpp
@@ -14113,7 +14163,7 @@ generate_watershed_summaries <- function(){
1411314163
filter(year != substr(Sys.Date(), 0, 4),
1411414164
var == 'va_gpp_sum') %>%
1411514165
group_by(site_code) %>%
14116-
summarize(va_mean_annual_gpp = mean(val, na.arm = TRUE)) %>%
14166+
summarize(va_mean_annual_gpp = mean(val, na.rm = TRUE)) %>%
1411714167
filter(!is.na(va_mean_annual_gpp))
1411814168

1411914169
# npp
@@ -14123,7 +14173,7 @@ generate_watershed_summaries <- function(){
1412314173
filter(year != substr(Sys.Date(), 0, 4),
1412414174
var == 'va_npp_median') %>%
1412514175
group_by(site_code) %>%
14126-
summarize(va_mean_annual_npp = mean(val, na.arm = TRUE)) %>%
14176+
summarize(va_mean_annual_npp = mean(val, na.rm = TRUE)) %>%
1412714177
filter(! is.na(va_mean_annual_npp))
1412814178

1412914179
# terrain
@@ -14524,6 +14574,8 @@ compute_yearly_summary <- function(filter_ms_interp = FALSE,
1452414574
# this and compute_yearly_summary_ws should probably be combined at some point, but for now,
1452514575
# compute_yearly_summary_ws() appends compute_yearly_summary with ws_traits
1452614576

14577+
#does not affect published dataset, only portal data, so no worries about filter settings.
14578+
1452714579
#df = default sites for each domain
1452814580
df <- site_data %>%
1452914581
filter(site_type != 'rain_gauge') %>%

src/templates/figshare_docfiles/packageformat_readme.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
This dataset is intended to be used through the "macrosheds" package for R
22
(https://github.com/MacroSHEDS/macrosheds). It is not documented or arranged to be
3-
accessed directly by end-users, but you're welcome to use it to download data one
4-
MacroSheds domain at a time. The official MacroSheds dataset is available at https://doi.org/10.6084/m9.figshare.c.5621740.
3+
accessed directly by end-users. The official MacroSheds dataset will be available on the data portal of
4+
the Environmental Data Initiative (EDI) once its publication, which is currently pending, is complete.
55

66
PLEASE carefully consider the data policy. Most of the data included here are (re)distributed
77
under CC BY 4.0, so you're free to share and adapt with attribution, but a few of our
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
@article{vlah_etal_macrosheds_2023,
2+
title = {MacroSheds: a synthesis of long-term biogeochemical, hydroclimatic, and geospatial data from small watershed ecosystem studies},
3+
author = {Vlah, M.J. and Rhea, S. and Bernhardt, E.S. and Slaughter, W. and Gubbins, N. and DelVecchia, A.G. and Thellman, A. and Ross, M.R.V.},
4+
year = {2023},
5+
journal = {Limnology and Oceanography Letters},
6+
% volume = {},
7+
% number = {},
8+
% pages = {5--18},
9+
% publisher = {Elsevier},
10+
}
11+
12+

0 commit comments

Comments
 (0)