Skip to content

Commit 81b3ae2

Browse files
committed
data fully restructured. just making metadata now
1 parent a3406fc commit 81b3ae2

4 files changed

Lines changed: 175 additions & 17 deletions

File tree

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
attributeName attributeDefinition class unit dateTimeFormatString missingValueCode missingValueCodeExplanation
2+
p_mean numeric !Add units here!
3+
pet_mean numeric !Add units here!
4+
aridity numeric !Add units here!
5+
p_seasonality numeric !Add units here!
6+
frac_snow numeric !Add units here!
7+
high_prec_freq numeric !Add units here!
8+
high_prec_dur numeric !Add units here!
9+
high_prec_timing categorical
10+
low_prec_freq numeric !Add units here!
11+
low_prec_dur numeric !Add units here!
12+
low_prec_timing categorical
13+
site_code character
14+
geol_1st_class categorical
15+
glim_1st_class_frac numeric !Add units here!
16+
geol_2nd_class categorical
17+
glim_2nd_class_frac numeric !Add units here!
18+
carbonate_rocks_frac numeric !Add units here!
19+
geol_porosity numeric !Add units here!
20+
geol_permeability numeric !Add units here!
21+
sand_frac numeric !Add units here!
22+
silt_frac numeric !Add units here!
23+
clay_frac numeric !Add units here!
24+
organic_frac numeric !Add units here!
25+
gauge_lat numeric !Add units here!
26+
gauge_lon numeric !Add units here!
27+
area numeric !Add units here!
28+
elev_mean numeric !Add units here!
29+
slope_mean numeric !Add units here!
30+
frac_forest numeric !Add units here!
31+
dom_land_cover_frac numeric !Add units here!
32+
dom_land_cover categorical
33+
root_depth_50 numeric !Add units here!
34+
root_depth_99 numeric !Add units here!
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
attributeName attributeDefinition class unit dateTimeFormatString missingValueCode missingValueCodeExplanation
2+
network categorical
3+
domain categorical
4+
site_code categorical
5+
var categorical
6+
date Date !Add datetime specifier here!
7+
val numeric !Add units here!
8+
pctCellErr numeric !Add units here!

src/global/global_helpers.R

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9963,7 +9963,6 @@ postprocess_entire_dataset <- function(site_data,
99639963
prepare_for_edi(where = edi_dir,
99649964
dataset_version = dataset_version)
99659965

9966-
remove_more_neon_stuff_temporarily()
99679966
manually_edit_eml()
99689967

99699968
log_with_indent(glue('Uploading dataset v{vv} to EDI',
@@ -9984,27 +9983,46 @@ remove_more_neon_stuff_temporarily <- function(){
99849983

99859984
manually_edit_eml <- function(){

    #Post-process the most recently generated EML document: for each attribute
    #that has extended details in the ws_attr_summaries template, insert a
    #<methods><methodStep><description> block immediately after that
    #attribute's </missingValueCode> line. Edits the EML file in place.
    #
    #No arguments; reads/writes under eml/. Returns the value of write_lines,
    #invisibly (side-effect function).

    #uses `ls -t` via system(); not portable to windows
    if(.Platform$OS.type == 'windows') stop('this will not work on windows (but can be adapted quickly)')

    att <- read_tsv('eml/eml_templates/attributes_ws_attr_summaries.txt')

    #NOTE(review): below we pull a 'details' column from this template, but the
    #template visible in this commit only has attributeName/attributeDefinition/
    #class/unit/... columns — confirm 'details' gets added to the template.

    most_recent_eml <- system('ls -t eml/eml_out | head -n 1', intern = TRUE)
    eml <- read_lines(file.path('eml/eml_out', most_recent_eml))

    #template chunk; element 4 (NA) is filled in per-attribute below
    new_eml_chunk <- c('\t<methods>', '\t\t<methodStep>', '\t\t\t<description>',
                       NA, '\t\t\t</description>', '\t\t</methodStep>', '\t</methods>')

    mvclines <- grep('</missingValueCode>', eml)
    attlines <- grep('</attribute>', eml)
    atnlines <- grep('<attributeName>', eml)

    #iterate in reverse so insertions don't invalidate indices not yet visited
    for(i in rev(seq_along(mvclines))){

        mvcl <- mvclines[i]

        if((mvcl + 1) %in% attlines){

            attl <- mvcl + 1

            #nearest <attributeName> line above this attribute's closing tag.
            #(explicit max over the filtered positions; the previous
            #which.max-on-offsets approach indexed the unfiltered vector with
            #an index from the filtered one and was only correct by accident)
            preceding_atn <- atnlines[atnlines < attl]
            if(length(preceding_atn) == 0) stop('problem with varn')
            atnl <- max(preceding_atn)

            varn <- str_match(eml[atnl], '\\<attributeName\\>([^\\<]+)\\<\\/attributeName\\>$')[, 2]

            if(length(varn) != 1 || is.na(varn)) stop('problem with varn')

            if(! varn %in% att$attributeName) next

            attdeets <- pull(att[att$attributeName == varn, 'details'])

            if(is.na(attdeets)) next

            new_eml_chunk[4] <- attdeets
            eml <- c(eml[1:mvcl], new_eml_chunk, eml[attl:length(eml)])
        }
    }

    write_lines(eml, file.path('eml/eml_out', most_recent_eml))
}
1000910027

1001010028
make_figshare_docs_skeleton <- function(where){
@@ -10340,7 +10358,7 @@ prepare_for_figshare <- function(where, dataset_version){
1034010358
#prepare documentation and metadata
1034110359
make_figshare_docs_skeleton(where = where)
1034210360
prepare_site_metadata_for_figshare(outfile = file.path(where, 'macrosheds_documentation/04_site_documentation/04a_site_metadata.csv'))
10343-
prepare_variable_metadata_for_figshare(outfile = file.path(where, '/macrosheds_documentation/variable_metadata.csv'),
10361+
prepare_variable_metadata_for_figshare(outfile = file.path(where, '/macrosheds_documentation/variable_metadata.csv'),
1034410362
fs_format = 'new')
1034510363
assemble_misc_docs_figshare(where = where)
1034610364

@@ -10415,7 +10433,7 @@ combine_ts_csvs <- function(where){
1041510433

1041610434
domain_combined %>%
1041710435
arrange(site_code, var_category, var) %>%
10418-
write_csv(file.path(network_dir, paste0(d, '.csv')))
10436+
write_csv(file.path(network_dir, paste0('timeseries_', d, '.csv')))
1041910437
}
1042010438
}
1042110439

@@ -10502,11 +10520,75 @@ prepare_for_edi <- function(where, dataset_version){
1050210520
logger = logger_module)
1050310521
combine_daymet_csvs(file.path(where, '4_CAMELS-compliant_Daymet_forcings'))
1050410522

10523+
log_with_indent('Combining ws attrs (separately for ms and camels-compliant)',
10524+
indent = 2,
10525+
logger = logger_module)
10526+
combine_ws_attrs(where)
10527+
1050510528
log_with_indent('Combining spatial objects by domain',
1050610529
indent = 2,
1050710530
logger = logger_module)
1050810531
combine_and_move_spatial_objects(from = file.path(where, '2_timeseries_data'),
1050910532
to = file.path(where, '5_shapefiles'))
10533+
10534+
#TEMPORARY
10535+
remove_more_neon_stuff_temporarily()
10536+
10537+
eml_misc(where)
10538+
}
10539+
10540+
combine_ws_attrs <- function(where){

    #Combine per-domain watershed attribute files into single CSVs, then
    #remove the per-domain files.
    #
    #where: root directory of the EDI-ready dataset. (Was a free variable —
    #  the function was defined with no arguments but referenced `where`
    #  throughout, and the caller passes combine_ws_attrs(where); now a
    #  formal argument.)

    #ms-standard watershed attributes: stack the per-domain csvs rowwise
    ws_attrs <- list.files(glue('{where}/1_watershed_attribute_data/ws_attr_timeseries'),
                           full.names = TRUE)

    map_dfr(ws_attrs, read_csv) %>%
        write_csv(glue('{where}/1_watershed_attribute_data/ws_attr_timeseries.csv'))

    file.remove(ws_attrs)
    #remove the now-empty directory (file.remove on a directory is not
    #portable; unlink with recursive = TRUE is)
    unlink(glue('{where}/1_watershed_attribute_data/ws_attr_timeseries'),
           recursive = TRUE)

    #camels-compliant watershed attributes: one file per attribute group,
    #all keyed on site_code, so full-join them together
    ws_attrs <- list.files(glue('{where}/3_CAMELS-compliant_watershed_attributes'),
                           full.names = TRUE)

    if(length(ws_attrs) == 0) stop('no CAMELS-compliant ws attr files found')

    d <- read_csv(ws_attrs[1])
    for(f in ws_attrs[-1]){ #safe when only one file (2:length would break)
        d <- full_join(d, read_csv(f), by = 'site_code')
    }

    write_csv(d, glue('{where}/3_CAMELS-compliant_watershed_attributes/CAMELS_compliant_ws_attr.csv'))

    file.remove(ws_attrs)
}
10565+
10566+
eml_misc <- function(where){

    #Miscellaneous pre-EML housekeeping: standardize a filename, hard-link
    #shapefiles into the EML loading dock, and zip them for upload as an
    #EML "other entity".
    #
    #where: root directory of the EDI-ready dataset. Side-effect function;
    #  returns the exit status of zip(), invisibly.

    ## rename some files to clarify what they are in the absence of dir structure

    # fs <- list.files(glue('{where}/1_watershed_attribute_data/ws_attr_timeseries'),
    #                  full.names = TRUE)
    # file.rename(fs, sub('ws_attr_timeseries/', 'ws_attr_timeseries/ws_attr_ts_', fs))
    #
    # fs <- list.files(glue('{where}/3_CAMELS-compliant_watershed_attributes'),
    #                  full.names = TRUE)
    # file.rename(fs, sub('watershed_attributes/', 'watershed_attributes/CAMELS-compliant_ws_attr_ts_', fs))

    #hyphen -> underscore, matching the name build_eml_templates.R expects
    file.rename(glue('{where}/4_CAMELS-compliant_Daymet_forcings/CAMELS-compliant_Daymet_forcings.csv'),
                glue('{where}/4_CAMELS-compliant_Daymet_forcings/CAMELS_compliant_Daymet_forcings.csv'))

    ## link shapefiles to eml loading dock and zip them together

    dir.create('eml/data_links/shapefiles', showWarnings = FALSE)

    sfs <- list.files(glue('{where}/5_shapefiles'), full.names = TRUE)
    sfs_basenames <- basename(sfs)
    link_dests <- file.path('eml/data_links/shapefiles', sfs_basenames)

    #file.link returns FALSE (with a warning) when linking fails, e.g. across
    #filesystems or when the link already exists; fall back to copying rather
    #than silently producing an incomplete zip
    linked <- suppressWarnings(file.link(sfs, link_dests))
    if(any(! linked)){
        file.copy(sfs[! linked], link_dests[! linked], overwrite = TRUE)
    }

    zip(zipfile = 'eml/data_links/shapefiles.zip',
        files = 'eml/data_links/shapefiles',
        flags = '-r9Xq')
}
1051110593

1051210594
prepare_for_figshare_packageformat <- function(where, dataset_version){

src/global/one-off/build_eml_templates.R

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
11
#don't source this file. it was used in the creation of macrosheds/data_processing/eml,
22
#but everything in there should now be edited manually or piecemeal
33

4+
stop('if this is for macrosheds > v1, generalize paths')
5+
46
library(EMLassemblyline)
7+
library(tidyverse)
58

69
setwd('~/git/macrosheds/data_acquisition')
710

11+
conf <- jsonlite::fromJSON('config.json',
12+
simplifyDataFrame = FALSE)
13+
814
wd <- file.path('eml', 'eml_templates')
915
ed <- file.path('eml', 'eml_out')
1016
dd <- file.path('eml', 'data_links')
1117

18+
unlink(dd, recursive = TRUE)
1219
dir.create(wd, recursive = TRUE)
1320
dir.create(ed, recursive = TRUE)
1421
dir.create(dd, recursive = TRUE)
@@ -42,31 +49,58 @@ template_annotations()
4249

4350
# template_arguments()
4451

45-
files_to_link <- c('macrosheds_figshare_v1/1_watershed_attribute_data/ws_attr_summaries.csv',
46-
'macrosheds_figshare_v1/2_timeseries_data/bear/bear/discharge/EB.csv')
52+
ts_tables <- list.files('macrosheds_figshare_v1/2_timeseries_data', pattern = '\\.csv$',
53+
recursive = TRUE, full.names = TRUE)
54+
55+
files_to_link <- c(ts_tables,
56+
list.files('macrosheds_figshare_v1/1_watershed_attribute_data',
57+
full.names = TRUE, recursive = TRUE),
58+
'macrosheds_figshare_v1/3_CAMELS-compliant_watershed_attributes/CAMELS_compliant_ws_attr.csv',
59+
'macrosheds_figshare_v1/4_CAMELS-compliant_Daymet_forcings/CAMELS_compliant_Daymet_forcings.csv')
4760
basenames <- basename(files_to_link)
4861
link_locs <- file.path(dd, basenames)
4962

63+
descriptions <- basenames
64+
descriptions <- str_replace(descriptions,
65+
'^timeseries_([a-z_]+)\\.csv$',
66+
'Time-series (streamflow, precip if available, chemistry) for domain: \\1')
67+
descriptions <- str_replace(descriptions,
68+
'^ws_attr_summaries\\.csv$',
69+
'Watershed attribute data, summarized across time, for all domains')
70+
descriptions <- str_replace(descriptions,
71+
'^ws_attr_timeseries\\.csv$',
72+
'Watershed attribute data, temporally explicit, for all domains')
73+
descriptions <- str_replace(descriptions,
74+
'^CAMELS_compliant_ws_attr\\.csv$',
75+
'Watershed attribute data, temporally explicit, for all domains, and interoperable with the CAMELS dataset (https://ral.ucar.edu/solutions/products/camels)')
76+
descriptions <- str_replace(descriptions,
77+
'^CAMELS_compliant_Daymet_forcings\\.csv$',
78+
'Daymet climate forcings for all domains; interoperable with the CAMELS dataset (https://ral.ucar.edu/solutions/products/camels)')
79+
5080
for(i in seq_along(files_to_link)){
5181
suppressWarnings(file.link(files_to_link[i], link_locs[i]))
5282
}
5383

84+
85+
# temporal_coverage <- map(ts_tables, ~range(read_csv(.)$datetime)) %>%
86+
# reduce(~c(min(c(.x[1], .y[1])), max(c(.x[2], .y[2]))))
87+
temporal_coverage <- c("1945-07-01", "2022-04-16")
88+
5489
make_eml(wd, dd, ed,
5590
dataset.title = 'MacroSheds',
56-
temporal.coverage = c('2012-05-01', '2014-11-30'),
91+
temporal.coverage = as.Date(temporal_coverage),
5792
geographic.description = NULL,#not needed if geographic_coverage.txt exists,
5893
geographic.coordinates = NULL,#same,
5994
maintenance.description = 'ongoing',
6095
data.table = basenames,
61-
# data.table.name = data.table, #takes care of itself
62-
data.table.description = rep('placeholder', length(files_to_link)),
96+
# data.table.name = data.table,
97+
data.table.description = descriptions,
6398
data.table.quote.character = rep('"', length(files_to_link)),
6499
data.table.url = NULL,
65-
other.entity = NULL, #list any non-table (zip, shp, R, etc) file here
100+
other.entity = 'eml/data_links/shapefiles.zip',
66101
# other.entity.name = other.entity,
67-
other.entity.description = NULL,
102+
other.entity.description = 'Watershed boundaries, stream gauge locations, and precip gauge locations, for all domains',
68103
other.entity.url = NULL,
69-
# provenance = c('knb-lter-cap.46.3'), #deprecated. use template_provenance()
70-
user.id = NULL,
71-
user.domain = NULL, #pretty sure this doesn't apply to me
104+
user.id = conf$edi_user_id,
105+
user.domain = NULL, #pretty sure this doesn't apply to us
72106
package.id = NULL)

0 commit comments

Comments
 (0)