Skip to content

Commit 401ada1

Browse files
Merge pull request #157 from vlahm/master
precaution against possible data loss
2 parents: aa800c4 + daaa4d4 — commit 401ada1

15 files changed

Lines changed: 379 additions & 201 deletions

src/acquisition_master.R

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ suppressPackageStartupMessages({
3939
library(doParallel) #replaced by doFuture, but still needed on BM1
4040
library(doFuture)
4141
library(googlesheets4)
42+
library(googledrive)
4243
library(rgee) #requires geojsonio package
4344
library(osmdata)
4445

@@ -263,7 +264,7 @@ ms_globals <- c(ls(all.names = TRUE), 'ms_globals')
263264

264265
dir.create('logs', showWarnings = FALSE)
265266

266-
# dmnrow = 1
267+
# dmnrow = 20
267268
# print(network_domain, n=50)
268269
for(dmnrow in 1:nrow(network_domain)){
269270

@@ -302,7 +303,7 @@ for(dmnrow in 1:nrow(network_domain)){
302303
# prodname_filter = c('stream_chemistry'),
303304
domain = domain)
304305
ms_munge(network = network,
305-
# prodname_filter = c('stream_chemistry'),
306+
# prodname_filter = c('discharge'),
306307
domain = domain)
307308
if(domain != 'mcmurdo'){
308309
sw(ms_delineate(network = network,
@@ -311,13 +312,13 @@ for(dmnrow in 1:nrow(network_domain)){
311312
verbose = TRUE))
312313
}
313314
ms_derive(network = network,
314-
# prodname_filter = c('precip_pchem_pflux'),
315+
prodname_filter = c('discharge'),
315316
domain = domain)
316-
317+
317318
if(domain != 'mcmurdo'){
318319
ms_general(network = network,
319320
domain = domain,
320-
get_missing_only = F)
321+
get_missing_only = TRUE)
321322
}
322323

323324
retain_ms_globals(ms_globals)

src/dev/compare_points_to_nhd.R

Lines changed: 60 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ buf <- function(site, buf_dist){
4646
site_csv <- suppressMessages(googlesheets4::read_sheet(
4747
'https://docs.google.com/spreadsheets/d/1Xd38tvB0upHjDRDa5qalGN2Ors6HHWZKpT4Bw0dDqU4/edit?usp=drive_web&ouid=111793152718629438266',
4848
na = c('', 'NA'),
49-
col_types = 'ccccccccnnnnnccc'
49+
col_types = 'ccccccccnnnnncccc'
5050
))
5151

5252
sites <- filter(site_csv,
@@ -56,66 +56,65 @@ sites <- filter(site_csv,
5656
sites$NHD_COMID <- '?'
5757
manual_input <- 1:233
5858
total_len <- nrow(sites)
59-
sites$NHD_COMID[manual_input] <- c('HR only', 'too small', 'HR only', '6729679',
60-
'HR only', 'HR only', '6729787', 'HR only',
61-
'too small', '23773411', 'HR only', 'HR only',
62-
'HR only', 'HR only', 'HR only', 'HR only',
63-
'HR only', 'HR only', '23774053', '3424530',
64-
'18548462', '18211220', '1239639', '3649284',
65-
'8444872', '698676', '22050327', '7690025',#28
66-
'24505800', '22048111', 'HR only', '20440650',
67-
'2889452', '2964310', '1306285', '23773423',#COMO, BLDE, PRIN, MCRA,
68-
'nonCONUS', 'nonCONUS', '11722717', 'nonCONUS',#first two AK, last PR: '800035089'
69-
'nonCONUS', 'HR only', '18841314', '18208464', #first PR 800026322
70-
'HR only', '22144520', '18841358', 'HR only',#48
71-
'18841356', 'HR only', '11689212', '11688596',#... GFCP, GFGB
72-
'11688596', '11689106', 'too small', '11689186',#GFVN on same reach as GFGB
73-
'11688418', 'too small', '11689206', '2889280',
74-
'2889770', '2889186', 'too small', 'HR only',#GREEN4...
75-
'too small', '17595459', '17595361', '17595359',#68
76-
'17595433', '17594763', '17594741', '17595453', #same as next
77-
'17595453', '17594769', '17594785', '17595305',
78-
'17596097', '17595477', '17595369', '17595473',
79-
'17596159', '17595473', '17596161', 'nonCONUS', #bonanza, then mcmurdo
80-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
81-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
82-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
83-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
84-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
85-
'nonCONUS', 'nonCONUS', '5860599', '5862611', #plum
86-
'5862611', '5862581', 'nonCONUS', 'nonCONUS',#112
87-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS', #arctic
88-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
89-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
90-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
91-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
92-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
93-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',#140
94-
'nonCONUS', 'nonCONUS', '2889384', '2889360', #boulder
95-
'2889410', '2891254', 'too small', '17827556',
96-
'17827556', '17826162', '17826162', '17827558',
97-
'17826228', 'HR only', 'HR only', 'too small',
98-
'HR only', '17826228', 'HR only', 'HR only',#160
99-
'HR only', 'HR only', 'nonCONUS', 'nonCONUS', #luquillo
100-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
101-
'nonCONUS', 'HR only', 'too small', 'too small',
102-
'3775221', 'too small', 'too small', 'too small',
103-
'HR only', 'too small', 'HR only', '1332754',#180
104-
'1332754', '1332672', '1332674', '1332674',
105-
'HR only', 'HR only', '1332198', '1332190',
106-
'1332186', '1332186', 'HR only', '1332204',
107-
'1332224', '13633173', 'HR only', 'HR only',
108-
'HR only', 'HR only', 'HR only', 'HR only',#200
109-
'HR only', '22050299', '22050299', '22050323', #krew. some questionable point-segment associations in here
110-
'23903201', 'HR only', 'HR only', 'HR only',
111-
'9643235', '9643251', '9643235', 'HR only',
112-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS', #krycklan
113-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
114-
'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',#224
115-
'nonCONUS', '22125024', 'HR only', '4681928',
116-
'4682266', '4682628', 'too small', '2679458',
117-
'2679458')
118-
# for(i in seq_len(nrow(sites))){
59+
# sites$NHD_COMID[manual_input] <- c('HR only', 'too small', 'HR only', '6729679',
60+
# 'HR only', 'HR only', '6729787', 'HR only',
61+
# 'too small', '23773411', 'HR only', 'HR only',
62+
# 'HR only', 'HR only', 'HR only', 'HR only',
63+
# 'HR only', 'HR only', '23774053', '3424530',
64+
# '18548462', '18211220', '1239639', '3649284',
65+
# '8444872', '698676', '22050327', '7690025',#28
66+
# '24505800', '22048111', 'HR only', '20440650',
67+
# '2889452', '2964310', '1306285', '23773423',#COMO, BLDE, PRIN, MCRA,
68+
# 'nonCONUS', 'nonCONUS', '11722717', 'nonCONUS',#first two AK, last PR: '800035089'
69+
# 'nonCONUS', 'HR only', '18841314', '18208464', #first PR 800026322
70+
# 'HR only', '22144520', '18841358', 'HR only',#48
71+
# '18841356', 'HR only', '11689212', '11688596',#... GFCP, GFGB
72+
# '11688596', '11689106', 'too small', '11689186',#GFVN on same reach as GFGB
73+
# '11688418', 'too small', '11689206', '2889280',
74+
# '2889770', '2889186', 'too small', 'HR only',#GREEN4...
75+
# 'too small', '17595459', '17595361', '17595359',#68
76+
# '17595433', '17594763', '17594741', '17595453', #same as next
77+
# '17595453', '17594769', '17594785', '17595305',
78+
# '17596097', '17595477', '17595369', '17595473',
79+
# '17596159', '17595473', '17596161', 'nonCONUS', #bonanza, then mcmurdo
80+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
81+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
82+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
83+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
84+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
85+
# 'nonCONUS', 'nonCONUS', '5860599', '5862611', #plum
86+
# '5862611', '5862581', 'nonCONUS', 'nonCONUS',#112
87+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS', #arctic
88+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
89+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
90+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
91+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
92+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
93+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',#140
94+
# 'nonCONUS', 'nonCONUS', '2889384', '2889360', #boulder
95+
# '2889410', '2891254', 'too small', '17827556',
96+
# '17827556', '17826162', '17826162', '17827558',
97+
# '17826228', 'HR only', 'HR only', 'too small',
98+
# 'HR only', '17826228', 'HR only', 'HR only',#160
99+
# 'HR only', 'HR only', 'nonCONUS', 'nonCONUS', #luquillo
100+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
101+
# 'nonCONUS', 'HR only', 'too small', 'too small',
102+
# '3775221', 'too small', 'too small', 'too small',
103+
# 'HR only', 'too small', 'HR only', '1332754',#180
104+
# '1332754', '1332672', '1332674', '1332674',
105+
# 'HR only', 'HR only', '1332198', '1332190',
106+
# '1332186', '1332186', 'HR only', '1332204',
107+
# '1332224', '13633173', 'HR only', 'HR only',
108+
# 'HR only', 'HR only', 'HR only', 'HR only',#200
109+
# 'HR only', '22050299', '22050299', '22050323', #krew. some questionable point-segment associations in here
110+
# '23903201', 'HR only', 'HR only', 'HR only',
111+
# '9643235', '9643251', '9643235', 'HR only',
112+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS', #krycklan
113+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',
114+
# 'nonCONUS', 'nonCONUS', 'nonCONUS', 'nonCONUS',#224
115+
# 'nonCONUS', '22125024', 'HR only', '4681928',
116+
# '4682266', '4682628', 'too small', '2679458',
117+
# '2679458')
119118

120119
# loop 1: NHDPlusV2 or NHD-HR (kinda obsolete) ####
121120

@@ -202,14 +201,6 @@ for(i in 1:total_len){
202201
print(paste('map saved to', mapview_save_path))
203202
print(xx)
204203

205-
# gg <- ggplot() +
206-
# geom_sf(data = NHD_HR, color = 'darkslategray3') +
207-
# geom_sf(data = NHDPlus, color = 'deepskyblue4') +
208-
# geom_sf(data = site, color = 'red') +
209-
# coord_sf()
210-
#
211-
# print(gg)
212-
213204
system('spd-say "chili chili chili"')
214205
x <- readline(cat('This point is on: [A] an NHDPlus flowline, [B] an NHD_HR flowline, or [C] neither >\n'))
215206

src/dev/dev_helpers.R

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,11 @@ correct_all_geometries <- function(path, dir_pattern = 'ws_boundary'){
12911291
}
12921292
}
12931293

1294+
# correct_all_geometries(path = '~/git/macrosheds/data_acquisition/data')
1295+
# correct_all_geometries(path = '~/git/macrosheds/portal/data')
1296+
# correct_all_geometries(path = '~/git/macrosheds/data_acquisition/macrosheds_dataset_v1')
1297+
# correct_all_geometries(path = '~/git/macrosheds/data_acquisition/macrosheds_figshare_v1')
1298+
12941299
rebuild_portal_data_before_postprocessing <- function(network_domain, backup = TRUE){
12951300

12961301
dir_wrapper <- function(path, keyword){
@@ -1393,7 +1398,28 @@ rebuild_portal_data_before_postprocessing <- function(network_domain, backup = T
13931398
'sure it looks good before and after postprocessing. especially portal/data/general'))
13941399
}
13951400

1396-
# correct_all_geometries(path = '~/git/macrosheds/data_acquisition/data')
1397-
# correct_all_geometries(path = '~/git/macrosheds/portal/data')
1398-
# correct_all_geometries(path = '~/git/macrosheds/data_acquisition/macrosheds_dataset_v1')
1399-
# correct_all_geometries(path = '~/git/macrosheds/data_acquisition/macrosheds_figshare_v1')
1401+
insert_retrieval_datetimes <- function(){
1402+
1403+
#this inserts the last modification datetime of each raw documentation file as
1404+
#the presumed retrieval datetime for the corresponding raw data product.
1405+
1406+
#used in a pinch. hopefully a real recording method has been implemented by now.
1407+
1408+
fs <- list.files('data',
1409+
recursive = TRUE, full.names = TRUE)
1410+
fs <- fs[grepl('/raw/', fs)]
1411+
fs <- fs[grepl('/documentation/', fs)]
1412+
1413+
for(f in fs){
1414+
1415+
rt <- read_lines(f)
1416+
if(length(rt) != 1) stop('sup with this')
1417+
if(grepl('UTC\\)$', rt)) next
1418+
rt <- glue(
1419+
rt, ' (',
1420+
as.character(lubridate::with_tz(file.info(f)$mtime, 'UTC')),
1421+
' UTC)')
1422+
1423+
write_lines(rt, f)
1424+
}
1425+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
library(tidyverse)
2+
library(feather)
3+
4+
setwd('/home/mike/git/macrosheds/data_acquisition/macrosheds_figshare_v1/1_watershed_attribute_data')
5+
6+
d = map_dfr(list.files('ws_attr_timeseries', full.names = TRUE), read_csv)
7+
filter(d, domain == 'hbef') %>%
8+
write_feather('~/Desktop/hbef_ws_attr_timeseries.feather')
9+
10+
d = map_dfr(list.files('../4_CAMELS-compliant_Daymet_forcings/', full.names = TRUE), read_csv)
11+
filter(d, site_code %in% paste0('w', 1:9)) %>%
12+
write_feather('~/Desktop/hbef_ws_attr_timeseries_daymet.feather')
13+
14+
d = map_dfr(list.files('../3_CAMELS-compliant_watershed_attributes/', full.names = TRUE), read_csv)
15+
filter(d, site_code %in% paste0('w', 1:9)) %>%
16+
select(site_code, everything()) %>%
17+
write_feather('~/Desktop/hbef_ws_attr_camels.csv')
18+
19+
d = read_csv('../1_watershed_attribute_data/ws_attr_summaries.csv')
20+
filter(d, domain == 'hbef') %>%
21+
write_feather('~/Desktop/hbef_ws_attr.csv')
22+

src/global/general_kernels.R

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,11 +1152,11 @@ process_3_ms816 <- function(network, domain, prodname_ms, site_code,
11521152
}
11531153

11541154
if(nrow(all_vars) == 0){
1155-
1155+
11561156
msg <- generate_ms_exception(glue('No data available for: ', sites[s]))
11571157
logerror(msg = msg,
11581158
logger = logger_module)
1159-
1159+
11601160
next
11611161
}
11621162

@@ -1272,10 +1272,10 @@ process_3_ms818 <- function(network, domain, prodname_ms, site_code,
12721272
if(inherits(ws_values, 'try-error')){
12731273
msg <- generate_ms_exception(glue('No data was retrived for {s}',
12741274
s = sites[s]))
1275-
1275+
12761276
logerror(msg = msg,
12771277
logger = logger_module)
1278-
1278+
12791279
next
12801280
}
12811281

@@ -1460,14 +1460,14 @@ process_3_ms821 <- function(network, domain, prodname_ms, site_code,
14601460
d = domain))
14611461

14621462
ws_prodname <- grep('ws_boundary', files, value = TRUE)
1463-
1464-
# If there are multiple ws boundary folders, get largest prod code
1463+
1464+
# If there are multiple ws boundary folders, get largest prod code
14651465
if(length(ws_prodname) > 1){
14661466

14671467
prod_codes <- str_match(ws_prodname, 'ms([0-9]{3})$')[,2]
1468-
1468+
14691469
max_code <- max(prod_codes)
1470-
1470+
14711471
ws_prodname <- ws_prodname[grep(max_code, prod_codes)]
14721472
}
14731473

@@ -1503,7 +1503,7 @@ process_3_ms821 <- function(network, domain, prodname_ms, site_code,
15031503
snow_year <- str_match(snow_files[p], 'WY([0-9]{4})_v01\\.nc$')[1,2]
15041504
site_boundary <- sf::st_read(ws_path, quiet = TRUE) %>%
15051505
terra::vect(.)
1506-
1506+
15071507
snow_file <- terra::rast(snow_files[p])
15081508

15091509
swe_tib = terra::extract(snow_file, site_boundary, weights = TRUE)
@@ -1604,22 +1604,23 @@ process_3_ms822 <- function(network, domain, prodname_ms, site_code,
16041604

16051605
dir.create(glue('data/{n}/{d}/ws_traits/glhymps/',
16061606
n = network,
1607-
d = domain), recursive = TRUE)
1607+
d = domain), recursive = TRUE, showWarnings = FALSE)
16081608

16091609
glhymps <- st_read('data/spatial/GLHYMPS/GLHYMPS.shp')
1610-
1610+
16111611
sites <- boundaries$site_code
16121612
for(s in 1:length(sites)){
16131613

16141614
site_boundary <- boundaries %>%
16151615
filter(site_code == !!sites[s]) %>%
16161616
sf::st_transform(sf::st_crs(glhymps)) %>%
16171617
sf::st_make_valid()
1618-
1618+
16191619
site_area <- site_data %>%
16201620
filter(network == !!network,
16211621
domain == !!domain,
1622-
site_code == !!sites[s]) %>%
1622+
site_code == !!sites[s],
1623+
site_type != 'rain_gauge') %>%
16231624
pull(ws_area_ha)
16241625

16251626
site_area <- site_area * 10000
@@ -1824,7 +1825,7 @@ process_3_ms824 <- function(network, domain, prodname_ms, site_code,
18241825

18251826
final <- fin_table %>%
18261827
select(date, site_code, dayl, prcp, srad, swe, tmax, tmin, vp)
1827-
1828+
18281829
if(nrow(final) == 0){
18291830
return(generate_ms_exception(glue('No data was retrived for {s}',
18301831
s = site_code)))

0 commit comments

Comments (0)