Skip to content

Commit 2b839bd

Browse files
Merge pull request #164 from bini-a/master
first commit, add products.csv for panola mountain
2 parents bd0ec91 + 2c69ca4 commit 2b839bd

5 files changed

Lines changed: 282 additions & 2 deletions

File tree

src/acquisition_master.R

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,16 @@ ms_init <- function(use_gpu = FALSE,
218218
op_system <- NA
219219
}
220220

221+
222+
res <- try(setwd('C:/Users/Dell/Documents/Projects/data_processing'), silent=TRUE) #server
223+
if(! 'try-error' %in% class(res)){
224+
successes <- successes + 1
225+
which_machine <- 'bini'
226+
instance_type <- 'dev'
227+
machine_status <- 'noob'
228+
op_system <- 'windows'
229+
}
230+
221231
if(successes > 1){
222232
stop(glue('more than one working directory was available. must set the ',
223233
'correct one manually'))
@@ -267,8 +277,7 @@ googledrive::drive_auth(email = gee_login)
267277
#initialize and authorize GEE account
268278
try(rgee::ee_Initialize(user = gee_login,
269279
drive = TRUE))
270-
271-
280+
272281
#set up global logger. network-domain loggers are set up later
273282
logging::basicConfig()
274283
logging::addHandler(logging::writeToFile,

src/webb/panola/domain_helpers.R

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
## run these before working inside retrieve_sleepers_product:
## network = network
## domain = domain
## prodname_ms = prodname_ms
## site_code = site_code
## tracker = held_data
## url = prod_info$url[i]

retrieve_sleepers_product <- function(network,
                                      domain,
                                      prodname_ms,
                                      site_code,
                                      tracker,
                                      url){

    #Dispatch one versionless product retrieval: look up the processing
    #kernel named after the product code, run it, record the outcome in the
    #"held_data" tracker, and write source-url metadata.

    #kernel names follow the pattern process_0_<prodcode>, where the
    #prodcode comes from products.csv. NOTE(review): the csv prodcodes
    #contain an underscore (e.g. VERSIONLESS_001) while the kernels are
    #named process_0_VERSIONLESS001 -- presumably
    #prodcode_from_prodname_ms() strips it; verify.
    kernel_name <- paste0('process_0_',
                          prodcode_from_prodname_ms(prodname_ms))
    processing_func <- get(kernel_name)

    #current retrieval state ("version") for this product/site
    retrieve_info <- tracker[[prodname_ms]][[site_code]]$retrieve

    held_version_dt <- as.POSIXct(retrieve_info$held_version,
                                  tz = 'UTC')

    #"deets" carries the per-product details originally drawn from a row
    #of products.csv; it is passed to the kernel as set_details
    deets <- list(prodname_ms = prodname_ms,
                  site_code = site_code,
                  component = retrieve_info$component,
                  last_mod_dt = held_version_dt,
                  url = url)

    #this function is itself called from retrieve.R in a loop over the
    #product names in products.csv, which is why csv prod names must match
    #the suffixes of the processing kernels that retrieve them.
    #if you're working on pkernels interactively, assign:
    ## set_details = deets
    ## network = network
    ## domain = domain
    result <- do.call(processing_func,
                      args = list(set_details = deets,
                                  network = network,
                                  domain = domain))

    new_status <- evaluate_result_status(result)

    #prefer the first access time reported by the kernel, if any, as the
    #held version
    if('access_time' %in% names(result)){
        access_times <- result$access_time[! is.na(result$access_time)]
        if(length(access_times) > 0){
            deets$last_mod_dt <- access_times[1]
        }
    }

    update_data_tracker_r(network = network,
                          domain = domain,
                          tracker_name = 'held_data',
                          set_details = deets,
                          new_status = new_status)

    source_urls <- get_source_urls(result_obj = result,
                                   processing_func = processing_func)

    write_metadata_r(murl = source_urls,
                     network = network,
                     domain = domain,
                     prodname_ms = prodname_ms)
}
70+
71+
retrieve_usgs_sleeper_daily_q <- function(set_details) {

    #Fetch daily mean discharge (USGS parameter code 00060) from NWIS for
    #the Sleepers River gauge matching the tracked component name.
    #
    #set_details: list with at least $component (character), as built in
    #             retrieve_sleepers_product()
    #returns: the data.frame from dataRetrieval::readNWISdv()

    #map component -> USGS site number. the original fell through with a
    #cryptic "object 'q' not found" error when neither pattern matched;
    #fail loudly instead.
    if(grepl('w5', set_details$component)){
        site_no <- '01135300'
    } else if(grepl('w3', set_details$component)){
        site_no <- '01135150'
    } else {
        stop(paste0('retrieve_usgs_sleeper_daily_q: unrecognized component "',
                    set_details$component,
                    '"; expected one containing "w5" or "w3"'))
    }

    q <- dataRetrieval::readNWISdv(siteNumbers = site_no,
                                   parameterCd = '00060')

    return(q)
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# source('src/webb/sleeper/domain_helpers.R')
2+
#retrieval kernels ####
3+
4+
5+
6+
#discharge: STATUS=READY
#. handle_errors
process_0_VERSIONLESS001 <- function(set_details, network, domain) {

    #Retrieval kernel for daily discharge: pulls daily Q from NWIS via
    #retrieve_usgs_sleeper_daily_q(), writes it as csv under the raw data
    #directory, and returns retrieval details (url, access time,
    #last-modified) for the data tracker.

    raw_data_dest <- glue('data/{n}/{d}/raw/{p}/{s}',
                          n = network,
                          d = domain,
                          #use the prodname_ms carried in set_details rather
                          #than relying on the global assigned by retrieve.R
                          p = set_details$prodname_ms,
                          s = set_details$site_code)

    dir.create(path = raw_data_dest,
               showWarnings = FALSE,
               recursive = TRUE)

    rawfile <- glue('{rd}/{c}.csv',
                    rd = raw_data_dest,
                    c = set_details$component)

    #call our dataRetrieval function
    q <- retrieve_usgs_sleeper_daily_q(set_details)

    #write it to the raw file location
    write_csv(q, file = rawfile)

    res <- httr::HEAD(set_details$url)

    #HTTP Last-Modified headers use the RFC 1123 date form (e.g.
    #"Mon, 07 Jun 2021 12:00:00 GMT"). the old
    #strptime(substr(...), format = '%Y-%m-%dT%H:%M:%S') could never match
    #that form and always yielded NA, so parse with httr::parse_http_date.
    lm_header <- res$headers$`last-modified`
    last_mod_dt <- if(is.null(lm_header)){
        NA
    } else {
        with_tz(httr::parse_http_date(lm_header), tzone = 'UTC')
    }

    #NOTE(review): '(requires authentication)' looks copied from another
    #domain's kernel; neither NWIS nor this url appears to need a login --
    #confirm before relying on it
    deets_out <- list(url = paste(set_details$url, '(requires authentication)'),
                      access_time = as.character(with_tz(Sys.time(),
                                                         tzone = 'UTC')),
                      last_mod_dt = last_mod_dt)

    return(deets_out)
}
45+
46+
#stream_chemistry: STATUS=READY
#. handle_errors
process_0_VERSIONLESS002 <- function(set_details, network, domain) {

    #Retrieval kernel for stream chemistry: downloads the source zip from
    #set_details$url into the raw data directory and returns retrieval
    #details (url, access time, last-modified) for the data tracker.

    raw_data_dest <- glue('data/{n}/{d}/raw/{p}/{s}',
                          n = network,
                          d = domain,
                          #use the prodname_ms carried in set_details rather
                          #than relying on the global assigned by retrieve.R
                          p = set_details$prodname_ms,
                          s = set_details$site_code)

    dir.create(path = raw_data_dest,
               showWarnings = FALSE,
               recursive = TRUE)

    rawfile <- glue('{rd}/{c}.zip',
                    rd = raw_data_dest,
                    c = set_details$component)

    R.utils::downloadFile(url = set_details$url,
                          filename = rawfile,
                          skip = FALSE,
                          overwrite = TRUE)

    res <- httr::HEAD(set_details$url)

    #HTTP Last-Modified headers use the RFC 1123 date form (e.g.
    #"Mon, 07 Jun 2021 12:00:00 GMT"). the old
    #strptime(substr(...), format = '%Y-%m-%dT%H:%M:%S') could never match
    #that form and always yielded NA, so parse with httr::parse_http_date.
    lm_header <- res$headers$`last-modified`
    last_mod_dt <- if(is.null(lm_header)){
        NA
    } else {
        with_tz(httr::parse_http_date(lm_header), tzone = 'UTC')
    }

    #NOTE(review): '(requires authentication)' looks copied from another
    #domain's kernel; this url appears to be a public catalog link --
    #confirm before relying on it
    deets_out <- list(url = paste(set_details$url, '(requires authentication)'),
                      access_time = as.character(with_tz(Sys.time(),
                                                         tzone = 'UTC')),
                      last_mod_dt = last_mod_dt)

    return(deets_out)
}
84+
85+

src/webb/panola/products.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
prodcode,prodname,retrieve_status,munge_status,derive_status,precursor_of,notes,components,url
2+
VERSIONLESS_001,discharge,,,,stream_flux_inst_ms001,,panola_discharge,http://catalog/file/get/5c5b0b83e4b070828902ac9b?f=__disk__b4%2F4d%2Fc1%2Fb44dc1405627810cbf6ef48e0a9ad77e7a3d7f62
3+
VERSIONLESS_002,stream_chemistry,,,,stream_flux_inst_ms001,,panola_chem,http://catalog/file/get/5c5b0b83e4b070828902ac9b?f=__disk__b4%2F4d%2Fc1%2Fb44dc1405627810cbf6ef48e0a9ad77e7a3d7f62
4+
ms001,stream_flux_inst,,,,NA,NA,NA,NA
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#Retrieval driver for versionless products (data with no provider-side
#version stamp, e.g. a single zip file covering all sites). we could turn
#this into a function, and make a separate function for versionless data
#that's separated into several files.

#NOTE(review): this script expects `network`, `domain`, `prodname_filter`,
#and `logger_module` to be defined by the caller, and assigns `prodname_ms`
#and `held_data` globally (<<-), presumably because the processing kernels
#and tracker helpers read them -- confirm against the sourcing context.

loginfo('Beginning retrieve (versionless products)',
        logger = logger_module)

#restrict to versionless products flagged ready to retrieve
prod_info <- get_product_info(network = network,
                              domain = domain,
                              status_level = 'retrieve',
                              get_statuses = 'ready') %>%
    filter(grepl(pattern = '^VERSIONLESS',
                 x = prodcode))

if(! is.null(prodname_filter)){
    prod_info <- filter(prod_info, prodname %in% prodname_filter)
}

#NOTE(review): return() at top level is only legal because this file is
#sourced inside a function; it errors if the script is run standalone
if(nrow(prod_info) == 0) return()

#versionless products are not site-specific, so a placeholder site is used
site_code <- 'sitename_NA'

## i = 4  (debug leftover: set i manually to step through the loop body)
for(i in seq_len(nrow(prod_info))){

    prodcode <- prod_info$prodcode[i]

    #e.g. 'discharge__VERSIONLESS_001'; global because downstream helpers
    #read it
    prodname_ms <<- paste0(prod_info$prodname[i],
                           '__',
                           prodcode)

    held_data <<- get_data_tracker(network = network,
                                   domain = domain)

    #ensure the tracker has entries for this product and placeholder site
    if(! product_is_tracked(tracker = held_data,
                            prodname_ms = prodname_ms)){

        held_data <<- track_new_product(tracker = held_data,
                                        prodname_ms = prodname_ms)
    }

    if(! site_is_tracked(tracker = held_data,
                         prodname_ms = prodname_ms,
                         site_code = site_code)){

        held_data <<- insert_site_skeleton(
            tracker = held_data,
            prodname_ms = prodname_ms,
            site_code = site_code,
            site_components = prod_info$components[i],
            versionless = TRUE
        )
    }

    update_data_tracker_r(network = network,
                          domain = domain,
                          tracker = held_data)

    dest_dir <- glue('data/{n}/{d}/raw/{p}/{s}',
                     n = network,
                     d = domain,
                     p = prodname_ms,
                     s = site_code)

    dir.create(path = dest_dir,
               showWarnings = FALSE,
               recursive = TRUE)

    retrieval_s <- held_data[[prodname_ms]][['sitename_NA']][['retrieve']][['status']]

    #skip products whose retrieval is already up to date
    if(retrieval_s == 'ok'){
        loginfo(glue('Nothing to do for {s} {p}',
                     s=site_code, p=prodname_ms), logger=logger_module)
        next
    } else {
        loginfo(glue('Retrieving {s} {p}',
                     s=site_code, p=prodname_ms), logger=logger_module)
    }

    retrieve_sleepers_product(network = network,
                              domain = domain,
                              prodname_ms = prodname_ms,
                              site_code = site_code,
                              tracker = held_data,
                              url = prod_info$url[i])

    #if this product feeds a munge step, mark that step as pending
    if(! is.na(prod_info$munge_status[i])){
        update_data_tracker_m(network = network,
                              domain = domain,
                              tracker_name = 'held_data',
                              prodname_ms = prodname_ms,
                              site_code = site_code,
                              new_status = 'pending')
    }

    gc()
}

loginfo('Retrieval complete for all versionless products',
        logger = logger_module)

0 commit comments

Comments
 (0)