@@ -5935,7 +5935,8 @@ shortcut_idw <- function(encompassing_dem,
59355935 d_elev <- tibble(site_code = rownames(dk),
59365936 d = dk[,1]) %>%
59375937 left_join(data_locations,
5938- by = 'site_code')
5938+ by = 'site_code') %>%
5939+ mutate(d = errors::drop_errors(d))
59395940 mod <- lm(d ~ elevation, data = d_elev)
59405941 ab <- as.list(mod$coefficients)
59415942
@@ -5945,9 +5946,12 @@ shortcut_idw <- function(encompassing_dem,
59455946 # Set all negative values to 0
59465947 d_from_elev[d_from_elev < 0] <- 0
59475948
5948- #average both approaches (this should be weighted toward idw
5949- #when close to any data location, and weighted half and half when far)
5950- d_idw <- (d_idw + d_from_elev) / 2
5949+ #get weighted mean of both approaches:
5950+ #weight on idw is 1; weight on elev-predicted is |R^2|
5951+ abs_r2 <- abs(cor(d_elev$d, mod$fitted.values)^2)
5952+ # abs_adjr2 <- abs(1 - (1 - r2) * ((nobs(mod) - 1) / mod$df.residual))
5953+
5954+ d_idw <- (d_idw + d_from_elev * abs_r2) / (1 + abs_r2)
59515955 }
59525956
59535957 ws_mean[k] <- mean(d_idw, na.rm=TRUE)
@@ -6567,12 +6571,12 @@ ms_linear_interpolate <- function(d, interval){
65676571 ms_interp_column <- is.na(d$val)
65686572
65696573 d_interp <- d %>%
6570- mutate(
6574+ mutate(val_err = errors::errors(val),
6575+ val = errors::drop_errors(val),
65716576
6572- #carry ms_status to any rows that have just been populated (probably
6573- #redundant now, but can't hurt)
65746577 ms_status = imputeTS::na_locf(ms_status,
6575- na_remaining = 'rev'),
6578+ na_remaining = 'rev',
6579+ maxgap = max_samples_to_impute),
65766580
65776581 # val = if(sum(! is.na(val)) > 2){
65786582 #
@@ -6591,38 +6595,31 @@ ms_linear_interpolate <- function(d, interval){
65916595 maxgap = max_samples_to_impute)
65926596
65936597 #unless not enough data in group; then do nothing
6594- } else val
6598+ } else val,
6599+ val_err = if(sum(! is.na(val_err)) > 1){
6600+ #do the same for uncertainty
6601+ imputeTS::na_interpolation(val_err,
6602+ maxgap = max_samples_to_impute)
6603+ } else val_err
65956604 )
65966605
6597- err <- errors(d_interp$val) #extract error from data vals
6598- err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6599- if(sum(! is.na(err)) > 0){
6600- #and then carry error to interped rows
6601- errors(d_interp$val) <- imputeTS::na_locf(err, na_remaining = 'rev')
6602- } else {
6603- errors(d_interp$val) <- 0 # #unless not enough error to interp
6604- }
6606+ errors::errors(d_interp$val) <- d_interp$val_err
6607+ d_interp$val_err <- NULL
6608+
6609+ # err <- errors(d_interp$val) #extract error from data vals
6610+ # err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6611+ # if(sum(! is.na(err)) > 0){
6612+ # #and then carry error to interped rows
6613+ # errors(d_interp$val) <- imputeTS::na_locf(err, na_remaining = 'rev')
6614+ # } else {
6615+ # errors(d_interp$val) <- 0 # #unless not enough error to interp
6616+ # }
66056617
66066618 d_interp <- d_interp %>%
66076619 select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
66086620 arrange(site_code, var, datetime)
66096621
6610- # mutate(
6611- # err = errors(val), #extract error from data vals
6612- # err = case_when(
6613- # err == 0 ~ NA_real_, #change new uncerts (0s by default) to NA
6614- # TRUE ~ err),
6615- # val = if(sum(! is.na(err)) > 0){
6616- # set_errors(val, #and then carry error to interped rows
6617- # imputeTS::na_locf(err,
6618- # na_remaining = 'rev'))
6619- # } else {
6620- # set_errors(val, #unless not enough error to interp
6621- # 0)
6622- # }) %>%
6623- # select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
6624- # arrange(site_code, var, datetime)
6625-
6622+ d_interp$ms_status[is.na(d_interp$ms_status)] = 0
66266623 ms_interp_column <- ms_interp_column & ! is.na(d_interp$val)
66276624 d_interp$ms_interp <- as.numeric(ms_interp_column)
66286625 d_interp <- filter(d_interp,
@@ -6666,13 +6663,14 @@ ms_nocb_interpolate <- function(d, interval){
66666663 ms_interp_column <- is.na(d$val)
66676664
66686665 d_interp <- d %>%
6669- mutate(
6666+ mutate(val_err = errors::errors(val),
6667+ val = errors::drop_errors(val),
66706668
6671- #carry ms_status to any rows that have just been populated (probably
6672- #redundant now, but can't hurt)
6669+ #carry ms_status to any rows that have just been populated
66736670 ms_status = imputeTS::na_locf(ms_status,
66746671 option = 'nocb',
6675- na_remaining = 'rev'),
6672+ na_remaining = 'rev',
6673+ maxgap = max_samples_to_impute),
66766674
66776675 val = if(sum(! is.na(val)) > 1){
66786676
@@ -6683,22 +6681,90 @@ ms_nocb_interpolate <- function(d, interval){
66836681 maxgap = max_samples_to_impute)
66846682
66856683 #unless not enough data in group; then do nothing
6686- } else val
6684+ } else val,
6685+ val_err = if(sum(! is.na(val_err)) > 1){
6686+
6687+ #do the same for uncertainty
6688+ imputeTS::na_locf(val_err,
6689+ option = 'nocb',
6690+ na_remaining = 'keep',
6691+ maxgap = max_samples_to_impute)
6692+ } else val_err
66876693 )
66886694
6689- err <- errors(d_interp$val) #extract error from data vals
6690- err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6691- if(sum(! is.na(err)) > 0){
6692- #and then carry error to interped rows
6693- errors(d_interp$val) <- imputeTS::na_locf(err, option = 'nocb')
6694- } else {
6695- errors(d_interp$val) <- 0 # #unless not enough error to interp
6695+ errors::errors(d_interp$val) <- d_interp$val_err
6696+ d_interp$val_err <- NULL
6697+
6698+ d_interp <- d_interp %>%
6699+ select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
6700+ arrange(site_code, var, datetime)
6701+
6702+
6703+ d_interp$ms_status[is.na(d_interp$ms_status)] = 0
6704+ ms_interp_column <- ms_interp_column & ! is.na(d_interp$val)
6705+ d_interp$ms_interp <- as.numeric(ms_interp_column)
6706+ d_interp <- filter(d_interp,
6707+ ! is.na(val))
6708+
6709+ return(d_interp)
6710+ }
6711+
6712+ ms_zero_interpolate <- function(d, interval){
6713+
6714+ #d: a ms tibble with no ms_interp column (this will be created)
6715+ #interval: the sampling interval (either '15 min' or '1 day').
6716+
6717+ #for precip only, and only relevant at konza (so far)
6718+
6719+ #fills gaps up to maxgap (determined automatically), then removes missing values
6720+
6721+ if(length(unique(d$site_code)) > 1){
6722+ stop(paste('ms_zero_interpolate is not designed to handle datasets',
6723+ 'with more than one site.'))
6724+ }
6725+
6726+ if(length(unique(d$var)) > 1){
6727+ stop(paste('ms_zero_interpolate is not designed to handle datasets',
6728+ 'with more than one variable'))
6729+ }
6730+
6731+ if(! interval %in% c('15 min', '1 day')){
6732+ stop('interval must be "15 min" or "1 day", unless we have decided otherwise')
66966733 }
66976734
6735+ var <- drop_var_prefix(d$var[1])
6736+ max_samples_to_impute <- 45 #fixed because this func is only called for precip
6737+
6738+ if(interval == '15 min'){
6739+ max_samples_to_impute <- max_samples_to_impute * 96
6740+ }
6741+
6742+ d <- arrange(d, datetime)
6743+ ms_interp_column <- is.na(d$val)
6744+
6745+ d_interp <- d %>%
6746+ mutate(
6747+
6748+ ms_status = imputeTS::na_replace(ms_status,
6749+ fill = 1,
6750+ maxgap = max_samples_to_impute),
6751+
6752+ val = if(sum(! is.na(val)) > 1){
6753+
6754+ #nocb interp NA vals
6755+ imputeTS::na_replace(val,
6756+ fill = 0,
6757+ maxgap = max_samples_to_impute)
6758+
6759+ #unless not enough data in group; then do nothing
6760+ } else val
6761+ )
6762+
66986763 d_interp <- d_interp %>%
66996764 select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
67006765 arrange(site_code, var, datetime)
67016766
6767+ d_interp$ms_status[is.na(d_interp$ms_status)] = 0
67026768 ms_interp_column <- ms_interp_column & ! is.na(d_interp$val)
67036769 d_interp$ms_interp <- as.numeric(ms_interp_column)
67046770 d_interp <- filter(d_interp,
@@ -6744,13 +6810,15 @@ ms_nocb_mean_interpolate <- function(d, interval){
67446810 ms_interp_column <- is.na(d$val)
67456811
67466812 d_interp <- d %>%
6747- mutate(
6813+ mutate(val_err = errors::errors(val),
6814+ val = errors::drop_errors(val),
67486815
67496816 #carry ms_status to any rows that have just been populated (probably
67506817 #redundant now, but can't hurt)
67516818 ms_status = imputeTS::na_locf(ms_status,
67526819 option = 'nocb',
6753- na_remaining = 'rev'),
6820+ na_remaining = 'rev',
6821+ maxgap = max_samples_to_impute),
67546822
67556823 val = if(sum(! is.na(val)) > 1){
67566824
@@ -6761,17 +6829,28 @@ ms_nocb_mean_interpolate <- function(d, interval){
67616829 maxgap = max_samples_to_impute)
67626830
67636831 #unless not enough data in group; then do nothing
6764- } else val
6832+ } else val,
6833+ val_err = if(sum(! is.na(val_err)) > 1){
6834+
6835+ #do the same for uncertainty
6836+ imputeTS::na_locf(val_err,
6837+ option = 'nocb',
6838+ na_remaining = 'keep',
6839+ maxgap = max_samples_to_impute)
6840+ } else val_err
67656841 )
67666842
6767- err <- errors(d_interp$val) #extract error from data vals
6768- err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6769- if(sum(! is.na(err)) > 0){
6770- #and then carry error to interped rows
6771- errors(d_interp$val) <- imputeTS::na_locf(err, option = 'nocb')
6772- } else {
6773- errors(d_interp$val) <- 0 # #unless not enough error to interp
6774- }
6843+ errors::errors(d_interp$val) <- d_interp$val_err
6844+ d_interp$val_err <- NULL
6845+
6846+ # err <- errors(d_interp$val) #extract error from data vals
6847+ # err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6848+ # if(sum(! is.na(err)) > 0){
6849+ # #and then carry error to interped rows
6850+ # errors(d_interp$val) <- imputeTS::na_locf(err, option = 'nocb')
6851+ # } else {
6852+ # errors(d_interp$val) <- 0 # #unless not enough error to interp
6853+ # }
67756854
67766855 d_interp <- d_interp %>%
67776856 select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
@@ -6789,19 +6868,30 @@ ms_nocb_mean_interpolate <- function(d, interval){
67896868
67906869 err_ <- errors::errors(d_interp$val)
67916870 d_interp$val <- errors::drop_errors(d_interp$val)
6792- vals_interpted <- d_interp$val * laginterp
6871+ vals_interped <- d_interp$val * laginterp
6872+ err_interped <- err_ * laginterp
67936873
67946874 #use run length encoding to do the division quickly
6795- vals_new <- rle2(vals_interpted ) %>%
6875+ vals_new <- rle2(vals_interped ) %>%
67966876 mutate(values = values / lengths) %>%
67976877 select(lengths, values) %>%
67986878 as.list()
67996879 class(vals_new) <- 'rle'
68006880 vals_new <- inverse.rle(vals_new)
68016881
6882+ #same for uncertainty
6883+ err_new <- rle2(err_interped) %>%
6884+ mutate(values = values / lengths) %>%
6885+ select(lengths, values) %>%
6886+ as.list()
6887+ class(err_new) <- 'rle'
6888+ err_new <- inverse.rle(err_new)
6889+
68026890 real_vals_new <- vals_new != 0
68036891 d_interp$val[real_vals_new] <- vals_new[real_vals_new]
6804- errors::errors(d_interp$val) <- err_
6892+ errors::errors(d_interp$val) <- err_new
6893+
6894+ d_interp$ms_status[is.na(d_interp$ms_status)] = 0
68056895
68066896 return(d_interp)
68076897}
@@ -6984,9 +7074,12 @@ synchronize_timestep <- function(d, prodname_ms_ = get('prodname_ms')){
69847074 d = sitevar_chunk,
69857075 interval = rounding_intervals[i])
69867076 } else { #precip
6987- d_split[[i]] <- ms_nocb_mean_interpolate(
7077+ d_split[[i]] <- ms_zero_interpolate( #so far only needed for konza
69887078 d = sitevar_chunk,
69897079 interval = rounding_intervals[i])
7080+ # d_split[[i]] <- ms_nocb_mean_interpolate( #this might apply in some cases, but not yet.
7081+ # d = sitevar_chunk,
7082+ # interval = rounding_intervals[i])
69907083 }
69917084 }
69927085
@@ -11582,14 +11675,6 @@ approxjoin_datetime <- function(x,
1158211675 datetime_max = datetime_x + rollmax)]
1158311676 y[, `:=` (datetime_y_orig = datetime_y)] #datetime col will be dropped from y
1158411677
11585- # if(indices_only){
11586- # y_indices <- y[x,
11587- # on = .(datetime_y <= datetime_max,
11588- # datetime_y >= datetime_min),
11589- # which = TRUE]
11590- # return(y_indices)
11591- # }
11592-
1159311678 #join x rows to y if y's datetime falls within the x range
1159411679 joined <- y[x, on = .(datetime_y <= datetime_max,
1159511680 datetime_y >= datetime_min)]
@@ -11598,8 +11683,9 @@ approxjoin_datetime <- function(x,
1159811683 #for any datetimes in x or y that were matched more than once, keep only
1159911684 #the nearest match
1160011685 joined[, `:=` (datetime_match_diff = abs(datetime_x - datetime_y_orig))]
11601- joined <- joined[, .SD[which.min(datetime_match_diff)], by = datetime_x]
11602- joined <- joined[, .SD[which.min(datetime_match_diff)], by = datetime_y_orig]
11686+ joined = joined[order(datetime_match_diff),
11687+ lapply(.SD, function(z) first(na.omit(z))),
11688+ by = datetime_x]
1160311689
1160411690 if(indices_only){
1160511691 y_indices <- which(y$datetime_y %in% joined$datetime_y_orig)
0 commit comments