Skip to content

Commit dae8726

Browse files
committed
broken state in idw
1 parent 36400b7 commit dae8726

2 files changed

Lines changed: 157 additions & 71 deletions

File tree

src/acquisition_master.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ for(dmnrow in 1:nrow(network_domain)){
312312
verbose = TRUE))
313313
}
314314
ms_derive(network = network,
315-
# prodname_filter = c('discharge'),
315+
prodname_filter = c('precip_pchem_pflux'),
316316
domain = domain)
317317

318318
if(domain != 'mcmurdo'){

src/global/global_helpers.R

Lines changed: 156 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -5935,7 +5935,8 @@ shortcut_idw <- function(encompassing_dem,
59355935
d_elev <- tibble(site_code = rownames(dk),
59365936
d = dk[,1]) %>%
59375937
left_join(data_locations,
5938-
by = 'site_code')
5938+
by = 'site_code') %>%
5939+
mutate(d = errors::drop_errors(d))
59395940
mod <- lm(d ~ elevation, data = d_elev)
59405941
ab <- as.list(mod$coefficients)
59415942

@@ -5945,9 +5946,12 @@ shortcut_idw <- function(encompassing_dem,
59455946
# Set all negative values to 0
59465947
d_from_elev[d_from_elev < 0] <- 0
59475948

5948-
#average both approaches (this should be weighted toward idw
5949-
#when close to any data location, and weighted half and half when far)
5950-
d_idw <- (d_idw + d_from_elev) / 2
5949+
#get weighted mean of both approaches:
5950+
#weight on idw is 1; weight on elev-predicted is |R^2|
5951+
abs_r2 <- abs(cor(d_elev$d, mod$fitted.values)^2)
5952+
# abs_adjr2 <- abs(1 - (1 - r2) * ((nobs(mod) - 1) / mod$df.residual))
5953+
5954+
d_idw <- (d_idw + d_from_elev * abs_r2) / (1 + abs_r2)
59515955
}
59525956

59535957
ws_mean[k] <- mean(d_idw, na.rm=TRUE)
@@ -6567,12 +6571,12 @@ ms_linear_interpolate <- function(d, interval){
65676571
ms_interp_column <- is.na(d$val)
65686572

65696573
d_interp <- d %>%
6570-
mutate(
6574+
mutate(val_err = errors::errors(val),
6575+
val = errors::drop_errors(val),
65716576

6572-
#carry ms_status to any rows that have just been populated (probably
6573-
#redundant now, but can't hurt)
65746577
ms_status = imputeTS::na_locf(ms_status,
6575-
na_remaining = 'rev'),
6578+
na_remaining = 'rev',
6579+
maxgap = max_samples_to_impute),
65766580

65776581
# val = if(sum(! is.na(val)) > 2){
65786582
#
@@ -6591,38 +6595,31 @@ ms_linear_interpolate <- function(d, interval){
65916595
maxgap = max_samples_to_impute)
65926596

65936597
#unless not enough data in group; then do nothing
6594-
} else val
6598+
} else val,
6599+
val_err = if(sum(! is.na(val_err)) > 1){
6600+
#do the same for uncertainty
6601+
imputeTS::na_interpolation(val_err,
6602+
maxgap = max_samples_to_impute)
6603+
} else val_err
65956604
)
65966605

6597-
err <- errors(d_interp$val) #extract error from data vals
6598-
err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6599-
if(sum(! is.na(err)) > 0){
6600-
#and then carry error to interped rows
6601-
errors(d_interp$val) <- imputeTS::na_locf(err, na_remaining = 'rev')
6602-
} else {
6603-
errors(d_interp$val) <- 0 # #unless not enough error to interp
6604-
}
6606+
errors::errors(d_interp$val) <- d_interp$val_err
6607+
d_interp$val_err <- NULL
6608+
6609+
# err <- errors(d_interp$val) #extract error from data vals
6610+
# err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6611+
# if(sum(! is.na(err)) > 0){
6612+
# #and then carry error to interped rows
6613+
# errors(d_interp$val) <- imputeTS::na_locf(err, na_remaining = 'rev')
6614+
# } else {
6615+
# errors(d_interp$val) <- 0 # #unless not enough error to interp
6616+
# }
66056617

66066618
d_interp <- d_interp %>%
66076619
select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
66086620
arrange(site_code, var, datetime)
66096621

6610-
# mutate(
6611-
# err = errors(val), #extract error from data vals
6612-
# err = case_when(
6613-
# err == 0 ~ NA_real_, #change new uncerts (0s by default) to NA
6614-
# TRUE ~ err),
6615-
# val = if(sum(! is.na(err)) > 0){
6616-
# set_errors(val, #and then carry error to interped rows
6617-
# imputeTS::na_locf(err,
6618-
# na_remaining = 'rev'))
6619-
# } else {
6620-
# set_errors(val, #unless not enough error to interp
6621-
# 0)
6622-
# }) %>%
6623-
# select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
6624-
# arrange(site_code, var, datetime)
6625-
6622+
d_interp$ms_status[is.na(d_interp$ms_status)] = 0
66266623
ms_interp_column <- ms_interp_column & ! is.na(d_interp$val)
66276624
d_interp$ms_interp <- as.numeric(ms_interp_column)
66286625
d_interp <- filter(d_interp,
@@ -6666,13 +6663,14 @@ ms_nocb_interpolate <- function(d, interval){
66666663
ms_interp_column <- is.na(d$val)
66676664

66686665
d_interp <- d %>%
6669-
mutate(
6666+
mutate(val_err = errors::errors(val),
6667+
val = errors::drop_errors(val),
66706668

6671-
#carry ms_status to any rows that have just been populated (probably
6672-
#redundant now, but can't hurt)
6669+
#carry ms_status to any rows that have just been populated
66736670
ms_status = imputeTS::na_locf(ms_status,
66746671
option = 'nocb',
6675-
na_remaining = 'rev'),
6672+
na_remaining = 'rev',
6673+
maxgap = max_samples_to_impute),
66766674

66776675
val = if(sum(! is.na(val)) > 1){
66786676

@@ -6683,22 +6681,90 @@ ms_nocb_interpolate <- function(d, interval){
66836681
maxgap = max_samples_to_impute)
66846682

66856683
#unless not enough data in group; then do nothing
6686-
} else val
6684+
} else val,
6685+
val_err = if(sum(! is.na(val_err)) > 1){
6686+
6687+
#do the same for uncertainty
6688+
imputeTS::na_locf(val_err,
6689+
option = 'nocb',
6690+
na_remaining = 'keep',
6691+
maxgap = max_samples_to_impute)
6692+
} else val_err
66876693
)
66886694

6689-
err <- errors(d_interp$val) #extract error from data vals
6690-
err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6691-
if(sum(! is.na(err)) > 0){
6692-
#and then carry error to interped rows
6693-
errors(d_interp$val) <- imputeTS::na_locf(err, option = 'nocb')
6694-
} else {
6695-
errors(d_interp$val) <- 0 # #unless not enough error to interp
6695+
errors::errors(d_interp$val) <- d_interp$val_err
6696+
d_interp$val_err <- NULL
6697+
6698+
d_interp <- d_interp %>%
6699+
select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
6700+
arrange(site_code, var, datetime)
6701+
6702+
6703+
d_interp$ms_status[is.na(d_interp$ms_status)] = 0
6704+
ms_interp_column <- ms_interp_column & ! is.na(d_interp$val)
6705+
d_interp$ms_interp <- as.numeric(ms_interp_column)
6706+
d_interp <- filter(d_interp,
6707+
! is.na(val))
6708+
6709+
return(d_interp)
6710+
}
6711+
6712+
ms_zero_interpolate <- function(d, interval){
6713+
6714+
#d: a ms tibble with no ms_interp column (this will be created)
6715+
#interval: the sampling interval (either '15 min' or '1 day').
6716+
6717+
#for precip only, and only relevant at konza (so far)
6718+
6719+
#fills gaps up to maxgap (determined automatically), then removes missing values
6720+
6721+
if(length(unique(d$site_code)) > 1){
6722+
stop(paste('ms_zero_interpolate is not designed to handle datasets',
6723+
'with more than one site.'))
6724+
}
6725+
6726+
if(length(unique(d$var)) > 1){
6727+
stop(paste('ms_zero_interpolate is not designed to handle datasets',
6728+
'with more than one variable'))
6729+
}
6730+
6731+
if(! interval %in% c('15 min', '1 day')){
6732+
stop('interval must be "15 min" or "1 day", unless we have decided otherwise')
66966733
}
66976734

6735+
var <- drop_var_prefix(d$var[1])
6736+
max_samples_to_impute <- 45 #fixed because this func is only called for precip
6737+
6738+
if(interval == '15 min'){
6739+
max_samples_to_impute <- max_samples_to_impute * 96
6740+
}
6741+
6742+
d <- arrange(d, datetime)
6743+
ms_interp_column <- is.na(d$val)
6744+
6745+
d_interp <- d %>%
6746+
mutate(
6747+
6748+
ms_status = imputeTS::na_replace(ms_status,
6749+
fill = 1,
6750+
maxgap = max_samples_to_impute),
6751+
6752+
val = if(sum(! is.na(val)) > 1){
6753+
6754+
#nocb interp NA vals
6755+
imputeTS::na_replace(val,
6756+
fill = 0,
6757+
maxgap = max_samples_to_impute)
6758+
6759+
#unless not enough data in group; then do nothing
6760+
} else val
6761+
)
6762+
66986763
d_interp <- d_interp %>%
66996764
select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
67006765
arrange(site_code, var, datetime)
67016766

6767+
d_interp$ms_status[is.na(d_interp$ms_status)] = 0
67026768
ms_interp_column <- ms_interp_column & ! is.na(d_interp$val)
67036769
d_interp$ms_interp <- as.numeric(ms_interp_column)
67046770
d_interp <- filter(d_interp,
@@ -6744,13 +6810,15 @@ ms_nocb_mean_interpolate <- function(d, interval){
67446810
ms_interp_column <- is.na(d$val)
67456811

67466812
d_interp <- d %>%
6747-
mutate(
6813+
mutate(val_err = errors::errors(val),
6814+
val = errors::drop_errors(val),
67486815

67496816
#carry ms_status to any rows that have just been populated (probably
67506817
#redundant now, but can't hurt)
67516818
ms_status = imputeTS::na_locf(ms_status,
67526819
option = 'nocb',
6753-
na_remaining = 'rev'),
6820+
na_remaining = 'rev',
6821+
maxgap = max_samples_to_impute),
67546822

67556823
val = if(sum(! is.na(val)) > 1){
67566824

@@ -6761,17 +6829,28 @@ ms_nocb_mean_interpolate <- function(d, interval){
67616829
maxgap = max_samples_to_impute)
67626830

67636831
#unless not enough data in group; then do nothing
6764-
} else val
6832+
} else val,
6833+
val_err = if(sum(! is.na(val_err)) > 1){
6834+
6835+
#do the same for uncertainty
6836+
imputeTS::na_locf(val_err,
6837+
option = 'nocb',
6838+
na_remaining = 'keep',
6839+
maxgap = max_samples_to_impute)
6840+
} else val_err
67656841
)
67666842

6767-
err <- errors(d_interp$val) #extract error from data vals
6768-
err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6769-
if(sum(! is.na(err)) > 0){
6770-
#and then carry error to interped rows
6771-
errors(d_interp$val) <- imputeTS::na_locf(err, option = 'nocb')
6772-
} else {
6773-
errors(d_interp$val) <- 0 # #unless not enough error to interp
6774-
}
6843+
errors::errors(d_interp$val) <- d_interp$val_err
6844+
d_interp$val_err <- NULL
6845+
6846+
# err <- errors(d_interp$val) #extract error from data vals
6847+
# err[err == 0] <- NA_real_ #change new uncerts (0s by default) to NA
6848+
# if(sum(! is.na(err)) > 0){
6849+
# #and then carry error to interped rows
6850+
# errors(d_interp$val) <- imputeTS::na_locf(err, option = 'nocb')
6851+
# } else {
6852+
# errors(d_interp$val) <- 0 # #unless not enough error to interp
6853+
# }
67756854

67766855
d_interp <- d_interp %>%
67776856
select(any_of(c('datetime', 'site_code', 'var', 'val', 'ms_status', 'ms_interp'))) %>%
@@ -6789,19 +6868,30 @@ ms_nocb_mean_interpolate <- function(d, interval){
67896868

67906869
err_ <- errors::errors(d_interp$val)
67916870
d_interp$val <- errors::drop_errors(d_interp$val)
6792-
vals_interpted <- d_interp$val * laginterp
6871+
vals_interped <- d_interp$val * laginterp
6872+
err_interped <- err_ * laginterp
67936873

67946874
#use run length encoding to do the division quickly
6795-
vals_new <- rle2(vals_interpted) %>%
6875+
vals_new <- rle2(vals_interped) %>%
67966876
mutate(values = values / lengths) %>%
67976877
select(lengths, values) %>%
67986878
as.list()
67996879
class(vals_new) <- 'rle'
68006880
vals_new <- inverse.rle(vals_new)
68016881

6882+
#same for uncertainty
6883+
err_new <- rle2(err_interped) %>%
6884+
mutate(values = values / lengths) %>%
6885+
select(lengths, values) %>%
6886+
as.list()
6887+
class(err_new) <- 'rle'
6888+
err_new <- inverse.rle(err_new)
6889+
68026890
real_vals_new <- vals_new != 0
68036891
d_interp$val[real_vals_new] <- vals_new[real_vals_new]
6804-
errors::errors(d_interp$val) <- err_
6892+
errors::errors(d_interp$val) <- err_new
6893+
6894+
d_interp$ms_status[is.na(d_interp$ms_status)] = 0
68056895

68066896
return(d_interp)
68076897
}
@@ -6984,9 +7074,12 @@ synchronize_timestep <- function(d, prodname_ms_ = get('prodname_ms')){
69847074
d = sitevar_chunk,
69857075
interval = rounding_intervals[i])
69867076
} else { #precip
6987-
d_split[[i]] <- ms_nocb_mean_interpolate(
7077+
d_split[[i]] <- ms_zero_interpolate( #so far only needed for konza
69887078
d = sitevar_chunk,
69897079
interval = rounding_intervals[i])
7080+
# d_split[[i]] <- ms_nocb_mean_interpolate( #this might apply in some cases, but not yet.
7081+
# d = sitevar_chunk,
7082+
# interval = rounding_intervals[i])
69907083
}
69917084
}
69927085

@@ -11582,14 +11675,6 @@ approxjoin_datetime <- function(x,
1158211675
datetime_max = datetime_x + rollmax)]
1158311676
y[, `:=` (datetime_y_orig = datetime_y)] #datetime col will be dropped from y
1158411677

11585-
# if(indices_only){
11586-
# y_indices <- y[x,
11587-
# on = .(datetime_y <= datetime_max,
11588-
# datetime_y >= datetime_min),
11589-
# which = TRUE]
11590-
# return(y_indices)
11591-
# }
11592-
1159311678
#join x rows to y if y's datetime falls within the x range
1159411679
joined <- y[x, on = .(datetime_y <= datetime_max,
1159511680
datetime_y >= datetime_min)]
@@ -11598,8 +11683,9 @@ approxjoin_datetime <- function(x,
1159811683
#for any datetimes in x or y that were matched more than once, keep only
1159911684
#the nearest match
1160011685
joined[, `:=` (datetime_match_diff = abs(datetime_x - datetime_y_orig))]
11601-
joined <- joined[, .SD[which.min(datetime_match_diff)], by = datetime_x]
11602-
joined <- joined[, .SD[which.min(datetime_match_diff)], by = datetime_y_orig]
11686+
joined = joined[order(datetime_match_diff),
11687+
lapply(.SD, function(z) first(na.omit(z))),
11688+
by = datetime_x]
1160311689

1160411690
if(indices_only){
1160511691
y_indices <- which(y$datetime_y %in% joined$datetime_y_orig)

0 commit comments

Comments
 (0)