Skip to content

Commit 8201596

Browse files
committed
kincaid filter script
1 parent a2bee15 commit 8201596

2 files changed

Lines changed: 192 additions & 0 deletions

File tree

src/dev/misc/kincaid_filtering.R

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
2+
#filter sites for Dustin Kincaid's study
3+
#included site-vars must have:
4+
# - at least 3 years of data
5+
# - at least 20 paired C-Q obs
6+
# - Q that covers 50% of the observed range
7+
8+
library(tidyverse)
9+
library(feather)
10+
library(sf)
11+
library(lubridate)
12+
# library(ggplot2)
13+
# library(ggmap)
14+
# library(usmap)
15+
# library(leaflet)
16+
library(tmap)
17+
18+
setwd('~/git/macrosheds/data_acquisition/macrosheds_figshare_v1')
19+
20+
# conf <- jsonlite::fromJSON('../config.json',
21+
# simplifyDataFrame = FALSE)
22+
23+
#get chem, q, site data
24+
dd = list.dirs('2_timeseries_data')
25+
dd = grep('stream_chemistry', dd, value = TRUE)
26+
all_chem = tibble()
27+
for(d in dd){
28+
all_chem = map_dfr(list.files(d, full.names = T), read_feather) %>%
29+
bind_rows(all_chem)
30+
}
31+
32+
dd = list.dirs('2_timeseries_data')
33+
dd = grep('discharge', dd, value = TRUE)
34+
all_q = tibble()
35+
for(d in dd){
36+
all_q = map_dfr(list.files(d, full.names = T), read_feather) %>%
37+
bind_rows(all_q)
38+
}
39+
40+
sites = read_csv('0_documentation_and_metadata/04_site_documentation/04a_site_metadata.csv') %>%
41+
filter(! is.na(latitude), ! is.na(longitude), site_type == 'stream_gauge') %>%
42+
filter(! domain %in% c('mcmurdo', 'krycklan'))
43+
# st_as_sf(coords = c('longitude', 'latitude'), crs = 4326)
44+
45+
# gg_vars = c('GN_NH4_NH3_N', 'GN_NO3_NO2_N', 'GN_TN', 'GN_UTKN', 'GN_UTP',
46+
# 'GN_DON', 'GN_NH3_N', 'GN_NO3_N', 'GN_TPN', 'GN_TKN', 'GN_TDP',
47+
# 'GN_PO4_P', 'GN_TPP', 'GN_DOC', 'GN_NH4_N', 'GN_P', 'GN_TDN',
48+
# 'GN_TPC', 'GN_POC', 'GN_PON', 'IN_NO3_NO2_N', 'IN_TN', 'IN_TOC',
49+
# 'IN_TP', 'GN_NO2_N', 'GN_DOP', 'GN_TP', 'GN_TOC', 'GN_TIP',
50+
# 'GN_TIN', 'GN_SRP', 'GN_DIC', 'GN_UTN', 'IN_DIC', 'IN_DOC',
51+
# 'IN_DON', 'IN_NO3_N', 'IN_TDN', 'GN_CO2', 'GN_BPC', 'GN_BPN',
52+
# 'GN_CH4', 'GN_CO2_C', 'GN_CH4_C', 'GN_DIN', 'GN_NO2', 'GN_TIC',
53+
# 'IN_TIC', 'IN_NO2', 'IN_PO4_P', 'GN_TC')
54+
55+
ggvars2 = c('DOC', 'Al', 'Ca', 'Cl', 'DON', 'K', 'Mg', 'Na', 'NO3', 'NO3_N',
56+
'Si', 'SO4', 'SO4_S', 'NH3', 'NH3_N', 'PO4_P', 'SRP')
57+
58+
xx = read_csv('../../portal/data/general/catalog_files/all_variables.csv') %>%
59+
mutate(len = (as.Date(LastRecordUTC) - as.Date(FirstRecordUTC)) / 365) %>%
60+
filter(len >= 30,
61+
VariableCode == 'NO3_N')
62+
63+
chm = all_chem %>%
64+
mutate(var_ = macrosheds::ms_drop_var_prefix(var)) %>%
65+
filter(var_ %in% ggvars2)
66+
67+
qq = chm %>%
68+
group_by(site_code, var_) %>%
69+
summarize(nyr = length(unique(lubridate::year(datetime)))) %>%
70+
ungroup() %>%
71+
filter(nyr >= 30)
72+
73+
#filter by n years requiremend
74+
chm = right_join(chm, qq, by = c('site_code', 'var_'))
75+
76+
jn = left_join(chm, all_q, by = c('datetime', 'site_code'))
77+
78+
jn = jn %>%
79+
filter(! is.na(val.y)) %>%
80+
filter(! is.na(val.x))
81+
82+
nobs = jn %>%
83+
group_by(site_code, var_) %>%
84+
summarize(n = n(),
85+
nyr = unique(nyr)) %>%
86+
ungroup()
87+
88+
#filter by n paired obs requirement
89+
nobs = filter(nobs, `n` >= 20)
90+
91+
jnn = right_join(jn, nobs, by = c('site_code', 'var_'))
92+
93+
q_ranges = all_q %>%
94+
group_by(site_code) %>%
95+
summarize(obs_min = min(val, na.rm = TRUE),
96+
obs_max = max(val, na.rm = TRUE)) %>%
97+
ungroup()
98+
99+
q_ranges_sub = jnn %>%
100+
group_by(site_code) %>%
101+
summarize(obs_min = min(val.y, na.rm = TRUE),
102+
obs_max = max(val.y, na.rm = TRUE)) %>%
103+
ungroup()
104+
105+
qr = left_join(q_ranges_sub, q_ranges, by = c('site_code'))
106+
107+
qr$q50 = apply(qr[,4:5], 1, function(x) quantile(x, probs = 0.5))
108+
109+
qr = qr %>%
110+
filter(obs_max.x >= q50)
111+
112+
#filter to sites with paired q that's at least 50% of obs q range
113+
jnn = filter(jnn, site_code %in% qr$site_code)
114+
115+
#plot
116+
plt = jnn %>%
117+
group_by(site_code, var_) %>%
118+
summarize(nyr = unique(nyr.x),
119+
nobs = unique(n)) %>%
120+
ungroup() %>%
121+
left_join(select(sites, site_code, latitude, longitude)) %>%
122+
rename(n_years = nyr)
123+
124+
# plt = filter(plt, ! is.na(geometry))
125+
plt = filter(plt, ! is.na(latitude), ! is.na(longitude))
126+
plt = st_as_sf(plt, coords = c('longitude', 'latitude'), crs = 4326)
127+
128+
# states <- map_data("state")
129+
# usa = get_map("usa", source = "google", apikey = conf$gmaps_apikey)
130+
#
131+
# # Create map plot
132+
# register_google(key = conf$gmaps_apikey)
133+
# # ggmap(get_map("usa", source = "osm")) +
134+
# ggmap::ggmap(usa) +
135+
# # ggmap(get_map("usa", source = "stamen", maptype = 'toner')) +
136+
# # map_osm(xlim = c(-126, -66), ylim = c(24, 50), zoom = 4)
137+
# # plot_usmap(regions = "states") +
138+
# # ggplot() %>%
139+
# # map_data("usa")
140+
# geom_polygon(data = states, aes(x = long, y = lat, group = group), color = "black", fill = "white") +
141+
# geom_point(data = plt, aes(x = longitude, y = latitude, color = n_years)) +
142+
# scale_color_gradient(low = "darkblue", high = "lightblue") +
143+
# facet_wrap(~ var_)
144+
145+
# leaflet() %>%
146+
# setView(-96, 37.8, 4) %>%
147+
# addTiles() %>%
148+
149+
tmap_mode("plot")
150+
data(World)
151+
152+
# tm_shape(world, query = "NAME = 'United States of America'")
153+
# tm_shape(World) +
154+
# tm_borders("World", lwd = 0.4) +
155+
# tm_shape(subset = World$NAME == "United States of America") +
156+
# tm_borders("World", lwd = 0.8, col = "black")
157+
ex = sf::st_bbox(plt)
158+
159+
mp = tm_shape(World[World$name == "United States", ], bbox = ex) +
160+
tm_borders(lwd = 1, col = "gray20") +
161+
# tm_shape(World[World$NAME == "United States", ]) +
162+
tm_shape(plt) +
163+
# tm_dots(col = "red", size = 0.5,
164+
tm_dots(col = "n_years", size = 0.5,
165+
legend.is.portrait = FALSE,
166+
title = "", palette = "Blues") +
167+
tm_facets('var_') +
168+
tm_layout(outer.margins = c(0,0,0,0),
169+
legend.outside.position = "bottom",
170+
asp=4)
171+
172+
tmap_save(mp, filename="../plots/sitevars_for_cq.png",
173+
bg="white", dpi = 300)
174+
# bg="white", dpi = 300, width = 10, height = 8)
175+
176+
plt %>%
177+
as_tibble() %>%
178+
select(-geometry) %>%
179+
group_by(var_) %>%
180+
summarize(n_sites = length(unique(site_code))) %>%
181+
ungroup()

src/global/global_helpers.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10033,6 +10033,10 @@ postprocess_entire_dataset <- function(site_data,
1003310033
mutate(var = ifelse(var == 'lb_igbp_cloased_shrub', 'lb_igbp_closed_shrub', var),
1003410034
var = ifelse(var == 'lg_lncd_lichens', 'lg_nlcd_lichens', var)) %>%
1003510035
write_feather('../portal/data/general/biplot/year.feather')
10036+
read_feather('../portal/data/general/biplot/year.feather') %>%
10037+
filter(! (domain == 'usgs' & site_code %in% c('BARN', 'DRKR', 'GFCP', 'GFGB', 'GFGL', 'GFVN', 'MAWI', 'MCDN', 'POBR'))) %>%
10038+
filter(domain != 'neon') %>%
10039+
write_feather('../portal/data/general/biplot/year.feather')
1003610040

1003710041
#make feather versions of ws attr files (crude af, whatev)
1003810042
read_csv(file.path(fs_dir, '1_watershed_attribute_data/ws_attr_summaries.csv')) %>%
@@ -10286,6 +10290,13 @@ prepare_variable_metadata_for_figshare <- function(outfile, fs_format){
1028610290
filter(! grepl('_flux$', chem_category)) #TEMP: removing flux metadata
1028710291

1028810292
write_csv(ms_vars_ts, outfile_ts)
10293+
10294+
ms_vars_ts <- ms_vars %>%
10295+
select(variable_code, molecule, valence, flux_convertible) %>%
10296+
right_join(ms_vars_ts, by = 'variable_code') %>%
10297+
relocate(molecule, valence, .after = 'unit') %>%
10298+
relocate(flux_convertible, .after = 'last_record_utc')
10299+
1028910300
save(ms_vars_ts, file = '../r_package/data/ms_vars_ts.RData')
1029010301

1029110302
ms_vars_ws <- ms_vars %>%

0 commit comments

Comments
 (0)