|
| 1 | + |
| 2 | +#filter sites for Dustin Kincaid's study |
| 3 | +#included site-vars must have: |
| 4 | +# - at least 3 years of data |
| 5 | +# - at least 20 paired C-Q obs |
| 6 | +# - Q that covers 50% of the observed range |
| 7 | + |
| 8 | +library(tidyverse) |
| 9 | +library(feather) |
| 10 | +library(sf) |
| 11 | +library(lubridate) |
| 12 | +# library(ggplot2) |
| 13 | +# library(ggmap) |
| 14 | +# library(usmap) |
| 15 | +# library(leaflet) |
| 16 | +library(tmap) |
| 17 | + |
| 18 | +setwd('~/git/macrosheds/data_acquisition/macrosheds_figshare_v1') |
| 19 | + |
| 20 | +# conf <- jsonlite::fromJSON('../config.json', |
| 21 | +# simplifyDataFrame = FALSE) |
| 22 | + |
| 23 | +#get chem, q, site data |
| 24 | +dd = list.dirs('2_timeseries_data') |
| 25 | +dd = grep('stream_chemistry', dd, value = TRUE) |
| 26 | +all_chem = tibble() |
| 27 | +for(d in dd){ |
| 28 | + all_chem = map_dfr(list.files(d, full.names = T), read_feather) %>% |
| 29 | + bind_rows(all_chem) |
| 30 | +} |
| 31 | + |
| 32 | +dd = list.dirs('2_timeseries_data') |
| 33 | +dd = grep('discharge', dd, value = TRUE) |
| 34 | +all_q = tibble() |
| 35 | +for(d in dd){ |
| 36 | + all_q = map_dfr(list.files(d, full.names = T), read_feather) %>% |
| 37 | + bind_rows(all_q) |
| 38 | +} |
| 39 | + |
| 40 | +sites = read_csv('0_documentation_and_metadata/04_site_documentation/04a_site_metadata.csv') %>% |
| 41 | + filter(! is.na(latitude), ! is.na(longitude), site_type == 'stream_gauge') %>% |
| 42 | + filter(! domain %in% c('mcmurdo', 'krycklan')) |
| 43 | + # st_as_sf(coords = c('longitude', 'latitude'), crs = 4326) |
| 44 | + |
| 45 | +# gg_vars = c('GN_NH4_NH3_N', 'GN_NO3_NO2_N', 'GN_TN', 'GN_UTKN', 'GN_UTP', |
| 46 | +# 'GN_DON', 'GN_NH3_N', 'GN_NO3_N', 'GN_TPN', 'GN_TKN', 'GN_TDP', |
| 47 | +# 'GN_PO4_P', 'GN_TPP', 'GN_DOC', 'GN_NH4_N', 'GN_P', 'GN_TDN', |
| 48 | +# 'GN_TPC', 'GN_POC', 'GN_PON', 'IN_NO3_NO2_N', 'IN_TN', 'IN_TOC', |
| 49 | +# 'IN_TP', 'GN_NO2_N', 'GN_DOP', 'GN_TP', 'GN_TOC', 'GN_TIP', |
| 50 | +# 'GN_TIN', 'GN_SRP', 'GN_DIC', 'GN_UTN', 'IN_DIC', 'IN_DOC', |
| 51 | +# 'IN_DON', 'IN_NO3_N', 'IN_TDN', 'GN_CO2', 'GN_BPC', 'GN_BPN', |
| 52 | +# 'GN_CH4', 'GN_CO2_C', 'GN_CH4_C', 'GN_DIN', 'GN_NO2', 'GN_TIC', |
| 53 | +# 'IN_TIC', 'IN_NO2', 'IN_PO4_P', 'GN_TC') |
| 54 | + |
| 55 | +ggvars2 = c('DOC', 'Al', 'Ca', 'Cl', 'DON', 'K', 'Mg', 'Na', 'NO3', 'NO3_N', |
| 56 | + 'Si', 'SO4', 'SO4_S', 'NH3', 'NH3_N', 'PO4_P', 'SRP') |
| 57 | + |
| 58 | +xx = read_csv('../../portal/data/general/catalog_files/all_variables.csv') %>% |
| 59 | + mutate(len = (as.Date(LastRecordUTC) - as.Date(FirstRecordUTC)) / 365) %>% |
| 60 | + filter(len >= 30, |
| 61 | + VariableCode == 'NO3_N') |
| 62 | + |
| 63 | +chm = all_chem %>% |
| 64 | + mutate(var_ = macrosheds::ms_drop_var_prefix(var)) %>% |
| 65 | + filter(var_ %in% ggvars2) |
| 66 | + |
| 67 | +qq = chm %>% |
| 68 | + group_by(site_code, var_) %>% |
| 69 | + summarize(nyr = length(unique(lubridate::year(datetime)))) %>% |
| 70 | + ungroup() %>% |
| 71 | + filter(nyr >= 30) |
| 72 | + |
| 73 | +#filter by n years requiremend |
| 74 | +chm = right_join(chm, qq, by = c('site_code', 'var_')) |
| 75 | + |
| 76 | +jn = left_join(chm, all_q, by = c('datetime', 'site_code')) |
| 77 | + |
| 78 | +jn = jn %>% |
| 79 | + filter(! is.na(val.y)) %>% |
| 80 | + filter(! is.na(val.x)) |
| 81 | + |
| 82 | +nobs = jn %>% |
| 83 | + group_by(site_code, var_) %>% |
| 84 | + summarize(n = n(), |
| 85 | + nyr = unique(nyr)) %>% |
| 86 | + ungroup() |
| 87 | + |
| 88 | +#filter by n paired obs requirement |
| 89 | +nobs = filter(nobs, `n` >= 20) |
| 90 | + |
| 91 | +jnn = right_join(jn, nobs, by = c('site_code', 'var_')) |
| 92 | + |
| 93 | +q_ranges = all_q %>% |
| 94 | + group_by(site_code) %>% |
| 95 | + summarize(obs_min = min(val, na.rm = TRUE), |
| 96 | + obs_max = max(val, na.rm = TRUE)) %>% |
| 97 | + ungroup() |
| 98 | + |
| 99 | +q_ranges_sub = jnn %>% |
| 100 | + group_by(site_code) %>% |
| 101 | + summarize(obs_min = min(val.y, na.rm = TRUE), |
| 102 | + obs_max = max(val.y, na.rm = TRUE)) %>% |
| 103 | + ungroup() |
| 104 | + |
| 105 | +qr = left_join(q_ranges_sub, q_ranges, by = c('site_code')) |
| 106 | + |
| 107 | +qr$q50 = apply(qr[,4:5], 1, function(x) quantile(x, probs = 0.5)) |
| 108 | + |
| 109 | +qr = qr %>% |
| 110 | + filter(obs_max.x >= q50) |
| 111 | + |
| 112 | +#filter to sites with paired q that's at least 50% of obs q range |
| 113 | +jnn = filter(jnn, site_code %in% qr$site_code) |
| 114 | + |
| 115 | +#plot |
| 116 | +plt = jnn %>% |
| 117 | + group_by(site_code, var_) %>% |
| 118 | + summarize(nyr = unique(nyr.x), |
| 119 | + nobs = unique(n)) %>% |
| 120 | + ungroup() %>% |
| 121 | + left_join(select(sites, site_code, latitude, longitude)) %>% |
| 122 | + rename(n_years = nyr) |
| 123 | + |
| 124 | +# plt = filter(plt, ! is.na(geometry)) |
| 125 | +plt = filter(plt, ! is.na(latitude), ! is.na(longitude)) |
| 126 | +plt = st_as_sf(plt, coords = c('longitude', 'latitude'), crs = 4326) |
| 127 | + |
| 128 | +# states <- map_data("state") |
| 129 | +# usa = get_map("usa", source = "google", apikey = conf$gmaps_apikey) |
| 130 | +# |
| 131 | +# # Create map plot |
| 132 | +# register_google(key = conf$gmaps_apikey) |
| 133 | +# # ggmap(get_map("usa", source = "osm")) + |
| 134 | +# ggmap::ggmap(usa) + |
| 135 | +# # ggmap(get_map("usa", source = "stamen", maptype = 'toner')) + |
| 136 | +# # map_osm(xlim = c(-126, -66), ylim = c(24, 50), zoom = 4) |
| 137 | +# # plot_usmap(regions = "states") + |
| 138 | +# # ggplot() %>% |
| 139 | +# # map_data("usa") |
| 140 | +# geom_polygon(data = states, aes(x = long, y = lat, group = group), color = "black", fill = "white") + |
| 141 | +# geom_point(data = plt, aes(x = longitude, y = latitude, color = n_years)) + |
| 142 | +# scale_color_gradient(low = "darkblue", high = "lightblue") + |
| 143 | +# facet_wrap(~ var_) |
| 144 | + |
| 145 | +# leaflet() %>% |
| 146 | +# setView(-96, 37.8, 4) %>% |
| 147 | +# addTiles() %>% |
| 148 | + |
| 149 | +tmap_mode("plot") |
| 150 | +data(World) |
| 151 | + |
| 152 | +# tm_shape(world, query = "NAME = 'United States of America'") |
| 153 | +# tm_shape(World) + |
| 154 | +# tm_borders("World", lwd = 0.4) + |
| 155 | +# tm_shape(subset = World$NAME == "United States of America") + |
| 156 | +# tm_borders("World", lwd = 0.8, col = "black") |
| 157 | +ex = sf::st_bbox(plt) |
| 158 | + |
| 159 | +mp = tm_shape(World[World$name == "United States", ], bbox = ex) + |
| 160 | + tm_borders(lwd = 1, col = "gray20") + |
| 161 | + # tm_shape(World[World$NAME == "United States", ]) + |
| 162 | + tm_shape(plt) + |
| 163 | + # tm_dots(col = "red", size = 0.5, |
| 164 | + tm_dots(col = "n_years", size = 0.5, |
| 165 | + legend.is.portrait = FALSE, |
| 166 | + title = "", palette = "Blues") + |
| 167 | + tm_facets('var_') + |
| 168 | + tm_layout(outer.margins = c(0,0,0,0), |
| 169 | + legend.outside.position = "bottom", |
| 170 | + asp=4) |
| 171 | + |
| 172 | +tmap_save(mp, filename="../plots/sitevars_for_cq.png", |
| 173 | + bg="white", dpi = 300) |
| 174 | + # bg="white", dpi = 300, width = 10, height = 8) |
| 175 | + |
| 176 | +plt %>% |
| 177 | + as_tibble() %>% |
| 178 | + select(-geometry) %>% |
| 179 | + group_by(var_) %>% |
| 180 | + summarize(n_sites = length(unique(site_code))) %>% |
| 181 | + ungroup() |
0 commit comments