Skip to content

Commit c8d9dae

Browse files
authored
Merge pull request #149 from terraref/gh121_synthesis_update
[WIP] begin updating synthesis vignette
2 parents d9df075 + 8b64249 commit c8d9dae

1 file changed

Lines changed: 95 additions & 77 deletions

File tree

vignettes/04-synthesis-data.Rmd

Lines changed: 95 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,34 @@ The second analysis compares greenness from image data with canopy cover.
99
## Get and join data
1010

1111
Here we combine two dataframes.
12-
The first contains all the canopy height values for 2017, which was created in the traits vignette.
13-
The second is the cumulative growing degree days for all of 2017, which were calculated from the daily minimum and maximum temperatures in the weather vignette.
12+
The first contains all the canopy cover values for 2018, which was created in the traits vignette.
13+
The second is the cumulative growing degree days for all of 2018, which were calculated from the daily minimum and maximum temperatures in the weather vignette.
1414
They are combined by their common column, the date.
1515

16-
```{r synth_setup}
16+
```{r synth_setup, message=FALSE}
1717
library(dplyr)
1818
library(ggplot2)
1919
library(jsonlite)
2020
library(lubridate)
2121
library(traits)
22-
library(inflection)
2322
library(sf)
2423
library(stringr)
2524
options(betydb_url = "https://terraref.ncsa.illinois.edu/bety/",
26-
betydb_api_version = 'v1')
25+
betydb_api_version = 'beta',
26+
betydb_key = '9999999999999999999999999999999999999999')
2727
```
2828

2929
```{r get_trait_data, message = FALSE}
30-
trait_canopy_height <- betydb_query(table = "search",
31-
trait = "canopy_height",
32-
date = "~2017",
30+
trait_canopy_cover <- betydb_query(table = "search",
31+
trait = "canopy_cover",
32+
date = "~2018",
3333
limit = "none")
34-
trait_canopy_height_day = trait_canopy_height %>%
34+
trait_canopy_cover_day = trait_canopy_cover %>%
3535
mutate(day = as.Date(raw_date))
3636
```
3737

3838
```{r get_weather_data}
39-
weather <- fromJSON('https://terraref.ncsa.illinois.edu/clowder/api/geostreams/datapoints?stream_id=46431&since=2017-01-01&until=2017-12-31', flatten = FALSE)
39+
weather <- fromJSON('https://terraref.ncsa.illinois.edu/clowder/api/geostreams/datapoints?stream_id=46431&since=2018-01-01&until=2018-12-31', flatten = FALSE)
4040
weather <- weather$properties %>%
4141
mutate(time = ymd_hms(weather$end_time))
4242
daily_values = weather %>%
@@ -52,7 +52,7 @@ daily_values <- daily_values %>%
5252
```
5353

5454
```{r combine_trait_weather}
55-
trait_weather_df <- full_join(trait_canopy_height_day, daily_values, by = "day") %>%
55+
trait_weather_df <- full_join(trait_canopy_cover_day, daily_values, by = "day") %>%
5656
select(day, cultivar, mean, gdd_cum) %>%
5757
na.omit()
5858
```
@@ -61,22 +61,60 @@ trait_weather_df <- full_join(trait_canopy_height_day, daily_values, by = "day")
6161

6262
We are interested in how growing degree days affects canopy cover.
6363
To investigate this, we are going to model and plot their relationship.
64+
We are using a logistic growth model here because it is appropriate for the shape of the GDD-cover relationship.
65+
66+
The logistic growth model is specified as
67+
68+
$$y = \frac{c}{1+e^{a + b * \textrm{x}}}$$
69+
70+
where $y$ is the response variable canopy cover, $x$ is the predictor growing degree days, $c$ is the asymptote or maximum canopy cover, $a$ is the initial value for canopy cover, and $b$ is the steepness of the curve. (reference)
71+
6472
We want to know the relationship for each cultivar, so we'll start of by determining the parameters of the model for one of the cultivars in our dataset.
65-
We are using a logistic growth model here because it is appropriate for the shape of the GDD-cover relationship.
73+
We provide estimated values for the asymptote $c$ and initial canopy cover value $a$, and provide canopy cover $y$ with corresponding growing degree days $x$ for one measurement of the chosen cultivar.
74+
75+
The below provides better estimates for the $c$, $a$, and $b$ parameters, which are used to plot the model as an orange line on top of the black points which are actual values.
6676

6777
```{r model_get_parameters}
6878
single_cultivar <- trait_weather_df %>%
6979
filter(cultivar == "PI656026")
70-
cap <- 150
71-
initial <- 25
72-
mean <- single_cultivar$mean[15]
73-
gdd_cum <- single_cultivar$gdd_cum[15]
74-
rate <- ((log((cap/mean) - 1)) - initial)/gdd_cum
75-
model_single_cultivar <- nls(mean ~ cap / (1 + exp(initial + rate * gdd_cum)),
76-
start = list(cap = cap, initial = initial, rate = rate),
77-
data = single_cultivar, trace = TRUE)
80+
81+
c <- 90
82+
a <- 0.1
83+
y <- single_cultivar$mean[3]
84+
g <- single_cultivar$gdd_cum[3]
85+
b <- ((log((c/y) - 1)) - a)/g
86+
model_single_cultivar <- nls(mean ~ c / (1 + exp(a + b * gdd_cum)),
87+
start = list(c = c, a = a, b = b),
88+
data = single_cultivar)
89+
summary(model_single_cultivar)
90+
coef(model_single_cultivar)
91+
92+
single_c <- coef(model_single_cultivar)[1]
93+
single_a <- coef(model_single_cultivar)[2]
94+
single_b <- coef(model_single_cultivar)[3]
95+
7896
single_cultivar <- single_cultivar %>%
79-
mutate(mean_predict = coef(model_single_cultivar)[1] / (1 + exp(coef(model_single_cultivar)[2] + coef(model_single_cultivar)[3] * gdd_cum)))
97+
mutate(mean_predict = single_c / (1 + exp(single_a + single_b * gdd_cum)))
98+
ggplot(single_cultivar) +
99+
geom_point(aes(x = gdd_cum, y = mean)) +
100+
geom_line(aes(x = gdd_cum, y = mean_predict), color = "orange") +
101+
labs(x = "Cumulative growing degree days", y = "Canopy Height")
102+
```
103+
104+
We then calculate the inflection point for this cultivar's model.
105+
106+
The maximum growth rate is the change in canopy cover per day at the rate of maximum growth. The growing degree day at which maximum growth is obtained is called the _inflection point_. This occurs near the midpoint of the y-axis, or $\frac{c - a}{2}$.
107+
108+
```{r}
109+
inf_y <- (as.numeric(single_c) - as.numeric(single_a)) / 2
110+
inf_x <- ((log((as.numeric(single_c) / inf_y) - 1)) - as.numeric(single_a)) / as.numeric(single_b)
111+
112+
ggplot(single_cultivar) +
113+
geom_point(aes(x = gdd_cum, y = mean)) +
114+
geom_line(aes(x = gdd_cum, y = mean_predict), color = "orange") +
115+
geom_hline(yintercept = inf_y, linetype = "dashed") +
116+
geom_vline(xintercept = inf_x) +
117+
labs(x = "Cumulative growing degree days", y = "Canopy Height")
80118
```
81119

82120
We then use the parameters from a single cultivar to run a model for each of the rest of the cultivars.
@@ -85,24 +123,31 @@ We also calculated the inflection point from each cultivar's model, which will b
85123

86124
```{r model_all_cultivars}
87125
all_cultivars <- c(day = as.double(), cultivar = as.character(), mean = as.numeric(),
88-
gdd_cum = as.numeric(), mean_predict = as.numeric())
126+
gdd_cum = as.numeric(), mean_predict = as.numeric(),
127+
inf_y = as.numeric(), inf_x = as.numeric())
128+
89129
for(each_cultivar in unique(trait_weather_df$cultivar)){
90130
each_cultivar_df <- filter(trait_weather_df, cultivar == each_cultivar)
91-
each_cultivar_model <- nls(mean ~ cap / (1 + exp(initial + rate * gdd_cum)),
92-
start = list(cap = cap, initial = initial, rate = rate),
93-
data = each_cultivar_df)
94-
model_cap <- coef(each_cultivar_model)[1]
95-
model_initial <- coef(each_cultivar_model)[2]
96-
model_rate <- coef(each_cultivar_model)[3]
131+
each_cultivar_model <- nls(mean ~ c / (1 + exp(a + b * gdd_cum)),
132+
start = list(c = c, a = a, b = b),
133+
data = each_cultivar_df)
134+
model_c <- coef(each_cultivar_model)[1]
135+
model_a <- coef(each_cultivar_model)[2]
136+
model_b <- coef(each_cultivar_model)[3]
97137
each_cultivar_df <- each_cultivar_df %>%
98-
mutate(mean_predict = model_cap / (1 + exp(model_initial + model_rate * gdd_cum)),
99-
inf_point = ((log((model_cap / 100) - 1)) - model_initial) / model_rate)
138+
mutate(mean_predict = model_c / (1 + exp(model_a + model_b * gdd_cum)),
139+
inf_y = (as.numeric(model_c) - as.numeric(model_a)) / 2,
140+
inf_x = ((log((as.numeric(model_c) / inf_y) - 1)) -
141+
as.numeric(single_a)) / as.numeric(single_b))
100142
all_cultivars <- rbind(each_cultivar_df, all_cultivars)
101143
}
144+
102145
ggplot(all_cultivars) +
103146
geom_point(aes(x = gdd_cum, y = mean)) +
104147
geom_line(aes(x = gdd_cum, y = mean_predict), color = "orange") +
105148
facet_wrap(~cultivar, scales = "free_y") +
149+
geom_hline(yintercept = inf_y, linetype = "dashed") +
150+
geom_vline(xintercept = inf_x) +
106151
labs(x = "Cumulative growing degree days", y = "Canopy Height")
107152
```
108153

@@ -112,15 +157,9 @@ The last thing that we are going to do is assess the difference in this relation
112157
We are going to use the inflection point from the logistic growth model, which indicates when canopy cover stops increasing as quickly with increasingly more warm days.
113158
The resulting inflection points for each cultivar are plotted as a histogram.
114159

115-
```{r plot_inflections}
116-
ggplot(all_cultivars) +
117-
geom_point(aes(x = gdd_cum, y = mean)) +
118-
geom_line(aes(x = gdd_cum, y = mean_predict), color = "orange") +
119-
geom_vline(aes(xintercept = inf_point)) +
120-
facet_wrap(~cultivar, scales = "free_y") +
121-
labs(x = "Cumulative growing degree days", y = "Canopy Height")
122-
ggplot(data.frame(inf_points = unique(all_cultivars$inf_point))) +
123-
geom_histogram(aes(x = inf_points)) +
160+
```{r plot_inflections, warning=FALSE}
161+
ggplot(data.frame(inf_points = unique(all_cultivars$inf_x))) +
162+
geom_histogram(aes(x = inf_points), bins = 300) +
124163
xlim(min(all_cultivars$gdd_cum), max(all_cultivars$gdd_cum)) +
125164
labs(x = "Inflection points", y = "Number")
126165
```
@@ -129,15 +168,12 @@ ggplot(data.frame(inf_points = unique(all_cultivars$inf_point))) +
129168

130169
In this examnple we will extract our plot data from a series of images taken in May of Season 6, measure its "greeness" annd plot that against the plant heights from above in this vignette.
131170

132-
The chosen statistic here is the normalised green-red difference index, NGRDI=(R-G)/(R+G) (Rasmussen et al., 2016), which uses the red and green bands from the image raster.
171+
The chosen statistic here is the normalised green-red difference index, $\textrm{NGRDI}=\frac{R-G}/{R+G}$ (Rasmussen et al., 2016), which uses the red and green bands from the image raster.
133172

134173
Below we retrieve all the available plots for a particular date, then find and convert the plot boundary JSON into tuples.
135174
We will use these tuples to extract the data for our plot.
136175

137176
```{r get_plot_boundary}
138-
# Setting up our options
139-
options(betydb_url = "https://terraref.ncsa.illinois.edu/bety/",
140-
betydb_api_version = 'v1')
141177
142178
# Making the query for our site
143179
sites <- betydb_query(table = "sites",
@@ -154,43 +190,26 @@ site.clip <- as(site.poly,"Spatial")
154190

155191
These are the names of the full field RGB data for the month of May.
156192
We will be extracting our plot data from these files.
157-
A compressed file containing these images can be found on [Google Drive](https://drive.google.com/file/d/1UuVHHcyf9sxjX9fEUpD4qa9LGlBR0XnK/view?usp=sharing).
158-
Be sure to extract the image files into a folder that's accessible to the code below.
193+
A compressed file containing these images can be found on [Clowder](https://terraref.ncsa.illinois.edu/clowder/files/5c8175874f0c78f6486d6870?dataset=5c81709a4f0c78f6486d686c&space=).
194+
The code below downloads the image files into a .zip file, which takes a few minutes, and then unzips that file so the image files are accessible.
159195

160196
```{r synth_filename_array}
161-
image_files <-
162-
c('fullfield_L1_ua-mac_2018-05-01_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
163-
'fullfield_L1_ua-mac_2018-05-02_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
164-
'fullfield_L1_ua-mac_2018-05-03_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
165-
'fullfield_L1_ua-mac_2018-05-05_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
166-
'fullfield_L1_ua-mac_2018-05-06_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
167-
'fullfield_L1_ua-mac_2018-05-08_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
168-
'fullfield_L1_ua-mac_2018-05-09_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
169-
'fullfield_L1_ua-mac_2018-05-10_rgb_stereovis_ir_sensors_fullfield_sorghum6_sun_may2018_-_copy_thumb.tif',
170-
'fullfield_L1_ua-mac_2018-05-12_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
171-
'fullfield_L1_ua-mac_2018-05-13_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
172-
'fullfield_L1_ua-mac_2018-05-14_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
173-
'fullfield_L1_ua-mac_2018-05-15_rgb_stereovis_ir_sensors_fullfield_sorghum6_sun_may2018_thumb.tif',
174-
'fullfield_L1_ua-mac_2018-05-17_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
175-
'fullfield_L1_ua-mac_2018-05-18_rgb_stereovis_ir_sensors_fullfield_sorghum6_sun_may2018_thumb.tif',
176-
'fullfield_L1_ua-mac_2018-05-20_rgb_stereovis_ir_sensors_plots_sorghum6_shade_thumb.tif',
177-
'fullfield_L1_ua-mac_2018-05-21_rgb_stereovis_ir_sensors_fullfield_sorghum6_shade_may2018_thumb.tif',
178-
'fullfield_L1_ua-mac_2018-05-22_rgb_stereovis_ir_sensors_plots_sorghum6_sun_thumb.tif',
179-
'fullfield_L1_ua-mac_2018-05-23_rgb_stereovis_ir_sensors_plots_sorghum6_sun_thumb.tif',
180-
'fullfield_L1_ua-mac_2018-05-28_rgb_stereovis_ir_sensors_plots_sorghum6_shade_rgb_eastedge_mn_thumb.tif'
181-
)
182-
```
183-
184-
```{r, echo = F}
185-
image_files_paths <- file.path("vignettes/", image_files)
197+
if(!file.exists("rgb_images.zip")){
198+
download.file("https://terraref.ncsa.illinois.edu/clowder/files/5c8175874f0c78f6486d6870/blob", destfile = "rgb_images.zip")
199+
unzip("rgb_images.zip", exdir = ".")
200+
}
186201
```
187202

188203
We will loop through these images, extract our plot data, and calculate the "greeness" of each extract.
189204
We are using the name of the file to extract the date for later.
190205

191-
```{r synth_get_greeness}
206+
```{r synth_get_greeness, message=FALSE}
192207
library(raster)
193208
209+
# Get file paths for all image files
210+
image_files <- list.files(".", pattern = "*.tif")
211+
image_files_paths <- file.path(".", image_files)
212+
194213
# Extract the date from the file name
195214
getDate <- function(file_name){
196215
date <- str_match_all(file_name, '[0-9]{4}-[0-9]{2}-[0-9]{2}')[[1]][,1]
@@ -232,9 +251,9 @@ We then pull in the canopy data for our charting purposes.
232251

233252
```{r get_trait_data_2, message = FALSE}
234253
trait_canopy_cover <- betydb_query(table = "search",
235-
trait = "canopy_cover",
236-
date = "~2018 May",
237-
limit = "none")
254+
trait = "canopy_cover",
255+
date = "~2018 May",
256+
limit = "none")
238257
239258
trait_canopy_cover_day <- trait_canopy_cover %>%
240259
mutate(day = as.Date(raw_date))
@@ -245,15 +264,14 @@ We now need to add the height data to the data set to plot.
245264
We then determine the average canopy cover across the site for the day that the sensor data were collected.
246265
The relationship between our greenness metric and average canopy cover are plotted.
247266

248-
```{r plot_sensor_trait}
267+
```{r plot_sensor_trait, warning=FALSE}
249268
trait_canopy_cover_daily <- trait_canopy_cover_day %>%
250269
filter(day %in% greenness_df$day) %>%
251270
group_by(day) %>%
252271
summarise(mean_canopy_cover = mean(mean),
253272
sd_canopy_cover = sd(mean))
254273
sensor_trait_df <- left_join(trait_canopy_cover_daily, greenness_df, by = "day")
274+
255275
ggplot(sensor_trait_df, aes(x = mean_canopy_cover, y = greeness)) +
256276
geom_point()
257277
```
258-
259-

0 commit comments

Comments
 (0)