-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathtimeSeries.R
More file actions
62 lines (53 loc) · 2.6 KB
/
timeSeries.R
File metadata and controls
62 lines (53 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# time series - missing entries
# Berry Boessenkool, June 2023 + Dec 2025, berry-b@gmx.de
# fake data: one record every 20 minutes between two fixed timestamps
values <- c("2023-07-26 11:20", "2023-08-02 19:40")
values <- strptime(values, "%F %H:%M")
values <- seq(values[1], values[2], by = "20 min")
values <- data.frame(time = values)
# Take the number of rows from the data instead of hard-coding it, so that
# changing the dates or the time step above cannot cause a length mismatch.
values$A <- round(300 + cumsum(rnorm(nrow(values))), 1)
values$B <- round(20 * cumsum(rnorm(nrow(values))), 0)
# fake gaps (not NA; not there at all): drop 40 randomly chosen rows
values <- values[-sample(seq_len(nrow(values)), 40), ]
# time differences to the previous record, in minutes (0 for the first row).
# diff() on date-times chooses its unit (secs/mins/hours/days) from the data,
# and c() then drops that unit silently. Requesting minutes explicitly keeps
# the "> 20" threshold below meaningful for any time step.
values$diff <- c(0, as.numeric(diff(values$time), units = "mins"))
# Largest gaps first. View() opens a data viewer, so only call it when a
# person is sitting in front of the session (not under Rscript / source).
if (interactive()) {
  View(values[order(values$diff, decreasing = TRUE), ])
}
# Visualize gaps:
plot(diff ~ time, data = values, type = "l", las = 1)
plot(A ~ time, data = values, type = "l", las = 1)
# red circles: records that come after a gap (more than one 20-minute step)
points(A ~ time, data = values[values$diff > 20, ], col = "red")
# completize time series:
# Build a gap-free 20-minute grid from the first to the last recorded
# timestamp and left-join the records onto it. Grid times without a record
# are kept (all.x) and get NA in the value columns.
allvalues <- merge(
  data.frame(
    time = seq(values$time[1], values$time[nrow(values)], by = "20 min")
  ),
  values,
  all.x = TRUE
)
# The step-to-previous-record column is obsolete (and wrong) on the full grid.
allvalues[["diff"]] <- NULL
# If `values` is no longer needed, remove it for a clean workspace,
# to avoid accidentally accessing it later...
rm(list = "values")
# impute missing values:
# in lieu of complex proper methods, here are some quick options:
if (!requireNamespace("zoo", quietly = TRUE)) {
  install.packages("zoo")
}
# na.rm = FALSE keeps the result exactly as long as the input. With the
# default (na.rm = TRUE), NAs at the start/end that cannot be filled are
# dropped, and the shorter vector could not be assigned as a column.
allvalues$A_linear <- zoo::na.approx(allvalues$A, na.rm = FALSE) # interpolation
allvalues$A_spline <- zoo::na.spline(allvalues$A)
allvalues$A_last <- zoo::na.locf(allvalues$A, na.rm = FALSE) # last observation
allvalues$A_mean <- zoo::na.fill(allvalues$A, mean(allvalues$A, na.rm = TRUE))
allvalues$A_median <- zoo::na.aggregate(allvalues$A, FUN = median)
# na.aggregate with FUN = mean (its default) gives the same result as the
# na.fill line above, with less code + more options
# Compare the options on the first 50 time steps:
plot(A ~ time, data = head(allvalues, 50), type = "l", las = 1)
lines(A_spline ~ time, data = head(allvalues, 50), col = "red")
lines(A_mean ~ time, data = head(allvalues, 50), col = "blue")
lines(A_linear ~ time, data = head(allvalues, 50), col = "orange")
# draw the observed series once more so it ends up on top of the colors
lines(A ~ time, data = head(allvalues, 50))
# Often, simple linear interpolation is the best of these options in my opinion.
# If anyhow possible, work with methods that can handle NAs and skip imputation.
# Only the black line is truth!
# In real life, if at all, more complex imputation may be needed.
# this might be a starting point:
# Imputation of column A with a structural time series model (na.StructTS),
# run on the first 40 rows only to keep it fast.
# read.zoo() turns the data frame into a zoo object; as.ts() then converts the
# selected series to a base R ts object (so zoo_A is a ts, despite its name).
# NOTE(review): presumably read.zoo uses the first column (time) as index - confirm.
zoo_A <- as.ts(zoo::read.zoo(allvalues)[1:40,"A"])
# frequency=72: with one value every 20 minutes, 72 values make up one day
zoo_A_ts <- ts(zoo_A, frequency=72)
zoo_A_imp_ts <- zoo::na.StructTS(zoo_A_ts) # takes a few seconds, more with all rows
# Convert back to zoo with original index:
# NOTE(review): zoo_A is a ts at this point, so this index may be numeric
# rather than the POSIXct timestamps of allvalues$time - confirm.
zoo_A_imp <- zoo::zoo(zoo_A_imp_ts, zoo::index(zoo_A))
# imputed series, with the series before imputation drawn over it in purple
plot(zoo_A_imp)
lines(zoo_A, col="purple", lwd=3)
# Please do not linearly interpolate daily rainfall over 3 months.
# Annual sums will be way too high.
# Looking at you, Austrian weather service...