Skip to content

Commit b5ca4ab

Browse files
committed
slicker formula, some comments
1 parent b3a009d commit b5ca4ab

1 file changed

Lines changed: 22 additions & 63 deletions

File tree

confIntVariance/R/main.R

Lines changed: 22 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,33 @@
1-
2-
productsOfDisjointTuples <- function(s) {
3-
n <- length(s)
4-
N <- matrix(0, ncol = n, nrow=min(n, 4))
5-
N[1, 1] <- s[1]
6-
for (i in seq(2, n)) {
7-
N[1, i] <- s[i] + N[1, i-1]
8-
for (j in seq(2, min(n, 4)))
9-
N[j, i] <- s[i] * N[j - 1, i-1] + N[j, i-1]
10-
}
11-
N[, n]
12-
}
13-
14-
standardEx <- c(2,2,4,6,2, 6,4,5,4,6)
15-
16-
lsesv <- function(x) {
17-
v <- var(x)
1+
# the standard example, not used
2+
standardEx <- c(2,2,4,6,2,6,4,5,4,6)
3+
4+
# least squares unbiased estimator of the square of the population variance
5+
lsepvs <- function(x) {
6+
m <- mean(x)
7+
# central second moment (caution: biased)
8+
c2 <- mean((x - m)^2)
9+
# central fourth moment (caution: biased)
10+
c4 <- mean((x - m)^4)
1811
n <- length(x)
19-
elSym <- productsOfDisjointTuples(x)
20-
# triple-checked formula
21-
(2/n/(n-1) * elSym[2]^2 - 4/n/(n-2) * elSym[1] * elSym[3] + 4/(n-2)/(n-3) * elSym[4])
12+
# least-squares unbiased estimator of the square of the population variance, contains a joint bias correction
13+
# equals the U-statistic but in a computationally efficient form
14+
# verified
15+
(1 + 1/2/(n-1) + 5/2/(n-2) + 9/2/(n-2)/(n-3)) * c2^2 - (1/(n-2) + 3/(n-2)/(n-3)) * c4
2216
}
2317

24-
lsesvExp <- function(x) {
25-
stopifnot(length(x)==6)
26-
p1 <- sum(x)
27-
p2 <- sum(x^2)
28-
p3 <- sum(x^3)
29-
p4 <- sum(x^4)
30-
(1/360)*p1^4 +(-1/30)*p1^2*p2 +(7/120)*p2^2 +(1/18)*p1*p3 +(-1/12)*p4
31-
}
3218

19+
# least square unbiased estimator of the variance of the usual unbiased sample variance
20+
# equals the U-statistic but in a computationally efficient form
3321
varianceOfSampleVariance <- function(x) {
3422
v <- var(x)
3523
n <- length(x)
36-
p1 <- productsOfDisjointTuples(x)
37-
p2 <- productsOfDisjointTuples(x^2)
38-
sv <- lsesv(x)
39-
# give the same value, up to numerical inaccuracies
40-
k1 <- var(x)^2 - p2[2] / choose(n, 2) + 4/n/(n-1)/(n-2)*(p1[3]*sum(x) - 4 * p1[4]) - p1[4]/choose(n, 4)
41-
k2 <- var(x)^2 - sv
42-
k1
24+
# expectation of square minus square of expectation, and analogously for the estimators
25+
# the first term estimates its expectation, the second term the square of the expectation of the unbiased sample variance, i.e., the square of the population variance
26+
var(x)^2 - lsepvs2(x)
4327
}
4428

45-
46-
x <- (1:6)^3
47-
print(
48-
mean(
49-
apply(
50-
combn(6, 4),
51-
2,
52-
function(col) {
53-
mean(
54-
c(
55-
(x[col[1]] - x[col[2]])^2 * (x[col[3]] - x[col[4]])^2,
56-
(x[col[1]] - x[col[3]])^2 * (x[col[2]] - x[col[4]])^2,
57-
(x[col[1]] - x[col[4]])^2 * (x[col[2]] - x[col[3]])^2
58-
)
59-
)
60-
}
61-
)
62-
)
63-
/4
64-
)
65-
66-
67-
68-
69-
70-
71-
# the confidence interval for the variance
29+
# the confidence interval for the population variance around the usual unbiased sample variance
30+
# using as standard deviation the square of the estimated variance of the usual unbiased sample variance, as estimated in the preceding function
7231
varwci <- function(x, conf.level=0.95) {
7332
if (is.data.frame(x)) {
7433
stopifnot(dim(x)[2] == 1)
@@ -79,7 +38,7 @@ varwci <- function(x, conf.level=0.95) {
7938
stopifnot(n>=4)
8039
varsv <- varianceOfSampleVariance(x)
8140
if (varsv < 0) {
82-
warning("Sample size too small for estimation of the variance of the sample variance")
41+
warning("Sample size too small for estimation of the variance of the sample variance. Please use a larger sample.")
8342
r <- c(NA, NA)
8443
} else {
8544
t <- qt((1+conf.level)/2, df=n-1)

0 commit comments

Comments
 (0)