@@ -72,31 +72,33 @@ We provide @tbl-zcompare partly as a mechanism for understanding $z$-procedures
7272#| label: tbl-zcompare
7373#| tbl-cap: |
7474#| Similarities of z-methods across one sample and two independent samples
75- #| analysis of a binary response variable. $p$ represents the population
76- #| proportion, $\hat{p}$ represents the sample proportion, $p_0$ represents
77- #| the null hypothesized proportion, $\hat{p}_{pool}$ represents the pooled
78- #| proportion, and $n$ represents the sample size. The subscripts of 1 and 2
75+ #| analysis of a binary response variable. $p$ represents the population
76+ #| proportion, $\hat{p}$ represents the sample proportion, $p_0$ represents
77+ #| the null hypothesized proportion, $\hat{p}_{pool}$ represents the pooled
78+ #| proportion, and $n$ represents the sample size. The subscripts of 1 and 2
7979#| indicate that the values are measured separately for samples 1 and 2.
8080#| tbl-pos: H
# Row-wise lookup table comparing the one-sample and two-independent-samples
# z-procedures for a binary response. Each cell is text with inline LaTeX
# math (backslashes doubled for R string escaping).
zsim_table <- tribble(
  ~variable, ~col1, ~col2,
  "Response variable", "Binary", "Binary",
  "Parameter of interest",
  "Proportion: $p$",
  "Difference in proportions: $p_1 - p_2$",
  "Statistic of interest",
  "Proportion: $\\widehat{p}$",
  "Difference in proportions: $\\widehat{p}_1 - \\widehat{p}_2$",
  "Standard error: HT",
  "$\\sqrt{\\frac{p_0(1-p_0)}{n}}$",
  # The final \bigg) has to sit inside the braces of \sqrt{...}; otherwise the
  # closing parenthesis of (1/n_1 + 1/n_2) is typeset outside the radical.
  "$\\sqrt{\\widehat{p}_{pool}\\bigg(1-\\widehat{p}_{pool}\\bigg)\\bigg(\\frac{1}{n_1} + \\frac{1}{n_2}\\bigg)}$",
  "Standard error: CI",
  "$\\sqrt{\\frac{\\widehat{p}(1-\\widehat{p})}{n}}$",
  "$\\sqrt{\\frac{\\widehat{p}_{1}(1-\\widehat{p}_{1})}{n_1} + \\frac{\\widehat{p}_{2}(1-\\widehat{p}_{2})}{n_2}}$",
  "Conditions",
  "1. Independence, 2. Success-failure",
  "1. Independence, 2. Success-failure"
)
9090
# Render the z-procedure comparison table. escape = FALSE leaves the cell text
# unescaped so the LaTeX math stored in zsim_table reaches the output as-is.
zsim_table |>
  kbl(
    col.names = c("", "One sample ", "Two independent samples"),
    escape = FALSE,
    booktabs = TRUE,
    linesep = "\\addlinespace"
  ) |>
  kable_styling(
    full_width = TRUE,
    latex_options = "striped",
    bootstrap_options = c("striped", "condensed")
  ) |>
  # Give the first (row-label) column a fixed width.
  column_spec(1, width = "10em")
102104```
@@ -142,24 +144,31 @@ We provide @tbl-tcompare partly as a mechanism for understanding $t$-procedures
142144#| values are measured separately on sample $1$ and sample $2$.
143145#| tbl-pos: H
# Row-wise lookup table comparing the one-sample, paired, and
# two-independent-samples t-procedures for a numeric response. Cells holding
# LaTeX are written as raw strings so backslashes need no doubling.
tsim_table <- tribble(
  ~variable, ~col1, ~col2, ~col3,
  "Response variable", "Numeric", "Numeric", "Numeric",
  "Parameter of interest",
  r"(Mean: $\mu$)",
  r"(Paired mean: $\mu_{diff}$)",
  r"(Difference in means: $\mu_1 - \mu_2$)",
  "Statistic of interest",
  r"(Mean: $\bar{x}$)",
  r"(Paired mean: $\bar{x}_{diff}$)",
  r"(Difference in means: $\bar{x}_1 - \bar{x}_2$)",
  "Standard error",
  r"($\frac{s}{\sqrt{n}}$)",
  r"($\frac{s_{diff}}{\sqrt{n_{diff}}}$)",
  r"($\sqrt{\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}}$)",
  "Degrees of freedom",
  "$n-1$",
  "$n_{diff} -1$",
  r"($\min(n_1 -1, n_2 - 1)$)",
  "Conditions",
  "1. Independence, 2. Normality or large samples",
  "1. Independence, 2. Normality or large samples",
  "1. Independence, 2. Normality or large samples"
)
153155
# Render the t-procedure comparison table. escape = FALSE leaves the cell text
# unescaped so the LaTeX math stored in tsim_table reaches the output as-is.
tsim_table |>
  kbl(
    col.names = c(
      "",
      "One sample ",
      "Paired sample",
      "Two independent samples"
    ),
    escape = FALSE,
    booktabs = TRUE,
    linesep = "\\addlinespace"
  ) |>
  kable_styling(
    full_width = TRUE,
    latex_options = "striped",
    bootstrap_options = c("striped", "condensed")
  ) |>
  # Give the first (row-label) column a fixed width.
  column_spec(1, width = "10em")
165174```
@@ -206,10 +215,10 @@ Does your answer change?
206215#| - The circled triangle is the only triangle.
207216#| - The circled triangle is the only blue triangle.
208217#| fig-alt: |
209- #| Four shapes are presented twice. In the first set the shapes and colors are
210- #| all different -- pink circle, yellow square, red diamond, blue triangle.
218+ #| Four shapes are presented twice. In the first set the shapes and colors are
219+ #| all different -- pink circle, yellow square, red diamond, blue triangle.
211220#| In the second set the colors are all different but the triangle shape is
212- #| repeated -- pink circle, yellow square, red triangle, blue triangle.
221+ #| repeated -- pink circle, yellow square, red triangle, blue triangle.
213222#| In each set, the blue triangle is circled.
214223#| fig-asp: 0.15
215224shape_names <- c(
@@ -228,10 +237,15 @@ shapes <- data.frame(
228237 figure = c(rep(1, 4), rep(2, 4)),
229238 x = rep(1:4, 2),
230239 y = 1,
231- color = rep(c(
232- IMSCOL["pink", "full"], IMSCOL["yellow", "full"],
233- IMSCOL["red", "full"], IMSCOL["blue", "full"]
234- ), 2)
240+ color = rep(
241+ c(
242+ IMSCOL["pink", "full"],
243+ IMSCOL["yellow", "full"],
244+ IMSCOL["red", "full"],
245+ IMSCOL["blue", "full"]
246+ ),
247+ 2
248+ )
235249)
236250
237251ggplot(shapes |> filter(figure == 1), aes(x, y)) +
@@ -241,7 +255,14 @@ ggplot(shapes |> filter(figure == 1), aes(x, y)) +
241255 scale_fill_identity() +
242256 theme_void() +
243257 expand_limits(x = c(0.5, 4.5)) +
244- annotate("point", x = 4, y = 1, shape = "circle open", color = "black", size = 40)
258+ annotate(
259+ "point",
260+ x = 4,
261+ y = 1,
262+ shape = "circle open",
263+ color = "black",
264+ size = 40
265+ )
245266
246267ggplot(shapes |> filter(figure == 2), aes(x, y)) +
247268 geom_point(aes(shape = shape_names, color = color, fill = color), size = 20) +
@@ -250,7 +271,14 @@ ggplot(shapes |> filter(figure == 2), aes(x, y)) +
250271 scale_fill_identity() +
251272 theme_void() +
252273 expand_limits(x = c(0.5, 4.5)) +
253- annotate("point", x = 4, y = 1, shape = "circle open", color = "black", size = 40)
274+ annotate(
275+ "point",
276+ x = 4,
277+ y = 1,
278+ shape = "circle open",
279+ color = "black",
280+ size = 40
281+ )
254282```
255283
256284In @fig-blue-triangle-shapes-1 the circled item is the only triangle, but in the bottom image the circled item is one of two triangles.
@@ -318,7 +346,8 @@ If the variable had been "success or failure" (e.g., "used redundant or didn't")
318346redundant |>
319347 slice_head(n = 6) |>
320348 kbl(
321- linesep = "", booktabs = TRUE,
349+ linesep = "",
350+ booktabs = TRUE,
322351 align = "lrrrr"
323352 ) |>
324353 kable_styling(
@@ -361,7 +390,7 @@ redundant_summary |>
361390 geom_col(position = "dodge") +
362391 geom_text(
363392 aes(
364- y = c(0.75, 1.75, 1.25, 2.25),
393+ y = c(0.75, 1.75, 1.25, 2.25),
365394 x = mean_redundant_perc - c(5, 5, -5, 5),
366395 label = paste(round(mean_redundant_perc, 2), "%")
367396 ),
@@ -426,20 +455,28 @@ Note that this interval for the true population parameter is only valid if we ca
426455boot_eng_4 |>
427456 ggplot(aes(x = stat)) +
428457 geom_histogram(binwidth = 5, fill = IMSCOL["green", "full"]) +
429- annotate("line",
458+ annotate(
459+ "line",
430460 x = c(ci_eng_4_lower, ci_eng_4_lower),
431461 y = c(0, 250),
432- color = IMSCOL["green", "f2"], size = 1
462+ color = IMSCOL["green", "f2"],
463+ size = 1
433464 ) +
434- annotate("line",
465+ annotate(
466+ "line",
435467 x = c(ci_eng_4_upper, ci_eng_4_upper),
436468 y = c(0, 250),
437- color = IMSCOL["green", "f2"], size = 1
469+ color = IMSCOL["green", "f2"],
470+ size = 1
438471 ) +
439- annotate("rect",
440- xmin = ci_eng_4_lower, xmax = ci_eng_4_upper,
441- ymin = 0, ymax = 250,
442- alpha = 0.3, fill = IMSCOL["green", "full"]
472+ annotate(
473+ "rect",
474+ xmin = ci_eng_4_lower,
475+ xmax = ci_eng_4_upper,
476+ ymin = 0,
477+ ymax = 250,
478+ alpha = 0.3,
479+ fill = IMSCOL["green", "full"]
443480 ) +
444481 labs(
445482 x = "Mean redundant adjective usage percentage",
@@ -460,7 +497,9 @@ redundant_paired <- redundant |>
460497 filter(language == "English") |>
461498 select(-language, -n_questions) |>
462499 pivot_wider(
463- id_cols = subject, names_from = items, names_prefix = "redundant_perc_",
500+ id_cols = subject,
501+ names_from = items,
502+ names_prefix = "redundant_perc_",
464503 values_from = redundant_perc
465504 ) |>
466505 mutate(diff_redundant_perc = redundant_perc_16 - redundant_perc_4)
@@ -485,7 +524,8 @@ Although the redundancy percentages seem higher in the 16 item task, a hypothesi
485524redundant_paired |>
486525 slice_head(n = 6) |>
487526 kbl(
488- linesep = "", booktabs = TRUE,
527+ linesep = "",
528+ booktabs = TRUE,
489529 align = "lrrrr"
490530 ) |>
491531 kable_styling(
@@ -541,13 +581,15 @@ null_eng |>
541581 "line",
542582 x = c(obs_stat_eng, obs_stat_eng),
543583 y = c(0, 200),
544- color = IMSCOL["red", "full"], size = 1
584+ color = IMSCOL["red", "full"],
585+ size = 1
545586 ) +
546587 annotate(
547588 "line",
548589 x = c(-obs_stat_eng, -obs_stat_eng),
549590 y = c(0, 200),
550- color = IMSCOL["red", "full"], size = 1,
591+ color = IMSCOL["red", "full"],
592+ size = 1,
551593 linetype = "dashed"
552594 ) +
553595 labs(
@@ -621,15 +663,15 @@ The p-value for the 4 item display comparison is very small (`r pval_4`) while t
621663#| usage percentage between English and Spanish speakers. In each
622664#| plot, the observed differences in the sample (solid line) and the
623665#| differences in the other direction (dashed line) are overlaid.
624- #| fig-subcap:
666+ #| fig-subcap:
625667#| - The differences in 4 item displays.
626668#| - The differences in 16 item displays.
627669#| fig-alt: |
628670#| Distributions of 1,000 differences in randomized means of redundant adjective
629- #| usage percentage between English and Spanish speakers. Plot A shows the
671+ #| usage percentage between English and Spanish speakers. Plot A shows the
630672#| differences in 4 item displays and Plot B shows the differences in 16 item
631- #| displays. In each plot, the observed differences in the sample (solid line)
632- #| and the differences in the other direction (dashed line) are overlaid. In
673+ #| displays. In each plot, the observed differences in the sample (solid line)
674+ #| and the differences in the other direction (dashed line) are overlaid. In
633675#| the 4 item display the observed value is not seen as a potential observation
634676#| from the randomized mean distribution. In the 16 item display, the observed
635677#| value is a possible value on the randomized mean distribution, but it still
@@ -642,13 +684,15 @@ null_4 |>
642684 "line",
643685 x = c(obs_stat_4, obs_stat_4),
644686 y = c(0, 300),
645- color = IMSCOL["red", "full"], size = 1
687+ color = IMSCOL["red", "full"],
688+ size = 1
646689 ) +
647690 annotate(
648691 "line",
649692 x = -1 * c(obs_stat_4, obs_stat_4),
650693 y = c(0, 300),
651- color = IMSCOL["red", "full"], size = 1,
694+ color = IMSCOL["red", "full"],
695+ size = 1,
652696 linetype = "dashed"
653697 ) +
654698 labs(
@@ -666,13 +710,15 @@ null_16 |>
666710 "line",
667711 x = c(obs_stat_16, obs_stat_16),
668712 y = c(0, 200),
669- color = IMSCOL["red", "full"], size = 1
713+ color = IMSCOL["red", "full"],
714+ size = 1
670715 ) +
671716 annotate(
672717 "line",
673718 x = -1 * c(obs_stat_16, obs_stat_16),
674719 y = c(0, 200),
675- color = IMSCOL["red", "full"], size = 1,
720+ color = IMSCOL["red", "full"],
721+ size = 1,
676722 linetype = "dashed"
677723 ) +
678724 labs(
0 commit comments