Skip to content

Commit 6b56bf1

Browse files
committed
feat(data): integrate dynamic behavioral attrs into epigamesDiffNet (Issue #75)
- Add data-raw/epigames.R: bundles epigames_hourly + dynamic_attrs_hourly.csv into the epigames list with a new $dyn_attrs slot (long format, 201,366 rows).
- Add data-raw/epigamesDiffNet.R: collapses hourly attrs to 15 daily windows and populates vertex.dyn.attrs with mask/med/quarantine proportions per day.
- Regenerate data/epigames.rda and data/epigamesDiffNet.rda.

Dynamic attributes (mask, med, quarantine) are now visible in print(epigamesDiffNet):

    Dynamic attributes: mask, med, quarantine (3)

Validated: exposure(epigamesDiffNet, attrs = 'mask') works with time-varying data. Correlation with the static proxy = 0.88, confirming that the dynamic attrs capture additional temporal variation.
1 parent a0ef61d commit 6b56bf1

4 files changed

Lines changed: 158 additions & 25 deletions

File tree

data-raw/epigames.R

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,68 @@
11
# data-raw/epigames.R
22
# Pre-processing script for the EpiGames Raw Dataset
3+
# Issue #75: Extended to include dynamic behavioral attributes (mask, med, quarantine)
4+
#
5+
# Prerequisites:
6+
# 1. Run playground/epigames-stuff/epigames-analysis-copy/extract_dynamic_attrs.py
7+
# to generate dynamic_attrs_hourly.csv in that same folder.
8+
# 2. Have data-raw/epigames_hourly.rda available (generated by the previous pipeline).
9+
#
10+
# This script is run from the package root directory.
311

412
rm(list = ls())
513

6-
# The raw data consists of an attributes data frame and an hourly edgelist,
7-
# both using consistent node IDs (1-594).
14+
# ---------------------------------------------------------------------------
15+
# 1. Load the base hourly dataset (edgelist + static attributes)
16+
# ---------------------------------------------------------------------------
817
load("data-raw/epigames_hourly.rda")
18+
# epigames_hourly is a list with $attributes and $edgelist
919

10-
epigames <- epigames_hourly
20+
# ---------------------------------------------------------------------------
21+
# 2. Load the hourly dynamic behavioral attributes produced by Python
22+
# ---------------------------------------------------------------------------
23+
dyn_attrs_path <- "playground/epigames-stuff/epigames-analysis-copy/dynamic_attrs_hourly.csv"
1124

12-
# Save compressed raw data
25+
if (!file.exists(dyn_attrs_path)) {
26+
stop(
27+
"dynamic_attrs_hourly.csv not found.\n",
28+
"Please run extract_dynamic_attrs.py first:\n",
29+
" cd playground/epigames-stuff/epigames-analysis-copy\n",
30+
" source .venv/bin/activate\n",
31+
" python3 extract_dynamic_attrs.py"
32+
)
33+
}
34+
35+
dyn_attrs_hourly <- read.csv(dyn_attrs_path, stringsAsFactors = FALSE)
36+
37+
# Sanity checks
38+
stopifnot(ncol(dyn_attrs_hourly) == 5) # id, hour, mask, med, quarantine
39+
stopifnot(nrow(dyn_attrs_hourly) == 594 * 339) # 201,366 rows
40+
stopifnot(all(dyn_attrs_hourly$id %in% 1:594))
41+
stopifnot(all(dyn_attrs_hourly$hour %in% 0:338))
42+
43+
cat("Dynamic attrs loaded:", nrow(dyn_attrs_hourly), "rows,",
44+
ncol(dyn_attrs_hourly), "cols\n")
45+
cat(" Nodes wearing mask (ever):",
46+
length(unique(dyn_attrs_hourly$id[dyn_attrs_hourly$mask == 1])), "\n")
47+
cat(" Nodes in quarantine (ever):",
48+
length(unique(dyn_attrs_hourly$id[dyn_attrs_hourly$quarantine == 1])), "\n")
49+
50+
# ---------------------------------------------------------------------------
51+
# 3. Bundle into the epigames list (3 elements)
52+
# ---------------------------------------------------------------------------
53+
epigames <- list(
54+
attributes = epigames_hourly$attributes, # static, 594 x 6
55+
edgelist = epigames_hourly$edgelist, # hourly, ~39k rows
56+
dyn_attrs = dyn_attrs_hourly # NEW: 201,366 rows (long format)
57+
)
58+
59+
cat("\nepigames list structure:\n")
60+
cat(" $attributes:", nrow(epigames$attributes), "rows x", ncol(epigames$attributes), "cols\n")
61+
cat(" $edgelist: ", nrow(epigames$edgelist), "rows x", ncol(epigames$edgelist), "cols\n")
62+
cat(" $dyn_attrs: ", nrow(epigames$dyn_attrs), "rows x", ncol(epigames$dyn_attrs), "cols\n")
63+
64+
# ---------------------------------------------------------------------------
65+
# 4. Save compressed .rda
66+
# ---------------------------------------------------------------------------
1367
usethis::use_data(epigames, overwrite = TRUE, compress = "xz")
68+
cat("\nSaved: data/epigames.rda\n")

data-raw/epigamesDiffNet.R

Lines changed: 99 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,128 @@
11
# data-raw/epigamesDiffNet.R
2-
# Generating the dynamic diffnet object using netdiffuseR + collapse_timeframes()
2+
# Generating the daily diffnet object from epigames using collapse_timeframes()
3+
# Issue #75: Now includes vertex.dyn.attrs (mask, med, quarantine per day)
4+
#
5+
# Run after data-raw/epigames.R has built data/epigames.rda.
36

47
rm(list = ls())
58
library(netdiffuseR)
69

7-
# Load the base raw dataset created in data-raw/epigames.R (hourly resolution)
10+
# ---------------------------------------------------------------------------
11+
# 1. Load the base epigames dataset (with dynamic attrs)
12+
# ---------------------------------------------------------------------------
813
load("data/epigames.rda")
914

10-
attrs <- epigames$attributes
11-
edges <- epigames$edgelist
15+
attrs <- epigames$attributes # 594 x 6: id, toa, qyes_total, qno_total, mask_prop, med_prop
16+
edges <- epigames$edgelist # hourly edgelist: sender, receiver, time (0-338), weight
17+
dyn_long <- epigames$dyn_attrs # long format: id, hour (0-338), mask, med, quarantine
1218

13-
# Collapse hourly edgelist (hours 0-338) into daily windows (days 1-15)
14-
source("R/collapse_timeframes.R")
19+
# ---------------------------------------------------------------------------
20+
# 2. Collapse hourly edgelist into 15 daily windows via collapse_timeframes()
21+
# ---------------------------------------------------------------------------
22+
WINDOW_SIZE <- 24 # hours per day
23+
N_DAYS <- 15
1524

1625
daily_edgelist <- collapse_timeframes(
17-
edgelist = edges,
18-
ego = "sender",
19-
alter = "receiver",
20-
timevar = "time",
21-
weightvar = "weight",
22-
window_size = 24,
23-
binarize = TRUE,
24-
cumulative = TRUE,
25-
symmetric = TRUE
26+
edgelist = edges,
27+
ego = "sender",
28+
alter = "receiver",
29+
timevar = "time",
30+
weightvar = "weight",
31+
window_size = WINDOW_SIZE,
32+
binarize = TRUE,
33+
cumulative = TRUE,
34+
symmetric = TRUE
2635
)
2736

28-
# Build daily adjacency matrices
37+
cat("Daily edgelist: ", nrow(daily_edgelist), "rows, time range:",
38+
range(daily_edgelist$time), "\n")
39+
40+
# Build adjacency matrices
2941
adjmat <- edgelist_to_adjmat(
3042
daily_edgelist[, c("sender", "receiver")],
31-
w = daily_edgelist$weight,
32-
t0 = daily_edgelist$time,
43+
w = daily_edgelist$weight,
44+
t0 = daily_edgelist$time,
3345
keep.isolates = TRUE,
3446
multiple = TRUE
3547
)
3648

37-
max_t <- max(daily_edgelist$time, na.rm = TRUE)
49+
# ---------------------------------------------------------------------------
50+
# 3. Build vertex.dyn.attrs: one data.frame per day (15 total)
51+
# Each data.frame: 594 rows, columns: mask, med, quarantine (daily means)
52+
# ---------------------------------------------------------------------------
53+
# Map hourly data to day index (day d = hours [(d-1)*24 .. d*24-1])
54+
dyn_long$day <- (dyn_long$hour %/% WINDOW_SIZE) + 1 # 1-based day
55+
dyn_long$day <- pmin(dyn_long$day, N_DAYS) # safety clamp only: hours 336-338 already map to day 15 (a partial, 3-hour day)
56+
57+
vertex_dyn <- lapply(1:N_DAYS, function(d) {
58+
sub <- dyn_long[dyn_long$day == d, ]
59+
60+
# Aggregate per node: mean within each 24-hour window
61+
# (proportion of hours in that day where behavior was active)
62+
agg <- aggregate(
63+
cbind(mask, med, quarantine) ~ id,
64+
data = sub,
65+
FUN = mean
66+
)
67+
68+
# Sort by id to match the node ordering in the diffnet object
69+
agg <- agg[order(agg$id), ]
70+
rownames(agg) <- NULL
71+
72+
# Return only the behavior columns (not id — diffnet uses position)
73+
agg[, c("mask", "med", "quarantine")]
74+
})
75+
76+
# Sanity check: each element should be 594 rows x 3 cols
77+
stopifnot(all(sapply(vertex_dyn, nrow) == 594))
78+
stopifnot(all(sapply(vertex_dyn, ncol) == 3))
79+
80+
cat("vertex.dyn.attrs built: ", N_DAYS, "data.frames of",
81+
nrow(vertex_dyn[[1]]), "rows x", ncol(vertex_dyn[[1]]), "cols\n")
82+
cat(" Day 1 — mean mask usage:", round(mean(vertex_dyn[[1]]$mask), 3),
83+
" mean quarantine:", round(mean(vertex_dyn[[1]]$quarantine), 3), "\n")
84+
cat(" Day 15 — mean mask usage:", round(mean(vertex_dyn[[15]]$mask), 3),
85+
" mean quarantine:", round(mean(vertex_dyn[[15]]$quarantine), 3), "\n")
3886

39-
# Prepare TOA vector: real adoption times from attrs, NA for non-adopters
87+
# ---------------------------------------------------------------------------
88+
# 4. Prepare TOA vector
89+
# ---------------------------------------------------------------------------
4090
toa_vec <- stats::setNames(attrs$toa, as.character(attrs$id))
4191

92+
# ---------------------------------------------------------------------------
93+
# 5. Assemble diffnet object
94+
# ---------------------------------------------------------------------------
4295
epigamesDiffNet <- as_diffnet(
4396
adjmat,
4497
toa = toa_vec,
4598
vertex.static.attrs = attrs,
99+
vertex.dyn.attrs = vertex_dyn,
46100
t0 = 1,
47-
t1 = max_t
101+
t1 = N_DAYS
48102
)
49103

104+
cat("\nepigamesDiffNet summary:\n")
105+
print(epigamesDiffNet)
106+
107+
# ---------------------------------------------------------------------------
108+
# 6. Quick validation: dynamic exposure vs static exposure
109+
# ---------------------------------------------------------------------------
110+
cat("\nValidating exposure() with dynamic mask attrs...\n")
111+
expo_static <- exposure(
112+
epigamesDiffNet,
113+
attrs = matrix(
114+
rep(epigamesDiffNet$vertex.static.attrs$mask_prop, N_DAYS),
115+
nrow = 594, ncol = N_DAYS
116+
)
117+
)
118+
expo_dynamic <- exposure(epigamesDiffNet, attrs = "mask")
119+
120+
cor_val <- cor(as.vector(expo_static), as.vector(expo_dynamic), use = "complete.obs")
121+
cat(" Correlation static vs dynamic mask exposure:", round(cor_val, 4), "\n")
122+
cat(" (Should be < 1.0, confirming dynamic attrs add new information)\n")
123+
124+
# ---------------------------------------------------------------------------
125+
# 7. Save
126+
# ---------------------------------------------------------------------------
50127
usethis::use_data(epigamesDiffNet, overwrite = TRUE, compress = "xz")
128+
cat("\nSaved: data/epigamesDiffNet.rda\n")

data/epigames.rda

4.63 KB
Binary file not shown.

data/epigamesDiffNet.rda

1.91 KB
Binary file not shown.

0 commit comments

Comments
 (0)