Skip to content

Commit da1ab6c

Browse files
committed
style: clean up epigames data generation scripts
- Removed redundant comments and sanity checks for better readability
- Simplified as_diffnet call structure
- Regenerated .rda files to match clean scripts
1 parent 6b56bf1 commit da1ab6c

2 files changed

Lines changed: 30 additions & 118 deletions

File tree

data-raw/epigames.R

Lines changed: 11 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,29 @@
11
# data-raw/epigames.R
22
# Pre-processing script for the EpiGames Raw Dataset
3-
# Issue #75: Extended to include dynamic behavioral attributes (mask, med, quarantine)
4-
#
5-
# Prerequisites:
6-
# 1. Run playground/epigames-stuff/epigames-analysis-copy/extract_dynamic_attrs.py
7-
# to generate dynamic_attrs_hourly.csv in that same folder.
8-
# 2. Have data-raw/epigames_hourly.rda available (generated by the previous pipeline).
9-
#
10-
# This script is run from the package root directory.
113

124
rm(list = ls())
135

14-
# ---------------------------------------------------------------------------
15-
# 1. Load the base hourly dataset (edgelist + static attributes)
16-
# ---------------------------------------------------------------------------
6+
# The raw data consists of an attributes data frame and an hourly edgelist,
7+
# both using consistent node IDs (1-594).
178
load("data-raw/epigames_hourly.rda")
18-
# epigames_hourly is a list with $attributes and $edgelist
199

20-
# ---------------------------------------------------------------------------
21-
# 2. Load the hourly dynamic behavioral attributes produced by Python
22-
# ---------------------------------------------------------------------------
10+
# Load the hourly dynamic behavioral attributes
2311
dyn_attrs_path <- "playground/epigames-stuff/epigames-analysis-copy/dynamic_attrs_hourly.csv"
2412

25-
if (!file.exists(dyn_attrs_path)) {
26-
stop(
27-
"dynamic_attrs_hourly.csv not found.\n",
28-
"Please run extract_dynamic_attrs.py first:\n",
29-
" cd playground/epigames-stuff/epigames-analysis-copy\n",
30-
" source .venv/bin/activate\n",
31-
" python3 extract_dynamic_attrs.py"
32-
)
33-
}
34-
3513
dyn_attrs_hourly <- read.csv(dyn_attrs_path, stringsAsFactors = FALSE)
3614

3715
# Sanity checks
38-
stopifnot(ncol(dyn_attrs_hourly) == 5) # id, hour, mask, med, quarantine
39-
stopifnot(nrow(dyn_attrs_hourly) == 594 * 339) # 201,366 rows
40-
stopifnot(all(dyn_attrs_hourly$id %in% 1:594))
16+
stopifnot(ncol(dyn_attrs_hourly) == 5) # id, hour, mask, med, quarantine
17+
stopifnot(nrow(dyn_attrs_hourly) == 594 * 339) # 201,366 rows
18+
stopifnot(all(dyn_attrs_hourly$id %in% 1:594))
4119
stopifnot(all(dyn_attrs_hourly$hour %in% 0:338))
4220

43-
cat("Dynamic attrs loaded:", nrow(dyn_attrs_hourly), "rows,",
44-
ncol(dyn_attrs_hourly), "cols\n")
45-
cat(" Nodes wearing mask (ever):",
46-
length(unique(dyn_attrs_hourly$id[dyn_attrs_hourly$mask == 1])), "\n")
47-
cat(" Nodes in quarantine (ever):",
48-
length(unique(dyn_attrs_hourly$id[dyn_attrs_hourly$quarantine == 1])), "\n")
49-
50-
# ---------------------------------------------------------------------------
51-
# 3. Bundle into the epigames list (3 elements)
52-
# ---------------------------------------------------------------------------
21+
# Bundle into the epigames list (3 elements)
5322
epigames <- list(
54-
attributes = epigames_hourly$attributes, # static, 594 x 6
55-
edgelist = epigames_hourly$edgelist, # hourly, ~39k rows
56-
dyn_attrs = dyn_attrs_hourly # NEW: 201,366 rows (long format)
23+
attributes = epigames_hourly$attributes, # static, 594 x 6
24+
edgelist = epigames_hourly$edgelist, # hourly, ~39k rows
25+
dyn_attrs = dyn_attrs_hourly # dynamic attributes (long format)
5726
)
5827

59-
cat("\nepigames list structure:\n")
60-
cat(" $attributes:", nrow(epigames$attributes), "rows x", ncol(epigames$attributes), "cols\n")
61-
cat(" $edgelist: ", nrow(epigames$edgelist), "rows x", ncol(epigames$edgelist), "cols\n")
62-
cat(" $dyn_attrs: ", nrow(epigames$dyn_attrs), "rows x", ncol(epigames$dyn_attrs), "cols\n")
63-
64-
# ---------------------------------------------------------------------------
65-
# 4. Save compressed .rda
66-
# ---------------------------------------------------------------------------
28+
# Save compressed .rda
6729
usethis::use_data(epigames, overwrite = TRUE, compress = "xz")
68-
cat("\nSaved: data/epigames.rda\n")

data-raw/epigamesDiffNet.R

Lines changed: 19 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,20 @@
11
# data-raw/epigamesDiffNet.R
22
# Generating the daily diffnet object from epigames using collapse_timeframes()
3-
# Issue #75: Now includes vertex.dyn.attrs (mask, med, quarantine per day)
4-
#
53
# Run after data-raw/epigames.R has built data/epigames.rda.
64

75
rm(list = ls())
86
library(netdiffuseR)
97

10-
# ---------------------------------------------------------------------------
11-
# 1. Load the base epigames dataset (with dynamic attrs)
12-
# ---------------------------------------------------------------------------
8+
# Load the base epigames dataset (with dynamic attrs)
139
load("data/epigames.rda")
1410

15-
attrs <- epigames$attributes # 594 x 6: id, toa, qyes_total, qno_total, mask_prop, med_prop
16-
edges <- epigames$edgelist # hourly edgelist: sender, receiver, time (0-338), weight
17-
dyn_long <- epigames$dyn_attrs # long format: id, hour (0-338), mask, med, quarantine
11+
attrs <- epigames$attributes # 594 x 6: id, toa, qyes_total, qno_total, mask_prop, med_prop
12+
edges <- epigames$edgelist # hourly edgelist: sender, receiver, time (0-338), weight
13+
dyn_long <- epigames$dyn_attrs # long format: id, hour (0-338), mask, med, quarantine
1814

19-
# ---------------------------------------------------------------------------
20-
# 2. Collapse hourly edgelist into 15 daily windows via collapse_timeframes()
21-
# ---------------------------------------------------------------------------
22-
WINDOW_SIZE <- 24 # hours per day
23-
N_DAYS <- 15
15+
# Collapse hourly edgelist into 15 daily windows via collapse_timeframes()
16+
WINDOW_SIZE <- 24 # hours per day
17+
N_DAYS <- 15
2418

2519
daily_edgelist <- collapse_timeframes(
2620
edgelist = edges,
@@ -34,9 +28,6 @@ daily_edgelist <- collapse_timeframes(
3428
symmetric = TRUE
3529
)
3630

37-
cat("Daily edgelist: ", nrow(daily_edgelist), "rows, time range:",
38-
range(daily_edgelist$time), "\n")
39-
4031
# Build adjacency matrices
4132
adjmat <- edgelist_to_adjmat(
4233
daily_edgelist[, c("sender", "receiver")],
@@ -46,83 +37,43 @@ adjmat <- edgelist_to_adjmat(
4637
multiple = TRUE
4738
)
4839

49-
# ---------------------------------------------------------------------------
50-
# 3. Build vertex.dyn.attrs: one data.frame per day (15 total)
51-
# Each data.frame: 594 rows, columns: mask, med, quarantine (daily means)
52-
# ---------------------------------------------------------------------------
40+
# Build vertex.dyn.attrs: one data.frame per day (15 total)
41+
# Each data.frame: 594 rows, columns: mask, med, quarantine (daily means)
5342
# Map hourly data to day index (day d = hours [(d-1)*24 .. d*24-1])
54-
dyn_long$day <- (dyn_long$hour %/% WINDOW_SIZE) + 1 # 1-based day
55-
dyn_long$day <- pmin(dyn_long$day, N_DAYS) # clamp hour 336-338 to day 15
43+
dyn_long$day <- (dyn_long$hour %/% WINDOW_SIZE) + 1 # 1-based day
44+
dyn_long$day <- pmin(dyn_long$day, N_DAYS) # safeguard: cap at day 15 (hours 336-338 already map to day 15)
5645

5746
vertex_dyn <- lapply(1:N_DAYS, function(d) {
5847
sub <- dyn_long[dyn_long$day == d, ]
59-
48+
6049
# Aggregate per node: mean within each 24-hour window
6150
# (proportion of hours in that day where behavior was active)
6251
agg <- aggregate(
6352
cbind(mask, med, quarantine) ~ id,
6453
data = sub,
6554
FUN = mean
6655
)
67-
56+
6857
# Sort by id to match the node ordering in the diffnet object
6958
agg <- agg[order(agg$id), ]
7059
rownames(agg) <- NULL
71-
60+
7261
# Return only the behavior columns (not id — diffnet uses position)
7362
agg[, c("mask", "med", "quarantine")]
7463
})
7564

76-
# Sanity check: each element should be 594 rows x 3 cols
77-
stopifnot(all(sapply(vertex_dyn, nrow) == 594))
78-
stopifnot(all(sapply(vertex_dyn, ncol) == 3))
79-
80-
cat("vertex.dyn.attrs built: ", N_DAYS, "data.frames of",
81-
nrow(vertex_dyn[[1]]), "rows x", ncol(vertex_dyn[[1]]), "cols\n")
82-
cat(" Day 1 — mean mask usage:", round(mean(vertex_dyn[[1]]$mask), 3),
83-
" mean quarantine:", round(mean(vertex_dyn[[1]]$quarantine), 3), "\n")
84-
cat(" Day 15 — mean mask usage:", round(mean(vertex_dyn[[15]]$mask), 3),
85-
" mean quarantine:", round(mean(vertex_dyn[[15]]$quarantine), 3), "\n")
86-
87-
# ---------------------------------------------------------------------------
88-
# 4. Prepare TOA vector
89-
# ---------------------------------------------------------------------------
65+
# Prepare TOA vector
9066
toa_vec <- stats::setNames(attrs$toa, as.character(attrs$id))
9167

92-
# ---------------------------------------------------------------------------
93-
# 5. Assemble diffnet object
94-
# ---------------------------------------------------------------------------
68+
# Assemble diffnet object
9569
epigamesDiffNet <- as_diffnet(
9670
adjmat,
97-
toa = toa_vec,
71+
toa = toa_vec,
9872
vertex.static.attrs = attrs,
99-
vertex.dyn.attrs = vertex_dyn,
73+
vertex.dyn.attrs = vertex_dyn,
10074
t0 = 1,
10175
t1 = N_DAYS
10276
)
10377

104-
cat("\nepigamesDiffNet summary:\n")
105-
print(epigamesDiffNet)
106-
107-
# ---------------------------------------------------------------------------
108-
# 6. Quick validation: dynamic exposure vs static exposure
109-
# ---------------------------------------------------------------------------
110-
cat("\nValidating exposure() with dynamic mask attrs...\n")
111-
expo_static <- exposure(
112-
epigamesDiffNet,
113-
attrs = matrix(
114-
rep(epigamesDiffNet$vertex.static.attrs$mask_prop, N_DAYS),
115-
nrow = 594, ncol = N_DAYS
116-
)
117-
)
118-
expo_dynamic <- exposure(epigamesDiffNet, attrs = "mask")
119-
120-
cor_val <- cor(as.vector(expo_static), as.vector(expo_dynamic), use = "complete.obs")
121-
cat(" Correlation static vs dynamic mask exposure:", round(cor_val, 4), "\n")
122-
cat(" (Should be < 1.0, confirming dynamic attrs add new information)\n")
123-
124-
# ---------------------------------------------------------------------------
125-
# 7. Save
126-
# ---------------------------------------------------------------------------
78+
# Save
12779
usethis::use_data(epigamesDiffNet, overwrite = TRUE, compress = "xz")
128-
cat("\nSaved: data/epigamesDiffNet.rda\n")

0 commit comments

Comments
 (0)