Skip to content

Commit 14971f0

Browse files
committed
feat: add recursive experiment directory parsing with OUTDATED filtering
- Update experiment parser to recursively search all subdirectories
- Automatically filter out directories containing "OUTDATED" (case-insensitive)
- Improve documentation with flexible directory structure examples
- Add comprehensive test coverage for OUTDATED directory exclusion
1 parent 3157e46 commit 14971f0

3 files changed

Lines changed: 98 additions & 23 deletions

File tree

README.md

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,20 @@ uvx align-browser ./experiment-data --host 0.0.0.0
2828
uvx align-browser ./experiment-data --build-only
2929
```
3030

31-
### Expected Directory Structure
31+
### Directory Structure
3232

33-
The experiments directory should be the root containing pipeline directories (e.g., `pipeline_baseline`, `pipeline_random`), not an individual pipeline directory.
33+
The build system supports **flexible directory structures** and will recursively search for valid experiment directories at any depth. You can point it to any directory containing experiment data, regardless of how it's organized.
34+
35+
#### Required Files Per Experiment
36+
37+
Each experiment directory must contain:
38+
39+
- `.hydra/config.yaml` - Hydra configuration file
40+
- `input_output.json` - Experiment input/output data
41+
- `scores.json` - Scoring results
42+
- `timing.json` - Timing information
43+
44+
**Example Structure:**
3445

3546
```
3647
experiments/
@@ -49,15 +60,17 @@ experiments/
4960
└── ...
5061
```
5162

52-
The build.py script will search for:
63+
#### Automatic Filtering
64+
65+
The build system will automatically:
5366

54-
- Pipeline directories at the root level
55-
- KDMA experiment directories within each pipeline (identified by presence of `input_output.json`)
56-
- Required files: `.hydra/config.yaml`, `input_output.json`
67+
- **Recursively search** through all subdirectories
68+
- **Skip directories** containing `OUTDATED` in their path (case-insensitive)
69+
- **Only process directories** that contain all required files
5770

5871
### Sharing Results
5972

60-
The browser application stores the current selection state in the URL, making it easy to share specific views:
73+
The browser application stores the current selection state in the URL so you can:
6174

6275
- **Share a specific scenario**: URLs automatically update when you select different pipelines, KDMAs, or experiments
6376
- **Bookmark results**: Save URLs to return to specific experiment comparisons
@@ -78,7 +91,7 @@ For active development of the HTML/CSS/JavaScript:
7891

7992
```bash
8093
# Development mode: edit files in align-browser-site/ directory directly
81-
uv run align-browser ./experiment-data/phase2_june --dev
94+
uv run align-browser --dev ./experiment-data/phase2_june
8295
```
8396

8497
Edit `align-browser-site/index.html`, `align-browser-site/app.js`, and `align-browser-site/style.css`, then refresh the browser to see changes immediately.

align_browser/experiment_parser.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ def parse_experiments_directory(experiments_root: Path) -> List[ExperimentData]:
99
"""
1010
Parse the experiments directory structure and return a list of ExperimentData.
1111
12+
Recursively searches through the directory structure to find all directories
13+
that contain the required experiment files (input_output.json, scores.json,
14+
timing.json, and .hydra/config.yaml).
15+
1216
Args:
1317
experiments_root: Path to the root experiments directory
1418
@@ -17,26 +21,27 @@ def parse_experiments_directory(experiments_root: Path) -> List[ExperimentData]:
1721
"""
1822
experiments = []
1923

20-
for pipeline_dir in experiments_root.iterdir():
21-
if not pipeline_dir.is_dir():
24+
# Recursively find all directories that have required experiment files
25+
for experiment_dir in experiments_root.rglob("*"):
26+
if not experiment_dir.is_dir():
2227
continue
2328

24-
for experiment_dir in pipeline_dir.glob("*"):
25-
if not experiment_dir.is_dir():
26-
continue
29+
# Skip directories with "OUTDATED" anywhere in their full path (case-insensitive; NOTE: this also matches ancestors of experiments_root)
30+
if "OUTDATED" in str(experiment_dir).upper():
31+
continue
2732

28-
# Check if directory has all required files
29-
if not ExperimentData.has_required_files(experiment_dir):
30-
continue
33+
# Check if directory has all required files
34+
if not ExperimentData.has_required_files(experiment_dir):
35+
continue
3136

32-
try:
33-
# Load experiment data using Pydantic models
34-
experiment = ExperimentData.from_directory(experiment_dir)
35-
experiments.append(experiment)
37+
try:
38+
# Load experiment data using Pydantic models
39+
experiment = ExperimentData.from_directory(experiment_dir)
40+
experiments.append(experiment)
3641

37-
except Exception as e:
38-
print(f"Error processing {experiment_dir}: {e}")
39-
continue
42+
except Exception as e:
43+
print(f"Error processing {experiment_dir}: {e}")
44+
continue
4045

4146
return experiments
4247

align_browser/test_experiment_parser.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,63 @@ def test_parse_experiments_directory():
266266
assert experiments[0].key == "pipeline_random_llama3.3-70b_affiliation-0.5"
267267

268268

269+
def test_parse_experiments_directory_excludes_outdated():
270+
"""Test that parse_experiments_directory correctly excludes OUTDATED directories."""
271+
with tempfile.TemporaryDirectory() as temp_dir:
272+
temp_path = Path(temp_dir)
273+
274+
# Create experiments structure
275+
experiments_root = temp_path / "experiments"
276+
experiments_root.mkdir()
277+
278+
# Create a valid experiment directory
279+
pipeline_dir = experiments_root / "pipeline_test"
280+
pipeline_dir.mkdir()
281+
282+
valid_experiment = pipeline_dir / "affiliation-0.5"
283+
valid_experiment.mkdir()
284+
hydra_dir = valid_experiment / ".hydra"
285+
hydra_dir.mkdir()
286+
287+
# Create required files for valid experiment
288+
config_data = create_sample_config_data()
289+
with open(hydra_dir / "config.yaml", "w") as f:
290+
yaml.dump(config_data, f)
291+
with open(valid_experiment / "input_output.json", "w") as f:
292+
json.dump(create_sample_input_output_data(), f)
293+
with open(valid_experiment / "scores.json", "w") as f:
294+
json.dump(create_sample_scores_data(), f)
295+
with open(valid_experiment / "timing.json", "w") as f:
296+
json.dump(create_sample_timing_data(), f)
297+
298+
# Create OUTDATED experiment directory with all required files
299+
outdated_experiment = pipeline_dir / "OUTDATED-affiliation-0.5"
300+
outdated_experiment.mkdir()
301+
outdated_hydra_dir = outdated_experiment / ".hydra"
302+
outdated_hydra_dir.mkdir()
303+
304+
# Create required files for OUTDATED experiment (same structure)
305+
with open(outdated_hydra_dir / "config.yaml", "w") as f:
306+
yaml.dump(config_data, f)
307+
with open(outdated_experiment / "input_output.json", "w") as f:
308+
json.dump(create_sample_input_output_data(), f)
309+
with open(outdated_experiment / "scores.json", "w") as f:
310+
json.dump(create_sample_scores_data(), f)
311+
with open(outdated_experiment / "timing.json", "w") as f:
312+
json.dump(create_sample_timing_data(), f)
313+
314+
# Test parsing - should only find the valid experiment, not the OUTDATED one
315+
experiments = parse_experiments_directory(experiments_root)
316+
assert len(experiments) == 1, f"Expected 1 experiment, found {len(experiments)}"
317+
assert experiments[0].key == "pipeline_random_llama3.3-70b_affiliation-0.5"
318+
319+
# Verify the OUTDATED experiment was actually excluded
320+
experiment_paths = [str(exp.experiment_path) for exp in experiments]
321+
assert not any("OUTDATED" in path for path in experiment_paths), (
322+
f"OUTDATED experiment was not filtered out: {experiment_paths}"
323+
)
324+
325+
269326
def test_build_manifest_from_experiments():
270327
"""Test building manifest from experiments."""
271328
with tempfile.TemporaryDirectory() as temp_dir:

0 commit comments

Comments
 (0)