Skip to content

Commit 9843b4e

Browse files
committed
Improve runs table UX and fix decider batch naming
- Rename "Add Selected to Comparison" to "Compare Selected" and replace existing runs instead of appending
- Rename "Clear All" to "Delete All" for consistency
- Add "Clear Filters" button to the toolbar and to the no-data state
- Fix decider naming to use the batch folder name consistently
- Improve alignment comparison by comparing only value and score
1 parent 0508890 commit 9843b4e

9 files changed

Lines changed: 151 additions & 107 deletions

align_app/adm/decider_definitions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ def get_system_prompt(
237237
"""Generate system prompt for a decider with given alignment target."""
238238
decider_main_config = all_deciders.get(decider)
239239
if not decider_main_config:
240-
raise ValueError(f"Decider '{decider}' not found in all_deciders configuration")
240+
return "Decider configuration not available"
241241

242242
generate_sys_prompt = decider_main_config.get("system_prompt_generator")
243243
if not generate_sys_prompt:
Lines changed: 50 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
"""Pure functions to convert experiment data to domain types."""
22

3-
import copy
4-
import hashlib
5-
import json
63
import uuid
74
from pathlib import Path
85
from typing import List, Dict, Any, Optional
@@ -22,6 +19,21 @@
2219
from .run_models import Run, RunDecision
2320

2421

22+
def get_decider_batch_name(experiment_path: Path, root_path: Path) -> str:
    """Derive the decider batch name from an experiment path's depth under root.

    Depth 1 (flat layout): the experiment folder itself is the batch.
    Depth 2+ (nested layout with alignment subdirs): the parent folder is
    the batch.

    Raises ValueError (from ``Path.relative_to``) if *experiment_path* is
    not located under *root_path*.
    """
    relative_parts = experiment_path.relative_to(root_path).parts
    # More than one component means we are inside an alignment subdirectory,
    # so the batch is one level up.
    if len(relative_parts) > 1:
        return experiment_path.parent.name
    return experiment_path.name
35+
36+
2537
def probes_from_experiment_items(items: List[ExperimentItem]) -> List[Probe]:
2638
"""Convert experiment items to probes, deduping by probe_id."""
2739
seen = set()
@@ -36,79 +48,46 @@ def probes_from_experiment_items(items: List[ExperimentItem]) -> List[Probe]:
3648

3749
def deciders_from_experiments(
    experiments: List[ExperimentData],
    root_path: Path,
) -> Dict[str, Dict[str, Any]]:
    """Extract deciders from experiments, one per unique decider batch name.

    Experiments are visited in deterministic (path-sorted) order so the
    first experiment of each batch supplies that batch's config.

    Returns dict: {decider_name: decider_entry}
    """
    deciders: Dict[str, Dict[str, Any]] = {}

    for exp in sorted(experiments, key=lambda e: str(e.experiment_path)):
        batch_name = get_decider_batch_name(exp.experiment_path, root_path)
        # First experiment seen for a batch wins; later ones are ignored.
        if batch_name in deciders:
            continue

        adm_config = load_experiment_adm_config(exp.experiment_path)
        if adm_config is None:
            # Config unreadable here; a later experiment in the same batch
            # may still provide one.
            continue

        if "structured_inference_engine" in adm_config:
            experiment_llm = adm_config["structured_inference_engine"].get("model_name")
            if experiment_llm:
                # Put the experiment's own backbone first, then the rest.
                llm_backbones = [experiment_llm] + [
                    llm for llm in LLM_BACKBONES if llm != experiment_llm
                ]
            else:
                llm_backbones = list(LLM_BACKBONES)
        else:
            llm_backbones = []

        deciders[batch_name] = {
            "experiment_path": str(exp.experiment_path),
            "experiment_config": True,
            "llm_backbones": llm_backbones,
            "max_alignment_attributes": 10,
        }

    return deciders
88+
89+
90+
def run_from_experiment_item(item: ExperimentItem, root_path: Path) -> Optional[Run]:
11291
"""Convert ExperimentItem to Run with decision populated."""
11392
if not item.item.output:
11493
return None
@@ -134,19 +113,21 @@ def run_from_experiment_item(item: ExperimentItem) -> Optional[Run]:
134113
choice_index=output.choice,
135114
)
136115

137-
decider_name = item.experiment_path.parent.name
116+
decider_batch = get_decider_batch_name(item.experiment_path, root_path)
138117

139118
return Run(
140119
id=str(uuid.uuid4()),
141120
probe_id=probe_id,
142-
decider_name=decider_name,
121+
decider_name=decider_batch,
143122
llm_backbone_name=item.config.adm.llm_backbone or "N/A",
144123
system_prompt="",
145124
decider_params=decider_params,
146125
decision=decision,
147126
)
148127

149128

150-
def runs_from_experiment_items(items: List[ExperimentItem]) -> List[Run]:
129+
def runs_from_experiment_items(
    items: List[ExperimentItem], root_path: Path
) -> List[Run]:
    """Convert experiment items to runs, filtering out items without output."""
    runs: List[Run] = []
    for item in items:
        converted = run_from_experiment_item(item, root_path)
        # run_from_experiment_item returns None for items lacking output.
        if converted:
            runs.append(converted)
    return runs

align_app/app/import_experiments.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from ..adm.experiment_converters import (
2222
deciders_from_experiments,
23+
get_decider_batch_name,
2324
probes_from_experiment_items,
2425
)
2526
from ..adm.experiment_config_loader import load_experiment_adm_config
@@ -36,6 +37,7 @@ class StoredExperimentItem:
3637
item: ExperimentItem
3738
resolved_config: Dict
3839
cache_key: str
40+
decider_batch: str
3941

4042

4143
@dataclass
@@ -59,13 +61,18 @@ def import_experiments(experiments_path: Path) -> ExperimentImportResult:
5961
]
6062

6163
probes = probes_from_experiment_items(all_items)
62-
deciders = deciders_from_experiments(experiments)
64+
deciders = deciders_from_experiments(experiments, experiments_path)
6365

6466
items: Dict[str, StoredExperimentItem] = {}
6567
for item in all_items:
6668
resolved_config = load_experiment_adm_config(item.experiment_path) or {}
67-
cache_key = compute_experiment_item_cache_key(item, resolved_config)
68-
items[cache_key] = StoredExperimentItem(item, resolved_config, cache_key)
69+
decider_batch = get_decider_batch_name(item.experiment_path, experiments_path)
70+
cache_key = compute_experiment_item_cache_key(
71+
item, resolved_config, decider_batch
72+
)
73+
items[cache_key] = StoredExperimentItem(
74+
item, resolved_config, cache_key, decider_batch
75+
)
6976

7077
print(f"Loaded {len(items)} experiment items from {len(experiments)} experiments")
7178
return ExperimentImportResult(probes, deciders, items)
@@ -114,12 +121,10 @@ def run_from_stored_experiment_item(stored: StoredExperimentItem) -> Optional[Ru
114121
choice_index=output.choice,
115122
)
116123

117-
decider_name = item.experiment_path.parent.name
118-
119124
return Run(
120125
id=str(uuid.uuid4()),
121126
probe_id=probe_id,
122-
decider_name=decider_name,
127+
decider_name=stored.decider_batch,
123128
llm_backbone_name=item.config.adm.llm_backbone or "N/A",
124129
system_prompt="",
125130
decider_params=decider_params,

align_app/app/runs_presentation.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
def compute_experiment_item_cache_key(
1818
item: ExperimentItem,
1919
resolved_config: Dict[str, Any],
20+
decider_batch: str,
2021
) -> str:
2122
"""Compute cache_key for an experiment item (same as Run.compute_cache_key).
2223
2324
Takes resolved_config as param since it must be loaded while paths are valid.
2425
"""
2526
probe_id = get_probe_id(item.item)
26-
decider_name = item.experiment_path.parent.name
2727
llm_backbone = item.config.adm.llm_backbone or "N/A"
2828

2929
decider_params = DeciderParams(
@@ -32,11 +32,11 @@ def compute_experiment_item_cache_key(
3232
resolved_config=resolved_config,
3333
)
3434

35-
return hash_run_params(probe_id, decider_name, llm_backbone, decider_params)
35+
return hash_run_params(probe_id, decider_batch, llm_backbone, decider_params)
3636

3737

3838
def experiment_item_to_table_row(
39-
item: ExperimentItem, cache_key: str
39+
item: ExperimentItem, cache_key: str, decider_batch: str
4040
) -> Dict[str, Any]:
4141
"""Convert ExperimentItem to table row format."""
4242
scene_id = ""
@@ -52,7 +52,7 @@ def experiment_item_to_table_row(
5252

5353
kdma_values = item.config.alignment_target.kdma_values
5454
alignment_summary = (
55-
", ".join(f"{kv.kdma} {kv.value}" for kv in kdma_values)
55+
", ".join(f"{readable(kv.kdma)} {kv.value}" for kv in kdma_values)
5656
if kdma_values
5757
else "None"
5858
)
@@ -66,7 +66,7 @@ def experiment_item_to_table_row(
6666
"scenario_id": item.item.input.scenario_id,
6767
"scene_id": scene_id,
6868
"probe_text": display_state,
69-
"decider_name": item.config.adm.name,
69+
"decider_name": decider_batch,
7070
"llm_backbone_name": item.config.adm.llm_backbone or "N/A",
7171
"alignment_summary": alignment_summary,
7272
"decision_text": decision_text,

align_app/app/runs_state_adapter.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def _sync_from_runs_data(self, runs_dict: Dict[str, Run]):
8080
stored_items = self.runs_registry.get_all_experiment_items()
8181
experiment_table_rows = [
8282
runs_presentation.experiment_item_to_table_row(
83-
stored.item, stored.cache_key
83+
stored.item, stored.cache_key, stored.decider_batch
8484
)
8585
for cache_key, stored in stored_items.items()
8686
if cache_key not in active_cache_keys
@@ -607,7 +607,7 @@ def add_selected_runs_to_compare(self):
607607
if not selected:
608608
return
609609

610-
existing = list(self.state.runs_to_compare)
610+
new_runs_to_compare = []
611611

612612
for item in selected:
613613
cache_key = item["id"] if isinstance(item, dict) else item
@@ -617,10 +617,10 @@ def add_selected_runs_to_compare(self):
617617
if not run:
618618
run = self.runs_registry.materialize_experiment_item(cache_key)
619619

620-
if run and run.id not in existing:
621-
existing.append(run.id)
620+
if run and run.id not in new_runs_to_compare:
621+
new_runs_to_compare.append(run.id)
622622

623-
self.state.runs_to_compare = existing
623+
self.state.runs_to_compare = new_runs_to_compare
624624
self.state.runs_table_modal_open = False
625625
self.state.runs_table_selected = []
626626
self._sync_from_runs_data(self.runs_registry.get_all_runs())

align_app/app/runs_table_filter.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ class RunsTableFilter:
6666
def __init__(self, server):
6767
self.server = server
6868
self._all_rows: List[Dict[str, Any]] = []
69+
self.controller = server.controller
6970

7071
self.state.runs_table_filter_scenario = []
7172
self.state.runs_table_filter_scene = []
@@ -74,6 +75,8 @@ def __init__(self, server):
7475
self.state.runs_table_filter_alignment = []
7576
self.state.runs_table_filter_decision = []
7677

78+
self.controller.set("clear_all_table_filters")(self.clear_all_filters)
79+
7780
self.state.runs_table_scenario_options = []
7881
self.state.runs_table_scene_options = []
7982
self.state.runs_table_decider_options = []
@@ -112,3 +115,12 @@ def _apply_filters(self):
112115
for state_key, col_key in FILTER_COLUMNS
113116
]
114117
self.state.runs_table_items = filter_rows(self._all_rows, filters)
118+
119+
def clear_all_filters(self):
    """Reset every runs-table column filter and the search box to empty."""
    filter_keys = (
        "runs_table_filter_scenario",
        "runs_table_filter_scene",
        "runs_table_filter_decider",
        "runs_table_filter_llm",
        "runs_table_filter_alignment",
        "runs_table_filter_decision",
    )
    # Assign a fresh empty list per key so state entries don't share one object.
    for key in filter_keys:
        setattr(self.state, key, [])
    self.state.runs_table_search = ""

0 commit comments

Comments
 (0)