11"""Pure functions to convert experiment data to domain types."""
22
3- import copy
4- import hashlib
5- import json
63import uuid
74from pathlib import Path
85from typing import List , Dict , Any , Optional
2219from .run_models import Run , RunDecision
2320
2421
def get_decider_batch_name(experiment_path: Path, root_path: Path) -> str:
    """Derive the decider batch name from an experiment path's depth under root.

    Depth 0 or 1: the experiment folder itself is the batch (flat structure).
    Depth 2+: the experiment's parent folder is the batch (nested layout with
    alignment subdirectories).

    Args:
        experiment_path: Directory of a single experiment.
        root_path: Directory that ``experiment_path`` is resolved against.

    Returns:
        The folder name to use as the decider batch name.

    Raises:
        ValueError: If ``experiment_path`` is not located under ``root_path``
            (propagated from ``Path.relative_to``).
    """
    depth = len(experiment_path.relative_to(root_path).parts)

    # Only a nested experiment has a parent that still lies inside root_path;
    # at depth 0 the parent would be a directory outside the experiment tree,
    # so fall back to the experiment folder's own name.
    if depth >= 2:
        return experiment_path.parent.name
    return experiment_path.name
35+
36+
2537def probes_from_experiment_items (items : List [ExperimentItem ]) -> List [Probe ]:
2638 """Convert experiment items to probes, deduping by probe_id."""
2739 seen = set ()
@@ -36,79 +48,46 @@ def probes_from_experiment_items(items: List[ExperimentItem]) -> List[Probe]:
3648
def deciders_from_experiments(
    experiments: List[ExperimentData],
    root_path: Path,
) -> Dict[str, Dict[str, Any]]:
    """Build one decider entry per decider batch name found in the experiments.

    Experiments are visited in order of their stringified path. Within each
    batch, the first experiment whose ADM config loads supplies the entry;
    later experiments of the same batch are skipped.

    Returns dict: {decider_name: decider_entry}
    """
    entries: Dict[str, Dict[str, Any]] = {}

    for experiment in sorted(experiments, key=lambda e: str(e.experiment_path)):
        batch_name = get_decider_batch_name(experiment.experiment_path, root_path)
        if batch_name in entries:
            continue

        config = load_experiment_adm_config(experiment.experiment_path)
        if config is None:
            # No entry is recorded, so a later experiment of this batch may
            # still provide one.
            continue

        if "structured_inference_engine" not in config:
            backbones = []
        else:
            preferred = config["structured_inference_engine"].get("model_name")
            if preferred:
                # The experiment's own model goes first, followed by every
                # other known backbone.
                backbones = [preferred] + [
                    llm for llm in LLM_BACKBONES if llm != preferred
                ]
            else:
                backbones = list(LLM_BACKBONES)

        entries[batch_name] = {
            "experiment_path": str(experiment.experiment_path),
            "experiment_config": True,
            "llm_backbones": backbones,
            "max_alignment_attributes": 10,
        }

    return entries
88+
89+
90+ def run_from_experiment_item (item : ExperimentItem , root_path : Path ) -> Optional [Run ]:
11291 """Convert ExperimentItem to Run with decision populated."""
11392 if not item .item .output :
11493 return None
@@ -134,19 +113,21 @@ def run_from_experiment_item(item: ExperimentItem) -> Optional[Run]:
134113 choice_index = output .choice ,
135114 )
136115
137- decider_name = item .experiment_path . parent . name
116+ decider_batch = get_decider_batch_name ( item .experiment_path , root_path )
138117
139118 return Run (
140119 id = str (uuid .uuid4 ()),
141120 probe_id = probe_id ,
142- decider_name = decider_name ,
121+ decider_name = decider_batch ,
143122 llm_backbone_name = item .config .adm .llm_backbone or "N/A" ,
144123 system_prompt = "" ,
145124 decider_params = decider_params ,
146125 decision = decision ,
147126 )
148127
149128
def runs_from_experiment_items(
    items: List[ExperimentItem], root_path: Path
) -> List[Run]:
    """Convert experiment items to runs, dropping items that yield no run."""
    converted = []
    for entry in items:
        candidate = run_from_experiment_item(entry, root_path)
        # Keep only truthy results, matching the conversion's "no run" signal.
        if candidate:
            converted.append(candidate)
    return converted
0 commit comments