Skip to content

Commit bf6e0e4

Browse files
committed
Add analysis notebook and final training script
1 parent f8b2813 commit bf6e0e4

3 files changed

Lines changed: 531 additions & 0 deletions

File tree

experiments.bak.zip

2 MB
Binary file not shown.
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# HPO Analysis"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import json\n",
17+
"import sys\n",
18+
"from pathlib import Path\n",
19+
"from collections import defaultdict\n",
20+
"import matplotlib.pyplot as plt\n",
21+
"import numpy as np\n",
22+
"\n",
# Repository root: the parent directory when the kernel's working directory is
# named 'notebooks', otherwise the working directory itself.
REPO_ROOT = Path.cwd()
if REPO_ROOT.name == 'notebooks':
    REPO_ROOT = REPO_ROOT.parent

# Put the repository root on sys.path. The membership check keeps this cell
# idempotent: re-running it does not append a duplicate entry each time.
if str(REPO_ROOT) not in sys.path:
    sys.path.append(str(REPO_ROOT))

# Result directories scanned by the loading cells.
EXPERIMENT_DIR = REPO_ROOT / '.cache' / 'experiment'
FINAL_TRAINING_DIR = REPO_ROOT / '.cache' / 'final_training'

# Fixed plot color per optimizer label; plotting code falls back to grey
# ('#888888') for labels that are not listed here.
COLORS = {'RS': '#1f77b4', 'GA-STANDARD': '#ff7f0e', 'GA-MEMETIC': '#d62728', 'PSO': '#2ca02c'}
29+
"\n",
def parse_experiment_name(exp_name):
    """Split an experiment directory name of the form ``<model>-<optimizer>``.

    The name is split at the first hyphen only, so the optimizer part may
    itself contain hyphens (``'cnn-ga-memetic'`` -> ``('CNN', 'GA-MEMETIC')``).
    Both parts are returned upper-cased.

    Args:
        exp_name: Directory name to parse.

    Returns:
        ``(model, optimizer)`` as upper-case strings, or ``(None, None)`` when
        the name has no hyphen or when either side of the first hyphen is empty.
    """
    model, separator, optimizer = exp_name.partition('-')
    # Reject names such as '-pso' or 'cnn-': an empty model would otherwise be
    # returned as '' and silently grouped under an empty key by callers that
    # only check the optimizer.
    if not separator or not model or not optimizer:
        return None, None
    return model.upper(), optimizer.upper()
35+
"\n",
def load_experiment_summaries(exp_dir, filter_fn=None):
    """Load the ``summary.json`` of every ``run_*`` sub-directory of ``exp_dir``.

    Args:
        exp_dir: Experiment directory (``Path`` or path string) whose direct
            children named ``run_*`` are scanned.
        filter_fn: Optional predicate called with each parsed summary; only
            summaries for which it returns a truthy value are kept.

    Returns:
        A list of ``(run_dir_name, summary)`` tuples in sorted path order.
        Empty when ``exp_dir`` is not an existing directory. Runs without a
        ``summary.json``, or whose file cannot be read or parsed, are skipped.
    """
    exp_dir = Path(exp_dir)
    if not exp_dir.is_dir():
        return []

    data = []
    for run_dir in sorted(exp_dir.iterdir()):
        if not run_dir.is_dir() or not run_dir.name.startswith('run_'):
            continue
        summary_file = run_dir / 'summary.json'
        if not summary_file.is_file():
            continue
        try:
            with open(summary_file, encoding='utf-8') as f:
                summary = json.load(f)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses. One broken file should not abort the whole analysis.
            print(f"Skipping unreadable summary {summary_file}: {exc}")
            continue
        if filter_fn is None or filter_fn(summary):
            data.append((run_dir.name, summary))
    return data
48+
"\n",
49+
"print(f\"Repository root: {REPO_ROOT}\")\n"
50+
]
51+
},
52+
{
53+
"cell_type": "markdown",
54+
"metadata": {},
55+
"source": [
56+
"## Box Plots of Final Fitness"
57+
]
58+
},
59+
{
60+
"cell_type": "code",
61+
"execution_count": null,
62+
"metadata": {},
63+
"outputs": [],
64+
"source": [
# hpo_grouped[MODEL][OPTIMIZER] -> list of final fitness values, one per HPO run
# whose summary has a non-null 'final_fitness'.
hpo_grouped = defaultdict(lambda: defaultdict(list))

for experiment_path in sorted(EXPERIMENT_DIR.iterdir()):
    if experiment_path.is_dir():
        model_name, optimizer_name = parse_experiment_name(experiment_path.name)
        if optimizer_name:
            finished_runs = load_experiment_summaries(
                experiment_path,
                lambda s: s.get('final_fitness') is not None,
            )
            for _run_name, run_summary in finished_runs:
                hpo_grouped[model_name][optimizer_name].append(run_summary['final_fitness'])

# Report how many runs were loaded in total and per model/optimizer pair.
total_runs = sum(
    len(fitness_values)
    for per_optimizer in hpo_grouped.values()
    for fitness_values in per_optimizer.values()
)
print(f"Loaded {total_runs} HPO runs\n")
for model_name in sorted(hpo_grouped):
    print(f"{model_name}:")
    per_optimizer = hpo_grouped[model_name]
    for optimizer_name in sorted(per_optimizer):
        fitness_values = per_optimizer[optimizer_name]
        print(f" {optimizer_name}: {len(fitness_values)} runs, mean={np.mean(fitness_values):.4f}")
82+
]
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": null,
87+
"metadata": {},
88+
"outputs": [],
89+
"source": [
# One box plot panel per model, one box per optimizer, showing the distribution
# of final HPO fitness values.
models = sorted(hpo_grouped.keys())

if not models:
    # plt.subplots() rejects a grid with zero columns, so report instead of raising.
    print("No HPO results available to plot.")
else:
    # squeeze=False always returns a 2-D array of Axes, so the single-model
    # case needs no special handling.
    fig, axes = plt.subplots(1, len(models), figsize=(5 * len(models), 5), squeeze=False)

    for ax, model in zip(axes[0], models):
        optimizers = sorted(hpo_grouped[model].keys())
        data_to_plot = [hpo_grouped[model][opt] for opt in optimizers]
        all_values = [val for sublist in data_to_plot for val in sublist]

        if all_values:
            # Clip the view to the 2nd-98th percentile range (plus a 10% margin)
            # so extreme outliers do not flatten the boxes.
            y_min, y_max = np.percentile(all_values, [2, 98])
            if y_max > y_min:
                margin = (y_max - y_min) * 0.1
                ax.set_ylim(y_min - margin, y_max + margin)
            # Otherwise all values coincide; leave the default autoscaling
            # rather than passing identical lower and upper limits.

        bp = ax.boxplot(data_to_plot, tick_labels=optimizers, patch_artist=True)
        for patch in bp['boxes']:
            patch.set_facecolor('lightblue')

        ax.set_title(f'{model} - Final Fitness (HPO)', fontweight='bold')
        ax.set_xlabel('Optimizer')
        ax.set_ylabel('Composite Fitness')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
115+
]
116+
},
117+
{
118+
"cell_type": "markdown",
119+
"metadata": {},
120+
"source": [
121+
"## Test Set Results"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": null,
127+
"metadata": {},
128+
"outputs": [],
129+
"source": [
def _run_sort_key(run_name):
    """Sort key for 'run_*' names that compares plain integer suffixes as numbers.

    With this key 'run_10' sorts after 'run_9'. Names whose suffix is not a
    plain integer keep lexicographic order and sort after the numeric ones.
    """
    suffix = run_name[len('run_'):]
    if suffix.isdecimal():
        return (0, int(suffix), run_name)
    return (1, 0, run_name)


def _has_test_composite(summary):
    """Return True when the summary holds a non-null test_metrics['composite_fitness']."""
    # `or {}` also covers summaries that store an explicit null under 'test_metrics'.
    return (summary.get('test_metrics') or {}).get('composite_fitness') is not None


# final_grouped[MODEL] -> list of dicts with the test metrics of one run per optimizer.
final_grouped = defaultdict(list)

for exp_dir in sorted(FINAL_TRAINING_DIR.iterdir()):
    if not exp_dir.is_dir():
        continue
    model, optimizer = parse_experiment_name(exp_dir.name)
    if not optimizer:
        continue

    # load_experiment_summaries already restricts itself to run_* directories,
    # so no separate directory listing is needed here.
    summaries = load_experiment_summaries(exp_dir, _has_test_composite)
    if not summaries:
        continue

    # Report a single run per experiment: the one whose directory name sorts
    # last, with numeric suffixes ordered numerically rather than as text.
    # NOTE(review): presumably the last run is the most recent one; confirm.
    _, summary = max(summaries, key=lambda item: _run_sort_key(item[0]))
    test_metrics = summary['test_metrics']
    final_grouped[model].append({
        'Optimizer': optimizer,
        'Composite': test_metrics['composite_fitness'],
        'Accuracy': test_metrics.get('accuracy'),
        'F1': test_metrics.get('f1_score')
    })

print(f"Loaded {sum(len(v) for v in final_grouped.values())} final training results\n")
for model in sorted(final_grouped.keys()):
    print(f"{model}:")
    for entry in final_grouped[model]:
        print(f" {entry['Optimizer']}: composite={entry['Composite']:.4f}")
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"execution_count": null,
163+
"metadata": {},
164+
"outputs": [],
165+
"source": [
# One bar chart panel per model: test-set composite fitness for each optimizer.
models = sorted(final_grouped.keys())

if not models:
    # plt.subplots() rejects a grid with zero columns, so report instead of raising.
    print("No final training results available to plot.")
else:
    # squeeze=False always returns a 2-D array of Axes, so the single-model
    # case needs no special handling.
    fig, axes = plt.subplots(1, len(models), figsize=(5 * len(models), 5), squeeze=False)

    for ax, model in zip(axes[0], models):
        entries = final_grouped[model]
        labels = [e['Optimizer'] for e in entries]
        values = [e['Composite'] for e in entries]
        # Optimizers without an entry in COLORS are drawn in grey.
        bar_colors = [COLORS.get(opt, '#888888') for opt in labels]

        bars = ax.bar(labels, values, color=bar_colors, alpha=0.8, edgecolor='black')
        # Print each value just above its bar.
        for bar, val in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.01,
                    f'{val:.4f}', ha='center', va='bottom', fontsize=9)

        ax.set_ylim(0, 1)
        ax.set_title(f'{model} - Test Performance', fontweight='bold')
        ax.set_xlabel('Optimizer')
        ax.set_ylabel('Composite Fitness')
        ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()
190+
]
191+
},
192+
{
193+
"cell_type": "markdown",
194+
"metadata": {},
195+
"source": [
196+
"## Convergence Plots"
197+
]
198+
},
199+
{
200+
"cell_type": "code",
201+
"execution_count": null,
202+
"metadata": {},
203+
"outputs": [],
204+
"source": [
# convergence_data[MODEL][OPTIMIZER] -> list of 'best_fitness' traces, one per
# run whose summary holds a dict under 'convergence_trace'.
convergence_data = defaultdict(lambda: defaultdict(list))

for experiment_path in sorted(EXPERIMENT_DIR.iterdir()):
    if experiment_path.is_dir():
        model_name, optimizer_name = parse_experiment_name(experiment_path.name)
        if optimizer_name:
            traced_runs = load_experiment_summaries(
                experiment_path,
                lambda s: 'convergence_trace' in s and isinstance(s.get('convergence_trace'), dict),
            )
            for _run_name, run_summary in traced_runs:
                fitness_curve = run_summary['convergence_trace'].get('best_fitness', [])
                # Runs with a missing or empty trace are left out.
                if fitness_curve:
                    convergence_data[model_name][optimizer_name].append(fitness_curve)

print(f"Loaded convergence data for {len(convergence_data)} models")
for model_name in sorted(convergence_data):
    run_count = sum(len(curves) for curves in convergence_data[model_name].values())
    print(f"{model_name}: {run_count} runs")
223+
]
224+
},
225+
{
226+
"cell_type": "code",
227+
"execution_count": null,
228+
"metadata": {},
229+
"outputs": [],
230+
"source": [
# One panel per model: mean best-fitness curve per optimizer with a band of
# one standard deviation across runs.
models = sorted(convergence_data.keys())

if not models:
    # plt.subplots() rejects a grid with zero columns, so report instead of raising.
    print("No convergence traces available to plot.")
else:
    # squeeze=False always returns a 2-D array of Axes, so the single-model
    # case needs no special handling.
    fig, axes = plt.subplots(1, len(models), figsize=(6 * len(models), 5), squeeze=False)

    for ax, model in zip(axes[0], models):
        for optimizer in sorted(convergence_data[model].keys()):
            runs = convergence_data[model][optimizer]
            if not runs:
                continue

            # Traces can differ in length; shorter ones are extended by
            # repeating their last value so every row has max_len entries.
            # A trace that is already full length gets zero padding elements.
            max_len = max(len(r) for r in runs)
            padded = [list(r) + [r[-1]] * (max_len - len(r)) for r in runs]
            runs_array = np.array(padded)

            mean_curve = runs_array.mean(axis=0)
            std_curve = runs_array.std(axis=0)
            generations = np.arange(len(mean_curve))
            color = COLORS.get(optimizer, '#888888')

            ax.plot(generations, mean_curve, label=optimizer, color=color, linewidth=2)
            ax.fill_between(generations, mean_curve - std_curve, mean_curve + std_curve,
                            color=color, alpha=0.2)

        ax.set_title(f'{model} - Convergence', fontweight='bold')
        ax.set_xlabel('Generation')
        ax.set_ylabel('Best Fitness')
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
263+
]
264+
},
265+
{
266+
"cell_type": "markdown",
267+
"metadata": {},
268+
"source": [
269+
"## Wilcoxon Tests"
270+
]
271+
},
272+
{
273+
"cell_type": "code",
274+
"execution_count": null,
275+
"metadata": {},
276+
"outputs": [],
277+
"source": [
from scipy.stats import wilcoxon
from itertools import combinations

# Pairwise Wilcoxon signed-rank tests between the optimizers of each model,
# using the final HPO fitness values collected in hpo_grouped.
# NOTE(review): the signed-rank test is a paired test. Scores are paired by
# their position in each list, i.e. by sorted run directory order. This
# presumably assumes run i of both optimizers is comparable (same seed or
# setup); confirm.
# NOTE(review): no multiple-comparison correction is applied to the p-values.
for model in sorted(hpo_grouped.keys()):
    print(f"\n--- {model} ---")
    optimizer_scores = hpo_grouped[model]

    for opt1, opt2 in combinations(sorted(optimizer_scores.keys()), 2):
        scores1 = optimizer_scores[opt1]
        scores2 = optimizer_scores[opt2]

        if len(scores1) != len(scores2):
            print(f"{opt1} vs {opt2}: Sample size mismatch ({len(scores1)} vs {len(scores2)})")
            continue
        if not scores1:
            # Equal but empty samples are not a size mismatch; report them as such.
            print(f"{opt1} vs {opt2}: No samples to compare")
            continue

        try:
            _, p = wilcoxon(scores1, scores2)
        except ValueError as exc:
            # Raised, for example, when every paired difference is zero under
            # the default zero_method; keep going with the remaining pairs.
            print(f"{opt1} vs {opt2}: Test not applicable ({exc})")
            continue

        if np.isnan(p):
            print(f"{opt1} vs {opt2}: p-value undefined")
            continue

        sig = ' (Significant)' if p < 0.05 else ''
        print(f"{opt1} vs {opt2}: p-value = {p:.5f}{sig}")
298+
]
299+
}
300+
],
301+
"metadata": {
302+
"kernelspec": {
303+
"display_name": ".venv",
304+
"language": "python",
305+
"name": "python3"
306+
},
307+
"language_info": {
308+
"codemirror_mode": {
309+
"name": "ipython",
310+
"version": 3
311+
},
312+
"file_extension": ".py",
313+
"mimetype": "text/x-python",
314+
"name": "python",
315+
"nbconvert_exporter": "python",
316+
"pygments_lexer": "ipython3",
317+
"version": "3.13.7"
318+
}
319+
},
320+
"nbformat": 4,
321+
"nbformat_minor": 2
322+
}

0 commit comments

Comments
 (0)