diff --git a/.gitignore b/.gitignore
index a781e8f..2b043c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ wheels/
# Virtual environments
.venv
+.ruff-venv
# Misc
.DS_Store
@@ -16,3 +17,9 @@ simulation_results/
# Result files
*.json
+
+# Jupyter notebooks
+*.ipynb
+
+# Syncthing temporary files
+.syncthing.*.tmp
diff --git a/players/player_10/Analyse_results.ipynb b/players/player_10/Analyse_results.ipynb
new file mode 100644
index 0000000..c9deba4
--- /dev/null
+++ b/players/player_10/Analyse_results.ipynb
@@ -0,0 +1,896 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ac93bef8-0df4-4f98-bc7e-f1fc50bc29cb",
+ "metadata": {},
+ "source": [
+    "Results analysis and visualization tools for Monte Carlo simulations.\n",
+    "\n",
+    "This module provides tools to analyze simulation results and create visualizations\n",
+    "to understand the performance of different Player10 configurations.\n",
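+    "\n",
+    "A minimal usage sketch (the results file path below is hypothetical):\n",
+    "\n",
+    "```python\n",
+    "analyzer = ResultsAnalyzer('results/monte_carlo_results.json')\n",
+    "analyzer.print_detailed_analysis()\n",
+    "analyzer.plot_altruism_comparison(save_path='altruism_comparison.png')\n",
+    "```"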
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "0264f566-fb53-4e69-974b-d6cd7313bc9d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Matplotlib is building the font cache; this may take a moment.\n"
+ ]
+    }
+ ],
+ "source": [
+ "from __future__ import annotations\n",
+ "\n",
+ "import argparse\n",
+ "import os\n",
+ "from pathlib import Path\n",
+ "from typing import Any\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "\n",
+    "import sys\n",
+    "\n",
+    "# The package-relative import ('from ..sim.monte_carlo import ...') fails when the notebook is run\n",
+    "# standalone, so add the repository root to sys.path and use an absolute import instead.\n",
+    "# NOTE: this assumes the notebook's working directory is players/player_10/.\n",
+    "sys.path.append(str(Path.cwd().resolve().parents[1]))\n",
+    "\n",
+    "from players.player_10.sim.monte_carlo import MonteCarloSimulator, SimulationResult"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5efd409-016b-4a02-ab5d-3a507b905b46",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ----------------------------\n",
+ "# Analyzer\n",
+ "# ----------------------------\n",
+ "\n",
+ "class ResultsAnalyzer:\n",
+ " \"\"\"Analyzer for Monte Carlo simulation results (enhanced).\"\"\"\n",
+ "\n",
+ " def __init__(self, results_file: str | None = None):\n",
+ " \"\"\"\n",
+ " Initialize the analyzer.\n",
+ "\n",
+ " Args:\n",
+ " results_file: Path to results JSON file to load\n",
+ " \"\"\"\n",
+ " self.simulator = MonteCarloSimulator()\n",
+ " self.results: list[SimulationResult] = []\n",
+ " self.metadata: dict[str, Any] = {}\n",
+ "\n",
+ " if results_file:\n",
+ " self.load_results(results_file)\n",
+ "\n",
+ " # ---------- IO ----------\n",
+ "\n",
+ " def load_results(self, filename: str):\n",
+ " \"\"\"Load results from a JSON file (produced by MonteCarloSimulator.save_results).\"\"\"\n",
+ " self.results = self.simulator.load_results(filename)\n",
+ " self.metadata = self.simulator.last_metadata\n",
+ " print(f'Loaded {len(self.results)} simulation results')\n",
+ "\n",
+ " # ---------- DataFrames ----------\n",
+ "\n",
+ " def create_dataframe(self) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Convert results to a pandas DataFrame for run-level analysis.\n",
+ "\n",
+ " Includes:\n",
+ " - All important config knobs (altruism prob, tau, epsilons, weights)\n",
+ " - Run outcomes (scores, lengths, pauses, etc.)\n",
+ " - Shared component breakdown (prefixed with shared_)\n",
+ " - Derived feature: length_utilization\n",
+ " \"\"\"\n",
+ " data: list[dict[str, Any]] = []\n",
+ "\n",
+ " for r in self.results:\n",
+ " row = {\n",
+ " # core config knobs\n",
+ " 'altruism_prob': r.config.altruism_prob,\n",
+ " 'tau_margin': r.config.tau_margin,\n",
+ " 'epsilon_fresh': r.config.epsilon_fresh,\n",
+ " 'epsilon_mono': r.config.epsilon_mono,\n",
+ " 'subjects': r.config.subjects,\n",
+ " 'memory_size': r.config.memory_size,\n",
+ " 'conversation_length_cfg': r.config.conversation_length,\n",
+ " 'seed': r.config.seed,\n",
+ "\n",
+ " # weights / algo params\n",
+ " 'min_samples_pid': r.config.min_samples_pid,\n",
+ " 'ewma_alpha': r.config.ewma_alpha,\n",
+ " 'importance_weight': r.config.importance_weight,\n",
+ " 'coherence_weight': r.config.coherence_weight,\n",
+ " 'freshness_weight': r.config.freshness_weight,\n",
+ " 'monotony_weight': r.config.monotony_weight,\n",
+ "\n",
+ " # run-level outcomes\n",
+ " 'total_score': r.total_score,\n",
+ " 'player10_score': r.player10_total_mean,\n",
+ " 'player10_individual': r.player10_individual_mean,\n",
+ " 'player10_rank': r.player10_rank_mean,\n",
+ " 'player10_gap_to_best': r.player10_gap_to_best,\n",
+ " 'player10_instances': r.player10_instances,\n",
+ " 'best_total_score': r.best_total_score,\n",
+ " 'conversation_length': r.conversation_length,\n",
+ " 'early_termination': float(r.early_termination),\n",
+ " 'pause_count': r.pause_count,\n",
+ " 'unique_items_used': r.unique_items_used,\n",
+ " 'execution_time': r.execution_time,\n",
+ " }\n",
+ "\n",
+ " # Include shared score components (flatten)\n",
+ " for comp, val in (r.score_breakdown or {}).items():\n",
+ " if comp == 'total':\n",
+ " continue\n",
+ " row[f'shared_{comp}'] = val\n",
+ "\n",
+ " data.append(row)\n",
+ "\n",
+ " df = pd.DataFrame(data)\n",
+ "\n",
+ " # Derived features\n",
+ " if 'conversation_length_cfg' in df and 'conversation_length' in df:\n",
+ " with np.errstate(divide='ignore', invalid='ignore'):\n",
+ " df['length_utilization'] = df['conversation_length'] / df['conversation_length_cfg']\n",
+ "\n",
+ " return df\n",
+ "\n",
+ " def create_player_long(self) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Explode SimulationResult.player_metrics into a long-form dataframe.\n",
+ "\n",
+ " Columns: seed, config knobs, label, class_name, alias, total, shared, individual, rank\n",
+ " Useful for rank distributions and per-player analyses.\n",
+ " \"\"\"\n",
+ " rows: list[dict[str, Any]] = []\n",
+ " for r in self.results:\n",
+ " cfg = {\n",
+ " 'altruism_prob': r.config.altruism_prob,\n",
+ " 'tau_margin': r.config.tau_margin,\n",
+ " 'epsilon_fresh': r.config.epsilon_fresh,\n",
+ " 'epsilon_mono': r.config.epsilon_mono,\n",
+ " 'seed': r.config.seed,\n",
+ " }\n",
+ " for label, m in (r.player_metrics or {}).items():\n",
+ " rows.append({\n",
+ " **cfg,\n",
+ " 'label': label,\n",
+ " 'class_name': m.get('class_name'),\n",
+ " 'alias': m.get('alias'),\n",
+ " 'total': m.get('total'),\n",
+ " 'shared': m.get('shared'),\n",
+ " 'individual': m.get('individual'),\n",
+ " 'rank': m.get('rank'),\n",
+ " })\n",
+ " return pd.DataFrame(rows)\n",
+ "\n",
+ " # ---------- Statistics helpers ----------\n",
+ "\n",
+ " def bootstrap_ci(\n",
+ " self,\n",
+ " df: pd.DataFrame,\n",
+ " group_cols: list[str],\n",
+ " metric: str,\n",
+ " B: int = 1000,\n",
+ " ci: float = 0.95\n",
+ " ) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Bootstrapped mean & CI for (group_cols, metric).\n",
+ " Returns columns: group_cols..., mean, ci_low, ci_high, n\n",
+ " \"\"\"\n",
+ " out = []\n",
+ " q_lo, q_hi = (1 - ci) / 2, 1 - (1 - ci) / 2\n",
+ " for key, g in df.groupby(group_cols):\n",
+ " values = g[metric].dropna().to_numpy()\n",
+ " if len(values) == 0:\n",
+ " continue\n",
+ " boot = []\n",
+ " for _ in range(B):\n",
+ " sample = np.random.choice(values, size=len(values), replace=True)\n",
+ " boot.append(sample.mean())\n",
+ " lo, hi = np.quantile(boot, [q_lo, q_hi])\n",
+ " row = {'mean': values.mean(), 'ci_low': float(lo), 'ci_high': float(hi), 'n': len(values)}\n",
+ " # Attach group key(s)\n",
+ " if isinstance(key, tuple):\n",
+ " for c, v in zip(group_cols, key, strict=False):\n",
+ " row[c] = v\n",
+ " else:\n",
+ " row[group_cols[0]] = key\n",
+ " out.append(row)\n",
+ " cols = group_cols + ['mean', 'ci_low', 'ci_high', 'n']\n",
+ " return pd.DataFrame(out)[cols]\n",
+ "\n",
+ " def pairwise_altruism_deltas(self, metric: str = 'total_score') -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Pairwise mean delta & Cohen's d between altruism levels for a metric.\n",
+ " Returns: a, b, delta_mean, cohens_d, nx, ny\n",
+ " \"\"\"\n",
+ " df = self.create_dataframe()\n",
+ " levels = sorted(df['altruism_prob'].unique())\n",
+ " rows = []\n",
+ " for i, a in enumerate(levels):\n",
+ " for b in levels[i + 1:]:\n",
+ " x = df[df.altruism_prob == a][metric].dropna()\n",
+ " y = df[df.altruism_prob == b][metric].dropna()\n",
+ " if len(x) and len(y):\n",
+ " delta = y.mean() - x.mean()\n",
+ " pooled = np.sqrt(\n",
+ " ((x.var(ddof=1) * (len(x) - 1)) + (y.var(ddof=1) * (len(y) - 1)))\n",
+ " / (len(x) + len(y) - 2)\n",
+ " )\n",
+ " d = delta / pooled if pooled > 0 else np.nan\n",
+ " rows.append({\n",
+ " 'a': a, 'b': b,\n",
+ " 'delta_mean': float(delta),\n",
+ " 'cohens_d': float(d),\n",
+ " 'nx': int(len(x)), 'ny': int(len(y)),\n",
+ " })\n",
+ " return pd.DataFrame(rows)\n",
+ "\n",
+ " # ---------- Plots (existing + new) ----------\n",
+ "\n",
+ " def plot_altruism_comparison(self, save_path: str | None = None):\n",
+ " \"\"\"Create plots comparing different altruism probabilities (existing, kept).\"\"\"\n",
+ " if not self.results:\n",
+ " print('No results loaded. Please load results first.')\n",
+ " return\n",
+ "\n",
+ " df = self.create_dataframe()\n",
+ "\n",
+ " altruism_groups = (\n",
+ " df.groupby('altruism_prob')\n",
+ " .agg(\n",
+ " {\n",
+ " 'total_score': ['mean', 'std', 'count'],\n",
+ " 'player10_score': ['mean', 'std'],\n",
+ " 'conversation_length': 'mean',\n",
+ " 'early_termination': 'mean',\n",
+ " 'pause_count': 'mean',\n",
+ " }\n",
+ " )\n",
+ " .round(3)\n",
+ " )\n",
+ "\n",
+ " fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
+ " fig.suptitle('Player10 Altruism Probability Comparison', fontsize=16)\n",
+ "\n",
+ " # Plot 1: Total Score vs Altruism Probability\n",
+ " ax1 = axes[0, 0]\n",
+ " altruism_probs = altruism_groups.index\n",
+ " mean_scores = altruism_groups[('total_score', 'mean')]\n",
+ " std_scores = altruism_groups[('total_score', 'std')]\n",
+ " ax1.errorbar(altruism_probs, mean_scores, yerr=std_scores, marker='o', capsize=5, capthick=2)\n",
+ " ax1.set_xlabel('Altruism Probability')\n",
+ " ax1.set_ylabel('Total Score')\n",
+ " ax1.set_title('Total Score vs Altruism Probability')\n",
+ " ax1.grid(True, alpha=0.3)\n",
+ "\n",
+ " # Plot 2: Player10 Score vs Altruism Probability\n",
+ " ax2 = axes[0, 1]\n",
+ " mean_p10_scores = altruism_groups[('player10_score', 'mean')]\n",
+ " std_p10_scores = altruism_groups[('player10_score', 'std')]\n",
+ " ax2.errorbar(\n",
+ " altruism_probs, mean_p10_scores, yerr=std_p10_scores,\n",
+ " marker='s', capsize=5, capthick=2, color='orange'\n",
+ " )\n",
+ " ax2.set_xlabel('Altruism Probability')\n",
+ " ax2.set_ylabel('Player10 Score')\n",
+ " ax2.set_title('Player10 Individual Score vs Altruism Probability')\n",
+ " ax2.grid(True, alpha=0.3)\n",
+ "\n",
+ " # Plot 3: Conversation Length vs Altruism Probability\n",
+ " ax3 = axes[1, 0]\n",
+ " conv_lengths = altruism_groups[('conversation_length', 'mean')]\n",
+ " ax3.plot(altruism_probs, conv_lengths, marker='^')\n",
+ " ax3.set_xlabel('Altruism Probability')\n",
+ " ax3.set_ylabel('Average Conversation Length')\n",
+ " ax3.set_title('Conversation Length vs Altruism Probability')\n",
+ " ax3.grid(True, alpha=0.3)\n",
+ "\n",
+ " # Plot 4: Early Termination Rate vs Altruism Probability\n",
+ " ax4 = axes[1, 1]\n",
+ " early_term_rates = altruism_groups[('early_termination', 'mean')]\n",
+ " ax4.plot(altruism_probs, early_term_rates, marker='d')\n",
+ " ax4.set_xlabel('Altruism Probability')\n",
+ " ax4.set_ylabel('Early Termination Rate')\n",
+ " ax4.set_title('Early Termination Rate vs Altruism Probability')\n",
+ " ax4.grid(True, alpha=0.3)\n",
+ "\n",
+ " plt.tight_layout()\n",
+ " if save_path:\n",
+ " plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Plot saved to: {save_path}')\n",
+ " plt.show()\n",
+ "\n",
+ " def plot_parameter_heatmap(\n",
+ " self, param1: str, param2: str, metric: str = 'total_score', save_path: str | None = None\n",
+ " ):\n",
+ " \"\"\"Create a heatmap showing the interaction between two parameters.\"\"\"\n",
+ " if not self.results:\n",
+ " print('No results loaded. Please load results first.')\n",
+ " return\n",
+ "\n",
+ " df = self.create_dataframe()\n",
+ " pivot = df.groupby([param1, param2])[metric].mean().unstack()\n",
+ "\n",
+ " plt.figure(figsize=(10, 8))\n",
+ " sns.heatmap(pivot, annot=True, fmt='.2f', cmap='viridis')\n",
+ " plt.title(f'{metric.title()} Heatmap: {param1} vs {param2}')\n",
+ " plt.xlabel(param2.replace('_', ' ').title())\n",
+ " plt.ylabel(param1.replace('_', ' ').title())\n",
+ "\n",
+ " if save_path:\n",
+ " plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Heatmap saved to: {save_path}')\n",
+ " plt.show()\n",
+ "\n",
+ " def plot_score_distributions(self, save_path: str | None = None):\n",
+ " \"\"\"Plot score distributions for different altruism probabilities.\"\"\"\n",
+ " if not self.results:\n",
+ " print('No results loaded. Please load results first.')\n",
+ " return\n",
+ "\n",
+ " df = self.create_dataframe()\n",
+ " altruism_probs = sorted(df['altruism_prob'].unique())\n",
+ "\n",
+ " fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n",
+ " fig.suptitle('Score Distributions by Altruism Probability', fontsize=16)\n",
+ "\n",
+ " # Plot 1: Total Score Distributions\n",
+ " ax1 = axes[0]\n",
+ " for prob in altruism_probs:\n",
+ " scores = df[df['altruism_prob'] == prob]['total_score']\n",
+ " ax1.hist(scores, alpha=0.6, label=f'Altruism: {prob:.1f}', bins=20)\n",
+ " ax1.set_xlabel('Total Score'); ax1.set_ylabel('Frequency')\n",
+ " ax1.set_title('Total Score Distributions'); ax1.legend(); ax1.grid(True, alpha=0.3)\n",
+ "\n",
+ " # Plot 2: Player10 Score Distributions\n",
+ " ax2 = axes[1]\n",
+ " for prob in altruism_probs:\n",
+ " scores = df[df['altruism_prob'] == prob]['player10_score']\n",
+ " ax2.hist(scores, alpha=0.6, label=f'Altruism: {prob:.1f}', bins=20)\n",
+ " ax2.set_xlabel('Player10 Score'); ax2.set_ylabel('Frequency')\n",
+ " ax2.set_title('Player10 Individual Score Distributions'); ax2.legend(); ax2.grid(True, alpha=0.3)\n",
+ "\n",
+ " plt.tight_layout()\n",
+ " if save_path:\n",
+ " plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Distributions plot saved to: {save_path}')\n",
+ " plt.show()\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "45cbdb6f-ded2-42f7-8232-ee810b7f8a8a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# ----- NEW PLOTS: additional ResultsAnalyzer methods (these belong in the class body above) -----\n",
+ "\n",
+ " def plot_component_stack(self, save_path: str | None = None):\n",
+ " \"\"\"Stacked bars of shared component means vs altruism_prob.\"\"\"\n",
+ " df = self.create_dataframe()\n",
+ " keep = ['shared_importance', 'shared_coherence', 'shared_freshness', 'shared_nonmonotonousness']\n",
+ " have = [c for c in keep if c in df.columns]\n",
+ " if not have:\n",
+ " print('No shared component breakdown in results.')\n",
+ " return\n",
+ " g = df.groupby('altruism_prob')[have].mean().reset_index().sort_values('altruism_prob')\n",
+ " ax = g.set_index('altruism_prob')[have].plot(kind='bar', stacked=True, figsize=(12, 6))\n",
+ " ax.set_ylabel('Mean shared component score'); ax.set_xlabel('Altruism probability')\n",
+ " ax.set_title('Shared score component breakdown vs altruism'); ax.legend(title='Component')\n",
+ " ax.grid(True, axis='y', alpha=0.3); plt.tight_layout()\n",
+ " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+ " plt.show()\n",
+ "\n",
+ " def plot_pareto_tradeoff(self, save_path: str | None = None):\n",
+ " \"\"\"Scatter of mean Player10 individual vs mean total score, colored by altruism.\"\"\"\n",
+ " df = self.create_dataframe()\n",
+ " agg = (df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])\n",
+ " .agg({'total_score': 'mean', 'player10_individual': 'mean', 'early_termination': 'mean'})\n",
+ " .reset_index())\n",
+ " plt.figure(figsize=(9, 7))\n",
+ " s = plt.scatter(\n",
+ " agg['player10_individual'], agg['total_score'],\n",
+ " c=agg['altruism_prob'], cmap='viridis', s=60, alpha=0.85\n",
+ " )\n",
+ " plt.colorbar(s, label='altruism_prob')\n",
+ " # Annotate \"risky\" configs\n",
+ " for _, r in agg.iterrows():\n",
+ " if r['early_termination'] > 0.30:\n",
+ " plt.annotate('ET>0.3', (r['player10_individual'], r['total_score']), fontsize=8)\n",
+ " plt.xlabel('Player10 individual (mean)')\n",
+ " plt.ylabel('Total score (mean)')\n",
+ " plt.title('Pareto trade-off: Player10 individual vs Total')\n",
+ " plt.grid(True, alpha=0.3); plt.tight_layout()\n",
+ " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+ " plt.show()\n",
+ "\n",
+ " def plot_rank_distribution(self, save_path: str | None = None):\n",
+ " \"\"\"Violin plots of Player10 rank across seeds vs altruism.\"\"\"\n",
+ " dfp = self.create_player_long()\n",
+ " if dfp.empty or 'rank' not in dfp:\n",
+ " print('No per-player metrics available.')\n",
+ " return\n",
+ " dfp_p10 = dfp[dfp['class_name'] == 'Player10']\n",
+ " if dfp_p10.empty:\n",
+ " print('No Player10 entries in per-player metrics.')\n",
+ " return\n",
+ " plt.figure(figsize=(10, 5))\n",
+ " sns.violinplot(data=dfp_p10, x='altruism_prob', y='rank', inner='quartile', cut=0)\n",
+ " plt.gca().invert_yaxis() # rank 1 is best\n",
+ " plt.title('Player10 rank distribution across seeds'); plt.xlabel('Altruism probability')\n",
+ " plt.ylabel('Rank (lower is better)'); plt.tight_layout()\n",
+ " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+ " plt.show()\n",
+ "\n",
+ " def plot_seed_stability(self, metric: str = 'total_score', save_path: str | None = None):\n",
+ " \"\"\"Cumulative mean vs number of simulations (sorted by seed) to show stabilization.\"\"\"\n",
+ " df = self.create_dataframe().sort_values('seed')\n",
+ " curves: list[tuple[float, np.ndarray]] = []\n",
+ " for p, g in df.groupby('altruism_prob'):\n",
+ " means = g[metric].expanding().mean().values\n",
+ " curves.append((p, means))\n",
+ "\n",
+ " plt.figure(figsize=(10, 6))\n",
+ " for p, means in curves:\n",
+ " plt.plot(range(1, len(means) + 1), means, label=f'p={p}')\n",
+ " plt.xlabel('Number of simulations (cumulative)'); plt.ylabel(f'Cumulative mean {metric}')\n",
+ " plt.title('Seed stability of the estimate'); plt.legend(title='altruism_prob')\n",
+ " plt.grid(True, alpha=0.3); plt.tight_layout()\n",
+ " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+ " plt.show()\n",
+ "\n",
+ " def plot_correlation_heatmap(self, save_path: str | None = None):\n",
+ " \"\"\"Correlation among knobs and outcomes.\"\"\"\n",
+ " df = self.create_dataframe()\n",
+ " cols = [\n",
+ " 'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',\n",
+ " 'importance_weight', 'coherence_weight', 'freshness_weight', 'monotony_weight',\n",
+ " 'total_score', 'player10_score', 'early_termination', 'pause_count',\n",
+ " 'unique_items_used', 'length_utilization'\n",
+ " ]\n",
+ " cols = [c for c in cols if c in df.columns]\n",
+ " corr = df[cols].corr(numeric_only=True)\n",
+ " plt.figure(figsize=(10, 8))\n",
+ " sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', center=0)\n",
+ " plt.title('Correlation matrix: knobs vs outcomes'); plt.tight_layout()\n",
+ " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+ " plt.show()\n",
+ "\n",
+ " def plot_multi_heatmaps(\n",
+ " self,\n",
+ " fixed: str = 'altruism_prob',\n",
+ " metric: str = 'total_score',\n",
+ " cols: tuple[str, str] = ('tau_margin', 'epsilon_fresh'),\n",
+ " save_path: str | None = None\n",
+ " ):\n",
+ " \"\"\"Small-multiple heatmaps for metric by two parameters, faceted by a fixed param.\"\"\"\n",
+ " df = self.create_dataframe()\n",
+ " vals = sorted(df[fixed].unique())\n",
+ " n = len(vals)\n",
+ " fig, axes = plt.subplots(1, n, figsize=(6 * n, 5), sharey=True)\n",
+ " if n == 1:\n",
+ " axes = [axes]\n",
+ " for ax, v in zip(axes, vals, strict=False):\n",
+ " sub = df[df[fixed] == v]\n",
+ " if sub.empty:\n",
+ " ax.set_visible(False)\n",
+ " continue\n",
+ " pivot = sub.groupby(list(cols))[metric].mean().unstack()\n",
+ " sns.heatmap(pivot, ax=ax, annot=True, fmt='.2f', cmap='viridis')\n",
+ " ax.set_title(f'{metric} | {fixed}={v}')\n",
+ " ax.set_xlabel(cols[1]); ax.set_ylabel(cols[0])\n",
+ " plt.tight_layout()\n",
+ " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+ " plt.show()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "78d49284-708b-4c7a-a0c9-b75cb05453a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# ---------- Modeling (optional): additional ResultsAnalyzer methods ----------\n",
+ "\n",
+ " def run_ols(self, metric: str = 'total_score'):\n",
+ " \"\"\"\n",
+ " OLS regression of metric on config knobs (robust SE). Requires statsmodels.\n",
+ " Returns the fitted model.\n",
+ " \"\"\"\n",
+ " try:\n",
+ " import statsmodels.api as sm\n",
+ " except ImportError:\n",
+ " print(\"statsmodels not installed. `pip install statsmodels` to use run_ols().\")\n",
+ " return None\n",
+ "\n",
+ " df = self.create_dataframe().dropna(subset=[metric])\n",
+ " X_cols = [\n",
+ " 'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',\n",
+ " 'importance_weight', 'coherence_weight', 'freshness_weight', 'monotony_weight'\n",
+ " ]\n",
+ " X_cols = [c for c in X_cols if c in df.columns]\n",
+ " X = df[X_cols].copy()\n",
+ " X = sm.add_constant(X)\n",
+ " y = df[metric]\n",
+ " model = sm.OLS(y, X).fit(cov_type='HC3') # robust SE\n",
+ " print(model.summary())\n",
+ " return model\n",
+ "\n",
+ " def run_logistic_early_term(self):\n",
+ " \"\"\"\n",
+ " Logistic regression predicting early termination. Requires statsmodels.\n",
+ " Returns the fitted model.\n",
+ " \"\"\"\n",
+ " try:\n",
+ " import statsmodels.api as sm\n",
+ " except ImportError:\n",
+ " print(\"statsmodels not installed. `pip install statsmodels` to use run_logistic_early_term().\")\n",
+ " return None\n",
+ "\n",
+ " df = self.create_dataframe().dropna(subset=['early_termination'])\n",
+ " X_cols = [\n",
+ " 'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',\n",
+ " 'pause_count', 'unique_items_used', 'conversation_length_cfg'\n",
+ " ]\n",
+ " X_cols = [c for c in X_cols if c in df.columns]\n",
+ " X = df[X_cols].copy()\n",
+ " X = sm.add_constant(X)\n",
+ " y = df['early_termination'].astype(int)\n",
+ " model = sm.Logit(y, X).fit(disp=False)\n",
+ " print(model.summary())\n",
+ " return model\n",
+ "\n",
+ " # ---------- Config search ----------\n",
+ "\n",
+ " def best_configs(\n",
+ " self,\n",
+ " objective: str = 'total_score',\n",
+ " constraints: dict[str, tuple[float | None, float | None]] | None = None,\n",
+ " top_k: int = 10\n",
+ " ) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Find top configs by objective subject to optional constraints.\n",
+ " constraints example: {'early_termination': (None, 0.2)} # <= 0.2\n",
+ " \"\"\"\n",
+ " df = self.create_dataframe()\n",
+ " agg = (\n",
+ " df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])\n",
+ " .agg({\n",
+ " objective: 'mean',\n",
+ " 'early_termination': 'mean',\n",
+ " 'player10_individual': 'mean',\n",
+ " 'total_score': 'mean'\n",
+ " })\n",
+ " .reset_index()\n",
+ " )\n",
+ " if constraints:\n",
+ " mask = pd.Series(True, index=agg.index)\n",
+ " for col, (lo, hi) in constraints.items():\n",
+ " if lo is not None:\n",
+ " mask &= agg[col] >= lo\n",
+ " if hi is not None:\n",
+ " mask &= agg[col] <= hi\n",
+ " agg = agg[mask]\n",
+ " return agg.sort_values(objective, ascending=False).head(top_k)\n",
+ "\n",
+ " # ---------- Quick report ----------\n",
+ "\n",
+ " def save_quick_report(self, out_dir: str = 'report_out'):\n",
+ " \"\"\"Save a set of figures and a short markdown summary to a directory.\"\"\"\n",
+ " os.makedirs(out_dir, exist_ok=True)\n",
+ "\n",
+ " # figures\n",
+ " self.plot_altruism_comparison(f'{out_dir}/altruism_comparison.png')\n",
+ " self.plot_component_stack(f'{out_dir}/component_stack.png')\n",
+ " self.plot_pareto_tradeoff(f'{out_dir}/pareto.png')\n",
+ " self.plot_rank_distribution(f'{out_dir}/rank_violin.png')\n",
+ " self.plot_seed_stability(save_path=f'{out_dir}/seed_stability.png')\n",
+ " self.plot_correlation_heatmap(f'{out_dir}/corr.png')\n",
+ " self.plot_multi_heatmaps(save_path=f'{out_dir}/multi_heatmaps.png')\n",
+ "\n",
+ " # analysis text\n",
+ " df = self.create_dataframe()\n",
+ " lines = [\n",
+ " '# Simulation Summary',\n",
+ " '',\n",
+ " f'- Total sims: {len(df)}',\n",
+ " f'- Unique configs: {df.groupby([\"altruism_prob\",\"tau_margin\",\"epsilon_fresh\",\"epsilon_mono\"]).ngroups}',\n",
+ " f'- Overall total mean ± std: {df[\"total_score\"].mean():.2f} ± {df[\"total_score\"].std():.2f}',\n",
+ " f'- Early termination rate: {df[\"early_termination\"].mean():.2f}',\n",
+ " ]\n",
+ " (Path(out_dir) / 'SUMMARY.md').write_text('\\n'.join(lines), encoding='utf-8')\n",
+ " print(f'Report written to {out_dir}/')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "25c5656e-9904-4a41-8ed3-585695394245",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ " # ---------- Text summary (existing, kept with minor tweaks) ----------\n",
+ "\n",
+ " def print_detailed_analysis(self):\n",
+ " \"\"\"Print detailed analysis of the results.\"\"\"\n",
+ " if not self.results:\n",
+ " print('No results loaded. Please load results first.')\n",
+ " return\n",
+ "\n",
+ " df = self.create_dataframe()\n",
+ "\n",
+ " print('=== DETAILED ANALYSIS ===')\n",
+ " print(f'Total simulations: {len(df)}')\n",
+ " print(\n",
+ " f'Unique configurations: {df.groupby([\"altruism_prob\", \"tau_margin\", \"epsilon_fresh\", \"epsilon_mono\"]).ngroups}'\n",
+ " )\n",
+ "\n",
+ " # Overall statistics\n",
+ " print('\\n=== OVERALL STATISTICS ===')\n",
+ " print(f'Total Score - Mean: {df[\"total_score\"].mean():.2f}, Std: {df[\"total_score\"].std():.2f}')\n",
+ " print(f'Player10 Score - Mean: {df[\"player10_score\"].mean():.2f}, Std: {df[\"player10_score\"].std():.2f}')\n",
+ " if 'player10_individual' in df:\n",
+ " print(\n",
+ " f'Player10 Individual - Mean: {df[\"player10_individual\"].mean():.2f}, '\n",
+ " f'Std: {df[\"player10_individual\"].std():.2f}'\n",
+ " )\n",
+ " if 'player10_rank' in df:\n",
+ " print(\n",
+ " f'Player10 Rank - Mean: {df[\"player10_rank\"].mean():.2f}, '\n",
+ " f'Std: {df[\"player10_rank\"].std():.2f}'\n",
+ " )\n",
+ " print(\n",
+ " f'Conversation Length - Mean: {df[\"conversation_length\"].mean():.1f}, '\n",
+ " f'Std: {df[\"conversation_length\"].std():.1f}'\n",
+ " )\n",
+ " print(f'Early Termination Rate: {df[\"early_termination\"].mean():.2f}')\n",
+ "\n",
+ " # Best configurations\n",
+ " print('\\n=== TOP 10 CONFIGURATIONS ===')\n",
+ " agg_map = {'total_score': ['mean', 'std', 'count'], 'player10_score': 'mean'}\n",
+ " if 'player10_rank' in df:\n",
+ " agg_map['player10_rank'] = 'mean'\n",
+ " if 'player10_individual' in df:\n",
+ " agg_map['player10_individual'] = 'mean'\n",
+ "\n",
+ " top_configs = (\n",
+ " df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])\n",
+ " .agg(agg_map)\n",
+ " .round(3)\n",
+ " )\n",
+ "\n",
+ " new_columns = ['total_mean', 'total_std', 'count', 'p10_mean']\n",
+ " if 'player10_rank' in agg_map:\n",
+ " new_columns.append('p10_rank_mean')\n",
+ " if 'player10_individual' in agg_map:\n",
+ " new_columns.append('p10_individual_mean')\n",
+ " top_configs.columns = new_columns\n",
+ " top_configs = top_configs.sort_values('total_mean', ascending=False).head(10)\n",
+ "\n",
+ " for i, (config, row) in enumerate(top_configs.iterrows(), 1):\n",
+ " altruism, tau, fresh, mono = config\n",
+ " parts = [\n",
+ " f'{i:2d}. Altruism: {altruism:.1f}',\n",
+ " f'Tau: {tau:.2f}',\n",
+ " f'Fresh: {fresh:.2f}',\n",
+ " f'Mono: {mono:.2f}',\n",
+ " f'Total: {row[\"total_mean\"]:.2f}±{row[\"total_std\"]:.2f}',\n",
+ " f'P10: {row[\"p10_mean\"]:.2f}',\n",
+ " ]\n",
+ " if 'p10_rank_mean' in row:\n",
+ " parts.append(f'P10 Rank: {row[\"p10_rank_mean\"]:.2f}')\n",
+ " if 'p10_individual_mean' in row:\n",
+ " parts.append(f'P10 Individual: {row[\"p10_individual_mean\"]:.2f}')\n",
+ " print(' -> '.join(parts))\n",
+ "\n",
+ " # Altruism analysis\n",
+ " print('\\n=== ALTRUISM ANALYSIS ===')\n",
+ " agg_map = {\n",
+ " 'total_score': ['mean', 'std'],\n",
+ " 'player10_score': ['mean', 'std'],\n",
+ " 'conversation_length': 'mean',\n",
+ " 'early_termination': 'mean',\n",
+ " }\n",
+ " if 'player10_rank' in df:\n",
+ " agg_map['player10_rank'] = ['mean', 'std']\n",
+ " if 'player10_individual' in df:\n",
+ " agg_map['player10_individual'] = ['mean', 'std']\n",
+ "\n",
+ " altruism_stats = df.groupby('altruism_prob').agg(agg_map).round(3)\n",
+ " for prob in sorted(df['altruism_prob'].unique()):\n",
+ " stats = altruism_stats.loc[prob]\n",
+ " parts = [\n",
+ " f'Altruism {prob:.1f}:',\n",
+ " f'Total={stats[(\"total_score\", \"mean\")]:.2f}±{stats[(\"total_score\", \"std\")]:.2f}',\n",
+ " f'P10={stats[(\"player10_score\", \"mean\")]:.2f}±{stats[(\"player10_score\", \"std\")]:.2f}',\n",
+ " f'Length={stats[(\"conversation_length\", \"mean\")]:.1f}',\n",
+ " f'EarlyTerm={stats[(\"early_termination\", \"mean\")]:.2f}',\n",
+ " ]\n",
+ " if ('player10_rank', 'mean') in stats:\n",
+ " parts.append(\n",
+ " f'P10 Rank={stats[(\"player10_rank\", \"mean\")]:.2f}±{stats[(\"player10_rank\", \"std\")]:.2f}'\n",
+ " )\n",
+ " if ('player10_individual', 'mean') in stats:\n",
+ " parts.append(\n",
+ " f'P10 Ind={stats[(\"player10_individual\", \"mean\")]:.2f}±{stats[(\"player10_individual\", \"std\")]:.2f}'\n",
+ " )\n",
+ " print(' '.join(parts))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4a34895e-f309-46f1-a96c-635e623dddbb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# ---------- Convenience ResultsAnalyzer method: bootstrap CIs and effect sizes ----------\n",
+ "\n",
+ " def print_ci_and_effects(self, metric: str = 'total_score'):\n",
+ " \"\"\"Print bootstrapped CIs per altruism level and pairwise effect sizes.\"\"\"\n",
+ " df = self.create_dataframe()\n",
+ " ci = self.bootstrap_ci(df, ['altruism_prob'], metric)\n",
+ " print('\\n=== BOOTSTRAP CI (by altruism_prob) ===')\n",
+ " print(ci.sort_values('altruism_prob').to_string(index=False))\n",
+ " deltas = self.pairwise_altruism_deltas(metric=metric)\n",
+ " print('\\n=== PAIRWISE DELTAS (a->b) ===')\n",
+ " print(deltas.to_string(index=False))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3a6ef4e6-e198-4694-9545-899e533c8cb3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ----------------------------\n",
+ "# CLI\n",
+ "# ----------------------------\n",
+ "\n",
+ "def main():\n",
+ " \"\"\"Main function for command-line usage.\"\"\"\n",
+ " parser = argparse.ArgumentParser(description='Analyze Monte Carlo simulation results')\n",
+ " parser.add_argument('results_file', help='Path to results JSON file')\n",
+ "\n",
+ " parser.add_argument(\n",
+ " '--plot',\n",
+ " choices=[\n",
+ " 'altruism', 'heatmap', 'distributions',\n",
+ " 'components', 'pareto', 'rank', 'seed', 'corr', 'multi-heatmap'\n",
+ " ],\n",
+ " default='altruism',\n",
+ " help='Type of plot to create',\n",
+ " )\n",
+ " parser.add_argument('--param1', default='altruism_prob', help='Param for heatmap / multi-heatmap (rows)')\n",
+ " parser.add_argument('--param2', default='tau_margin', help='Param for heatmap / multi-heatmap (cols)')\n",
+ " parser.add_argument('--metric', default='total_score', help='Metric for heatmaps / stability')\n",
+ " parser.add_argument('--fixed', default='altruism_prob', help='Facet for multi-heatmap')\n",
+ " parser.add_argument('--save', help='Save plot to file')\n",
+ " parser.add_argument('--analysis', action='store_true', help='Print detailed analysis')\n",
+ " parser.add_argument('--ci', action='store_true', help='Print bootstrapped CIs and effect sizes')\n",
+ " parser.add_argument('--report', help='Save a quick report to a directory (path)')\n",
+ " parser.add_argument('--ols', action='store_true', help='Run OLS on total_score with knobs')\n",
+ " parser.add_argument('--logit', action='store_true', help='Run logistic regression for early termination')\n",
+ "\n",
+ " args = parser.parse_args()\n",
+ "\n",
+ " # Load results\n",
+ " analyzer = ResultsAnalyzer(args.results_file)\n",
+ "\n",
+ " # Print analysis tables\n",
+ " if args.analysis:\n",
+ " analyzer.print_detailed_analysis()\n",
+ " if args.ci:\n",
+ " analyzer.print_ci_and_effects(metric=args.metric)\n",
+ " if args.report:\n",
+ " analyzer.save_quick_report(args.report)\n",
+ "\n",
+ " # Optional modeling\n",
+ " if args.ols:\n",
+ " analyzer.run_ols(metric='total_score')\n",
+ " if args.logit:\n",
+ " analyzer.run_logistic_early_term()\n",
+ "\n",
+ " # Create plots\n",
+ " if args.plot == 'altruism':\n",
+ " analyzer.plot_altruism_comparison(args.save)\n",
+ " elif args.plot == 'heatmap':\n",
+ " analyzer.plot_parameter_heatmap(args.param1, args.param2, metric=args.metric, save_path=args.save)\n",
+ " elif args.plot == 'distributions':\n",
+ " analyzer.plot_score_distributions(args.save)\n",
+ " elif args.plot == 'components':\n",
+ " analyzer.plot_component_stack(args.save)\n",
+ " elif args.plot == 'pareto':\n",
+ " analyzer.plot_pareto_tradeoff(args.save)\n",
+ " elif args.plot == 'rank':\n",
+ " analyzer.plot_rank_distribution(args.save)\n",
+ " elif args.plot == 'seed':\n",
+ " analyzer.plot_seed_stability(metric=args.metric, save_path=args.save)\n",
+ " elif args.plot == 'corr':\n",
+ " analyzer.plot_correlation_heatmap(args.save)\n",
+ " elif args.plot == 'multi-heatmap':\n",
+ " analyzer.plot_multi_heatmaps(fixed=args.fixed, metric=args.metric, save_path=args.save)\n",
+ "\n",
+ " # Done\n",
+ "\n",
+ "\n",
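+    "# Example invocation if this analysis code is saved as a standalone module (file name and paths are hypothetical):\n",
+    "#   python analyze_results.py results/monte_carlo_results.json --analysis --plot heatmap --save heatmap.png\n",
+    "\n",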
+ "if __name__ == '__main__':\n",
+ " main()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "98c654c5-9e33-440d-a720-89a1adbe29e1",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "09df6723-3e92-4b6a-a307-29cf288dc512",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "87df6905-7a17-48e0-a101-cf7744261428",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:base] *",
+ "language": "python",
+ "name": "conda-base-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/players/player_10/__init__.py b/players/player_10/__init__.py
index 7e8a23a..001daf9 100644
--- a/players/player_10/__init__.py
+++ b/players/player_10/__init__.py
@@ -1,8 +1,22 @@
from .agent.player import Player10Agent # Agent-based player for comparison
-from .rl.eval_player import EvalPlayer, create_eval_player # RL evaluation player
-# Use the trained RL model as Player10 by default
-Player10 = EvalPlayer
+try:
+ from .rl.eval_player import EvalPlayer, create_eval_player # RL evaluation player
+except Exception: # pragma: no cover - optional dependency
+ EvalPlayer = None
+
+ def create_eval_player(*_args, **_kwargs):
+ message = (
+ 'Player10 RL evaluation requires the optional torch dependency and a trained model. '
+ 'Install torch and ensure models are available to use EvalPlayer.'
+ )
+ raise RuntimeError(message)
+
+
+# Use the original Player10Agent as Player10 by default (instead of EvalPlayer)
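+# To evaluate the trained RL player instead (requires the optional torch dependency and model files), use:
+#   from players.player_10.rl.eval_player import EvalPlayer as Player10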
+Player10 = Player10Agent
__all__ = [
'Player10',
diff --git a/players/player_10/tools/dashboard/builder.py b/players/player_10/tools/dashboard/builder.py
index 27410c1..083ce7b 100644
--- a/players/player_10/tools/dashboard/builder.py
+++ b/players/player_10/tools/dashboard/builder.py
@@ -4,6 +4,7 @@
import json
import re
+from collections import defaultdict
from datetime import datetime
from pathlib import Path
@@ -34,6 +35,193 @@ def _format_number(value: float | None, digits: int = 2) -> str:
return f'{value:.{digits}f}'
+COLORWAY = [
+ '#3867d6',
+ '#fa8231',
+ '#20bf6b',
+ '#a55eea',
+ '#fed330',
+ '#fc5c65',
+ '#2d98da',
+]
+
+COMPONENT_LABELS = {
+ 'importance': 'Importance',
+ 'coherence': 'Coherence',
+ 'freshness': 'Freshness',
+ 'nonmonotonousness': 'Monotony relief',
+}
+
+
+def _config_value(result, attr: str):
+ config = getattr(result, 'config', None)
+ if config is None:
+ return None
+ if hasattr(config, attr):
+ return getattr(config, attr)
+ if isinstance(config, dict):
+ return config.get(attr)
+ return None
+
+
+def _metric_value(result, metric: str):
+ if metric == 'total_score':
+ return getattr(result, 'total_score', None)
+ if metric == 'player10_score':
+ return getattr(result, 'player10_total_mean', None)
+ if metric == 'player10_individual':
+ return getattr(result, 'player10_individual_mean', None)
+ if metric == 'early_termination':
+ value = getattr(result, 'early_termination', None)
+ if value is None:
+ return None
+ try:
+ return float(value)
+ except (TypeError, ValueError):
+ return None
+ return getattr(result, metric, None)
+
+
+def _compute_heatmap_data(results, row_attr: str, col_attr: str, metric: str):
+ matrix = defaultdict(lambda: defaultdict(list))
+ rows: set = set()
+ cols: set = set()
+ for result in results:
+ row_value = _config_value(result, row_attr)
+ col_value = _config_value(result, col_attr)
+ metric_value = _metric_value(result, metric)
+ if row_value is None or col_value is None or metric_value is None:
+ continue
+ matrix[row_value][col_value].append(float(metric_value))
+ rows.add(row_value)
+ cols.add(col_value)
+ if not rows or not cols:
+ return None
+ row_order = sorted(rows)
+ col_order = sorted(cols)
+ grid: list[list[float | None]] = []
+ for row_value in row_order:
+ row_data: list[float | None] = []
+ for col_value in col_order:
+ bucket = matrix.get(row_value, {}).get(col_value, [])
+ if bucket:
+ row_data.append(sum(bucket) / len(bucket))
+ else:
+ row_data.append(None)
+ grid.append(row_data)
+ return row_order, col_order, grid
+
+
+def _collect_scores_by_altruism(results):
+ buckets = defaultdict(lambda: {'total': [], 'player10': []})
+ for result in results:
+ altruism = _config_value(result, 'altruism_prob')
+ if altruism is None:
+ continue
+ total_value = _metric_value(result, 'total_score')
+ if total_value is not None:
+ buckets[altruism]['total'].append(float(total_value))
+ p10_value = _metric_value(result, 'player10_score')
+ if p10_value is not None:
+ buckets[altruism]['player10'].append(float(p10_value))
+ if not buckets:
+ return None
+ return dict(sorted(buckets.items()))
+
+
+def _component_means_by_altruism(results):
+ sums = defaultdict(lambda: defaultdict(float))
+ counts = defaultdict(lambda: defaultdict(int))
+ for result in results:
+ altruism = _config_value(result, 'altruism_prob')
+ breakdown = getattr(result, 'score_breakdown', None) or {}
+ if altruism is None:
+ continue
+ for key in COMPONENT_LABELS:
+ value = breakdown.get(key)
+ if value is None:
+ continue
+ try:
+ value = float(value)
+ except (TypeError, ValueError):
+ continue
+ sums[altruism][key] += value
+ counts[altruism][key] += 1
+ if not sums:
+ return None
+ altruism_values = sorted(sums.keys())
+ component_series: dict[str, list[float]] = {key: [] for key in COMPONENT_LABELS}
+ for altruism in altruism_values:
+ for key in COMPONENT_LABELS:
+ count = counts[altruism].get(key, 0)
+ if count:
+ component_series[key].append(sums[altruism][key] / count)
+ else:
+ component_series[key].append(0.0)
+ return altruism_values, component_series
+
+
+def _aggregate_pareto_points(results):
+ groups = defaultdict(
+ lambda: {
+ 'total_sum': 0.0,
+ 'total_count': 0,
+ 'p10_sum': 0.0,
+ 'p10_count': 0,
+ 'early_sum': 0.0,
+ 'early_count': 0,
+ }
+ )
+ for result in results:
+ key = (
+ _config_value(result, 'altruism_prob'),
+ _config_value(result, 'tau_margin'),
+ _config_value(result, 'epsilon_fresh'),
+ _config_value(result, 'epsilon_mono'),
+ )
+ if any(value is None for value in key):
+ continue
+ total_value = _metric_value(result, 'total_score')
+ if total_value is not None:
+ groups[key]['total_sum'] += float(total_value)
+ groups[key]['total_count'] += 1
+ p10_value = _metric_value(result, 'player10_individual')
+ if p10_value is not None:
+ groups[key]['p10_sum'] += float(p10_value)
+ groups[key]['p10_count'] += 1
+ early_value = _metric_value(result, 'early_termination')
+ if early_value is not None:
+ groups[key]['early_sum'] += float(early_value)
+ groups[key]['early_count'] += 1
+ points: list[dict[str, float | int | None]] = []
+ for key, data in groups.items():
+ if not data['total_count'] or not data['p10_count']:
+ continue
+ altruism, tau, fresh, mono = key
+ point = {
+ 'altruism': altruism,
+ 'tau': tau,
+ 'fresh': fresh,
+ 'mono': mono,
+ 'total': data['total_sum'] / data['total_count'],
+ 'player10': data['p10_sum'] / data['p10_count'],
+ 'early': (data['early_sum'] / data['early_count']) if data['early_count'] else None,
+ 'runs': data['total_count'],
+ }
+ points.append(point)
+ if not points:
+ return None
+ points.sort(key=lambda item: (item['altruism'], item['tau'], item['fresh'], item['mono']))
+ return points
+
+
+def _format_axis_value(value):
+ if isinstance(value, float):
+ formatted = f'{value:.3f}' if abs(value) < 1 else f'{value:.2f}'
+ return formatted.rstrip('0').rstrip('.')
+ return str(value)
+
+
def generate_dashboard(
results,
analysis,
@@ -45,12 +233,17 @@ def generate_dashboard(
try:
import plotly.graph_objects as go
import plotly.io as pio
+
+ from plotly.subplots import make_subplots
+
except ImportError:
return None
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
+ analysis = analysis or {}
+
aggregated = summarize_parameterizations(results)
table_rows: list[dict] = []
for row in aggregated:
@@ -116,23 +309,48 @@ def generate_dashboard(
chart_sections: list[dict[str, str]] = []
if top_rows:
- labels = [parameter_label(row['meta']) for row in top_rows]
- total_means = [row['mean'] for row in top_rows]
- fig_top = go.Figure(
- go.Bar(
- x=labels,
- y=total_means,
- text=[f'±{row["std"]:.2f}' for row in top_rows],
- textposition='outside',
- marker=dict(color='#3867d6'),
+ fig_top = go.Figure()
+ rank_labels: list[str] = []
+ for idx, row in enumerate(top_rows, start=1):
+ full_label = parameter_label(row['meta'])
+ mean_value = row['mean']
+ std_value = row.get('std', 0.0)
+ rank_label = f'#{idx}'
+ rank_labels.append(rank_label)
+ fig_top.add_trace(
+ go.Bar(
+ x=[mean_value],
+ y=[rank_label],
+ orientation='h',
+ name=full_label,
+ marker=dict(color=COLORWAY[(idx - 1) % len(COLORWAY)]),
+ text=[f'{mean_value:.2f} ± {std_value:.2f}'],
+ textposition='outside',
+ customdata=[[full_label, std_value]],
+ hovertemplate=(
+                        '%{customdata[0]}<br>Mean: %{x:.2f}<br>Std: %{customdata[1]:.2f}'
+ ),
+ )
)
- )
fig_top.update_layout(
title='Top Parameterizations by Total Score',
- xaxis_title='Parameterization label',
- yaxis_title='Mean total score',
+ xaxis_title='Mean total score',
+ yaxis_title='Rank',
+ yaxis=dict(categoryorder='array', categoryarray=rank_labels),
+ margin=dict(l=0, r=20, t=60, b=40),
+ height=max(320, 90 * len(rank_labels)),
uniformtext_minsize=10,
- uniformtext_mode='show',
+ uniformtext_mode='hide',
+ legend=dict(
+ title='Parameterization label',
+ yanchor='top',
+ y=1.0,
+ xanchor='left',
+ x=1.02,
+ bgcolor='rgba(255,255,255,0.85)',
+ bordercolor='rgba(0,0,0,0.1)',
+ borderwidth=1,
+ ),
)
chart_sections.append(
{
@@ -224,6 +442,201 @@ def generate_dashboard(
},
)
+ # Enhanced analysis sections derived from notebook utilities
+ heatmap_data = _compute_heatmap_data(results, 'altruism_prob', 'tau_margin', 'total_score')
+ if heatmap_data:
+ row_values, col_values, matrix = heatmap_data
+ y_labels = [_format_axis_value(value) for value in row_values]
+ x_labels = [_format_axis_value(value) for value in col_values]
+ fig_heatmap = go.Figure(
+ go.Heatmap(
+ z=matrix,
+ x=x_labels,
+ y=y_labels,
+ colorscale='Viridis',
+ colorbar={'title': 'Mean total score'},
+ )
+ )
+ fig_heatmap.update_layout(
+ title='Total Score Heatmap',
+ xaxis_title='Tau margin',
+ yaxis_title='Altruism probability',
+ margin={'l': 80, 'r': 40, 't': 60, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Parameter Heatmap',
+ 'description': 'Average total score for each altruism/tau combination helps spot sweet spots quickly.',
+ 'html': pio.to_html(
+ fig_heatmap,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
+ score_buckets = _collect_scores_by_altruism(results)
+ if score_buckets:
+ fig_dist = make_subplots(rows=1, cols=2, subplot_titles=('Total score', 'Player10 score'))
+ for idx, (prob, values) in enumerate(score_buckets.items()):
+ label = f'altruism {prob:.2f}' if isinstance(prob, float) else f'altruism {prob}'
+ color = COLORWAY[idx % len(COLORWAY)]
+ if values['total']:
+ fig_dist.add_trace(
+ go.Histogram(
+ x=values['total'],
+ name=label,
+ legendgroup=label,
+ marker={'color': color},
+ opacity=0.55,
+ nbinsx=20,
+ showlegend=True,
+ ),
+ row=1,
+ col=1,
+ )
+ if values['player10']:
+ fig_dist.add_trace(
+ go.Histogram(
+ x=values['player10'],
+ name=label,
+ legendgroup=label,
+ marker={'color': color},
+ opacity=0.55,
+ nbinsx=20,
+ showlegend=False,
+ ),
+ row=1,
+ col=2,
+ )
+ fig_dist.update_layout(
+ title_text='Score Distributions by Altruism',
+ barmode='overlay',
+ legend={'orientation': 'h', 'y': 1.12, 'x': 0.5, 'xanchor': 'center'},
+ xaxis_title='Total score',
+ xaxis2_title='Player10 score',
+ yaxis_title='Frequency',
+ margin={'l': 60, 'r': 20, 't': 80, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Score Distributions',
+ 'description': 'Histogram overlays reveal how each altruism setting shifts total and individual score shapes.',
+ 'html': pio.to_html(
+ fig_dist,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
+ component_data = _component_means_by_altruism(results)
+ if component_data:
+ altruism_values, component_series = component_data
+ labels = [_format_axis_value(value) for value in altruism_values]
+ fig_components = go.Figure()
+ for idx, (comp_key, comp_label) in enumerate(COMPONENT_LABELS.items()):
+ values = component_series.get(comp_key, [])
+ if not values:
+ continue
+ fig_components.add_trace(
+ go.Bar(
+ x=labels,
+ y=values,
+ name=comp_label,
+ marker={'color': COLORWAY[idx % len(COLORWAY)]},
+ )
+ )
+ fig_components.update_layout(
+ title='Shared Component Breakdown',
+ barmode='stack',
+ xaxis_title='Altruism probability',
+ yaxis_title='Mean shared score',
+ legend={'orientation': 'h', 'y': 1.1, 'x': 0.5, 'xanchor': 'center'},
+ margin={'l': 60, 'r': 20, 't': 80, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Shared Components',
+ 'description': 'Stacks quantify how shared scoring components vary with altruism levels.',
+ 'html': pio.to_html(
+ fig_components,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
+ pareto_points = _aggregate_pareto_points(results)
+ if pareto_points:
+ customdata = [
+ [
+ _format_axis_value(point['altruism']),
+ _format_axis_value(point['tau']),
+ _format_axis_value(point['fresh']),
+ _format_axis_value(point['mono']),
+ (f'{point["early"]:.1%}' if point['early'] is not None else 'n/a'),
+ point['runs'],
+ ]
+ for point in pareto_points
+ ]
+ fig_pareto = go.Figure(
+ go.Scatter(
+ x=[point['player10'] for point in pareto_points],
+ y=[point['total'] for point in pareto_points],
+ mode='markers',
+ marker={
+ 'size': 10,
+ 'color': [point['altruism'] for point in pareto_points],
+ 'colorscale': 'Viridis',
+ 'showscale': True,
+ 'colorbar': {'title': 'Altruism p'},
+ },
+ text=['ET>0.3' if (point['early'] or 0) > 0.3 else '' for point in pareto_points],
+ textposition='top center',
+ customdata=customdata,
+ hovertemplate=(
+                    'Player10 mean: %{x:.2f}<br>'
+                    'Total mean: %{y:.2f}<br>'
+                    'Altruism: %{customdata[0]}<br>'
+                    'Tau margin: %{customdata[1]}<br>'
+                    'Epsilon fresh: %{customdata[2]}<br>'
+                    'Epsilon mono: %{customdata[3]}<br>'
+                    'Early termination: %{customdata[4]}<br>'
+                    'Runs: %{customdata[5]}'
+ ),
+ )
+ )
+ fig_pareto.update_layout(
+ title='Pareto: Player10 vs Total Score',
+ xaxis_title='Player10 individual mean',
+ yaxis_title='Total score mean',
+ margin={'l': 60, 'r': 20, 't': 60, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Pareto Trade-off',
+ 'description': 'Scatter highlights where individual gains align with team score, colored by altruism.',
+ 'html': pio.to_html(
+ fig_pareto,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
total_simulations = analysis.get('total_simulations', len(results))
unique_configs = analysis.get('unique_configurations', len(aggregated))
best_entry = next(iter(analysis.get('best_configurations', [])), None)
diff --git a/players/player_10/tools/manual_dashboard.py b/players/player_10/tools/manual_dashboard.py
new file mode 100644
index 0000000..2bd84c2
--- /dev/null
+++ b/players/player_10/tools/manual_dashboard.py
@@ -0,0 +1,352 @@
+"""Generate a dashboard for manual engine experiments.
+
+This script recreates two lightweight experiment profiles (balanced vs adversarial
+supporting casts) across a small set of seeds and toggles Player10's altruism
+probability. The per-run outputs are converted into the same shape that the
+Plotly dashboard expects, so we can reuse generate_dashboard without relying
+on the MonteCarlo simulator assets that aren't available locally.
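+
+Run it as a plain script from the repository root, e.g. python players/player_10/tools/manual_dashboard.py;
+the script appends the repository root to sys.path before importing project modules.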
+"""
+
+from __future__ import annotations
+
+import json
+import statistics as stats
+import sys
+from collections import Counter
+from collections.abc import Sequence
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT) not in sys.path:
+ sys.path.append(str(PROJECT_ROOT))
+
+
+@dataclass
+class ManualConfig:
+ """Minimal config stub exposing the knobs used by the dashboard helpers."""
+
+ altruism_prob: float
+ tau_margin: float
+ epsilon_fresh: float
+ epsilon_mono: float
+ seed: int
+ players: dict[str, int]
+ subjects: int
+ memory_size: int
+ conversation_length: int
+ min_samples_pid: int
+ ewma_alpha: float
+ importance_weight: float
+ coherence_weight: float
+ freshness_weight: float
+ monotony_weight: float
+
+
+@dataclass
+class ManualResult:
+ """Container that matches the attributes accessed by the dashboard builder."""
+
+ config: ManualConfig
+ total_score: float
+ best_total_score: float
+ player_scores: dict[str, float]
+ player_contributions: dict[str, int]
+ conversation_length: int
+ early_termination: bool
+ pause_count: int
+ unique_items_used: int
+ execution_time: float
+ score_breakdown: dict[str, float]
+ player_metrics: dict[str, dict[str, float | str | int | None]]
+ player10_total_mean: float
+ player10_individual_mean: float
+ player10_rank_mean: float
+ player10_gap_to_best: float
+ player10_instances: int
+
+
+def _build_label_map(engine: Any) -> dict[str, str]:
+ """Assign stable human-readable labels to player UUIDs."""
+ counts: Counter[str] = Counter()
+ labels: dict[str, str] = {}
+
+ for player in engine.players:
+ class_name = type(player).__name__
+ if class_name == 'Player10Agent':
+ label = 'Player10'
+ else:
+ counts[class_name] += 1
+ label = f'{class_name}#{counts[class_name]}'
+ labels[str(player.id)] = label
+
+ return labels
+
+
+def _rank_players(totals: dict[str, float]) -> dict[str, float]:
+    """Return 1-based ranks (standard competition ranking, ties share a rank) for each player label."""
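+    # e.g. totals {'A': 10.0, 'B': 10.0, 'C': 8.0} -> ranks {'A': 1.0, 'B': 1.0, 'C': 3.0}
+    # (tied players share a rank; the next distinct score skips ahead by the number of ties).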
+ sorted_totals = sorted(totals.items(), key=lambda item: item[1], reverse=True)
+ ranks: dict[str, float] = {}
+
+ current_rank = 1
+ previous_value: float | None = None
+
+ for index, (label, value) in enumerate(sorted_totals, start=1):
+ if previous_value is None or value < previous_value:
+ current_rank = index
+ previous_value = value
+ ranks[label] = float(current_rank)
+
+ return ranks
+
+
+def _build_manual_result(
+ engine: Any,
+ seed: int,
+ altruism: float,
+ roster: Sequence[type],
+ subjects: int,
+ memory_size: int,
+ conversation_length: int,
+) -> ManualResult:
+ """Run the engine once and transform the output into a dashboard result."""
+ from players.player_10.agent import config as p10_config
+
+ output = engine.run(list(roster))
+ history = output['history']
+ scores = output['scores']
+
+ label_map = _build_label_map(engine)
+
+ player_scores_dict: dict[str, float] = {}
+ player_metrics: dict[str, dict[str, float | str | int | None]] = {}
+
+ totals_for_ranking: dict[str, float] = {}
+
+ for entry in scores['player_scores']:
+ label = label_map[str(entry['id'])]
+ total = float(entry['scores']['total'])
+ individual = float(entry['scores']['individual'])
+ shared = float(entry['scores']['shared'])
+
+ player_scores_dict[label] = total
+ player_metrics[label] = {
+ 'class_name': label.split('#')[0],
+ 'alias': label,
+ 'total': total,
+ 'individual': individual,
+ 'shared': shared,
+ 'rank': None, # filled in after ranking
+ }
+ totals_for_ranking[label] = total
+
+ ranks = _rank_players(totals_for_ranking)
+ for label, rank in ranks.items():
+ player_metrics[label]['rank'] = rank
+
+ player10_total = player_scores_dict['Player10']
+ best_total = max(player_scores_dict.values())
+
+ player_contributions_counts = {
+ label_map[str(uid)]: len(items) for uid, items in engine.player_contributions.items()
+ }
+
+ unique_items = {item.id for item in history if item is not None}
+ pause_count = sum(1 for item in history if item is None)
+
+ config = ManualConfig(
+ altruism_prob=altruism,
+ tau_margin=p10_config.TAU_MARGIN,
+ epsilon_fresh=p10_config.EPSILON_FRESH,
+ epsilon_mono=p10_config.EPSILON_MONO,
+ seed=seed,
+ players=dict(Counter(type(player).__name__ for player in engine.players)),
+ subjects=subjects,
+ memory_size=memory_size,
+ conversation_length=conversation_length,
+ min_samples_pid=p10_config.MIN_SAMPLES_PID,
+ ewma_alpha=p10_config.EWMA_ALPHA,
+ importance_weight=p10_config.IMPORTANCE_WEIGHT,
+ coherence_weight=p10_config.COHERENCE_WEIGHT,
+ freshness_weight=p10_config.FRESHNESS_WEIGHT,
+ monotony_weight=p10_config.MONOTONY_WEIGHT,
+ )
+
+ return ManualResult(
+ config=config,
+ total_score=float(output['score_breakdown']['total']),
+ best_total_score=best_total,
+ player_scores=player_scores_dict,
+ player_contributions=player_contributions_counts,
+ conversation_length=len(history),
+ early_termination=len(history) < conversation_length,
+ pause_count=pause_count,
+ unique_items_used=len(unique_items),
+ execution_time=0.0,
+ score_breakdown={k: float(v) for k, v in output['score_breakdown'].items()},
+ player_metrics=player_metrics,
+ player10_total_mean=player10_total,
+ player10_individual_mean=float(
+ next(
+ entry['scores']['individual']
+ for entry in scores['player_scores']
+ if label_map[str(entry['id'])] == 'Player10'
+ )
+ ),
+ player10_rank_mean=ranks['Player10'],
+ player10_gap_to_best=best_total - player10_total,
+ player10_instances=1,
+ )
+
+
+def run_manual_experiments() -> tuple[list[ManualResult], dict[str, dict[str, float]]]:
+ """Return all per-run results plus an aggregate summary per configuration."""
+ from core.engine import Engine
+ from players.pause_player import PausePlayer
+ from players.player_10.agent import config as p10_config
+ from players.player_10.agent.player import Player10Agent
+ from players.random_pause_player import RandomPausePlayer
+ from players.random_player import RandomPlayer
+
+ subjects = 10
+ memory_size = 16
+ conversation_length = 40
+ seeds = list(range(100, 116))
+
+ rosters: dict[str, Sequence[type]] = {
+ 'Balanced support (3 Random)': [Player10Agent, RandomPlayer, RandomPlayer, RandomPlayer],
+ 'Adversarial mix (Random, RandomPause, Pause)': [
+ Player10Agent,
+ RandomPlayer,
+ RandomPausePlayer,
+ PausePlayer,
+ ],
+ }
+
+ results: list[ManualResult] = []
+ aggregates: dict[str, list[float]] = {}
+
+ original_altruism = p10_config.ALTRUISM_USE_PROB
+
+ for roster_name, roster in rosters.items():
+ for altruism_value in (0.0, 0.6):
+ p10_config.ALTRUISM_USE_PROB = altruism_value
+
+ key = f'{roster_name} | altruism={altruism_value:.1f}'
+ aggregates[key] = []
+
+ for seed in seeds:
+ engine = Engine(
+ players=list(roster),
+ player_count=len(roster),
+ subjects=subjects,
+ memory_size=memory_size,
+ conversation_length=conversation_length,
+ seed=seed,
+ )
+ result = _build_manual_result(
+ engine,
+ seed=seed,
+ altruism=altruism_value,
+ roster=roster,
+ subjects=subjects,
+ memory_size=memory_size,
+ conversation_length=conversation_length,
+ )
+ results.append(result)
+ aggregates[key].append(result.total_score)
+
+ # Restore the original altruism probability so we do not affect other tooling
+ p10_config.ALTRUISM_USE_PROB = original_altruism
+
+ aggregate_summary = {
+ key: {
+ 'mean': stats.mean(values),
+ 'std': stats.pstdev(values) if len(values) > 1 else 0.0,
+ }
+ for key, values in aggregates.items()
+ }
+
+ output_payload = [
+ {
+ 'config': asdict(result.config),
+ 'total_score': result.total_score,
+ 'best_total_score': result.best_total_score,
+ 'player_scores': result.player_scores,
+ 'player_contributions': result.player_contributions,
+ 'conversation_length': result.conversation_length,
+ 'early_termination': result.early_termination,
+ 'pause_count': result.pause_count,
+ 'unique_items_used': result.unique_items_used,
+ 'execution_time': result.execution_time,
+ 'score_breakdown': result.score_breakdown,
+ 'player_metrics': result.player_metrics,
+ 'player10_total_mean': result.player10_total_mean,
+ 'player10_individual_mean': result.player10_individual_mean,
+ 'player10_rank_mean': result.player10_rank_mean,
+ 'player10_gap_to_best': result.player10_gap_to_best,
+ 'player10_instances': result.player10_instances,
+ 'altruism_prob': result.config.altruism_prob,
+ 'seed': result.config.seed,
+ 'players': result.config.players,
+ }
+ for result in results
+ ]
+
+    output_path_json = Path('players/player_10/results/manual_dashboard_runs.json')
+    output_path_json.parent.mkdir(parents=True, exist_ok=True)
+    output_path_json.write_text(json.dumps(output_payload, indent=2))
+ print(f'Detailed run data written to {output_path_json}')
+
+ return results, aggregate_summary
+
+
+def main(open_browser: bool = False) -> None:
+ from players.player_10.tools.dashboard import generate_dashboard
+
+ results, summary = run_manual_experiments()
+
+ analysis = {
+ 'total_simulations': len(results),
+ 'unique_configurations': len(summary),
+ 'best_configurations': [
+ {
+ 'label': label,
+                'mean_score': summary_stats['mean'],
+                'std_score': summary_stats['std'],
+            }
+            for label, summary_stats in sorted(
+ summary.items(), key=lambda item: item[1]['mean'], reverse=True
+ )
+ ],
+ }
+
+ dashboard_config = SimpleNamespace(
+ name='Manual Engine Experiments',
+ description='Player10 altruism sensitivity across two roster archetypes.',
+ output_dir='players/player_10/results',
+ )
+
+ output_path = generate_dashboard(
+ results,
+ analysis,
+ dashboard_config,
+ output_dir='players/player_10/results/dashboards',
+ open_browser=open_browser,
+ )
+
+ if output_path:
+ print(f'Dashboard written to: {output_path}')
+ else:
+ print('Plotly is not installed; dashboard generation skipped.')
+
+
+if __name__ == '__main__':
+ main(open_browser=False)
diff --git a/players/player_10/tools/reporting.py b/players/player_10/tools/reporting.py
index 441f266..2a8a023 100644
--- a/players/player_10/tools/reporting.py
+++ b/players/player_10/tools/reporting.py
@@ -10,16 +10,44 @@
from collections import defaultdict
from typing import Any
-from ..sim.test_framework import ParameterRange, TestConfiguration
+try:
+ from ..sim.test_framework import (
+ ParameterRange,
+ TestConfiguration,
+ )
+except ModuleNotFoundError:
+ ParameterRange = None # type: ignore
+ TestConfiguration = None # type: ignore
+ _BASELINE_CONFIG = None
+else:
+ _BASELINE_CONFIG = TestConfiguration(name='baseline_snapshot')
-_BASELINE_CONFIG = TestConfiguration(name='baseline_snapshot')
-
-def _first(range_field: ParameterRange) -> Any:
- return range_field.values[0] if range_field.values else None
+def _first(range_field) -> Any:
+ if range_field is None:
+ return None
+ return range_field.values[0] if getattr(range_field, 'values', None) else None
def _capture_baseline_meta() -> dict[str, Any]:
+ if _BASELINE_CONFIG is None:
+ return {
+ 'altruism_prob': 0.0,
+ 'tau_margin': 0.0,
+ 'epsilon_fresh': 0.0,
+ 'epsilon_mono': 0.0,
+ 'min_samples_pid': 5,
+ 'ewma_alpha': 0.0,
+ 'importance_weight': 1.0,
+ 'coherence_weight': 1.0,
+ 'freshness_weight': 1.0,
+ 'monotony_weight': 1.0,
+ 'conversation_length': 0,
+ 'subjects': 0,
+ 'memory_size': 0,
+ 'players': {},
+ }
+
players = dict(_BASELINE_CONFIG.player_configs[0]) if _BASELINE_CONFIG.player_configs else {}
return {
'altruism_prob': _first(_BASELINE_CONFIG.altruism_probs),
diff --git a/pyproject.toml b/pyproject.toml
index 9c5bf04..15e3d0b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,8 +8,8 @@ dependencies = [
"pygame>=2.6.1",
"openai",
"ruff>=0.12.8",
- "numpy>=2.3.3",
- "torch>=2.8.0",
+ "numpy",
+ "torch",
]
[tool.ruff]
diff --git a/uv.lock b/uv.lock
index 23a48b9..7efbbe4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -61,11 +61,11 @@ dev = [
[package.metadata]
requires-dist = [
- { name = "numpy", specifier = ">=2.3.3" },
+ { name = "numpy" },
{ name = "openai" },
{ name = "pygame", specifier = ">=2.6.1" },
{ name = "ruff", specifier = ">=0.12.8" },
- { name = "torch", specifier = ">=2.8.0" },
+ { name = "torch" },
]
[package.metadata.requires-dev]