diff --git a/.gitignore b/.gitignore
index 58fc268..2b043c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,11 @@ simulation_results/
 
 # Result files
 *.json
+
+# Jupyter notebooks
+*.ipynb
+
+# Syncthing temporary files
+.syncthing.*.tmp
+
+.ruff-venv
diff --git a/players/player_10/Analyse_results.ipynb b/players/player_10/Analyse_results.ipynb
new file mode 100644
index 0000000..c9deba4
--- /dev/null
+++ b/players/player_10/Analyse_results.ipynb
@@ -0,0 +1,891 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ac93bef8-0df4-4f98-bc7e-f1fc50bc29cb",
+   "metadata": {},
+   "source": [
+    "Results analysis and visualization tools for Monte Carlo simulations.\n",
+    "\n",
+    "This notebook provides tools to analyze simulation results and create visualizations\n",
+    "to understand the performance of different Player10 configurations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0264f566-fb53-4e69-974b-d6cd7313bc9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from __future__ import annotations\n",
+    "\n",
+    "import argparse\n",
+    "import os\n",
+    "import sys\n",
+    "from pathlib import Path\n",
+    "from typing import Any\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "\n",
+    "# A relative import (`from ..sim.monte_carlo import ...`) fails here with\n",
+    "# ImportError: a notebook has no parent package. Assuming this notebook lives\n",
+    "# in players/player_10, put the repo root on sys.path and import through the\n",
+    "# absolute package path instead (adjust if the layout differs).\n",
+    "sys.path.append(str(Path.cwd().parents[1]))\n",
+    "from players.player_10.sim.monte_carlo import MonteCarloSimulator, SimulationResult"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a5efd409-016b-4a02-ab5d-3a507b905b46",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ----------------------------\n",
+    "# Analyzer\n",
+    "# ----------------------------\n",
+    "\n",
+    "class ResultsAnalyzer:\n",
+    "    \"\"\"Analyzer for Monte Carlo simulation results (enhanced).\"\"\"\n",
+    "\n",
+    "    def __init__(self, results_file: str | None = None):\n",
+    "        \"\"\"\n",
+    "        
Initialize the analyzer.\n", + "\n", + " Args:\n", + " results_file: Path to results JSON file to load\n", + " \"\"\"\n", + " self.simulator = MonteCarloSimulator()\n", + " self.results: list[SimulationResult] = []\n", + " self.metadata: dict[str, Any] = {}\n", + "\n", + " if results_file:\n", + " self.load_results(results_file)\n", + "\n", + " # ---------- IO ----------\n", + "\n", + " def load_results(self, filename: str):\n", + " \"\"\"Load results from a JSON file (produced by MonteCarloSimulator.save_results).\"\"\"\n", + " self.results = self.simulator.load_results(filename)\n", + " self.metadata = self.simulator.last_metadata\n", + " print(f'Loaded {len(self.results)} simulation results')\n", + "\n", + " # ---------- DataFrames ----------\n", + "\n", + " def create_dataframe(self) -> pd.DataFrame:\n", + " \"\"\"\n", + " Convert results to a pandas DataFrame for run-level analysis.\n", + "\n", + " Includes:\n", + " - All important config knobs (altruism prob, tau, epsilons, weights)\n", + " - Run outcomes (scores, lengths, pauses, etc.)\n", + " - Shared component breakdown (prefixed with shared_)\n", + " - Derived feature: length_utilization\n", + " \"\"\"\n", + " data: list[dict[str, Any]] = []\n", + "\n", + " for r in self.results:\n", + " row = {\n", + " # core config knobs\n", + " 'altruism_prob': r.config.altruism_prob,\n", + " 'tau_margin': r.config.tau_margin,\n", + " 'epsilon_fresh': r.config.epsilon_fresh,\n", + " 'epsilon_mono': r.config.epsilon_mono,\n", + " 'subjects': r.config.subjects,\n", + " 'memory_size': r.config.memory_size,\n", + " 'conversation_length_cfg': r.config.conversation_length,\n", + " 'seed': r.config.seed,\n", + "\n", + " # weights / algo params\n", + " 'min_samples_pid': r.config.min_samples_pid,\n", + " 'ewma_alpha': r.config.ewma_alpha,\n", + " 'importance_weight': r.config.importance_weight,\n", + " 'coherence_weight': r.config.coherence_weight,\n", + " 'freshness_weight': r.config.freshness_weight,\n", + " 'monotony_weight': r.config.monotony_weight,\n", + "\n", + " # run-level outcomes\n", + " 'total_score': r.total_score,\n", + " 'player10_score': r.player10_total_mean,\n", + " 'player10_individual': r.player10_individual_mean,\n", + " 'player10_rank': r.player10_rank_mean,\n", + " 'player10_gap_to_best': r.player10_gap_to_best,\n", + " 'player10_instances': r.player10_instances,\n", + " 'best_total_score': r.best_total_score,\n", + " 'conversation_length': r.conversation_length,\n", + " 'early_termination': float(r.early_termination),\n", + " 'pause_count': r.pause_count,\n", + " 'unique_items_used': r.unique_items_used,\n", + " 'execution_time': r.execution_time,\n", + " }\n", + "\n", + " # Include shared score components (flatten)\n", + " for comp, val in (r.score_breakdown or {}).items():\n", + " if comp == 'total':\n", + " continue\n", + " row[f'shared_{comp}'] = val\n", + "\n", + " data.append(row)\n", + "\n", + " df = pd.DataFrame(data)\n", + "\n", + " # Derived features\n", + " if 'conversation_length_cfg' in df and 'conversation_length' in df:\n", + " with np.errstate(divide='ignore', invalid='ignore'):\n", + " df['length_utilization'] = df['conversation_length'] / df['conversation_length_cfg']\n", + "\n", + " return df\n", + "\n", + " def create_player_long(self) -> pd.DataFrame:\n", + " \"\"\"\n", + " Explode SimulationResult.player_metrics into a long-form dataframe.\n", + "\n", + " Columns: seed, config knobs, label, class_name, alias, total, shared, individual, rank\n", + " Useful for rank distributions and per-player analyses.\n", 
+ " \"\"\"\n", + " rows: list[dict[str, Any]] = []\n", + " for r in self.results:\n", + " cfg = {\n", + " 'altruism_prob': r.config.altruism_prob,\n", + " 'tau_margin': r.config.tau_margin,\n", + " 'epsilon_fresh': r.config.epsilon_fresh,\n", + " 'epsilon_mono': r.config.epsilon_mono,\n", + " 'seed': r.config.seed,\n", + " }\n", + " for label, m in (r.player_metrics or {}).items():\n", + " rows.append({\n", + " **cfg,\n", + " 'label': label,\n", + " 'class_name': m.get('class_name'),\n", + " 'alias': m.get('alias'),\n", + " 'total': m.get('total'),\n", + " 'shared': m.get('shared'),\n", + " 'individual': m.get('individual'),\n", + " 'rank': m.get('rank'),\n", + " })\n", + " return pd.DataFrame(rows)\n", + "\n", + " # ---------- Statistics helpers ----------\n", + "\n", + " def bootstrap_ci(\n", + " self,\n", + " df: pd.DataFrame,\n", + " group_cols: list[str],\n", + " metric: str,\n", + " B: int = 1000,\n", + " ci: float = 0.95\n", + " ) -> pd.DataFrame:\n", + " \"\"\"\n", + " Bootstrapped mean & CI for (group_cols, metric).\n", + " Returns columns: group_cols..., mean, ci_low, ci_high, n\n", + " \"\"\"\n", + " out = []\n", + " q_lo, q_hi = (1 - ci) / 2, 1 - (1 - ci) / 2\n", + " for key, g in df.groupby(group_cols):\n", + " values = g[metric].dropna().to_numpy()\n", + " if len(values) == 0:\n", + " continue\n", + " boot = []\n", + " for _ in range(B):\n", + " sample = np.random.choice(values, size=len(values), replace=True)\n", + " boot.append(sample.mean())\n", + " lo, hi = np.quantile(boot, [q_lo, q_hi])\n", + " row = {'mean': values.mean(), 'ci_low': float(lo), 'ci_high': float(hi), 'n': len(values)}\n", + " # Attach group key(s)\n", + " if isinstance(key, tuple):\n", + " for c, v in zip(group_cols, key, strict=False):\n", + " row[c] = v\n", + " else:\n", + " row[group_cols[0]] = key\n", + " out.append(row)\n", + " cols = group_cols + ['mean', 'ci_low', 'ci_high', 'n']\n", + " return pd.DataFrame(out)[cols]\n", + "\n", + " def pairwise_altruism_deltas(self, metric: str = 'total_score') -> pd.DataFrame:\n", + " \"\"\"\n", + " Pairwise mean delta & Cohen's d between altruism levels for a metric.\n", + " Returns: a, b, delta_mean, cohens_d, nx, ny\n", + " \"\"\"\n", + " df = self.create_dataframe()\n", + " levels = sorted(df['altruism_prob'].unique())\n", + " rows = []\n", + " for i, a in enumerate(levels):\n", + " for b in levels[i + 1:]:\n", + " x = df[df.altruism_prob == a][metric].dropna()\n", + " y = df[df.altruism_prob == b][metric].dropna()\n", + " if len(x) and len(y):\n", + " delta = y.mean() - x.mean()\n", + " pooled = np.sqrt(\n", + " ((x.var(ddof=1) * (len(x) - 1)) + (y.var(ddof=1) * (len(y) - 1)))\n", + " / (len(x) + len(y) - 2)\n", + " )\n", + " d = delta / pooled if pooled > 0 else np.nan\n", + " rows.append({\n", + " 'a': a, 'b': b,\n", + " 'delta_mean': float(delta),\n", + " 'cohens_d': float(d),\n", + " 'nx': int(len(x)), 'ny': int(len(y)),\n", + " })\n", + " return pd.DataFrame(rows)\n", + "\n", + " # ---------- Plots (existing + new) ----------\n", + "\n", + " def plot_altruism_comparison(self, save_path: str | None = None):\n", + " \"\"\"Create plots comparing different altruism probabilities (existing, kept).\"\"\"\n", + " if not self.results:\n", + " print('No results loaded. 
Please load results first.')\n", + " return\n", + "\n", + " df = self.create_dataframe()\n", + "\n", + " altruism_groups = (\n", + " df.groupby('altruism_prob')\n", + " .agg(\n", + " {\n", + " 'total_score': ['mean', 'std', 'count'],\n", + " 'player10_score': ['mean', 'std'],\n", + " 'conversation_length': 'mean',\n", + " 'early_termination': 'mean',\n", + " 'pause_count': 'mean',\n", + " }\n", + " )\n", + " .round(3)\n", + " )\n", + "\n", + " fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n", + " fig.suptitle('Player10 Altruism Probability Comparison', fontsize=16)\n", + "\n", + " # Plot 1: Total Score vs Altruism Probability\n", + " ax1 = axes[0, 0]\n", + " altruism_probs = altruism_groups.index\n", + " mean_scores = altruism_groups[('total_score', 'mean')]\n", + " std_scores = altruism_groups[('total_score', 'std')]\n", + " ax1.errorbar(altruism_probs, mean_scores, yerr=std_scores, marker='o', capsize=5, capthick=2)\n", + " ax1.set_xlabel('Altruism Probability')\n", + " ax1.set_ylabel('Total Score')\n", + " ax1.set_title('Total Score vs Altruism Probability')\n", + " ax1.grid(True, alpha=0.3)\n", + "\n", + " # Plot 2: Player10 Score vs Altruism Probability\n", + " ax2 = axes[0, 1]\n", + " mean_p10_scores = altruism_groups[('player10_score', 'mean')]\n", + " std_p10_scores = altruism_groups[('player10_score', 'std')]\n", + " ax2.errorbar(\n", + " altruism_probs, mean_p10_scores, yerr=std_p10_scores,\n", + " marker='s', capsize=5, capthick=2, color='orange'\n", + " )\n", + " ax2.set_xlabel('Altruism Probability')\n", + " ax2.set_ylabel('Player10 Score')\n", + " ax2.set_title('Player10 Individual Score vs Altruism Probability')\n", + " ax2.grid(True, alpha=0.3)\n", + "\n", + " # Plot 3: Conversation Length vs Altruism Probability\n", + " ax3 = axes[1, 0]\n", + " conv_lengths = altruism_groups[('conversation_length', 'mean')]\n", + " ax3.plot(altruism_probs, conv_lengths, marker='^')\n", + " ax3.set_xlabel('Altruism Probability')\n", + " ax3.set_ylabel('Average Conversation Length')\n", + " ax3.set_title('Conversation Length vs Altruism Probability')\n", + " ax3.grid(True, alpha=0.3)\n", + "\n", + " # Plot 4: Early Termination Rate vs Altruism Probability\n", + " ax4 = axes[1, 1]\n", + " early_term_rates = altruism_groups[('early_termination', 'mean')]\n", + " ax4.plot(altruism_probs, early_term_rates, marker='d')\n", + " ax4.set_xlabel('Altruism Probability')\n", + " ax4.set_ylabel('Early Termination Rate')\n", + " ax4.set_title('Early Termination Rate vs Altruism Probability')\n", + " ax4.grid(True, alpha=0.3)\n", + "\n", + " plt.tight_layout()\n", + " if save_path:\n", + " plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Plot saved to: {save_path}')\n", + " plt.show()\n", + "\n", + " def plot_parameter_heatmap(\n", + " self, param1: str, param2: str, metric: str = 'total_score', save_path: str | None = None\n", + " ):\n", + " \"\"\"Create a heatmap showing the interaction between two parameters.\"\"\"\n", + " if not self.results:\n", + " print('No results loaded. 
Please load results first.')\n",
+    "            return\n",
+    "\n",
+    "        df = self.create_dataframe()\n",
+    "        pivot = df.groupby([param1, param2])[metric].mean().unstack()\n",
+    "\n",
+    "        plt.figure(figsize=(10, 8))\n",
+    "        sns.heatmap(pivot, annot=True, fmt='.2f', cmap='viridis')\n",
+    "        plt.title(f'{metric.title()} Heatmap: {param1} vs {param2}')\n",
+    "        plt.xlabel(param2.replace('_', ' ').title())\n",
+    "        plt.ylabel(param1.replace('_', ' ').title())\n",
+    "\n",
+    "        if save_path:\n",
+    "            plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Heatmap saved to: {save_path}')\n",
+    "        plt.show()\n",
+    "\n",
+    "    def plot_score_distributions(self, save_path: str | None = None):\n",
+    "        \"\"\"Plot score distributions for different altruism probabilities.\"\"\"\n",
+    "        if not self.results:\n",
+    "            print('No results loaded. Please load results first.')\n",
+    "            return\n",
+    "\n",
+    "        df = self.create_dataframe()\n",
+    "        altruism_probs = sorted(df['altruism_prob'].unique())\n",
+    "\n",
+    "        fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n",
+    "        fig.suptitle('Score Distributions by Altruism Probability', fontsize=16)\n",
+    "\n",
+    "        # Plot 1: Total Score Distributions\n",
+    "        ax1 = axes[0]\n",
+    "        for prob in altruism_probs:\n",
+    "            scores = df[df['altruism_prob'] == prob]['total_score']\n",
+    "            ax1.hist(scores, alpha=0.6, label=f'Altruism: {prob:.1f}', bins=20)\n",
+    "        ax1.set_xlabel('Total Score'); ax1.set_ylabel('Frequency')\n",
+    "        ax1.set_title('Total Score Distributions'); ax1.legend(); ax1.grid(True, alpha=0.3)\n",
+    "\n",
+    "        # Plot 2: Player10 Score Distributions\n",
+    "        ax2 = axes[1]\n",
+    "        for prob in altruism_probs:\n",
+    "            scores = df[df['altruism_prob'] == prob]['player10_score']\n",
+    "            ax2.hist(scores, alpha=0.6, label=f'Altruism: {prob:.1f}', bins=20)\n",
+    "        ax2.set_xlabel('Player10 Score'); ax2.set_ylabel('Frequency')\n",
+    "        ax2.set_title('Player10 Individual Score Distributions'); ax2.legend(); ax2.grid(True, alpha=0.3)\n",
+    "\n",
+    "        plt.tight_layout()\n",
+    "        if save_path:\n",
+    "            plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Distributions plot saved to: {save_path}')\n",
+    "        plt.show()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45cbdb6f-ded2-42f7-8232-ee810b7f8a8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ----- NEW PLOTS -----\n",
+    "\n",
+    "# The methods below are indented as class members, so a bare cell would raise\n",
+    "# an IndentationError; re-declare the class inheriting from itself so each cell\n",
+    "# keeps extending ResultsAnalyzer.\n",
+    "class ResultsAnalyzer(ResultsAnalyzer):\n",
+    "\n",
+    "    def plot_component_stack(self, save_path: str | None = None):\n",
+    "        \"\"\"Stacked bars of shared component means vs altruism_prob.\"\"\"\n",
+    "        df = self.create_dataframe()\n",
+    "        keep = ['shared_importance', 'shared_coherence', 'shared_freshness', 'shared_nonmonotonousness']\n",
+    "        have = [c for c in keep if c in df.columns]\n",
+    "        if not have:\n",
+    "            print('No shared component breakdown in results.')\n",
+    "            return\n",
+    "        g = df.groupby('altruism_prob')[have].mean().reset_index().sort_values('altruism_prob')\n",
+    "        ax = g.set_index('altruism_prob')[have].plot(kind='bar', stacked=True, figsize=(12, 6))\n",
+    "        ax.set_ylabel('Mean shared component score'); ax.set_xlabel('Altruism probability')\n",
+    "        ax.set_title('Shared score component breakdown vs altruism'); ax.legend(title='Component')\n",
+    "        ax.grid(True, axis='y', alpha=0.3); plt.tight_layout()\n",
+    "        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+    "        plt.show()\n",
+    "\n",
+    "    def plot_pareto_tradeoff(self, save_path: str | None = None):\n",
+    "        \"\"\"Scatter of mean Player10 individual vs mean total score, colored by altruism.\"\"\"\n",
+    "        df = 
self.create_dataframe()\n", + " agg = (df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])\n", + " .agg({'total_score': 'mean', 'player10_individual': 'mean', 'early_termination': 'mean'})\n", + " .reset_index())\n", + " plt.figure(figsize=(9, 7))\n", + " s = plt.scatter(\n", + " agg['player10_individual'], agg['total_score'],\n", + " c=agg['altruism_prob'], cmap='viridis', s=60, alpha=0.85\n", + " )\n", + " plt.colorbar(s, label='altruism_prob')\n", + " # Annotate \"risky\" configs\n", + " for _, r in agg.iterrows():\n", + " if r['early_termination'] > 0.30:\n", + " plt.annotate('ET>0.3', (r['player10_individual'], r['total_score']), fontsize=8)\n", + " plt.xlabel('Player10 individual (mean)')\n", + " plt.ylabel('Total score (mean)')\n", + " plt.title('Pareto trade-off: Player10 individual vs Total')\n", + " plt.grid(True, alpha=0.3); plt.tight_layout()\n", + " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n", + " plt.show()\n", + "\n", + " def plot_rank_distribution(self, save_path: str | None = None):\n", + " \"\"\"Violin plots of Player10 rank across seeds vs altruism.\"\"\"\n", + " dfp = self.create_player_long()\n", + " if dfp.empty or 'rank' not in dfp:\n", + " print('No per-player metrics available.')\n", + " return\n", + " dfp_p10 = dfp[dfp['class_name'] == 'Player10']\n", + " if dfp_p10.empty:\n", + " print('No Player10 entries in per-player metrics.')\n", + " return\n", + " plt.figure(figsize=(10, 5))\n", + " sns.violinplot(data=dfp_p10, x='altruism_prob', y='rank', inner='quartile', cut=0)\n", + " plt.gca().invert_yaxis() # rank 1 is best\n", + " plt.title('Player10 rank distribution across seeds'); plt.xlabel('Altruism probability')\n", + " plt.ylabel('Rank (lower is better)'); plt.tight_layout()\n", + " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n", + " plt.show()\n", + "\n", + " def plot_seed_stability(self, metric: str = 'total_score', save_path: str | None = None):\n", + " \"\"\"Cumulative mean vs number of simulations (sorted by seed) to show stabilization.\"\"\"\n", + " df = self.create_dataframe().sort_values('seed')\n", + " curves: list[tuple[float, np.ndarray]] = []\n", + " for p, g in df.groupby('altruism_prob'):\n", + " means = g[metric].expanding().mean().values\n", + " curves.append((p, means))\n", + "\n", + " plt.figure(figsize=(10, 6))\n", + " for p, means in curves:\n", + " plt.plot(range(1, len(means) + 1), means, label=f'p={p}')\n", + " plt.xlabel('Number of simulations (cumulative)'); plt.ylabel(f'Cumulative mean {metric}')\n", + " plt.title('Seed stability of the estimate'); plt.legend(title='altruism_prob')\n", + " plt.grid(True, alpha=0.3); plt.tight_layout()\n", + " if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n", + " plt.show()\n", + "\n", + " def plot_correlation_heatmap(self, save_path: str | None = None):\n", + " \"\"\"Correlation among knobs and outcomes.\"\"\"\n", + " df = self.create_dataframe()\n", + " cols = [\n", + " 'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',\n", + " 'importance_weight', 'coherence_weight', 'freshness_weight', 'monotony_weight',\n", + " 'total_score', 'player10_score', 'early_termination', 'pause_count',\n", + " 'unique_items_used', 'length_utilization'\n", + " ]\n", + " cols = [c for c in cols if c in df.columns]\n", + " corr = df[cols].corr(numeric_only=True)\n", + " plt.figure(figsize=(10, 8))\n", + " sns.heatmap(corr, 
annot=True, fmt='.2f', cmap='coolwarm', center=0)\n",
+    "        plt.title('Correlation matrix: knobs vs outcomes'); plt.tight_layout()\n",
+    "        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+    "        plt.show()\n",
+    "\n",
+    "    def plot_multi_heatmaps(\n",
+    "        self,\n",
+    "        fixed: str = 'altruism_prob',\n",
+    "        metric: str = 'total_score',\n",
+    "        cols: tuple[str, str] = ('tau_margin', 'epsilon_fresh'),\n",
+    "        save_path: str | None = None\n",
+    "    ):\n",
+    "        \"\"\"Small-multiple heatmaps for metric by two parameters, faceted by a fixed param.\"\"\"\n",
+    "        df = self.create_dataframe()\n",
+    "        vals = sorted(df[fixed].unique())\n",
+    "        n = len(vals)\n",
+    "        fig, axes = plt.subplots(1, n, figsize=(6 * n, 5), sharey=True)\n",
+    "        if n == 1:\n",
+    "            axes = [axes]\n",
+    "        for ax, v in zip(axes, vals, strict=False):\n",
+    "            sub = df[df[fixed] == v]\n",
+    "            if sub.empty:\n",
+    "                ax.set_visible(False)\n",
+    "                continue\n",
+    "            pivot = sub.groupby(list(cols))[metric].mean().unstack()\n",
+    "            sns.heatmap(pivot, ax=ax, annot=True, fmt='.2f', cmap='viridis')\n",
+    "            ax.set_title(f'{metric} | {fixed}={v}')\n",
+    "            ax.set_xlabel(cols[1]); ax.set_ylabel(cols[0])\n",
+    "        plt.tight_layout()\n",
+    "        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')\n",
+    "        plt.show()\n"
+   ]
+  },
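+  {
+   "cell_type": "markdown",
+   "id": "f1a2b3c4-1111-4111-8111-000000000001",
+   "metadata": {},
+   "source": [
+    "A minimal usage sketch for the plotting helpers above. The results path is a\n",
+    "placeholder, not a file shipped with this change — any JSON written by\n",
+    "`MonteCarloSimulator.save_results` should work — and the cell is guarded so it\n",
+    "is a no-op when no such file exists."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1a2b3c4-1111-4111-8111-000000000002",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical usage sketch: RESULTS_FILE is a placeholder path; point it at a\n",
+    "# real results JSON produced by the simulator before running.\n",
+    "RESULTS_FILE = 'simulation_results/monte_carlo_results.json'\n",
+    "\n",
+    "if Path(RESULTS_FILE).exists():\n",
+    "    _analyzer = ResultsAnalyzer(RESULTS_FILE)\n",
+    "    _analyzer.plot_component_stack()\n",
+    "    _analyzer.plot_pareto_tradeoff()\n",
+    "else:\n",
+    "    print(f'No results at {RESULTS_FILE}; run the simulator first.')"
+   ]
+  },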
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "78d49284-708b-4c7a-a0c9-b75cb05453a5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ---------- Modeling (optional) ----------\n",
+    "\n",
+    "# Continue extending ResultsAnalyzer (same cell-continuation pattern as above).\n",
+    "class ResultsAnalyzer(ResultsAnalyzer):\n",
+    "\n",
+    "    def run_ols(self, metric: str = 'total_score'):\n",
+    "        \"\"\"\n",
+    "        OLS regression of metric on config knobs (robust SE). Requires statsmodels.\n",
+    "        Returns the fitted model.\n",
+    "        \"\"\"\n",
+    "        try:\n",
+    "            import statsmodels.api as sm\n",
+    "        except ImportError:\n",
+    "            print(\"statsmodels not installed. `pip install statsmodels` to use run_ols().\")\n",
+    "            return None\n",
+    "\n",
+    "        df = self.create_dataframe().dropna(subset=[metric])\n",
+    "        X_cols = [\n",
+    "            'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',\n",
+    "            'importance_weight', 'coherence_weight', 'freshness_weight', 'monotony_weight'\n",
+    "        ]\n",
+    "        X_cols = [c for c in X_cols if c in df.columns]\n",
+    "        X = df[X_cols].copy()\n",
+    "        X = sm.add_constant(X)\n",
+    "        y = df[metric]\n",
+    "        model = sm.OLS(y, X).fit(cov_type='HC3')  # robust SE\n",
+    "        print(model.summary())\n",
+    "        return model\n",
+    "\n",
+    "    def run_logistic_early_term(self):\n",
+    "        \"\"\"\n",
+    "        Logistic regression predicting early termination. Requires statsmodels.\n",
+    "        Returns the fitted model.\n",
+    "        \"\"\"\n",
+    "        try:\n",
+    "            import statsmodels.api as sm\n",
+    "        except ImportError:\n",
+    "            print(\"statsmodels not installed. `pip install statsmodels` to use run_logistic_early_term().\")\n",
+    "            return None\n",
+    "\n",
+    "        df = self.create_dataframe().dropna(subset=['early_termination'])\n",
+    "        X_cols = [\n",
+    "            'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',\n",
+    "            'pause_count', 'unique_items_used', 'conversation_length_cfg'\n",
+    "        ]\n",
+    "        X_cols = [c for c in X_cols if c in df.columns]\n",
+    "        X = df[X_cols].copy()\n",
+    "        X = sm.add_constant(X)\n",
+    "        y = df['early_termination'].astype(int)\n",
+    "        model = sm.Logit(y, X).fit(disp=False)\n",
+    "        print(model.summary())\n",
+    "        return model\n",
+    "\n",
+    "    # ---------- Config search ----------\n",
+    "\n",
+    "    def best_configs(\n",
+    "        self,\n",
+    "        objective: str = 'total_score',\n",
+    "        constraints: dict[str, tuple[float | None, float | None]] | None = None,\n",
+    "        top_k: int = 10\n",
+    "    ) -> pd.DataFrame:\n",
+    "        \"\"\"\n",
+    "        Find top configs by objective subject to optional constraints.\n",
+    "        constraints example: {'early_termination': (None, 0.2)}  # <= 0.2\n",
+    "        \"\"\"\n",
+    "        df = self.create_dataframe()\n",
+    "        agg = (\n",
+    "            df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])\n",
+    "            .agg({\n",
+    "                objective: 'mean',\n",
+    "                'early_termination': 'mean',\n",
+    "                'player10_individual': 'mean',\n",
+    "                'total_score': 'mean'\n",
+    "            })\n",
+    "            .reset_index()\n",
+    "        )\n",
+    "        if constraints:\n",
+    "            mask = pd.Series(True, index=agg.index)\n",
+    "            for col, (lo, hi) in constraints.items():\n",
+    "                if lo is not None:\n",
+    "                    mask &= agg[col] >= lo\n",
+    "                if hi is not None:\n",
+    "                    mask &= agg[col] <= hi\n",
+    "            agg = agg[mask]\n",
+    "        return agg.sort_values(objective, ascending=False).head(top_k)\n",
+    "\n",
+    "    # ---------- Quick report ----------\n",
+    "\n",
+    "    def save_quick_report(self, out_dir: str = 'report_out'):\n",
+    "        \"\"\"Save a set of figures and a short markdown summary to a directory.\"\"\"\n",
+    "        os.makedirs(out_dir, exist_ok=True)\n",
+    "\n",
+    "        # figures\n",
+    "        self.plot_altruism_comparison(f'{out_dir}/altruism_comparison.png')\n",
+    "        self.plot_component_stack(f'{out_dir}/component_stack.png')\n",
+    "        self.plot_pareto_tradeoff(f'{out_dir}/pareto.png')\n",
+    "        self.plot_rank_distribution(f'{out_dir}/rank_violin.png')\n",
+    "        self.plot_seed_stability(save_path=f'{out_dir}/seed_stability.png')\n",
+    "        self.plot_correlation_heatmap(f'{out_dir}/corr.png')\n",
+    "        self.plot_multi_heatmaps(save_path=f'{out_dir}/multi_heatmaps.png')\n",
+    "\n",
+    "        # analysis text\n",
+    "        df = self.create_dataframe()\n",
+    "        lines = [\n",
+    "            '# Simulation Summary',\n",
+    "            '',\n",
+    "            f'- Total sims: {len(df)}',\n",
+    "            f'- Unique configs: {df.groupby([\"altruism_prob\", \"tau_margin\", \"epsilon_fresh\", \"epsilon_mono\"]).ngroups}',\n",
+    "            f'- Overall total mean ± std: {df[\"total_score\"].mean():.2f} ± {df[\"total_score\"].std():.2f}',\n",
+    "            f'- Early termination rate: {df[\"early_termination\"].mean():.2f}',\n",
+    "        ]\n",
+    "        (Path(out_dir) / 'SUMMARY.md').write_text('\\n'.join(lines), encoding='utf-8')\n",
+    "        print(f'Report written to {out_dir}/')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25c5656e-9904-4a41-8ed3-585695394245",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ---------- Text summary (existing, kept with minor tweaks) ----------\n",
+    "\n",
+    "# Continue extending ResultsAnalyzer (same cell-continuation pattern as above).\n",
+    "class ResultsAnalyzer(ResultsAnalyzer):\n",
+    "\n",
+    "    def print_detailed_analysis(self):\n",
+    "        \"\"\"Print detailed analysis of the results.\"\"\"\n",
+    "        if not self.results:\n",
+    "            print('No results loaded. 
Please load results first.')\n", + " return\n", + "\n", + " df = self.create_dataframe()\n", + "\n", + " print('=== DETAILED ANALYSIS ===')\n", + " print(f'Total simulations: {len(df)}')\n", + " print(\n", + " f'Unique configurations: {df.groupby([\"altruism_prob\", \"tau_margin\", \"epsilon_fresh\", \"epsilon_mono\"]).ngroups}'\n", + " )\n", + "\n", + " # Overall statistics\n", + " print('\\n=== OVERALL STATISTICS ===')\n", + " print(f'Total Score - Mean: {df[\"total_score\"].mean():.2f}, Std: {df[\"total_score\"].std():.2f}')\n", + " print(f'Player10 Score - Mean: {df[\"player10_score\"].mean():.2f}, Std: {df[\"player10_score\"].std():.2f}')\n", + " if 'player10_individual' in df:\n", + " print(\n", + " f'Player10 Individual - Mean: {df[\"player10_individual\"].mean():.2f}, '\n", + " f'Std: {df[\"player10_individual\"].std():.2f}'\n", + " )\n", + " if 'player10_rank' in df:\n", + " print(\n", + " f'Player10 Rank - Mean: {df[\"player10_rank\"].mean():.2f}, '\n", + " f'Std: {df[\"player10_rank\"].std():.2f}'\n", + " )\n", + " print(\n", + " f'Conversation Length - Mean: {df[\"conversation_length\"].mean():.1f}, '\n", + " f'Std: {df[\"conversation_length\"].std():.1f}'\n", + " )\n", + " print(f'Early Termination Rate: {df[\"early_termination\"].mean():.2f}')\n", + "\n", + " # Best configurations\n", + " print('\\n=== TOP 10 CONFIGURATIONS ===')\n", + " agg_map = {'total_score': ['mean', 'std', 'count'], 'player10_score': 'mean'}\n", + " if 'player10_rank' in df:\n", + " agg_map['player10_rank'] = 'mean'\n", + " if 'player10_individual' in df:\n", + " agg_map['player10_individual'] = 'mean'\n", + "\n", + " top_configs = (\n", + " df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])\n", + " .agg(agg_map)\n", + " .round(3)\n", + " )\n", + "\n", + " new_columns = ['total_mean', 'total_std', 'count', 'p10_mean']\n", + " if 'player10_rank' in agg_map:\n", + " new_columns.append('p10_rank_mean')\n", + " if 'player10_individual' in agg_map:\n", + " new_columns.append('p10_individual_mean')\n", + " top_configs.columns = new_columns\n", + " top_configs = top_configs.sort_values('total_mean', ascending=False).head(10)\n", + "\n", + " for i, (config, row) in enumerate(top_configs.iterrows(), 1):\n", + " altruism, tau, fresh, mono = config\n", + " parts = [\n", + " f'{i:2d}. 
Altruism: {altruism:.1f}',\n",
+    "                f'Tau: {tau:.2f}',\n",
+    "                f'Fresh: {fresh:.2f}',\n",
+    "                f'Mono: {mono:.2f}',\n",
+    "                f'Total: {row[\"total_mean\"]:.2f}±{row[\"total_std\"]:.2f}',\n",
+    "                f'P10: {row[\"p10_mean\"]:.2f}',\n",
+    "            ]\n",
+    "            if 'p10_rank_mean' in row:\n",
+    "                parts.append(f'P10 Rank: {row[\"p10_rank_mean\"]:.2f}')\n",
+    "            if 'p10_individual_mean' in row:\n",
+    "                parts.append(f'P10 Individual: {row[\"p10_individual_mean\"]:.2f}')\n",
+    "            print(' -> '.join(parts))\n",
+    "\n",
+    "        # Altruism analysis\n",
+    "        print('\\n=== ALTRUISM ANALYSIS ===')\n",
+    "        agg_map = {\n",
+    "            'total_score': ['mean', 'std'],\n",
+    "            'player10_score': ['mean', 'std'],\n",
+    "            'conversation_length': 'mean',\n",
+    "            'early_termination': 'mean',\n",
+    "        }\n",
+    "        if 'player10_rank' in df:\n",
+    "            agg_map['player10_rank'] = ['mean', 'std']\n",
+    "        if 'player10_individual' in df:\n",
+    "            agg_map['player10_individual'] = ['mean', 'std']\n",
+    "\n",
+    "        altruism_stats = df.groupby('altruism_prob').agg(agg_map).round(3)\n",
+    "        for prob in sorted(df['altruism_prob'].unique()):\n",
+    "            stats = altruism_stats.loc[prob]\n",
+    "            parts = [\n",
+    "                f'Altruism {prob:.1f}:',\n",
+    "                f'Total={stats[(\"total_score\", \"mean\")]:.2f}±{stats[(\"total_score\", \"std\")]:.2f}',\n",
+    "                f'P10={stats[(\"player10_score\", \"mean\")]:.2f}±{stats[(\"player10_score\", \"std\")]:.2f}',\n",
+    "                f'Length={stats[(\"conversation_length\", \"mean\")]:.1f}',\n",
+    "                f'EarlyTerm={stats[(\"early_termination\", \"mean\")]:.2f}',\n",
+    "            ]\n",
+    "            if ('player10_rank', 'mean') in stats:\n",
+    "                parts.append(\n",
+    "                    f'P10 Rank={stats[(\"player10_rank\", \"mean\")]:.2f}±{stats[(\"player10_rank\", \"std\")]:.2f}'\n",
+    "                )\n",
+    "            if ('player10_individual', 'mean') in stats:\n",
+    "                parts.append(\n",
+    "                    f'P10 Ind={stats[(\"player10_individual\", \"mean\")]:.2f}±{stats[(\"player10_individual\", \"std\")]:.2f}'\n",
+    "                )\n",
+    "            print(' '.join(parts))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a34895e-f309-46f1-a96c-635e623dddbb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ---------- Convenience: expose bootstrap/effect sizes quickly ----------\n",
+    "\n",
+    "# Continue extending ResultsAnalyzer (same cell-continuation pattern as above).\n",
+    "class ResultsAnalyzer(ResultsAnalyzer):\n",
+    "\n",
+    "    def print_ci_and_effects(self, metric: str = 'total_score'):\n",
+    "        \"\"\"Print bootstrapped CIs per altruism level and pairwise effect sizes.\"\"\"\n",
+    "        df = self.create_dataframe()\n",
+    "        ci = self.bootstrap_ci(df, ['altruism_prob'], metric)\n",
+    "        print('\\n=== BOOTSTRAP CI (by altruism_prob) ===')\n",
+    "        print(ci.sort_values('altruism_prob').to_string(index=False))\n",
+    "        deltas = self.pairwise_altruism_deltas(metric=metric)\n",
+    "        print('\\n=== PAIRWISE DELTAS (a->b) ===')\n",
+    "        print(deltas.to_string(index=False))"
+   ]
+  },
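+  {
+   "cell_type": "markdown",
+   "id": "f1a2b3c4-1111-4111-8111-000000000003",
+   "metadata": {},
+   "source": [
+    "A hedged end-to-end walkthrough using the same placeholder results file as\n",
+    "above: load results, print the summary tables and bootstrap CIs, then search\n",
+    "for configs whose early-termination rate stays at or below 0.2."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1a2b3c4-1111-4111-8111-000000000004",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical walkthrough; RESULTS_FILE is a placeholder, not a file shipped\n",
+    "# with this change.\n",
+    "RESULTS_FILE = 'simulation_results/monte_carlo_results.json'\n",
+    "\n",
+    "if Path(RESULTS_FILE).exists():\n",
+    "    analyzer = ResultsAnalyzer(RESULTS_FILE)\n",
+    "    analyzer.print_detailed_analysis()\n",
+    "    analyzer.print_ci_and_effects(metric='total_score')\n",
+    "    top = analyzer.best_configs(objective='total_score', constraints={'early_termination': (None, 0.2)})\n",
+    "    print(top.to_string(index=False))\n",
+    "else:\n",
+    "    print(f'No results at {RESULTS_FILE}; run the simulator first.')"
+   ]
+  },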
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a6ef4e6-e198-4694-9545-899e533c8cb3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ----------------------------\n",
+    "# CLI\n",
+    "# ----------------------------\n",
+    "\n",
+    "def main():\n",
+    "    \"\"\"Main function for command-line usage.\"\"\"\n",
+    "    parser = argparse.ArgumentParser(description='Analyze Monte Carlo simulation results')\n",
+    "    parser.add_argument('results_file', help='Path to results JSON file')\n",
+    "\n",
+    "    parser.add_argument(\n",
+    "        '--plot',\n",
+    "        choices=[\n",
+    "            'altruism', 'heatmap', 'distributions',\n",
+    "            'components', 'pareto', 'rank', 'seed', 'corr', 'multi-heatmap'\n",
+    "        ],\n",
+    "        default='altruism',\n",
+    "        help='Type of plot to create',\n",
+    "    )\n",
+    "    parser.add_argument('--param1', default='altruism_prob', help='Param for heatmap / multi-heatmap (rows)')\n",
+    "    parser.add_argument('--param2', default='tau_margin', help='Param for heatmap / multi-heatmap (cols)')\n",
+    "    parser.add_argument('--metric', default='total_score', help='Metric for heatmaps / stability')\n",
+    "    parser.add_argument('--fixed', default='altruism_prob', help='Facet for multi-heatmap')\n",
+    "    parser.add_argument('--save', help='Save plot to file')\n",
+    "    parser.add_argument('--analysis', action='store_true', help='Print detailed analysis')\n",
+    "    parser.add_argument('--ci', action='store_true', help='Print bootstrapped CIs and effect sizes')\n",
+    "    parser.add_argument('--report', help='Save a quick report to a directory (path)')\n",
+    "    parser.add_argument('--ols', action='store_true', help='Run OLS on total_score with knobs')\n",
+    "    parser.add_argument('--logit', action='store_true', help='Run logistic regression for early termination')\n",
+    "\n",
+    "    args = parser.parse_args()\n",
+    "\n",
+    "    # Load results\n",
+    "    analyzer = ResultsAnalyzer(args.results_file)\n",
+    "\n",
+    "    # Print analysis tables\n",
+    "    if args.analysis:\n",
+    "        analyzer.print_detailed_analysis()\n",
+    "    if args.ci:\n",
+    "        analyzer.print_ci_and_effects(metric=args.metric)\n",
+    "    if args.report:\n",
+    "        analyzer.save_quick_report(args.report)\n",
+    "\n",
+    "    # Optional modeling\n",
+    "    if args.ols:\n",
+    "        analyzer.run_ols(metric='total_score')\n",
+    "    if args.logit:\n",
+    "        analyzer.run_logistic_early_term()\n",
+    "\n",
+    "    # Create plots\n",
+    "    if args.plot == 'altruism':\n",
+    "        analyzer.plot_altruism_comparison(args.save)\n",
+    "    elif args.plot == 'heatmap':\n",
+    "        analyzer.plot_parameter_heatmap(args.param1, args.param2, metric=args.metric, save_path=args.save)\n",
+    "    elif args.plot == 'distributions':\n",
+    "        analyzer.plot_score_distributions(args.save)\n",
+    "    elif args.plot == 'components':\n",
+    "        analyzer.plot_component_stack(args.save)\n",
+    "    elif args.plot == 'pareto':\n",
+    "        analyzer.plot_pareto_tradeoff(args.save)\n",
+    "    elif args.plot == 'rank':\n",
+    "        analyzer.plot_rank_distribution(args.save)\n",
+    "    elif args.plot == 'seed':\n",
+    "        analyzer.plot_seed_stability(metric=args.metric, save_path=args.save)\n",
+    "    elif args.plot == 'corr':\n",
+    "        analyzer.plot_correlation_heatmap(args.save)\n",
+    "    elif args.plot == 'multi-heatmap':\n",
+    "        analyzer.plot_multi_heatmaps(fixed=args.fixed, metric=args.metric, save_path=args.save)\n",
+    "\n",
+    "\n",
+    "# Calling main() directly in the notebook would hand the kernel's own arguments\n",
+    "# to argparse, so only run it when this file is executed as a script.\n",
+    "if __name__ == '__main__' and 'ipykernel' not in sys.modules:\n",
+    "    main()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:base] *",
+   "language": "python",
+   "name": "conda-base-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/pyproject.toml b/pyproject.toml
index 9c5bf04..6e3fdbb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,8 +8,8 @@ dependencies = [
     "pygame>=2.6.1",
     "openai",
     "ruff>=0.12.8",
-    "numpy>=2.3.3",
-    "torch>=2.8.0",
+    "numpy",
+    "torch",
 ]
 
 [tool.ruff]
diff --git a/uv.lock b/uv.lock
index 23a48b9..2e5cff6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -61,11 +61,11 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "numpy", specifier = ">=2.3.3" },
+    { name = "numpy" },
     { name = "openai" },
     { name = "pygame", specifier = ">=2.6.1" },
     { name = "ruff", specifier = ">=0.12.8" },
-    { name = "torch", specifier = ">=2.8.0" },
+    { name = "torch" },
 ]
 
 [package.metadata.requires-dev]