diff --git a/.gitignore b/.gitignore
index a781e8f..58fc268 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ wheels/
# Virtual environments
.venv
+.ruff-venv
# Misc
.DS_Store
diff --git a/players/player_10/__init__.py b/players/player_10/__init__.py
index 7e8a23a..3071b50 100644
--- a/players/player_10/__init__.py
+++ b/players/player_10/__init__.py
@@ -1,8 +1,6 @@
from .agent.player import Player10Agent # Agent-based player for comparison
-from .rl.eval_player import EvalPlayer, create_eval_player # RL evaluation player
-# Use the trained RL model as Player10 by default
-Player10 = EvalPlayer
+Player10 = Player10Agent
__all__ = [
'Player10',
diff --git a/players/player_10/analysis b/players/player_10/analysis
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/players/player_10/analysis
@@ -0,0 +1 @@
+
diff --git a/players/player_10/tools/dashboard/builder.py b/players/player_10/tools/dashboard/builder.py
index 27410c1..553c665 100644
--- a/players/player_10/tools/dashboard/builder.py
+++ b/players/player_10/tools/dashboard/builder.py
@@ -4,6 +4,7 @@
import json
import re
+from collections import defaultdict
from datetime import datetime
from pathlib import Path
@@ -34,6 +35,193 @@ def _format_number(value: float | None, digits: int = 2) -> str:
return f'{value:.{digits}f}'
+COLORWAY = [
+ '#3867d6',
+ '#fa8231',
+ '#20bf6b',
+ '#a55eea',
+ '#fed330',
+ '#fc5c65',
+ '#2d98da',
+]
+
+COMPONENT_LABELS = {
+ 'importance': 'Importance',
+ 'coherence': 'Coherence',
+ 'freshness': 'Freshness',
+ 'nonmonotonousness': 'Monotony relief',
+}
+
+
+def _config_value(result, attr: str):
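+    """Fetch a config attribute from a result, supporting both attribute and dict access."""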
+ config = getattr(result, 'config', None)
+ if config is None:
+ return None
+ if hasattr(config, attr):
+ return getattr(config, attr)
+ if isinstance(config, dict):
+ return config.get(attr)
+ return None
+
+
+def _metric_value(result, metric: str):
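+    """Resolve a dashboard metric name to the matching result attribute (early termination is coerced to float)."""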
+ if metric == 'total_score':
+ return getattr(result, 'total_score', None)
+ if metric == 'player10_score':
+ return getattr(result, 'player10_total_mean', None)
+ if metric == 'player10_individual':
+ return getattr(result, 'player10_individual_mean', None)
+ if metric == 'early_termination':
+ value = getattr(result, 'early_termination', None)
+ if value is None:
+ return None
+ try:
+ return float(value)
+ except (TypeError, ValueError):
+ return None
+ return getattr(result, metric, None)
+
+
+def _compute_heatmap_data(results, row_attr: str, col_attr: str, metric: str):
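+    """Average the metric per (row_attr, col_attr) pair; returns (row_values, col_values, grid) or None."""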
+ matrix = defaultdict(lambda: defaultdict(list))
+ rows: set = set()
+ cols: set = set()
+ for result in results:
+ row_value = _config_value(result, row_attr)
+ col_value = _config_value(result, col_attr)
+ metric_value = _metric_value(result, metric)
+ if row_value is None or col_value is None or metric_value is None:
+ continue
+ matrix[row_value][col_value].append(float(metric_value))
+ rows.add(row_value)
+ cols.add(col_value)
+ if not rows or not cols:
+ return None
+ row_order = sorted(rows)
+ col_order = sorted(cols)
+ grid: list[list[float | None]] = []
+ for row_value in row_order:
+ row_data: list[float | None] = []
+ for col_value in col_order:
+ bucket = matrix.get(row_value, {}).get(col_value, [])
+ if bucket:
+ row_data.append(sum(bucket) / len(bucket))
+ else:
+ row_data.append(None)
+ grid.append(row_data)
+ return row_order, col_order, grid
+
+
+def _collect_scores_by_altruism(results):
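+    """Bucket total and Player10 score samples by altruism probability; returns a sorted dict or None."""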
+ buckets = defaultdict(lambda: {'total': [], 'player10': []})
+ for result in results:
+ altruism = _config_value(result, 'altruism_prob')
+ if altruism is None:
+ continue
+ total_value = _metric_value(result, 'total_score')
+ if total_value is not None:
+ buckets[altruism]['total'].append(float(total_value))
+ p10_value = _metric_value(result, 'player10_score')
+ if p10_value is not None:
+ buckets[altruism]['player10'].append(float(p10_value))
+ if not buckets:
+ return None
+ return dict(sorted(buckets.items()))
+
+
+def _component_means_by_altruism(results):
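+    """Mean shared-score components per altruism level; returns (altruism_values, component_series) or None."""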
+ sums = defaultdict(lambda: defaultdict(float))
+ counts = defaultdict(lambda: defaultdict(int))
+ for result in results:
+ altruism = _config_value(result, 'altruism_prob')
+ breakdown = getattr(result, 'score_breakdown', None) or {}
+ if altruism is None:
+ continue
+ for key in COMPONENT_LABELS:
+ value = breakdown.get(key)
+ if value is None:
+ continue
+ try:
+ value = float(value)
+ except (TypeError, ValueError):
+ continue
+ sums[altruism][key] += value
+ counts[altruism][key] += 1
+ if not sums:
+ return None
+ altruism_values = sorted(sums.keys())
+ component_series: dict[str, list[float]] = {key: [] for key in COMPONENT_LABELS}
+ for altruism in altruism_values:
+ for key in COMPONENT_LABELS:
+ count = counts[altruism].get(key, 0)
+ if count:
+ component_series[key].append(sums[altruism][key] / count)
+ else:
+ component_series[key].append(0.0)
+ return altruism_values, component_series
+
+
+def _aggregate_pareto_points(results):
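+    """Aggregate mean total, Player10 individual, and early-termination rate per parameter combination."""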
+ groups = defaultdict(
+ lambda: {
+ 'total_sum': 0.0,
+ 'total_count': 0,
+ 'p10_sum': 0.0,
+ 'p10_count': 0,
+ 'early_sum': 0.0,
+ 'early_count': 0,
+ }
+ )
+ for result in results:
+ key = (
+ _config_value(result, 'altruism_prob'),
+ _config_value(result, 'tau_margin'),
+ _config_value(result, 'epsilon_fresh'),
+ _config_value(result, 'epsilon_mono'),
+ )
+ if any(value is None for value in key):
+ continue
+ total_value = _metric_value(result, 'total_score')
+ if total_value is not None:
+ groups[key]['total_sum'] += float(total_value)
+ groups[key]['total_count'] += 1
+ p10_value = _metric_value(result, 'player10_individual')
+ if p10_value is not None:
+ groups[key]['p10_sum'] += float(p10_value)
+ groups[key]['p10_count'] += 1
+ early_value = _metric_value(result, 'early_termination')
+ if early_value is not None:
+ groups[key]['early_sum'] += float(early_value)
+ groups[key]['early_count'] += 1
+ points: list[dict[str, float | int | None]] = []
+ for key, data in groups.items():
+ if not data['total_count'] or not data['p10_count']:
+ continue
+ altruism, tau, fresh, mono = key
+ point = {
+ 'altruism': altruism,
+ 'tau': tau,
+ 'fresh': fresh,
+ 'mono': mono,
+ 'total': data['total_sum'] / data['total_count'],
+ 'player10': data['p10_sum'] / data['p10_count'],
+ 'early': (data['early_sum'] / data['early_count']) if data['early_count'] else None,
+ 'runs': data['total_count'],
+ }
+ points.append(point)
+ if not points:
+ return None
+ points.sort(key=lambda item: (item['altruism'], item['tau'], item['fresh'], item['mono']))
+ return points
+
+
+def _format_axis_value(value):
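+    """Format numeric axis labels compactly, trimming trailing zeros."""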
+ if isinstance(value, float):
+ formatted = f'{value:.3f}' if abs(value) < 1 else f'{value:.2f}'
+ return formatted.rstrip('0').rstrip('.')
+ return str(value)
+
+
def generate_dashboard(
results,
analysis,
@@ -45,12 +233,14 @@ def generate_dashboard(
try:
import plotly.graph_objects as go
import plotly.io as pio
+ from plotly.subplots import make_subplots
except ImportError:
return None
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
+ analysis = analysis or {}
aggregated = summarize_parameterizations(results)
table_rows: list[dict] = []
for row in aggregated:
@@ -116,23 +306,48 @@ def generate_dashboard(
chart_sections: list[dict[str, str]] = []
if top_rows:
- labels = [parameter_label(row['meta']) for row in top_rows]
- total_means = [row['mean'] for row in top_rows]
- fig_top = go.Figure(
- go.Bar(
- x=labels,
- y=total_means,
- text=[f'±{row["std"]:.2f}' for row in top_rows],
- textposition='outside',
- marker=dict(color='#3867d6'),
+ fig_top = go.Figure()
+ rank_labels: list[str] = []
+ for idx, row in enumerate(top_rows, start=1):
+ full_label = parameter_label(row['meta'])
+ mean_value = row['mean']
+ std_value = row.get('std', 0.0)
+ rank_label = f'#{idx}'
+ rank_labels.append(rank_label)
+ fig_top.add_trace(
+ go.Bar(
+ x=[mean_value],
+ y=[rank_label],
+ orientation='h',
+ name=full_label,
+ marker=dict(color=COLORWAY[(idx - 1) % len(COLORWAY)]),
+ text=[f'{mean_value:.2f} ± {std_value:.2f}'],
+ textposition='outside',
+ customdata=[[full_label, std_value]],
+ hovertemplate=(
+                        '%{customdata[0]}<br>Mean: %{x:.2f}<br>Std: %{customdata[1]:.2f}'
+ ),
+ )
)
- )
fig_top.update_layout(
title='Top Parameterizations by Total Score',
- xaxis_title='Parameterization label',
- yaxis_title='Mean total score',
+ xaxis_title='Mean total score',
+ yaxis_title='Rank',
+ yaxis=dict(categoryorder='array', categoryarray=rank_labels),
+ margin=dict(l=0, r=20, t=60, b=40),
+ height=max(320, 90 * len(rank_labels)),
uniformtext_minsize=10,
- uniformtext_mode='show',
+ uniformtext_mode='hide',
+ legend=dict(
+ title='Parameterization label',
+ yanchor='top',
+ y=1.0,
+ xanchor='left',
+ x=1.02,
+ bgcolor='rgba(255,255,255,0.85)',
+ bordercolor='rgba(0,0,0,0.1)',
+ borderwidth=1,
+ ),
)
chart_sections.append(
{
@@ -224,6 +439,201 @@ def generate_dashboard(
},
)
+ # Enhanced analysis sections derived from notebook utilities
+ heatmap_data = _compute_heatmap_data(results, 'altruism_prob', 'tau_margin', 'total_score')
+ if heatmap_data:
+ row_values, col_values, matrix = heatmap_data
+ y_labels = [_format_axis_value(value) for value in row_values]
+ x_labels = [_format_axis_value(value) for value in col_values]
+ fig_heatmap = go.Figure(
+ go.Heatmap(
+ z=matrix,
+ x=x_labels,
+ y=y_labels,
+ colorscale='Viridis',
+ colorbar={'title': 'Mean total score'},
+ )
+ )
+ fig_heatmap.update_layout(
+ title='Total Score Heatmap',
+ xaxis_title='Tau margin',
+ yaxis_title='Altruism probability',
+ margin={'l': 80, 'r': 40, 't': 60, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Parameter Heatmap',
+ 'description': 'Average total score for each altruism/tau combination helps spot sweet spots quickly.',
+ 'html': pio.to_html(
+ fig_heatmap,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
+ score_buckets = _collect_scores_by_altruism(results)
+ if score_buckets:
+ fig_dist = make_subplots(rows=1, cols=2, subplot_titles=('Total score', 'Player10 score'))
+ for idx, (prob, values) in enumerate(score_buckets.items()):
+ label = f'altruism {prob:.2f}' if isinstance(prob, float) else f'altruism {prob}'
+ color = COLORWAY[idx % len(COLORWAY)]
+ if values['total']:
+ fig_dist.add_trace(
+ go.Histogram(
+ x=values['total'],
+ name=label,
+ legendgroup=label,
+ marker={'color': color},
+ opacity=0.55,
+ nbinsx=20,
+ showlegend=True,
+ ),
+ row=1,
+ col=1,
+ )
+ if values['player10']:
+ fig_dist.add_trace(
+ go.Histogram(
+ x=values['player10'],
+ name=label,
+ legendgroup=label,
+ marker={'color': color},
+ opacity=0.55,
+ nbinsx=20,
+ showlegend=False,
+ ),
+ row=1,
+ col=2,
+ )
+ fig_dist.update_layout(
+ title_text='Score Distributions by Altruism',
+ barmode='overlay',
+ legend={'orientation': 'h', 'y': 1.12, 'x': 0.5, 'xanchor': 'center'},
+ xaxis_title='Total score',
+ xaxis2_title='Player10 score',
+ yaxis_title='Frequency',
+ margin={'l': 60, 'r': 20, 't': 80, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Score Distributions',
+                'description': 'Histogram overlays reveal how each altruism setting shifts the total and Player10 score distributions.',
+ 'html': pio.to_html(
+ fig_dist,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
+ component_data = _component_means_by_altruism(results)
+ if component_data:
+ altruism_values, component_series = component_data
+ labels = [_format_axis_value(value) for value in altruism_values]
+ fig_components = go.Figure()
+ for idx, (comp_key, comp_label) in enumerate(COMPONENT_LABELS.items()):
+ values = component_series.get(comp_key, [])
+ if not values:
+ continue
+ fig_components.add_trace(
+ go.Bar(
+ x=labels,
+ y=values,
+ name=comp_label,
+ marker={'color': COLORWAY[idx % len(COLORWAY)]},
+ )
+ )
+ fig_components.update_layout(
+ title='Shared Component Breakdown',
+ barmode='stack',
+ xaxis_title='Altruism probability',
+ yaxis_title='Mean shared score',
+ legend={'orientation': 'h', 'y': 1.1, 'x': 0.5, 'xanchor': 'center'},
+ margin={'l': 60, 'r': 20, 't': 80, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Shared Components',
+ 'description': 'Stacks quantify how shared scoring components vary with altruism levels.',
+ 'html': pio.to_html(
+ fig_components,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
+ pareto_points = _aggregate_pareto_points(results)
+ if pareto_points:
+ customdata = [
+ [
+ _format_axis_value(point['altruism']),
+ _format_axis_value(point['tau']),
+ _format_axis_value(point['fresh']),
+ _format_axis_value(point['mono']),
+ (f'{point["early"]:.1%}' if point['early'] is not None else 'n/a'),
+ point['runs'],
+ ]
+ for point in pareto_points
+ ]
+ fig_pareto = go.Figure(
+ go.Scatter(
+ x=[point['player10'] for point in pareto_points],
+ y=[point['total'] for point in pareto_points],
+ mode='markers',
+ marker={
+ 'size': 10,
+ 'color': [point['altruism'] for point in pareto_points],
+ 'colorscale': 'Viridis',
+ 'showscale': True,
+ 'colorbar': {'title': 'Altruism p'},
+ },
+ text=['ET>0.3' if (point['early'] or 0) > 0.3 else '' for point in pareto_points],
+ textposition='top center',
+ customdata=customdata,
+                hovertemplate=(
+                    'Player10 mean: %{x:.2f}<br>'
+                    'Total mean: %{y:.2f}<br>'
+                    'Altruism: %{customdata[0]}<br>'
+                    'Tau margin: %{customdata[1]}<br>'
+                    'Epsilon fresh: %{customdata[2]}<br>'
+                    'Epsilon mono: %{customdata[3]}<br>'
+                    'Early termination: %{customdata[4]}<br>'
+                    'Runs: %{customdata[5]}'
+                ),
+ )
+ )
+ fig_pareto.update_layout(
+ title='Pareto: Player10 vs Total Score',
+ xaxis_title='Player10 individual mean',
+ yaxis_title='Total score mean',
+ margin={'l': 60, 'r': 20, 't': 60, 'b': 60},
+ )
+ chart_sections.append(
+ {
+ 'title': 'Pareto Trade-off',
+ 'description': 'Scatter highlights where individual gains align with team score, colored by altruism.',
+ 'html': pio.to_html(
+ fig_pareto,
+ include_plotlyjs=False,
+ full_html=False,
+ config={'displaylogo': False},
+ default_width='100%',
+ default_height='420px',
+ ),
+ },
+ )
+
total_simulations = analysis.get('total_simulations', len(results))
unique_configs = analysis.get('unique_configurations', len(aggregated))
best_entry = next(iter(analysis.get('best_configurations', [])), None)
diff --git a/players/player_10/tools/manual_dashboard.py b/players/player_10/tools/manual_dashboard.py
new file mode 100644
index 0000000..2bd84c2
--- /dev/null
+++ b/players/player_10/tools/manual_dashboard.py
@@ -0,0 +1,346 @@
+"""Generate a dashboard for manual engine experiments.
+
+This script recreates two lightweight experiment profiles (balanced vs adversarial
+supporting casts) across a small set of seeds and toggles Player10's altruism
+probability. The per-run outputs are converted into the same shape that the
+Plotly dashboard expects, so we can reuse generate_dashboard without relying
+on the MonteCarlo simulator assets that aren't available locally.
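+
+Intended to be run directly from the project root (it appends the project root to
+sys.path), for example: python players/player_10/tools/manual_dashboard.py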
+"""
+
+from __future__ import annotations
+
+import json
+import statistics as stats
+import sys
+from collections import Counter
+from collections.abc import Sequence
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT) not in sys.path:
+ sys.path.append(str(PROJECT_ROOT))
+
+
+@dataclass
+class ManualConfig:
+ """Minimal config stub exposing the knobs used by the dashboard helpers."""
+
+ altruism_prob: float
+ tau_margin: float
+ epsilon_fresh: float
+ epsilon_mono: float
+ seed: int
+ players: dict[str, int]
+ subjects: int
+ memory_size: int
+ conversation_length: int
+ min_samples_pid: int
+ ewma_alpha: float
+ importance_weight: float
+ coherence_weight: float
+ freshness_weight: float
+ monotony_weight: float
+
+
+@dataclass
+class ManualResult:
+ """Container that matches the attributes accessed by the dashboard builder."""
+
+ config: ManualConfig
+ total_score: float
+ best_total_score: float
+ player_scores: dict[str, float]
+ player_contributions: dict[str, int]
+ conversation_length: int
+ early_termination: bool
+ pause_count: int
+ unique_items_used: int
+ execution_time: float
+ score_breakdown: dict[str, float]
+ player_metrics: dict[str, dict[str, float | str | int | None]]
+ player10_total_mean: float
+ player10_individual_mean: float
+ player10_rank_mean: float
+ player10_gap_to_best: float
+ player10_instances: int
+
+
+def _build_label_map(engine: Any) -> dict[str, str]:
+ """Assign stable human-readable labels to player UUIDs."""
+ counts: Counter[str] = Counter()
+ labels: dict[str, str] = {}
+
+ for player in engine.players:
+ class_name = type(player).__name__
+ if class_name == 'Player10Agent':
+ label = 'Player10'
+ else:
+ counts[class_name] += 1
+ label = f'{class_name}#{counts[class_name]}'
+ labels[str(player.id)] = label
+
+ return labels
+
+
+def _rank_players(totals: dict[str, float]) -> dict[str, float]:
+ """Return 1-based ranks (dense ranking) for each player label."""
+ sorted_totals = sorted(totals.items(), key=lambda item: item[1], reverse=True)
+ ranks: dict[str, float] = {}
+
+ current_rank = 1
+ previous_value: float | None = None
+
+ for index, (label, value) in enumerate(sorted_totals, start=1):
+ if previous_value is None or value < previous_value:
+ current_rank = index
+ previous_value = value
+ ranks[label] = float(current_rank)
+
+ return ranks
+
+
+def _build_manual_result(
+ engine: Any,
+ seed: int,
+ altruism: float,
+ roster: Sequence[type],
+ subjects: int,
+ memory_size: int,
+ conversation_length: int,
+) -> ManualResult:
+ """Run the engine once and transform the output into a dashboard result."""
+ from players.player_10.agent import config as p10_config
+
+ output = engine.run(list(roster))
+ history = output['history']
+ scores = output['scores']
+
+ label_map = _build_label_map(engine)
+
+ player_scores_dict: dict[str, float] = {}
+ player_metrics: dict[str, dict[str, float | str | int | None]] = {}
+
+ totals_for_ranking: dict[str, float] = {}
+
+ for entry in scores['player_scores']:
+ label = label_map[str(entry['id'])]
+ total = float(entry['scores']['total'])
+ individual = float(entry['scores']['individual'])
+ shared = float(entry['scores']['shared'])
+
+ player_scores_dict[label] = total
+ player_metrics[label] = {
+ 'class_name': label.split('#')[0],
+ 'alias': label,
+ 'total': total,
+ 'individual': individual,
+ 'shared': shared,
+ 'rank': None, # filled in after ranking
+ }
+ totals_for_ranking[label] = total
+
+ ranks = _rank_players(totals_for_ranking)
+ for label, rank in ranks.items():
+ player_metrics[label]['rank'] = rank
+
+ player10_total = player_scores_dict['Player10']
+ best_total = max(player_scores_dict.values())
+
+ player_contributions_counts = {
+ label_map[str(uid)]: len(items) for uid, items in engine.player_contributions.items()
+ }
+
+ unique_items = {item.id for item in history if item is not None}
+ pause_count = sum(1 for item in history if item is None)
+
+ config = ManualConfig(
+ altruism_prob=altruism,
+ tau_margin=p10_config.TAU_MARGIN,
+ epsilon_fresh=p10_config.EPSILON_FRESH,
+ epsilon_mono=p10_config.EPSILON_MONO,
+ seed=seed,
+ players=dict(Counter(type(player).__name__ for player in engine.players)),
+ subjects=subjects,
+ memory_size=memory_size,
+ conversation_length=conversation_length,
+ min_samples_pid=p10_config.MIN_SAMPLES_PID,
+ ewma_alpha=p10_config.EWMA_ALPHA,
+ importance_weight=p10_config.IMPORTANCE_WEIGHT,
+ coherence_weight=p10_config.COHERENCE_WEIGHT,
+ freshness_weight=p10_config.FRESHNESS_WEIGHT,
+ monotony_weight=p10_config.MONOTONY_WEIGHT,
+ )
+
+ return ManualResult(
+ config=config,
+ total_score=float(output['score_breakdown']['total']),
+ best_total_score=best_total,
+ player_scores=player_scores_dict,
+ player_contributions=player_contributions_counts,
+ conversation_length=len(history),
+ early_termination=len(history) < conversation_length,
+ pause_count=pause_count,
+ unique_items_used=len(unique_items),
+ execution_time=0.0,
+ score_breakdown={k: float(v) for k, v in output['score_breakdown'].items()},
+ player_metrics=player_metrics,
+ player10_total_mean=player10_total,
+ player10_individual_mean=float(
+ next(
+ entry['scores']['individual']
+ for entry in scores['player_scores']
+ if label_map[str(entry['id'])] == 'Player10'
+ )
+ ),
+ player10_rank_mean=ranks['Player10'],
+ player10_gap_to_best=best_total - player10_total,
+ player10_instances=1,
+ )
+
+
+def run_manual_experiments() -> tuple[list[ManualResult], dict[str, dict[str, float]]]:
+ """Return all per-run results plus an aggregate summary per configuration."""
+ from core.engine import Engine
+ from players.pause_player import PausePlayer
+ from players.player_10.agent import config as p10_config
+ from players.player_10.agent.player import Player10Agent
+ from players.random_pause_player import RandomPausePlayer
+ from players.random_player import RandomPlayer
+
+ subjects = 10
+ memory_size = 16
+ conversation_length = 40
+ seeds = list(range(100, 116))
+
+ rosters: dict[str, Sequence[type]] = {
+ 'Balanced support (3 Random)': [Player10Agent, RandomPlayer, RandomPlayer, RandomPlayer],
+ 'Adversarial mix (Random, RandomPause, Pause)': [
+ Player10Agent,
+ RandomPlayer,
+ RandomPausePlayer,
+ PausePlayer,
+ ],
+ }
+
+ results: list[ManualResult] = []
+ aggregates: dict[str, list[float]] = {}
+
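+    # Altruism is swept by mutating the module-level config knob; the original value is restored below.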
+ original_altruism = p10_config.ALTRUISM_USE_PROB
+
+ for roster_name, roster in rosters.items():
+ for altruism_value in (0.0, 0.6):
+ p10_config.ALTRUISM_USE_PROB = altruism_value
+
+ key = f'{roster_name} | altruism={altruism_value:.1f}'
+ aggregates[key] = []
+
+ for seed in seeds:
+ engine = Engine(
+ players=list(roster),
+ player_count=len(roster),
+ subjects=subjects,
+ memory_size=memory_size,
+ conversation_length=conversation_length,
+ seed=seed,
+ )
+ result = _build_manual_result(
+ engine,
+ seed=seed,
+ altruism=altruism_value,
+ roster=roster,
+ subjects=subjects,
+ memory_size=memory_size,
+ conversation_length=conversation_length,
+ )
+ results.append(result)
+ aggregates[key].append(result.total_score)
+
+ # Restore the original altruism probability so we do not affect other tooling
+ p10_config.ALTRUISM_USE_PROB = original_altruism
+
+ aggregate_summary = {
+ key: {
+ 'mean': stats.mean(values),
+ 'std': stats.pstdev(values) if len(values) > 1 else 0.0,
+ }
+ for key, values in aggregates.items()
+ }
+
+ output_payload = [
+ {
+ 'config': asdict(result.config),
+ 'total_score': result.total_score,
+ 'best_total_score': result.best_total_score,
+ 'player_scores': result.player_scores,
+ 'player_contributions': result.player_contributions,
+ 'conversation_length': result.conversation_length,
+ 'early_termination': result.early_termination,
+ 'pause_count': result.pause_count,
+ 'unique_items_used': result.unique_items_used,
+ 'execution_time': result.execution_time,
+ 'score_breakdown': result.score_breakdown,
+ 'player_metrics': result.player_metrics,
+ 'player10_total_mean': result.player10_total_mean,
+ 'player10_individual_mean': result.player10_individual_mean,
+ 'player10_rank_mean': result.player10_rank_mean,
+ 'player10_gap_to_best': result.player10_gap_to_best,
+ 'player10_instances': result.player10_instances,
+ 'altruism_prob': result.config.altruism_prob,
+ 'seed': result.config.seed,
+ 'players': result.config.players,
+ }
+ for result in results
+ ]
+
+ output_path_json = Path('players/player_10/results/manual_dashboard_runs.json')
+ output_path_json.write_text(json.dumps(output_payload, indent=2))
+ print(f'Detailed run data written to {output_path_json}')
+
+ return results, aggregate_summary
+
+
+def main(open_browser: bool = False) -> None:
+ from players.player_10.tools.dashboard import generate_dashboard
+
+ results, summary = run_manual_experiments()
+
+ analysis = {
+ 'total_simulations': len(results),
+ 'unique_configurations': len(summary),
+ 'best_configurations': [
+ {
+ 'label': label,
+                'mean_score': config_stats['mean'],
+                'std_score': config_stats['std'],
+            }
+            for label, config_stats in sorted(
+ summary.items(), key=lambda item: item[1]['mean'], reverse=True
+ )
+ ],
+ }
+
+ dashboard_config = SimpleNamespace(
+ name='Manual Engine Experiments',
+ description='Player10 altruism sensitivity across two roster archetypes.',
+ output_dir='players/player_10/results',
+ )
+
+ output_path = generate_dashboard(
+ results,
+ analysis,
+ dashboard_config,
+ output_dir='players/player_10/results/dashboards',
+ open_browser=open_browser,
+ )
+
+ if output_path:
+ print(f'Dashboard written to: {output_path}')
+ else:
+ print('Plotly is not installed; dashboard generation skipped.')
+
+
+if __name__ == '__main__':
+ main(open_browser=False)
diff --git a/players/player_10/tools/reporting.py b/players/player_10/tools/reporting.py
index 441f266..3f1f2ca 100644
--- a/players/player_10/tools/reporting.py
+++ b/players/player_10/tools/reporting.py
@@ -10,16 +10,43 @@
from collections import defaultdict
from typing import Any
-from ..sim.test_framework import ParameterRange, TestConfiguration
+try:
+ from ..sim.test_framework import (
+ ParameterRange,
+ TestConfiguration,
+ )
+except ModuleNotFoundError:
+ ParameterRange = None # type: ignore
+ TestConfiguration = None # type: ignore
+ _BASELINE_CONFIG = None
+else:
+ _BASELINE_CONFIG = TestConfiguration(name='baseline_snapshot')
-_BASELINE_CONFIG = TestConfiguration(name='baseline_snapshot')
-
-def _first(range_field: ParameterRange) -> Any:
- return range_field.values[0] if range_field.values else None
+def _first(range_field) -> Any:
+ if range_field is None:
+ return None
+ return range_field.values[0] if getattr(range_field, 'values', None) else None
def _capture_baseline_meta() -> dict[str, Any]:
+ if _BASELINE_CONFIG is None:
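+        # Sim framework not importable: fall back to neutral placeholder metadata.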
+ return {
+ 'altruism_prob': 0.0,
+ 'tau_margin': 0.0,
+ 'epsilon_fresh': 0.0,
+ 'epsilon_mono': 0.0,
+ 'min_samples_pid': 5,
+ 'ewma_alpha': 0.0,
+ 'importance_weight': 1.0,
+ 'coherence_weight': 1.0,
+ 'freshness_weight': 1.0,
+ 'monotony_weight': 1.0,
+ 'conversation_length': 0,
+ 'subjects': 0,
+ 'memory_size': 0,
+ 'players': {},
+ }
players = dict(_BASELINE_CONFIG.player_configs[0]) if _BASELINE_CONFIG.player_configs else {}
return {
'altruism_prob': _first(_BASELINE_CONFIG.altruism_probs),