Add dumb muscle and rich kid baseline examples

roackb2 · roackb2 · commit 3ef474cef4d2 · 2026-01-01T20:40:00.000+08:00
diff --git a/package.json b/package.json
@@ -47,7 +47,8 @@
     "examples:quickstart": "tsx src/examples/quickstart.ts",
     "examples:github": "tsx src/examples/github/agent-demo.ts",
     "examples:github:baseline": "tsx src/examples/github/baseline-demo.ts",
-    "examples:wikipedia": "tsx src/examples/wikipedia/demo.ts"
+    "examples:wikipedia": "tsx src/examples/wikipedia/demo.ts",
+    "examples:wikipedia:baseline": "tsx src/examples/wikipedia/baseline-agent.ts"
   },
   "devDependencies": {
     "@eslint/js": "^9.37.0",
diff --git a/src/adapters/wikipedia/policies/llm-cot.ts b/src/adapters/wikipedia/policies/llm-cot.ts
@@ -0,0 +1,192 @@
+import { type OpenAI } from 'openai';
+
+import type { Ladder, ProbePolicy } from '../../../core/interfaces';
+import { logger } from '../telemetry';
+import type { WikiAction, WikiState } from '../types';
+
+/** JSON object the model is instructed to return on every turn. */
+interface CoTResponse {
+  reasoning: string;
+  selected_link: string;
+}
+
+/** Title prefixes of non-article namespaces that are never offered to the model. */
+const META_NAMESPACE_PREFIXES = [
+  'Wikipedia:',
+  'Template:',
+  'Category:',
+  'Help:',
+  'Portal:',
+  'Talk:',
+  'Special:',
+  'File:'
+] as const;
+
+/** Upper bound on the number of links listed in a single prompt. */
+const MAX_CANDIDATES = 50;
+
+/** Conversation length (system message included) above which old messages are dropped. */
+const MAX_HISTORY_LENGTH = 12;
+
+/** Number of most recent messages kept, next to the system message, when truncating. */
+const KEPT_RECENT_MESSAGES = 10;
+
+/**
+ * Baseline probe policy that asks an OpenAI chat model to choose the next Wikipedia link.
+ *
+ * The conversation is kept across calls to `decide`, so earlier prompts and replies stay
+ * visible to the model until truncation drops them.
+ */
+export class LlmCoTPolicy implements ProbePolicy<WikiState, WikiAction, number> {
+  public id = 'llm-cot-policy';
+  private chatHistory: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [];
+
+  constructor(
+    private openai: OpenAI
+  ) { }
+
+  /**
+   * Starts a fresh conversation whose system message states the navigation goal.
+   *
+   * @param state - State supplying the goal and the starting title.
+   */
+  initialize(state: WikiState): void {
+    logger.info(`[LlmCoTPolicy] Initializing stateful agent. Goal: ${state.goal}`);
+    this.chatHistory = [
+      {
+        role: 'system',
+        content: `You are a strategic Wikipedia navigation agent.
+Your goal is to reach the topic "${state.goal}" starting from "${state.currentTitle}".
+You must strictly output JSON.
+`
+      }
+    ];
+  }
+
+  /**
+   * @param state - State whose current title is compared with its goal.
+   * @returns `true` once the current title equals the goal title.
+   */
+  isStable(state: WikiState): boolean {
+    return state.currentTitle === state.goal;
+  }
+
+  /**
+   * Picks the next action: finish when at the goal or out of usable links, otherwise
+   * navigate to the link chosen by the model.
+   *
+   * A failed request, an unparsable reply, or a reply naming a link that was not offered
+   * all fall back to the first candidate so the run keeps moving.
+   *
+   * @param state - Current title, goal, outgoing links and visited history.
+   * @param _ladder - Unused by this policy.
+   * @returns A `DONE` action, or a `NAVIGATE` action whose title is one of the candidates.
+   */
+  async decide(state: WikiState, _ladder: Ladder<number>): Promise<WikiAction> {
+    if (state.currentTitle === state.goal) {
+      return { type: 'DONE', result: "Arrived at Goal!" };
+    }
+
+    if (!state.links || state.links.length === 0) {
+      return { type: 'DONE', result: "Dead End" };
+    }
+
+    // Filter out meta-namespaces
+    const basicValidLinks = state.links.filter(
+      link => !META_NAMESPACE_PREFIXES.some(prefix => link.startsWith(prefix))
+    );
+
+    // Limit to the first 50 links to fit context
+    const candidates = basicValidLinks.slice(0, MAX_CANDIDATES);
+    const fallbackLink = candidates[0];
+
+    // Every link may have been a meta page. Without this guard the model is shown an
+    // empty list and the fallbacks below navigate to an undefined title.
+    if (fallbackLink === undefined) {
+      logger.warn("[LlmCoTPolicy] Dead end after basic filtering! No valid links found.");
+      return { type: 'DONE', result: "Dead End (Filtered)" };
+    }
+
+    const prompt = `
+Current Topic: "${state.currentTitle}"
+Goal Topic: "${state.goal}"
+History: ${state.history.join(' -> ')}
+
+Available Links (Top 50):
+${JSON.stringify(candidates)}
+
+Your task:
+1. Analyze the relationship between the current topic and the goal.
+2. Evaluate the available links to see which one is semantically closest or most likely to lead to the goal.
+3. Reason step-by-step about why you are choosing a specific link.
+4. Output your decision in strict JSON format.
+
+JSON Format:
+{
+  "reasoning": "your step-by-step reasoning here",
+  "selected_link": "exact string from the available links list"
+}
+`;
+
+    // Make sure the system message exists even when initialize() was not called first,
+    // so that index 0 is the system message when the history is truncated below.
+    if (this.chatHistory.length === 0) {
+      this.initialize(state);
+    }
+
+    // Append user message
+    this.chatHistory.push({ role: 'user', content: prompt });
+
+    // Context window management: once the history exceeds 12 messages, keep the
+    // system message plus the 10 most recent messages.
+    if (this.chatHistory.length > MAX_HISTORY_LENGTH) {
+      const recent = this.chatHistory.slice(-KEPT_RECENT_MESSAGES);
+      this.chatHistory = [...this.chatHistory.slice(0, 1), ...recent];
+      logger.debug(`[LlmCoTPolicy] Truncated history to last ${KEPT_RECENT_MESSAGES} messages.`);
+    }
+
+    logger.info(`[LlmCoTPolicy] Thinking... (History Depth: ${this.chatHistory.length})`);
+
+    try {
+      const response = await this.openai.chat.completions.create({
+        model: 'gpt-4o-mini',
+        messages: this.chatHistory,
+        response_format: { type: 'json_object' },
+        temperature: 0.2
+      });
+
+      // Optional chaining so that an empty `choices` array is reported as an empty
+      // response rather than as a TypeError.
+      const content = response.choices[0]?.message.content;
+      if (!content) {
+        throw new Error("Empty response from LLM");
+      }
+
+      logger.info(`[LlmCoTPolicy] LLM Response: ${content}`);
+
+      // Append assistant response
+      this.chatHistory.push({ role: 'assistant', content });
+
+      // The reply is untrusted JSON: read its fields as unknown and check them before use.
+      const parsed: unknown = JSON.parse(content);
+      const reply = (typeof parsed === 'object' && parsed !== null ? parsed : {}) as
+        Partial<Record<keyof CoTResponse, unknown>>;
+      const selectedLink = reply.selected_link;
+
+      if (typeof selectedLink !== 'string' || !candidates.includes(selectedLink)) {
+        logger.warn(`[LlmCoTPolicy] Hallucinated link: ${String(selectedLink)}. Fallback to first candidate.`);
+        return { type: 'NAVIGATE', title: fallbackLink };
+      }
+
+      logger.info(`[LlmCoTPolicy] Reasoning: ${String(reply.reasoning)}`);
+      logger.info(`[LlmCoTPolicy] Selected: ${selectedLink}`);
+
+      return { type: 'NAVIGATE', title: selectedLink };
+
+    } catch (error) {
+      logger.error(`[LlmCoTPolicy] Error: ${error instanceof Error ? error.message : String(error)}`);
+      // Fallback
+      return { type: 'NAVIGATE', title: fallbackLink };
+    }
+  }
+}
diff --git a/src/adapters/wikipedia/policies/naive.ts b/src/adapters/wikipedia/policies/naive.ts
@@ -0,0 +1,111 @@
+import type { Ladder, ProbePolicy } from '../../../core/interfaces';
+import { dot, norm } from '../../../core/kinematics/math';
+import type { WikipediaEmbedder } from '../embedder';
+import { logger } from '../telemetry';
+import type { WikiAction, WikiState } from '../types';
+
+/** Title prefixes of non-article namespaces that are never considered as a next hop. */
+const META_NAMESPACE_PREFIXES = [
+  'Wikipedia:',
+  'Template:',
+  'Category:',
+  'Help:',
+  'Portal:',
+  'Talk:',
+  'Special:',
+  'File:'
+] as const;
+
+/** Upper bound on the number of links embedded and scored per step. */
+const MAX_CANDIDATES = 50;
+
+/**
+ * Baseline probe policy that always navigates to the link whose embedding scores highest
+ * against the goal embedding.
+ */
+export class NaiveGreedyPolicy implements ProbePolicy<WikiState, WikiAction, number> {
+  public id = 'naive-greedy-policy';
+
+  constructor(
+    private embedder: WikipediaEmbedder,
+    private goalEmbedding: number[]
+  ) { }
+
+  initialize(_state: WikiState): void {
+    // No-op for greedy policy
+  }
+
+  /**
+   * @param state - State whose current title is compared with its goal.
+   * @returns `true` once the current title equals the goal title.
+   */
+  isStable(state: WikiState): boolean {
+    return state.currentTitle === state.goal;
+  }
+
+  /**
+   * Picks the next action: finish when at the goal or out of usable links, otherwise
+   * navigate to the single best-scoring candidate link.
+   *
+   * @param state - Current title, goal, outgoing links and optional per-link weights.
+   * @param _ladder - Unused by this policy.
+   * @returns A `DONE` action, or a `NAVIGATE` action for the top-scoring candidate.
+   */
+  async decide(state: WikiState, _ladder: Ladder<number>): Promise<WikiAction> {
+    if (state.currentTitle === state.goal) {
+      return { type: 'DONE', result: "Arrived at Goal!" };
+    }
+
+    if (!state.links || state.links.length === 0) {
+      logger.warn("[NaiveGreedyPolicy] Dead end! No links found.");
+      return { type: 'DONE', result: "Dead End" };
+    }
+
+    // 1. Filter out meta-namespaces
+    const basicValidLinks = state.links.filter(
+      link => !META_NAMESPACE_PREFIXES.some(prefix => link.startsWith(prefix))
+    );
+
+    if (basicValidLinks.length === 0) {
+      logger.warn("[NaiveGreedyPolicy] Dead end after basic filtering! No valid links found.");
+      return { type: 'DONE', result: "Dead End (Filtered)" };
+    }
+
+    // Optimization: only the first 50 links are evaluated
+    const candidates = basicValidLinks.slice(0, MAX_CANDIDATES);
+
+    logger.debug(`[NaiveGreedyPolicy] Evaluating ${candidates.length} candidates...`);
+
+    // 2. Embed candidates
+    const embeddings = await this.embedder.embedBatch(candidates);
+
+    // The goal embedding is the same for every candidate, so take its norm once.
+    const goalNorm = norm(this.goalEmbedding);
+
+    // 3. Rank by similarity to GOAL
+    const scores = candidates.map((link, i) => {
+      const emb = embeddings[i];
+      // Cosine similarity (assuming `dot`/`norm` are the usual dot product and vector
+      // length — they are imported, not shown here). A zero-length vector then divides to
+      // NaN, which would make the sort comparator below return NaN, so score it as 0.
+      const cosine = dot(emb, this.goalEmbedding) / (norm(emb) * goalNorm);
+      const rawSim = Number.isFinite(cosine) ? cosine : 0;
+
+      // Apply weights if present (from guards)
+      const weight = state.candidateWeights?.[link] ?? 1.0;
+      const sim = rawSim * weight;
+
+      return { link, sim, rawSim, weight };
+    });
+
+    // Sort descending
+    scores.sort((a, b) => b.sim - a.sim);
+
+    // 4. Select Top-1: the pure "Dumb Muscle" greedy baseline never samples among runners-up
+    const selected = scores[0];
+
+    logger.info(`[NaiveGreedyPolicy] Selected: ${selected.link} (Score: ${selected.sim.toFixed(4)})`);
+
+    return { type: 'NAVIGATE', title: selected.link };
+  }
+}
diff --git a/src/adapters/wikipedia/policy.ts b/src/adapters/wikipedia/policy.ts
@@ -4,16 +4,16 @@ import type { WikipediaEmbedder } from './embedder';
 import { logger } from './telemetry';
 import type { WikiAction, WikiState } from './types';
 
-export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, number> {
-  public id = 'greedy-wiki-policy';
+export class StochasticHeuristicPolicy implements ProbePolicy<WikiState, WikiAction, number> {
+  public id = 'stochastic-heuristic-policy';
 
   constructor(
     private embedder: WikipediaEmbedder,
     private goalEmbedding: number[]
   ) { }
 
   initialize(_state: WikiState): void {
-    // No-op for greedy policy
+    // No-op for stochastic policy
   }
 
   isStable(state: WikiState): boolean {
@@ -26,7 +26,7 @@ export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, numb
     }
 
     if (!state.links || state.links.length === 0) {
-      logger.warn("[GreedyWikiPolicy] Dead end! No links found.");
+      logger.warn("[StochasticHeuristicPolicy] Dead end! No links found.");
       return { type: 'DONE', result: "Dead End" };
     }
 
@@ -43,14 +43,14 @@ export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, numb
     );
 
     if (basicValidLinks.length === 0) {
-      logger.warn("[GreedyWikiPolicy] Dead end after basic filtering! No valid links found.");
+      logger.warn("[StochasticHeuristicPolicy] Dead end after basic filtering! No valid links found.");
       return { type: 'DONE', result: "Dead End (Filtered)" };
     }
 
     // Optimization: Take first 50 links (or random sample) to evaluate
     const candidates = basicValidLinks.slice(0, 50);
 
-    logger.debug(`[GreedyWikiPolicy] Evaluating ${candidates.length} candidates...`);
+    logger.debug(`[StochasticHeuristicPolicy] Evaluating ${candidates.length} candidates...`);
 
     // 2. Embed candidates
     const embeddings = await this.embedder.embedBatch(candidates);
@@ -75,7 +75,7 @@ export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, numb
     const top3 = scores.slice(0, 3);
     const selected = top3[Math.floor(Math.random() * top3.length)];
 
-    logger.info(`[GreedyWikiPolicy] Selected: ${selected.link} (Score: ${selected.sim.toFixed(4)}, Raw: ${selected.rawSim.toFixed(4)}, Weight: ${selected.weight.toFixed(2)})`);
+    logger.info(`[StochasticHeuristicPolicy] Selected: ${selected.link} (Score: ${selected.sim.toFixed(4)}, Raw: ${selected.rawSim.toFixed(4)}, Weight: ${selected.weight.toFixed(2)})`);
 
     return { type: 'NAVIGATE', title: selected.link };
   }
diff --git a/src/examples/wikipedia/demo.ts b/src/examples/wikipedia/demo.ts