Skip to content

Commit 3ef474c

Browse files
committed
Add dumb muscle and rich kid baseline examples
1 parent b162102 commit 3ef474c

5 files changed

Lines changed: 301 additions & 48 deletions

File tree

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@
4747
"examples:quickstart": "tsx src/examples/quickstart.ts",
4848
"examples:github": "tsx src/examples/github/agent-demo.ts",
4949
"examples:github:baseline": "tsx src/examples/github/baseline-demo.ts",
50-
"examples:wikipedia": "tsx src/examples/wikipedia/demo.ts"
50+
"examples:wikipedia": "tsx src/examples/wikipedia/demo.ts",
51+
"examples:wikipedia:baseline": "tsx src/examples/wikipedia/baseline-agent.ts"
5152
},
5253
"devDependencies": {
5354
"@eslint/js": "^9.37.0",
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import { type OpenAI } from 'openai';
2+
3+
import type { Ladder, ProbePolicy } from '../../../core/interfaces';
4+
import { logger } from '../telemetry';
5+
import type { WikiAction, WikiState } from '../types';
6+
7+
/**
 * Shape of the JSON object the LLM is instructed to return for each
 * navigation step (see the "JSON Format" section of the prompt built in
 * `LlmCoTPolicy.decide`). The parsed value is not schema-validated.
 */
interface CoTResponse {
8+
/** The model's step-by-step justification for its choice. */
reasoning: string;
9+
/** The chosen link; the prompt asks for an exact string from the available links list. */
selected_link: string;
10+
}
11+
12+
/**
 * Baseline navigation policy that delegates each step to an LLM using a
 * chain-of-thought style prompt.
 *
 * The policy keeps a rolling chat transcript (system prompt + recent turns)
 * so the model can see its own earlier reasoning. On every call to
 * {@link LlmCoTPolicy.decide} it offers the model up to
 * `MAX_CANDIDATES` article links and expects a JSON object naming one of
 * them. Any failure (API error, empty/invalid JSON, a link that was not
 * offered) falls back to the first candidate so the episode keeps moving.
 */
export class LlmCoTPolicy implements ProbePolicy<WikiState, WikiAction, number> {
  /** Link prefixes that denote meta pages and are never offered to the model. */
  private static readonly META_PREFIXES: readonly string[] = [
    'Wikipedia:',
    'Template:',
    'Category:',
    'Help:',
    'Portal:',
    'Talk:',
    'Special:',
    'File:'
  ];

  /** Maximum number of links placed in the prompt (keeps the context small). */
  private static readonly MAX_CANDIDATES = 50;

  /** Transcript length (including the system message) above which it is truncated. */
  private static readonly MAX_HISTORY_LENGTH = 12;

  /** Number of most recent messages kept, after the system message, when truncating. */
  private static readonly RECENT_MESSAGES_KEPT = 10;

  public id = 'llm-cot-policy';
  private chatHistory: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [];

  /**
   * @param openai Client used to issue chat-completion requests.
   */
  constructor(
    private openai: OpenAI
  ) { }

  /**
   * Resets the transcript to a single system message describing the goal.
   *
   * @param state Initial state; `goal` and `currentTitle` are embedded in the system prompt.
   */
  initialize(state: WikiState): void {
    logger.info(`[LlmCoTPolicy] Initializing stateful agent. Goal: ${state.goal}`);
    this.chatHistory = [
      {
        role: 'system',
        content: `You are a strategic Wikipedia navigation agent.
Your goal is to reach the topic "${state.goal}" starting from "${state.currentTitle}".
You must strictly output JSON.
`
      }
    ];
  }

  /**
   * @returns `true` once the current article title equals the goal title.
   */
  isStable(state: WikiState): boolean {
    return state.currentTitle === state.goal;
  }

  /**
   * Chooses the next action by asking the LLM to pick one of the candidate links.
   *
   * @param state Current navigation state (title, goal, outgoing links, history).
   * @param _ladder Unused by this policy.
   * @returns `DONE` when the goal is reached or no usable link exists;
   *          otherwise `NAVIGATE` to the model's pick, or to the first
   *          candidate when the model's answer cannot be used.
   */
  async decide(state: WikiState, _ladder: Ladder<number>): Promise<WikiAction> {
    if (state.currentTitle === state.goal) {
      return { type: 'DONE', result: "Arrived at Goal!" };
    }

    if (!state.links || state.links.length === 0) {
      return { type: 'DONE', result: "Dead End" };
    }

    // Filter out meta-namespaces
    const basicValidLinks = state.links.filter(link =>
      !LlmCoTPolicy.META_PREFIXES.some(prefix => link.startsWith(prefix))
    );

    // Limit to top 50 to fit context
    const candidates = basicValidLinks.slice(0, LlmCoTPolicy.MAX_CANDIDATES);

    // Without this guard an all-meta link list would still hit the LLM and
    // every fallback path would return NAVIGATE with an undefined title.
    if (candidates.length === 0) {
      logger.warn("[LlmCoTPolicy] Dead end after basic filtering! No valid links found.");
      return { type: 'DONE', result: "Dead End (Filtered)" };
    }

    const prompt = `
Current Topic: "${state.currentTitle}"
Goal Topic: "${state.goal}"
History: ${state.history.join(' -> ')}

Available Links (Top 50):
${JSON.stringify(candidates)}

Your task:
1. Analyze the relationship between the current topic and the goal.
2. Evaluate the available links to see which one is semantically closest or most likely to lead to the goal.
3. Reason step-by-step about why you are choosing a specific link.
4. Output your decision in strict JSON format.

JSON Format:
{
"reasoning": "your step-by-step reasoning here",
"selected_link": "exact string from the available links list"
}
`;

    // Append user message
    this.chatHistory.push({ role: 'user', content: prompt });

    // Context Window Management: keep the system message plus the most
    // recent messages once the transcript grows past the limit.
    if (this.chatHistory.length > LlmCoTPolicy.MAX_HISTORY_LENGTH) {
      const system = this.chatHistory[0];
      const recent = this.chatHistory.slice(-LlmCoTPolicy.RECENT_MESSAGES_KEPT);
      this.chatHistory = [system, ...recent];
      logger.debug(`[LlmCoTPolicy] Truncated history to last 10 turns.`);
    }

    logger.info(`[LlmCoTPolicy] Thinking... (History Depth: ${this.chatHistory.length})`);

    try {
      const response = await this.openai.chat.completions.create({
        model: 'gpt-4o-mini',
        messages: this.chatHistory,
        response_format: { type: 'json_object' },
        temperature: 0.2
      });

      const content = response.choices[0]?.message.content;
      if (!content) {
        throw new Error("Empty response from LLM");
      }

      logger.info(`[LlmCoTPolicy] LLM Response: ${content}`);

      // Append assistant response
      this.chatHistory.push({ role: 'assistant', content });

      // The payload is model output, so treat every field as possibly missing.
      const parsed = JSON.parse(content) as Partial<CoTResponse>;
      const selectedLink = parsed.selected_link;

      if (typeof selectedLink !== 'string' || !candidates.includes(selectedLink)) {
        logger.warn(`[LlmCoTPolicy] Hallucinated link: ${selectedLink}. Fallback to first candidate.`);
        return { type: 'NAVIGATE', title: candidates[0] };
      }

      logger.info(`[LlmCoTPolicy] Reasoning: ${parsed.reasoning}`);
      logger.info(`[LlmCoTPolicy] Selected: ${selectedLink}`);

      return { type: 'NAVIGATE', title: selectedLink };

    } catch (error) {
      logger.error(`[LlmCoTPolicy] Error: ${error instanceof Error ? error.message : String(error)}`);
      // Fallback: candidates is guaranteed non-empty by the guard above.
      return { type: 'NAVIGATE', title: candidates[0] };
    }
  }
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import type { Ladder, ProbePolicy } from '../../../core/interfaces';
2+
import { dot, norm } from '../../../core/kinematics/math';
3+
import type { WikipediaEmbedder } from '../embedder';
4+
import { logger } from '../telemetry';
5+
import type { WikiAction, WikiState } from '../types';
6+
7+
/**
 * Baseline policy that always follows the single link whose embedding has
 * the highest (optionally weighted) cosine similarity to the goal embedding.
 *
 * It keeps no memory between steps and applies no exploration: the
 * top-scoring candidate is always taken.
 */
export class NaiveGreedyPolicy implements ProbePolicy<WikiState, WikiAction, number> {
  /** Link prefixes that denote meta pages and are never considered. */
  private static readonly META_PREFIXES: readonly string[] = [
    'Wikipedia:',
    'Template:',
    'Category:',
    'Help:',
    'Portal:',
    'Talk:',
    'Special:',
    'File:'
  ];

  /** Maximum number of links embedded and scored per step. */
  private static readonly MAX_CANDIDATES = 50;

  public id = 'naive-greedy-policy';

  /**
   * @param embedder Used to embed candidate link titles in one batch per step.
   * @param goalEmbedding Embedding vector of the goal that candidates are compared against.
   */
  constructor(
    private embedder: WikipediaEmbedder,
    private goalEmbedding: number[]
  ) { }

  /** No-op: the greedy policy carries no per-episode state. */
  initialize(_state: WikiState): void {
    // No-op for greedy policy
  }

  /**
   * @returns `true` once the current article title equals the goal title.
   */
  isStable(state: WikiState): boolean {
    return state.currentTitle === state.goal;
  }

  /**
   * Picks the next action by scoring candidate links against the goal embedding.
   *
   * @param state Current navigation state; `candidateWeights`, when present,
   *              multiplies the raw similarity of the matching link.
   * @param _ladder Unused by this policy.
   * @returns `DONE` when the goal is reached or no usable link exists;
   *          otherwise `NAVIGATE` to the highest-scoring candidate.
   */
  async decide(state: WikiState, _ladder: Ladder<number>): Promise<WikiAction> {
    if (state.currentTitle === state.goal) {
      return { type: 'DONE', result: "Arrived at Goal!" };
    }

    if (!state.links || state.links.length === 0) {
      logger.warn("[NaiveGreedyPolicy] Dead end! No links found.");
      return { type: 'DONE', result: "Dead End" };
    }

    // 1. Filter out meta-namespaces
    const basicValidLinks = state.links.filter(link =>
      !NaiveGreedyPolicy.META_PREFIXES.some(prefix => link.startsWith(prefix))
    );

    if (basicValidLinks.length === 0) {
      logger.warn("[NaiveGreedyPolicy] Dead end after basic filtering! No valid links found.");
      return { type: 'DONE', result: "Dead End (Filtered)" };
    }

    // Optimization: only the first 50 links are evaluated
    const candidates = basicValidLinks.slice(0, NaiveGreedyPolicy.MAX_CANDIDATES);

    logger.debug(`[NaiveGreedyPolicy] Evaluating ${candidates.length} candidates...`);

    // 2. Embed candidates
    const embeddings = await this.embedder.embedBatch(candidates);

    // 3. Rank by similarity to GOAL
    // The goal norm is the same for every candidate, so compute it once.
    const goalNorm = norm(this.goalEmbedding);

    const scores = candidates.map((link, i) => {
      const emb = embeddings[i];

      // Cosine similarity. A zero-length vector has no direction; score it 0
      // rather than NaN, which would make the sort comparator inconsistent.
      const denominator = norm(emb) * goalNorm;
      const rawSim = denominator > 0 ? dot(emb, this.goalEmbedding) / denominator : 0;

      // Apply weights if present (from guards)
      const weight = state.candidateWeights?.[link] ?? 1.0;
      const sim = rawSim * weight;

      return { link, sim, rawSim, weight };
    });

    // Sort descending
    scores.sort((a, b) => b.sim - a.sim);

    // 4. Select Top-1: for pure "Dumb Muscle" greedy, strictly take the best score.
    // `scores` is non-empty because `basicValidLinks` was checked above.
    const selected = scores[0];

    logger.info(`[NaiveGreedyPolicy] Selected: ${selected.link} (Score: ${selected.sim.toFixed(4)})`);

    return { type: 'NAVIGATE', title: selected.link };
  }
}

src/adapters/wikipedia/policy.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ import type { WikipediaEmbedder } from './embedder';
44
import { logger } from './telemetry';
55
import type { WikiAction, WikiState } from './types';
66

7-
export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, number> {
8-
public id = 'greedy-wiki-policy';
7+
export class StochasticHeuristicPolicy implements ProbePolicy<WikiState, WikiAction, number> {
8+
public id = 'stochastic-heuristic-policy';
99

1010
constructor(
1111
private embedder: WikipediaEmbedder,
1212
private goalEmbedding: number[]
1313
) { }
1414

1515
initialize(_state: WikiState): void {
16-
// No-op for greedy policy
16+
// No-op for stochastic policy
1717
}
1818

1919
isStable(state: WikiState): boolean {
@@ -26,7 +26,7 @@ export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, numb
2626
}
2727

2828
if (!state.links || state.links.length === 0) {
29-
logger.warn("[GreedyWikiPolicy] Dead end! No links found.");
29+
logger.warn("[StochasticHeuristicPolicy] Dead end! No links found.");
3030
return { type: 'DONE', result: "Dead End" };
3131
}
3232

@@ -43,14 +43,14 @@ export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, numb
4343
);
4444

4545
if (basicValidLinks.length === 0) {
46-
logger.warn("[GreedyWikiPolicy] Dead end after basic filtering! No valid links found.");
46+
logger.warn("[StochasticHeuristicPolicy] Dead end after basic filtering! No valid links found.");
4747
return { type: 'DONE', result: "Dead End (Filtered)" };
4848
}
4949

5050
// Optimization: Take first 50 links (or random sample) to evaluate
5151
const candidates = basicValidLinks.slice(0, 50);
5252

53-
logger.debug(`[GreedyWikiPolicy] Evaluating ${candidates.length} candidates...`);
53+
logger.debug(`[StochasticHeuristicPolicy] Evaluating ${candidates.length} candidates...`);
5454

5555
// 2. Embed candidates
5656
const embeddings = await this.embedder.embedBatch(candidates);
@@ -75,7 +75,7 @@ export class GreedyWikiPolicy implements ProbePolicy<WikiState, WikiAction, numb
7575
const top3 = scores.slice(0, 3);
7676
const selected = top3[Math.floor(Math.random() * top3.length)];
7777

78-
logger.info(`[GreedyWikiPolicy] Selected: ${selected.link} (Score: ${selected.sim.toFixed(4)}, Raw: ${selected.rawSim.toFixed(4)}, Weight: ${selected.weight.toFixed(2)})`);
78+
logger.info(`[StochasticHeuristicPolicy] Selected: ${selected.link} (Score: ${selected.sim.toFixed(4)}, Raw: ${selected.rawSim.toFixed(4)}, Weight: ${selected.weight.toFixed(2)})`);
7979

8080
return { type: 'NAVIGATE', title: selected.link };
8181
}

0 commit comments

Comments
 (0)