|
| 1 | +import { type OpenAI } from 'openai'; |
| 2 | + |
| 3 | +import type { Ladder, ProbePolicy } from '../../../core/interfaces'; |
| 4 | +import { logger } from '../telemetry'; |
| 5 | +import type { WikiAction, WikiState } from '../types'; |
| 6 | + |
/**
 * Shape of the JSON object the LLM is instructed to return
 * (see the "JSON Format" section of the prompt in `LlmCoTPolicy.decide`).
 * Note: produced by `JSON.parse` on model output, so fields must still be
 * validated at runtime before use.
 */
interface CoTResponse {
  // Free-text step-by-step justification for the choice (logged only).
  reasoning: string;
  // Must be an exact string from the candidate link list; the model may
  // hallucinate values outside the list, so callers re-validate it.
  selected_link: string;
}
| 11 | + |
| 12 | +export class LlmCoTPolicy implements ProbePolicy<WikiState, WikiAction, number> { |
| 13 | + public id = 'llm-cot-policy'; |
| 14 | + private chatHistory: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = []; |
| 15 | + |
| 16 | + constructor( |
| 17 | + private openai: OpenAI |
| 18 | + ) { } |
| 19 | + |
| 20 | + initialize(state: WikiState): void { |
| 21 | + logger.info(`[LlmCoTPolicy] Initializing stateful agent. Goal: ${state.goal}`); |
| 22 | + this.chatHistory = [ |
| 23 | + { |
| 24 | + role: 'system', |
| 25 | + content: `You are a strategic Wikipedia navigation agent. |
| 26 | +Your goal is to reach the topic "${state.goal}" starting from "${state.currentTitle}". |
| 27 | +You must strictly output JSON. |
| 28 | +` |
| 29 | + } |
| 30 | + ]; |
| 31 | + } |
| 32 | + |
| 33 | + isStable(state: WikiState): boolean { |
| 34 | + return state.currentTitle === state.goal; |
| 35 | + } |
| 36 | + |
| 37 | + async decide(state: WikiState, _ladder: Ladder<number>): Promise<WikiAction> { |
| 38 | + if (state.currentTitle === state.goal) { |
| 39 | + return { type: 'DONE', result: "Arrived at Goal!" }; |
| 40 | + } |
| 41 | + |
| 42 | + if (!state.links || state.links.length === 0) { |
| 43 | + return { type: 'DONE', result: "Dead End" }; |
| 44 | + } |
| 45 | + |
| 46 | + // Filter out meta-namespaces |
| 47 | + const basicValidLinks = state.links.filter(link => |
| 48 | + !link.startsWith('Wikipedia:') && |
| 49 | + !link.startsWith('Template:') && |
| 50 | + !link.startsWith('Category:') && |
| 51 | + !link.startsWith('Help:') && |
| 52 | + !link.startsWith('Portal:') && |
| 53 | + !link.startsWith('Talk:') && |
| 54 | + !link.startsWith('Special:') && |
| 55 | + !link.startsWith('File:') |
| 56 | + ); |
| 57 | + |
| 58 | + // Limit to top 50 to fit context |
| 59 | + const candidates = basicValidLinks.slice(0, 50); |
| 60 | + |
| 61 | + const prompt = ` |
| 62 | +Current Topic: "${state.currentTitle}" |
| 63 | +Goal Topic: "${state.goal}" |
| 64 | +History: ${state.history.join(' -> ')} |
| 65 | +
|
| 66 | +Available Links (Top 50): |
| 67 | +${JSON.stringify(candidates)} |
| 68 | +
|
| 69 | +Your task: |
| 70 | +1. Analyze the relationship between the current topic and the goal. |
| 71 | +2. Evaluate the available links to see which one is semantically closest or most likely to lead to the goal. |
| 72 | +3. Reason step-by-step about why you are choosing a specific link. |
| 73 | +4. Output your decision in strict JSON format. |
| 74 | +
|
| 75 | +JSON Format: |
| 76 | +{ |
| 77 | + "reasoning": "your step-by-step reasoning here", |
| 78 | + "selected_link": "exact string from the available links list" |
| 79 | +} |
| 80 | +`; |
| 81 | + |
| 82 | + // Append user message |
| 83 | + this.chatHistory.push({ role: 'user', content: prompt }); |
| 84 | + |
| 85 | + // Context Window Management: Keep System + Last 10 messages |
| 86 | + // If history grows beyond ~12 items (1 system + 11 others), truncate |
| 87 | + if (this.chatHistory.length > 12) { |
| 88 | + const system = this.chatHistory[0]; |
| 89 | + const recent = this.chatHistory.slice(-10); |
| 90 | + this.chatHistory = [system, ...recent]; |
| 91 | + logger.debug(`[LlmCoTPolicy] Truncated history to last 10 turns.`); |
| 92 | + } |
| 93 | + |
| 94 | + logger.info(`[LlmCoTPolicy] Thinking... (History Depth: ${this.chatHistory.length})`); |
| 95 | + |
| 96 | + try { |
| 97 | + const response = await this.openai.chat.completions.create({ |
| 98 | + model: 'gpt-4o-mini', |
| 99 | + messages: this.chatHistory, |
| 100 | + response_format: { type: 'json_object' }, |
| 101 | + temperature: 0.2 |
| 102 | + }); |
| 103 | + |
| 104 | + const content = response.choices[0].message.content; |
| 105 | + if (!content) { |
| 106 | + throw new Error("Empty response from LLM"); |
| 107 | + } |
| 108 | + |
| 109 | + logger.info(`[LlmCoTPolicy] LLM Response: ${content}`); |
| 110 | + |
| 111 | + // Append assistant response |
| 112 | + this.chatHistory.push({ role: 'assistant', content }); |
| 113 | + |
| 114 | + const parsed = JSON.parse(content) as CoTResponse; |
| 115 | + const selectedLink = parsed.selected_link; |
| 116 | + |
| 117 | + if (!candidates.includes(selectedLink)) { |
| 118 | + logger.warn(`[LlmCoTPolicy] Hallucinated link: ${selectedLink}. Fallback to first candidate.`); |
| 119 | + return { type: 'NAVIGATE', title: candidates[0] }; |
| 120 | + } |
| 121 | + |
| 122 | + logger.info(`[LlmCoTPolicy] Reasoning: ${parsed.reasoning}`); |
| 123 | + logger.info(`[LlmCoTPolicy] Selected: ${selectedLink}`); |
| 124 | + |
| 125 | + return { type: 'NAVIGATE', title: selectedLink }; |
| 126 | + |
| 127 | + } catch (error) { |
| 128 | + logger.error(`[LlmCoTPolicy] Error: ${error instanceof Error ? error.message : String(error)}`); |
| 129 | + // Fallback |
| 130 | + return { type: 'NAVIGATE', title: candidates[0] }; |
| 131 | + } |
| 132 | + } |
| 133 | +} |
0 commit comments