# _storm_agent.py
import asyncio
import logging
import os
import tempfile
import traceback
from typing import Any, Optional, Sequence, Union

from autogen_agentchat.agents import BaseChatAgent
from autogen_agentchat.base import Response
from autogen_agentchat.messages import ChatMessage, TextMessage
from autogen_core import CancellationToken
from knowledge_storm import (
    STORMWikiLMConfigs,
    STORMWikiRunner,
    STORMWikiRunnerArguments,
)
from knowledge_storm.lm import AzureOpenAIModel
from knowledge_storm.rm import TavilySearchRM

from sunagent_ext.secret_management.config import Config
from sunagent_app._constants import LOGGER_NAME

logger = logging.getLogger(LOGGER_NAME)

class StormConfig:
"""Config loader for environment variables."""
def __init__(self) -> None:
# OpenAI parameters
self.openai_api_key: str = ""
self.openai_model_name: str = ""
self.azure_api_base: str = ""
self.azure_api_version: str = ""
# String parameters
self.output_dir: str = ""
self.tavily_api_key: str = ""
# Integer parameters
self.max_thread_num: int = 2
self.max_conv_turn: int = 2
self.max_perspective: int = 2
self.search_top_k: int = 2
        self.retrieve_top_k: int = 2

    async def initialize(self, config: Config) -> None:
self.openai_api_key = await config.get_env("openai/OPENAI_API_KEY")
self.openai_model_name = await config.get_env("openai/OPENAI_DEPLOYMENT")
self.azure_api_base = await config.get_env("openai/OPENAI_ENDPOINT")
self.azure_api_version = await config.get_env("openai/OPENAI_API_VERSION")
# String parameters
self.output_dir = await config.get_env("OUTPUT_DIR")
self.tavily_api_key = await config.get_env("TAVILY_API_KEY")
# Integer parameters
self.max_thread_num = int(await config.get_env("MAX_THREAD_NUM", "2"))
self.max_conv_turn = int(await config.get_env("MAX_CONV_TURN", "2"))
self.max_perspective = int(await config.get_env("MAX_PERSPECTIVE", "2"))
self.search_top_k = int(await config.get_env("SEARCH_TOP_K", "2"))
        self.retrieve_top_k = int(await config.get_env("RETRIEVE_TOP_K", "2"))

    def _str_to_bool(self, value: Union[str, bool]) -> bool:
if isinstance(value, bool):
return value
return value.lower() in ("true", "1", "yes", "on")
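

# Illustrative behavior of StormConfig (comments only, not executed; assumes
# `get_env` returns its second argument as a fallback default when the key is
# unset, as the call sites above suggest):
#   - Integer parameters then fall back to their defaults, e.g.
#     `await config.get_env("MAX_CONV_TURN", "2")` -> "2" -> 2.
#   - `_str_to_bool` treats "true", "1", "yes", and "on" (case-insensitively)
#     as truthy; bool values pass through unchanged.
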
class StormAgent(BaseChatAgent):
"""
STORM Agent for generating wiki-style research reports.
Stages: research, outline generation, article writing, and polishing.
"""
def __init__(
self,
name: str,
description: str = "STORM Agent: research and wiki-style report generation.",
config: Optional[StormConfig] = None,
):
super().__init__(name=name, description=description)
self.config = config or StormConfig()
        self._setup_storm_runner()

    def _setup_storm_runner(self) -> None:
"""Initialize the STORM runner."""
# Language model configuration
self.lm_configs = STORMWikiLMConfigs()
openai_kwargs = {
"api_key": self.config.openai_api_key,
"temperature": 1.0,
"top_p": 0.9,
"api_base": self.config.azure_api_base,
"api_version": self.config.azure_api_version,
}
# Instantiate language models
conv_simulator_lm = AzureOpenAIModel(model=self.config.openai_model_name, max_tokens=500, **openai_kwargs)
question_asker_lm = AzureOpenAIModel(model=self.config.openai_model_name, max_tokens=500, **openai_kwargs)
outline_gen_lm = AzureOpenAIModel(model=self.config.openai_model_name, max_tokens=400, **openai_kwargs)
article_gen_lm = AzureOpenAIModel(model=self.config.openai_model_name, max_tokens=700, **openai_kwargs)
article_polish_lm = AzureOpenAIModel(model=self.config.openai_model_name, max_tokens=4000, **openai_kwargs)
# Set model configs
self.lm_configs.set_conv_simulator_lm(conv_simulator_lm)
self.lm_configs.set_question_asker_lm(question_asker_lm)
self.lm_configs.set_outline_gen_lm(outline_gen_lm)
self.lm_configs.set_article_gen_lm(article_gen_lm)
self.lm_configs.set_article_polish_lm(article_polish_lm)
# Set retriever
self.rm = TavilySearchRM(
tavily_search_api_key=self.config.tavily_api_key,
k=self.config.search_top_k,
include_raw_content=True,
)
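
    # The five models configured above correspond to STORM's pipeline roles:
    # conversation simulation, question asking, outline generation, article
    # generation, and article polishing. TavilySearchRM supplies the web
    # search results consumed during the research stage.
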
async def on_reset(self, cancellation_token: Optional[CancellationToken] = None) -> None:
"""Reset agent state."""
        pass

    async def on_messages(self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken) -> Response:
"""Handle incoming messages and run STORM."""
try:
last_message = messages[-1]
# Extract content from different message types
if isinstance(last_message, TextMessage):
topic = last_message.content.strip()
else:
# Handle other message types or fall back to string representation
topic = str(last_message).strip()
# Create a temporary output directory
with tempfile.TemporaryDirectory() as temp_dir:
# Use temp dir if output_dir is not set
output_dir = self.config.output_dir or temp_dir
logger.info(f"stormAgent output directory: {output_dir}")
# Engine arguments
engine_args = STORMWikiRunnerArguments(
output_dir=output_dir,
max_conv_turn=self.config.max_conv_turn,
max_perspective=self.config.max_perspective,
search_top_k=self.config.search_top_k,
max_thread_num=self.config.max_thread_num,
)
# Create STORM runner
runner = STORMWikiRunner(engine_args, self.lm_configs, self.rm)
# Run STORM
                await asyncio.get_running_loop().run_in_executor(None, self._run_storm, runner, topic)
# Get result
result = StormAgent._get_storm_result(output_dir, topic)
return Response(
chat_message=TextMessage(
content=result,
source=self.name,
)
)
except Exception as e:
error_message = f"An error occurred during the STORM process: {str(e)}"
logger.error(error_message)
logger.error(traceback.format_exc())
            return self._create_error_response()

    def _create_error_response(self) -> Response:
return Response(
chat_message=TextMessage(
content="system internal error, EARLY_TERMINATE",
source=self.name,
)
        )

    def _run_storm(self, runner: Any, topic: str) -> None:
"""Synchronous wrapper to run STORM."""
runner.run(
topic=topic,
)
runner.post_run()
        runner.summary()

    @staticmethod
    def truncate_filename(filename: str, max_length: int = 125) -> str:
        """Truncate a filename to at most `max_length` characters."""
        if len(filename) > max_length:
            return filename[:max_length]
        return filename

    @staticmethod
def _get_storm_result(output_dir: str, topic: str) -> str:
"""Read storm_gen_article_polished.txt and return its content."""
article_dir_name = StormAgent.truncate_filename(topic.replace(" ", "_").replace("/", "_"))
polished_path = os.path.join(output_dir, article_dir_name, "storm_gen_article_polished.txt")
if os.path.exists(polished_path):
with open(polished_path, "r", encoding="utf-8") as f:
return f.read()
else:
raise RuntimeError("No polished article available")
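
    # Example: for the topic "Large language model", STORM writes its output
    # under <output_dir>/Large_language_model/, and the polished article is
    # read from storm_gen_article_polished.txt inside that directory.
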
@property
def produced_message_types(self) -> Sequence[type[ChatMessage]]:
"""Message types produced by this agent."""
return [TextMessage]
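

# End-to-end usage sketch (comments only; assumes the environment variables
# read by StormConfig.initialize are available through the Config backend, and
# that Config() can be constructed without arguments -- both are assumptions):
#
#     async def main() -> None:
#         storm_config = StormConfig()
#         await storm_config.initialize(Config())
#         agent = StormAgent(name="storm", config=storm_config)
#         response = await agent.on_messages(
#             [TextMessage(content="History of the Internet", source="user")],
#             CancellationToken(),
#         )
#         print(response.chat_message.content)
#
#     asyncio.run(main())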