Skip to content

Commit 7c1b1b7

Browse files
committed
Improve agent defaults
1 parent 49fa419 commit 7c1b1b7

7 files changed

Lines changed: 99 additions & 46 deletions

File tree

ensemble_agent/agent.py

Lines changed: 31 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
from .archive import ArchiveManager
1212
from .config import AgentConfig, INPUT_MARKER
13+
from .debug import DebugLogger
1314
from .llm import create_llm
1415
from .mcp_client import connect_mcp, find_mcp_server
1516
from .prompts import (
@@ -22,6 +23,14 @@
2223
from .skills import load_skills
2324
from .skills.generator import GeneratorSkill
2425

26+
DEFAULT_PROMPT = """Create six_hump_camel APOSMM scripts:
27+
- Executable: six_hump_camel/six_hump_camel.x
28+
- Input: six_hump_camel/input.txt
29+
- Template vars: X0, X1
30+
- 4 workers, 100 sims.
31+
- The output file for each simulation is output.txt
32+
- The bounds should be 0,1 and -1,2 for X0 and X1 respectively"""
33+
2534

2635
async def run_agent(config: AgentConfig):
2736
"""Main entry point: build skills, connect MCP, run the agent loop."""
@@ -32,6 +41,12 @@ async def run_agent(config: AgentConfig):
3241
# Set up archive manager
3342
archive = ArchiveManager(config.output_dir)
3443

44+
# Debug logger
45+
debug = None
46+
if config.debug:
47+
log_path = Path(config.output_dir) / "debug_log.txt"
48+
debug = DebugLogger(log_path, model=config.model)
49+
3550
# Load skills — drop generator when using existing scripts
3651
skill_names = config.skills
3752
if config.scripts_dir and "generator" in skill_names:
@@ -69,16 +84,20 @@ async def run_agent(config: AgentConfig):
6984
system_prompt = build_system_prompt(skills, has_generator)
7085
messages = [("system", system_prompt)]
7186

87+
if debug:
88+
debug.log_system_prompt(system_prompt)
89+
debug.log_tool_schemas(tools)
90+
7291
if config.show_prompts:
7392
print(f"System prompt:\n{system_prompt}\n")
7493

7594
# Determine initial message
7695
initial_msg = _build_initial_message(config, archive)
7796

7897
if not config.interactive:
79-
await _run_autonomous(agent, messages, initial_msg, config)
98+
await _run_autonomous(agent, messages, initial_msg, config, debug)
8099
else:
81-
await _run_interactive(agent, messages, initial_msg, config, has_generator)
100+
await _run_interactive(agent, messages, initial_msg, config, has_generator, debug)
82101

83102

84103
def _build_initial_message(config, archive):
@@ -105,20 +124,12 @@ def _build_initial_message(config, archive):
105124
user_input = input().strip()
106125
if user_input:
107126
return user_input
108-
print("Using default demo prompt")
109127

110-
return (
111-
"Create six_hump_camel APOSMM scripts:\n"
112-
"- Executable: six_hump_camel/six_hump_camel.x\n"
113-
"- Input: six_hump_camel/input.txt\n"
114-
"- Template vars: X0, X1\n"
115-
"- 4 workers, 100 sims.\n"
116-
"- The output file for each simulation is output.txt\n"
117-
"- The bounds should be 0,1 and -1,2 for X0 and X1 respectively"
118-
)
128+
print("Using demo prompt")
129+
return DEFAULT_PROMPT
119130

120131

121-
async def _run_autonomous(agent, messages, initial_msg, config):
132+
async def _run_autonomous(agent, messages, initial_msg, config, debug):
122133
"""Single invocation — agent generates/loads, runs, fixes, reports."""
123134
goal = AUTONOMOUS_GOAL.format(initial_msg=initial_msg)
124135
messages.append(("user", goal))
@@ -128,6 +139,8 @@ async def _run_autonomous(agent, messages, initial_msg, config):
128139
print("Starting agent...\n")
129140

130141
result = await agent.ainvoke({"messages": messages})
142+
if debug:
143+
debug.dump_messages(result["messages"], "Autonomous run complete")
131144
print(f"\n{'=' * 60}")
132145
print("Agent completed")
133146
print(f"{'=' * 60}")
@@ -137,19 +150,23 @@ async def _run_autonomous(agent, messages, initial_msg, config):
137150
print(content)
138151

139152

140-
async def _run_interactive(agent, messages, initial_msg, config, has_generator):
153+
async def _run_interactive(agent, messages, initial_msg, config, has_generator, debug):
141154
"""Chat loop — agent responds, waits for user input, repeats."""
142155
if has_generator:
143156
goal = INTERACTIVE_GOAL.format(initial_msg=initial_msg)
144157
else:
145158
goal = initial_msg
146159
messages.append(("user", goal))
147160
print("Starting agent...\n")
161+
turn = 0
148162

149163
while True:
150164
try:
151165
result = await agent.ainvoke({"messages": messages})
152166
messages = result["messages"]
167+
turn += 1
168+
if debug:
169+
debug.dump_messages(messages, f"Interactive turn {turn}")
153170
response = messages[-1].content
154171
if isinstance(response, list):
155172
response = "".join(block.get("text", "") for block in response)

ensemble_agent/archive.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ def __init__(self, work_dir):
1515
self.work_dir.mkdir(parents=True, exist_ok=True)
1616
self._counter = 1
1717
self._current = None
18+
self.run_succeeded = False
1819

1920
@property
2021
def current_archive(self):

ensemble_agent/config.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ class AgentConfig:
7575
# Output
7676
output_dir: str = DEFAULT_OUTPUT_DIR
7777
show_prompts: bool = False
78+
debug: bool = False
7879

7980
# Execution limits
8081
max_retries: int = MAX_RETRIES
@@ -118,6 +119,7 @@ def parse_args(argv=None) -> AgentConfig:
118119
parser.add_argument("--mcp-server", help="Path to generator mcp_server.mjs")
119120
parser.add_argument("--generate-only", action="store_true", help="Only generate scripts, don't run")
120121
parser.add_argument("--show-prompts", action="store_true", help="Print prompts sent to AI")
122+
parser.add_argument("--debug", action="store_true", help="Dump full message log to debug_log.txt")
121123
parser.add_argument("--max-iterations", type=int, default=MAX_AGENT_ITERATIONS, help="Max agent iterations")
122124
args = parser.parse_args(argv)
123125

@@ -130,6 +132,7 @@ def parse_args(argv=None) -> AgentConfig:
130132
skills=args.skills,
131133
mcp_server=args.mcp_server,
132134
show_prompts=args.show_prompts,
135+
debug=args.debug or bool(os.environ.get("AGENT_DEBUG")),
133136
max_iterations=args.max_iterations,
134137
)
135138
if args.model:

ensemble_agent/debug.py

Lines changed: 53 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,58 @@
1-
"""Debug logging for agent message history."""
1+
"""Debug logging for agent message history.
22
3-
from datetime import datetime
3+
Writes to a single debug_log.txt that captures system prompt, tool schemas,
4+
and full message history after each agent invocation.
5+
6+
Activated via --debug flag or AGENT_DEBUG=1 env var.
7+
"""
8+
9+
import time
410
from pathlib import Path
511

612

713
class DebugLogger:
8-
"""Dumps message history and prompts to timestamped files."""
9-
10-
def __init__(self, output_dir):
11-
self.output_dir = Path(output_dir)
12-
13-
def dump(self, content, stage=""):
14-
"""Write content to a timestamped debug file."""
15-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
16-
debug_file = self.output_dir / f"debug_{stage}_{timestamp}.txt"
17-
debug_file.parent.mkdir(parents=True, exist_ok=True)
18-
debug_file.write_text(content)
19-
print(f" (Debug dumped to {debug_file})")
20-
21-
def dump_messages(self, messages, stage=""):
22-
"""Dump a list of LangChain messages."""
23-
lines = []
24-
for msg in messages:
25-
role = getattr(msg, "type", "unknown")
26-
content = getattr(msg, "content", str(msg))
27-
lines.append(f"[{role}]\n{content}\n")
28-
self.dump("\n---\n".join(lines), stage)
29-
30-
def dump_prompt(self, prompt_text, stage=""):
31-
"""Print and optionally save a prompt."""
32-
slen = 15
33-
print(f"\n{'=' * slen} PROMPT TO AI ({stage}) {'=' * slen}")
34-
print(prompt_text)
35-
print(f"{'=' * slen} END AI PROMPT ({stage}) {'=' * slen}\n")
14+
"""Appends agent conversation details to debug_log.txt."""
15+
16+
def __init__(self, log_path, model=""):
17+
self.log_path = Path(log_path)
18+
self.log_path.parent.mkdir(parents=True, exist_ok=True)
19+
with open(self.log_path, "w") as f:
20+
f.write(f"Debug log started: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
21+
f.write(f"Model: {model}\n\n")
22+
23+
def log_system_prompt(self, prompt):
24+
with open(self.log_path, "a") as f:
25+
f.write("SYSTEM PROMPT\n" + "=" * 80 + "\n")
26+
f.write(prompt + "\n\n")
27+
28+
def log_tool_schemas(self, tools):
29+
with open(self.log_path, "a") as f:
30+
f.write("TOOL SCHEMAS\n" + "=" * 80 + "\n")
31+
for t in tools:
32+
f.write(f"\n{t.name}: {t.description}\n")
33+
f.write("\n")
34+
35+
def dump_messages(self, messages, label=""):
36+
with open(self.log_path, "a") as f:
37+
f.write(f"\n{'=' * 80}\n")
38+
if label:
39+
f.write(f" {label}\n{'=' * 80}\n")
40+
for msg in messages:
41+
role = type(msg).__name__
42+
f.write(f"\n--- {role} ---\n")
43+
if hasattr(msg, "tool_calls") and msg.tool_calls:
44+
f.write("[Tool calls]\n")
45+
for tc in msg.tool_calls:
46+
f.write(f" {tc.get('name', '?')}({tc.get('args', {})})\n")
47+
content = msg.content if isinstance(msg.content, str) else str(msg.content)
48+
if content:
49+
if len(content) > 2000:
50+
f.write(
51+
content[:1000]
52+
+ f"\n... [{len(content)} chars total] ...\n"
53+
+ content[-500:]
54+
+ "\n"
55+
)
56+
else:
57+
f.write(content + "\n")
58+
f.write(f"\n{'=' * 80}\n\n")

ensemble_agent/prompts.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,13 @@
55
IMPORTANT RULES:
66
{generator_rules}
77
- For ANY modifications, use read_file to see the current file, then write_file to save the edited version.
8+
- DO NOT merge or consolidate files - keep the same file structure.
9+
- DO NOT create new files unless explicitly asked. Fix existing files only.
10+
- DO NOT make any other changes or improvements beyond what is needed.
11+
- If a script fails because an executable or input file is not found, report the error and stop.
812
- If the user asks to see something, use read_file and show them the content.
913
- Don't run scripts unless the user explicitly asks you to run them.
1014
- When reviewing scripts, highlight key configuration: generator bounds/parameters and the objective function.
11-
- After running, if scripts fail, explain the error and offer to fix using write_file.
1215
1316
{skills_context}"""
1417

@@ -24,7 +27,10 @@
2427

2528
AUTONOMOUS_GOAL = """{initial_msg}
2629
27-
After generating/loading scripts: review them, run them, fix errors and retry (max 3 attempts). Report the result."""
30+
After generating/loading scripts: review them, run them, and if they fail fix the error and retry.
31+
DO NOT make any other changes or improvements.
32+
DO NOT wrap in markdown or add explanations when fixing.
33+
Report the result."""
2834

2935
INTERACTIVE_GOAL = """User request: {initial_msg}
3036

ensemble_agent/skills/file_ops.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@ async def read_file_tool(filepath: str) -> str:
3636
return file_path.read_text()
3737

3838
async def write_file_tool(filepath: str, content: str) -> str:
39+
if archive.run_succeeded:
40+
return "Script already ran successfully. No further changes needed."
3941
try:
4042
file_path = work_dir / filepath
4143
# Diff against old content for summary

ensemble_agent/skills/runner.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ async def run_script_tool(script_name: str) -> str:
4949
)
5050
if result.returncode == 0:
5151
skill._succeeded = True
52+
archive.run_succeeded = True
5253
print("Script ran successfully", flush=True)
5354
return f"SUCCESS\nOutput:\n{result.stdout[:500]}"
5455
else:

0 commit comments

Comments
 (0)