Improve agent defaults

shuds13 · shuds13 · commit 7c1b1b772e12 · 2026-03-01T14:11:13.000-06:00
diff --git a/ensemble_agent/agent.py b/ensemble_agent/agent.py
@@ -10,6 +10,7 @@
 
 from .archive import ArchiveManager
 from .config import AgentConfig, INPUT_MARKER
+from .debug import DebugLogger
 from .llm import create_llm
 from .mcp_client import connect_mcp, find_mcp_server
 from .prompts import (
@@ -22,6 +23,14 @@
 from .skills import load_skills
 from .skills.generator import GeneratorSkill
 
+DEFAULT_PROMPT = """Create six_hump_camel APOSMM scripts:
+- Executable: six_hump_camel/six_hump_camel.x
+- Input: six_hump_camel/input.txt
+- Template vars: X0, X1
+- 4 workers, 100 sims.
+- The output file for each simulation is output.txt
+- The bounds should be 0,1 and -1,2 for X0 and X1 respectively"""
+
 
 async def run_agent(config: AgentConfig):
     """Main entry point: build skills, connect MCP, run the agent loop."""
@@ -32,6 +41,12 @@ async def run_agent(config: AgentConfig):
     # Set up archive manager
     archive = ArchiveManager(config.output_dir)
 
+    # Debug logger
+    debug = None
+    if config.debug:
+        log_path = Path(config.output_dir) / "debug_log.txt"
+        debug = DebugLogger(log_path, model=config.model)
+
     # Load skills — drop generator when using existing scripts
     skill_names = config.skills
     if config.scripts_dir and "generator" in skill_names:
@@ -69,16 +84,20 @@ async def run_agent(config: AgentConfig):
         system_prompt = build_system_prompt(skills, has_generator)
         messages = [("system", system_prompt)]
 
+        if debug:
+            debug.log_system_prompt(system_prompt)
+            debug.log_tool_schemas(tools)
+
         if config.show_prompts:
             print(f"System prompt:\n{system_prompt}\n")
 
         # Determine initial message
         initial_msg = _build_initial_message(config, archive)
 
         if not config.interactive:
-            await _run_autonomous(agent, messages, initial_msg, config)
+            await _run_autonomous(agent, messages, initial_msg, config, debug)
         else:
-            await _run_interactive(agent, messages, initial_msg, config, has_generator)
+            await _run_interactive(agent, messages, initial_msg, config, has_generator, debug)
 
 
 def _build_initial_message(config, archive):
@@ -105,20 +124,12 @@ def _build_initial_message(config, archive):
         user_input = input().strip()
         if user_input:
             return user_input
-        print("Using default demo prompt")
 
-    return (
-        "Create six_hump_camel APOSMM scripts:\n"
-        "- Executable: six_hump_camel/six_hump_camel.x\n"
-        "- Input: six_hump_camel/input.txt\n"
-        "- Template vars: X0, X1\n"
-        "- 4 workers, 100 sims.\n"
-        "- The output file for each simulation is output.txt\n"
-        "- The bounds should be 0,1 and -1,2 for X0 and X1 respectively"
-    )
+    print("Using demo prompt")
+    return DEFAULT_PROMPT
 
 
-async def _run_autonomous(agent, messages, initial_msg, config):
+async def _run_autonomous(agent, messages, initial_msg, config, debug):
     """Single invocation — agent generates/loads, runs, fixes, reports."""
     goal = AUTONOMOUS_GOAL.format(initial_msg=initial_msg)
     messages.append(("user", goal))
@@ -128,6 +139,8 @@ async def _run_autonomous(agent, messages, initial_msg, config):
     print("Starting agent...\n")
 
     result = await agent.ainvoke({"messages": messages})
+    if debug:
+        debug.dump_messages(result["messages"], "Autonomous run complete")
     print(f"\n{'=' * 60}")
     print("Agent completed")
     print(f"{'=' * 60}")
@@ -137,19 +150,23 @@ async def _run_autonomous(agent, messages, initial_msg, config):
     print(content)
 
 
-async def _run_interactive(agent, messages, initial_msg, config, has_generator):
+async def _run_interactive(agent, messages, initial_msg, config, has_generator, debug):
     """Chat loop — agent responds, waits for user input, repeats."""
     if has_generator:
         goal = INTERACTIVE_GOAL.format(initial_msg=initial_msg)
     else:
         goal = initial_msg
     messages.append(("user", goal))
     print("Starting agent...\n")
+    turn = 0
 
     while True:
         try:
             result = await agent.ainvoke({"messages": messages})
             messages = result["messages"]
+            turn += 1
+            if debug:
+                debug.dump_messages(messages, f"Interactive turn {turn}")
             response = messages[-1].content
             if isinstance(response, list):
                 response = "".join(block.get("text", "") for block in response)
diff --git a/ensemble_agent/archive.py b/ensemble_agent/archive.py
@@ -15,6 +15,7 @@ def __init__(self, work_dir):
         self.work_dir.mkdir(parents=True, exist_ok=True)
         self._counter = 1
         self._current = None
+        self.run_succeeded = False
 
     @property
     def current_archive(self):
diff --git a/ensemble_agent/config.py b/ensemble_agent/config.py
@@ -75,6 +75,7 @@ class AgentConfig:
     # Output
     output_dir: str = DEFAULT_OUTPUT_DIR
     show_prompts: bool = False
+    debug: bool = False
 
     # Execution limits
     max_retries: int = MAX_RETRIES
@@ -118,6 +119,7 @@ def parse_args(argv=None) -> AgentConfig:
     parser.add_argument("--mcp-server", help="Path to generator mcp_server.mjs")
     parser.add_argument("--generate-only", action="store_true", help="Only generate scripts, don't run")
     parser.add_argument("--show-prompts", action="store_true", help="Print prompts sent to AI")
+    parser.add_argument("--debug", action="store_true", help="Dump full message log to debug_log.txt")
     parser.add_argument("--max-iterations", type=int, default=MAX_AGENT_ITERATIONS, help="Max agent iterations")
     args = parser.parse_args(argv)
 
@@ -130,6 +132,7 @@ def parse_args(argv=None) -> AgentConfig:
         skills=args.skills,
         mcp_server=args.mcp_server,
         show_prompts=args.show_prompts,
+        debug=args.debug or bool(os.environ.get("AGENT_DEBUG")),
         max_iterations=args.max_iterations,
     )
     if args.model:
diff --git a/ensemble_agent/debug.py b/ensemble_agent/debug.py
@@ -1,35 +1,58 @@
-"""Debug logging for agent message history."""
+"""Debug logging for agent message history.
 
-from datetime import datetime
+Writes to a single debug_log.txt that captures system prompt, tool schemas,
+and full message history after each agent invocation.
+
+Activated via --debug flag or AGENT_DEBUG=1 env var.
+"""
+
+import time
 from pathlib import Path
 
 
 class DebugLogger:
-    """Dumps message history and prompts to timestamped files."""
-
-    def __init__(self, output_dir):
-        self.output_dir = Path(output_dir)
-
-    def dump(self, content, stage=""):
-        """Write content to a timestamped debug file."""
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        debug_file = self.output_dir / f"debug_{stage}_{timestamp}.txt"
-        debug_file.parent.mkdir(parents=True, exist_ok=True)
-        debug_file.write_text(content)
-        print(f"  (Debug dumped to {debug_file})")
-
-    def dump_messages(self, messages, stage=""):
-        """Dump a list of LangChain messages."""
-        lines = []
-        for msg in messages:
-            role = getattr(msg, "type", "unknown")
-            content = getattr(msg, "content", str(msg))
-            lines.append(f"[{role}]\n{content}\n")
-        self.dump("\n---\n".join(lines), stage)
-
-    def dump_prompt(self, prompt_text, stage=""):
-        """Print and optionally save a prompt."""
-        slen = 15
-        print(f"\n{'=' * slen} PROMPT TO AI ({stage}) {'=' * slen}")
-        print(prompt_text)
-        print(f"{'=' * slen} END AI PROMPT ({stage}) {'=' * slen}\n")
+    """Appends agent conversation details to debug_log.txt."""
+
+    def __init__(self, log_path, model=""):
+        self.log_path = Path(log_path)
+        self.log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.log_path, "w") as f:
+            f.write(f"Debug log started: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
+            f.write(f"Model: {model}\n\n")
+
+    def log_system_prompt(self, prompt):
+        with open(self.log_path, "a") as f:
+            f.write("SYSTEM PROMPT\n" + "=" * 80 + "\n")
+            f.write(prompt + "\n\n")
+
+    def log_tool_schemas(self, tools):
+        with open(self.log_path, "a") as f:
+            f.write("TOOL SCHEMAS\n" + "=" * 80 + "\n")
+            for t in tools:
+                f.write(f"\n{t.name}: {t.description}\n")
+            f.write("\n")
+
+    def dump_messages(self, messages, label=""):
+        with open(self.log_path, "a") as f:
+            f.write(f"\n{'=' * 80}\n")
+            if label:
+                f.write(f"  {label}\n{'=' * 80}\n")
+            for msg in messages:
+                role = type(msg).__name__
+                f.write(f"\n--- {role} ---\n")
+                if hasattr(msg, "tool_calls") and msg.tool_calls:
+                    f.write("[Tool calls]\n")
+                    for tc in msg.tool_calls:
+                        f.write(f"  {tc.get('name', '?')}({tc.get('args', {})})\n")
+                content = msg.content if isinstance(msg.content, str) else str(msg.content)
+                if content:
+                    if len(content) > 2000:
+                        f.write(
+                            content[:1000]
+                            + f"\n... [{len(content)} chars total] ...\n"
+                            + content[-500:]
+                            + "\n"
+                        )
+                    else:
+                        f.write(content + "\n")
+            f.write(f"\n{'=' * 80}\n\n")
diff --git a/ensemble_agent/prompts.py b/ensemble_agent/prompts.py
@@ -5,10 +5,13 @@
 IMPORTANT RULES:
 {generator_rules}
 - For ANY modifications, use read_file to see the current file, then write_file to save the edited version.
+- DO NOT merge or consolidate files - keep the same file structure.
+- DO NOT create new files unless explicitly asked. Fix existing files only.
+- DO NOT make any other changes or improvements beyond what is needed.
+- If a script fails because an executable or input file is not found, report the error and stop.
 - If the user asks to see something, use read_file and show them the content.
 - Don't run scripts unless the user explicitly asks you to run them.
 - When reviewing scripts, highlight key configuration: generator bounds/parameters and the objective function.
-- After running, if scripts fail, explain the error and offer to fix using write_file.
 
 {skills_context}"""
 
@@ -24,7 +27,10 @@
 
 AUTONOMOUS_GOAL = """{initial_msg}
 
-After generating/loading scripts: review them, run them, fix errors and retry (max 3 attempts). Report the result."""
+After generating/loading scripts: review them, run them, and if they fail fix the error and retry.
+DO NOT make any other changes or improvements.
+DO NOT wrap in markdown or add explanations when fixing.
+Report the result."""
 
 INTERACTIVE_GOAL = """User request: {initial_msg}
 
diff --git a/ensemble_agent/skills/file_ops.py b/ensemble_agent/skills/file_ops.py
@@ -36,6 +36,8 @@ async def read_file_tool(filepath: str) -> str:
             return file_path.read_text()
 
         async def write_file_tool(filepath: str, content: str) -> str:
+            if archive.run_succeeded:
+                return "Script already ran successfully. No further changes needed."
             try:
                 file_path = work_dir / filepath
                 # Diff against old content for summary
diff --git a/ensemble_agent/skills/runner.py b/ensemble_agent/skills/runner.py
@@ -49,6 +49,7 @@ async def run_script_tool(script_name: str) -> str:
                 )
                 if result.returncode == 0:
                     skill._succeeded = True
+                    archive.run_succeeded = True
                     print("Script ran successfully", flush=True)
                     return f"SUCCESS\nOutput:\n{result.stdout[:500]}"
                 else:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -49,6 +49,7 @@ async def run_script_tool(script_name: str) -> str:` |
| `49` | `49` | `)` |
| `50` | `50` | `if result.returncode == 0:` |
| `51` | `51` | `skill._succeeded = True` |
|  | `52` | `+ archive.run_succeeded = True` |
| `52` | `53` | `print("Script ran successfully", flush=True)` |
| `53` | `54` | `return f"SUCCESS\nOutput:\n{result.stdout[:500]}"` |
| `54` | `55` | `else:` |