python-kernel

moohax · moohax · commit ca1750c67d5c · 2025-10-29T20:21:36.000-06:00
diff --git a/dotnet_reversing/main.py b/dotnet_reversing/main.py
@@ -18,8 +18,6 @@
 
 console = Console()
 
-if t.TYPE_CHECKING:
-    from loguru import Record as LogRecord
 
 # CLI
 
@@ -58,16 +56,6 @@ class DreadnodeArgs:
     """Show span information in the console"""
 
 
-def log_formatter(record: "LogRecord") -> str:
-    return "".join(
-        (
-            "<green>{time:HH:mm:ss.SSS}</green> | ",
-            "<dim>{extra[prefix]}</dim> " if record["extra"].get("prefix") else "",
-            "<level>{message}</level>\n",
-        ),
-    )
-
-
 @tool()
 async def report_finding(file: str, method: str, criticality: str, content: str) -> str:
     """
diff --git a/python_agent/README.md b/python_agent/README.md
@@ -4,9 +4,6 @@
 
 This agent provides a general-purpose, sandboxed environment for executing Python code to accomplish user-defined tasks. It leverages a Large Language Model (LLM) to interpret a natural language task, generate Python code, and execute it within a Docker container. The agent operates by creating an interactive session with a [Jupyter kernel](https://docs.jupyter.org/en/latest/projects/kernels.html) running inside the container, allowing it to iteratively write code, execute it, and use the output to inform its next steps until the task is complete.
 
-## Intended Use
-
-The agent is designed for a wide range of tasks that can be solved programmatically with Python.
 
 ## Environment
 
@@ -18,16 +15,7 @@ To run this agent, a Docker daemon must be available and running on the host mac
 - `restart_kernel`
 - `complete_task`
 
-## Features
-
-- **Sandboxed Execution**: All code is executed within a secure and isolated Docker container, preventing unintended side effects on the host machine.
-- **Customizable Environment**: Users can specify any Docker image for the execution environment and mount local directories as volumes into the container.
-- **LLM-Powered Task Resolution**: The agent takes a high-level, natural language task and intelligently generates and executes the code needed to complete it.
-- **Interactive Code Execution**: Provides tools for the LLM to `execute_code` and `restart_kernel`, allowing for an interactive and stateful problem-solving process.
-- **Task Completion Reporting**: The agent can explicitly mark a task as complete with a success or failure status and a final summary.
-- **Step-by-Step Iteration**: The agent operates within a defined loop with a maximum number of steps (max_steps) to ensure termination.
-- **Artifact Logging**: Upon completion, the agent can log the entire working directory as an artifact to Dreadnode, preserving any generated files.
 
-## References
+## Examples
 
-- None
+`uv run python_agent/main.py --model "anthropic/claude-haiku-4-5-20251001" --task "please generate an interesting dataset, and visualize it"`
diff --git a/python_agent/kernel.py b/python_agent/kernel.py
@@ -6,6 +6,7 @@
 from dataclasses import field
 from functools import cached_property
 from pathlib import Path
+from typing import Any, Optional
 
 import aiodocker
 import aiodocker.containers
@@ -14,8 +15,10 @@
 import dreadnode as dn
 import rigging as rg
 import tenacity
+from dreadnode.agent.tools import Toolset, tool_method
+from dreadnode.meta import Config
 from loguru import logger
-from pydantic import BaseModel
+from pydantic import BaseModel, PrivateAttr
 
 # Helpers
 
@@ -175,31 +178,40 @@ def __init__(self, message: str = "Failed to start kernel") -> None:
 # Main class
 
 
-class PythonKernel:
+class PythonKernel(Toolset):
     """A Python kernel for executing code."""
 
-    def __init__(
-        self,
-        image: str = "jupyter/datascience-notebook:latest",
-        *,
-        memory_limit: str = "4g",
-        kernel_name: str = "python3",
-        work_dir: Path | str | None = None,
-        volumes: list[str] | None = None,
-    ) -> None:
-        """Create a python kernel."""
-        self.image = image
-        self.memory_limit = memory_limit
-        self.kernel_name = kernel_name
-        self.volumes = volumes or []
+    # Public configuration fields
+    image: str = Config(default="jupyter/datascience-notebook:latest", expose_as=str)
+    memory_limit: str = Config(default="4g", expose_as=str)
+    kernel_name: str = Config(default="python3", expose_as=str)
+    work_dir: Path | str | None = Config(default=None, expose_as=Path | str | None)
+    volumes: list[str] | None = Config(default=None, expose_as=list[str] | None)
+
+    # Private instance attributes
+    _token: str = PrivateAttr(default_factory=lambda: uuid.uuid4().hex)
+    _client: Optional["aiodocker.Docker"] = PrivateAttr(default=None)
+    _container: Optional["aiodocker.containers.DockerContainer"] = PrivateAttr(default=None)
+    _work_dir: Path = PrivateAttr()
+    _kernel_id: str | None = PrivateAttr(default=None)
+    _base_url: str | None = PrivateAttr(default=None)
+
+    def model_post_init(self, __context: Any) -> None:
+        """
+        Resolve the work directory, create it on disk, and default `volumes`
+        to an empty list once the model's public fields have been populated.
+        """
+        # Mirror the public work_dir into _work_dir, generating .work/<random id> when none is given.
+        if self.work_dir:
+            self._work_dir = Path(self.work_dir)
+        else:
+            self.work_dir = Path(f".work/{uuid.uuid4().hex[:8]}")
+            self._work_dir = self.work_dir
 
-        self._token = uuid.uuid4().hex
+        self._work_dir.mkdir(parents=True, exist_ok=True)
 
-        self._client: aiodocker.Docker | None = None
-        self._container: aiodocker.containers.DockerContainer | None = None
-        self._work_dir = Path(work_dir or f".work/{uuid.uuid4().hex[:8]}")
-        self._kernel_id: str | None = None
-        self._base_url: str | None = None
+        if self.volumes is None:
+            self.volumes = []
 
     @property
     def base_url(self) -> str:
@@ -229,10 +241,6 @@ def container(self) -> aiodocker.containers.DockerContainer:
             raise PythonKernelNotRunningError
         return self._container
 
-    @property
-    def work_dir(self) -> Path:
-        return self._work_dir
-
     @cached_property
     def tools(self) -> list[t.Callable[..., t.Any]]:
         return [
@@ -263,7 +271,10 @@ async def _start_container(self) -> None:
                 "PortBindings": {
                     "8888/tcp": [{"HostPort": "0"}],  # Let Docker choose a port
                 },
-                "Binds": [f"{self._work_dir.absolute()!s}:/home/jovyan/work", *self.volumes],
+                "Binds": [
+                    f"{self._work_dir.absolute()!s}:/home/jovyan/work",
+                    *(self.volumes or []),
+                ],
             },
             "Env": [
                 f"JUPYTER_TOKEN={self._token}",
@@ -631,6 +642,7 @@ async def execute_notebook(
 
         return notebook
 
+    @tool_method()
     async def execute_code(self, code: str) -> str:
         """
         Execute Python code in the jupyter kernel and return the output.
@@ -654,10 +666,12 @@ async def get_kernel_state(self) -> KernelState:
 
         return t.cast("KernelState", kernel_info["execution_state"])
 
+    @tool_method()
     async def busy(self) -> bool:
         """Check if the kernel is busy executing code."""
         return await self.get_kernel_state() == "busy"
 
+    @tool_method()
     async def interrupt(self) -> None:
         """Interrupt the kernel."""
         if not self._kernel_id:
@@ -674,6 +688,7 @@ async def interrupt(self) -> None:
 
         logger.debug(f"Kernel {self._kernel_id} interrupted")
 
+    @tool_method()
     async def restart(self) -> None:
         """Restart the kernel."""
         if not self._kernel_id:
diff --git a/python_agent/main.py b/python_agent/main.py
@@ -1,17 +1,20 @@
-import sys
 import typing as t
 from dataclasses import dataclass, field
+from pathlib import Path
+from textwrap import dedent
 
 import cyclopts
 import dreadnode as dn
-import litellm
-import rigging as rg
-from loguru import logger
+from dreadnode.agent import Agent
+from dreadnode.agent.events import AgentEnd
+from dreadnode.agent.hooks import Hook
+from dreadnode.agent.tools import tool
+from dreadnode.data_types import Markdown
+from kernel import PythonKernel
+from rich.console import Console
 
-from python_agent.kernel import PythonKernel
+console = Console()
 
-if t.TYPE_CHECKING:
-    from loguru import Record as LogRecord
 
 # CLI
 
@@ -36,8 +39,6 @@ class Args:
     ] = field(default_factory=list)
     max_steps: int = 50
     """Maximum number of steps to take"""
-    log_level: str = "INFO"
-    """Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)"""
 
 
 @cyclopts.Parameter(name="*", group="dreadnode")
@@ -53,27 +54,22 @@ class DreadnodeArgs:
     """Show span information in the console"""
 
 
-def log_formatter(record: "LogRecord") -> str:
-    return "".join(
-        (
-            "<green>{time:HH:mm:ss.SSS}</green> | ",
-            "<dim>{extra[prefix]}</dim> " if record["extra"].get("prefix") else "",
-            "<level>{message}</level>\n",
-        ),
-    )
-
-
-@dn.task(name="Complete task", log_output=False)
+@tool()
 async def complete_task(success: bool, markdown_summary: str) -> None:  # noqa: FBT001
     """
     Mark your task as complete with a success/failure status and markdown summary.
     """
     dn.log_metric("task_success", success, to="run")
+    dn.log_output("task_summary", Markdown(markdown_summary), to="run")
+
 
-    log_func = logger.success if success else logger.warning
-    log_func(f"Agent finished the task (success={success}): {markdown_summary}")
+def upload_work_hook(
+    work_dir: Path,
+) -> Hook:
+    async def upload_work(event: AgentEnd) -> None:
+        dn.log_artifact(str(work_dir))
 
-    dn.log_output("task_summary", markdown_summary, to="run")
+    return upload_work
 
 
 @app.default
@@ -82,10 +78,6 @@ async def agent(*, args: Args, dn_args: DreadnodeArgs | None = None) -> None:
     General agent with access to a dockerized jupyter environment.
     """
 
-    logger.remove()
-    logger.add(sys.stderr, format=log_formatter, level=args.log_level)
-    logger.enable("rigging")
-
     dn_args = dn_args or DreadnodeArgs()
     dn.configure(
         server=dn_args.server,
@@ -94,90 +86,37 @@ async def agent(*, args: Args, dn_args: DreadnodeArgs | None = None) -> None:
         console=dn_args.console,
     )
 
-    with dn.run(), dn.task_span("Agent"):
-        dn.log_params(
+    instructions = dedent(f"""\
+        Work to complete the following task. You have access to a dockerized jupyter environment.
+        You can run code in the environment and use the results to help you complete the task.
+
+        Unless otherwise specified, use `~/work` to store files and data. Additional volumes are listed below.
+
+        <volumes>
+        {args.volumes}
+        </volumes>
+
+        <task>
+        {args.task}
+        </task>
+        """)
+
+    async with PythonKernel(
+        image=args.image,
+        volumes=args.volumes,
+    ) as kernel:
+        agent = Agent(
+            name="python-agent",
             model=args.model,
-            image=args.image,
-            max_steps=args.max_steps,
+            description="An agent with access to a dockerized jupyter environment.",
+            instructions=instructions,
+            tools=[kernel],
+            hooks=[upload_work_hook(work_dir=kernel.work_dir)],
         )
-        dn.log_input("task", args.task, to="run")
-        dn.log_input("volumes", "\n".join(args.volumes), to="run")
-        dn.push_update()
-
-        generator = rg.get_generator(args.model)
-
-        logger.info("Starting agent ...")
-
-        async with PythonKernel(
-            image=args.image,
-            volumes=args.volumes,
-        ) as kernel:
-
-            @dn.task(name="Execute code")
-            async def execute_code(code: str) -> str:
-                """
-                Execute code in the kernel and return the result.
-                """
-                logger.info(f"Executing:\n{code}")
-                result = await kernel.execute_code(code)
-                logger.info(f"Result:\n{result}")
-                return result
-
-            @dn.task(name="Restart kernel")
-            async def restart_kernel() -> None:
-                """
-                Restart the kernel.
-                """
-                logger.info("Restarting kernel ...")
-                await kernel.restart()
-
-            chat = (
-                await generator.chat(
-                    f"""\
-                    Work to complete the following task. You have access to a dockerized jupyter environment.
-                    You can run code in the environment and use the results to help you complete the task.
-
-                    Unless otherwise specified, use `~/work` to store files and data. Additional volumes are listed below.
-
-                    <volumes>
-                    {args.volumes}
-                    </volumes>
-
-                    <task>
-                    {args.task}
-                    </task>
-                    """,
-                )
-                .catch(
-                    *litellm.exceptions.LITELLM_EXCEPTION_TYPES,
-                    on_failed="include",
-                )
-                .using(
-                    execute_code,
-                    restart_kernel,
-                    complete_task,
-                    max_depth=args.max_steps,
-                )
-                .run()
-            )
-
-            dn.log_artifact(kernel.work_dir)
-
-        if chat.failed and chat.error:
-            if isinstance(chat.error, rg.error.MaxDepthError):
-                logger.warning(f"Max steps reached ({args.max_steps})")
-                dn.log_metric("max_steps_reached", 1)
-                dn.log_output("task_summary", f"Max steps ({args.max_steps}) reached", to="run")
-            else:
-                logger.warning(f"Failed with {chat.error}")
-                dn.log_metric("inference_failed", 1)
-                dn.log_output("task_summary", f"Inference failed with {chat.error}", to="run")
-
-        elif chat.last.role == "assistant":
-            dn.log_output("last_message", chat.last.content, to="run")
-            logger.info(str(chat.last))
-
-    logger.info("Done.")
+
+        async with agent.stream(args.task) as events:
+            async for event in events:
+                console.print(event)
 
 
 if __name__ == "__main__":