Skip to content

Commit ca1750c

Browse files
committed
python-kernel
1 parent 29e9aa7 commit ca1750c

4 files changed

Lines changed: 92 additions & 162 deletions

File tree

dotnet_reversing/main.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818

1919
console = Console()
2020

21-
if t.TYPE_CHECKING:
22-
from loguru import Record as LogRecord
2321

2422
# CLI
2523

@@ -58,16 +56,6 @@ class DreadnodeArgs:
5856
"""Show span information in the console"""
5957

6058

61-
def log_formatter(record: "LogRecord") -> str:
62-
return "".join(
63-
(
64-
"<green>{time:HH:mm:ss.SSS}</green> | ",
65-
"<dim>{extra[prefix]}</dim> " if record["extra"].get("prefix") else "",
66-
"<level>{message}</level>\n",
67-
),
68-
)
69-
70-
7159
@tool()
7260
async def report_finding(file: str, method: str, criticality: str, content: str) -> str:
7361
"""

python_agent/README.md

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44

55
This agent provides a general-purpose, sandboxed environment for executing Python code to accomplish user-defined tasks. It leverages a Large Language Model (LLM) to interpret a natural language task, generate Python code, and execute it within a Docker container. The agent operates by creating an interactive session with a [Jupyter kernel](https://docs.jupyter.org/en/latest/projects/kernels.html) running inside the container, allowing it to iteratively write code, execute it, and use the output to inform its next steps until the task is complete.
66

7-
## Intended Use
8-
9-
The agent is designed for a wide range of tasks that can be solved programmatically with Python.
107

118
## Environment
129

@@ -18,16 +15,7 @@ To run this agent, a Docker daemon must be available and running on the host mac
1815
- `restart_kernel`
1916
- `complete_task`
2017

21-
## Features
22-
23-
- **Sandboxed Execution**: All code is executed within a secure and isolated Docker container, preventing unintended side effects on the host machine.
24-
- **Customizable Environment**: Users can specify any Docker image for the execution environment and mount local directories as volumes into the container.
25-
- **LLM-Powered Task Resolution**: The agent takes a high-level, natural language task and intelligently generates and executes the code needed to complete it.
26-
- **Interactive Code Execution**: Provides tools for the LLM to `execute_code` and `restart_kernel`, allowing for an interactive and stateful problem-solving process.
27-
- **Task Completion Reporting**: The agent can explicitly mark a task as complete with a success or failure status and a final summary.
28-
- **Step-by-Step Iteration**: The agent operates within a defined loop with a maximum number of steps (max_steps) to ensure termination.
29-
- **Artifact Logging**: Upon completion, the agent can log the entire working directory as an artifact to Dreadnode, preserving any generated files.
3018

31-
## References
19+
## Examples
3220

33-
- None
21+
`uv run python_agent/main.py --model "anthropic/claude-haiku-4-5-20251001" --task "please generate an interesting dataset, and visualize it"`

python_agent/kernel.py

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from dataclasses import field
77
from functools import cached_property
88
from pathlib import Path
9+
from typing import Any, Optional
910

1011
import aiodocker
1112
import aiodocker.containers
@@ -14,8 +15,10 @@
1415
import dreadnode as dn
1516
import rigging as rg
1617
import tenacity
18+
from dreadnode.agent.tools import Toolset, tool_method
19+
from dreadnode.meta import Config
1720
from loguru import logger
18-
from pydantic import BaseModel
21+
from pydantic import BaseModel, PrivateAttr
1922

2023
# Helpers
2124

@@ -175,31 +178,40 @@ def __init__(self, message: str = "Failed to start kernel") -> None:
175178
# Main class
176179

177180

178-
class PythonKernel:
181+
class PythonKernel(Toolset):
179182
"""A Python kernel for executing code."""
180183

181-
def __init__(
182-
self,
183-
image: str = "jupyter/datascience-notebook:latest",
184-
*,
185-
memory_limit: str = "4g",
186-
kernel_name: str = "python3",
187-
work_dir: Path | str | None = None,
188-
volumes: list[str] | None = None,
189-
) -> None:
190-
"""Create a python kernel."""
191-
self.image = image
192-
self.memory_limit = memory_limit
193-
self.kernel_name = kernel_name
194-
self.volumes = volumes or []
184+
# Public configuration fields
185+
image: str = Config(default="jupyter/datascience-notebook:latest", expose_as=str)
186+
memory_limit: str = Config(default="4g", expose_as=str)
187+
kernel_name: str = Config(default="python3", expose_as=str)
188+
work_dir: Path | str | None = Config(default=None, expose_as=Path | str | None)
189+
volumes: list[str] | None = Config(default=None, expose_as=list[str] | None)
190+
191+
# Private instance attributes
192+
_token: str = PrivateAttr(default_factory=lambda: uuid.uuid4().hex)
193+
_client: Optional["aiodocker.Docker"] = PrivateAttr(default=None)
194+
_container: Optional["aiodocker.containers.DockerContainer"] = PrivateAttr(default=None)
195+
_work_dir: Path = PrivateAttr()
196+
_kernel_id: str | None = PrivateAttr(default=None)
197+
_base_url: str | None = PrivateAttr(default=None)
198+
199+
def model_post_init(self, __context: Any) -> None:
    """
    Finalize the kernel after its public fields have been populated.

    Resolves the working directory (a fresh ``.work/<8 hex chars>`` path when
    ``work_dir`` is unset or empty), creates it on disk, and replaces a
    missing ``volumes`` value with an empty list.

    Args:
        __context: Post-init context supplied by the caller; not used here.
    """
    # Normalize once so the public field and the private attribute always
    # agree and are always a Path, even when the caller passed a plain str.
    if self.work_dir:
        resolved = Path(self.work_dir)
    else:
        resolved = Path(f".work/{uuid.uuid4().hex[:8]}")

    self._work_dir = resolved
    self.work_dir = resolved

    # The directory must exist before it can be used as a bind-mount source.
    self._work_dir.mkdir(parents=True, exist_ok=True)

    if self.volumes is None:
        self.volumes = []
203215

204216
@property
205217
def base_url(self) -> str:
@@ -229,10 +241,6 @@ def container(self) -> aiodocker.containers.DockerContainer:
229241
raise PythonKernelNotRunningError
230242
return self._container
231243

232-
@property
233-
def work_dir(self) -> Path:
234-
return self._work_dir
235-
236244
@cached_property
237245
def tools(self) -> list[t.Callable[..., t.Any]]:
238246
return [
@@ -263,7 +271,10 @@ async def _start_container(self) -> None:
263271
"PortBindings": {
264272
"8888/tcp": [{"HostPort": "0"}], # Let Docker choose a port
265273
},
266-
"Binds": [f"{self._work_dir.absolute()!s}:/home/jovyan/work", *self.volumes],
274+
"Binds": [
275+
f"{self._work_dir.absolute()!s}:/home/jovyan/work",
276+
*(self.volumes or []),
277+
],
267278
},
268279
"Env": [
269280
f"JUPYTER_TOKEN={self._token}",
@@ -631,6 +642,7 @@ async def execute_notebook(
631642

632643
return notebook
633644

645+
@tool_method()
634646
async def execute_code(self, code: str) -> str:
635647
"""
636648
Execute Python code in the jupyter kernel and return the output.
@@ -654,10 +666,12 @@ async def get_kernel_state(self) -> KernelState:
654666

655667
return t.cast("KernelState", kernel_info["execution_state"])
656668

669+
@tool_method()
657670
async def busy(self) -> bool:
658671
"""Check if the kernel is busy executing code."""
659672
return await self.get_kernel_state() == "busy"
660673

674+
@tool_method()
661675
async def interrupt(self) -> None:
662676
"""Interrupt the kernel."""
663677
if not self._kernel_id:
@@ -674,6 +688,7 @@ async def interrupt(self) -> None:
674688

675689
logger.debug(f"Kernel {self._kernel_id} interrupted")
676690

691+
@tool_method()
677692
async def restart(self) -> None:
678693
"""Restart the kernel."""
679694
if not self._kernel_id:

python_agent/main.py

Lines changed: 48 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1-
import sys
21
import typing as t
32
from dataclasses import dataclass, field
3+
from pathlib import Path
4+
from textwrap import dedent
45

56
import cyclopts
67
import dreadnode as dn
7-
import litellm
8-
import rigging as rg
9-
from loguru import logger
8+
from dreadnode.agent import Agent
9+
from dreadnode.agent.events import AgentEnd
10+
from dreadnode.agent.hooks import Hook
11+
from dreadnode.agent.tools import tool
12+
from dreadnode.data_types import Markdown
13+
from kernel import PythonKernel
14+
from rich.console import Console
1015

11-
from python_agent.kernel import PythonKernel
16+
console = Console()
1217

13-
if t.TYPE_CHECKING:
14-
from loguru import Record as LogRecord
1518

1619
# CLI
1720

@@ -36,8 +39,6 @@ class Args:
3639
] = field(default_factory=list)
3740
max_steps: int = 50
3841
"""Maximum number of steps to take"""
39-
log_level: str = "INFO"
40-
"""Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)"""
4142

4243

4344
@cyclopts.Parameter(name="*", group="dreadnode")
@@ -53,27 +54,22 @@ class DreadnodeArgs:
5354
"""Show span information in the console"""
5455

5556

56-
def log_formatter(record: "LogRecord") -> str:
57-
return "".join(
58-
(
59-
"<green>{time:HH:mm:ss.SSS}</green> | ",
60-
"<dim>{extra[prefix]}</dim> " if record["extra"].get("prefix") else "",
61-
"<level>{message}</level>\n",
62-
),
63-
)
64-
65-
66-
@dn.task(name="Complete task", log_output=False)
57+
@tool()
async def complete_task(success: bool, markdown_summary: str) -> None:  # noqa: FBT001
    """
    Mark your task as complete with a success/failure status and markdown summary.
    """
    # Record the outcome flag on the run first, then the summary text.
    dn.log_metric("task_success", success, to="run")

    # The summary is wrapped in the Markdown data type before being logged.
    summary = Markdown(markdown_summary)
    dn.log_output("task_summary", summary, to="run")
64+
7265

73-
log_func = logger.success if success else logger.warning
74-
log_func(f"Agent finished the task (success={success}): {markdown_summary}")
66+
def upload_work_hook(work_dir: Path) -> Hook:
    """
    Build a hook that logs ``work_dir`` as an artifact.

    Args:
        work_dir: Directory to log; it is converted to ``str`` when the hook
            fires.

    Returns:
        A coroutine function taking an ``AgentEnd`` event.
        NOTE(review): presumably it is dispatched when the agent finishes -
        confirm against the hook-dispatch rules of the agent framework.
    """

    async def upload_work(event: AgentEnd) -> None:
        # The event payload itself is not inspected; only its arrival matters.
        dn.log_artifact(str(work_dir))

    return upload_work
7773

7874

7975
@app.default
@@ -82,10 +78,6 @@ async def agent(*, args: Args, dn_args: DreadnodeArgs | None = None) -> None:
8278
General agent with access to a dockerized jupyter environment.
8379
"""
8480

85-
logger.remove()
86-
logger.add(sys.stderr, format=log_formatter, level=args.log_level)
87-
logger.enable("rigging")
88-
8981
dn_args = dn_args or DreadnodeArgs()
9082
dn.configure(
9183
server=dn_args.server,
@@ -94,90 +86,37 @@ async def agent(*, args: Args, dn_args: DreadnodeArgs | None = None) -> None:
9486
console=dn_args.console,
9587
)
9688

97-
with dn.run(), dn.task_span("Agent"):
98-
dn.log_params(
89+
instructions = dedent(f"""\
90+
Work to complete the following task. You have access to a dockerized jupyter environment.
91+
You can run code in the environment and use the results to help you complete the task.
92+
93+
Unless otherwise specified, use `~/work` to store files and data. Additional volumes are listed below.
94+
95+
<volumes>
96+
{args.volumes}
97+
</volumes>
98+
99+
<task>
100+
{args.task}
101+
</task>
102+
""")
103+
104+
async with PythonKernel(
105+
image=args.image,
106+
volumes=args.volumes,
107+
) as kernel:
108+
agent = Agent(
109+
name="python-agent",
99110
model=args.model,
100-
image=args.image,
101-
max_steps=args.max_steps,
111+
description="An agent with access to a dockerized jupyter environment.",
112+
instructions=instructions,
113+
tools=[kernel],
114+
hooks=[upload_work_hook(work_dir=kernel.work_dir)],
102115
)
103-
dn.log_input("task", args.task, to="run")
104-
dn.log_input("volumes", "\n".join(args.volumes), to="run")
105-
dn.push_update()
106-
107-
generator = rg.get_generator(args.model)
108-
109-
logger.info("Starting agent ...")
110-
111-
async with PythonKernel(
112-
image=args.image,
113-
volumes=args.volumes,
114-
) as kernel:
115-
116-
@dn.task(name="Execute code")
117-
async def execute_code(code: str) -> str:
118-
"""
119-
Execute code in the kernel and return the result.
120-
"""
121-
logger.info(f"Executing:\n{code}")
122-
result = await kernel.execute_code(code)
123-
logger.info(f"Result:\n{result}")
124-
return result
125-
126-
@dn.task(name="Restart kernel")
127-
async def restart_kernel() -> None:
128-
"""
129-
Restart the kernel.
130-
"""
131-
logger.info("Restarting kernel ...")
132-
await kernel.restart()
133-
134-
chat = (
135-
await generator.chat(
136-
f"""\
137-
Work to complete the following task. You have access to a dockerized jupyter environment.
138-
You can run code in the environment and use the results to help you complete the task.
139-
140-
Unless otherwise specified, use `~/work` to store files and data. Additional volumes are listed below.
141-
142-
<volumes>
143-
{args.volumes}
144-
</volumes>
145-
146-
<task>
147-
{args.task}
148-
</task>
149-
""",
150-
)
151-
.catch(
152-
*litellm.exceptions.LITELLM_EXCEPTION_TYPES,
153-
on_failed="include",
154-
)
155-
.using(
156-
execute_code,
157-
restart_kernel,
158-
complete_task,
159-
max_depth=args.max_steps,
160-
)
161-
.run()
162-
)
163-
164-
dn.log_artifact(kernel.work_dir)
165-
166-
if chat.failed and chat.error:
167-
if isinstance(chat.error, rg.error.MaxDepthError):
168-
logger.warning(f"Max steps reached ({args.max_steps})")
169-
dn.log_metric("max_steps_reached", 1)
170-
dn.log_output("task_summary", f"Max steps ({args.max_steps}) reached", to="run")
171-
else:
172-
logger.warning(f"Failed with {chat.error}")
173-
dn.log_metric("inference_failed", 1)
174-
dn.log_output("task_summary", f"Inference failed with {chat.error}", to="run")
175-
176-
elif chat.last.role == "assistant":
177-
dn.log_output("last_message", chat.last.content, to="run")
178-
logger.info(str(chat.last))
179-
180-
logger.info("Done.")
116+
117+
async with agent.stream(args.task) as events:
118+
async for event in events:
119+
console.print(event)
181120

182121

183122
if __name__ == "__main__":

0 commit comments

Comments
 (0)