From 0886870c6a634da9e450d0bc58848cd280bd8d8b Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Thu, 21 May 2026 17:25:50 +0200 Subject: [PATCH 1/8] Implement spawn subagent tool --- ddev/src/ddev/ai/agent/build.py | 101 ++++- ddev/src/ddev/ai/callbacks/file_logger.py | 105 +++++ ddev/src/ddev/ai/phases/agentic_phase.py | 30 +- ddev/src/ddev/ai/phases/base.py | 17 +- ddev/src/ddev/ai/phases/checkpoint.py | 9 +- ddev/src/ddev/ai/tools/agents/__init__.py | 3 + .../ddev/ai/tools/agents/spawn_subagent.py | 156 +++++++ ddev/src/ddev/ai/tools/registry.py | 31 +- ddev/tests/ai/agent/test_build.py | 140 ++++-- ddev/tests/ai/callbacks/test_file_logger.py | 114 +++++ ddev/tests/ai/phases/conftest.py | 4 +- ddev/tests/ai/phases/test_agentic_phase.py | 428 ++++++++---------- ddev/tests/ai/tools/agents/__init__.py | 3 + .../ai/tools/agents/test_spawn_subagent.py | 310 +++++++++++++ ddev/tests/ai/tools/test_registry.py | 16 +- 15 files changed, 1140 insertions(+), 327 deletions(-) create mode 100644 ddev/src/ddev/ai/callbacks/file_logger.py create mode 100644 ddev/src/ddev/ai/tools/agents/__init__.py create mode 100644 ddev/src/ddev/ai/tools/agents/spawn_subagent.py create mode 100644 ddev/tests/ai/callbacks/test_file_logger.py create mode 100644 ddev/tests/ai/tools/agents/__init__.py create mode 100644 ddev/tests/ai/tools/agents/test_spawn_subagent.py diff --git a/ddev/src/ddev/ai/agent/build.py b/ddev/src/ddev/ai/agent/build.py index 97e24aecb5183..6ab2e30fb9428 100644 --- a/ddev/src/ddev/ai/agent/build.py +++ b/ddev/src/ddev/ai/agent/build.py @@ -5,17 +5,26 @@ from __future__ import annotations from collections.abc import Callable +from pathlib import Path from typing import TYPE_CHECKING, Any from ddev.ai.agent.anthropic_client import AnthropicAgent from ddev.ai.agent.base import BaseAgent +from ddev.ai.tools.fs.file_access_policy import FileAccessPolicy from ddev.ai.tools.fs.file_registry import FileRegistry from ddev.ai.tools.registry import ToolRegistry if TYPE_CHECKING: from ddev.ai.phases.config import AgentConfig -AgentBuilder = Callable[[str, str], tuple[BaseAgent[Any], ToolRegistry]] +SubagentBuilder = Callable[ + [str, str, list[str]], # (system_prompt, owner_id, tool_names) + tuple[BaseAgent[Any], ToolRegistry], +] +AgentBuilder = Callable[ + [str, str, SubagentBuilder | None, Path | None], # system_prompt, owner_id, subagent_builder, log_dir + tuple[BaseAgent[Any], ToolRegistry], +] def _resolve_client(agent_clients: dict[str, Any], provider: str) -> Any: @@ -25,19 +34,22 @@ def _resolve_client(agent_clients: dict[str, Any], provider: str) -> Any: return client -def build_agent( +def _build_agent_and_registry( agent_config: AgentConfig, agent_clients: dict[str, Any], system_prompt: str, owner_id: str, + tool_names: list[str], file_registry: FileRegistry, + subagent_builder: SubagentBuilder | None = None, + log_dir: Path | None = None, ) -> tuple[BaseAgent[Any], ToolRegistry]: - """Construct a provider-specific BaseAgent and its ToolRegistry from an AgentConfig.""" - tool_registry = ToolRegistry.from_names( - agent_config.tools, + tool_names, owner_id=owner_id, file_registry=file_registry, + subagent_builder=subagent_builder, + log_dir=log_dir, ) if agent_config.provider == "anthropic": @@ -58,20 +70,93 @@ def build_agent( raise ValueError(f"Unknown agent provider: {agent_config.provider!r}") +def build_agent( + agent_config: AgentConfig, + agent_clients: dict[str, Any], + system_prompt: str, + owner_id: str, + file_registry: FileRegistry, + subagent_builder: SubagentBuilder | None = None, + log_dir: Path | None = None, +) -> tuple[BaseAgent[Any], ToolRegistry]: + """Construct a provider-specific BaseAgent and its ToolRegistry from an AgentConfig.""" + return _build_agent_and_registry( + agent_config=agent_config, + agent_clients=agent_clients, + system_prompt=system_prompt, + owner_id=owner_id, + tool_names=agent_config.tools, + file_registry=file_registry, + subagent_builder=subagent_builder, + log_dir=log_dir, + ) + + +def build_subagent( + parent_agent_config: AgentConfig, + agent_clients: dict[str, Any], + file_access_policy: FileAccessPolicy, + system_prompt: str, + owner_id: str, + tool_names: list[str], +) -> tuple[BaseAgent[Any], ToolRegistry]: + """Build a subagent + ToolRegistry. Always uses a fresh FileRegistry. + + Reuses the parent's provider/model/max_tokens. No subagent_builder or + log_dir is forwarded, so the subagent cannot recursively spawn subagents — + ToolRegistry.from_names will raise if spawn_subagent is in tool_names. + """ + return _build_agent_and_registry( + agent_config=parent_agent_config, + agent_clients=agent_clients, + system_prompt=system_prompt, + owner_id=owner_id, + tool_names=tool_names, + file_registry=FileRegistry(policy=file_access_policy), + ) + + def make_agent_builder( agent_config: AgentConfig, agent_clients: dict[str, Any], file_registry: FileRegistry, ) -> AgentBuilder: - """Return a closure that builds an agent+registry given a rendered system_prompt and owner_id.""" - - def builder(system_prompt: str, owner_id: str) -> tuple[BaseAgent[Any], ToolRegistry]: + """Return a closure that builds an agent+registry given system_prompt, owner_id, subagent_builder, log_dir.""" + + def builder( + system_prompt: str, + owner_id: str, + subagent_builder: SubagentBuilder | None, + log_dir: Path | None, + ) -> tuple[BaseAgent[Any], ToolRegistry]: return build_agent( agent_config=agent_config, agent_clients=agent_clients, system_prompt=system_prompt, owner_id=owner_id, file_registry=file_registry, + subagent_builder=subagent_builder, + log_dir=log_dir, + ) + + return builder + + +def make_subagent_builder( + parent_agent_config: AgentConfig, + agent_clients: dict[str, Any], + file_access_policy: FileAccessPolicy, +) -> SubagentBuilder: + """Return a closure that builds a subagent+registry given (system_prompt, owner_id, tool_names).""" + + def builder(system_prompt: str, owner_id: str, tool_names: list[str]) -> tuple[BaseAgent[Any], ToolRegistry]: + return build_subagent( + parent_agent_config=parent_agent_config, + agent_clients=agent_clients, + file_access_policy=file_access_policy, + system_prompt=system_prompt, + owner_id=owner_id, + tool_names=tool_names, ) return builder diff --git a/ddev/src/ddev/ai/callbacks/file_logger.py b/ddev/src/ddev/ai/callbacks/file_logger.py new file mode 100644 index 0000000000000..2e58e4057e915 --- /dev/null +++ b/ddev/src/ddev/ai/callbacks/file_logger.py @@ -0,0 +1,105 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import json +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from ddev.ai.agent.types import AgentResponse, ToolCall +from ddev.ai.callbacks.callbacks import Callbacks, CallbackSet +from ddev.ai.tools.core.types import ToolResult + + +class FileLogger: + """Append-only JSONL writer for ReAct events plus subagent start/finish bookkeeping. + + Owns the file handle. Call build_callbacks() to obtain a Callbacks object whose + handlers route ReAct events through _emit. Call close() in a finally to release + the handle. Assumes log_path.parent already exists. + """ + + def __init__(self, log_path: Path) -> None: + self._log_path = log_path + self._fh = log_path.open("a", encoding="utf-8") + self._closed = False + + @property + def log_path(self) -> Path: + return self._log_path + + def _emit(self, event: dict[str, Any]) -> None: + if self._closed: + return + record = {"ts": datetime.now(UTC).isoformat(), **event} + self._fh.write(json.dumps(record, default=str) + "\n") + self._fh.flush() + + def log_start(self, *, system_prompt: str, prompt: str, tools: list[str]) -> None: + self._emit({"event": "start", "system_prompt": system_prompt, "prompt": prompt, "tools": tools}) + + def log_finish(self, *, success: bool, **fields: Any) -> None: + self._emit({"event": "finish", "success": success, **fields}) + + def close(self) -> None: + if not self._closed: + self._fh.close() + self._closed = True + + def build_callbacks(self) -> Callbacks: + cb_set = CallbackSet() + + @cb_set.on_before_agent_send + async def _on_before_send(iteration: int) -> None: + self._emit({"event": "before_agent_send", "iter": iteration}) + + @cb_set.on_agent_response + async def _on_agent_response(response: AgentResponse, iteration: int) -> None: + self._emit( + { + "event": "agent_response", + "iter": iteration, + "text": response.text, + "tool_calls": [{"id": tc.id, "name": tc.name, "input": tc.input} for tc in response.tool_calls], + "stop_reason": str(response.stop_reason), + "tokens": { + "input": response.usage.input_tokens, + "output": response.usage.output_tokens, + "cache_read": response.usage.cache_read_input_tokens, + "cache_creation": response.usage.cache_creation_input_tokens, + }, + } + ) + + @cb_set.on_tool_call + async def _on_tool_call(tool_call: ToolCall, result: ToolResult, iteration: int) -> None: + self._emit( + { + "event": "tool_call", + "iter": iteration, + "tool_call_id": tool_call.id, + "name": tool_call.name, + "input": tool_call.input, + "result": { + "success": result.success, + "data": result.data, + "error": result.error, + "truncated": result.truncated, + }, + } + ) + + @cb_set.on_before_compact + async def _on_before_compact() -> None: + self._emit({"event": "before_compact"}) + + @cb_set.on_after_compact + async def _on_after_compact() -> None: + self._emit({"event": "after_compact"}) + + @cb_set.on_error + async def _on_error(error: BaseException) -> None: + self._emit({"event": "error", "exception": f"{type(error).__name__}: {error}"}) + + return Callbacks([cb_set]) diff --git a/ddev/src/ddev/ai/phases/agentic_phase.py b/ddev/src/ddev/ai/phases/agentic_phase.py index c8a36767cd24b..25431f7a44c20 100644 --- a/ddev/src/ddev/ai/phases/agentic_phase.py +++ b/ddev/src/ddev/ai/phases/agentic_phase.py @@ -8,7 +8,7 @@ from typing import Any from ddev.ai.agent.base import BaseAgent -from ddev.ai.agent.build import AgentBuilder, make_agent_builder +from ddev.ai.agent.build import AgentBuilder, SubagentBuilder, make_agent_builder, make_subagent_builder from ddev.ai.callbacks.callbacks import Callbacks from ddev.ai.phases.base import Phase, PhaseOutcome from ddev.ai.phases.checkpoint import CheckpointManager @@ -59,6 +59,7 @@ def __init__( flow_variables: dict[str, str], config_dir: Path, file_registry: FileRegistry, + subagent_builder: SubagentBuilder | None = None, callbacks: Callbacks | None = None, logger: logging.Logger | None = None, ) -> None: @@ -75,6 +76,7 @@ def __init__( logger=logger, ) self._agent_builder = agent_builder + self._subagent_builder = subagent_builder @classmethod def validate_config( @@ -91,22 +93,36 @@ def validate_config( raise FlowConfigError(f"Phase {phase_id!r} (AgenticPhase) must have at least one task") @classmethod - def extra_init_kwargs( + def extra_init_kwargs( # type: ignore[override] cls, + *, phase_id: str, phase_config: PhaseConfig, agents: dict[str, AgentConfig], agent_clients: dict[str, Any], file_registry: FileRegistry, + **_: Any, ) -> dict[str, Any]: if phase_config.agent is None: raise FlowConfigError(f"Phase {phase_id!r} (AgenticPhase) requires 'agent'") + agent_config = agents[phase_config.agent] + + subagent_builder = None + # TODO: generalize this dispatch if more agent-meta tools appear in tools/agents/. + if "spawn_subagent" in agent_config.tools: + subagent_builder = make_subagent_builder( + parent_agent_config=agent_config, + agent_clients=agent_clients, + file_access_policy=file_registry.policy, + ) + return { "agent_builder": make_agent_builder( - agent_config=agents[phase_config.agent], + agent_config=agent_config, agent_clients=agent_clients, file_registry=file_registry, - ) + ), + "subagent_builder": subagent_builder, } def before_react(self) -> None: @@ -146,7 +162,11 @@ def _build_agent_and_process(self, context: dict[str, Any]) -> tuple[BaseAgent[A context, self._resolver, ) - agent, tool_registry = self._agent_builder(system_prompt, self._phase_id) + log_dir = None + if self._subagent_builder is not None: + log_dir = self._checkpoint_manager.root / "subagents" / self._phase_id + + agent, tool_registry = self._agent_builder(system_prompt, self._phase_id, self._subagent_builder, log_dir) process = ReActProcess( agent=agent, tool_registry=tool_registry, diff --git a/ddev/src/ddev/ai/phases/base.py b/ddev/src/ddev/ai/phases/base.py index 126d5772ba1ea..227b92560a60f 100644 --- a/ddev/src/ddev/ai/phases/base.py +++ b/ddev/src/ddev/ai/phases/base.py @@ -109,15 +109,14 @@ def validate_config( return None @classmethod - def extra_init_kwargs( - cls, - phase_id: str, - phase_config: PhaseConfig, - agents: dict[str, AgentConfig], - agent_clients: dict[str, Any], - file_registry: FileRegistry, - ) -> dict[str, Any]: - """Override to inject subclass-specific kwargs into __init__ at construction time.""" + def extra_init_kwargs(cls, **kwargs: Any) -> dict[str, Any]: + """Override to inject subclass-specific kwargs into __init__ at construction time. + + The orchestrator passes every framework-level dep (phase_id, phase_config, agents, + agent_clients, file_registry, checkpoint_manager, ...) as keyword arguments. + Subclasses pick the ones they need by declaring them explicitly and accept the + rest via **kwargs. + """ return {} @abstractmethod diff --git a/ddev/src/ddev/ai/phases/checkpoint.py b/ddev/src/ddev/ai/phases/checkpoint.py index 1a23b8ce63b7f..3f32edb158a5b 100644 --- a/ddev/src/ddev/ai/phases/checkpoint.py +++ b/ddev/src/ddev/ai/phases/checkpoint.py @@ -18,8 +18,13 @@ class CheckpointManager: def __init__(self, path: Path) -> None: self._path = path + @property + def root(self) -> Path: + """Directory that holds checkpoints.yaml, per-phase memory files, and any side artifacts.""" + return self._path.parent + def _ensure_dir(self) -> None: - self._path.parent.mkdir(parents=True, exist_ok=True) + self.root.mkdir(parents=True, exist_ok=True) def read(self) -> dict[str, Any]: """Return full checkpoint data, keyed by phase_id. Empty dict if file absent.""" @@ -44,7 +49,7 @@ def build_memory_prompt(self, user_additions: str | None) -> str: def memory_path(self, phase_id: str) -> Path: """Return the resolved path to a phase's memory file.""" - return (self._path.parent / f"{phase_id}_memory.md").resolve() + return (self.root / f"{phase_id}_memory.md").resolve() def write_memory(self, phase_id: str, text: str) -> None: """Write agent-authored text to this phase's memory file.""" diff --git a/ddev/src/ddev/ai/tools/agents/__init__.py b/ddev/src/ddev/ai/tools/agents/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/src/ddev/ai/tools/agents/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py new file mode 100644 index 0000000000000..ec9b03f66cd85 --- /dev/null +++ b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py @@ -0,0 +1,156 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from pathlib import Path +from typing import Annotated + +from pydantic import Field + +from ddev.ai.agent.build import SubagentBuilder +from ddev.ai.agent.types import StopReason +from ddev.ai.callbacks.file_logger import FileLogger +from ddev.ai.react.process import ReActProcess +from ddev.ai.tools.core.base import BaseTool, BaseToolInput +from ddev.ai.tools.core.types import ToolResult + + +class SpawnSubagentInput(BaseToolInput): + system_prompt: Annotated[ + str, + Field(description="System prompt that defines the subagent's role and behavior."), + ] + prompt: Annotated[ + str, + Field(description="The task prompt sent to the subagent as its first (and only) user turn."), + ] + tools: Annotated[ + list[str], + Field( + description=( + "Names of tools the subagent may use. Must be a subset of your tool list and " + "may not include 'spawn_subagent'. May be empty if the subagent should answer " + "from the prompt alone." + ), + ), + ] = [] + name: Annotated[ + str | None, + Field( + description=("Optional short human-readable name for the subagent."), + ), + ] = None + + +class SpawnSubagentTool(BaseTool[SpawnSubagentInput]): + """Delegate a self-contained subtask to a fresh subagent. + + The subagent runs one Reason-Action loop with the provided system prompt, user prompt, and tool subset. + Only the subagent's final assistant message is returned to you. Instruct the subagent in your prompt + to put anything you need in its final message. Include every piece of context the subagent needs + inside the system prompt and the user prompt.""" + + def __init__( + self, + owner_id: str, + subagent_builder: SubagentBuilder, + allowed_tools: list[str], + log_dir: Path, + ) -> None: + self._owner_id = owner_id + self._subagent_builder = subagent_builder + # Parent may itself have spawn_subagent; never offer it to children. + self._allowed_tools = set(allowed_tools) - {self.name} + self._log_dir = log_dir + self._counter = 0 + + @property + def name(self) -> str: + return "spawn_subagent" + + def _label(self, tool_input: SpawnSubagentInput) -> str: + return tool_input.name or "unnamed" + + async def __call__(self, tool_input: SpawnSubagentInput) -> ToolResult: + label = self._label(tool_input) + + # Subset validation — return failed ToolResult; no log file is opened. + if self.name in tool_input.tools: + return ToolResult( + success=False, + error=( + f"Subagent {label!r} not spawned: subagents cannot spawn further subagents " + f"('{self.name}' is not allowed in 'tools')." + ), + ) + disallowed = sorted(set(tool_input.tools) - self._allowed_tools) + if disallowed: + return ToolResult( + success=False, + error=( + f"Subagent {label!r} not spawned: disallowed tools requested: {disallowed}. " + f"Allowed subset: {sorted(self._allowed_tools)}." + ), + ) + + try: + self._log_dir.mkdir(parents=True, exist_ok=True) + except OSError as e: + return ToolResult( + success=False, + error=(f"Subagent {label!r} not spawned: cannot create log directory {self._log_dir}: {e}"), + ) + + self._counter += 1 + subagent_id = f"{self._owner_id}.sub.{self._counter:03d}-{label}" + log_path = self._log_dir / f"{self._counter:03d}-{label}.jsonl" + + logger = FileLogger(log_path) + try: + logger.log_start( + system_prompt=tool_input.system_prompt, + prompt=tool_input.prompt, + tools=tool_input.tools, + ) + + try: + agent, tool_registry = self._subagent_builder( + tool_input.system_prompt, + subagent_id, + tool_input.tools, + ) + except Exception as e: + logger.log_finish(success=False, error=f"build failed: {type(e).__name__}: {e}") + return ToolResult( + success=False, + error=f"Subagent {label!r} failed to build: {type(e).__name__}: {e}", + ) + + process = ReActProcess( + agent=agent, + tool_registry=tool_registry, + callbacks=logger.build_callbacks(), + ) + try: + result = await process.start(tool_input.prompt) + except Exception as e: + logger.log_finish(success=False, error=f"{type(e).__name__}: {e}") + return ToolResult( + success=False, + error=f"Subagent {label!r} failed: {type(e).__name__}: {e}", + ) + + logger.log_finish( + success=True, + iterations=result.iterations, + total_input_tokens=result.total_input_tokens, + total_output_tokens=result.total_output_tokens, + stop_reason=str(result.final_response.stop_reason), + ) + + data = result.final_response.text + if result.final_response.stop_reason == StopReason.MAX_TOKENS: + data = "[SUBAGENT HIT MAX_TOKENS — RESPONSE MAY BE TRUNCATED]\n\n" + data + return ToolResult(success=True, data=data) + finally: + logger.close() diff --git a/ddev/src/ddev/ai/tools/registry.py b/ddev/src/ddev/ai/tools/registry.py index 3fd255f7bda1b..828694eed6c11 100644 --- a/ddev/src/ddev/ai/tools/registry.py +++ b/ddev/src/ddev/ai/tools/registry.py @@ -4,8 +4,10 @@ from __future__ import annotations from collections.abc import Callable -from dataclasses import dataclass +from dataclasses import dataclass, field from importlib import import_module +from pathlib import Path +from typing import TYPE_CHECKING from anthropic.types import ToolParam @@ -15,6 +17,9 @@ from .core.protocol import ToolProtocol from .core.types import ToolResult +if TYPE_CHECKING: + from ddev.ai.agent.build import SubagentBuilder + @dataclass class ToolContext: @@ -22,6 +27,9 @@ class ToolContext: file_registry: FileRegistry owner_id: str + allowed_tool_names: tuple[str, ...] = field(default_factory=tuple) + subagent_builder: SubagentBuilder | None = None + log_dir: Path | None = None @property def policy(self) -> FileAccessPolicy: @@ -40,6 +48,21 @@ def _file_policy_factory(tool_cls: type, ctx: ToolContext) -> ToolProtocol: return tool_cls(ctx.policy) +def _spawn_subagent_factory(tool_cls: type, ctx: ToolContext) -> ToolProtocol: + if ctx.subagent_builder is None or ctx.log_dir is None: + raise ValueError( + "Tool 'spawn_subagent' requires both 'subagent_builder' and 'log_dir' to be " + "passed to ToolRegistry.from_names." + ) + allowed = [name for name in ctx.allowed_tool_names if name != "spawn_subagent"] + return tool_cls( + owner_id=ctx.owner_id, + subagent_builder=ctx.subagent_builder, + allowed_tools=allowed, + log_dir=ctx.log_dir, + ) + + @dataclass(frozen=True) class ToolSpec: """Lazy pointer to a tool class and how to construct it. @@ -71,6 +94,7 @@ class ToolSpec: "ddev_env_test": ToolSpec("shell.ddev.env_test", "DdevEnvTestTool"), "ddev_release_changelog": ToolSpec("shell.ddev.release_changelog", "DdevReleaseChangelogTool"), "ddev_validate": ToolSpec("shell.ddev.validate", "DdevValidateTool"), + "spawn_subagent": ToolSpec("agents.spawn_subagent", "SpawnSubagentTool", factory=_spawn_subagent_factory), } @@ -92,6 +116,8 @@ def from_names( *, owner_id: str, file_registry: FileRegistry, + subagent_builder: SubagentBuilder | None = None, + log_dir: Path | None = None, ) -> ToolRegistry: """Build a ToolRegistry from a list of tool name strings. @@ -102,6 +128,9 @@ def from_names( ctx = ToolContext( file_registry=file_registry, owner_id=owner_id, + allowed_tool_names=tuple(tool_names), + subagent_builder=subagent_builder, + log_dir=log_dir, ) tools: list[ToolProtocol] = [] for name in tool_names: diff --git a/ddev/tests/ai/agent/test_build.py b/ddev/tests/ai/agent/test_build.py index 254e1530dc11b..9cff25a6f8881 100644 --- a/ddev/tests/ai/agent/test_build.py +++ b/ddev/tests/ai/agent/test_build.py @@ -7,7 +7,7 @@ import pytest from ddev.ai.agent.anthropic_client import AnthropicAgent -from ddev.ai.agent.build import build_agent, make_agent_builder +from ddev.ai.agent.build import build_agent, build_subagent, make_agent_builder, make_subagent_builder from ddev.ai.phases.config import AgentConfig from ddev.ai.tools.fs.file_access_policy import FileAccessPolicy from ddev.ai.tools.fs.file_registry import FileRegistry @@ -15,62 +15,102 @@ @pytest.fixture -def file_registry(tmp_path) -> FileRegistry: - return FileRegistry(policy=FileAccessPolicy(write_root=tmp_path)) +def policy(tmp_path) -> FileAccessPolicy: + return FileAccessPolicy(write_root=tmp_path) -def test_build_agent_anthropic_returns_agent_and_registry(file_registry): - agent_config = AgentConfig(provider="anthropic", model="claude-test", max_tokens=1024, tools=[]) - agent_clients = {"anthropic": MagicMock()} +@pytest.fixture +def file_registry(policy) -> FileRegistry: + return FileRegistry(policy=policy) + + +@pytest.fixture +def clients() -> dict: + return {"anthropic": MagicMock()} + + +# --------------------------------------------------------------------------- +# Core builder behaviour +# --------------------------------------------------------------------------- + + +def test_unknown_provider_raises(file_registry, clients): + config = AgentConfig.model_construct(provider="bad_provider", tools=[]) + with pytest.raises(ValueError, match="Unknown agent provider: 'bad_provider'"): + build_agent(config, clients, "sys", "p1", file_registry) - agent, registry = build_agent( - agent_config=agent_config, - agent_clients=agent_clients, - system_prompt="hello", - owner_id="p1", - file_registry=file_registry, - ) +def test_missing_client_raises(file_registry): + config = AgentConfig(provider="anthropic", tools=[]) + with pytest.raises(ValueError, match="No client provided for agent provider 'anthropic'"): + build_agent(config, {}, "sys", "p1", file_registry) + + +def test_builds_anthropic_agent_with_correct_types_and_name(file_registry, clients): + config = AgentConfig(provider="anthropic", tools=[]) + agent, registry = build_agent(config, clients, "sys", "p1", file_registry) assert isinstance(agent, AnthropicAgent) assert isinstance(registry, ToolRegistry) assert agent.name == "p1" -def test_build_agent_missing_client_raises(file_registry): - agent_config = AgentConfig(provider="anthropic", tools=[]) - with pytest.raises(ValueError, match="No client provided for agent provider 'anthropic'"): - build_agent( - agent_config=agent_config, - agent_clients={}, - system_prompt="hello", - owner_id="p1", - file_registry=file_registry, - ) - - -def test_build_agent_unknown_provider_raises(file_registry): - agent_config = AgentConfig(provider="openai", tools=[]) - with pytest.raises(ValueError, match="Unknown agent provider: 'openai'"): - build_agent( - agent_config=agent_config, - agent_clients={"openai": MagicMock()}, - system_prompt="hello", - owner_id="p1", - file_registry=file_registry, - ) - - -def test_make_agent_builder_returns_callable_that_delegates_to_build_agent(file_registry): - agent_config = AgentConfig(provider="anthropic", tools=[]) - agent_clients = {"anthropic": MagicMock()} - - builder = make_agent_builder( - agent_config=agent_config, - agent_clients=agent_clients, - file_registry=file_registry, - ) - - agent, registry = builder("system prompt", "p2") +@pytest.mark.parametrize( + "model,max_tokens", + [ + ("claude-opus-4-7", 2048), + ("claude-haiku-4-5", 512), + ], +) +def test_model_and_max_tokens_forwarded(file_registry, clients, model, max_tokens): + config = AgentConfig(provider="anthropic", model=model, max_tokens=max_tokens, tools=[]) + agent, _ = build_agent(config, clients, "sys", "p1", file_registry) + assert agent._model == model + assert agent._max_tokens == max_tokens + + +def test_build_agent_uses_config_tools(file_registry, clients): + config = AgentConfig(provider="anthropic", tools=["read_file"]) + _, registry = build_agent(config, clients, "sys", "p1", file_registry) + assert len(registry.definitions) == 1 + assert registry.definitions[0]["name"] == "read_file" + + +# --------------------------------------------------------------------------- +# build_subagent +# --------------------------------------------------------------------------- + + +def test_build_subagent_creates_fresh_file_registry(policy, clients): + config = AgentConfig(provider="anthropic", tools=[]) + caller_registry = FileRegistry(policy=policy) + _, reg_a = build_subagent(config, clients, policy, "sys", "a", []) + _, reg_b = build_subagent(config, clients, policy, "sys", "b", []) + assert reg_a is not reg_b + assert reg_a is not caller_registry + + +def test_build_subagent_recursion_guard(policy, clients): + config = AgentConfig.model_construct(provider="anthropic", tools=[]) + with pytest.raises(ValueError): + build_subagent(config, clients, policy, "sys", "sub", ["spawn_subagent"]) + + +# --------------------------------------------------------------------------- +# Closures — verify delegation works and signatures are correct +# --------------------------------------------------------------------------- + + +def test_make_agent_builder(file_registry, clients): + config = AgentConfig(provider="anthropic", tools=[]) + builder = make_agent_builder(config, clients, file_registry) + agent, registry = builder("sys", "p1", None, None) assert isinstance(agent, AnthropicAgent) - assert isinstance(registry, ToolRegistry) - assert agent.name == "p2" + assert agent.name == "p1" + + +def test_make_subagent_builder(policy, clients): + config = AgentConfig(provider="anthropic", tools=[]) + builder = make_subagent_builder(config, clients, policy) + agent, registry = builder("sys", "sub-1", []) + assert isinstance(agent, AnthropicAgent) + assert agent.name == "sub-1" diff --git a/ddev/tests/ai/callbacks/test_file_logger.py b/ddev/tests/ai/callbacks/test_file_logger.py new file mode 100644 index 0000000000000..1b4f52bf251cb --- /dev/null +++ b/ddev/tests/ai/callbacks/test_file_logger.py @@ -0,0 +1,114 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import json + +import pytest + +from ddev.ai.agent.types import AgentResponse, StopReason, TokenUsage, ToolCall +from ddev.ai.callbacks.file_logger import FileLogger +from ddev.ai.tools.core.types import ToolResult + + +def make_response(text: str = "", stop_reason: StopReason = StopReason.END_TURN) -> AgentResponse: + return AgentResponse( + stop_reason=stop_reason, + text=text, + tool_calls=[], + usage=TokenUsage(input_tokens=10, output_tokens=5, cache_read_input_tokens=0, cache_creation_input_tokens=0), + ) + + +def read_events(log_path) -> list[dict]: + return [json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip()] + + +# --------------------------------------------------------------------------- +# File mechanics +# --------------------------------------------------------------------------- + + +def test_log_entries_are_valid_jsonl_with_timestamp(tmp_path): + log_path = tmp_path / "log.jsonl" + logger = FileLogger(log_path) + logger.log_start(system_prompt="sys", prompt="go", tools=["read_file"]) + logger.log_finish(success=True, iterations=1) + logger.close() + + events = read_events(log_path) + assert len(events) == 2 + assert all("ts" in e for e in events) + assert events[0]["event"] == "start" + assert events[1]["event"] == "finish" + + +def test_flush_after_each_write(tmp_path): + log_path = tmp_path / "log.jsonl" + logger = FileLogger(log_path) + logger.log_start(system_prompt="s", prompt="p", tools=[]) + # A second file handle reads the line without closing the logger first + assert log_path.read_text(encoding="utf-8").strip() != "" + logger.close() + + +def test_close_is_idempotent_and_prevents_further_writes(tmp_path): + log_path = tmp_path / "log.jsonl" + logger = FileLogger(log_path) + logger.log_start(system_prompt="s", prompt="p", tools=[]) + logger.close() + logger.close() # must not raise + logger.log_finish(success=False) # must not write + assert len(read_events(log_path)) == 1 + + +def test_constructor_requires_existing_parent(tmp_path): + with pytest.raises(OSError): + FileLogger(tmp_path / "doesnotexist" / "log.jsonl") + + +def test_non_serializable_values_use_str_repr(tmp_path): + log_path = tmp_path / "log.jsonl" + logger = FileLogger(log_path) + + class Unserializable: + def __repr__(self): + return "" + + logger.log_finish(success=True, extra=Unserializable()) + logger.close() + + assert read_events(log_path)[0]["extra"] == "" + + +# --------------------------------------------------------------------------- +# Callbacks wiring +# --------------------------------------------------------------------------- + + +async def test_build_callbacks_fires_all_event_types(tmp_path): + log_path = tmp_path / "log.jsonl" + logger = FileLogger(log_path) + callbacks = logger.build_callbacks() + + tool_call = ToolCall(id="tc1", name="read_file", input={"path": "/f"}) + tool_result = ToolResult(success=True, data="content") + + await callbacks.fire_before_agent_send(2) + await callbacks.fire_agent_response(make_response("hi"), 2) + await callbacks.fire_tool_call(tool_call, tool_result, 2) + await callbacks.fire_before_compact() + await callbacks.fire_after_compact() + await callbacks.fire_error(ValueError("oops")) + logger.close() + + events = {e["event"]: e for e in read_events(log_path)} + + assert events["before_agent_send"]["iter"] == 2 + assert events["agent_response"]["text"] == "hi" + assert events["agent_response"]["iter"] == 2 + assert events["tool_call"]["tool_call_id"] == "tc1" + assert events["tool_call"]["result"]["success"] is True + assert "before_compact" in events + assert "after_compact" in events + assert "ValueError" in events["error"]["exception"] diff --git a/ddev/tests/ai/phases/conftest.py b/ddev/tests/ai/phases/conftest.py index 7f07c1734ffdc..96149455e1385 100644 --- a/ddev/tests/ai/phases/conftest.py +++ b/ddev/tests/ai/phases/conftest.py @@ -92,7 +92,9 @@ def make_agent_builder(mock_agent: MockAgent, captured_kwargs: dict[str, Any] | owner_id passed in — useful for asserting on prompt rendering. """ - def builder(system_prompt: str, owner_id: str) -> tuple[MockAgent, ToolRegistry]: + def builder( + system_prompt: str, owner_id: str, subagent_builder=None, log_dir=None + ) -> tuple[MockAgent, ToolRegistry]: if captured_kwargs is not None: captured_kwargs["system_prompt"] = system_prompt captured_kwargs["owner_id"] = owner_id diff --git a/ddev/tests/ai/phases/test_agentic_phase.py b/ddev/tests/ai/phases/test_agentic_phase.py index e41188c46e8f2..bc333f965aee9 100644 --- a/ddev/tests/ai/phases/test_agentic_phase.py +++ b/ddev/tests/ai/phases/test_agentic_phase.py @@ -2,17 +2,28 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import json from pathlib import Path import pytest +from ddev.ai.agent.types import AgentResponse, StopReason, TokenUsage, ToolCall from ddev.ai.callbacks.callbacks import Callbacks, CallbackSet from ddev.ai.phases.agentic_phase import AgenticPhase, render_memory_prompt, render_task_prompt +from ddev.ai.phases.checkpoint import CheckpointManager from ddev.ai.phases.config import AgentConfig, CheckpointConfig, FlowConfigError, PhaseConfig, TaskConfig -from ddev.ai.phases.messages import PhaseTrigger +from ddev.ai.phases.messages import PhaseFailedMessage, PhaseTrigger +from ddev.ai.tools.fs.file_access_policy import FileAccessPolicy +from ddev.ai.tools.fs.file_registry import FileRegistry +from ddev.ai.tools.registry import ToolRegistry from .conftest import MockAgent, make_agent_phase, make_response, resolve_key + +def read_jsonl(path: Path) -> list[dict]: + return [json.loads(line) for line in path.read_text(encoding="utf-8").splitlines() if line.strip()] + + # --------------------------------------------------------------------------- # render_task_prompt # --------------------------------------------------------------------------- @@ -21,29 +32,24 @@ def test_render_task_prompt_from_file(tmp_path): prompt_file = tmp_path / "task.md" prompt_file.write_text("Hello ${name}.") - task = TaskConfig(name="t1", prompt_path="task.md") - result = render_task_prompt(task, tmp_path, {"name": "Alice"}) + result = render_task_prompt(TaskConfig(name="t1", prompt_path="task.md"), tmp_path, {"name": "Alice"}) assert result == "Hello Alice." def test_render_task_prompt_inline(): - task = TaskConfig(name="t1", prompt="Hello ${name}.") - result = render_task_prompt(task, None, {"name": "Bob"}) + result = render_task_prompt(TaskConfig(name="t1", prompt="Hello ${name}."), None, {"name": "Bob"}) assert result == "Hello Bob." def test_render_task_prompt_forwards_resolver(tmp_path): - prompt_file = tmp_path / "task.md" - prompt_file.write_text("Memory: ${draft_memory}") - task = TaskConfig(name="t1", prompt_path="task.md") - result = render_task_prompt(task, tmp_path, {}, resolve_key) + (tmp_path / "task.md").write_text("Memory: ${draft_memory}") + result = render_task_prompt(TaskConfig(name="t1", prompt_path="task.md"), tmp_path, {}, resolve_key) assert result == "Memory: resolved(draft_memory)" -def test_render_task_prompt_raises_when_both_unset(): - task = TaskConfig.model_construct(name="t1", prompt=None, prompt_path=None) +def test_render_task_prompt_raises_when_no_source(): with pytest.raises(FlowConfigError, match="prompt"): - render_task_prompt(task, None, {}) + render_task_prompt(TaskConfig.model_construct(name="t1", prompt=None, prompt_path=None), None, {}) # --------------------------------------------------------------------------- @@ -52,23 +58,21 @@ def test_render_task_prompt_raises_when_both_unset(): def test_render_memory_prompt_from_file(tmp_path): - mem_file = tmp_path / "mem.md" - mem_file.write_text("List files for ${phase_name}.") - checkpoint = CheckpointConfig(memory_prompt_path="mem.md") - result = render_memory_prompt(checkpoint, tmp_path, {"phase_name": "draft"}) + (tmp_path / "mem.md").write_text("List files for ${phase_name}.") + result = render_memory_prompt(CheckpointConfig(memory_prompt_path="mem.md"), tmp_path, {"phase_name": "draft"}) assert result == "List files for draft." def test_render_memory_prompt_inline(): - checkpoint = CheckpointConfig(memory_prompt="List files for ${phase_name}.") - result = render_memory_prompt(checkpoint, None, {"phase_name": "draft"}) + result = render_memory_prompt( + CheckpointConfig(memory_prompt="List files for ${phase_name}."), None, {"phase_name": "draft"} + ) assert result == "List files for draft." -def test_render_memory_prompt_raises_when_both_unset(): - checkpoint = CheckpointConfig.model_construct(memory_prompt=None, memory_prompt_path=None) +def test_render_memory_prompt_raises_when_no_source(): with pytest.raises(FlowConfigError, match="memory_prompt"): - render_memory_prompt(checkpoint, None, {}) + render_memory_prompt(CheckpointConfig.model_construct(memory_prompt=None, memory_prompt_path=None), None, {}) # --------------------------------------------------------------------------- @@ -76,262 +80,153 @@ def test_render_memory_prompt_raises_when_both_unset(): # --------------------------------------------------------------------------- -def test_agentic_phase_validate_config_rejects_missing_agent(): - config = PhaseConfig(tasks=[TaskConfig(name="t1", prompt="x")]) - with pytest.raises(FlowConfigError, match="requires 'agent'"): - AgenticPhase.validate_config("p1", config, {}) - - -def test_agentic_phase_validate_config_rejects_unknown_agent(): - config = PhaseConfig(agent="ghost", tasks=[TaskConfig(name="t1", prompt="x")]) - with pytest.raises(FlowConfigError, match="unknown agent"): - AgenticPhase.validate_config("p1", config, {"writer": AgentConfig()}) - - -def test_agentic_phase_validate_config_rejects_empty_tasks(): - config = PhaseConfig(agent="writer") - with pytest.raises(FlowConfigError, match="at least one task"): +@pytest.mark.parametrize( + "config,match", + [ + (PhaseConfig(tasks=[TaskConfig(name="t1", prompt="x")]), "requires 'agent'"), + (PhaseConfig(agent="ghost", tasks=[TaskConfig(name="t1", prompt="x")]), "unknown agent"), + (PhaseConfig(agent="writer"), "at least one task"), + ], + ids=["missing_agent", "unknown_agent", "empty_tasks"], +) +def test_validate_config_rejects_invalid(config, match): + with pytest.raises(FlowConfigError, match=match): AgenticPhase.validate_config("p1", config, {"writer": AgentConfig()}) -def test_agentic_phase_validate_config_accepts_valid(): - config = PhaseConfig(agent="writer", tasks=[TaskConfig(name="t1", prompt="x")]) - AgenticPhase.validate_config("p1", config, {"writer": AgentConfig()}) +def test_validate_config_accepts_valid(): + AgenticPhase.validate_config( + "p1", PhaseConfig(agent="writer", tasks=[TaskConfig(name="t1", prompt="x")]), {"writer": AgentConfig()} + ) # --------------------------------------------------------------------------- -# AgenticPhase.process_message — happy path +# process_message — happy path # --------------------------------------------------------------------------- async def test_happy_path_single_task(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task done", 100, 50), # task 1 via ReActProcess - make_response("summary", 10, 5), # memory step - ] - mock_agent = MockAgent(responses) + mock_agent = MockAgent([make_response("task done", 100, 50), make_response("summary", 10, 5)]) phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) await phase.process_message(PhaseTrigger(id="start", phase_id=None)) assert mgr.memory_content("p1") == "summary" - checkpoint = mgr.read()["p1"] assert checkpoint["status"] == "success" - assert checkpoint["tokens"]["total_input"] == 110 - assert checkpoint["tokens"]["total_output"] == 55 - assert checkpoint["memory_path"] - - assert len(mock_agent.send_calls) == 2 + assert checkpoint["tokens"] == {"total_input": 110, "total_output": 55} assert mock_agent.send_calls[0] == "Do the work." assert "Write a brief summary" in mock_agent.send_calls[1] + # checkpoint memory_path points to the written file + memory_path = Path(checkpoint["memory_path"]) + assert memory_path.is_absolute() and memory_path.exists() and memory_path.name == "p1_memory.md" -async def test_happy_path_two_tasks(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task1 done", 100, 50), - make_response("task2 done", 200, 80), - make_response("summary", 10, 5), - ] - mock_agent = MockAgent(responses) +async def test_happy_path_two_tasks_accumulates_tokens(flow_dir, monkeypatch, message_queue): + mock_agent = MockAgent( + [ + make_response("t1 done", 100, 50), + make_response("t2 done", 200, 80), + make_response("summary", 10, 5), + ] + ) phase, mgr = make_agent_phase( flow_dir, mock_agent, monkeypatch, message_queue, - tasks=[ - TaskConfig(name="t1", prompt="First task."), - TaskConfig(name="t2", prompt="Second task."), - ], + tasks=[TaskConfig(name="t1", prompt="First."), TaskConfig(name="t2", prompt="Second.")], ) await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - checkpoint = mgr.read()["p1"] - assert checkpoint["tokens"]["total_input"] == 310 - assert checkpoint["tokens"]["total_output"] == 135 - assert checkpoint["memory_path"] + assert mgr.read()["p1"]["tokens"] == {"total_input": 310, "total_output": 135} # --------------------------------------------------------------------------- -# AgenticPhase.process_message — memory step with checkpoint config +# process_message — context compaction # --------------------------------------------------------------------------- -async def test_memory_step_with_checkpoint_config(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task done", 100, 50), - make_response("summary with files", 10, 5), - ] - mock_agent = MockAgent(responses) - phase, mgr = make_agent_phase( +@pytest.mark.parametrize("context_pct,expect_compact", [(85, True), (50, False)], ids=["above", "below"]) +async def test_compact_between_tasks(flow_dir, monkeypatch, message_queue, context_pct, expect_compact): + mock_agent = MockAgent( + [ + make_response("t1 done", 100, 50, context_pct=context_pct), + make_response("t2 done", 200, 80), + make_response("summary", 10, 5), + ] + ) + phase, _ = make_agent_phase( flow_dir, mock_agent, monkeypatch, message_queue, - checkpoint=CheckpointConfig(memory_prompt="Also list the files."), + tasks=[TaskConfig(name="t1", prompt="First."), TaskConfig(name="t2", prompt="Second.")], ) await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - memory_prompt = mock_agent.send_calls[1] - assert "Also list the files." in memory_prompt - assert "Write a brief summary" in memory_prompt - - -async def test_memory_step_without_checkpoint_config(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task done", 100, 50), - make_response("summary", 10, 5), - ] - mock_agent = MockAgent(responses) - phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) - - await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - - memory_prompt = mock_agent.send_calls[1] - assert memory_prompt == "Write a brief summary of what you accomplished in this phase." + assert (mock_agent.compact_call_count >= 1) == expect_compact # --------------------------------------------------------------------------- -# AgenticPhase.process_message — context compaction between tasks +# process_message — before_react / after_react hooks # --------------------------------------------------------------------------- -async def test_compact_between_tasks_when_above_threshold(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task1 done", 100, 50, context_pct=85), # above 80% threshold - make_response("task2 done", 200, 80), - make_response("summary", 10, 5), - ] - mock_agent = MockAgent(responses) - phase, mgr = make_agent_phase( - flow_dir, - mock_agent, - monkeypatch, - message_queue, - tasks=[ - TaskConfig(name="t1", prompt="First task."), - TaskConfig(name="t2", prompt="Second task."), - ], - ) - - await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - - checkpoint = mgr.read()["p1"] - assert checkpoint["status"] == "success" - assert checkpoint["memory_path"] - assert mock_agent.compact_call_count >= 1 - +@pytest.mark.parametrize("hook_name", ["before_react", "after_react"], ids=["before", "after"]) +async def test_react_hook_failure_fails_phase(flow_dir, monkeypatch, message_queue, hook_name): + mock_agent = MockAgent([make_response("done", 100, 50)]) + phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) + setattr(phase, hook_name, lambda: (_ for _ in ()).throw(RuntimeError("hook failed"))) -async def test_no_compact_when_below_threshold(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task1 done", 100, 50, context_pct=50), # below 80% threshold - make_response("task2 done", 200, 80), - make_response("summary", 10, 5), - ] - mock_agent = MockAgent(responses) - phase, mgr = make_agent_phase( - flow_dir, - mock_agent, - monkeypatch, - message_queue, - tasks=[ - TaskConfig(name="t1", prompt="First task."), - TaskConfig(name="t2", prompt="Second task."), - ], - ) + with pytest.raises(RuntimeError, match="hook failed"): + await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - checkpoint = mgr.read()["p1"] - assert checkpoint["status"] == "success" - assert checkpoint["memory_path"] - assert mock_agent.compact_call_count == 0 + assert mgr.read() == {} # --------------------------------------------------------------------------- -# AgenticPhase.process_message — template context +# process_message — template context # --------------------------------------------------------------------------- -async def test_flow_variables_in_system_prompt(flow_dir, monkeypatch, message_queue): +async def test_flow_variables_rendered_in_system_prompt(flow_dir, monkeypatch, message_queue): (flow_dir / "prompts" / "writer.md").write_text("Project: ${project}") mock_agent = MockAgent([make_response("done", 100, 50), make_response("summary", 10, 5)]) - captured_kwargs: dict = {} + captured: dict = {} phase, _ = make_agent_phase( flow_dir, mock_agent, monkeypatch, message_queue, flow_variables={"project": "myproj"}, - captured_agent_kwargs=captured_kwargs, + captured_agent_kwargs=captured, ) await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - assert captured_kwargs["system_prompt"] == "Project: myproj" + assert captured["system_prompt"] == "Project: myproj" async def test_runtime_variables_override_flow_variables(flow_dir, monkeypatch, message_queue): (flow_dir / "prompts" / "writer.md").write_text("Project: ${project}") mock_agent = MockAgent([make_response("done", 100, 50), make_response("summary", 10, 5)]) - captured_kwargs: dict = {} + captured: dict = {} phase, _ = make_agent_phase( flow_dir, mock_agent, monkeypatch, message_queue, - flow_variables={"project": "flow_default"}, - runtime_variables={"project": "runtime_override"}, - captured_agent_kwargs=captured_kwargs, + flow_variables={"project": "flow"}, + runtime_variables={"project": "runtime"}, + captured_agent_kwargs=captured, ) await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - assert captured_kwargs["system_prompt"] == "Project: runtime_override" - - -# --------------------------------------------------------------------------- -# AgenticPhase.process_message — before_react / after_react errors -# --------------------------------------------------------------------------- - - -async def test_before_react_raises_propagates(flow_dir, monkeypatch, message_queue): - mock_agent = MockAgent([]) - phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) - - def failing_hook(): - raise RuntimeError("setup failed") - - phase.before_react = failing_hook - - with pytest.raises(RuntimeError, match="setup failed"): - await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - - assert mgr.read() == {} - - -async def test_after_react_raises_propagates(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("done", 100, 50), - ] - mock_agent = MockAgent(responses) - phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) - - def failing_hook(): - raise RuntimeError("teardown failed") - - phase.after_react = failing_hook - - with pytest.raises(RuntimeError, match="teardown failed"): - await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - - assert mgr.read() == {} - - -# --------------------------------------------------------------------------- -# AgenticPhase.process_message — resolver integration with memory files -# --------------------------------------------------------------------------- + assert captured["system_prompt"] == "Project: runtime" async def test_task_prompt_resolves_memory_variable(flow_dir, monkeypatch, message_queue): @@ -344,7 +239,6 @@ async def test_task_prompt_resolves_memory_variable(flow_dir, monkeypatch, messa phase_id="review", tasks=[TaskConfig(name="t1", prompt="Review: ${draft_memory}")], ) - mgr.write_phase_checkpoint("draft", {"status": "success"}) mgr.write_memory("draft", "Created file.py") await phase.process_message(PhaseTrigger(id="start", phase_id=None)) @@ -353,14 +247,15 @@ async def test_task_prompt_resolves_memory_variable(flow_dir, monkeypatch, messa # --------------------------------------------------------------------------- -# AgenticPhase.process_message — memory step failure behaviour +# process_message — failure modes # --------------------------------------------------------------------------- async def test_memory_api_failure_fails_phase(flow_dir, monkeypatch, message_queue): - responses = [make_response("task done", 100, 50)] - mock_agent = MockAgent(responses) - phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) + # Only one response — IndexError when memory step tries to call agent again + phase, mgr = make_agent_phase( + flow_dir, MockAgent([make_response("task done", 100, 50)]), monkeypatch, message_queue + ) with pytest.raises(IndexError): await phase.process_message(PhaseTrigger(id="start", phase_id=None)) @@ -368,45 +263,37 @@ async def test_memory_api_failure_fails_phase(flow_dir, monkeypatch, message_que assert mgr.read() == {} -async def test_memory_template_error_fails_phase(flow_dir, monkeypatch, message_queue): - responses = [make_response("task done", 100, 50)] - mock_agent = MockAgent(responses) +async def test_memory_template_render_failure_fails_phase(flow_dir, monkeypatch, message_queue): phase, mgr = make_agent_phase( flow_dir, - mock_agent, + MockAgent([make_response("task done", 100, 50)]), monkeypatch, message_queue, checkpoint=CheckpointConfig(memory_prompt="Summarize."), ) + monkeypatch.setattr( + "ddev.ai.phases.agentic_phase.render_memory_prompt", + lambda *a, **kw: (_ for _ in ()).throw(ValueError("bad template")), + ) - def raise_render_error(*args, **kwargs): - raise ValueError("template error") - - monkeypatch.setattr("ddev.ai.phases.agentic_phase.render_memory_prompt", raise_render_error) - - with pytest.raises(ValueError, match="template error"): + with pytest.raises(ValueError, match="bad template"): await phase.process_message(PhaseTrigger(id="start", phase_id=None)) assert mgr.read() == {} -async def test_successful_phase_writes_memory_path_into_checkpoint(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task done", 100, 50), - make_response("summary text", 10, 5), - ] - mock_agent = MockAgent(responses) +async def test_disk_failure_on_write_memory_fails_phase(flow_dir, monkeypatch, message_queue): + mock_agent = MockAgent([make_response("task done", 100, 50), make_response("summary", 10, 5)]) phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) + monkeypatch.setattr( + "ddev.ai.phases.checkpoint.CheckpointManager.write_memory", + lambda *a, **kw: (_ for _ in ()).throw(PermissionError("read-only")), + ) - await phase.process_message(PhaseTrigger(id="start", phase_id=None)) + with pytest.raises(PermissionError, match="read-only"): + await phase.process_message(PhaseTrigger(id="start", phase_id=None)) - checkpoint = mgr.read()["p1"] - assert "memory_path" in checkpoint - memory_path = Path(checkpoint["memory_path"]) - assert memory_path.is_absolute() - assert memory_path.exists() - assert memory_path.name == "p1_memory.md" - assert memory_path.read_text() == "summary text" + assert mgr.read() == {} # --------------------------------------------------------------------------- @@ -415,19 +302,15 @@ async def test_successful_phase_writes_memory_path_into_checkpoint(flow_dir, mon @pytest.mark.parametrize( - "checkpoint, expected_build_arg", - [ - (None, None), - (CheckpointConfig(memory_prompt="anything"), "USER_ADDITIONS"), - ], + "checkpoint,expected_user_additions", + [(None, None), (CheckpointConfig(memory_prompt="anything"), "USER_ADDITIONS")], ids=["no_checkpoint", "with_checkpoint"], ) -async def test_run_memory_step_forwards_user_additions_to_build( - flow_dir, monkeypatch, message_queue, checkpoint, expected_build_arg +async def test_run_memory_step_passes_user_additions_to_build( + flow_dir, monkeypatch, message_queue, checkpoint, expected_user_additions ): mock_agent = MockAgent([make_response("ok", 0, 0)]) phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue, checkpoint=checkpoint) - monkeypatch.setattr("ddev.ai.phases.agentic_phase.render_memory_prompt", lambda *a, **kw: "USER_ADDITIONS") build_calls: list = [] monkeypatch.setattr( @@ -436,7 +319,7 @@ async def test_run_memory_step_forwards_user_additions_to_build( await phase._run_memory_step(mock_agent, {}) - assert build_calls == [expected_build_arg] + assert build_calls == [expected_user_additions] async def test_run_memory_step_sends_built_prompt_with_no_tools(flow_dir, monkeypatch, message_queue): @@ -444,13 +327,12 @@ async def test_run_memory_step_sends_built_prompt_with_no_tools(flow_dir, monkey class CapturingAgent(MockAgent): async def send(self, content, allowed_tools=None): - captured["content"] = content - captured["allowed_tools"] = allowed_tools + captured.update({"content": content, "allowed_tools": allowed_tools}) return await super().send(content, allowed_tools) agent = CapturingAgent([make_response("ok", 0, 0)]) phase, mgr = make_agent_phase(flow_dir, agent, monkeypatch, message_queue) - monkeypatch.setattr(mgr, "build_memory_prompt", lambda user_additions: "BUILT") + monkeypatch.setattr(mgr, "build_memory_prompt", lambda _: "BUILT") await phase._run_memory_step(agent, {}) @@ -479,24 +361,74 @@ async def _response(response, iteration): # --------------------------------------------------------------------------- -# AgenticPhase.process_message — disk failure regression +# AgenticPhase with spawn_subagent — wiring smoke test # --------------------------------------------------------------------------- -async def test_write_memory_disk_failure_fails_phase(flow_dir, monkeypatch, message_queue): - responses = [ - make_response("task done", 100, 50), - make_response("summary text", 10, 5), - ] - mock_agent = MockAgent(responses) - phase, mgr = make_agent_phase(flow_dir, mock_agent, monkeypatch, message_queue) +async def test_spawn_subagent_wiring(flow_dir, message_queue): + """Phase correctly passes subagent_builder + log_dir to the agent builder at execute time.""" - def raise_permission_error(*args, **kwargs): - raise PermissionError("disk is read-only") + def make_usage() -> TokenUsage: + return TokenUsage(input_tokens=100, output_tokens=50, cache_read_input_tokens=0, cache_creation_input_tokens=0) - monkeypatch.setattr("ddev.ai.phases.checkpoint.CheckpointManager.write_memory", raise_permission_error) + spawn_call = ToolCall( + id="tc1", + name="spawn_subagent", + input={"system_prompt": "you are a helper", "prompt": "answer 42", "tools": [], "name": "child"}, + ) + parent_agent = MockAgent( + [ + AgentResponse(stop_reason=StopReason.TOOL_USE, text="", tool_calls=[spawn_call], usage=make_usage()), + AgentResponse(stop_reason=StopReason.END_TURN, text="parent done", tool_calls=[], usage=make_usage()), + AgentResponse(stop_reason=StopReason.END_TURN, text="memory summary", tool_calls=[], usage=make_usage()), + ] + ) - with pytest.raises(PermissionError, match="disk is read-only"): - await phase.process_message(PhaseTrigger(id="start", phase_id=None)) + subagent_calls: list = [] + + def mock_subagent_builder(system_prompt: str, owner_id: str, tool_names: list[str]): + subagent_calls.append(system_prompt) + return MockAgent( + [AgentResponse(stop_reason=StopReason.END_TURN, text="42", tool_calls=[], usage=make_usage())] + ), ToolRegistry([]) + + from ddev.ai.tools.agents.spawn_subagent import SpawnSubagentTool + + def agent_builder_fn(system_prompt: str, owner_id: str, subagent_builder=None, log_dir=None): + parent_agent.name = owner_id + return parent_agent, ToolRegistry( + [ + SpawnSubagentTool( + owner_id=owner_id, + subagent_builder=subagent_builder, + allowed_tools=[], + log_dir=log_dir, + ) + ] + ) + + checkpoint_manager = CheckpointManager(flow_dir / "checkpoints.yaml") + phase = AgenticPhase( + phase_id="p1", + dependencies=[], + config=PhaseConfig(agent="writer", tasks=[TaskConfig(name="t1", prompt="Do the work.")]), + agent_builder=agent_builder_fn, + checkpoint_manager=checkpoint_manager, + runtime_variables={}, + flow_variables={}, + config_dir=flow_dir, + file_registry=FileRegistry(policy=FileAccessPolicy(write_root=flow_dir)), + subagent_builder=mock_subagent_builder, + ) + phase.queue = message_queue - assert mgr.read() == {} + await phase.process_message(PhaseTrigger(id="start", phase_id=None)) + + submitted = [message_queue.get_nowait() for _ in range(message_queue.qsize())] + assert not any(isinstance(m, PhaseFailedMessage) for m in submitted) + assert subagent_calls == ["you are a helper"] + + log_file = checkpoint_manager.root / "subagents" / "p1" / "001-child.jsonl" + assert log_file.exists() + events = {e["event"] for e in read_jsonl(log_file)} + assert {"start", "finish"} <= events diff --git a/ddev/tests/ai/tools/agents/__init__.py b/ddev/tests/ai/tools/agents/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/tests/ai/tools/agents/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/ai/tools/agents/test_spawn_subagent.py b/ddev/tests/ai/tools/agents/test_spawn_subagent.py new file mode 100644 index 0000000000000..95c7c3e2d581c --- /dev/null +++ b/ddev/tests/ai/tools/agents/test_spawn_subagent.py @@ -0,0 +1,310 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import asyncio +import json +from pathlib import Path + +import pytest + +from ddev.ai.agent.exceptions import AgentError +from ddev.ai.agent.types import AgentResponse, StopReason, TokenUsage, ToolCall, ToolResultMessage +from ddev.ai.tools.agents.spawn_subagent import SpawnSubagentInput, SpawnSubagentTool +from ddev.ai.tools.core.types import ToolResult + +# --------------------------------------------------------------------------- +# Mock helpers +# --------------------------------------------------------------------------- + + +class MockAgent: + def __init__(self, responses: list[AgentResponse]) -> None: + self._responses = list(responses) + self._index = 0 + self.name = "mock" + self._history: list = [] + + async def send( + self, content: str | list[ToolResultMessage], allowed_tools: list[str] | None = None + ) -> AgentResponse: + response = self._responses[self._index] + self._index += 1 + return response + + def reset(self) -> None: + self._history = [] + + async def compact(self) -> AgentResponse | None: + return None + + async def compact_preserving_last_turn(self) -> AgentResponse | None: + return None + + +class _RaisingAgent: + """Raises a fixed exception on every send() call.""" + + def __init__(self, exc: BaseException) -> None: + self._exc = exc + self.name = "raising" + self._history: list = [] + + async def send(self, content, allowed_tools=None) -> AgentResponse: + raise self._exc + + def reset(self) -> None: + self._history = [] + + async def compact(self) -> AgentResponse | None: + return None + + async def compact_preserving_last_turn(self) -> AgentResponse | None: + return None + + +class MockToolRegistry: + def __init__(self, result: ToolResult | None = None) -> None: + self._result = result or ToolResult(success=True, data="ok") + + @property + def definitions(self) -> list: + return [] + + async def run(self, name: str, raw: dict) -> ToolResult: + return self._result + + +def make_response( + text: str = "", + stop_reason: StopReason = StopReason.END_TURN, + tool_calls: list[ToolCall] | None = None, +) -> AgentResponse: + return AgentResponse( + stop_reason=stop_reason, + text=text, + tool_calls=tool_calls or [], + usage=TokenUsage(input_tokens=10, output_tokens=5, cache_read_input_tokens=0, cache_creation_input_tokens=0), + ) + + +def make_builder(responses: list[AgentResponse], tool_result: ToolResult | None = None): + """Return a builder closure that replays fixed responses.""" + tr = tool_result or ToolResult(success=True, data="ok") + + def builder(system_prompt: str, owner_id: str, tool_names: list[str]): + return MockAgent(list(responses)), MockToolRegistry(tr) + + return builder + + +def make_tool( + log_dir: Path, builder, allowed_tools: list[str] | None = None, owner_id: str = "parent" +) -> SpawnSubagentTool: + return SpawnSubagentTool( + owner_id=owner_id, + subagent_builder=builder, + allowed_tools=allowed_tools if allowed_tools is not None else ["read_file", "edit_file"], + log_dir=log_dir, + ) + + +def read_events(log_path: Path) -> list[dict]: + return [json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip()] + + +# --------------------------------------------------------------------------- +# Input validation — fails before any log file is opened +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "tools,allowed,error_fragment", + [ + (["spawn_subagent"], ["read_file"], "spawn further subagents"), + (["read_file", "edit_file"], ["read_file"], "edit_file"), + ], + ids=["recursive", "disallowed"], +) +async def test_input_validation_fails_before_logging(tmp_path, tools, allowed, error_fragment): + tool = make_tool(tmp_path, make_builder([make_response()]), allowed_tools=allowed) + result = await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=tools, name="x")) + + assert result.success is False + assert error_fragment in result.error + assert "x" in result.error + assert list(tmp_path.glob("*.jsonl")) == [] + assert tool._counter == 0 + + +# --------------------------------------------------------------------------- +# mkdir failure — after validation, before counter advances +# --------------------------------------------------------------------------- + + +async def test_mkdir_failure(tmp_path): + blocker = tmp_path / "blocked" + blocker.write_text("I am a file") + log_dir = blocker / "subagents" + + tool = make_tool(log_dir, make_builder([make_response()])) + result = await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="x")) + + assert result.success is False + assert "x" in result.error + assert str(log_dir) in result.error + assert tool._counter == 0 + + +# --------------------------------------------------------------------------- +# Happy path +# --------------------------------------------------------------------------- + + +async def test_happy_path(tmp_path): + tool = make_tool(tmp_path, make_builder([make_response(text="ok")])) + result = await tool(SpawnSubagentInput(system_prompt="sys", prompt="do it", tools=[], name="worker")) + + assert result.success is True + assert result.data == "ok" + + events = read_events(tmp_path / "001-worker.jsonl") + assert events[0]["event"] == "start" + assert events[-1]["event"] == "finish" + assert events[-1]["success"] is True + + +async def test_multi_iteration_wires_callbacks(tmp_path): + """Proves FileLogger callbacks are wired: a subagent tool call produces a tool_call log event.""" + tool_call = ToolCall(id="tc1", name="read_file", input={"path": "/f"}) + tool = make_tool( + tmp_path, + make_builder( + [make_response(stop_reason=StopReason.TOOL_USE, tool_calls=[tool_call]), make_response(text="done")], + tool_result=ToolResult(success=True, data="content"), + ), + allowed_tools=["read_file"], + ) + + result = await tool(SpawnSubagentInput(system_prompt="sys", prompt="go", tools=["read_file"])) + + assert result.success is True + assert result.data == "done" + assert "tool_call" in [e["event"] for e in read_events(tmp_path / "001-unnamed.jsonl")] + + +async def test_max_tokens_response_prefixed(tmp_path): + tool = make_tool(tmp_path, make_builder([make_response(text="partial", stop_reason=StopReason.MAX_TOKENS)])) + result = await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="mt")) + + assert result.success is True + assert result.data.startswith("[SUBAGENT HIT MAX_TOKENS — RESPONSE MAY BE TRUNCATED]") + assert "partial" in result.data + + finish = next(e for e in read_events(tmp_path / "001-mt.jsonl") if e["event"] == "finish") + assert finish["stop_reason"] == "max_tokens" + + +# --------------------------------------------------------------------------- +# Failure paths +# --------------------------------------------------------------------------- + + +async def test_builder_failure(tmp_path): + def failing_builder(sp, oid, tns): + raise ValueError("boom") + + tool = SpawnSubagentTool(owner_id="parent", subagent_builder=failing_builder, allowed_tools=[], log_dir=tmp_path) + result = await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="fail")) + + assert result.success is False + assert "fail" in result.error and "ValueError" in result.error and "boom" in result.error + + events = read_events(tmp_path / "001-fail.jsonl") + assert [e["event"] for e in events] == ["start", "finish"] + assert events[-1]["success"] is False + + +async def test_react_process_failure(tmp_path): + def builder(sp, oid, tns): + return _RaisingAgent(AgentError("rate limit")), MockToolRegistry() + + tool = make_tool(tmp_path, builder) + result = await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="rl")) + + assert result.success is False + assert "rl" in result.error and "AgentError" in result.error + + names = [e["event"] for e in read_events(tmp_path / "001-rl.jsonl")] + assert "error" in names and "finish" in names + assert names.index("error") < names.index("finish") + assert next(e for e in read_events(tmp_path / "001-rl.jsonl") if e["event"] == "finish")["success"] is False + + +async def test_finally_close_runs_on_base_exception(tmp_path): + """KeyboardInterrupt propagates but logger.close() still runs via finally.""" + + def builder(sp, oid, tns): + return _RaisingAgent(KeyboardInterrupt()), MockToolRegistry() + + tool = make_tool(tmp_path, builder) + + with pytest.raises(KeyboardInterrupt): + await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="ki")) + + names = [e["event"] for e in read_events(tmp_path / "001-ki.jsonl")] + assert "error" in names + assert "finish" not in names + + +# --------------------------------------------------------------------------- +# Counter and log file naming +# --------------------------------------------------------------------------- + + +async def test_counter_increments_per_invocation(tmp_path): + tool = make_tool(tmp_path, make_builder([make_response(text="r1"), make_response(text="r2")])) + + await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="a")) + await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="b")) + + assert (tmp_path / "001-a.jsonl").exists() + assert (tmp_path / "002-b.jsonl").exists() + + +async def test_parallel_spawns_get_distinct_counters(tmp_path): + owner_ids: list[str] = [] + + def recording_builder(sp: str, owner_id: str, tns: list[str]): + owner_ids.append(owner_id) + return MockAgent([make_response(text="ok")]), MockToolRegistry() + + tool = SpawnSubagentTool(owner_id="parent", subagent_builder=recording_builder, allowed_tools=[], log_dir=tmp_path) + results = await asyncio.gather( + *[tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name=n)) for n in ["x", "y", "z"]] + ) + + assert all(r.success for r in results) + assert len(list(tmp_path.glob("*.jsonl"))) == 3 + assert len(set(owner_ids)) == 3 + + +# --------------------------------------------------------------------------- +# Pydantic input validation (via BaseTool.run) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "raw", + [ + {"prompt": "x"}, + {"system_prompt": "s", "prompt": "p", "tools": [], "bad_field": True}, + ], + ids=["missing_field", "extra_field"], +) +async def test_pydantic_rejects_invalid_input(raw): + tool = SpawnSubagentTool( + owner_id="p", subagent_builder=lambda *a: (None, None), allowed_tools=[], log_dir=Path("/tmp") + ) + result = await tool.run(raw) + assert result.success is False diff --git a/ddev/tests/ai/tools/test_registry.py b/ddev/tests/ai/tools/test_registry.py index e307773f5b7b6..a8e3f4f40ea3e 100644 --- a/ddev/tests/ai/tools/test_registry.py +++ b/ddev/tests/ai/tools/test_registry.py @@ -153,6 +153,7 @@ def test_available_tool_names_returns_fresh_copy(): OWNER_ID = "test-agent" +TOOLS_WITHOUT_EXTRA_DEPS = [n for n in ToolRegistry.available_tool_names() if n != "spawn_subagent"] def test_from_names_empty(tmp_path): @@ -169,7 +170,7 @@ def test_from_names_unknown_raises(tmp_path): ) -@pytest.mark.parametrize("name", ToolRegistry.available_tool_names()) +@pytest.mark.parametrize("name", TOOLS_WITHOUT_EXTRA_DEPS) def test_from_names_each_known_tool(name, tmp_path): registry = ToolRegistry.from_names( [name], owner_id=OWNER_ID, file_registry=FileRegistry(policy=FileAccessPolicy(write_root=tmp_path)) @@ -179,7 +180,7 @@ def test_from_names_each_known_tool(name, tmp_path): def test_from_names_all_at_once(tmp_path): - all_names = ToolRegistry.available_tool_names() + all_names = TOOLS_WITHOUT_EXTRA_DEPS registry = ToolRegistry.from_names( all_names, owner_id=OWNER_ID, file_registry=FileRegistry(policy=FileAccessPolicy(write_root=tmp_path)) ) @@ -187,9 +188,18 @@ def test_from_names_all_at_once(tmp_path): assert built_names == set(all_names) +def test_from_names_spawn_subagent_without_deps_raises(tmp_path): + with pytest.raises(ValueError, match="requires both 'subagent_builder' and 'log_dir'"): + ToolRegistry.from_names( + ["spawn_subagent"], + owner_id=OWNER_ID, + file_registry=FileRegistry(policy=FileAccessPolicy(write_root=tmp_path)), + ) + + def test_from_names_fs_tools_share_file_registry(tmp_path): """All tools that use the file registry in the same ToolRegistry share a single instance.""" - all_names = ToolRegistry.available_tool_names() + all_names = TOOLS_WITHOUT_EXTRA_DEPS registry = ToolRegistry.from_names( all_names, owner_id=OWNER_ID, file_registry=FileRegistry(policy=FileAccessPolicy(write_root=tmp_path)) ) From ffbe46fed59e523a3e2f4a426344d897c0e43c81 Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Fri, 22 May 2026 11:01:01 +0200 Subject: [PATCH 2/8] Move Filelogger to tools/agents/ and rename it to Agentlogger --- .../agents/agent_logger.py} | 2 +- ddev/src/ddev/ai/tools/agents/spawn_subagent.py | 4 ++-- .../agents/test_agent_logger.py} | 14 +++++++------- ddev/tests/ai/tools/agents/test_spawn_subagent.py | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) rename ddev/src/ddev/ai/{callbacks/file_logger.py => tools/agents/agent_logger.py} (99%) rename ddev/tests/ai/{callbacks/test_file_logger.py => tools/agents/test_agent_logger.py} (92%) diff --git a/ddev/src/ddev/ai/callbacks/file_logger.py b/ddev/src/ddev/ai/tools/agents/agent_logger.py similarity index 99% rename from ddev/src/ddev/ai/callbacks/file_logger.py rename to ddev/src/ddev/ai/tools/agents/agent_logger.py index 2e58e4057e915..14a20a43d567a 100644 --- a/ddev/src/ddev/ai/callbacks/file_logger.py +++ b/ddev/src/ddev/ai/tools/agents/agent_logger.py @@ -12,7 +12,7 @@ from ddev.ai.tools.core.types import ToolResult -class FileLogger: +class AgentLogger: """Append-only JSONL writer for ReAct events plus subagent start/finish bookkeeping. Owns the file handle. Call build_callbacks() to obtain a Callbacks object whose diff --git a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py index ec9b03f66cd85..ce714d1350013 100644 --- a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py +++ b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py @@ -9,8 +9,8 @@ from ddev.ai.agent.build import SubagentBuilder from ddev.ai.agent.types import StopReason -from ddev.ai.callbacks.file_logger import FileLogger from ddev.ai.react.process import ReActProcess +from ddev.ai.tools.agents.agent_logger import AgentLogger from ddev.ai.tools.core.base import BaseTool, BaseToolInput from ddev.ai.tools.core.types import ToolResult @@ -105,7 +105,7 @@ async def __call__(self, tool_input: SpawnSubagentInput) -> ToolResult: subagent_id = f"{self._owner_id}.sub.{self._counter:03d}-{label}" log_path = self._log_dir / f"{self._counter:03d}-{label}.jsonl" - logger = FileLogger(log_path) + logger = AgentLogger(log_path) try: logger.log_start( system_prompt=tool_input.system_prompt, diff --git a/ddev/tests/ai/callbacks/test_file_logger.py b/ddev/tests/ai/tools/agents/test_agent_logger.py similarity index 92% rename from ddev/tests/ai/callbacks/test_file_logger.py rename to ddev/tests/ai/tools/agents/test_agent_logger.py index 1b4f52bf251cb..808e9a84e44da 100644 --- a/ddev/tests/ai/callbacks/test_file_logger.py +++ b/ddev/tests/ai/tools/agents/test_agent_logger.py @@ -7,7 +7,7 @@ import pytest from ddev.ai.agent.types import AgentResponse, StopReason, TokenUsage, ToolCall -from ddev.ai.callbacks.file_logger import FileLogger +from ddev.ai.tools.agents.agent_logger import AgentLogger from ddev.ai.tools.core.types import ToolResult @@ -31,7 +31,7 @@ def read_events(log_path) -> list[dict]: def test_log_entries_are_valid_jsonl_with_timestamp(tmp_path): log_path = tmp_path / "log.jsonl" - logger = FileLogger(log_path) + logger = AgentLogger(log_path) logger.log_start(system_prompt="sys", prompt="go", tools=["read_file"]) logger.log_finish(success=True, iterations=1) logger.close() @@ -45,7 +45,7 @@ def test_log_entries_are_valid_jsonl_with_timestamp(tmp_path): def test_flush_after_each_write(tmp_path): log_path = tmp_path / "log.jsonl" - logger = FileLogger(log_path) + logger = AgentLogger(log_path) logger.log_start(system_prompt="s", prompt="p", tools=[]) # A second file handle reads the line without closing the logger first assert log_path.read_text(encoding="utf-8").strip() != "" @@ -54,7 +54,7 @@ def test_flush_after_each_write(tmp_path): def test_close_is_idempotent_and_prevents_further_writes(tmp_path): log_path = tmp_path / "log.jsonl" - logger = FileLogger(log_path) + logger = AgentLogger(log_path) logger.log_start(system_prompt="s", prompt="p", tools=[]) logger.close() logger.close() # must not raise @@ -64,12 +64,12 @@ def test_close_is_idempotent_and_prevents_further_writes(tmp_path): def test_constructor_requires_existing_parent(tmp_path): with pytest.raises(OSError): - FileLogger(tmp_path / "doesnotexist" / "log.jsonl") + AgentLogger(tmp_path / "doesnotexist" / "log.jsonl") def test_non_serializable_values_use_str_repr(tmp_path): log_path = tmp_path / "log.jsonl" - logger = FileLogger(log_path) + logger = AgentLogger(log_path) class Unserializable: def __repr__(self): @@ -88,7 +88,7 @@ def __repr__(self): async def test_build_callbacks_fires_all_event_types(tmp_path): log_path = tmp_path / "log.jsonl" - logger = FileLogger(log_path) + logger = AgentLogger(log_path) callbacks = logger.build_callbacks() tool_call = ToolCall(id="tc1", name="read_file", input={"path": "/f"}) diff --git a/ddev/tests/ai/tools/agents/test_spawn_subagent.py b/ddev/tests/ai/tools/agents/test_spawn_subagent.py index 95c7c3e2d581c..f2fbca64f6d45 100644 --- a/ddev/tests/ai/tools/agents/test_spawn_subagent.py +++ b/ddev/tests/ai/tools/agents/test_spawn_subagent.py @@ -175,7 +175,7 @@ async def test_happy_path(tmp_path): async def test_multi_iteration_wires_callbacks(tmp_path): - """Proves FileLogger callbacks are wired: a subagent tool call produces a tool_call log event.""" + """Proves AgentLogger callbacks are wired: a subagent tool call produces a tool_call log event.""" tool_call = ToolCall(id="tc1", name="read_file", input={"path": "/f"}) tool = make_tool( tmp_path, From 2423dd19e22390168d16e85e6ccc1219f6f90d58 Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Fri, 22 May 2026 12:29:39 +0200 Subject: [PATCH 3/8] Few bugs fixed --- ddev/src/ddev/ai/phases/agentic_phase.py | 23 +++-- .../ddev/ai/tools/agents/spawn_subagent.py | 1 + ddev/src/ddev/ai/tools/registry.py | 9 +- ddev/tests/ai/phases/test_agentic_phase.py | 34 ++++++- .../ai/tools/agents/test_agent_logger.py | 22 ++++- .../ai/tools/agents/test_spawn_subagent.py | 90 ++++++++++++++----- 6 files changed, 145 insertions(+), 34 deletions(-) diff --git a/ddev/src/ddev/ai/phases/agentic_phase.py b/ddev/src/ddev/ai/phases/agentic_phase.py index 25431f7a44c20..ab98e38722b45 100644 --- a/ddev/src/ddev/ai/phases/agentic_phase.py +++ b/ddev/src/ddev/ai/phases/agentic_phase.py @@ -16,6 +16,7 @@ from ddev.ai.phases.template import render_inline, render_prompt from ddev.ai.react.process import ReActProcess from ddev.ai.tools.fs.file_registry import FileRegistry +from ddev.ai.tools.registry import TOOL_MANIFEST def render_task_prompt( @@ -77,6 +78,9 @@ def __init__( ) self._agent_builder = agent_builder self._subagent_builder = subagent_builder + self._subagent_log_dir = ( + checkpoint_manager.root / "subagents" / phase_id if subagent_builder is not None else None + ) @classmethod def validate_config( @@ -108,8 +112,12 @@ def extra_init_kwargs( # type: ignore[override] agent_config = agents[phase_config.agent] subagent_builder = None - # TODO: generalize this dispatch if more agent-meta tools appear in tools/agents/. - if "spawn_subagent" in agent_config.tools: + requires_subagent_builder = any( + spec.requires_subagent_builder + for name in agent_config.tools + if (spec := TOOL_MANIFEST.get(name)) is not None + ) + if requires_subagent_builder: subagent_builder = make_subagent_builder( parent_agent_config=agent_config, agent_clients=agent_clients, @@ -162,11 +170,12 @@ def _build_agent_and_process(self, context: dict[str, Any]) -> tuple[BaseAgent[A context, self._resolver, ) - log_dir = None - if self._subagent_builder is not None: - log_dir = self._checkpoint_manager.root / "subagents" / self._phase_id - - agent, tool_registry = self._agent_builder(system_prompt, self._phase_id, self._subagent_builder, log_dir) + agent, tool_registry = self._agent_builder( + system_prompt, + self._phase_id, + self._subagent_builder, + self._subagent_log_dir, + ) process = ReActProcess( agent=agent, tool_registry=tool_registry, diff --git a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py index ce714d1350013..2e126da14ee4d 100644 --- a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py +++ b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py @@ -38,6 +38,7 @@ class SpawnSubagentInput(BaseToolInput): str | None, Field( description=("Optional short human-readable name for the subagent."), + pattern=r"^$|^[A-Za-z0-9._-]{1,64}$", ), ] = None diff --git a/ddev/src/ddev/ai/tools/registry.py b/ddev/src/ddev/ai/tools/registry.py index 828694eed6c11..48b99e8f065da 100644 --- a/ddev/src/ddev/ai/tools/registry.py +++ b/ddev/src/ddev/ai/tools/registry.py @@ -70,11 +70,13 @@ class ToolSpec: ``module`` is relative to the registry's package (e.g. ``"fs.read_file"``). ``factory`` receives the already-imported class and the shared ToolContext and returns a constructed tool instance. + ``requires_subagent_builder`` marks agentic tools that need subagent wiring. """ module: str cls: str factory: Callable[[type, ToolContext], ToolProtocol] = _plain_factory + requires_subagent_builder: bool = False TOOL_MANIFEST: dict[str, ToolSpec] = { @@ -94,7 +96,12 @@ class ToolSpec: "ddev_env_test": ToolSpec("shell.ddev.env_test", "DdevEnvTestTool"), "ddev_release_changelog": ToolSpec("shell.ddev.release_changelog", "DdevReleaseChangelogTool"), "ddev_validate": ToolSpec("shell.ddev.validate", "DdevValidateTool"), - "spawn_subagent": ToolSpec("agents.spawn_subagent", "SpawnSubagentTool", factory=_spawn_subagent_factory), + "spawn_subagent": ToolSpec( + "agents.spawn_subagent", + "SpawnSubagentTool", + factory=_spawn_subagent_factory, + requires_subagent_builder=True, + ), } diff --git a/ddev/tests/ai/phases/test_agentic_phase.py b/ddev/tests/ai/phases/test_agentic_phase.py index bc333f965aee9..36d4701b5b353 100644 --- a/ddev/tests/ai/phases/test_agentic_phase.py +++ b/ddev/tests/ai/phases/test_agentic_phase.py @@ -365,6 +365,27 @@ async def _response(response, iteration): # --------------------------------------------------------------------------- +@pytest.mark.parametrize( + ("tools", "expected"), + [(["spawn_subagent"], True), (["read_file"], False), ([], False)], + ids=["spawn", "regular_tool", "no_tools"], +) +def test_extra_init_kwargs_creates_subagent_builder_from_tool_metadata( + flow_dir: Path, + tools: list[str], + expected: bool, +) -> None: + kwargs = AgenticPhase.extra_init_kwargs( + phase_id="p1", + phase_config=PhaseConfig(agent="writer", tasks=[TaskConfig(name="t1", prompt="Do the work.")]), + agents={"writer": AgentConfig(tools=tools)}, + agent_clients={}, + file_registry=FileRegistry(policy=FileAccessPolicy(write_root=flow_dir)), + ) + + assert (kwargs["subagent_builder"] is not None) is expected + + async def test_spawn_subagent_wiring(flow_dir, message_queue): """Phase correctly passes subagent_builder + log_dir to the agent builder at execute time.""" @@ -394,7 +415,17 @@ def mock_subagent_builder(system_prompt: str, owner_id: str, tool_names: list[st from ddev.ai.tools.agents.spawn_subagent import SpawnSubagentTool - def agent_builder_fn(system_prompt: str, owner_id: str, subagent_builder=None, log_dir=None): + captured_log_dirs: list[Path | None] = [] + + def agent_builder_fn( + system_prompt: str, + owner_id: str, + subagent_builder=None, + log_dir: Path | None = None, + ): + captured_log_dirs.append(log_dir) + assert subagent_builder is not None + assert log_dir is not None parent_agent.name = owner_id return parent_agent, ToolRegistry( [ @@ -427,6 +458,7 @@ def agent_builder_fn(system_prompt: str, owner_id: str, subagent_builder=None, l submitted = [message_queue.get_nowait() for _ in range(message_queue.qsize())] assert not any(isinstance(m, PhaseFailedMessage) for m in submitted) assert subagent_calls == ["you are a helper"] + assert captured_log_dirs == [checkpoint_manager.root / "subagents" / "p1"] log_file = checkpoint_manager.root / "subagents" / "p1" / "001-child.jsonl" assert log_file.exists() diff --git a/ddev/tests/ai/tools/agents/test_agent_logger.py b/ddev/tests/ai/tools/agents/test_agent_logger.py index 808e9a84e44da..7cbd58767c859 100644 --- a/ddev/tests/ai/tools/agents/test_agent_logger.py +++ b/ddev/tests/ai/tools/agents/test_agent_logger.py @@ -52,7 +52,7 @@ def test_flush_after_each_write(tmp_path): logger.close() -def test_close_is_idempotent_and_prevents_further_writes(tmp_path): +def test_close_is_idempotent_and_prevents_further_writes(tmp_path, caplog): log_path = tmp_path / "log.jsonl" logger = AgentLogger(log_path) logger.log_start(system_prompt="s", prompt="p", tools=[]) @@ -60,6 +60,26 @@ def test_close_is_idempotent_and_prevents_further_writes(tmp_path): logger.close() # must not raise logger.log_finish(success=False) # must not write assert len(read_events(log_path)) == 1 + assert "dropping event 'finish'" in caplog.text + + +def test_reopening_same_path_appends_start_run_delimiter(tmp_path): + log_path = tmp_path / "log.jsonl" + + logger = AgentLogger(log_path) + logger.log_start(system_prompt="s", prompt="first", tools=[]) + logger.log_finish(success=True) + logger.close() + + logger = AgentLogger(log_path) + logger.log_start(system_prompt="s", prompt="second", tools=[]) + logger.log_finish(success=True) + logger.close() + + events = read_events(log_path) + assert [event["event"] for event in events] == ["start", "finish", "start", "finish"] + assert events[0]["prompt"] == "first" + assert events[2]["prompt"] == "second" def test_constructor_requires_existing_parent(tmp_path): diff --git a/ddev/tests/ai/tools/agents/test_spawn_subagent.py b/ddev/tests/ai/tools/agents/test_spawn_subagent.py index f2fbca64f6d45..9856169722463 100644 --- a/ddev/tests/ai/tools/agents/test_spawn_subagent.py +++ b/ddev/tests/ai/tools/agents/test_spawn_subagent.py @@ -5,28 +5,34 @@ import asyncio import json from pathlib import Path +from typing import Any import pytest +from anthropic.types import ToolParam +from ddev.ai.agent.base import BaseAgent +from ddev.ai.agent.build import SubagentBuilder from ddev.ai.agent.exceptions import AgentError from ddev.ai.agent.types import AgentResponse, StopReason, TokenUsage, ToolCall, ToolResultMessage from ddev.ai.tools.agents.spawn_subagent import SpawnSubagentInput, SpawnSubagentTool from ddev.ai.tools.core.types import ToolResult +from ddev.ai.tools.registry import ToolRegistry # --------------------------------------------------------------------------- # Mock helpers # --------------------------------------------------------------------------- -class MockAgent: +class MockAgent(BaseAgent[Any]): def __init__(self, responses: list[AgentResponse]) -> None: + super().__init__("mock", "", ToolRegistry([])) self._responses = list(responses) self._index = 0 - self.name = "mock" - self._history: list = [] async def send( - self, content: str | list[ToolResultMessage], allowed_tools: list[str] | None = None + self, + content: str | list[ToolResultMessage], + allowed_tools: list[str] | None = None, ) -> AgentResponse: response = self._responses[self._index] self._index += 1 @@ -42,15 +48,18 @@ async def compact_preserving_last_turn(self) -> AgentResponse | None: return None -class _RaisingAgent: +class _RaisingAgent(BaseAgent[Any]): """Raises a fixed exception on every send() call.""" def __init__(self, exc: BaseException) -> None: + super().__init__("raising", "", ToolRegistry([])) self._exc = exc - self.name = "raising" - self._history: list = [] - async def send(self, content, allowed_tools=None) -> AgentResponse: + async def send( + self, + content: str | list[ToolResultMessage], + allowed_tools: list[str] | None = None, + ) -> AgentResponse: raise self._exc def reset(self) -> None: @@ -63,15 +72,16 @@ async def compact_preserving_last_turn(self) -> AgentResponse | None: return None -class MockToolRegistry: +class MockToolRegistry(ToolRegistry): def __init__(self, result: ToolResult | None = None) -> None: + super().__init__([]) self._result = result or ToolResult(success=True, data="ok") @property - def definitions(self) -> list: + def definitions(self) -> list[ToolParam]: return [] - async def run(self, name: str, raw: dict) -> ToolResult: + async def run(self, name: str, raw: dict[str, object]) -> ToolResult: return self._result @@ -88,18 +98,21 @@ def make_response( ) -def make_builder(responses: list[AgentResponse], tool_result: ToolResult | None = None): +def make_builder(responses: list[AgentResponse], tool_result: ToolResult | None = None) -> SubagentBuilder: """Return a builder closure that replays fixed responses.""" tr = tool_result or ToolResult(success=True, data="ok") - def builder(system_prompt: str, owner_id: str, tool_names: list[str]): + def builder(system_prompt: str, owner_id: str, tool_names: list[str]) -> tuple[BaseAgent[Any], ToolRegistry]: return MockAgent(list(responses)), MockToolRegistry(tr) return builder def make_tool( - log_dir: Path, builder, allowed_tools: list[str] | None = None, owner_id: str = "parent" + log_dir: Path, + builder: SubagentBuilder, + allowed_tools: list[str] | None = None, + owner_id: str = "parent", ) -> SpawnSubagentTool: return SpawnSubagentTool( owner_id=owner_id, @@ -161,14 +174,23 @@ async def test_mkdir_failure(tmp_path): # --------------------------------------------------------------------------- -async def test_happy_path(tmp_path): +@pytest.mark.parametrize( + ("name", "expected_log_name"), + [ + ("worker", "001-worker.jsonl"), + (None, "001-unnamed.jsonl"), + ("", "001-unnamed.jsonl"), + ], + ids=["named", "none", "empty"], +) +async def test_happy_path(tmp_path: Path, name: str | None, expected_log_name: str) -> None: tool = make_tool(tmp_path, make_builder([make_response(text="ok")])) - result = await tool(SpawnSubagentInput(system_prompt="sys", prompt="do it", tools=[], name="worker")) + result = await tool(SpawnSubagentInput(system_prompt="sys", prompt="do it", tools=[], name=name)) assert result.success is True assert result.data == "ok" - events = read_events(tmp_path / "001-worker.jsonl") + events = read_events(tmp_path / expected_log_name) assert events[0]["event"] == "start" assert events[-1]["event"] == "finish" assert events[-1]["success"] is True @@ -211,7 +233,11 @@ async def test_max_tokens_response_prefixed(tmp_path): async def test_builder_failure(tmp_path): - def failing_builder(sp, oid, tns): + def failing_builder( + system_prompt: str, + owner_id: str, + tool_names: list[str], + ) -> tuple[BaseAgent[Any], ToolRegistry]: raise ValueError("boom") tool = SpawnSubagentTool(owner_id="parent", subagent_builder=failing_builder, allowed_tools=[], log_dir=tmp_path) @@ -226,7 +252,11 @@ def failing_builder(sp, oid, tns): async def test_react_process_failure(tmp_path): - def builder(sp, oid, tns): + def builder( + system_prompt: str, + owner_id: str, + tool_names: list[str], + ) -> tuple[BaseAgent[Any], ToolRegistry]: return _RaisingAgent(AgentError("rate limit")), MockToolRegistry() tool = make_tool(tmp_path, builder) @@ -244,7 +274,11 @@ def builder(sp, oid, tns): async def test_finally_close_runs_on_base_exception(tmp_path): """KeyboardInterrupt propagates but logger.close() still runs via finally.""" - def builder(sp, oid, tns): + def builder( + system_prompt: str, + owner_id: str, + tool_names: list[str], + ) -> tuple[BaseAgent[Any], ToolRegistry]: return _RaisingAgent(KeyboardInterrupt()), MockToolRegistry() tool = make_tool(tmp_path, builder) @@ -275,7 +309,11 @@ async def test_counter_increments_per_invocation(tmp_path): async def test_parallel_spawns_get_distinct_counters(tmp_path): owner_ids: list[str] = [] - def recording_builder(sp: str, owner_id: str, tns: list[str]): + def recording_builder( + system_prompt: str, + owner_id: str, + tool_names: list[str], + ) -> tuple[BaseAgent[Any], ToolRegistry]: owner_ids.append(owner_id) return MockAgent([make_response(text="ok")]), MockToolRegistry() @@ -299,12 +337,16 @@ def recording_builder(sp: str, owner_id: str, tns: list[str]): [ {"prompt": "x"}, {"system_prompt": "s", "prompt": "p", "tools": [], "bad_field": True}, + {"system_prompt": "s", "prompt": "p", "tools": [], "name": "../oops"}, ], - ids=["missing_field", "extra_field"], + ids=["missing_field", "extra_field", "unsafe_name"], ) -async def test_pydantic_rejects_invalid_input(raw): +async def test_pydantic_rejects_invalid_input(raw: dict[str, object]) -> None: tool = SpawnSubagentTool( - owner_id="p", subagent_builder=lambda *a: (None, None), allowed_tools=[], log_dir=Path("/tmp") + owner_id="p", + subagent_builder=make_builder([make_response()]), + allowed_tools=[], + log_dir=Path("/tmp"), ) result = await tool.run(raw) assert result.success is False From 1bdbad81aec5ae4a7d2e352a00d178076fcd499f Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Fri, 22 May 2026 12:53:39 +0200 Subject: [PATCH 4/8] Fix bug in test --- ddev/tests/ai/tools/agents/test_agent_logger.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ddev/tests/ai/tools/agents/test_agent_logger.py b/ddev/tests/ai/tools/agents/test_agent_logger.py index 7cbd58767c859..6f15343fad07c 100644 --- a/ddev/tests/ai/tools/agents/test_agent_logger.py +++ b/ddev/tests/ai/tools/agents/test_agent_logger.py @@ -52,7 +52,7 @@ def test_flush_after_each_write(tmp_path): logger.close() -def test_close_is_idempotent_and_prevents_further_writes(tmp_path, caplog): +def test_close_is_idempotent_and_prevents_further_writes(tmp_path): log_path = tmp_path / "log.jsonl" logger = AgentLogger(log_path) logger.log_start(system_prompt="s", prompt="p", tools=[]) @@ -60,7 +60,6 @@ def test_close_is_idempotent_and_prevents_further_writes(tmp_path, caplog): logger.close() # must not raise logger.log_finish(success=False) # must not write assert len(read_events(log_path)) == 1 - assert "dropping event 'finish'" in caplog.text def test_reopening_same_path_appends_start_run_delimiter(tmp_path): From 7375f938ebebcface4a0df99f21530d1c39c800a Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Fri, 22 May 2026 15:21:47 +0200 Subject: [PATCH 5/8] Make subagents share same Fileregistry as their parent --- ddev/src/ddev/ai/agent/build.py | 11 ++++---- ddev/src/ddev/ai/phases/agentic_phase.py | 2 +- ddev/tests/ai/agent/test_build.py | 34 +++++++++++++++++------- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/ddev/src/ddev/ai/agent/build.py b/ddev/src/ddev/ai/agent/build.py index 6ab2e30fb9428..28a56e620bbbc 100644 --- a/ddev/src/ddev/ai/agent/build.py +++ b/ddev/src/ddev/ai/agent/build.py @@ -10,7 +10,6 @@ from ddev.ai.agent.anthropic_client import AnthropicAgent from ddev.ai.agent.base import BaseAgent -from ddev.ai.tools.fs.file_access_policy import FileAccessPolicy from ddev.ai.tools.fs.file_registry import FileRegistry from ddev.ai.tools.registry import ToolRegistry @@ -95,12 +94,12 @@ def build_agent( def build_subagent( parent_agent_config: AgentConfig, agent_clients: dict[str, Any], - file_access_policy: FileAccessPolicy, + file_registry: FileRegistry, system_prompt: str, owner_id: str, tool_names: list[str], ) -> tuple[BaseAgent[Any], ToolRegistry]: - """Build a subagent + ToolRegistry. Always uses a fresh FileRegistry. + """Build a subagent + ToolRegistry using the shared FileRegistry. Reuses the parent's provider/model/max_tokens. No subagent_builder or log_dir is forwarded, so the subagent cannot recursively spawn subagents — @@ -112,7 +111,7 @@ def build_subagent( system_prompt=system_prompt, owner_id=owner_id, tool_names=tool_names, - file_registry=FileRegistry(policy=file_access_policy), + file_registry=file_registry, ) @@ -145,7 +144,7 @@ def builder( def make_subagent_builder( parent_agent_config: AgentConfig, agent_clients: dict[str, Any], - file_access_policy: FileAccessPolicy, + file_registry: FileRegistry, ) -> SubagentBuilder: """Return a closure that builds a subagent+registry given (system_prompt, owner_id, tool_names).""" @@ -153,7 +152,7 @@ def builder(system_prompt: str, owner_id: str, tool_names: list[str]) -> tuple[B return build_subagent( parent_agent_config=parent_agent_config, agent_clients=agent_clients, - file_access_policy=file_access_policy, + file_registry=file_registry, system_prompt=system_prompt, owner_id=owner_id, tool_names=tool_names, diff --git a/ddev/src/ddev/ai/phases/agentic_phase.py b/ddev/src/ddev/ai/phases/agentic_phase.py index ab98e38722b45..948bcfb3e7728 100644 --- a/ddev/src/ddev/ai/phases/agentic_phase.py +++ b/ddev/src/ddev/ai/phases/agentic_phase.py @@ -121,7 +121,7 @@ def extra_init_kwargs( # type: ignore[override] subagent_builder = make_subagent_builder( parent_agent_config=agent_config, agent_clients=agent_clients, - file_access_policy=file_registry.policy, + file_registry=file_registry, ) return { diff --git a/ddev/tests/ai/agent/test_build.py b/ddev/tests/ai/agent/test_build.py index 9cff25a6f8881..2ee6a42762bf6 100644 --- a/ddev/tests/ai/agent/test_build.py +++ b/ddev/tests/ai/agent/test_build.py @@ -80,19 +80,33 @@ def test_build_agent_uses_config_tools(file_registry, clients): # --------------------------------------------------------------------------- -def test_build_subagent_creates_fresh_file_registry(policy, clients): +def test_build_subagent_reuses_shared_file_registry(file_registry, clients): config = AgentConfig(provider="anthropic", tools=[]) - caller_registry = FileRegistry(policy=policy) - _, reg_a = build_subagent(config, clients, policy, "sys", "a", []) - _, reg_b = build_subagent(config, clients, policy, "sys", "b", []) - assert reg_a is not reg_b - assert reg_a is not caller_registry + _, registry = build_subagent(config, clients, file_registry, "sys", "child", ["read_file", "edit_file"]) + for tool in registry._tools.values(): + assert tool._registry is file_registry + assert tool._owner_id == "child" -def test_build_subagent_recursion_guard(policy, clients): + +def test_build_subagent_recursion_guard(file_registry, clients): config = AgentConfig.model_construct(provider="anthropic", tools=[]) with pytest.raises(ValueError): - build_subagent(config, clients, policy, "sys", "sub", ["spawn_subagent"]) + build_subagent(config, clients, file_registry, "sys", "sub", ["spawn_subagent"]) + + +async def test_shared_registry_does_not_share_parent_read_authorization(file_registry, clients, tmp_path): + config = AgentConfig(provider="anthropic", tools=[]) + path = tmp_path / "file.txt" + path.write_text("before", encoding="utf-8") + file_registry.record("parent", str(path), "before") + + _, registry = build_subagent(config, clients, file_registry, "sys", "parent.sub.001-child", ["edit_file"]) + result = await registry.run("edit_file", {"path": str(path), "old_string": "before", "new_string": "after"}) + + assert result.success is False + assert "Not authorized" in result.error + assert path.read_text(encoding="utf-8") == "before" # --------------------------------------------------------------------------- @@ -108,9 +122,9 @@ def test_make_agent_builder(file_registry, clients): assert agent.name == "p1" -def test_make_subagent_builder(policy, clients): +def test_make_subagent_builder(file_registry, clients): config = AgentConfig(provider="anthropic", tools=[]) - builder = make_subagent_builder(config, clients, policy) + builder = make_subagent_builder(config, clients, file_registry) agent, registry = builder("sys", "sub-1", []) assert isinstance(agent, AnthropicAgent) assert agent.name == "sub-1" From 0a23136acc3091e2a62aae0e88fcfdf44188cbdd Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Fri, 22 May 2026 16:41:05 +0200 Subject: [PATCH 6/8] Keep track of subagents tokens --- ddev/src/ddev/ai/react/process.py | 2 ++ ddev/src/ddev/ai/tools/agents/spawn_subagent.py | 7 ++++++- ddev/src/ddev/ai/tools/core/types.py | 2 ++ ddev/tests/ai/react/test_process.py | 14 ++++++++++++++ ddev/tests/ai/tools/agents/test_spawn_subagent.py | 2 ++ 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ddev/src/ddev/ai/react/process.py b/ddev/src/ddev/ai/react/process.py index 4ed620947fc63..c925af8e31e6a 100644 --- a/ddev/src/ddev/ai/react/process.py +++ b/ddev/src/ddev/ai/react/process.py @@ -113,6 +113,8 @@ async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> Re r if isinstance(r, ToolResult) else ToolResult(success=False, error=f"{type(r).__name__}: {r}") for r in raw_results ] + total_input += sum(result.total_input_tokens for result in tool_results) + total_output += sum(result.total_output_tokens for result in tool_results) tool_call_results = list(zip(response.tool_calls, tool_results, strict=True)) diff --git a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py index 2e126da14ee4d..d73b085d0055a 100644 --- a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py +++ b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py @@ -152,6 +152,11 @@ async def __call__(self, tool_input: SpawnSubagentInput) -> ToolResult: data = result.final_response.text if result.final_response.stop_reason == StopReason.MAX_TOKENS: data = "[SUBAGENT HIT MAX_TOKENS — RESPONSE MAY BE TRUNCATED]\n\n" + data - return ToolResult(success=True, data=data) + return ToolResult( + success=True, + data=data, + total_input_tokens=result.total_input_tokens, + total_output_tokens=result.total_output_tokens, + ) finally: logger.close() diff --git a/ddev/src/ddev/ai/tools/core/types.py b/ddev/src/ddev/ai/tools/core/types.py index 1e5c89c9929c7..b5a0fc413d492 100644 --- a/ddev/src/ddev/ai/tools/core/types.py +++ b/ddev/src/ddev/ai/tools/core/types.py @@ -15,3 +15,5 @@ class ToolResult(BaseModel): total_size: int | None = None shown_size: int | None = None hint: str | None = None + total_input_tokens: int = 0 + total_output_tokens: int = 0 diff --git a/ddev/tests/ai/react/test_process.py b/ddev/tests/ai/react/test_process.py index 898a983ab4074..e29e6af80b70a 100644 --- a/ddev/tests/ai/react/test_process.py +++ b/ddev/tests/ai/react/test_process.py @@ -494,6 +494,20 @@ async def test_total_tokens_summed_across_iterations() -> None: assert result.iterations == 2 +async def test_tool_result_tokens_included_in_total_tokens() -> None: + responses = [ + make_response(StopReason.TOOL_USE, tool_calls=[make_tool_call()], input_tokens=100, output_tokens=50), + make_response(StopReason.END_TURN, input_tokens=200, output_tokens=80), + ] + agent = MockAgent(responses) + registry = MockToolRegistry(ToolResult(success=True, data="ok", total_input_tokens=30, total_output_tokens=10)) + + result = await make_process(agent, registry=registry).start("Task") + + assert result.total_input_tokens == 330 + assert result.total_output_tokens == 140 + + # --------------------------------------------------------------------------- # Context usage propagation — parametrized None vs present # --------------------------------------------------------------------------- diff --git a/ddev/tests/ai/tools/agents/test_spawn_subagent.py b/ddev/tests/ai/tools/agents/test_spawn_subagent.py index 9856169722463..b1d3cecd0e6c0 100644 --- a/ddev/tests/ai/tools/agents/test_spawn_subagent.py +++ b/ddev/tests/ai/tools/agents/test_spawn_subagent.py @@ -189,6 +189,8 @@ async def test_happy_path(tmp_path: Path, name: str | None, expected_log_name: s assert result.success is True assert result.data == "ok" + assert result.total_input_tokens == 10 + assert result.total_output_tokens == 5 events = read_events(tmp_path / expected_log_name) assert events[0]["event"] == "start" From eb0d2c06350a19042934c7530862f0898432cc0b Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Fri, 22 May 2026 16:47:37 +0200 Subject: [PATCH 7/8] Add try catch in logger opening --- .../ddev/ai/tools/agents/spawn_subagent.py | 9 ++++++- .../ai/tools/agents/test_spawn_subagent.py | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py index d73b085d0055a..6cc5139fea006 100644 --- a/ddev/src/ddev/ai/tools/agents/spawn_subagent.py +++ b/ddev/src/ddev/ai/tools/agents/spawn_subagent.py @@ -106,7 +106,14 @@ async def __call__(self, tool_input: SpawnSubagentInput) -> ToolResult: subagent_id = f"{self._owner_id}.sub.{self._counter:03d}-{label}" log_path = self._log_dir / f"{self._counter:03d}-{label}.jsonl" - logger = AgentLogger(log_path) + try: + logger = AgentLogger(log_path) + except OSError as e: + return ToolResult( + success=False, + error=f"Subagent {label!r} not spawned: cannot open log file {log_path}: {e}", + ) + try: logger.log_start( system_prompt=tool_input.system_prompt, diff --git a/ddev/tests/ai/tools/agents/test_spawn_subagent.py b/ddev/tests/ai/tools/agents/test_spawn_subagent.py index b1d3cecd0e6c0..f3881b539b805 100644 --- a/ddev/tests/ai/tools/agents/test_spawn_subagent.py +++ b/ddev/tests/ai/tools/agents/test_spawn_subagent.py @@ -169,6 +169,30 @@ async def test_mkdir_failure(tmp_path): assert tool._counter == 0 +async def test_logger_open_failure_returns_tool_result(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + def builder( + system_prompt: str, + owner_id: str, + tool_names: list[str], + ) -> tuple[BaseAgent[Any], ToolRegistry]: + raise AssertionError("builder should not be called") + + def failing_logger(log_path: Path) -> None: + raise OSError("permission denied") + + monkeypatch.setattr("ddev.ai.tools.agents.spawn_subagent.AgentLogger", failing_logger) + + tool = make_tool(tmp_path, builder, allowed_tools=[]) + result = await tool(SpawnSubagentInput(system_prompt="s", prompt="p", tools=[], name="x")) + + assert result.success is False + assert "x" in result.error + assert "cannot open log file" in result.error + assert str(tmp_path / "001-x.jsonl") in result.error + assert "permission denied" in result.error + assert list(tmp_path.glob("*.jsonl")) == [] + + # --------------------------------------------------------------------------- # Happy path # --------------------------------------------------------------------------- From a5757e5e7c25d8edd850f140cfd2da07e637500c Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Tue, 26 May 2026 17:47:52 +0200 Subject: [PATCH 8/8] Fix pyproject.toml --- ddev/pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index f2baa037e9570..73928e956e9d4 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -140,6 +140,3 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] #Tests can use assertions and relative imports "**/tests/**/*" = ["I252"] - -[tool.pytest.ini_options] -asyncio_mode = "auto"