Skip to content

Commit 890e17d

Browse files
feat: add Sandbox abstraction for agent code execution environments
Add the Sandbox interface that decouples tool logic from where code runs. Tools that need to execute code or access a filesystem receive a Sandbox instead of managing their own execution, enabling portability across local, Docker, and cloud environments.

Core components:
- Sandbox ABC with streaming AsyncGenerator interface (base.py)
- LocalSandbox for host-process execution via asyncio subprocesses (local.py)
- DockerSandbox for containerized execution via docker exec (docker.py)
- Agent integration: sandbox parameter on Agent.__init__, defaults to LocalSandbox

Key design decisions:
- Only core abstractions in SDK; AgentCoreSandbox and sandbox tools belong in separate packages (external dependencies, different release cycles)
- Streaming output via AsyncGenerator[str | ExecutionResult]: yields lines as they arrive, ExecutionResult as the final yield
- Security: randomized heredoc delimiters, shlex.quote for all paths, stdin piping in DockerSandbox to prevent injection
- Auto-start lifecycle: sandbox starts on first execute() call
- Zero external dependencies for core sandbox package

Tests: 76 new tests (base: 22, local: 17, docker: 18, agent: 6, + shared). All 76 sandbox tests passing, 1686 existing tests still passing.
1 parent a110149 commit 890e17d

11 files changed

Lines changed: 1621 additions & 1 deletion

File tree

src/strands/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""A framework for building, deploying, and managing AI agents."""
22

3-
from . import agent, models, telemetry, types
3+
from . import agent, models, sandbox, telemetry, types
44
from .agent.agent import Agent
55
from .agent.base import AgentBase
66
from .event_loop._retry import ModelRetryStrategy
77
from .plugins import Plugin
8+
from .sandbox.base import ExecutionResult, Sandbox
9+
from .sandbox.docker import DockerSandbox
10+
from .sandbox.local import LocalSandbox
811
from .tools.decorator import tool
912
from .types.tools import ToolContext
1013
from .vended_plugins.skills import AgentSkills, Skill
@@ -14,9 +17,14 @@
1417
"AgentBase",
1518
"AgentSkills",
1619
"agent",
20+
"DockerSandbox",
21+
"ExecutionResult",
22+
"LocalSandbox",
1723
"models",
1824
"ModelRetryStrategy",
1925
"Plugin",
26+
"sandbox",
27+
"Sandbox",
2028
"Skill",
2129
"tool",
2230
"ToolContext",

src/strands/agent/agent.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
from .._async import run_async
2929
from ..event_loop._retry import ModelRetryStrategy
3030
from ..event_loop.event_loop import INITIAL_DELAY, MAX_ATTEMPTS, MAX_DELAY, event_loop_cycle
31+
from ..sandbox.base import Sandbox
32+
from ..sandbox.local import LocalSandbox
3133
from ..tools._tool_helpers import generate_missing_tool_result_content
3234

3335
if TYPE_CHECKING:
@@ -135,6 +137,7 @@ def __init__(
135137
tool_executor: ToolExecutor | None = None,
136138
retry_strategy: ModelRetryStrategy | _DefaultRetryStrategySentinel | None = _DEFAULT_RETRY_STRATEGY,
137139
concurrent_invocation_mode: ConcurrentInvocationMode = ConcurrentInvocationMode.THROW,
140+
sandbox: Sandbox | None = None,
138141
):
139142
"""Initialize the Agent with the specified configuration.
140143
@@ -201,6 +204,9 @@ def __init__(
201204
Set to "unsafe_reentrant" to skip lock acquisition entirely, allowing concurrent invocations.
202205
Warning: "unsafe_reentrant" makes no guarantees about resulting behavior and is provided
203206
only for advanced use cases where the caller understands the risks.
207+
sandbox: Execution environment for agent tools. Tools access the sandbox via
208+
tool_context.agent.sandbox to execute commands, code, and filesystem operations.
209+
Defaults to LocalSandbox (local host execution) when not specified.
204210
205211
Raises:
206212
ValueError: If agent id contains path separators.
@@ -273,6 +279,9 @@ def __init__(
273279

274280
self.tool_caller = _ToolCaller(self)
275281

282+
# Initialize sandbox for tool execution environment
283+
self.sandbox: Sandbox = sandbox if sandbox is not None else LocalSandbox()
284+
276285
self.hooks = HookRegistry()
277286

278287
self._plugin_registry = _PluginRegistry(self)

src/strands/sandbox/__init__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""Sandbox abstraction for agent code execution environments.
2+
3+
This module provides the Sandbox interface that decouples tool logic from where code runs.
4+
Tools that need to execute code or access a filesystem receive a Sandbox instead of managing
5+
their own execution, enabling portability across local, Docker, and cloud environments.
6+
7+
Concrete implementations:
8+
9+
- ``LocalSandbox`` — runs on the host via asyncio subprocesses (default)
10+
- ``DockerSandbox`` — runs inside a Docker container
11+
"""
12+
13+
from .base import ExecutionResult, Sandbox
14+
from .docker import DockerSandbox
15+
from .local import LocalSandbox
16+
17+
__all__ = [
18+
"DockerSandbox",
19+
"ExecutionResult",
20+
"LocalSandbox",
21+
"Sandbox",
22+
]

src/strands/sandbox/base.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
"""Base sandbox interface for agent code execution environments.
2+
3+
This module defines the abstract Sandbox class and the ExecutionResult dataclass.
4+
Sandbox implementations provide the runtime context where tools execute code, run commands,
5+
and interact with a filesystem. Multiple tools share the same Sandbox instance, giving them
6+
a common working directory, environment variables, and filesystem.
7+
8+
Implementations only need to provide execute(). All other methods are built on top of it.
9+
Implementations may override convenience methods with native versions for better performance.
10+
"""
11+
12+
import logging
13+
import secrets
14+
import shlex
15+
from abc import ABC, abstractmethod
16+
from collections.abc import AsyncGenerator
17+
from dataclasses import dataclass
18+
from typing import Any
19+
20+
logger = logging.getLogger(__name__)
21+
22+
23+
@dataclass
24+
class ExecutionResult:
25+
"""Result of code or command execution in a sandbox.
26+
27+
Attributes:
28+
exit_code: The exit code of the command or code execution.
29+
stdout: Standard output captured from execution.
30+
stderr: Standard error captured from execution.
31+
"""
32+
33+
exit_code: int
34+
stdout: str
35+
stderr: str
36+
37+
38+
class Sandbox(ABC):
39+
"""Abstract execution environment for agent tools.
40+
41+
A Sandbox provides the runtime context where tools execute code,
42+
run commands, and interact with a filesystem. Multiple tools
43+
share the same Sandbox instance, giving them a common working
44+
directory, environment variables, and filesystem.
45+
46+
Implementations only need to provide execute(). All other methods
47+
are built on top of it. Implementations may override convenience
48+
methods with native versions for better performance (for example,
49+
LocalSandbox overrides read_file/write_file with native file I/O).
50+
51+
The sandbox auto-starts on the first ``execute()`` call if not already
52+
started, so callers do not need to manually call ``start()`` or use
53+
the async context manager.
54+
55+
Example:
56+
```python
57+
from strands.sandbox import LocalSandbox
58+
59+
sandbox = LocalSandbox(working_dir="/tmp/workspace")
60+
async for chunk in sandbox.execute("echo hello"):
61+
if isinstance(chunk, str):
62+
print(chunk, end="") # stream output
63+
```
64+
"""
65+
66+
def __init__(self) -> None:
67+
"""Initialize base sandbox state."""
68+
self._started = False
69+
70+
@abstractmethod
71+
async def execute(
72+
self,
73+
command: str,
74+
timeout: int | None = None,
75+
) -> AsyncGenerator[str | ExecutionResult, None]:
76+
"""Execute a shell command, streaming output.
77+
78+
Yields stdout/stderr lines as they arrive. The final yield
79+
is an ExecutionResult with the exit code and complete output.
80+
81+
This is the only method implementations must provide. All other
82+
methods are built on top of this one by default.
83+
84+
The sandbox is auto-started on the first call if not already started.
85+
86+
Args:
87+
command: The shell command to execute.
88+
timeout: Maximum execution time in seconds. None means no timeout.
89+
90+
Yields:
91+
str lines of output as they arrive, then a final ExecutionResult.
92+
"""
93+
...
94+
# Make the method signature an async generator for type checkers.
95+
# Concrete subclasses must yield at least one ExecutionResult.
96+
yield # type: ignore[misc] # pragma: no cover
97+
98+
async def execute_code(
99+
self,
100+
code: str,
101+
language: str = "python",
102+
timeout: int | None = None,
103+
) -> AsyncGenerator[str | ExecutionResult, None]:
104+
"""Execute code in the sandbox, streaming output.
105+
106+
Override for native code execution support. The default implementation
107+
passes code to the language interpreter via ``-c`` with proper shell
108+
quoting.
109+
110+
Args:
111+
code: The source code to execute.
112+
language: The programming language interpreter to use.
113+
timeout: Maximum execution time in seconds. None means no timeout.
114+
115+
Yields:
116+
str lines of output as they arrive, then a final ExecutionResult.
117+
"""
118+
async for chunk in self.execute(f"{language} -c {shlex.quote(code)}", timeout=timeout):
119+
yield chunk
120+
121+
async def _execute_to_result(self, command: str, timeout: int | None = None) -> ExecutionResult:
122+
"""Helper: consume the execute() stream and return the final ExecutionResult.
123+
124+
Convenience methods like read_file, write_file, and list_files use
125+
this to get just the final result without dealing with the stream.
126+
127+
Args:
128+
command: The shell command to execute.
129+
timeout: Maximum execution time in seconds.
130+
131+
Returns:
132+
The final ExecutionResult from the stream.
133+
134+
Raises:
135+
RuntimeError: If execute() did not yield an ExecutionResult.
136+
"""
137+
result = None
138+
async for chunk in self.execute(command, timeout=timeout):
139+
if isinstance(chunk, ExecutionResult):
140+
result = chunk
141+
if result is None:
142+
raise RuntimeError("execute() did not yield an ExecutionResult")
143+
return result
144+
145+
async def _execute_code_to_result(
146+
self, code: str, language: str = "python", timeout: int | None = None
147+
) -> ExecutionResult:
148+
"""Helper: consume the execute_code() stream and return the final ExecutionResult.
149+
150+
Args:
151+
code: The source code to execute.
152+
language: The programming language interpreter to use.
153+
timeout: Maximum execution time in seconds.
154+
155+
Returns:
156+
The final ExecutionResult from the stream.
157+
158+
Raises:
159+
RuntimeError: If execute_code() did not yield an ExecutionResult.
160+
"""
161+
result = None
162+
async for chunk in self.execute_code(code, language=language, timeout=timeout):
163+
if isinstance(chunk, ExecutionResult):
164+
result = chunk
165+
if result is None:
166+
raise RuntimeError("execute_code() did not yield an ExecutionResult")
167+
return result
168+
169+
async def read_file(self, path: str) -> str:
170+
"""Read a file from the sandbox filesystem.
171+
172+
Override for native file I/O support. The default implementation
173+
uses shell commands.
174+
175+
Args:
176+
path: Path to the file to read.
177+
178+
Returns:
179+
The file contents as a string.
180+
181+
Raises:
182+
FileNotFoundError: If the file does not exist or cannot be read.
183+
"""
184+
result = await self._execute_to_result(f"cat {shlex.quote(path)}")
185+
if result.exit_code != 0:
186+
raise FileNotFoundError(result.stderr)
187+
return result.stdout
188+
189+
async def write_file(self, path: str, content: str) -> None:
190+
"""Write a file to the sandbox filesystem.
191+
192+
Override for native file I/O support. The default implementation
193+
uses a shell heredoc with a randomized delimiter to prevent
194+
content injection.
195+
196+
Args:
197+
path: Path to the file to write.
198+
content: The content to write to the file.
199+
200+
Raises:
201+
IOError: If the file cannot be written.
202+
"""
203+
# Use a randomized heredoc delimiter to prevent injection when content
204+
# contains the delimiter string.
205+
delimiter = f"STRANDS_EOF_{secrets.token_hex(8)}"
206+
result = await self._execute_to_result(
207+
f"cat > {shlex.quote(path)} << '{delimiter}'\n{content}\n{delimiter}"
208+
)
209+
if result.exit_code != 0:
210+
raise IOError(result.stderr)
211+
212+
async def list_files(self, path: str = ".") -> list[str]:
213+
"""List files in a sandbox directory.
214+
215+
Override for native directory listing support. The default
216+
implementation uses shell commands.
217+
218+
Args:
219+
path: Path to the directory to list.
220+
221+
Returns:
222+
A list of filenames in the directory.
223+
224+
Raises:
225+
FileNotFoundError: If the directory does not exist.
226+
"""
227+
result = await self._execute_to_result(f"ls -1 {shlex.quote(path)}")
228+
if result.exit_code != 0:
229+
raise FileNotFoundError(result.stderr)
230+
return [f for f in result.stdout.strip().split("\n") if f]
231+
232+
async def _ensure_started(self) -> None:
233+
"""Auto-start the sandbox if it has not been started yet."""
234+
if not self._started:
235+
await self.start()
236+
self._started = True
237+
238+
async def start(self) -> None:
239+
"""Initialize the sandbox.
240+
241+
Called once before first use. Override to perform setup such as
242+
starting containers or creating temporary directories.
243+
"""
244+
self._started = True
245+
246+
async def stop(self) -> None:
247+
"""Clean up sandbox resources.
248+
249+
Override to perform cleanup such as stopping containers or
250+
removing temporary directories.
251+
"""
252+
self._started = False
253+
254+
async def __aenter__(self) -> "Sandbox":
255+
"""Enter the async context manager, starting the sandbox."""
256+
await self.start()
257+
self._started = True
258+
return self
259+
260+
async def __aexit__(self, *args: Any) -> None:
261+
"""Exit the async context manager, stopping the sandbox."""
262+
await self.stop()
263+
self._started = False

0 commit comments

Comments
 (0)