11"""Base sandbox interface for agent code execution environments.
22
3- This module defines the abstract Sandbox class and the ExecutionResult dataclass.
3+ This module defines the abstract Sandbox class, the ShellBasedSandbox class, and
4+ the ExecutionResult dataclass.
5+
46Sandbox implementations provide the runtime context where tools execute code, run commands,
57and interact with a filesystem. Multiple tools share the same Sandbox instance, giving them
68a common working directory, environment variables, and filesystem.
79
8- Implementations only need to provide execute(). All other methods are built on top of it.
9- Implementations may override convenience methods with native versions for better performance.
10+ Class hierarchy:
11+
12+ - ``Sandbox`` (ABC): All 5 operations are abstract. Implement this for non-shell-based
13+ sandboxes (e.g., API-based cloud sandboxes).
14+ - ``ShellBasedSandbox`` (ABC): Provides shell-based defaults for file operations and code
15+ execution. Subclasses only need to implement ``execute()``.
1016"""
1117
1218import logging
19+ import re
1320import secrets
1421import shlex
1522from abc import ABC , abstractmethod
1926
2027logger = logging .getLogger (__name__ )
2128
#: Interpreter names that :func:`_validate_language` accepts directly, without
#: consulting :data:`_LANGUAGE_RE`. Used by
#: :meth:`ShellBasedSandbox.execute_code` to guard the ``language`` parameter
#: against command injection.
ALLOWED_LANGUAGES = frozenset(
    {
        "bash",
        "node",
        "perl",
        "php",
        "python",
        "python3",
        "ruby",
        "sh",
    }
)

#: Regex that matches a bare interpreter name: ASCII letters, digits, dots,
#: underscores, and hyphens only. The end is anchored with ``\Z`` rather than
#: ``$`` because ``$`` also matches just before a trailing newline, which would
#: let a value such as ``"python\n"`` pass validation.
_LANGUAGE_RE = re.compile(r"^[a-zA-Z0-9._-]+\Z")
46+
2247
2348@dataclass
2449class ExecutionResult :
@@ -43,10 +68,10 @@ class Sandbox(ABC):
4368 share the same Sandbox instance, giving them a common working
4469 directory, environment variables, and filesystem.
4570
46- Implementations only need to provide execute(). All other methods
47- are built on top of it. Implementations may override convenience
48- methods with native versions for better performance (for example,
49- LocalSandbox overrides read_file/write_file with native file I/O) .
71+ All five operations — `` execute``, ``execute_code``, ``read_file``,
72+ ``write_file``, and ``list_files`` — are abstract. Implement this
73+ directly for non-shell-based backends (e.g., API-driven cloud sandboxes).
74+ For shell-based backends, extend :class:`ShellBasedSandbox` instead .
5075
5176 The sandbox auto-starts on the first ``execute()`` call if not already
5277 started, so callers do not need to manually call ``start()`` or use
@@ -78,9 +103,6 @@ async def execute(
78103 Yields stdout/stderr lines as they arrive. The final yield
79104 is an ExecutionResult with the exit code and complete output.
80105
81- This is the only method implementations must provide. All other
82- methods are built on top of this one by default.
83-
84106 The sandbox is auto-started on the first call if not already started.
85107
86108 Args:
@@ -95,6 +117,7 @@ async def execute(
95117 # Concrete subclasses must yield at least one ExecutionResult.
96118 yield # type: ignore[misc] # pragma: no cover
97119
120+ @abstractmethod
98121 async def execute_code (
99122 self ,
100123 code : str ,
@@ -103,19 +126,143 @@ async def execute_code(
103126 ) -> AsyncGenerator [str | ExecutionResult , None ]:
104127 """Execute code in the sandbox, streaming output.
105128
106- Override for native code execution support. The default implementation
107- passes code to the language interpreter via ``-c`` with proper shell
108- quoting.
129+ Args:
130+ code: The source code to execute.
131+ language: The programming language interpreter to use.
132+ timeout: Maximum execution time in seconds. None means no timeout.
133+
134+ Yields:
135+ str lines of output as they arrive, then a final ExecutionResult.
136+ """
137+ ...
138+ yield # type: ignore[misc] # pragma: no cover
139+
@abstractmethod
async def read_file(self, path: str) -> str:
    """Read a file from the sandbox filesystem.

    Abstract: this base declaration has no implementation; concrete
    sandboxes must supply one.

    Args:
        path: Path to the file to read.

    Returns:
        The file contents as a string.

    Raises:
        FileNotFoundError: If the file does not exist or cannot be read.
    """
    ...
154+
@abstractmethod
async def write_file(self, path: str, content: str) -> None:
    """Write a file to the sandbox filesystem.

    Abstract: this base declaration has no implementation; concrete
    sandboxes must supply one.

    Args:
        path: Path to the file to write.
        content: The content to write to the file.

    Raises:
        IOError: If the file cannot be written.
    """
    ...
167+
@abstractmethod
async def list_files(self, path: str = ".") -> list[str]:
    """List files in a sandbox directory.

    Abstract: this base declaration has no implementation; concrete
    sandboxes must supply one.

    Args:
        path: Path to the directory to list. Defaults to ``"."``.

    Returns:
        A list of filenames in the directory.

    Raises:
        FileNotFoundError: If the directory does not exist.
    """
    ...
182+
async def _ensure_started(self) -> None:
    """Auto-start the sandbox if it has not been started yet.

    Does nothing when ``self._started`` is already truthy. Otherwise awaits
    :meth:`start` and then sets ``self._started`` to ``True``.
    """
    if self._started:
        return
    await self.start()
    # Set the flag here as well, so an overriding ``start()`` that does not
    # touch ``_started`` still leaves the sandbox marked as started.
    self._started = True
188+
async def start(self) -> None:
    """Initialize the sandbox.

    Called once before first use. Override to perform setup such as
    starting containers or creating temporary directories. The base
    implementation only sets ``self._started`` to ``True``.
    """
    self._started = True
196+
async def stop(self) -> None:
    """Clean up sandbox resources.

    Override to perform cleanup such as stopping containers or removing
    temporary directories. The base implementation only sets
    ``self._started`` to ``False``.
    """
    self._started = False
204+
async def __aenter__(self) -> "Sandbox":
    """Enter the async context manager, starting the sandbox.

    Awaits :meth:`start`, sets ``self._started`` to ``True``, and returns
    the sandbox itself so it can be bound with ``async with ... as sandbox``.
    """
    await self.start()
    # Set explicitly in case an overriding ``start()`` does not set the flag.
    self._started = True
    return self
210+
async def __aexit__(self, *args: Any) -> None:
    """Exit the async context manager, stopping the sandbox.

    Awaits :meth:`stop` and sets ``self._started`` to ``False``. The
    exception details passed in ``args`` are ignored, and ``None`` is
    returned, so an exception raised inside the ``async with`` body is not
    suppressed.
    """
    await self.stop()
    # Clear explicitly in case an overriding ``stop()`` does not reset the flag.
    self._started = False
215+
216+
217+ class ShellBasedSandbox (Sandbox , ABC ):
218+ """Abstract sandbox that provides shell-based defaults for file and code operations.
219+
220+ Subclasses only need to implement :meth:`execute`. The remaining four
221+ operations — ``read_file``, ``write_file``, ``list_files``, and
222+ ``execute_code`` — are implemented via shell commands piped through
223+ ``execute()``.
224+
225+ Subclasses may override any method with a native implementation for
226+ better performance (e.g., ``LocalSandbox`` overrides ``read_file``
227+ and ``write_file`` with direct filesystem calls).
228+
229+ Class hierarchy::
230+
231+ Sandbox (ABC, all 5 abstract + lifecycle)
232+ └── ShellBasedSandbox (ABC, only execute() abstract)
233+ ├── LocalSandbox
234+ └── DockerSandbox
235+ """
236+
async def execute_code(
    self,
    code: str,
    language: str = "python",
    timeout: int | None = None,
) -> AsyncGenerator[str | ExecutionResult, None]:
    """Execute code in the sandbox, streaming output.

    The default implementation builds a shell command of the form
    ``<interpreter> <flag> <code>`` with both the interpreter name and the
    code shell-quoted, and runs it through :meth:`execute`. The
    ``language`` value is checked with ``_validate_language`` first to
    prevent command injection.

    The inline-evaluation flag depends on the interpreter: ``-e`` for
    ``node``, ``perl`` and ``ruby``, ``-r`` for ``php``, and ``-c`` for
    everything else (``python``, ``bash``, ``sh``, ...). Trailing version
    suffixes such as ``ruby3.2`` are ignored when choosing the flag.

    Args:
        code: The source code to execute.
        language: The programming language interpreter to use.
            Must match :data:`ALLOWED_LANGUAGES` or be a simple
            interpreter name accepted by ``_validate_language``.
        timeout: Maximum execution time in seconds. None means no timeout.

    Yields:
        str lines of output as they arrive, then a final ExecutionResult.

    Raises:
        ValueError: If the language name is not allowed.
    """
    # ``-c`` is not a universal "run this string" flag: for node, perl and
    # ruby it means "syntax check only", and for php it selects a php.ini
    # path. Those interpreters need their own inline-evaluation flag.
    inline_flags = {"node": "-e", "perl": "-e", "ruby": "-e", "php": "-r"}

    # Validate before doing anything else with the caller-supplied name.
    _validate_language(language)

    # Drop a trailing version suffix ("ruby3.2" -> "ruby") for flag lookup
    # only; the command still invokes the interpreter name as given.
    base_name = language.rstrip("0123456789.")
    flag = inline_flags.get(base_name, "-c")

    command = f"{shlex.quote(language)} {flag} {shlex.quote(code)}"
    async for chunk in self.execute(command, timeout=timeout):
        yield chunk
120267
121268 async def _execute_to_result (self , command : str , timeout : int | None = None ) -> ExecutionResult :
@@ -229,35 +376,23 @@ async def list_files(self, path: str = ".") -> list[str]:
229376 raise FileNotFoundError (result .stderr )
230377 return [f for f in result .stdout .strip ().split ("\n " ) if f ]
231378
232- async def _ensure_started (self ) -> None :
233- """Auto-start the sandbox if it has not been started yet."""
234- if not self ._started :
235- await self .start ()
236- self ._started = True
237-
238- async def start (self ) -> None :
239- """Initialize the sandbox.
240-
241- Called once before first use. Override to perform setup such as
242- starting containers or creating temporary directories.
243- """
244- self ._started = True
245379
246- async def stop ( self ) -> None :
247- """Clean up sandbox resources .
def _validate_language(language: str) -> None:
    """Validate a language interpreter name to prevent command injection.

    The language must be a string that is either in the
    :data:`ALLOWED_LANGUAGES` allowlist or matches :data:`_LANGUAGE_RE` in
    its entirety.

    Args:
        language: The language interpreter name to validate.

    Raises:
        ValueError: If the language name is not allowed, including when it
            is not a string at all.
    """
    # Guard the type first so non-string input yields the documented
    # ValueError instead of a TypeError from hashing or regex matching.
    if isinstance(language, str):
        # ``fullmatch`` requires the whole string to match. ``match`` with a
        # ``$``-terminated pattern would accept a trailing newline.
        if language in ALLOWED_LANGUAGES or _LANGUAGE_RE.fullmatch(language):
            return
    raise ValueError(
        "language must be an alphanumeric interpreter name "
        f"(e.g. 'python', 'node'), got: {language!r}"
    )
0 commit comments