Skip to content

Commit 070a7f3

Browse files
Fieldnote-Echo, enyst, openhands-agent, csmith49
authored
feat(security): defense-in-depth security analyzers (#2472)
Co-authored-by: Engel Nyst <engel.nyst@gmail.com> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Calvin Smith <email@cjsmith.io>
1 parent 9f6718a commit 070a7f3

15 files changed

Lines changed: 2463 additions & 8 deletions

File tree

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
"""Defense-in-depth security example: local analyzers combined with ConfirmRisky.

Shows how the defense-in-depth analyzer family is attached to a
conversation. Analyzers classify each agent action at the action boundary;
the confirmation policy separately decides whether the user is prompted.

Choosing an analyzer does not change the confirmation policy on its own --
both have to be configured explicitly.
"""

from openhands.sdk.security import (
    ConfirmRisky,
    EnsembleSecurityAnalyzer,
    PatternSecurityAnalyzer,
    PolicyRailSecurityAnalyzer,
    SecurityRisk,
)


# Compose the local analyzers into a single ensemble analyzer.
security_analyzer = EnsembleSecurityAnalyzer(
    analyzers=[PolicyRailSecurityAnalyzer(), PatternSecurityAnalyzer()],
)

# Ask the user to confirm only actions classified as HIGH risk.
confirmation_policy = ConfirmRisky(threshold=SecurityRisk.HIGH)

# Attaching both to a conversation:
#
#   conversation = Conversation(agent=agent, workspace=".")
#   conversation.set_security_analyzer(security_analyzer)
#   conversation.set_confirmation_policy(confirmation_policy)
#
# From then on every agent action is classified by the analyzer:
#   HIGH       -> confirmation prompt
#   MEDIUM/LOW -> allowed
#   UNKNOWN    -> confirmed by default (confirm_unknown=True)
#
# Stricter environments can lower the threshold:
#   confirmation_policy = ConfirmRisky(threshold=SecurityRisk.MEDIUM)

print("Defense-in-depth security analyzer configured.")
print(f"Analyzer: {security_analyzer}")
print(f"Confirmation policy: {confirmation_policy}")
print("EXAMPLE_COST: 0")

openhands-sdk/openhands/sdk/security/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
ConfirmRisky,
66
NeverConfirm,
77
)
8+
from openhands.sdk.security.defense_in_depth import (
9+
PatternSecurityAnalyzer,
10+
PolicyRailSecurityAnalyzer,
11+
)
12+
from openhands.sdk.security.ensemble import EnsembleSecurityAnalyzer
813
from openhands.sdk.security.grayswan import GraySwanAnalyzer
914
from openhands.sdk.security.llm_analyzer import LLMSecurityAnalyzer
1015
from openhands.sdk.security.risk import SecurityRisk
@@ -15,6 +20,9 @@
1520
"SecurityAnalyzerBase",
1621
"LLMSecurityAnalyzer",
1722
"GraySwanAnalyzer",
23+
"PatternSecurityAnalyzer",
24+
"PolicyRailSecurityAnalyzer",
25+
"EnsembleSecurityAnalyzer",
1826
"ConfirmationPolicyBase",
1927
"AlwaysConfirm",
2028
"NeverConfirm",
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
"""Local, deterministic security analyzers for the agent action boundary.

The package exposes two analyzers with one responsibility each:

- ``PatternSecurityAnalyzer`` -- regex signatures with two-corpus scanning
- ``PolicyRailSecurityAnalyzer`` -- composed-condition rules (fetch-to-exec, etc.)

Combine them with ``EnsembleSecurityAnalyzer`` and ``ConfirmRisky`` on a
conversation so agent actions are classified before they execute. No
network calls, no model inference, no dependencies beyond the SDK runtime.
"""

from openhands.sdk.security.defense_in_depth.pattern import PatternSecurityAnalyzer
from openhands.sdk.security.defense_in_depth.policy_rails import (
    PolicyRailSecurityAnalyzer,
)


# Public re-exports of this package.
__all__ = ["PatternSecurityAnalyzer", "PolicyRailSecurityAnalyzer"]
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
"""Classify agent actions by matching content against known threat signatures.
2+
3+
When an agent is about to run ``rm -rf /``, you want to catch it. When
4+
the agent merely *thinks about* ``rm -rf /`` while running ``ls /tmp``,
5+
you do not. This module solves that with two scanning corpora:
6+
7+
- **Executable corpus** (tool_name, tool_call arguments): scanned for
8+
shell-destructive, code-execution, and network-to-exec patterns.
9+
- **All-field corpus** (executable + thought/reasoning/summary): scanned
10+
for injection and social-engineering patterns that are dangerous
11+
wherever they appear.
12+
13+
Each pattern carries a stable detector ID for telemetry readiness.
14+
"""
15+
16+
from __future__ import annotations
17+
18+
import re
19+
from typing import Any
20+
21+
from pydantic import Field, PrivateAttr
22+
23+
from openhands.sdk.event import ActionEvent
24+
from openhands.sdk.logger import get_logger
25+
from openhands.sdk.security.analyzer import SecurityAnalyzerBase
26+
from openhands.sdk.security.defense_in_depth.utils import (
27+
_extract_content,
28+
_extract_exec_content,
29+
_normalize,
30+
)
31+
from openhands.sdk.security.risk import SecurityRisk
32+
33+
34+
logger = get_logger(__name__)
35+
# ---------------------------------------------------------------------------
# Stable detector IDs -- do not change between releases without documentation.
# Format: DET_{CORPUS}_{FAMILY}_{SPECIFIC}
# The string values are what gets logged when a pattern matches.
# ---------------------------------------------------------------------------

DET_EXEC_DESTRUCT_RM_RF = "exec.destruct.rm_rf"
DET_EXEC_DESTRUCT_SUDO_RM = "exec.destruct.sudo_rm"
DET_EXEC_DESTRUCT_MKFS = "exec.destruct.mkfs"
DET_EXEC_DESTRUCT_DD = "exec.destruct.dd_raw_disk"
DET_EXEC_CODE_EVAL = "exec.code.eval_call"
DET_EXEC_CODE_EXEC = "exec.code.exec_call"
DET_EXEC_CODE_OS_SYSTEM = "exec.code.os_system"
DET_EXEC_CODE_SUBPROCESS = "exec.code.subprocess"
DET_EXEC_NET_CURL_EXEC = "exec.net.curl_pipe_exec"
DET_EXEC_NET_WGET_EXEC = "exec.net.wget_pipe_exec"
DET_EXEC_NET_CURL = "exec.net.curl"
DET_EXEC_NET_WGET = "exec.net.wget"
DET_INJECT_OVERRIDE = "inject.override"
DET_INJECT_MODE_SWITCH = "inject.mode_switch"
DET_INJECT_IDENTITY = "inject.identity"

# ---------------------------------------------------------------------------
# Pattern definitions
#
# Format: (regex_pattern, description, detector_id)
#
# Pattern design constraints:
#   - No unbounded .* or .+ around alternations (catastrophic backtracking)
#   - Risky spans are bounded ({0,N}) to prevent ReDoS
#   - \s* and \w+ are acceptable in non-alternation positions
#   - \b-anchored to avoid substring matches
#   - IGNORECASE compiled in
# ---------------------------------------------------------------------------

DEFAULT_HIGH_PATTERNS: list[tuple[str, str, str]] = [
    # Destructive filesystem operations
    (
        # First alternative: a single-dash flag cluster that contains both a
        # recursive flag (r/R) and the force flag (f) among other letters,
        # e.g. ``-rfv`` or ``-vfR``. Without it, any extra letter in the
        # cluster defeated the trailing \b and the command went undetected.
        # The lookaheads are bounded to respect the ReDoS constraint above.
        # The remaining alternatives are the original signatures, kept so
        # every previously matching input still matches.
        r"\brm\s+(?:-(?=[a-zA-Z]{0,30}[rR])(?=[a-zA-Z]{0,30}f)[a-zA-Z]{2,32}"
        r"|-[frR]{2,}|-[rR]\s+-f|-f\s+-[rR]"
        r"|--recursive\s+--force|--force\s+--recursive)\b",
        "Recursive force-delete (rm -rf variants)",
        DET_EXEC_DESTRUCT_RM_RF,
    ),
    (r"\bsudo\s+rm\b", "Privileged file deletion", DET_EXEC_DESTRUCT_SUDO_RM),
    (r"\bmkfs\.\w+", "Filesystem format command", DET_EXEC_DESTRUCT_MKFS),
    (r"\bdd\b.{0,100}of=/dev/", "Raw disk write", DET_EXEC_DESTRUCT_DD),
    # Code invocation via dynamic interpreters
    (r"\beval\s*\(", "Dynamic code evaluation", DET_EXEC_CODE_EVAL),
    (r"\bexec\s*\(", "Dynamic code execution", DET_EXEC_CODE_EXEC),
    (r"\bos\.system\s*\(", "OS-level command execution", DET_EXEC_CODE_OS_SYSTEM),
    (
        r"\bsubprocess\.(?:call|run|Popen|check_output|check_call)\s*\(",
        "Subprocess invocation",
        DET_EXEC_CODE_SUBPROCESS,
    ),
    # Download-and-run: [^|]{0,200} keeps the match inside one pipeline stage
    (
        r"\bcurl\b[^|]{0,200}\|\s*(?:ba)?sh\b",
        "Download and run (curl | sh)",
        DET_EXEC_NET_CURL_EXEC,
    ),
    (
        r"\bwget\b[^|]{0,200}\|\s*(?:ba)?sh\b",
        "Download and run (wget | sh)",
        DET_EXEC_NET_WGET_EXEC,
    ),
]

DEFAULT_MEDIUM_PATTERNS: list[tuple[str, str, str]] = [
    # Network access without invocation pipe
    (r"\bcurl\b.{0,100}https?://", "HTTP request via curl", DET_EXEC_NET_CURL),
    (r"\bwget\b.{0,100}https?://", "Download via wget", DET_EXEC_NET_WGET),
]

# Injection patterns: scanned against ALL fields (invocation + reasoning).
# These are textual attacks targeting instruction-following, not the OS.

DEFAULT_INJECTION_HIGH_PATTERNS: list[tuple[str, str, str]] = [
    (
        r"\b(?:ignore|disregard|forget|override|bypass)\s+(?:all\s+)?"
        r"(?:previous|prior|above)\s+(?:instructions?|prompts?|rules?|directives?)\b",
        "Instruction override attempt",
        DET_INJECT_OVERRIDE,
    ),
]

DEFAULT_INJECTION_MEDIUM_PATTERNS: list[tuple[str, str, str]] = [
    (
        r"\byou\s+are\s+now\s+(?:in\s+)?(?:\w+\s+)?mode\b",
        "Mode switching attempt",
        DET_INJECT_MODE_SWITCH,
    ),
    (
        r"\bpretend\s+(?:you\s+are|to\s+be)\s+(?:a\s+)?different\b",
        "Identity manipulation",
        DET_INJECT_IDENTITY,
    ),
]
133+
134+
135+
# ---------------------------------------------------------------------------
136+
# PatternSecurityAnalyzer
137+
# ---------------------------------------------------------------------------
138+
139+
class PatternSecurityAnalyzer(SecurityAnalyzerBase):
    """Classify agent actions by deterministic regex signature matching.

    A fast, local, no-network check at the action boundary. The result is
    ``SecurityRisk.HIGH``, ``MEDIUM``, or ``LOW``; combine it with
    ``ConfirmRisky`` to decide which actions require confirmation.

    Two corpora are scanned. Shell-destructive and code-execution patterns
    are applied only to what the agent will *execute* (tool arguments),
    never to what it merely *thought about* (reasoning text). Injection
    patterns are applied to everything, because "ignore all previous
    instructions" is dangerous wherever it appears.

    Normalization is always on -- invisible characters and fullwidth
    substitutions are collapsed before matching.

    Example::

        from openhands.sdk.security import (
            ConfirmRisky,
            PatternSecurityAnalyzer,
            SecurityRisk,
        )

        analyzer = PatternSecurityAnalyzer()
        policy = ConfirmRisky(threshold=SecurityRisk.MEDIUM)
    """

    # Configurable pattern tables; each entry is (regex, description, detector_id).
    high_patterns: list[tuple[str, str, str]] = Field(
        description="HIGH patterns scanned against executable fields only",
        default_factory=lambda: list(DEFAULT_HIGH_PATTERNS),
    )
    medium_patterns: list[tuple[str, str, str]] = Field(
        description="MEDIUM patterns scanned against executable fields only",
        default_factory=lambda: list(DEFAULT_MEDIUM_PATTERNS),
    )
    injection_high_patterns: list[tuple[str, str, str]] = Field(
        description="HIGH patterns scanned against all fields",
        default_factory=lambda: list(DEFAULT_INJECTION_HIGH_PATTERNS),
    )
    injection_medium_patterns: list[tuple[str, str, str]] = Field(
        description="MEDIUM patterns scanned against all fields",
        default_factory=lambda: list(DEFAULT_INJECTION_MEDIUM_PATTERNS),
    )

    # Compiled counterparts of the tables above, filled in model_post_init.
    _compiled_high: list[tuple[re.Pattern[str], str, str]] = PrivateAttr(
        default_factory=list
    )
    _compiled_medium: list[tuple[re.Pattern[str], str, str]] = PrivateAttr(
        default_factory=list
    )
    _compiled_injection_high: list[tuple[re.Pattern[str], str, str]] = PrivateAttr(
        default_factory=list
    )
    _compiled_injection_medium: list[tuple[re.Pattern[str], str, str]] = PrivateAttr(
        default_factory=list
    )

    def model_post_init(self, __context: Any) -> None:
        """Compile every configured pattern table once the model is built."""

        def compile_table(
            table: list[tuple[str, str, str]],
        ) -> list[tuple[re.Pattern[str], str, str]]:
            # All signatures are matched case-insensitively.
            return [
                (re.compile(regex, re.IGNORECASE), description, detector)
                for regex, description, detector in table
            ]

        self._compiled_high = compile_table(self.high_patterns)
        self._compiled_medium = compile_table(self.medium_patterns)
        self._compiled_injection_high = compile_table(self.injection_high_patterns)
        self._compiled_injection_medium = compile_table(
            self.injection_medium_patterns
        )

    def security_risk(self, action: ActionEvent) -> SecurityRisk:
        """Return the risk level of ``action`` via two-corpus pattern matching.

        The first matching pattern decides the result; ``SecurityRisk.LOW``
        is returned when nothing matches or there is no content to scan.
        """
        exec_content = _normalize(_extract_exec_content(action))
        all_content = _normalize(_extract_content(action))

        if not (exec_content or all_content):
            return SecurityRisk.LOW

        # Stages run in order: HIGH before MEDIUM and, within a level,
        # executable-only patterns before injection patterns on all fields.
        stages = (
            (
                self._compiled_high,
                exec_content,
                SecurityRisk.HIGH,
                "Pattern matched: %s -> HIGH",
            ),
            (
                self._compiled_injection_high,
                all_content,
                SecurityRisk.HIGH,
                "Pattern matched: %s -> HIGH",
            ),
            (
                self._compiled_medium,
                exec_content,
                SecurityRisk.MEDIUM,
                "Pattern matched: %s -> MEDIUM",
            ),
            (
                self._compiled_injection_medium,
                all_content,
                SecurityRisk.MEDIUM,
                "Pattern matched: %s -> MEDIUM",
            ),
        )
        for compiled, corpus, risk, message in stages:
            for regex, _description, detector in compiled:
                if regex.search(corpus):
                    logger.debug(message, detector)
                    return risk

        return SecurityRisk.LOW

0 commit comments

Comments
 (0)