diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 71dd35227..4bc6803eb 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -89,6 +89,21 @@ Sandbox logs are emitted locally and can also be pushed back to the gateway. Security-relevant sandbox behavior uses OCSF structured events; internal diagnostics use ordinary tracing. +## Policy Proposals + +When an L4 CONNECT is denied, the proxy emits a `DenialEvent`. The denial +aggregator batches these events and flushes summaries to the gateway every 10 +seconds (configurable via `OPENSHELL_DENIAL_FLUSH_INTERVAL_SECS`). The gateway +runs them through the mechanistic mapper, which generates a pending +`NetworkPolicyRule` proposal visible under `openshell rule get --status pending`. + +L7 denials (HTTP 403 from method/path rules) are intentionally excluded from +mechanistic mapping. L4 denials carry only `host:port`, which a deterministic mapper can handle. +L7 denials carry method, path, query, and body context. The agent loop reads +the structured 403 and authors the narrowest rule. Mechanistically mapping L7 +would either over-broaden rules or require path-templating logic that rots +quickly. + ## Failure Behavior - If gateway config polling fails, the sandbox keeps its last-known-good policy. 
diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index ad10a26ed..093df901c 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -6443,4 +6443,28 @@ network_policies: } } } + + #[test] + fn test_emit_denial_enqueues_denial_event() { + let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<DenialEvent>(); + let decision = ConnectDecision { + action: NetworkAction::Deny { reason: "no matching policy".into() }, + generation: 0, + binary: Some(std::path::PathBuf::from("/usr/bin/curl")), + binary_pid: Some(1234), + ancestors: vec![], + cmdline_paths: vec![], + }; + + emit_denial(&Some(tx), "blocked.invalid", 443, "/usr/bin/curl", &decision, "no matching policy", "connect"); + + let event = rx.try_recv().expect("DenialEvent should be enqueued after L4 deny"); + assert_eq!(event.host, "blocked.invalid"); + assert_eq!(event.port, 443); + assert_eq!(event.binary, "/usr/bin/curl"); + assert_eq!(event.denial_stage, "connect"); + assert_eq!(event.deny_reason, "no matching policy"); + assert!(event.l7_method.is_none()); // connect-stage deny: no L7 fields expected + assert!(event.l7_path.is_none()); + } } diff --git a/e2e/policy-advisor/README.md b/e2e/policy-advisor/README.md index 79f496e3e..2c2c96f19 100644 --- a/e2e/policy-advisor/README.md +++ b/e2e/policy-advisor/README.md @@ -52,3 +52,18 @@ bash e2e/policy-advisor/test.sh Requires Docker, `agent_policy_proposals_enabled=true`, and a GitHub token with contents write on the repository. The test auto-resolves the token from `DEMO_GITHUB_TOKEN`, `GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth token`. + +## Mechanistic smoke + +Lightweight regression for the L4 CONNECT deny → mechanistic chunk pipeline. +No GitHub token or LLM required. 
+ +```bash +mise run e2e:mechanistic-smoke +``` + +Or manually against a running gateway with `agent_policy_proposals_enabled=true`: + +```bash +OPENSHELL_BIN=target/debug/openshell bash e2e/policy-advisor/mechanistic-smoke.sh +``` diff --git a/e2e/policy-advisor/mechanistic-smoke.sh b/e2e/policy-advisor/mechanistic-smoke.sh new file mode 100755 index 000000000..546346705 --- /dev/null +++ b/e2e/policy-advisor/mechanistic-smoke.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Regression smoke for the mechanistic policy mapper. +# +# Triggers an L4 CONNECT deny from inside a sandbox, waits for the denial +# aggregator to flush, and asserts that a pending mechanistic chunk appears +# under `openshell rule get "$SANDBOX" --status pending`. +# +# This is deliberately L4-only. L7 denials (method/path 403s) are the agent +# loop's job; the mechanistic mapper only covers L4 CONNECT denials. See #1333. +# +# Prereqs: a running gateway with agent_policy_proposals_enabled=true. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +if [[ -z "${OPENSHELL_BIN:-}" ]]; then + if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then + OPENSHELL_BIN="${REPO_ROOT}/target/debug/openshell" + else + OPENSHELL_BIN="openshell" + fi +fi + +RUN_ID="${RUN_ID:-$(date +%Y%m%d-%H%M%S)}" +SANDBOX="${SANDBOX:-mechanistic-smoke-${RUN_ID}}" +KEEP_SANDBOX="${KEEP_SANDBOX:-0}" +# Seconds to wait for the denial aggregator to flush; override FLUSH_WAIT when CI shortens OPENSHELL_DENIAL_FLUSH_INTERVAL_SECS. 
+FLUSH_WAIT="${FLUSH_WAIT:-15}" + +BOLD='\033[1m' +CYAN='\033[36m' +GREEN='\033[32m' +RED='\033[31m' +RESET='\033[0m' + +step() { printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1"; } +ok() { printf " ${GREEN}✓${RESET} %s\n" "$*"; } +fail() { printf "\n${RED}FAIL:${RESET} %s\n" "$*" >&2; exit 1; } + +TMP_DIR="" # set by create_sandbox; removed by cleanup when non-empty + +cleanup() { + if [[ "$KEEP_SANDBOX" != "1" ]]; then + "$OPENSHELL_BIN" sandbox delete "$SANDBOX" >/dev/null 2>&1 || true + fi + [[ -z "$TMP_DIR" ]] || rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +preflight() { + step "Preflight" + local raw_settings + if ! raw_settings="$("$OPENSHELL_BIN" settings get --global --json 2>&1)"; then + fail "cannot reach gateway: ${raw_settings}" + fi + local enabled + enabled="$(printf '%s' "$raw_settings" \ + | jq -r '.settings.agent_policy_proposals_enabled // ""')" + [[ "$enabled" == "true" ]] \ + || fail "set agent_policy_proposals_enabled=true first: + $OPENSHELL_BIN settings set --global --key agent_policy_proposals_enabled --value true --yes" + ok "agent_policy_proposals_enabled=true" +} + +create_sandbox() { + step "Creating sandbox '${SANDBOX}' (no network policy)" + TMP_DIR="$(mktemp -d)" + SSH_CONFIG="${TMP_DIR}/ssh_config" + + "$OPENSHELL_BIN" sandbox delete "$SANDBOX" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" sandbox create \ + --name "$SANDBOX" \ + --no-auto-providers \ + --no-tty \ + --keep \ + -- bash -lc "echo sandbox ready" \ + | sed 's/^/ /' + + "$OPENSHELL_BIN" sandbox ssh-config "$SANDBOX" > "$SSH_CONFIG" + SSH_HOST="$(awk '/^Host / { print $2; exit }' "$SSH_CONFIG")" + [[ -n "$SSH_HOST" ]] || fail "could not parse SSH host" + + for _i in $(seq 1 30); do # poll SSH: up to 30 attempts, 2s apart + ssh -F "$SSH_CONFIG" "$SSH_HOST" true >/dev/null 2>&1 && { ok "SSH up"; return; } + sleep 2 + done + fail "SSH timed out" +} + +trigger_l4_deny() { + step "Triggering L4 CONNECT deny from inside sandbox" + # blocked.invalid uses the reserved .invalid TLD (RFC 6761), so it never resolves, and this sandbox was created without a network policy. 
+ ssh -F "$SSH_CONFIG" "$SSH_HOST" \ + "curl -sf --max-time 5 https://blocked.invalid/ || true" >/dev/null 2>&1 || true + ok "curl attempted (deny expected)" +} + +assert_pending_chunk() { + step "Waiting ${FLUSH_WAIT}s then checking for pending chunk" + sleep "$FLUSH_WAIT" # fixed wait: the denial aggregator flushes on an interval + local output + output="$("$OPENSHELL_BIN" rule get "$SANDBOX" --status pending 2>&1)" || fail "rule get failed: ${output}" + printf '%s\n' "$output" | sed 's/^/ /' + grep -qiF -- "blocked.invalid" <<<"$output" \ + || fail "no pending chunk for blocked.invalid" + ok "pending mechanistic chunk present for blocked.invalid" +} + +main() { + command -v jq >/dev/null || fail "jq is required" + preflight + create_sandbox + trigger_l4_deny + assert_pending_chunk + step "Smoke pass" +} + +main "$@" diff --git a/tasks/test.toml b/tasks/test.toml index 91d2c44f6..3e3bf668f 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -78,6 +78,13 @@ run = "e2e/rust/e2e-vm.sh" description = "Run smoke e2e against a standalone gateway with the Docker compute driver" run = "e2e/rust/e2e-docker.sh" +["e2e:mechanistic-smoke"] +description = "Run mechanistic L4 smoke against a Docker-backed gateway" +run = [ + "cargo build -p openshell-cli --features openshell-core/dev-settings", + "e2e/with-docker-gateway.sh bash -lc 'target/debug/openshell settings set --global --key agent_policy_proposals_enabled --value true --yes && OPENSHELL_BIN=$PWD/target/debug/openshell bash e2e/policy-advisor/mechanistic-smoke.sh'", +] + ["e2e:docker:gpu"] description = "Run GPU e2e against a standalone gateway with the Docker compute driver" env = { OPENSHELL_E2E_DOCKER_GPU = "1", OPENSHELL_E2E_DOCKER_TEST = "gpu_device_selection", OPENSHELL_E2E_DOCKER_FEATURES = "e2e-docker-gpu" }