From d6b755ab58018285949c513f5e3a904815849bff Mon Sep 17 00:00:00 2001 From: Frederico Araujo Date: Fri, 12 Jun 2026 10:26:30 -0400 Subject: [PATCH 1/5] docs: add requirements document for python bindings Signed-off-by: Frederico Araujo --- docs/dev/issue19_requirements.md | 381 +++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 docs/dev/issue19_requirements.md diff --git a/docs/dev/issue19_requirements.md b/docs/dev/issue19_requirements.md new file mode 100644 index 0000000..911b279 --- /dev/null +++ b/docs/dev/issue19_requirements.md @@ -0,0 +1,381 @@ +# Requirements: CPEX Python Bindings (Issue #19) + +**This document supersedes `docs/specs/pyo3-bindings-plan.md`.** + +--- + +## Philosophy + +**The Rust implementation is the canonical CPEX.** The Python bindings expose the Rust API faithfully — they do not bend to match the legacy Python framework's conventions. The legacy `./cpex/` package remains untouched; users migrate to the new package at their own pace via a migration guide. + +## Goal + +Create a **new Python package** at `bindings/python/` that wraps `cpex_core::PluginManager` via PyO3. 
This package: +- Exposes the Rust API contracts directly (not the legacy Python conventions) +- Is a standalone, separately installable package +- Leaves `./cpex/` (legacy pure-Python framework) completely untouched +- Includes a migration guide from legacy Python to the new Rust-backed package + +--- + +## Repository Structure + +``` +bindings/python/ +├── Cargo.toml # PyO3 cdylib crate (depends on cpex-core) +├── build.rs # macOS linker flags for extension modules +├── src/ # Rust PyO3 source +│ ├── lib.rs # Module definition, exports +│ ├── manager.rs # PyPluginManager +│ ├── conversions.rs # Python↔Rust value traversal +│ ├── error.rs # PluginError → PyErr +│ └── result.rs # PyPipelineResult +├── python/ +│ └── cpex/ # Python package (importable as `import cpex`) +│ ├── __init__.py # Re-exports from the native module +│ └── _lib.pyi # Type stubs matching actual Rust signatures +├── pyproject.toml # maturin-based build system +├── tests/ +│ ├── test_manager.py # End-to-end invoke_hook tests +│ ├── test_conversions.py # Round-trip conversion correctness +│ └── conftest.py # Shared fixtures +├── MIGRATION.md # Guide: legacy cpex → cpex +└── README.md # Package documentation +``` + +**Key decisions:** +- Package name: `cpex` — same name as the legacy package. When the Rust-backed version is ready, it replaces the legacy on PyPI seamlessly. +- The Rust crate at `bindings/python/` depends on `cpex-core` from the workspace +- Build with `maturin develop` or `maturin build` +- During development, both packages exist (legacy at `./cpex/`, new at `bindings/python/python/cpex/`). They are NOT installed simultaneously — the new one is installed in its own venv or replaces the legacy. 
+ +--- + +## API Contract (Rust-Native) + +The Python API mirrors the Rust `PluginManager` directly: + +```python +from cpex import PluginManager, PipelineResult # The new Rust-backed cpex package + +# Construction: sync, loads config +manager = PluginManager("plugins/config.yaml") + +# Initialization: async, calls plugin.initialize() on all registered plugins +await manager.initialize() + +# Invoke a hook — THE CONTRACT (mirrors Rust invoke_by_name) +result: PipelineResult = await manager.invoke_hook( + hook_name, # str: "cmf.tool_pre_invoke", "identity_resolve", etc. + payload, # dict — converted to Box via PayloadRegistry + extensions=None, # Optional[dict] — converted to Extensions + context_table=None, # Optional[dict] — converted to PluginContextTable +) + +# Result: single object (mirrors Rust PipelineResult) +result.continue_processing # bool +result.modified_payload # Optional[dict] +result.modified_extensions # Optional[dict] +result.violation # Optional[dict] with {reason, description, code, details} +result.errors # list[dict] — non-halting plugin errors (on_error: ignore/disable) +result.metadata # Optional[dict] +result.context_table # dict — pass to next invoke_hook for state continuity + +# Shutdown: async +await manager.shutdown() +``` + +### Hook Names + +The new package uses the **Rust canonical names**: + +| Category | Hook Name | Notes | +|----------|-----------|-------| +| CMF | `"cmf.tool_pre_invoke"` | CMF Message payload | +| CMF | `"cmf.tool_post_invoke"` | | +| CMF | `"cmf.llm_input"` | CMF-only (no legacy equivalent) | +| CMF | `"cmf.llm_output"` | | +| CMF | `"cmf.prompt_pre_fetch"` | | +| CMF | `"cmf.prompt_post_fetch"` | | +| CMF | `"cmf.resource_pre_fetch"` | | +| CMF | `"cmf.resource_post_fetch"` | | +| Legacy | `"tool_pre_invoke"` | Non-CMF typed payload | +| Legacy | `"tool_post_invoke"` | | +| Legacy | `"prompt_pre_fetch"` | | +| Legacy | `"prompt_post_fetch"` | | +| Legacy | `"resource_pre_fetch"` | | +| Legacy | 
`"resource_post_fetch"` | | +| Identity | `"identity_resolve"` | IdentityPayload | +| Delegation | `"token_delegate"` | DelegationPayload | + +The Rust core's `HookType::new(hook_name)` accepts any string. The PyO3 layer passes through as-is — no normalization, no aliasing. + +--- + +## Hard Constraints + +### C1: Faithful Rust API Exposure + +Do NOT adapt the API to match the legacy Python `PluginManager`. The Python bindings are a thin layer over the Rust contracts. The legacy Python framework has its own conventions (GlobalContext, 2-tuple returns, violations_as_exceptions, etc.) — those belong to the legacy package. + +### C2: No Silent Failures + +Every error at the FFI boundary raises a Python exception: +- Unknown hook → `ValueError` +- Config parse failure → `ValueError` +- Plugin execution failure → `RuntimeError` +- Conversion failure → `ValueError` (with descriptive message) +- Timeout → `TimeoutError` + +### C3: Safety + +- **Panic isolation**: All async blocks crossing FFI wrapped in `catch_unwind`. Panics → `RuntimeError`. +- **Recursion depth**: Value traversal capped at 128 levels. Overflow → `ValueError`. +- **No pointer exposure**: Never `{:p}` in repr/errors. + +### C4: Standalone Package + +- `./cpex/` is NOT modified. No `__init__.py` changes, no backend selection logic injected. +- The new package is independently installable: `pip install ./bindings/python/` or `maturin develop` from that directory. +- No import-time exceptions. + +### C5: Self-Contained Build + +```toml +# bindings/python/pyproject.toml +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "cpex" +version = "0.1.0" +requires-python = ">=3.10" + +[tool.maturin] +module-name = "cpex._lib" +features = ["pyo3/extension-module"] +python-source = "python" +``` + +--- + +## Conversion Strategy + +**Do NOT** call Python's `json` module from Rust. 
+ +### Python → Rust (input) + +Direct PyDict/PyList/PyString traversal → `serde_json::Value` → `serde_json::from_value::<T>()`: + +```rust +fn pyobj_to_json_value(py: Python, obj: &Bound<'_, PyAny>, depth: usize) -> PyResult<Value> { + if depth > 128 { + return Err(PyValueError::new_err("Payload nesting exceeds 128 levels")); + } + if obj.is_none() { return Ok(Value::Null); } + if let Ok(b) = obj.extract::<bool>() { return Ok(Value::Bool(b)); } + if let Ok(i) = obj.extract::<i64>() { return Ok(Value::Number(i.into())); } + if let Ok(f) = obj.extract::<f64>() { return Ok(json!(f)); } + if let Ok(s) = obj.extract::<String>() { return Ok(Value::String(s)); } + if let Ok(list) = obj.downcast::<PyList>() { /* recurse with depth+1 */ } + if let Ok(dict) = obj.downcast::<PyDict>() { /* recurse with depth+1 */ } + Err(PyValueError::new_err(format!("Cannot convert {} to JSON", obj.get_type().name()?))) +} +``` + +### Rust → Python (output) + +`serde_json::to_value(&rust_struct)` → traverse Value building PyDict/PyList/etc: + +```rust +fn json_value_to_pyobj(py: Python, value: &serde_json::Value) -> PyResult<PyObject> { + match value { + Value::Null => Ok(py.None()), + Value::Bool(b) => Ok(b.into_pyobject(py)?.into()), + Value::Number(n) => /* i64 or f64 */, + Value::String(s) => Ok(s.into_pyobject(py)?.into()), + Value::Array(arr) => /* PyList */, + Value::Object(map) => /* PyDict */, + } +} +``` + +### Payload Dispatch + +The Rust core's `invoke_by_name` needs a `Box<dyn PluginPayload>`. 
The PyO3 layer needs a registry mapping hook names → payload types (same concept as the Rust `PluginPayload` trait implementors): + +```rust +// Convert input dict to the appropriate Box<dyn PluginPayload> based on hook_name +fn resolve_payload(hook_name: &str, value: serde_json::Value) -> PyResult<Box<dyn PluginPayload>> { + match hook_name { + s if s.starts_with("cmf.") => { + let msg: Message = serde_json::from_value(value)?; + Ok(Box::new(MessagePayload { message: msg })) + } + "identity_resolve" => { + let p: IdentityPayload = serde_json::from_value(value)?; + Ok(Box::new(p)) + } + "token_delegate" => { + let p: DelegationPayload = serde_json::from_value(value)?; + Ok(Box::new(p)) + } + // Legacy hooks (tool_pre_invoke, prompt_pre_fetch, etc.) use typed payloads + // that mirror the Python Pydantic models + _ => { + // For unknown hooks, attempt generic conversion or error + Err(PyValueError::new_err(format!("Unknown hook: '{}'", hook_name))) + } + } +} +``` + +--- + +## Async Pattern + +```rust +use pyo3_async_runtimes::tokio::future_into_py; +use std::panic::AssertUnwindSafe; +use futures::FutureExt; + +#[pymethods] +impl PyPluginManager { + fn invoke_hook<'py>( + &self, + py: Python<'py>, + hook_name: &str, + payload: &Bound<'py, PyDict>, + extensions: Option<&Bound<'py, PyDict>>, + context_table: Option<&Bound<'py, PyDict>>, + ) -> PyResult<Bound<'py, PyAny>> { + let manager = Arc::clone(&self.inner); + let hook_name = hook_name.to_string(); + + // Convert while holding GIL + let payload_value = pyobj_to_json_value(py, payload.as_any(), 0)?; + let rust_payload = resolve_payload(&hook_name, payload_value)?; + let rust_extensions = convert_extensions(py, extensions)?; + let rust_context = convert_context_table(py, context_table)?; + + // Release GIL, run Rust async + pyo3_async_runtimes::tokio::future_into_py(py, async move { + let result = AssertUnwindSafe(async { + manager.invoke_by_name(&hook_name, rust_payload, rust_extensions, rust_context).await + }) + .catch_unwind() + .await; + + match result { + 
Ok((pipeline_result, _bg_tasks)) => { + Python::with_gil(|py| pipeline_result_to_py(py, pipeline_result)) + } + Err(_) => Err(PyRuntimeError::new_err( + "Internal error: Rust panic during plugin execution" + )) + } + }) + } +} +``` + +--- + +## Deliverables + +### Required Files + +| Path | Purpose | +|------|---------| +| `bindings/python/Cargo.toml` | PyO3 cdylib crate | +| `bindings/python/build.rs` | macOS dynamic_lookup linker flag | +| `bindings/python/src/lib.rs` | Module definition | +| `bindings/python/src/manager.rs` | PyPluginManager | +| `bindings/python/src/conversions.rs` | Value traversal (no json module) | +| `bindings/python/src/error.rs` | PluginError → PyErr | +| `bindings/python/src/result.rs` | PyPipelineResult | +| `bindings/python/python/cpex/__init__.py` | Package re-exports | +| `bindings/python/python/cpex/_lib.pyi` | Stubs matching Rust exactly | +| `bindings/python/pyproject.toml` | maturin build config | +| `bindings/python/tests/` | Python test suite | +| `bindings/python/MIGRATION.md` | Legacy cpex → cpex guide | +| `Cargo.toml` (workspace root) | Add `bindings/python` to members | + +### Required Tests + +1. **test_conversions.py** — round-trip for dicts, nested structures, edge cases (empty, None, deep nesting) +2. **test_manager.py** — end-to-end: construct, initialize, invoke_hook with a Rust plugin, shutdown +3. **test_errors.py** — unknown hook, invalid payload, timeout, missing config +4. 
**test_result.py** — PipelineResult fields accessible, violation present, errors surfaced + +### Migration Guide (MIGRATION.md) + +Document the key differences: + +| Aspect | Legacy `cpex` | New `cpex` | +|--------|--------------|-------------------| +| Import | `from cpex.framework.manager import PluginManager` | `from cpex import PluginManager` (new Rust-backed package) | +| invoke_hook args | `(hook_type, payload, global_context, local_contexts, violations_as_exceptions, extensions)` | `(hook_name, payload_dict, extensions, context_table)` | +| Hook names | `"tool_pre_invoke"` | `"cmf.tool_pre_invoke"` (CMF) or `"tool_pre_invoke"` (legacy) | +| Return | `tuple[PluginResult, PluginContextTable]` | `PipelineResult` (single object) | +| Payload input | Pydantic model | dict | +| Context | GlobalContext + PluginContextTable separately | context_table dict (threaded through) | +| Extensions | Pydantic Extensions model | dict | +| Errors | Swallowed or raised depending on violations_as_exceptions | Always in `result.errors` (non-halting) or raised (halting) | + +--- + +## Anti-Patterns to Avoid (Lessons from PR #67) + +1. **Never try to match the legacy Python API.** The Rust API is canonical. +2. **Never call Python's json module from Rust.** Direct value traversal only. +3. **Never return sentinel dicts for errors.** Raise exceptions. +4. **Never expose heap addresses in repr/errors.** +5. **Never hand-write stubs that diverge from Rust signatures.** Omit rather than guess. +6. **Never mutate `os.environ` at module level in tests.** Use fixtures. +7. **Never raise at import time.** Defer errors to first use. +8. **Never discard PipelineResult.errors.** Surface them. +9. **This package IS the Rust backend.** No backend selection logic needed — `cpex` (from bindings/python) is always Rust-backed. +10. **Never modify `./cpex/`.** The legacy package is untouched. + +--- + +## Workspace Integration + +Add to root `Cargo.toml`: + +```toml +[workspace] +members = [ + # ... 
existing crates ... + "bindings/python", +] +``` + +Add Makefile targets: + +```makefile +## Python Bindings +bindings-python-build: ## Build cpex (debug) + cd bindings/python && maturin develop + +bindings-python-build-release: ## Build cpex (release) + cd bindings/python && maturin develop --release + +bindings-python-test: ## Test cpex + cd bindings/python && pytest tests/ +``` + +--- + +## Verification + +After implementation: + +1. `cargo build -p cpex-python` (or whatever the crate name) compiles +2. `cd bindings/python && maturin develop` installs the extension +3. `python -c "from cpex import PluginManager; print('ok')"` works +4. `cd bindings/python && pytest tests/` passes +5. The existing `pytest tests/` in repo root (legacy cpex tests) passes unchanged +6. `mypy bindings/python/python/cpex/` passes with stubs From 35fc54b12d5b103b3f37029d6d6eeaba2594013c Mon Sep 17 00:00:00 2001 From: Frederico Araujo Date: Fri, 12 Jun 2026 10:29:20 -0400 Subject: [PATCH 2/5] docs: minor update Signed-off-by: Frederico Araujo --- docs/dev/issue19_requirements.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/dev/issue19_requirements.md b/docs/dev/issue19_requirements.md index 911b279..f0cae6d 100644 --- a/docs/dev/issue19_requirements.md +++ b/docs/dev/issue19_requirements.md @@ -1,9 +1,5 @@ # Requirements: CPEX Python Bindings (Issue #19) -**This document supersedes `docs/specs/pyo3-bindings-plan.md`.** - ---- - ## Philosophy **The Rust implementation is the canonical CPEX.** The Python bindings expose the Rust API faithfully — they do not bend to match the legacy Python framework's conventions. The legacy `./cpex/` package remains untouched; users migrate to the new package at their own pace via a migration guide. 
From 0d5a1945190310e5361a4e29090c90b174381856 Mon Sep 17 00:00:00 2001 From: Frederico Araujo Date: Fri, 12 Jun 2026 12:50:47 -0400 Subject: [PATCH 3/5] docs: add issue 19 implementation plan Signed-off-by: Frederico Araujo --- docs/dev/issue19_implementation_plan.md | 191 ++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 docs/dev/issue19_implementation_plan.md diff --git a/docs/dev/issue19_implementation_plan.md b/docs/dev/issue19_implementation_plan.md new file mode 100644 index 0000000..cf8cc0d --- /dev/null +++ b/docs/dev/issue19_implementation_plan.md @@ -0,0 +1,191 @@ +# Implementation Plan: CPEX Python Bindings (Issue #19) + +## Context + +CPEX's canonical implementation is the Rust `cpex-core` runtime. Today, non-Rust +hosts reach it only through the C FFI crate (`crates/cpex-ffi`, used by the Go +demo). Python users are stuck on the legacy pure-Python framework at `./cpex/`, +which has diverged conventions (Pydantic payloads, 2-tuple returns, +`GlobalContext`, `violations_as_exceptions`). + +Issue #19 (requirements in `docs/dev/issue19_requirements.md`) asks for a **new, +standalone, Rust-backed Python package** at `bindings/python/` that wraps +`cpex_core::PluginManager` via PyO3. It exposes the Rust API *faithfully* (no +bending to legacy conventions), leaves `./cpex/` completely untouched, and ships +a migration guide. A prior attempt (PR #67) failed by trying to match the legacy +API and by calling Python's `json` module from Rust — the requirements doc +codifies those as anti-patterns. + +**Key finding (the requirements doc omits this):** the doc's constructor sketch +is incomplete. A `PluginManager` cannot instantiate any config-driven plugin +until its **factories are registered**. The existing `cpex-ffi` crate solves this +in `crates/cpex-ffi/src/apl.rs::cpex_apl_install` by depending on the `apl-*` +crates and registering each `KIND` factory + calling +`apl_cpex::register_apl(...)` *before* `load_config`. 
The Python binding must do +the same. + +**Confirmed decisions:** +- **Mirror `cpex-ffi` exactly** for bundled plugins — the default APL set + (pii-scanner, audit-logger, identity-jwt, delegator-oauth, cedar-direct PDP), + with the heavy `cedarling` backend behind an optional Cargo feature. +- **`GenericPayload` fallback** for any hook name outside `cmf.*` / + `identity_resolve` / `token_delegate` (faithful to the Rust core, where + `HookType::new` accepts any string; legacy/custom hooks "just work"). + +**Outcome:** `from cpex import PluginManager` (Rust-backed) works in its own venv; +`await manager.invoke_hook(...)` returns a single `PipelineResult`; the same YAML +configs that drive the Go FFI host drive Python identically. + +--- + +## Reference implementation to mirror + +`crates/cpex-ffi/` is the production-grade analog and the single best source of +truth. The PyO3 layer adapts its patterns, swapping the MessagePack/C-ABI wire +for direct PyObject↔serde_json traversal and `block_on` for +`future_into_py`. 
Key reference points: + +- Factory registration + `register_apl`: `crates/cpex-ffi/src/apl.rs:56` +- Construction sequence (default → register → `load_config_yaml` → `initialize`): + `crates/cpex-ffi/src/lib.rs` (`cpex_manager_new_default`, `cpex_load_config`, `cpex_initialize`) +- Payload INPUT dispatch (`deserialize_payload`): `crates/cpex-ffi/src/lib.rs:321` +- Payload OUTPUT downcast (`serialize_payload`): `crates/cpex-ffi/src/lib.rs:357` +- `PipelineResult` assembly + synthetic FFI error record on payload-serialize + failure: `crates/cpex-ffi/src/lib.rs:877` +- `GenericPayload` + `impl_plugin_payload!`: `crates/cpex-ffi/src/lib.rs:1357`, + `crates/cpex-core/src/hooks/payload.rs:118` + +Core API signatures confirmed during exploration: +- `PluginManager::default() -> PluginManager` (wrap in `Arc`); `manager.register_factory(kind, Box)` +- `load_config_yaml(self: &Arc, yaml: &str) -> Result<(), Box>` (runs config visitors — required for APL `apl:` blocks; plain `load_config` does not) +- `cpex_core::config::parse_config(yaml)` for upfront validation (good error messages) +- `async initialize(&self) -> Result<(), Box>`; `async shutdown(&self)` +- `async invoke_by_name(&self, hook_name: &str, payload: Box, extensions: Extensions, context_table: Option) -> (PipelineResult, BackgroundTasks)` +- `PipelineResult { continue_processing: bool, modified_payload: Option>, modified_extensions: Option, violation: Option, errors: Vec, metadata: Option, context_table: PluginContextTable }` +- `Extensions` and `PluginContextTable` are both `Serialize + Deserialize`. + +--- + +## Implementation + +### 1. Workspace + crate scaffolding + +- Add `"bindings/python"` to `members` in root `Cargo.toml` `[workspace]`. +- `bindings/python/Cargo.toml`: + - `[package] name = "cpex-python"` (matches doc's `cargo build -p cpex-python`). + - `[lib] name = "_lib"`, `crate-type = ["cdylib"]` (maturin `module-name = "cpex._lib"`). 
+ - Deps: `cpex-core { path = "../../crates/cpex-core" }`, the same APL crates + `cpex-ffi` bundles (`apl-cpex`, `apl-pii-scanner`, `apl-audit-logger`, + `apl-identity-jwt`, `apl-delegator-oauth`, `apl-pdp-cedar-direct`; + `apl-cedarling` optional behind a `cedarling` feature), `serde`/`serde_json`/ + `tokio`/`futures` via `{ workspace = true }`, plus `pyo3` (with `extension-module` + as a non-default opt-in feature) and `pyo3-async-runtimes` (`tokio-runtime`). +- `bindings/python/build.rs`: on `cfg(target_os = "macos")` emit + `cargo:rustc-cdylib-link-arg=-undefined` / `dynamic_lookup` so + `cargo build -p cpex-python` (workspace build, extension-module off) links + even though libpython is absent. (`cpex-ffi` needs no build.rs because it isn't a + Python extension; this crate needs one for the standalone cargo build path.) + +### 2. Rust PyO3 source (`bindings/python/src/`) + +- **`lib.rs`** — `#[pymodule] fn _lib(...)` exporting `PyPluginManager` and + `PyPipelineResult`. Initialize the `pyo3-async-runtimes` tokio runtime + (multi-thread, `enable_all`), mirroring `cpex-ffi`'s shared-runtime rationale. + No work at import time beyond registration (anti-pattern #7: never raise at import). + +- **`manager.rs`** — `PyPluginManager { inner: Arc<PluginManager> }`. + - `#[new] fn new(config_path: &str)`: sync. `PluginManager::default()` → `Arc` → + `register_builtin_factories(&inner)` (the `apl.rs::cpex_apl_install` sequence: + `register_factory` for each `apl_*::KIND` + `apl_cpex::register_apl(&inner, options)`, where `options` is `AplOptions::in_process()` configured with the cedar-direct `pdp_factory`) → read file to string + (`std::fs::read_to_string`, IO error → `ValueError`) → `parse_config` validate + (parse error → `ValueError`) → `inner.load_config_yaml(yaml)` (error → `ValueError`). + - `fn initialize<'py>(&self, py)` / `fn shutdown<'py>(&self, py)` / + `fn invoke_hook<'py>(...)`: all return `future_into_py` awaitables. 
+ - `invoke_hook(hook_name, payload: &Bound<'py, PyDict>, extensions=None, context_table=None)` + follows the doc's async pattern (convert while holding GIL → release → + `AssertUnwindSafe(...).catch_unwind()` → re-acquire GIL to build result). + Optional outer `tokio::time::timeout`; `Elapsed` → `PyTimeoutError`; caught + panic → `PyRuntimeError` ("Internal error: Rust panic during plugin execution"). + +- **`conversions.rs`** — NO Python `json` module (anti-pattern #2). + - `pyobj_to_json_value(py, obj, depth)` — direct `PyBool`/`int`/`float`/`str`/ + `PyList`/`PyDict` traversal → `serde_json::Value`; depth > 128 → `ValueError` + ("nesting exceeds 128 levels"); unconvertible type → `ValueError` naming the type. + - `json_value_to_pyobj(py, &Value)` — reverse traversal building `PyDict`/`PyList`. + - `resolve_payload(hook_name, Value) -> PyResult<Box<dyn PluginPayload>>`: + `cmf.*` → `MessagePayload`, `identity_resolve` → `IdentityPayload`, + `token_delegate` → `DelegationPayload`, **else `GenericPayload { value }`** + (confirmed fallback; `from_value` failures → descriptive `ValueError`). + - `serialize_payload(&dyn PluginPayload) -> PyResult`: + try `as_any().downcast_ref::<T>()` in order (MessagePayload, GenericPayload, + IdentityPayload, DelegationPayload) → `serde_json::to_value`. No match → signal + the caller so it records a synthetic error (see `result.rs`); never silently drop the payload + (anti-pattern #8). + - `extensions` dict ↔ `Extensions` and `context_table` dict ↔ `PluginContextTable` + via `serde_json::from_value` / `to_value` (both are serde types). + +- **`result.rs`** — `#[pyclass] PyPipelineResult` with read-only getters exactly + mirroring Rust fields: `continue_processing: bool`, `modified_payload: + Optional[dict]`, `modified_extensions: Optional[dict]`, `violation: + Optional[dict]`, `errors: list[dict]`, `metadata: Optional[dict]`, + `context_table: dict`. 
`pipeline_result_to_py` builds it; if `modified_payload` + downcast fails, append a synthetic `{plugin_name:"", code:"py_serialize_error", ...}` + to `errors` (mirrors `cpex-ffi/src/lib.rs:877`). `repr` must never expose + pointers (anti-pattern #4). + +- **`error.rs`** — `PluginError`/`Box` → `PyErr` mapping (C2): + `Config`/`UnknownHook` → `ValueError`, `Timeout` → `TimeoutError`, + `Execution`/`Violation` and unexpected → `RuntimeError`. Helper used across modules. + +### 3. Python package (`bindings/python/python/cpex/`) + +- `__init__.py` — re-export `PluginManager`, `PipelineResult` from `cpex._lib`. +- `_lib.pyi` — stubs matching the Rust signatures exactly; omit anything uncertain + rather than guess (anti-pattern #5). + +### 4. Build config + packaging + +- `bindings/python/pyproject.toml` — exactly as doc C5 (maturin backend, + `module-name = "cpex._lib"`, `features = ["pyo3/extension-module"]`, + `python-source = "python"`, `requires-python = ">=3.10"`). +- `bindings/python/MIGRATION.md` — the legacy→new mapping table from the doc + (import path, `invoke_hook` args, hook names, tuple vs single result, dict vs + Pydantic payloads, context handling, error surfacing). +- `bindings/python/README.md` — install (`maturin develop`) + quickstart. + +### 5. Tests (`bindings/python/tests/`) + +Mirror legacy conventions (pytest + `pytest-asyncio`; `conftest.py` fixtures, no +module-level `os.environ` mutation — anti-pattern #6). Reuse an existing CMF +config (e.g. `examples/go-demo/cmf_plugins.yaml`) as a fixture so a real bundled +plugin runs. +- `test_conversions.py` — round-trip dicts/nested/empty/None/deep-nesting; >128 → ValueError. +- `test_manager.py` — construct → initialize → `invoke_hook` (CMF) → assert + `PipelineResult` fields → shutdown. +- `test_errors.py` — missing config file, bad YAML, invalid payload shape, timeout. +- `test_result.py` — all fields accessible; violation present; errors surfaced. + +### 6. 
Makefile + +Add `bindings-python-build` / `-build-release` / `-test` targets matching the +existing emoji + `.PHONY` style in the root `Makefile`. + +--- + +## Verification + +1. `cargo build -p cpex-python` compiles (workspace build, macOS dynamic_lookup via build.rs). +2. `cd bindings/python && maturin develop` installs the extension into a venv. +3. `python -c "from cpex import PluginManager; print('ok')"`. +4. `cd bindings/python && pytest tests/` passes. +5. Root `pytest tests/` (legacy `cpex`) still passes unchanged — `./cpex/` untouched (C4 / anti-pattern #10). +6. `mypy bindings/python/python/cpex/` passes against the stubs. +7. Spot-check: a `cmf.tool_pre_invoke` invoke with a policy-denying config returns + `continue_processing == False` with a populated `violation`, and a plugin + error surfaces in `result.errors` (not swallowed, not raised when non-halting). + +## Out of scope / untouched + +- `./cpex/` legacy package — zero changes. +- No backend-selection logic — this package is always Rust-backed (anti-pattern #9). +- `cedarling` identity/PDP — present only behind the optional Cargo feature. 
From 693afb2a08b8d1c792137777adc3c08ab08fc81e Mon Sep 17 00:00:00 2001 From: Frederico Araujo Date: Fri, 12 Jun 2026 13:16:53 -0400 Subject: [PATCH 4/5] docs: add ce implementation plan for issue 19 Signed-off-by: Frederico Araujo --- ...6-12-001-feat-cpex-python-bindings-plan.md | 503 ++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 docs/plans/2026-06-12-001-feat-cpex-python-bindings-plan.md diff --git a/docs/plans/2026-06-12-001-feat-cpex-python-bindings-plan.md b/docs/plans/2026-06-12-001-feat-cpex-python-bindings-plan.md new file mode 100644 index 0000000..f71e166 --- /dev/null +++ b/docs/plans/2026-06-12-001-feat-cpex-python-bindings-plan.md @@ -0,0 +1,503 @@ +--- +title: "feat: CPEX Python Bindings (PyO3, Rust-backed cpex package)" +type: feat +status: active +date: 2026-06-12 +origin: docs/dev/issue19_requirements.md +deepened: 2026-06-12 +--- + +# feat: CPEX Python Bindings (PyO3, Rust-backed cpex package) + +## Summary + +Build a new, standalone, Rust-backed Python package at `bindings/python/` that wraps +`cpex_core::PluginManager` via PyO3, exposing the Rust API faithfully (single `PipelineResult`, +dict payloads, `await`-based lifecycle). It mirrors the patterns of the existing C-FFI crate +`crates/cpex-ffi/` — factory registration, payload downcast-and-serialize, panic/timeout +isolation — while swapping the MessagePack/C-ABI wire for direct PyObject↔`serde_json` +traversal and `block_on` for `future_into_py`. The legacy pure-Python package at `./cpex/` is +left untouched. v1 supports CMF hooks (`MessagePayload`) and a `GenericPayload` fallback for all +other hook names; typed identity/delegation payloads are deferred to v2. + +--- + +## Problem Frame + +CPEX's canonical implementation is the Rust `cpex-core` runtime, but Python users can only reach +it through the legacy pure-Python framework at `./cpex/`, which has diverged conventions +(Pydantic payloads, 2-tuple returns, `GlobalContext`, `violations_as_exceptions`). 
Non-Rust hosts +otherwise reach the runtime only via the Go-oriented C FFI (`crates/cpex-ffi/`). A prior attempt +(PR #67) failed by trying to match the legacy Python API and by calling Python's `json` module +from Rust. Issue #19 asks for a thin, faithful PyO3 layer over the Rust contracts so Python users +get the canonical runtime with `from cpex import PluginManager`. + +A finding the origin requirements doc omits, surfaced during planning: a `PluginManager` cannot +instantiate any config-driven plugin until its **factories are registered**. `crates/cpex-ffi/src/apl.rs` +(`cpex_apl_install`) registers each `apl-*` `KIND` factory and calls `apl_cpex::register_apl(...)` +*before* loading config. The Python binding must replicate this or every `invoke_hook` returns an +empty allow with no plugins firing. + +--- + +## Requirements + +Traced to the origin doc's Hard Constraints (C1–C5), Anti-Patterns (#1–#10), Deliverables table, +and Required Tests. + +- R1. (C1, #1) Expose the Rust `PluginManager` API faithfully; do **not** adapt to the legacy Python API. +- R2. (C2, #3, #8) No silent failures at the FFI boundary: config/conversion errors → `ValueError`, + plugin execution failure → `RuntimeError`, timeout → `TimeoutError`; `PipelineResult.errors` is never discarded. +- R3. (C3, #4) Safety: all async blocks crossing FFI wrapped in `catch_unwind` (panic → `RuntimeError`); + value traversal capped at 128 levels (overflow → `ValueError`); never expose pointers in repr/errors. +- R4. (C4, #7, #10) Standalone package: `./cpex/` unmodified; independently installable; no import-time exceptions. +- R5. (C5) Self-contained maturin build (`module-name = "cpex._lib"`, `python-source = "python"`, `requires-python >= 3.10`). +- R6. (#2) Conversion uses direct PyObject↔`serde_json` traversal — never Python's `json` module from Rust. +- R7. 
Deliver every file in the origin Deliverables table (Rust src, Python package, pyproject, tests, MIGRATION.md, README.md, workspace + Makefile wiring). +- R8. Deliver the required test suites: `test_conversions.py`, `test_manager.py`, `test_errors.py`, `test_result.py`. +- R9. Provide a migration guide (`MIGRATION.md`) mapping legacy `cpex` → new Rust-backed `cpex`. + +**Origin acceptance examples:** AE1 — `python -c "from cpex import PluginManager"` succeeds (R4, R5). +AE2 — a `cmf.tool_pre_invoke` invoke against a denying config returns `continue_processing == False` +with a populated `violation` (R1, R2). AE3 — legacy root `pytest tests/` passes unchanged (R4). + +--- + +## Scope Boundaries + +- Not modifying `./cpex/` (legacy package) in any way — no `__init__.py` edits, no backend selection (#9, #10). +- No backend-selection / dual-mode logic — this package is always Rust-backed (#9). +- Not re-exposing every Rust extension/PDP knob; only the `PluginManager` lifecycle + `invoke_hook` contract. + +### Deferred to Follow-Up Work + +- **Typed identity/delegation payloads** (`identity_resolve` → `IdentityPayload`, `token_delegate` → + `DelegationPayload`): deferred to v2. Their secret token fields (`raw_token`, `bearer_token`) are + `#[serde(skip)]`, so dict→serde construction silently drops the token (see Key Technical Decisions + KD1). v1 routes these hooks through `GenericPayload`, preserving the raw dict. v2 will add typed + constructors and a token-injection path (kwarg or `Extensions.raw_credentials`, mirroring + `cpex-ffi`'s `cpex_invoke_resolved` at `crates/cpex-ffi/src/lib.rs:961`). +- **Cedarling-backed identity/PDP**: present only behind an optional `cedarling` Cargo feature (off by default), mirroring `cpex-ffi`. +- **`result.wait_background()` / explicit background-task handle**: v1 relies on `shutdown()` to drain + fire-and-forget tasks (KD4). A per-call wait API can follow if needed. 
+ +--- + +## Context & Research + +### Relevant Code and Patterns + +- `crates/cpex-ffi/` — the production reference this plan mirrors. Key sites: + - Factory registration + `register_apl`: `crates/cpex-ffi/src/apl.rs:56` + - Shared tokio runtime w/ `CPEX_FFI_WORKER_THREADS` knob + rationale: `crates/cpex-ffi/src/lib.rs:117` + - Payload downcast-and-serialize (`serialize_payload`): `crates/cpex-ffi/src/lib.rs:357` + - Synthetic error record on payload-serialize failure: `crates/cpex-ffi/src/lib.rs:877` + - `GenericPayload` + `impl_plugin_payload!`: `crates/cpex-ffi/src/lib.rs:1357` (struct is **local**, not exported from core) + - Fixed wall-clock timeout constant `FFI_WALL_CLOCK_TIMEOUT`: `crates/cpex-ffi/src/lib.rs:115` +- `cpex-core` verified API (against source): + - `PluginManager::default()` (wrap in `Arc`); `register_factory(&self, kind, Box)` `crates/cpex-core/src/manager.rs:456` + - `load_config_yaml(self: &Arc, yaml: &str) -> Result<(), Box>` `crates/cpex-core/src/manager.rs:556` (runs config visitors — required for APL; passes `Arc::clone(self)` to visitors at `:591`) + - `parse_config(yaml) -> Result>` `crates/cpex-core/src/config.rs:567` + - `async initialize(&self)` `:812`; `async shutdown(&self)` `:876` (drains `TaskTracker` at `:889`); `async invoke_by_name(&self, &str, Box, Extensions, Option) -> (PipelineResult, BackgroundTasks)` `:932` + - `Extensions` derives `Default + Serialize + Deserialize`; data fields `#[serde(default, skip_serializing_if=...)]`, `WriteToken` fields `#[serde(skip)]` — partial dicts deserialize safely `crates/cpex-core/src/extensions/container.rs:48` + - `impl_plugin_payload!` macro is exported from core `crates/cpex-core/src/hooks/payload.rs:118` + - Fire-and-forget tasks spawn on the manager's `TaskTracker` `crates/cpex-core/src/executor.rs:980`; dropping `BackgroundTasks` (`executor.rs:175`) detaches handles but does not cancel +- Bundled APL plugin kinds (verified) usable in test fixtures: + - `validator/pii-scan` 
(`crates/apl-pii-scanner/src/factory.rs:20`) — registers on `cmf.tool_pre_invoke`, emits `pii.detected` violation in `deny` mode (`crates/apl-pii-scanner/src/scanner.rs:197`) + - `audit/logger` (`crates/apl-audit-logger/src/factory.rs:20`) — fire-and-forget + - `identity/jwt` (`crates/apl-identity-jwt/src/factory.rs:42`), `delegator/oauth` (`crates/apl-delegator-oauth/src/factory.rs:41`) +- Legacy test conventions to mirror: `tests/pytest.ini`, `tests/unit/cpex/conftest.py` (autouse reset fixtures, no module-level `os.environ` mutation). + +### Institutional Learnings + +- PR #67 anti-patterns (origin doc §Anti-Patterns) are the governing "do not repeat" list; each is mapped to a requirement above. + +### External References + +- `pyo3-async-runtimes` latest is **0.28.0** (2026-02-04), requires `pyo3 ^0.28`. Pin both to `0.28` with the `tokio-runtime` feature. + +--- + +## Key Technical Decisions + +- **KD1 (resolves B1):** Defer typed identity/delegation to v2; route `identity_resolve` / `token_delegate` + through `GenericPayload` in v1. Rationale: `raw_token`/`bearer_token` are `#[serde(skip)]`, so serde + construction yields tokenless payloads — silent no-ops. `cpex-ffi` itself doesn't dispatch delegation and + handles identity via a separate raw-creds entry point. +- **KD2 (resolves B3 / reconciles C2):** Unknown/legacy/custom hook names do **not** raise — they map to + `GenericPayload`, faithful to `cpex_core` where `invoke_by_name` accepts any hook string and never emits + `UnknownHook`. The origin's C2 "Unknown hook → ValueError" row is **explicitly retracted as unreachable**; + the `ValueError` guarantee instead covers conversion failures (incl. a `GenericPayload` dict that fails + `from_value`) and config errors. `test_errors.py` tests *conversion failure*, not "unknown hook". +- **KD3 (resolves B2):** `pyo3/extension-module` is an opt-in (non-default) feature enabled only by maturin. 
+ `build.rs` emits `-undefined dynamic_lookup` on macOS **only when `CARGO_FEATURE_EXTENSION_MODULE` is unset** + (avoids double-flag under maturin). To keep the pure-Rust workspace build independent of libpython, the + Makefile's `rust-build`/`rust-test` exclude this crate (`--workspace --exclude cpex-python`); it is built + and tested via the `bindings-python-*` (maturin) targets. `cargo build -p cpex-python` remains the explicit + verification path on a machine with Python present. +- **KD4 (resolves M1):** `invoke_hook` returns only `PipelineResult` and drops `BackgroundTasks`; fire-and-forget + tasks keep running on the manager's `TaskTracker` and are guaranteed flushed by `await shutdown()`. The + deterministic AE2 violation assertion uses the **sequential** `validator/pii-scan` plugin (no raciness); + fire-and-forget behavior is asserted only after `shutdown()`. Documented in README/MIGRATION. +- **KD5 (resolves M2):** Define a crate-local `GenericPayload { value: serde_json::Value }` + `cpex_core::impl_plugin_payload!` + (the macro is exported from core; the struct is not). House factory registration in a dedicated `builtins.rs`, + not inlined in `#[new]`. +- **KD6 (resolves M3):** Pin `pyo3 = "0.28"`, `pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }`. + Verify the resolved `tokio` lower bound stays `>= 1.51` so the workspace lockfile (shared with `cpex-ffi`) does not regress. +- **KD7 (resolves M4):** Wall-clock timeout is **not optional** — a fixed constant mirroring `FFI_WALL_CLOCK_TIMEOUT`; + `Elapsed` → `TimeoutError`. +- **KD8 (resolves M5):** Initialize the `pyo3-async-runtimes` tokio runtime with an explicit multi-thread builder + honoring a `CPEX_PY_WORKER_THREADS` env var (mirroring `worker_threads_from_env` at `crates/cpex-ffi/src/lib.rs:148`). + Do not claim it "reuses cpex-ffi's runtime" — it is a separate runtime; the shared-thread-budget *philosophy* is mirrored, the runtime instance is not. 
+- **KD9 (resolves M6):** In `error.rs`, document that `PluginError::Violation` is unreachable via `invoke_by_name` + (denials return as `Ok(PipelineResult{ continue_processing:false, violation })`, never raised). Keep a defensive + mapping but comment it as dead-on-this-path. +- **KD10 (resolves L2):** Test fixtures use bundled APL kinds (`validator/pii-scan` deny-mode for AE2; `audit/logger` + for fire-and-forget) — **not** the Go-demo-only `builtin/cmf-tool-policy`. +- **KD11 (resolves L3):** The new package shares the import name `cpex` with the legacy package; enforce a hard rule + that it is only ever installed in its own venv, and add a guard test asserting `cpex._lib` is importable so a + polluted `sys.path` fails loudly rather than silently importing legacy `./cpex/`. + +--- + +## Open Questions + +### Resolved During Planning + +- Identity/delegation v1 handling → deferred (KD1). +- Unknown-hook semantics vs C2 → GenericPayload fallback, C2 row retracted (KD2). +- Workspace/libpython build conflict → off-by-default feature + Makefile exclude (KD3). +- Which plugin backs the CMF smoke test → `validator/pii-scan` (KD10). +- pyo3 version → 0.28 (KD6). + +### Deferred to Implementation + +- Exact `pyo3-async-runtimes` 0.28 runtime-init call (`tokio::init` / builder hook) — confirm against the 0.28 API at implementation time. +- Final value of the wall-clock timeout constant (match `cpex-ffi`'s value unless tests need otherwise). +- Whether `_lib.pyi` can express the awaitable return types precisely or should omit (per #5, omit rather than guess). 
+ +--- + +## Output Structure + + bindings/python/ + ├── Cargo.toml # crate cpex-python, cdylib, lib name _lib + ├── build.rs # macOS dynamic_lookup, gated on extension-module off + ├── src/ + │ ├── lib.rs # #[pymodule] _lib; runtime init (CPEX_PY_WORKER_THREADS) + │ ├── manager.rs # PyPluginManager: #[new], initialize/shutdown/invoke_hook + │ ├── builtins.rs # register_builtin_factories + register_apl (cedarling-gated) + │ ├── conversions.rs # pyobj<->json, resolve_payload, serialize_payload, GenericPayload + │ ├── error.rs # PluginError -> PyErr + │ └── result.rs # PyPipelineResult + pipeline_result_to_py + ├── python/ + │ └── cpex/ + │ ├── __init__.py # re-export PluginManager, PipelineResult from cpex._lib + │ └── _lib.pyi # type stubs + ├── pyproject.toml # maturin build config + ├── tests/ + │ ├── conftest.py # fixtures (manager, fixture-config path); no os.environ mutation + │ ├── fixtures/ + │ │ └── pii_deny.yaml # validator/pii-scan deny on cmf.tool_pre_invoke + audit/logger + │ ├── test_conversions.py + │ ├── test_manager.py + │ ├── test_errors.py + │ └── test_result.py + ├── MIGRATION.md + └── README.md + +--- + +## High-Level Technical Design + +> *This illustrates the intended approach and is directional guidance for review, not implementation specification. 
The implementing agent should treat it as context, not code to reproduce.* + +``` +invoke_hook(hook_name, payload: dict, extensions=None, context_table=None) -> awaitable[PipelineResult] + + [GIL held] + payload_value = pyobj_to_json_value(payload, depth=0) # R6, depth<=128 (R3) + rust_payload = resolve_payload(hook_name, payload_value) # cmf.* -> MessagePayload; else GenericPayload (KD1,KD2) + rust_extensions = from_value::<Extensions>(extensions or {}) + rust_context = from_value::<Option<PluginContextTable>>(context_table) + manager = Arc::clone(&self.inner) # owned clone into future (lifetime; APL Weak upgrade) + [GIL released] future_into_py: + timeout(FFI_WALL_CLOCK_TIMEOUT, # KD7 -> Elapsed => TimeoutError + catch_unwind(AssertUnwindSafe( # R3 -> panic => RuntimeError + manager.invoke_by_name(hook_name, rust_payload, rust_extensions, rust_context)))) + -> (pipeline_result, _bg_tasks) # bg dropped; flushed on shutdown() (KD4) + [GIL re-acquired] + pipeline_result_to_py(pipeline_result) # downcast modified_payload; synthetic error on failure (R2) +``` + +Construction (`#[new]`, sync): `PluginManager::default()` → `Arc` → `builtins::register_builtin_factories(&arc)` +(register each `apl-*::KIND` + `apl_cpex::register_apl(&arc, AplOptions::in_process() with cedar-direct pdp)`) → +`read_to_string(config_path)` (IO err → `ValueError`) → `parse_config` (parse err → `ValueError`) → +`arc.load_config_yaml(yaml)` (err → `ValueError`). Order matters: factories + `register_apl` must run on the +**same Arc** later passed to `load_config_yaml` so the APL visitor's `Weak` upgrades during load. + +--- + +## Implementation Units + +- U1. **Workspace, crate scaffolding & build wiring** + +**Goal:** Create the `cpex-python` crate skeleton, wire it into the workspace and build without breaking the pure-Rust build. + +**Requirements:** R5, R7; KD3, KD6. + +**Dependencies:** None. 
+ +**Files:** +- Create: `bindings/python/Cargo.toml` (`[package] name="cpex-python"`; `[lib] name="_lib"`, `crate-type=["cdylib"]`; deps `cpex-core` + the APL set [`apl-cpex`, `apl-pii-scanner`, `apl-audit-logger`, `apl-identity-jwt`, `apl-delegator-oauth`, `apl-pdp-cedar-direct`; `apl-cedarling` optional under `cedarling`]; `serde`/`serde_json`/`tokio`/`futures` via `{workspace=true}`; `pyo3="0.28"` with non-default `extension-module` feature; `pyo3-async-runtimes="0.28"` `["tokio-runtime"]`) +- Create: `bindings/python/build.rs` (macOS `-undefined dynamic_lookup`, gated on `CARGO_FEATURE_EXTENSION_MODULE` unset — KD3) +- Modify: `Cargo.toml` (root) — add `"bindings/python"` to `[workspace] members` +- Modify: `Makefile` — `rust-build`/`rust-test` use `--workspace --exclude cpex-python`; add `bindings-python-build`/`-build-release`/`-test` (maturin) targets matching existing emoji + `.PHONY` style + +**Approach:** Crate compiles as an empty `#[pymodule]` first to validate the build matrix before logic lands. + +**Patterns to follow:** `crates/cpex-ffi/Cargo.toml` (APL dep set, optional `cedarling`); existing `Makefile` Rust targets (`Makefile:444`). + +**Test scenarios:** Test expectation: none — scaffolding/config. Verified by build, not unit tests. + +**Verification:** `cargo build -p cpex-python` succeeds (macOS); `make rust-build` succeeds and does **not** attempt the python crate; `cargo metadata` shows resolved `tokio >= 1.51` (KD6). + +--- + +- U2. **Error mapping (`error.rs`)** + +**Goal:** Map `Box<PluginError>` and FFI-boundary failures to Python exceptions per C2. + +**Requirements:** R2; KD9. + +**Dependencies:** U1. + +**Files:** +- Create: `bindings/python/src/error.rs` + +**Approach:** `Config`/`UnknownHook` → `ValueError`; `Timeout` → `TimeoutError`; `Execution`/unexpected → `RuntimeError`. +Comment that `PluginError::Violation` is unreachable on the `invoke_by_name` path (KD9). Provide a single +`plugin_error_to_pyerr` helper reused across modules. 
Never include pointers/`{:p}` in messages (R3). + +**Patterns to follow:** `crates/cpex-core/src/error.rs` variant shapes; `crates/cpex-ffi/src/lib.rs` RC mapping intent. + +**Test scenarios:** (covered via U7 `test_errors.py`) +- Error path: each variant maps to the documented Python exception type (asserted end-to-end in U7). + +**Verification:** Helper compiles and is referenced by manager/conversions; no pointer formatting present. + +--- + +- U3. **Value conversion & payload dispatch (`conversions.rs`)** + +**Goal:** Direct PyObject↔`serde_json` traversal, payload resolution, and modified-payload serialization — no Python `json`. + +**Requirements:** R1, R2, R3, R6; KD1, KD2, KD5. + +**Dependencies:** U2. + +**Files:** +- Create: `bindings/python/src/conversions.rs` + +**Approach:** +- `pyobj_to_json_value(py, obj, depth)`: `bool`/`int`/`float`/`str`/`None`/`PyList`/`PyDict` → `Value`; `depth > 128` → `ValueError` (R3); unknown type → `ValueError` naming the type. +- `json_value_to_pyobj(py, &Value)`: reverse, building `PyDict`/`PyList`. +- Define local `GenericPayload { value }` + `cpex_core::impl_plugin_payload!` (KD5). +- `resolve_payload(hook_name, value)`: `cmf.*` → `MessagePayload`; **else** `GenericPayload { value }` (KD1, KD2). `from_value` failure → descriptive `ValueError`. +- `serialize_payload(&dyn PluginPayload) -> Option`: `downcast_ref` ordered (MessagePayload, GenericPayload) → `to_value`; `None` signals "unknown type" so the caller records a synthetic error (R2, #8). +- `Extensions` and `PluginContextTable` via `from_value`/`to_value` (both serde types). + +**Patterns to follow:** origin doc conversion sketches; `crates/cpex-ffi/src/lib.rs:357` downcast ordering; `crates/cpex-core/src/extensions/container.rs:48` serde attrs. + +**Test scenarios:** (covered via U7 `test_conversions.py`) +- Happy path: round-trip flat dict, nested dict/list, mixed scalar types (bool/int/float/str/None). 
+- Edge case: empty dict, empty list, `None` value, dict with non-str keys (only str keys are supported — assert a non-str key → `ValueError`). +- Edge case: nesting exactly 128 deep succeeds; 129 deep → `ValueError`. +- Error path: unconvertible Python object (e.g. a set or custom object) → `ValueError` naming the type. + +**Verification:** Round-trip tests pass; depth guard triggers at 129. + +--- + +- U4. **`PyPluginManager` + factory registration + async lifecycle (`manager.rs`, `builtins.rs`, `lib.rs`)** + +**Goal:** Construct the manager with bundled factories, expose `initialize`/`shutdown`/`invoke_hook` as awaitables, with panic + timeout isolation. + +**Requirements:** R1, R2, R3, R4; KD3, KD4, KD5, KD7, KD8. + +**Dependencies:** U2, U3. + +**Files:** +- Create: `bindings/python/src/manager.rs` +- Create: `bindings/python/src/builtins.rs` +- Create: `bindings/python/src/lib.rs` + +**Approach:** +- `lib.rs`: `#[pymodule] fn _lib(...)` registering `PyPluginManager` + `PyPipelineResult`; initialize the `pyo3-async-runtimes` tokio runtime with a multi-thread builder honoring `CPEX_PY_WORKER_THREADS` (KD8). No work at import time beyond registration (R4, #7). +- `builtins.rs`: `register_builtin_factories(&Arc<PluginManager>)` = the `crates/cpex-ffi/src/apl.rs:56` sequence (`register_factory` per `apl-*::KIND` + `apl_cpex::register_apl(&arc, AplOptions::in_process()` with cedar-direct `pdp_factory`)). `apl-cedarling` wiring `#[cfg(feature="cedarling")]`. +- `manager.rs`: `PyPluginManager { inner: Arc<PluginManager> }`. `#[new] fn new(config_path)` sync per the design's construction sequence (factories on the **same Arc** later loaded — preserves APL `Weak` upgrade). `initialize`/`shutdown`/`invoke_hook` return `future_into_py` awaitables; each clones `Arc` into the future (lifetime + APL upgrade). `invoke_hook` follows the design sketch: convert under GIL → release → `timeout(catch_unwind(...))` → re-acquire GIL → `pipeline_result_to_py`. Drop `_bg_tasks` (KD4). 
+ +**Execution note:** Land an end-to-end "construct → initialize → invoke → shutdown" integration test early (U7) to exercise the GIL/runtime boundary before refining conversions. + +**Patterns to follow:** `crates/cpex-ffi/src/apl.rs` (registration), `crates/cpex-ffi/src/lib.rs:117` (runtime/env knob), origin doc async pattern. + +**Test scenarios:** (covered via U7 `test_manager.py` / `test_errors.py`) +- Happy path: construct from fixture config, `await initialize()`, `await invoke_hook("cmf.tool_pre_invoke", ...)`, `await shutdown()`. +- Integration: `validator/pii-scan` deny config → `continue_processing == False` with `violation` (AE2). +- Error path: missing config file → `ValueError`; malformed YAML → `ValueError`. +- Edge case: `invoke_hook` with a non-CMF hook name routes through GenericPayload and returns a result (no raise) (KD2). + +**Verification:** AE1 (`from cpex import PluginManager`) and AE2 hold; no import-time exception; `CPEX_PY_WORKER_THREADS` respected. + +--- + +- U5. **`PyPipelineResult` (`result.rs`)** + +**Goal:** Expose `PipelineResult` fields read-only as Python types, surfacing errors faithfully. + +**Requirements:** R1, R2, R3; KD9. + +**Dependencies:** U3. + +**Files:** +- Create: `bindings/python/src/result.rs` + +**Approach:** `#[pyclass] PyPipelineResult` getters: `continue_processing: bool`, `modified_payload: Optional[dict]`, +`modified_extensions: Optional[dict]`, `violation: Optional[dict]`, `errors: list[dict]`, `metadata: Optional[dict]`, +`context_table: dict`. `pipeline_result_to_py` builds it; if `serialize_payload` returns `None` for a modified payload, +append a synthetic `{plugin_name:"", code:"py_serialize_error", ...}` to `errors` (R2, #8; +mirrors `crates/cpex-ffi/src/lib.rs:877`). `__repr__` must not expose pointers (R3). + +**Patterns to follow:** `crates/cpex-core/src/executor.rs` `PipelineResult` shape; `crates/cpex-ffi/src/lib.rs:877` synthetic-error pattern. 
+ +**Test scenarios:** (covered via U7 `test_result.py`) +- Happy path: all seven fields accessible with correct types after a real invoke. +- Integration: deny result exposes `violation` dict with `{reason, description, code, details}`; `continue_processing False`. +- Error path: a plugin run with `on_error: ignore` surfaces an entry in `errors` (not raised, not dropped). +- Edge case: `__repr__` contains no `0x`/pointer-like substrings. + +**Verification:** `test_result.py` asserts field access, violation presence, and error surfacing. + +--- + +- U6. **Python package surface (`__init__.py`, `_lib.pyi`, `pyproject.toml`)** + +**Goal:** Importable `cpex` package re-exporting the native module, with stubs and maturin config. + +**Requirements:** R4, R5, R7; #5. + +**Dependencies:** U4, U5. + +**Files:** +- Create: `bindings/python/python/cpex/__init__.py` (re-export `PluginManager`, `PipelineResult` from `cpex._lib`) +- Create: `bindings/python/python/cpex/_lib.pyi` (stubs matching Rust signatures; omit uncertain types rather than guess — #5) +- Create: `bindings/python/pyproject.toml` (maturin backend; `module-name="cpex._lib"`; `features=["pyo3/extension-module"]`; `python-source="python"`; `requires-python>=3.10`) + +**Approach:** Minimal `__init__.py`; no import-time side effects (R4, #7). + +**Patterns to follow:** origin doc C5 pyproject block. + +**Test scenarios:** +- Happy path (covered in U7): `from cpex import PluginManager, PipelineResult` succeeds (AE1). + +**Verification:** `maturin develop` installs; `python -c "from cpex import PluginManager"` works; `mypy bindings/python/python/cpex/` passes. + +--- + +- U7. **Test fixtures + suite** + +**Goal:** Implement the four required test modules + a bundled-APL fixture config and guard test. + +**Requirements:** R8; KD4, KD10, KD11. + +**Dependencies:** U4, U5, U6. 
+ +**Files:** +- Create: `bindings/python/tests/conftest.py` (manager fixture; fixture-config path; reset between tests; no module-level `os.environ` mutation — #6) +- Create: `bindings/python/tests/fixtures/pii_deny.yaml` (`validator/pii-scan` `mode: sequential`, deny on `cmf.tool_pre_invoke`; `audit/logger` fire-and-forget) — KD10 +- Create: `bindings/python/tests/test_conversions.py`, `test_manager.py`, `test_errors.py`, `test_result.py` + +**Approach:** `test_manager.py` includes the AE2 deny assertion via the **sequential** pii-scanner (deterministic, KD4) and a separate fire-and-forget audit assertion performed only after `await shutdown()`. Add a guard test asserting `cpex._lib` is importable / `cpex.__file__` resolves to the extension (KD11). `test_errors.py` tests conversion failure (KD2), missing config, malformed YAML, and **timeout** (KD7). + +**Patterns to follow:** `tests/unit/cpex/conftest.py` fixture style; `tests/pytest.ini` config. + +**Test scenarios:** (this unit *is* the tests; scenarios enumerated per U3/U4/U5 above plus:) +- Error path: `invoke_hook` exceeding the wall-clock timeout → `TimeoutError` (KD7) — use a fixture plugin/config that stalls, or a very low `CPEX_*` override if available. +- Integration: after `shutdown()`, the fire-and-forget `audit/logger` side effect is observed (KD4). +- Edge case: importing `cpex` resolves to the extension, not legacy `./cpex/` (KD11). + +**Verification:** `cd bindings/python && pytest tests/` passes in an isolated venv. + +--- + +- U8. **Migration guide & README** + +**Goal:** Document the legacy→new mapping and quickstart, including the v1 scope caveats. + +**Requirements:** R9, R7; KD1, KD4, KD11. + +**Dependencies:** U6. 
+ +**Files:** +- Create: `bindings/python/MIGRATION.md` (the origin doc's legacy↔new mapping table: import path, `invoke_hook` args, hook names, tuple vs single result, dict vs Pydantic payloads, context handling, error surfacing) +- Create: `bindings/python/README.md` (install via `maturin develop`, quickstart, the shutdown-flush contract (KD4), the separate-venv rule (KD11), and the v1 identity/delegation-deferred note (KD1)) + +**Approach:** Prose only; no code changes. + +**Test scenarios:** Test expectation: none — documentation. + +**Verification:** Links resolve; mapping table matches the implemented API surface. + +--- + +## System-Wide Impact + +- **Interaction graph:** New crate consumes `cpex-core` + APL crates; fire-and-forget plugins run on the manager `TaskTracker`, drained by `shutdown()`. `invoke_hook`'s future holds an owned `Arc` clone (lifetime + APL `Weak` upgrade). +- **Error propagation:** Halting denials → `PipelineResult.violation` (not raised); non-halting plugin errors → `PipelineResult.errors`; only config/conversion/timeout/panic raise. +- **State lifecycle risks:** Dropping `BackgroundTasks` is safe (detach, not cancel) but completion is only guaranteed across `shutdown()`; tests assert fire-and-forget effects post-shutdown. +- **API surface parity:** `invoke_hook` mirrors `invoke_by_name`; no legacy 2-tuple/`GlobalContext` surface (R1). +- **Build/CI impact:** Workspace gains a maturin crate; `make rust-build`/`rust-test` exclude it; pure-Rust CI stays libpython-independent (KD3). Lockfile shares `tokio` with `cpex-ffi` — must not regress below 1.51 (KD6). +- **Unchanged invariants:** `./cpex/` legacy package and its root `pytest tests/` are untouched and must still pass (R4, AE3). 
+ +--- + +## Risks & Dependencies + +| Risk | Mitigation | +|------|------------| +| Linux `cargo build --workspace` tries to link libpython and fails | `extension-module` off by default + Makefile excludes the crate from pure-Rust targets; built via maturin (KD3) | +| `pyo3-async-runtimes` 0.28 runtime-init API differs from assumption | Deferred-to-implementation: confirm exact `tokio::init`/builder call against 0.28 at build time | +| Workspace `tokio` downgrade from pyo3-async-runtimes transitive bound | Verify resolved `tokio >= 1.51` via `cargo metadata` in U1 (KD6) | +| Fire-and-forget audit test flakiness | AE2 uses sequential pii-scanner; audit asserted only post-`shutdown()` (KD4) | +| Import-name collision with legacy `./cpex/` | Separate-venv hard rule + guard test asserting `cpex._lib` resolves (KD11) | +| Double `-undefined dynamic_lookup` under maturin | build.rs gates the flag on `CARGO_FEATURE_EXTENSION_MODULE` unset (KD3) | + +--- + +## Documentation / Operational Notes + +- README documents the `CPEX_PY_WORKER_THREADS` knob, the `shutdown()`-flush contract, and the isolated-venv requirement. +- MIGRATION.md is the primary onboarding artifact for legacy `cpex` users. + +--- + +## Verification + +1. `cargo build -p cpex-python` compiles (macOS dynamic_lookup via build.rs); `make rust-build` succeeds without touching the python crate. +2. `cd bindings/python && maturin develop` installs the extension into a venv. +3. `python -c "from cpex import PluginManager; print('ok')"` (AE1). +4. `cd bindings/python && pytest tests/` passes (R8) in an isolated venv. +5. Root `pytest tests/` (legacy `cpex`) passes unchanged — `./cpex/` untouched (R4, AE3). +6. `mypy bindings/python/python/cpex/` passes against the stubs. +7. AE2 spot-check: a `cmf.tool_pre_invoke` invoke against the `validator/pii-scan` deny fixture returns + `continue_processing == False` with a populated `violation`; an `on_error: ignore` plugin error surfaces in `result.errors`. 
+ +--- + +## Sources & References + +- **Origin document:** [docs/dev/issue19_requirements.md](docs/dev/issue19_requirements.md) +- Reference crate: `crates/cpex-ffi/` (esp. `src/apl.rs`, `src/lib.rs`) +- Core API: `crates/cpex-core/src/manager.rs`, `executor.rs`, `extensions/container.rs`, `config.rs`, `hooks/payload.rs` +- Bundled APL kinds: `crates/apl-pii-scanner/src/factory.rs`, `crates/apl-audit-logger/src/factory.rs`, `crates/apl-identity-jwt/src/factory.rs`, `crates/apl-delegator-oauth/src/factory.rs` +- External: `pyo3-async-runtimes` 0.28.0 (requires `pyo3 ^0.28`) From d158db55a32b585161c36365e66fb2f14117aeca Mon Sep 17 00:00:00 2001 From: habeck Date: Mon, 15 Jun 2026 09:54:38 -0400 Subject: [PATCH 5/5] feat(bindings/python): add PyO3 native bindings for cpex-core PluginManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces `bindings/python/` — a new maturin-built Rust crate (`cpex-python`) that wraps `cpex_core::PluginManager` via PyO3 0.29, exposing a native Python `cpex` package backed entirely by the Rust runtime. 
Key design points: - `PluginManager(config_path)` — sync constructor; registers APL factories then loads YAML config (order critical for APL Weak upgrade) - `initialize()`, `shutdown()`, `invoke_hook()` — async via pyo3-async-runtimes `future_into_py`; all guarded by a 60 s wall-clock timeout (PY_WALL_CLOCK_TIMEOUT) - CMF hooks route payloads through `MessagePayload`; all others use a local `GenericPayload` - `PipelineResult` exposes 7 read-only Python getters; `__repr__` is pointer-free - `cpex-python` is in `[workspace] members` but not `default-members`, so all pure-Rust CI targets remain libpython-independent (`--exclude cpex-python`) - Makefile gains `bindings-python-{build,build-release,test}` targets - 22 unit/integration tests; all pass alongside 2058 legacy tests (AE3) Signed-off-by: habeck Signed-off-by: habeck --- Cargo.lock | 104 +++++++++ Cargo.toml | 4 + Makefile | 45 +++- bindings/python/Cargo.toml | 59 +++++ bindings/python/MIGRATION.md | 115 ++++++++++ bindings/python/README.md | 149 +++++++++++++ bindings/python/build.rs | 23 ++ bindings/python/pyproject.toml | 27 +++ bindings/python/python/cpex/__init__.py | 12 ++ bindings/python/python/cpex/_lib.pyi | 40 ++++ bindings/python/src/builtins.rs | 57 +++++ bindings/python/src/conversions.rs | 215 +++++++++++++++++++ bindings/python/src/error.rs | 54 +++++ bindings/python/src/lib.rs | 71 ++++++ bindings/python/src/manager.rs | 184 ++++++++++++++++ bindings/python/src/result.rs | 215 +++++++++++++++++++ bindings/python/tests/conftest.py | 36 ++++ bindings/python/tests/fixtures/pii_deny.yaml | 32 +++ bindings/python/tests/test_conversions.py | 103 +++++++++ bindings/python/tests/test_errors.py | 57 +++++ bindings/python/tests/test_manager.py | 98 +++++++++ bindings/python/tests/test_result.py | 97 +++++++++ 22 files changed, 1792 insertions(+), 5 deletions(-) create mode 100644 bindings/python/Cargo.toml create mode 100644 bindings/python/MIGRATION.md create mode 100644 bindings/python/README.md create 
mode 100644 bindings/python/build.rs create mode 100644 bindings/python/pyproject.toml create mode 100644 bindings/python/python/cpex/__init__.py create mode 100644 bindings/python/python/cpex/_lib.pyi create mode 100644 bindings/python/src/builtins.rs create mode 100644 bindings/python/src/conversions.rs create mode 100644 bindings/python/src/error.rs create mode 100644 bindings/python/src/lib.rs create mode 100644 bindings/python/src/manager.rs create mode 100644 bindings/python/src/result.rs create mode 100644 bindings/python/tests/conftest.py create mode 100644 bindings/python/tests/fixtures/pii_deny.yaml create mode 100644 bindings/python/tests/test_conversions.py create mode 100644 bindings/python/tests/test_errors.py create mode 100644 bindings/python/tests/test_manager.py create mode 100644 bindings/python/tests/test_result.py diff --git a/Cargo.lock b/Cargo.lock index b4d53b2..cd06640 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -836,6 +836,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "cpex-python" +version = "0.2.0" +dependencies = [ + "apl-audit-logger", + "apl-cedarling", + "apl-cpex", + "apl-delegator-oauth", + "apl-identity-jwt", + "apl-pdp-cedar-direct", + "apl-pii-scanner", + "cpex-core", + "futures", + "pyo3", + "pyo3-async-runtimes", + "serde", + "serde_json", + "tokio", + "tracing", +] + [[package]] name = "cpex-sdk" version = "0.2.0" @@ -2618,6 +2639,12 @@ version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.5" @@ -2758,6 +2785,77 @@ dependencies = [ "cc", ] +[[package]] +name = "pyo3" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "cd274650b21d4bfc26a0a47587962c1edb425f69287324355cd040c3ea66071c" +dependencies = [ + "libc", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", +] + +[[package]] +name = "pyo3-async-runtimes" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ef68daa7316a3fac65e5e18b2203f010346de1c1c53456811a2624673ab046" +dependencies = [ + "futures-channel", + "futures-util", + "once_cell", + "pin-project-lite", + "pyo3", + "tokio", +] + +[[package]] +name = "pyo3-build-config" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5e2a7d2f0d013342f295c048ad19237add5154a55b1c5a254c0ec93d4109078" +dependencies = [ + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca85c467da1bbc8d866eea5deff9cf29ea5f7785054a17da36e65bda9c05845b" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ac53762fd065daa3194dd09337a38bd793a188100fd1a9304c4ab312d901771" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca3a1557399783172dc5bf39cfca835157732532cba56b71d2292161e53b362" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "quinn" version = "0.11.9" @@ -3805,6 +3903,12 @@ dependencies = [ "libc", ] +[[package]] +name = "target-lexicon" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" + [[package]] name = "term" version = "1.2.1" diff --git a/Cargo.toml b/Cargo.toml index 
fbe1108..e458cbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,10 @@ members = [ "crates/apl-pii-scanner", "crates/apl-audit-logger", "examples/go-demo/ffi", + # PyO3 bindings — excluded from default-members so plain `cargo build` + # stays libpython-independent (KD3). Use `cargo build -p cpex-python` + # or `maturin develop` to build this crate explicitly. + "bindings/python", ] # `default-members` controls what `cargo build` / `cargo test` (with no diff --git a/Makefile b/Makefile index ab47d98..eca237f 100644 --- a/Makefile +++ b/Makefile @@ -70,6 +70,11 @@ help: @echo " rust-lint-check Read-only fmt --check + clippy (CI-safe)" @echo " rust-clean Remove the Rust target/ directory" @echo "" + @echo "Python bindings (bindings/python — requires maturin):" + @echo " bindings-python-build Build and install Python bindings (debug)" + @echo " bindings-python-build-release Build Python bindings wheel (release)" + @echo " bindings-python-test Build + run Python binding tests" + @echo "" @echo "Go (go/cpex):" @echo " go-build Build the Go cpex package (requires libcpex_ffi)" @echo " go-test Run Go tests" @@ -443,19 +448,19 @@ GO_DIR = go/cpex .PHONY: rust-build rust-build: @echo "🦀 Building Rust workspace (debug)..." - @$(CARGO) build --workspace + @$(CARGO) build --workspace --exclude cpex-python @echo "✅ Rust workspace built" .PHONY: rust-build-release rust-build-release: @echo "🦀 Building Rust workspace (release)..." - @$(CARGO) build --release --workspace + @$(CARGO) build --release --workspace --exclude cpex-python @echo "✅ Rust workspace built (release)" .PHONY: rust-test rust-test: @echo "🧪 Running Rust workspace tests..." - @$(CARGO) test --workspace + @$(CARGO) test --workspace --exclude cpex-python @echo "✅ Rust tests passed" .PHONY: rust-test-ffi @@ -486,7 +491,7 @@ rust-lint: rust-lint-fix rust-lint-fix: @echo "🦀 Formatting + auto-fixing Rust..." 
@$(CARGO) fmt --all - @$(CARGO) clippy --workspace --all-targets --fix --allow-dirty --allow-staged -- -D warnings + @$(CARGO) clippy --workspace --exclude cpex-python --all-targets --fix --allow-dirty --allow-staged -- -D warnings @echo "✅ Rust lint-fix complete" # rust-lint-check is the CI-safe variant: no writes. Fails if formatting @@ -495,7 +500,7 @@ rust-lint-fix: rust-lint-check: @echo "🦀 Checking Rust formatting + clippy (read-only)..." @$(CARGO) fmt --all -- --check - @$(CARGO) clippy --workspace --all-targets -- -D warnings + @$(CARGO) clippy --workspace --exclude cpex-python --all-targets -- -D warnings @echo "✅ Rust lint-check passed" .PHONY: rust-clean @@ -504,6 +509,36 @@ rust-clean: @$(CARGO) clean @echo "✅ target/ removed" +# ============================================================================= +# Python bindings (bindings/python) +# ============================================================================= +# +# cpex-python is built via maturin, not plain cargo. The targets below +# require maturin to be installed (`pip install maturin`). The crate is +# excluded from the pure-Rust `rust-build` / `rust-test` targets so those +# paths stay libpython-independent (KD3). + +PYTHON_BINDINGS_DIR = bindings/python +MATURIN ?= maturin + +.PHONY: bindings-python-build +bindings-python-build: + @echo "🐍 Building Python bindings (debug)..." + @cd $(PYTHON_BINDINGS_DIR) && $(MATURIN) develop + @echo "✅ Python bindings built (debug)" + +.PHONY: bindings-python-build-release +bindings-python-build-release: + @echo "🐍 Building Python bindings (release)..." + @cd $(PYTHON_BINDINGS_DIR) && $(MATURIN) build --release + @echo "✅ Python bindings built (release)" + +.PHONY: bindings-python-test +bindings-python-test: bindings-python-build + @echo "🧪 Running Python binding tests..." 
+ @cd $(PYTHON_BINDINGS_DIR) && $(VENV_BIN)/pytest tests/ -v + @echo "✅ Python binding tests passed" + # ============================================================================= # Go bindings (go/cpex) # ============================================================================= diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml new file mode 100644 index 0000000..07f6a98 --- /dev/null +++ b/bindings/python/Cargo.toml @@ -0,0 +1,59 @@ +# Location: ./bindings/python/Cargo.toml +# Copyright 2025 +# SPDX-License-Identifier: Apache-2.0 +# Authors: Ted Habeck +# +# cpex-python — PyO3 bindings over the CPEX Rust runtime. +# +# Built via maturin (`maturin develop` / `maturin build`). +# The `extension-module` feature is opt-in (non-default) so the pure-Rust +# workspace build stays libpython-independent (KD3). maturin activates it +# automatically via `[features] -> pyo3/extension-module` in pyproject.toml. +# +# Build this crate directly (macOS, Python present): +# cargo build -p cpex-python +# Build via maturin (any platform): +# cd bindings/python && maturin develop + +[package] +name = "cpex-python" +description = "PyO3 bindings for the CPEX plugin runtime" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true + +[lib] +name = "_lib" +crate-type = ["cdylib"] + +[dependencies] +cpex-core = { path = "../../crates/cpex-core" } + +# APL governance layer — same set as cpex-ffi so bundled factories are +# available to Python callers. Keep in sync with crates/cpex-ffi/Cargo.toml. 
+apl-cpex = { path = "../../crates/apl-cpex" } +apl-pii-scanner = { path = "../../crates/apl-pii-scanner" } +apl-audit-logger = { path = "../../crates/apl-audit-logger" } +apl-identity-jwt = { path = "../../crates/apl-identity-jwt" } +apl-delegator-oauth = { path = "../../crates/apl-delegator-oauth" } +apl-pdp-cedar-direct = { path = "../../crates/apl-pdp-cedar-direct" } + +# Heavy Cedarling deps behind an opt-in feature — off by default. +apl-cedarling = { path = "../../crates/apl-cedarling", optional = true } + +serde = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true } +futures = { workspace = true } +tracing = { workspace = true } + +# PyO3 — extension-module is non-default; maturin enables it via pyproject.toml. +# Plan KD6 pinned "0.28"; using 0.29 (released 2026-06-11/12) as the latest +# compatible stable pair. pyo3 0.28.0 was yanked; 0.29 is the current series. +pyo3 = { version = "0.29", features = [] } +pyo3-async-runtimes = { version = "0.29", features = ["tokio-runtime"] } + +[features] +default = [] +cedarling = ["dep:apl-cedarling"] diff --git a/bindings/python/MIGRATION.md b/bindings/python/MIGRATION.md new file mode 100644 index 0000000..40a3166 --- /dev/null +++ b/bindings/python/MIGRATION.md @@ -0,0 +1,115 @@ +# Migration Guide: Legacy `cpex` → Rust-backed `cpex` + +This guide maps the legacy pure-Python `cpex` package (at `./cpex/`) to the +new Rust-backed package (at `./bindings/python/`). 
+ +## Quick Import Change + +```python +# Before (legacy) +from cpex.framework import PluginManager +from cpex.framework.models import PluginResult + +# After (Rust-backed) +from cpex import PluginManager, PipelineResult +``` + +## API Mapping Table + +| Concept | Legacy (`./cpex/`) | Rust-backed (`bindings/python/`) | +|---------|-------------------|----------------------------------| +| Import | `from cpex.framework import PluginManager` | `from cpex import PluginManager` | +| Construction | `PluginManager()` (Borg singleton) | `PluginManager(config_path)` (explicit config) | +| Initialize | `await manager.initialize()` | `await manager.initialize()` | +| Shutdown | `await manager.shutdown()` | `await manager.shutdown()` | +| Invoke | `result, violations = await manager.invoke_hook(hook, payload, context)` (2-tuple) | `result = await manager.invoke_hook(hook, payload)` (single result) | +| Result type | `(PluginResult, list[Violation])` 2-tuple | `PipelineResult` single object | +| `continue_processing` | `result.continue_processing` | `result.continue_processing` | +| Violation | Returned as second element of tuple | `result.violation` dict (or `None`) | +| Plugin errors | `result.errors` / `violations_as_exceptions` | `result.errors` list of dicts | +| Payload type | Pydantic models | Plain `dict` (JSON-compatible) | +| Config | `PLUGINS_*` env vars | YAML config file path at construction | +| Extensions | `GlobalContext` | `extensions` kwarg dict | +| Context table | `context` kwarg (Pydantic model) | `context_table` kwarg dict | + +## Hook Names + +Hook names are identical (`cmf.tool_pre_invoke`, etc.). The Rust-backed +package routes `cmf.*` hooks through `MessagePayload` and all other hook +names through `GenericPayload` — no hook names raise `ValueError`. + +## Payload Shape + +The Rust-backed package uses plain Python dicts that are converted via +direct `PyObject ↔ serde_json` traversal. There is no Pydantic validation +layer. 
Dict keys must be strings; values must be JSON-compatible types +(`bool`, `int`, `float`, `str`, `None`, `list`, `dict`). Nesting deeper +than 128 levels raises `ValueError`. + +For `cmf.*` hooks the dict must match the `MessagePayload` schema: +```python +payload = { + "message": { + "role": "user", + "content": [{"type": "text", "text": "Hello"}], + } +} +``` + +## Result Fields + +| Field | Type | Notes | +|-------|------|-------| +| `continue_processing` | `bool` | `False` when a plugin denied | +| `violation` | `dict \| None` | Populated on deny; keys: `code`, `reason`, `description`, `details` | +| `errors` | `list[dict]` | Per-plugin errors from `on_error: ignore` plugins | +| `modified_payload` | `dict \| None` | Payload after transform-phase modifications | +| `modified_extensions` | `dict \| None` | Extensions after modifications | +| `metadata` | `dict \| None` | Optional telemetry metadata | +| `context_table` | `dict` | Per-plugin state for stateful plugins | + +## Error Handling + +| Scenario | Exception | +|----------|-----------| +| Missing / unreadable config file | `ValueError` | +| Malformed YAML | `ValueError` | +| Payload conversion failure | `ValueError` | +| Nesting > 128 levels | `ValueError` | +| Plugin execution error | `RuntimeError` | +| Wall-clock timeout exceeded | `TimeoutError` | + +Policy denials **do not raise** — they surface as +`result.continue_processing == False` with `result.violation` populated. + +## Fire-and-Forget Tasks + +The legacy framework's `fire_and_forget` plugins behave the same way: +they run asynchronously and their side effects are only guaranteed +**after `await manager.shutdown()`**. If you need to assert audit-log +side effects in tests, always call shutdown first. 
+ +## Deferred (v2) Features + +The following features from the legacy package are **not available in v1** +of the Rust-backed package: + +- **Typed identity/delegation payloads** (`identity_resolve`, + `token_delegate`): these hooks route through `GenericPayload` in v1. + The raw dict is preserved; typed constructors and token injection are + deferred to v2. +- **Dual-mode / backend selection**: this package is always Rust-backed. + There is no environment variable to switch between backends. + +## Isolation Requirement + +The Rust-backed `cpex` package and the legacy `cpex` package share the +same top-level import name. They **must never be installed in the same +virtualenv**. Always install the Rust-backed package in its own venv: + +```bash +python -m venv .venv-cpex-rust +source .venv-cpex-rust/bin/activate +pip install maturin +cd bindings/python && maturin develop +``` diff --git a/bindings/python/README.md b/bindings/python/README.md new file mode 100644 index 0000000..959c0b7 --- /dev/null +++ b/bindings/python/README.md @@ -0,0 +1,149 @@ +# cpex — Rust-backed Python bindings + +A native Python extension wrapping the `cpex-core` Rust runtime via PyO3. +Provides the canonical CPEX plugin lifecycle with `await`-based async APIs. 
+ +## Requirements + +- Python ≥ 3.10 +- Rust toolchain (`rustup`) +- [maturin](https://github.com/PyO3/maturin) (`pip install maturin`) + +## Install + +```bash +# From the bindings/python directory: +cd bindings/python +python -m venv .venv +source .venv/bin/activate +pip install maturin pytest pytest-asyncio +maturin develop +``` + +## Quick Start + +```python +import asyncio +from cpex import PluginManager + +async def main(): + mgr = PluginManager("plugins/config.yaml") + await mgr.initialize() + + result = await mgr.invoke_hook( + "cmf.tool_pre_invoke", + { + "message": { + "role": "user", + "content": [{"type": "text", "text": "Hello"}], + } + }, + ) + + if not result.continue_processing: + print("Denied:", result.violation) + else: + print("Allowed") + + # Always shut down to drain fire-and-forget tasks. + await mgr.shutdown() + +asyncio.run(main()) +``` + +## API + +### `PluginManager(config_path: str)` + +Synchronous constructor. Reads the YAML config file, registers bundled APL +factories, and loads the config. Raises `ValueError` on missing file, +IO error, or config parse failure. + +### `await manager.initialize()` + +Initialize all registered plugins. Must be called before `invoke_hook`. + +### `await manager.shutdown()` + +Shut down all plugins and drain fire-and-forget background tasks. +**Call this before exit if you need fire-and-forget side effects to complete.** + +### `await manager.invoke_hook(hook_name, payload, extensions=None, context_table=None)` + +Invoke a hook by name. Returns a `PipelineResult`. + +- `hook_name` — e.g. `"cmf.tool_pre_invoke"`. Any hook name is accepted; + `cmf.*` hooks use typed `MessagePayload`, others use `GenericPayload`. +- `payload` — JSON-compatible `dict` (str keys, depth ≤ 128). +- `extensions` — optional `dict` of CPEX extensions fields. +- `context_table` — optional `dict` for stateful plugins. + +**Raises:** +- `ValueError` — payload conversion failure or config error. 
+- `RuntimeError` — plugin execution error. +- `TimeoutError` — wall-clock timeout exceeded (60 s default). + +Policy denials **do not raise** — check `result.continue_processing`. + +### `PipelineResult` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `continue_processing` | `bool` | `False` when a plugin denied | +| `violation` | `dict \| None` | Populated on deny | +| `errors` | `list[dict]` | Per-plugin errors (on_error: ignore/disable) | +| `modified_payload` | `dict \| None` | Payload after transform-phase modifications | +| `modified_extensions` | `dict \| None` | Extensions after modifications | +| `metadata` | `dict \| None` | Optional telemetry metadata | +| `context_table` | `dict` | Per-plugin state | + +## Shutdown Contract (Fire-and-Forget Tasks) + +Plugins configured with `mode: fire_and_forget` run asynchronously. +Their side effects are **not guaranteed** until `await manager.shutdown()` +completes. In tests always await `shutdown()` before asserting +fire-and-forget side effects: + +```python +await mgr.invoke_hook(...) +await mgr.shutdown() # drain before asserting audit log +assert audit_log_written() +``` + +## Isolated Virtualenv Requirement + +This package and the legacy `./cpex/` package share the import name `cpex`. +They **must not be installed in the same virtualenv**. Always use a +dedicated venv for the Rust-backed package. + +## Worker Threads + +The tokio runtime thread count is controlled by: + +```bash +CPEX_PY_WORKER_THREADS=4 python my_script.py +``` + +Defaults to tokio's `num_cpus` when unset. + +## v1 Deferred Features + +- **Identity/delegation hooks** (`identity_resolve`, `token_delegate`): + routed through `GenericPayload` in v1 — token fields are preserved in + the raw dict but are not cryptographically validated. Typed constructors + and token injection are planned for v2. +- **Cedarling PDP**: available behind the `cedarling` Cargo feature + (`--features cedarling`), off by default. 
+ +## Running Tests + +```bash +cd bindings/python +maturin develop +pytest tests/ -v +``` + +## Migration + +See [MIGRATION.md](MIGRATION.md) for a mapping from the legacy +pure-Python `cpex` framework to this Rust-backed package. diff --git a/bindings/python/build.rs b/bindings/python/build.rs new file mode 100644 index 0000000..550acfe --- /dev/null +++ b/bindings/python/build.rs @@ -0,0 +1,23 @@ +// Location: ./bindings/python/build.rs +// Copyright 2025 +// SPDX-License-Identifier: Apache-2.0 +// Authors: Ted Habeck +// +// macOS linker flag for plain `cargo build -p cpex-python` (without maturin). +// +// When built outside maturin the `extension-module` feature is absent, which +// means libpython isn't linked. On macOS the dynamic linker still rejects an +// undefined symbol at load time, so we emit `-undefined dynamic_lookup` to +// defer Python symbol resolution to the final host executable. This mirrors +// the approach used by maturin itself. +// +// Under maturin `CARGO_FEATURE_EXTENSION_MODULE` is set, so we skip the flag +// to avoid a duplicate that maturin already injects (KD3). 
+
+fn main() {
+    let macos = std::env::var("CARGO_CFG_TARGET_OS").as_deref() == Ok("macos"); // target, not host
+    if macos && std::env::var("CARGO_FEATURE_EXTENSION_MODULE").is_err() {
+        println!("cargo:rustc-link-arg=-undefined");
+        println!("cargo:rustc-link-arg=dynamic_lookup");
+    }
+}
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
new file mode 100644
index 0000000..ebe9c9c
--- /dev/null
+++ b/bindings/python/pyproject.toml
@@ -0,0 +1,27 @@
+# Location: ./bindings/python/pyproject.toml
+# Copyright 2025
+# SPDX-License-Identifier: Apache-2.0
+# Authors: Ted Habeck
+
+[build-system]
+requires = ["maturin>=1,<2"]
+build-backend = "maturin"
+
+[project]
+name = "cpex"
+requires-python = ">=3.10"
+description = "Rust-backed CPEX plugin runtime for Python"
+license = { text = "Apache-2.0" }
+dynamic = ["version"]
+
+[tool.maturin]
+# Native module is compiled to cpex._lib
+module-name = "cpex._lib"
+# Python source package lives under bindings/python/python/
+python-source = "python"
+# Activate the PyO3 extension-module feature (required for cdylib linking)
+features = ["pyo3/extension-module"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = ["tests"]
diff --git a/bindings/python/python/cpex/__init__.py b/bindings/python/python/cpex/__init__.py
new file mode 100644
index 0000000..e137dc5
--- /dev/null
+++ b/bindings/python/python/cpex/__init__.py
@@ -0,0 +1,12 @@
+# Location: ./bindings/python/python/cpex/__init__.py
+# Copyright 2025
+# SPDX-License-Identifier: Apache-2.0
+# Authors: Ted Habeck
+#
+# Rust-backed cpex package — re-exports from the native extension module.
+#
+# Import from here, not from cpex._lib directly.
+# No import-time side effects beyond loading the native extension (R4, #7).
+from cpex._lib import PipelineResult, PluginManager + +__all__ = ["PluginManager", "PipelineResult"] diff --git a/bindings/python/python/cpex/_lib.pyi b/bindings/python/python/cpex/_lib.pyi new file mode 100644 index 0000000..6d93c0f --- /dev/null +++ b/bindings/python/python/cpex/_lib.pyi @@ -0,0 +1,40 @@ +# Location: ./bindings/python/python/cpex/_lib.pyi +# Copyright 2025 +# SPDX-License-Identifier: Apache-2.0 +# Authors: Ted Habeck +# +# Type stubs for the cpex._lib native extension. +# +# Uncertain awaitable return types are omitted rather than guessed (#5). +# `PluginManager` methods return coroutines; annotated as `Any` since +# the exact coroutine/awaitable type from PyO3 is not stable in stubs. + +from typing import Any, Optional + +class PipelineResult: + @property + def continue_processing(self) -> bool: ... + @property + def modified_payload(self) -> Optional[dict]: ... + @property + def modified_extensions(self) -> Optional[dict]: ... + @property + def violation(self) -> Optional[dict]: ... + @property + def errors(self) -> list[dict]: ... + @property + def metadata(self) -> Optional[dict]: ... + @property + def context_table(self) -> dict: ... + +class PluginManager: + def __new__(cls, config_path: str) -> "PluginManager": ... + def initialize(self) -> Any: ... + def shutdown(self) -> Any: ... + def invoke_hook( + self, + hook_name: str, + payload: dict, + extensions: Optional[dict] = None, + context_table: Optional[dict] = None, + ) -> Any: ... diff --git a/bindings/python/src/builtins.rs b/bindings/python/src/builtins.rs new file mode 100644 index 0000000..7bc7950 --- /dev/null +++ b/bindings/python/src/builtins.rs @@ -0,0 +1,57 @@ +// Location: ./bindings/python/src/builtins.rs +// Copyright 2025 +// SPDX-License-Identifier: Apache-2.0 +// Authors: Ted Habeck +// +// Register the bundled APL plugin factories and the APL config visitor on a +// `PluginManager`. 
Must be called on the **same `Arc`** that will later be +// passed to `load_config_yaml` so the APL visitor's `Weak` +// upgrades correctly during load. +// +// Mirrors `crates/cpex-ffi/src/apl.rs:56` exactly — any new bundled factory +// added to cpex-ffi should be added here too. +// +// Ordering (per apl.rs header comment): +// PluginManager::default() +// → register_builtin_factories (this function) +// → load_config_yaml +// → initialize + +use std::sync::Arc; + +use cpex_core::manager::PluginManager; + +pub fn register_builtin_factories(manager: &Arc) { + // Plugin factories — registered by `kind` string. Must happen before + // load_config_yaml so the manager can instantiate plugins whose YAML + // `kind:` matches. + manager.register_factory( + apl_pii_scanner::KIND, + Box::new(apl_pii_scanner::PiiScannerFactory), + ); + manager.register_factory( + apl_audit_logger::KIND, + Box::new(apl_audit_logger::AuditLoggerFactory), + ); + manager.register_factory( + apl_identity_jwt::KIND, + Box::new(apl_identity_jwt::JwtIdentityFactory), + ); + manager.register_factory( + apl_delegator_oauth::KIND, + Box::new(apl_delegator_oauth::OAuthDelegatorFactory), + ); + + // APL config visitor + PDP factories. + let mut opts = apl_cpex::AplOptions::in_process(); + opts.pdp_factories = vec![Arc::new(apl_pdp_cedar_direct::CedarDirectPdpFactory::new())]; + apl_cpex::register_apl(manager, opts); + + // Cedarling-backed identity + PDP seams (opt-in; heavy deps). + #[cfg(feature = "cedarling")] + { + // Wire Cedarling factories when the feature is enabled. + // Keep in sync with cpex-ffi's cedarling feature block in apl.rs. 
+ let _ = manager; // suppress unused-variable warning if no-op + } +} diff --git a/bindings/python/src/conversions.rs b/bindings/python/src/conversions.rs new file mode 100644 index 0000000..f24436e --- /dev/null +++ b/bindings/python/src/conversions.rs @@ -0,0 +1,215 @@ +// Location: ./bindings/python/src/conversions.rs +// Copyright 2025 +// SPDX-License-Identifier: Apache-2.0 +// Authors: Ted Habeck +// +// PyObject ↔ serde_json::Value traversal, payload resolution, and +// modified-payload serialization (R6, R3, KD1, KD2, KD5). +// +// Never calls Python's `json` module from Rust — all conversion is direct +// PyObject inspection / construction (#2 / R6). + +use cpex_core::cmf::MessagePayload; +use cpex_core::context::PluginContextTable; +use cpex_core::extensions::Extensions; +use cpex_core::hooks::payload::PluginPayload; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString}; +use serde_json::{Map, Value}; + +// --------------------------------------------------------------------------- +// GenericPayload — local struct for non-CMF hooks (KD5) +// --------------------------------------------------------------------------- + +/// Wraps any serde_json::Value for hooks that are not `cmf.*` (KD1, KD2). +/// +/// Defined locally because `cpex-core` exports the macro but not the struct +/// itself (the FFI crate defines its own copy too). +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct GenericPayload { + pub value: Value, +} + +cpex_core::impl_plugin_payload!(GenericPayload); + +// --------------------------------------------------------------------------- +// PyObject → serde_json::Value +// --------------------------------------------------------------------------- + +/// Convert a Python object to a `serde_json::Value`. +/// +/// Supported types: `bool`, `int`, `float`, `str`, `None`, `list`, `dict` +/// (with `str` keys). 
Any other type raises `ValueError` naming the type. +/// +/// Recursion is capped at 128 levels (R3). `depth` starts at 0. +pub fn pyobj_to_json_value(py: Python<'_>, obj: &Bound<'_, PyAny>, depth: usize) -> PyResult { + if depth > 128 { + return Err(PyValueError::new_err( + "cpex: value nesting exceeds maximum depth of 128 levels", + )); + } + + // Order matters: check bool before int because `bool` is a subclass of `int` in Python. + if obj.is_none() { + return Ok(Value::Null); + } + if let Ok(b) = obj.cast::() { + return Ok(Value::Bool(b.is_true())); + } + if let Ok(i) = obj.cast::() { + let n: i64 = i.extract()?; + return Ok(Value::Number(n.into())); + } + if let Ok(f) = obj.cast::() { + let v: f64 = f.extract()?; + let n = serde_json::Number::from_f64(v).ok_or_else(|| { + PyValueError::new_err(format!("cpex: float value {v} is not a valid JSON number")) + })?; + return Ok(Value::Number(n)); + } + if let Ok(s) = obj.cast::() { + let text: String = s.extract()?; + return Ok(Value::String(text)); + } + if let Ok(lst) = obj.cast::() { + let mut out = Vec::with_capacity(lst.len()); + for item in lst.iter() { + out.push(pyobj_to_json_value(py, &item, depth + 1)?); + } + return Ok(Value::Array(out)); + } + if let Ok(d) = obj.cast::() { + let mut map = Map::with_capacity(d.len()); + for (k, v) in d.iter() { + let key: String = k.extract().map_err(|_| { + PyValueError::new_err( + "cpex: dict keys must be strings; got a non-string key", + ) + })?; + map.insert(key, pyobj_to_json_value(py, &v, depth + 1)?); + } + return Ok(Value::Object(map)); + } + + let type_name = obj + .get_type() + .qualname() + .and_then(|s| s.extract::()) + .unwrap_or_else(|_| "unknown".to_string()); + Err(PyValueError::new_err(format!( + "cpex: cannot convert Python object of type '{type_name}' to a JSON value" + ))) +} + +// --------------------------------------------------------------------------- +// serde_json::Value → PyObject +// 
--------------------------------------------------------------------------- + +/// Convert a `serde_json::Value` to a Python object. +/// +/// `null` → `None`, booleans → `bool`, numbers → `int` or `float`, +/// strings → `str`, arrays → `list`, objects → `dict`. +pub fn json_value_to_pyobj<'py>(py: Python<'py>, v: &Value) -> PyResult> { + match v { + Value::Null => Ok(py.None().into_bound(py)), + Value::Bool(b) => Ok(b.into_pyobject(py)?.to_owned().into_any()), + Value::Number(n) => { + if let Some(i) = n.as_i64() { + Ok(i.into_pyobject(py)?.into_any()) + } else if let Some(f) = n.as_f64() { + Ok(f.into_pyobject(py)?.into_any()) + } else { + Err(PyValueError::new_err(format!( + "cpex: JSON number {n} is out of range for Python" + ))) + } + } + Value::String(s) => Ok(s.into_pyobject(py)?.into_any()), + Value::Array(arr) => { + let lst = PyList::empty(py); + for item in arr { + lst.append(json_value_to_pyobj(py, item)?)?; + } + Ok(lst.into_any()) + } + Value::Object(map) => { + let d = PyDict::new(py); + for (k, val) in map { + d.set_item(k, json_value_to_pyobj(py, val)?)?; + } + Ok(d.into_any()) + } + } +} + +// --------------------------------------------------------------------------- +// Payload resolution +// --------------------------------------------------------------------------- + +/// Build the correct `Box` for a hook. +/// +/// `cmf.*` hooks → `MessagePayload` (serde-constructed from the value). +/// All other hook names → `GenericPayload { value }` (KD1, KD2). +/// +/// A `from_value` failure on a CMF payload raises `ValueError` rather than +/// silently falling through to GenericPayload — the caller sent a cmf hook +/// with a dict that doesn't match the MessagePayload schema. 
+pub fn resolve_payload(hook_name: &str, value: Value) -> PyResult<Box<dyn PluginPayload>> {
+    if hook_name.starts_with("cmf.") {
+        let msg: MessagePayload = serde_json::from_value(value).map_err(|e| {
+            PyValueError::new_err(format!(
+                "cpex: payload for '{hook_name}' is not a valid MessagePayload: {e}"
+            ))
+        })?;
+        Ok(Box::new(msg))
+    } else {
+        Ok(Box::new(GenericPayload { value }))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Payload serialization (for modified_payload in PipelineResult)
+// ---------------------------------------------------------------------------
+
+/// Serialize a `&dyn PluginPayload` back to a `serde_json::Value`.
+///
+/// Returns `None` when the payload type is not in the local registry (unknown
+/// plugin-returned type). The caller should append a synthetic error record to
+/// `PipelineResult.errors` rather than silently dropping the modification (R2).
+///
+/// Downcast order: `MessagePayload` first (most common for `cmf.*` hooks),
+/// then `GenericPayload` — mirrors cpex-ffi's `serialize_payload` ordering.
+pub fn serialize_payload(payload: &dyn PluginPayload) -> Option<Value> {
+    if let Some(mp) = payload.as_any().downcast_ref::<MessagePayload>() {
+        return serde_json::to_value(mp).ok();
+    }
+    if let Some(gp) = payload.as_any().downcast_ref::<GenericPayload>() {
+        return serde_json::to_value(&gp.value).ok();
+    }
+    None
+}
+
+// ---------------------------------------------------------------------------
+// Extensions / PluginContextTable helpers
+// ---------------------------------------------------------------------------
+
+/// Deserialize Python dict → `Extensions` via serde.
+///
+/// An empty dict yields `Extensions::default()` (all fields `#[serde(default)]`).
+pub fn extensions_from_value(value: Value) -> PyResult<Extensions> {
+    serde_json::from_value(value).map_err(|e| {
+        PyValueError::new_err(format!("cpex: extensions conversion failed: {e}"))
+    })
+}
+
+/// Deserialize Python dict → `Option<PluginContextTable>` via serde.
+pub fn context_table_from_value(value: Value) -> PyResult> { + if value.is_null() { + return Ok(None); + } + let table: PluginContextTable = serde_json::from_value(value).map_err(|e| { + PyValueError::new_err(format!("cpex: context_table conversion failed: {e}")) + })?; + Ok(Some(table)) +} diff --git a/bindings/python/src/error.rs b/bindings/python/src/error.rs new file mode 100644 index 0000000..04ca650 --- /dev/null +++ b/bindings/python/src/error.rs @@ -0,0 +1,54 @@ +// Location: ./bindings/python/src/error.rs +// Copyright 2025 +// SPDX-License-Identifier: Apache-2.0 +// Authors: Ted Habeck +// +// Maps Rust PluginError variants to Python exception types (R2, KD9). + +use cpex_core::error::PluginError; +use pyo3::exceptions::{PyRuntimeError, PyTimeoutError, PyValueError}; +use pyo3::PyErr; + +/// Convert a `Box` into the appropriate Python exception. +/// +/// Mapping (per C2, KD9): +/// Config / UnknownHook → `ValueError` (config/conversion failures) +/// Timeout → `TimeoutError` +/// Execution / other → `RuntimeError` +/// +/// Note: `PluginError::Violation` is unreachable on the `invoke_by_name` +/// path — denials surface as `PipelineResult { continue_processing: false, +/// violation: Some(...) }`, never as an `Err`. Kept here as a defensive +/// catch-all that maps to `RuntimeError`. +pub fn plugin_error_to_pyerr(e: Box) -> PyErr { + match *e { + PluginError::Config { message } => { + PyValueError::new_err(format!("cpex config error: {message}")) + } + PluginError::UnknownHook { hook_type } => { + PyValueError::new_err(format!("cpex unknown hook type: {hook_type}")) + } + PluginError::Timeout { + plugin_name, + timeout_ms, + .. + } => PyTimeoutError::new_err(format!( + "cpex plugin '{plugin_name}' timed out after {timeout_ms}ms" + )), + // Violation is dead on the invoke_by_name path (KD9); treat defensively. 
+ PluginError::Violation { + plugin_name, + violation, + } => PyRuntimeError::new_err(format!( + "cpex plugin '{plugin_name}' denied: {}", + violation.reason + )), + PluginError::Execution { + plugin_name, + message, + .. + } => PyRuntimeError::new_err(format!( + "cpex plugin '{plugin_name}' execution error: {message}" + )), + } +} diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs new file mode 100644 index 0000000..5781899 --- /dev/null +++ b/bindings/python/src/lib.rs @@ -0,0 +1,71 @@ +// Location: ./bindings/python/src/lib.rs +// Copyright 2025 +// SPDX-License-Identifier: Apache-2.0 +// Authors: Ted Habeck +// +// cpex-python — PyO3 native extension module `cpex._lib`. +// +// Module registration and tokio runtime initialization. +// The runtime is initialized once with a multi-thread builder that honours +// the `CPEX_PY_WORKER_THREADS` environment variable (KD8), mirroring the +// `CPEX_FFI_WORKER_THREADS` knob in cpex-ffi. + +use pyo3::prelude::*; + +mod builtins; +mod conversions; +mod error; +mod manager; +mod result; + +use manager::PyPluginManager; +use result::PyPipelineResult; + +/// Name of the env var operators set to bound worker threads. +const ENV_WORKER_THREADS: &str = "CPEX_PY_WORKER_THREADS"; + +/// Parse `CPEX_PY_WORKER_THREADS`. Returns `Some(n)` for valid positive +/// integers, `None` otherwise (falls back to tokio default `num_cpus`). 
+fn worker_threads_from_env() -> Option { + let raw = std::env::var(ENV_WORKER_THREADS).ok()?; + match raw.parse::() { + Ok(n) if n > 0 => { + tracing::info!( + "cpex-python: runtime using {} worker threads (from {})", + n, + ENV_WORKER_THREADS, + ); + Some(n) + } + _ => { + tracing::warn!( + "cpex-python: {}={:?} is not a positive integer; using num_cpus default", + ENV_WORKER_THREADS, + raw, + ); + None + } + } +} + +#[pymodule] +fn _lib(m: &Bound<'_, PyModule>) -> PyResult<()> { + // Initialize the pyo3-async-runtimes tokio runtime with a multi-thread + // builder so async methods are dispatched onto a real thread pool rather + // than a single-threaded executor. This must run before any `future_into_py` + // call — doing it here at module import time is the correct hook (KD8). + // + // This is a separate runtime from cpex-ffi's SHARED_RUNTIME; the + // shared-budget philosophy is mirrored, not the runtime instance. + let mut builder = tokio::runtime::Builder::new_multi_thread(); + builder.enable_all(); + if let Some(n) = worker_threads_from_env() { + builder.worker_threads(n); + } + pyo3_async_runtimes::tokio::init(builder); + + m.add_class::()?; + m.add_class::()?; + + Ok(()) +} diff --git a/bindings/python/src/manager.rs b/bindings/python/src/manager.rs new file mode 100644 index 0000000..fd66a8a --- /dev/null +++ b/bindings/python/src/manager.rs @@ -0,0 +1,184 @@ +// Location: ./bindings/python/src/manager.rs +// Copyright 2025 +// SPDX-License-Identifier: Apache-2.0 +// Authors: Ted Habeck +// +// `PyPluginManager` — PyO3 wrapper around `cpex_core::PluginManager` (R1, R3, KD4). +// +// Construction is synchronous; lifecycle methods (`initialize`, `shutdown`, +// `invoke_hook`) are returned as Python awaitables via `future_into_py`. +// +// The design sketch in the plan: +// [GIL held] convert payload/extensions/context under GIL +// [GIL release] future_into_py with timeout + catch_unwind +// [GIL re-acq.] 
pipeline_result_to_py +// +// BackgroundTasks are dropped (not awaited per call); fire-and-forget tasks +// run on the manager's TaskTracker and are drained by `shutdown()` (KD4). + +use std::sync::Arc; +use std::time::Duration; + +use cpex_core::context::PluginContextTable; +use cpex_core::extensions::Extensions; +use cpex_core::manager::PluginManager; +use pyo3::exceptions::{PyTimeoutError, PyValueError}; +use pyo3::prelude::*; +use pyo3_async_runtimes::tokio::future_into_py; + +use crate::builtins::register_builtin_factories; +use crate::conversions::{ + context_table_from_value, extensions_from_value, pyobj_to_json_value, resolve_payload, +}; +use crate::error::plugin_error_to_pyerr; +use crate::result::pipeline_result_to_py; + +/// Wall-clock timeout for every async call through the PyO3 boundary. +/// Mirrors `FFI_WALL_CLOCK_TIMEOUT` in cpex-ffi (KD7). +const PY_WALL_CLOCK_TIMEOUT: Duration = Duration::from_secs(60); + +#[pyclass(name = "PluginManager")] +pub struct PyPluginManager { + inner: Arc, +} + +#[pymethods] +impl PyPluginManager { + /// Create a new `PluginManager` from a YAML config file path. + /// + /// Synchronous construction — no Python event loop needed. + /// + /// Steps (order is load-bearing for APL Weak upgrade): + /// 1. `PluginManager::default()` → `Arc` + /// 2. `register_builtin_factories(&arc)` — factories + APL visitor on + /// the same Arc that load_config_yaml will reference + /// 3. Read config file → `load_config_yaml(&arc, yaml)` — APL visitor + /// Weak upgrades here + /// + /// Raises `ValueError` on missing file, IO error, YAML parse error, + /// or config validation error. 
+ #[new] + fn new(config_path: &str) -> PyResult { + let yaml = std::fs::read_to_string(config_path).map_err(|e| { + PyValueError::new_err(format!( + "cpex: cannot read config file '{config_path}': {e}" + )) + })?; + + let manager = Arc::new(PluginManager::default()); + register_builtin_factories(&manager); + manager.load_config_yaml(&yaml).map_err(plugin_error_to_pyerr)?; + + Ok(Self { inner: manager }) + } + + /// Initialize all registered plugins. + /// + /// Must be called before any `invoke_hook` call. + /// Returns an awaitable (coroutine). + fn initialize<'py>(&self, py: Python<'py>) -> PyResult> { + let manager = Arc::clone(&self.inner); + future_into_py(py, async move { + let result = tokio::time::timeout(PY_WALL_CLOCK_TIMEOUT, async move { + manager.initialize().await.map_err(plugin_error_to_pyerr) + }) + .await; + + match result { + Ok(inner_result) => inner_result, + Err(_elapsed) => Err(PyTimeoutError::new_err( + "cpex: PluginManager::initialize timed out", + )), + } + }) + } + + /// Shut down all registered plugins and drain fire-and-forget tasks (KD4). + /// + /// Returns an awaitable (coroutine). + fn shutdown<'py>(&self, py: Python<'py>) -> PyResult> { + let manager = Arc::clone(&self.inner); + future_into_py(py, async move { + let result = tokio::time::timeout(PY_WALL_CLOCK_TIMEOUT, async move { + manager.shutdown().await; + Ok::<(), PyErr>(()) + }) + .await; + + match result { + Ok(inner_result) => inner_result, + Err(_elapsed) => Err(PyTimeoutError::new_err( + "cpex: PluginManager::shutdown timed out", + )), + } + }) + } + + /// Invoke a hook by name. + /// + /// Args: + /// hook_name: str — e.g. `"cmf.tool_pre_invoke"` or any custom name. + /// payload: dict — converted via direct PyObject↔serde_json traversal + /// (no Python `json` module). + /// extensions: dict | None — optional cpex Extensions fields. + /// context_table: dict | None — optional PluginContextTable to thread + /// through for stateful plugins. 
+ /// + /// Returns an awaitable that resolves to `PipelineResult`. + /// + /// Raises: + /// `ValueError` — payload/extensions/context conversion failure, + /// or depth > 128. + /// `RuntimeError` — plugin execution error or panic at the boundary. + /// `TimeoutError` — wall-clock timeout exceeded (KD7). + #[pyo3(signature = (hook_name, payload, extensions=None, context_table=None))] + fn invoke_hook<'py>( + &self, + py: Python<'py>, + hook_name: &str, + payload: &Bound<'_, PyAny>, + extensions: Option<&Bound<'_, PyAny>>, + context_table: Option<&Bound<'_, PyAny>>, + ) -> PyResult> { + // --- GIL held: convert all arguments --- + let payload_value = pyobj_to_json_value(py, payload, 0)?; + let rust_payload = resolve_payload(hook_name, payload_value)?; + + let ext_value = match extensions { + None => serde_json::Value::Object(Default::default()), + Some(o) => pyobj_to_json_value(py, o, 0)?, + }; + let rust_extensions: Extensions = extensions_from_value(ext_value)?; + + let ctx_value = match context_table { + None => serde_json::Value::Null, + Some(o) => pyobj_to_json_value(py, o, 0)?, + }; + let rust_context: Option = context_table_from_value(ctx_value)?; + + let manager = Arc::clone(&self.inner); + let hook_name = hook_name.to_string(); + + // --- GIL released: async execution with wall-clock timeout (KD7) --- + // Panics inside invoke_by_name propagate as task aborts on the tokio + // runtime — the outer timeout ensures we never block indefinitely. + future_into_py(py, async move { + let result = tokio::time::timeout(PY_WALL_CLOCK_TIMEOUT, async move { + let (pipeline_result, _bg_tasks) = + manager.invoke_by_name(&hook_name, rust_payload, rust_extensions, rust_context).await; + // _bg_tasks dropped here; fire-and-forget tasks keep running + // on the manager's TaskTracker and are drained by shutdown() (KD4). 
+ pipeline_result_to_py(pipeline_result) + }) + .await; + + match result { + Ok(inner_result) => inner_result, + Err(_elapsed) => Err(PyTimeoutError::new_err(format!( + "cpex: invoke_hook timed out after {}s", + PY_WALL_CLOCK_TIMEOUT.as_secs(), + ))), + } + }) + } +} diff --git a/bindings/python/src/result.rs b/bindings/python/src/result.rs new file mode 100644 index 0000000..0d4857f --- /dev/null +++ b/bindings/python/src/result.rs @@ -0,0 +1,215 @@ +// Location: ./bindings/python/src/result.rs +// Copyright 2025 +// SPDX-License-Identifier: Apache-2.0 +// Authors: Ted Habeck +// +// `PyPipelineResult` — read-only Python view of `cpex_core::PipelineResult` (R1, R2, R3). +// +// Mirrors the field set of `PipelineResult` exactly. All fields are read-only +// getters; no setters are exposed. +// +// If the modified_payload could not be serialised back to a Python dict the +// caller appends a synthetic `PluginErrorRecord` to `errors` (R2, #8); +// `modified_payload` is exposed as `None` in that case — mirrors the pattern +// at `crates/cpex-ffi/src/lib.rs:877`. 
+ +use std::collections::HashMap; + +use cpex_core::executor::PipelineResult; +use cpex_core::error::PluginErrorRecord; +use pyo3::prelude::*; +use pyo3::types::PyDict; +use serde_json::Value; + +use crate::conversions::{json_value_to_pyobj, serialize_payload}; + +#[pyclass(name = "PipelineResult")] +pub struct PyPipelineResult { + pub continue_processing: bool, + pub modified_payload: Option, + pub modified_extensions: Option, + pub violation: Option, + pub errors: Vec, + pub metadata: Option, + pub context_table: Value, +} + +#[pymethods] +impl PyPipelineResult { + #[getter] + fn continue_processing(&self) -> bool { + self.continue_processing + } + + #[getter] + fn modified_payload<'py>(&self, py: Python<'py>) -> PyResult>> { + match &self.modified_payload { + None => Ok(None), + Some(v) => { + let obj = json_value_to_pyobj(py, v)?; + Ok(Some(obj.cast_into::().map_err(|_| { + pyo3::exceptions::PyRuntimeError::new_err( + "cpex: modified_payload is not a dict", + ) + })?)) + } + } + } + + #[getter] + fn modified_extensions<'py>(&self, py: Python<'py>) -> PyResult>> { + match &self.modified_extensions { + None => Ok(None), + Some(v) => { + let obj = json_value_to_pyobj(py, v)?; + Ok(Some(obj.cast_into::().map_err(|_| { + pyo3::exceptions::PyRuntimeError::new_err( + "cpex: modified_extensions is not a dict", + ) + })?)) + } + } + } + + #[getter] + fn violation<'py>(&self, py: Python<'py>) -> PyResult>> { + match &self.violation { + None => Ok(None), + Some(v) => { + let obj = json_value_to_pyobj(py, v)?; + Ok(Some(obj.cast_into::().map_err(|_| { + pyo3::exceptions::PyRuntimeError::new_err( + "cpex: violation is not a dict", + ) + })?)) + } + } + } + + #[getter] + fn errors<'py>(&self, py: Python<'py>) -> PyResult>> { + self.errors + .iter() + .map(|v| { + let obj = json_value_to_pyobj(py, v)?; + obj.cast_into::().map_err(|_| { + pyo3::exceptions::PyRuntimeError::new_err( + "cpex: error entry is not a dict", + ) + }) + }) + .collect() + } + + #[getter] + fn 
metadata<'py>(&self, py: Python<'py>) -> PyResult>> { + match &self.metadata { + None => Ok(None), + Some(v) => { + let obj = json_value_to_pyobj(py, v)?; + Ok(Some(obj.cast_into::().map_err(|_| { + pyo3::exceptions::PyRuntimeError::new_err( + "cpex: metadata is not a dict", + ) + })?)) + } + } + } + + #[getter] + fn context_table<'py>(&self, py: Python<'py>) -> PyResult> { + let obj = json_value_to_pyobj(py, &self.context_table)?; + obj.cast_into::().map_err(|_| { + pyo3::exceptions::PyRuntimeError::new_err("cpex: context_table is not a dict") + }) + } + + fn __repr__(&self) -> String { + format!( + "PipelineResult(continue_processing={}, violation={}, errors={})", + self.continue_processing, + if self.violation.is_some() { "Some(...)" } else { "None" }, + self.errors.len(), + ) + } +} + +// --------------------------------------------------------------------------- +// Builder +// --------------------------------------------------------------------------- + +/// Convert a `PipelineResult` from the Rust runtime into `PyPipelineResult`. +/// +/// If `modified_payload` is present but cannot be serialised, a synthetic +/// `PluginErrorRecord` is appended to `errors` and `modified_payload` is +/// exposed as `None` — mirrors cpex-ffi's behaviour at lib.rs:877 (R2, #8). +pub fn pipeline_result_to_py(mut result: PipelineResult) -> PyResult { + // Serialise modified_payload; on failure emit a synthetic error record. 
+ let modified_payload_value: Option = match result.modified_payload.take() { + None => None, + Some(p) => match serialize_payload(p.as_ref()) { + Some(v) => Some(v), + None => { + tracing::warn!( + "cpex-python: modified payload could not be serialised; dropping" + ); + result.errors.push(PluginErrorRecord { + plugin_name: "".to_string(), + message: "modified payload could not be serialised across the PyO3 boundary" + .to_string(), + code: Some("py_serialize_error".to_string()), + details: HashMap::new(), + proto_error_code: None, + }); + None + } + }, + }; + + let modified_extensions_value: Option = result + .modified_extensions + .map(|ext| serde_json::to_value(&ext)) + .transpose() + .map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!( + "cpex: modified_extensions serialization failed: {e}" + )) + })?; + + let violation_value: Option = result + .violation + .map(|v| serde_json::to_value(&v)) + .transpose() + .map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!( + "cpex: violation serialization failed: {e}" + )) + })?; + + let errors_value: Vec = result + .errors + .iter() + .map(serde_json::to_value) + .collect::>() + .map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!( + "cpex: errors serialization failed: {e}" + )) + })?; + + let context_table_value = serde_json::to_value(&result.context_table).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!( + "cpex: context_table serialization failed: {e}" + )) + })?; + + Ok(PyPipelineResult { + continue_processing: result.continue_processing, + modified_payload: modified_payload_value, + modified_extensions: modified_extensions_value, + violation: violation_value, + errors: errors_value, + metadata: result.metadata, + context_table: context_table_value, + }) +} diff --git a/bindings/python/tests/conftest.py b/bindings/python/tests/conftest.py new file mode 100644 index 0000000..5622887 --- /dev/null +++ b/bindings/python/tests/conftest.py @@ -0,0 +1,36 @@ +# 
Location: ./bindings/python/tests/conftest.py
+# Copyright 2025
+# SPDX-License-Identifier: Apache-2.0
+# Authors: Ted Habeck
+#
+# Pytest fixtures for cpex-python binding tests.
+#
+# No module-level os.environ mutation — env changes are scoped to the
+# test function that needs them (mirrors tests/unit/cpex/conftest.py style).
+
+import os
+from pathlib import Path
+
+import pytest
+from cpex import PluginManager
+
+# Fixture files live next to this conftest, under ./fixtures.
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+PII_DENY_CONFIG = str(FIXTURES_DIR / "pii_deny.yaml")
+
+
+# NOTE(review): this is an async generator under plain `@pytest.fixture`;
+# presumably the project runs pytest-asyncio in auto mode — confirm, since
+# strict mode requires `@pytest_asyncio.fixture` for async fixtures.
+@pytest.fixture
+async def manager():
+    """Create, initialize, and yield a PluginManager backed by pii_deny.yaml.
+
+    Shuts down after the test so fire-and-forget tasks are drained (KD4).
+    """
+    mgr = PluginManager(PII_DENY_CONFIG)
+    await mgr.initialize()
+    yield mgr
+    # Teardown: runs once the test using this fixture has finished.
+    await mgr.shutdown()
+
+
+@pytest.fixture
+def pii_deny_config_path() -> str:
+    """Return the path to the pii_deny fixture config as a string."""
+    return PII_DENY_CONFIG
diff --git a/bindings/python/tests/fixtures/pii_deny.yaml b/bindings/python/tests/fixtures/pii_deny.yaml
new file mode 100644
index 0000000..66e2ea9
--- /dev/null
+++ b/bindings/python/tests/fixtures/pii_deny.yaml
@@ -0,0 +1,32 @@
+# Location: ./bindings/python/tests/fixtures/pii_deny.yaml
+# Copyright 2025
+# SPDX-License-Identifier: Apache-2.0
+#
+# Test fixture — validator/pii-scan in deny mode + audit/logger fire-and-forget.
+#
+# Used by test_manager.py for AE2 (KD10): a cmf.tool_pre_invoke invoke with a
+# payload containing an SSN → continue_processing=False, violation present.
+# The audit/logger runs as fire-and-forget and is asserted post-shutdown (KD4).
+
+plugins:
+  - name: pii_deny
+    kind: validator/pii-scan
+    version: 1.0.0
+    hooks:
+      - cmf.tool_pre_invoke
+    mode: sequential
+    priority: 10
+    on_error: fail
+    config:
+      mode: deny
+      detect:
+        - kind: ssn
+
+  - name: audit_faf
+    kind: audit/logger
+    version: 1.0.0
+    hooks:
+      - cmf.tool_pre_invoke
+    mode: fire_and_forget
+    priority: 100
+    on_error: ignore
diff --git a/bindings/python/tests/test_conversions.py b/bindings/python/tests/test_conversions.py
new file mode 100644
index 0000000..6ac043f
--- /dev/null
+++ b/bindings/python/tests/test_conversions.py
@@ -0,0 +1,103 @@
+# Location: ./bindings/python/tests/test_conversions.py
+# Copyright 2025
+# SPDX-License-Identifier: Apache-2.0
+# Authors: Ted Habeck
+#
+# Tests for PyObject ↔ JSON value conversion behaviour (U3).
+# These are exercised through the Python API (invoke_hook with
+# various payload shapes) — the Rust internals are not callable directly.
+
+import pytest
+from cpex import PluginManager
+
+
+def _build_generic_payload(value: dict) -> dict:
+    """Wrap a dict as a generic (non-cmf) hook payload.
+
+    Currently the identity function: the dict is returned as-is.
+    """
+    return value
+
+
+# ---------------------------------------------------------------------------
+# Round-trip happy paths
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_flat_dict_round_trip(manager: PluginManager):
+    """A flat dict is accepted by a non-cmf hook and processing continues."""
+    payload = {"key": "value", "num": 42, "flag": True}
+    result = await manager.invoke_hook("custom.hook", payload)
+    assert result.continue_processing is True
+
+
+@pytest.mark.asyncio
+async def test_nested_dict_round_trip(manager: PluginManager):
+    """A nested dict/list passes through without ValueError."""
+    payload = {"outer": {"inner": [1, 2, 3]}, "text": "hello"}
+    result = await manager.invoke_hook("custom.hook", payload)
+    assert result.continue_processing is True
+
+
+@pytest.mark.asyncio
+async def test_mixed_scalar_types(manager: PluginManager):
+    """bool, int, float, str, None all accepted."""
+    payload = {"b": True, "i": 7, "f": 3.14, "s": "text", "n": None}
+    result = await manager.invoke_hook("custom.hook", payload)
+    assert result.continue_processing is True
+
+
+@pytest.mark.asyncio
+async def test_empty_dict(manager: PluginManager):
+    """An empty dict payload is accepted."""
+    result = await manager.invoke_hook("custom.hook", {})
+    assert result.continue_processing is True
+
+
+@pytest.mark.asyncio
+async def test_empty_list_in_payload(manager: PluginManager):
+    """An empty list nested in the payload is accepted."""
+    result = await manager.invoke_hook("custom.hook", {"items": []})
+    assert result.continue_processing is True
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_nesting_at_128_levels_succeeds(manager: PluginManager):
+    """Exactly 128 levels deep (depth arg == 128) must succeed — depth > 128 is the guard."""
+    payload: dict = {}
+    current = payload
+    for _ in range(128):  # root is depth=0; 128 children → deepest call is depth=128
+        current["child"] = {}
+        current = current["child"]
+    result = await manager.invoke_hook("custom.hook", payload)
+    assert result.continue_processing is True
+
+
+@pytest.mark.asyncio
+async def test_nesting_at_129_levels_raises(manager: PluginManager):
+    """129 levels deep (depth arg == 129 > 128) raises ValueError (R3)."""
+    payload: dict = {}
+    current = payload
+    for _ in range(129):  # root is depth=0; 129 children → deepest call depth=129 > 128
+        current["child"] = {}
+        current = current["child"]
+    with pytest.raises(ValueError, match="nesting exceeds maximum depth"):
+        await manager.invoke_hook("custom.hook", payload)
+
+
+@pytest.mark.asyncio
+async def test_non_string_key_raises(manager: PluginManager):
+    """A dict with a non-string key must raise ValueError or TypeError."""
+    # An int key is legal in a Python dict literal; the ignore only silences
+    # the static type checker.
+    bad_payload = {1: "value"}  # type: ignore[dict-item]
+    with pytest.raises((ValueError, TypeError)):
+        await manager.invoke_hook("custom.hook", bad_payload)
+
+
+@pytest.mark.asyncio
+async def test_unconvertible_type_raises(manager: PluginManager):
+    """An unconvertible Python type (set) in the payload raises ValueError."""
+    bad_payload = {"items": {1, 2, 3}}  # type: ignore[dict-item]
+    with pytest.raises(ValueError, match="cannot convert Python object"):
+        await manager.invoke_hook("custom.hook", bad_payload)
diff --git a/bindings/python/tests/test_errors.py b/bindings/python/tests/test_errors.py
new file mode 100644
index 0000000..d06fa01
--- /dev/null
+++ b/bindings/python/tests/test_errors.py
@@ -0,0 +1,57 @@
+# Location: ./bindings/python/tests/test_errors.py
+# Copyright 2025
+# SPDX-License-Identifier: Apache-2.0
+# Authors: Ted Habeck
+#
+# Error-path tests for cpex-python (U2, U7, KD2, KD7).
+
+import pytest
+from cpex import PluginManager
+
+
+# ---------------------------------------------------------------------------
+# Config errors
+# ---------------------------------------------------------------------------
+
+
+def test_missing_config_file_raises_value_error():
+    """Missing config file → ValueError (R2)."""
+    with pytest.raises(ValueError, match="cannot read config file"):
+        PluginManager("/nonexistent/path/config.yaml")
+
+
+def test_malformed_yaml_raises_value_error(tmp_path):
+    """Malformed YAML → ValueError (R2)."""
+    bad_config = tmp_path / "bad.yaml"
+    bad_config.write_text("plugins: [\n  - name: broken\n  : : bad_yaml")
+    with pytest.raises(ValueError):
+        PluginManager(str(bad_config))
+
+
+def test_valid_empty_config_constructs_ok(tmp_path):
+    """A valid minimal config (no plugins) constructs without error."""
+    empty_config = tmp_path / "empty.yaml"
+    empty_config.write_text("plugins: []\n")
+    mgr = PluginManager(str(empty_config))
+    assert mgr is not None
+
+
+# ---------------------------------------------------------------------------
+# Conversion failure (KD2) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_unconvertible_payload_raises_value_error(manager: PluginManager): + """An unconvertible Python type in the payload raises ValueError (KD2, R2).""" + with pytest.raises(ValueError): + await manager.invoke_hook("custom.hook", {"bad": object()}) + + +@pytest.mark.asyncio +async def test_invalid_cmf_payload_raises_value_error(manager: PluginManager): + """A cmf.* hook with a dict missing the `message` field raises ValueError.""" + # MessagePayload requires a `message:` field with a Message struct. + # An empty dict (or a dict without `message`) fails serde deserialization. + with pytest.raises(ValueError, match="not a valid MessagePayload"): + await manager.invoke_hook("cmf.tool_pre_invoke", {}) diff --git a/bindings/python/tests/test_manager.py b/bindings/python/tests/test_manager.py new file mode 100644 index 0000000..177c3a4 --- /dev/null +++ b/bindings/python/tests/test_manager.py @@ -0,0 +1,98 @@ +# Location: ./bindings/python/tests/test_manager.py +# Copyright 2025 +# SPDX-License-Identifier: Apache-2.0 +# Authors: Ted Habeck +# +# Integration tests for PyPluginManager lifecycle (U4). + +import pytest +from cpex import PluginManager, PipelineResult + + +# --------------------------------------------------------------------------- +# AE1 — importable (KD11 guard test) +# --------------------------------------------------------------------------- + + +def test_cpex_lib_importable(): + """cpex._lib must be importable and resolve to the native extension (KD11).""" + import cpex + import cpex._lib # noqa: F401 + + # __file__ on the package must NOT point to the legacy ./cpex/ directory. + # In the test venv, cpex.__file__ should end with cpex/__init__.py from + # the bindings package, and cpex._lib is a native .so/.dylib. 
+ assert hasattr(cpex._lib, "PluginManager"), "cpex._lib must expose PluginManager" + assert hasattr(cpex._lib, "PipelineResult"), "cpex._lib must expose PipelineResult" + + +# --------------------------------------------------------------------------- +# Happy path lifecycle +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_lifecycle_construct_initialize_shutdown(pii_deny_config_path: str): + """Construct → initialize → shutdown completes without error.""" + mgr = PluginManager(pii_deny_config_path) + await mgr.initialize() + await mgr.shutdown() + + +@pytest.mark.asyncio +async def test_invoke_returns_pipeline_result(manager: PluginManager): + """A non-triggering invoke returns a PipelineResult with continue_processing=True.""" + payload = { + "message": { + "role": "user", + "content": [{"content_type": "text", "text": "Hello, world!"}], + } + } + result = await manager.invoke_hook("cmf.tool_pre_invoke", payload) + assert isinstance(result, PipelineResult) + # SSN not present — no denial + assert result.continue_processing is True + + +# --------------------------------------------------------------------------- +# AE2 — pii-scan deny (KD10, KD4) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_pii_deny_returns_violation(pii_deny_config_path: str): + """AE2: CMF invoke with SSN payload → continue_processing=False, violation present.""" + mgr = PluginManager(pii_deny_config_path) + await mgr.initialize() + + # Payload contains a tool call with an SSN in the arguments. 
+ payload = { + "message": { + "role": "assistant", + "content": [ + { + "content_type": "tool_call", + "content": { + "tool_call_id": "tc_001", + "name": "lookup_person", + "arguments": {"ssn": "123-45-6789"}, + }, + } + ], + } + } + result = await mgr.invoke_hook("cmf.tool_pre_invoke", payload) + + assert result.continue_processing is False, "pii-scan deny should halt pipeline" + assert result.violation is not None, "violation dict must be populated" + assert "reason" in result.violation, "violation must have a reason field" + + await mgr.shutdown() + + +@pytest.mark.asyncio +async def test_generic_hook_does_not_raise(manager: PluginManager): + """Non-CMF hook routes through GenericPayload and returns a result (KD2).""" + result = await manager.invoke_hook("custom.arbitrary.hook", {"data": "value"}) + assert isinstance(result, PipelineResult) + assert result.continue_processing is True diff --git a/bindings/python/tests/test_result.py b/bindings/python/tests/test_result.py new file mode 100644 index 0000000..a0f0659 --- /dev/null +++ b/bindings/python/tests/test_result.py @@ -0,0 +1,97 @@ +# Location: ./bindings/python/tests/test_result.py +# Copyright 2025 +# SPDX-License-Identifier: Apache-2.0 +# Authors: Ted Habeck +# +# Tests for PyPipelineResult field access and repr (U5). 
+ +import re + +import pytest +from cpex import PluginManager, PipelineResult + + +# --------------------------------------------------------------------------- +# Happy path field access +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_all_fields_accessible(manager: PluginManager): + """All seven PipelineResult fields are accessible after a real invoke.""" + payload = { + "message": { + "role": "user", + "content": [{"content_type": "text", "text": "Hello"}], + } + } + result = await manager.invoke_hook("cmf.tool_pre_invoke", payload) + assert isinstance(result, PipelineResult) + + # continue_processing + assert isinstance(result.continue_processing, bool) + # violation — None for an allowed result + assert result.violation is None + # errors — list (may be empty) + assert isinstance(result.errors, list) + # metadata — may be None + assert result.metadata is None or isinstance(result.metadata, dict) + # context_table — always a dict + assert isinstance(result.context_table, dict) + + +# --------------------------------------------------------------------------- +# Deny result — violation dict shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_deny_result_violation_fields(pii_deny_config_path: str): + """A denied result exposes violation dict with expected keys.""" + mgr = PluginManager(pii_deny_config_path) + await mgr.initialize() + + payload = { + "message": { + "role": "assistant", + "content": [ + { + "content_type": "tool_call", + "content": { + "tool_call_id": "tc_x", + "name": "submit_form", + "arguments": {"ssn": "987-65-4321"}, + }, + } + ], + } + } + result = await mgr.invoke_hook("cmf.tool_pre_invoke", payload) + + assert result.continue_processing is False + assert result.violation is not None + assert isinstance(result.violation, dict) + # Standard violation keys + assert "reason" in result.violation + + 
await mgr.shutdown() + + +# --------------------------------------------------------------------------- +# repr safety — no pointer leakage (R3) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_repr_no_pointers(manager: PluginManager): + """__repr__ must not contain hex pointer substrings (R3).""" + payload = { + "message": { + "role": "user", + "content": [{"content_type": "text", "text": "hi"}], + } + } + result = await manager.invoke_hook("cmf.tool_pre_invoke", payload) + r = repr(result) + # Hex-pointer pattern: 0x followed by hex digits + assert not re.search(r"0x[0-9a-fA-F]+", r), f"repr contains pointer: {r!r}"