diff --git a/.cargo/audit.toml b/.cargo/audit.toml new file mode 100644 index 0000000..d561358 --- /dev/null +++ b/.cargo/audit.toml @@ -0,0 +1,32 @@ +# cargo-audit configuration. +# +# The advisories ignored below are ALL transitively pinned by dependencies we cannot bump from this +# repo, and each has been assessed for actual exposure. They are listed individually (not blanket +# `informational` suppression) so a NEW advisory still fails the audit. Re-evaluate when the upstream +# pins move — chiefly when `pingora` publishes past 0.8 (it is the latest published release as of +# this writing) and when `beyond-slipstream` relaxes its `async-nats ^0.46` requirement. +[advisories] +ignore = [ + # rustls-webpki 0.102.8: reachable panic in CRL parsing; name-constraint acceptance bugs (URI / + # wildcard); CRL distribution-point matching. Pulled ONLY by async-nats 0.46 (pinned by + # beyond-slipstream `^0.46`), used for the NATS/slipstream control-channel TLS — NOT the client- + # or provider-facing TLS, which already resolve the patched rustls-webpki 0.103.13. Blast radius + # is limited to MITM of the deny-set channel, which is fail-open and carries only deny entries. + # Fix path: a beyond-slipstream release on async-nats >= 0.47 (uses rustls-webpki 0.103+). + "RUSTSEC-2026-0104", + "RUSTSEC-2026-0098", + "RUSTSEC-2026-0099", + "RUSTSEC-2026-0049", + + # protobuf 2.28.0: DoS via uncontrolled recursion when PARSING protobuf. Pulled by prometheus + # 0.13 (both our direct dep — kept at 0.13 to share pingora-core's default registry — and + # pingora-core 0.8 itself). We never parse untrusted protobuf: metrics are exposed in the text + # exposition format via pingora's prometheus_http_service. Fix path: pingora past 0.8 (drops the + # prometheus 0.13 / protobuf 2.x chain). 
+ "RUSTSEC-2024-0437", + + # Unmaintained-crate warnings (no known vulnerability), all transitive via pingora 0.8: + "RUSTSEC-2025-0134", # rustls-pemfile (via rustls-native-certs <- pingora-rustls / async-nats) + "RUSTSEC-2025-0069", # daemonize (via pingora-core) + "RUSTSEC-2024-0388", # derivative (via a pingora dependency) +] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3827dd3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: CI +on: + pull_request: + branches: [main] +env: + CARGO_TERM_COLOR: always + # Belt-and-suspenders: the panic-surface + `unused_must_use` denies live in `[lints]` (Cargo.toml) + # so they bind locally too, but escalate *every* warning to an error in CI in case a lint isn't + # expressible there (build scripts, future targets). + RUSTFLAGS: -D warnings +jobs: + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: jdx/mise-action@v2 + - uses: Swatinem/rust-cache@v2 + # Formatting: dprint (config/json/etc) + rustfmt. + - run: mise check:fmt + - run: cargo fmt --all --check + # Lints: clippy `-D warnings` across all targets. With `[lints.clippy]` denying the panic + # surface (unwrap/expect/panic/todo/unimplemented), a new `.unwrap()` in production code + # fails the build here. 
+ - run: mise check:rs + - run: mise test:unit:rs + - run: mise test:integration:rs + - run: mise build:rs:release diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..18aa6a2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +target/ +dist/ +**/*.rs.bk +.env +.env.* +!.env.example +.claude/settings.local.json +.sqlx +.wiki +node_modules/ +bench/out/ +.mcp.json +.claude +.env diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..8f9b40d --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,456 @@ +# Beyond AI Gateway — Architecture + +Takes HTTP requests carrying an OpenAI- or Anthropic-dialect payload, authenticates the caller via +Ed25519 virtual key or BYO provider token, swaps in a pool key for managed traffic, relays the +request and response byte-for-byte to the upstream provider, and emits a token-usage billing fact +(`ai.usage`) on completion — all without buffering the body or response stream. + +**Self-contained:** no `path` deps into the `beyond` repo. Depends only on crates.io + the +published `beyond-slipstream` — clones, CI-builds, and publishes anywhere. 
+ +--- + +## Concepts & Terminology + +| Term | What It Controls / Gates | NOT | +| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | +| **Managed key** (`bai_v1.…`) | Ed25519-verified identity; enables key swap, deny-set check, and `ai.usage` billing | A session token or capability grant — just tenant attribution | +| **BYO key** (anything else) | Forwarded as-is to the provider; no swap, no billing, no deny-set | A lesser tier — same proxy, minus attribution and billing | +| **Pool key** | Real provider API key held by the gateway; swapped in for managed traffic | Per-tenant — one key per provider, shared by all managed callers | +| **Tenant** | The billing entity from the virtual key payload (`tenant_id: u64`) | An org, user, or namespace — an opaque integer the gateway doesn't interpret | +| **Dialect** | A provider attribute (OpenAI-wire vs Anthropic-wire) driving usage parsing; for a bare-path request it's derived from the path to pick the default provider | The provider — a prefixed request uses its provider's dialect, not the path | +| **Provider** | The request's **first path segment** (`/{provider}/…`); a named row in the routing table: authority, dialect, auth scheme | A vendor relationship — just connection facts and auth wiring | +| **Deny-set** | Sparse map of denied `tenant_id`s → reason; gates managed traffic; default-allow | An allowlist or ACL — misses are allowed, not blocked | +| **Tail tap** | Bounded 64KB window kept from the end of the response for usage extraction | A buffer or copy — the response is relayed unbuffered; only the tail is kept | +| **Snapshot** | On-disk deny-set cache (entries + NATS cursor) for edge/tunnel deployments | Persistent store — a pure cache; delete it and the gateway re-scans NATS 
| +| **Virtual key** (`bai_v1.{kid}.{payload}.{sig}`) | Ed25519-signed token encoding `tenant_id` + `vpc_id` (16-byte fixed payload) | A session or auth token — stateless, no server-side lookup, no revocation | + +--- + +## Data Flow + +### Happy Path + +``` +Client (stock OpenAI/Anthropic SDK) + │ + ▼ request_filter (proxy.rs) + │ ├─ Route: first segment → provider row (authority, dialect, auth scheme) + │ ├─ Extract key from Authorization: Bearer or x-api-key + │ ├─ Rate guardrails (BEFORE verify — keeps forged-key floods at ns cost) + │ │ per-credential count-min ──────────────────────────────► 429 + │ │ global BYO aggregate (managed exempt) ─────────────────► 429 + │ ├─ Content-Length abuse guard ──────────────────────────────► 413 + │ └─ Identity branch: + │ bai_v1.… → Ed25519 verify → deny-set check (O(1)) + │ │ │ │ + │ │ 401 (bad sig) 402 Spend / 403 Fraud + │ │ │ + │ │ pool key required ───────────────────────── 503 + │ └─ BYO: pass through (no verify, no deny-set, no billing) + │ └─ Circuit breaker (per provider, all traffic): if OPEN ─────► 503 + │ (claims a half-open probe permit only on an actual attempt) + │ + ▼ upstream_peer (proxy.rs) + │ TTL-cached DNS resolve (60s) → HttpPeer (TLS, H2 pref, timeouts) + │ DNS fail ──────────────────────────────────────────────────── 502 + │ TCP connect fail (retry 2×) ──────────────────────────────── 502 + │ + ▼ upstream_request_filter (proxy.rs) + │ Managed: remove both auth headers → inject pool key + │ BYO: leave auth header unchanged + │ Set Host; forward path verbatim (/{provider} prefix stripped) + │ + ▼ request_body_filter (proxy.rs) — body streamed through, never buffered + │ Feed chunks → ModelScanner (peek.rs) — extract root-level `model`, O(1) mem + │ Enforce running size cap (chunked-safe) ──────────────────── 413 + │ Injection-eligible (managed OpenAI chat/responses + stream): + │ buffer full body → inject stream_options.include_usage → re-frame chunked + │ + ▼ Provider upstream (OpenAI / Anthropic / 
Groq / DeepSeek / …) + │ + ▼ response_filter (proxy.rs) + │ Record TTFT; detect streaming (Content-Type: text/event-stream) + │ Count upstream response by provider + status class + │ Set x-beyond-request-id header + │ + ▼ response_body_filter (proxy.rs) — response relayed chunk-by-chunk, never buffered + │ Feed chunks → ModelScanner over response head → extract billed model + │ Append to bounded 64KB tail (compact drain(..half) if tail > 128KB) + │ + ▼ logging (proxy.rs) + Parse usage from tail (by dialect + streaming flag) + Emit ai.usage fact: tenant, vpc, model, requested_model, token counts (managed only) + Record circuit-breaker outcome (once): 5xx / connect-fail → failure; else → success (429 incl.) + Decrement requests_in_flight gauge +``` + +### Background: Deny-Set Watcher + +``` +NATS (blackhole.* KV entries) + │ + ▼ store_watch.rs (Pingora BackgroundService) + │ On connect: seed from disk snapshot (if snapshot_path set) or full NATS scan + │ Resume watch from saved revision (gap-free — no entry lost mid-connect) + │ Reconnect backoff: 1s → 30s exponential + │ + ▼ ArcSwap (state.rs) + Lock-free read on every managed request + Written only by the watcher on entry add/remove +``` + +--- + +## Core Mechanism + +### Routing (`route.rs`) + +Providers are **data rows**, not code paths. `KNOWN_PROVIDERS` in `route.rs` lists 10 built-in +providers (openai, anthropic, openrouter, fireworks, groq, deepseek, together, cerebras, mistral, +xai); each row carries its authority (host:port), dialect (OpenAI-wire vs Anthropic-wire), and auth +scheme (Bearer vs x-api-key). The `provider_authorities` config key adds or overrides rows at boot +with zero code change. + +The routing rule: **first path segment = provider name**. `/groq/openai/v1/chat/completions` routes +to Groq and forwards `/openai/v1/chat/completions` verbatim. A bare `/v1/…` path matches the +dialect default (OpenAI or Anthropic based on which default is set). Unknown segment → 404. 
Model +is not known at peer-selection time and is never used for routing. + +### Identity (`key.rs`) + +Virtual key format: `bai_v1.{kid}.{payload}.{sig}` where payload is exactly 16 bytes (8-byte +`tenant_id` + 8-byte `vpc_id`, little-endian u64). Verification is **stateless Ed25519** — no +database, no network call. The keyring holds multiple `kid` → public key mappings simultaneously +(zero-downtime rotation: add the new kid, deploy, remove the old kid). A tampered or forged key +falls through to BYO treatment; it does not error in a way that reveals which part failed. + +Verification cost ≈ 28µs per request — this is the gateway's only meaningful per-request CPU cost +(everything else runs in nanoseconds; see Benchmarking). The rate guardrails sit **before** verify +precisely because of this: a forged-key flood is rejected in tens of nanoseconds, not 28µs each. + +### Model Extraction (`peek.rs:ModelScanner`) + +A streaming structural scanner fed body or response chunks as they arrive. Tracks JSON nesting +depth, string-escape state, and quote boundaries. Captures the **root-level `model` field only** +(depth 0 in the object), ignoring nested `model` keys in tool calls or message content. +SIMD-accelerated via `memchr2` to skip over large string values (base64-encoded images, long +prompts). O(1) memory: one struct, no heap growth with payload size — proven by the unit bench +which shows a single allocation independent of whether the body is 0 bytes, 4 KB, or 256 KB. + +The billing fact carries **two model fields**: + +- `requested_model` — what the client sent (extracted from the request body) +- `model` — what the provider resolved and billed (extracted from the response head; falls back to + `requested_model` when the response carries no model field, e.g. an error body) + +`model` is what reconciles against the provider's invoice (which itemizes by pinned snapshot, e.g. +`gpt-4o-2024-08-06`, not alias). 
`requested_model` serves product analytics and as a fallback rate +when the snapshot is newer than the downstream price table. + +### Usage Extraction (`usage.rs`) + +The tail tap feeds the parser after `logging` fires. Two dialects: + +| Dialect | Format | Fields | +| --------- | ---------- | ----------------------------------------------------------------------------------------------------------------- | +| OpenAI | JSON body | `usage.prompt_tokens`, `usage.completion_tokens`, `usage.prompt_tokens_details.cached_tokens` | +| OpenAI | SSE stream | Terminal `data:` line (before `[DONE]`), same fields | +| Anthropic | JSON body | `usage.input_tokens`, `usage.output_tokens`, `usage.cache_read_input_tokens`, `usage.cache_creation_input_tokens` | +| Anthropic | SSE stream | `message_delta` event with `usage` block | + +Missing or zero usage fields deserialize to zero (safe default). If the tail is truncated by the +compaction drain, the usage chunk is still present because SSE usage is always the final `data:` +line and the tail keeps the last 64KB. + +### Deny-Set (`deny.rs`) + +A `HashMap` (tenant_id → reason). Only denied tenants are stored — the map is +`O(denied)` in memory regardless of total tenant count. Lookup is one hash probe. Written +exclusively by the NATS watcher via `ArcSwap`; reads on the hot path are lock-free. + +Reasons: `Spend` (→ 402), `Fraud` (→ 403), `Unknown` (→ 403, fail-safe for unrecognized values). +Restore = explicit delete from NATS KV or TTL expiry — no gateway-side timer. + +### Rate Guardrails (`ratelimit.rs`) + +Two fixed-memory count-min sketch tiers, checked before Ed25519 verify and before any upstream +connection: + +| Tier | Key | Bucket count | Default ceiling | Managed exempt? 
| +| -------------------- | --------------- | ------------ | --------------- | --------------- | +| Per-credential | Hash of raw key | 5 MB sketch | 100 req/s | No | +| Global BYO aggregate | Single bucket | 1 bucket | 1000 req/s | **Yes** | + +The per-credential tier is keyed on the **raw presented credential** (not the verified tenant), +which has two consequences: (1) the guard sits ahead of verify, so forged tokens are rejected +before any crypto work; (2) virtual keys are deterministic per `(tenant, vpc)`, so this is +effectively per-(tenant, vpc) granularity without a registry lookup. + +The global BYO aggregate exists because BYO traffic exits from the gateway's own egress IPs +carrying the caller's raw token. A flood of distinct junk BYO tokens each get their own +per-credential bucket and slip through that tier — the aggregate caps total BYO egress rate to +protect the gateway's IP reputation with providers. Managed traffic is exempt because it's verified +before any upstream connection and cannot be forged. + +Both tiers are generous circuit breakers, not quotas. `rate_limit_rps = 0` / `byo_rate_limit_rps = +0` disable them independently. + +### Circuit Breaker (`circuit_breaker.rs`) + +A per-provider, lock-free circuit breaker (single packed `AtomicU64`; windowed failure policy) sits +on the upstream path. It protects against a **broken provider**, which is a different failure than +the rate guardrails (which protect against abusive _inbound_ load): + +- **Failure = the provider is broken** — a `5xx` response or a connect failure. After + `circuit_breaker_threshold` failures within `circuit_breaker_window_secs`, the breaker **opens**: + requests to that provider fast-fail with `503` (`ai_rejections_total{reason="circuit_open"}`) + instead of piling up against `read_timeout_secs` and exhausting connection / in-flight slots for + _every_ provider (head-of-line blocking by one sick dependency). 
After `circuit_breaker_reset_secs` + it half-opens and admits a probe; success closes it, failure reopens it. +- **A `429` is NOT a failure.** It means the provider is healthy and throttling our pool key — a + velocity/spend signal the rate limiter and the client's `Retry-After` backoff own. Tripping on it + would convert a self-healing throttle into a self-inflicted outage. The breaker records any response + that _arrived_ (2xx/3xx/4xx incl. 429) as a **success**; only 5xx and transport failures count + against it. +- **Applies to all traffic** (managed + BYO) — a down provider is down regardless of whose key is + used. One breaker per provider, built at boot, shared lock-free across callers. +- The `allow()` check is the **last** thing in `request_filter` (after every other rejection), so a + scarce half-open probe permit is only claimed for a request that will actually attempt the upstream; + the outcome is recorded exactly once in `logging`, so a permit can never leak. +- `circuit_breaker_threshold = 0` disables it. + +--- + +## Why It Behaves This Way + +### Why rate guardrails sit before Ed25519 verify + +Ed25519 verify is ~28µs — roughly 350–650× more expensive than every other per-request operation. +A flood of forged `bai_v1` tokens could drive unbounded crypto work if the rate limit came after +verify. By checking the per-credential bucket first (keyed on the raw token, no crypto), a +forged-key flood is rejected in tens of nanoseconds per request. Legit traffic is unaffected: the +rate guard passes through, then verify runs as normal. The unit bench (`benches/unit.rs`) asserts +this: `key/verify` ≈ 28µs; `ratelimit::check` ≈ 43–83ns; 0 allocations for either. + +### Why the body injection exception exists (`managed + OpenAI + streaming`) + +OpenAI streams no usage chunk unless `stream_options.include_usage: true` is set. Without it, a +streaming managed request is unmeterable: no usage block in the response means no billing fact. 
The +gateway injects this field server-side so callers using stock SDKs get metered without any +cooperation. The request is buffered (`MAX_REQUEST_BODY` cap), the field injected, and the body +re-framed as chunked upstream. Scoped to managed + OpenAI-dialect + streaming only — BYO and +non-streaming requests remain pure passthrough. + +### Why the deny-set watch resumes from a saved revision + +A plain `watch_prefix` (NATS `DeliverPolicy::New`) would miss any entry written in the window +between the initial seed scan and the live watch attaching. `store_watch.rs` records the stream +revision at which the seed was complete and calls `watch_prefix_from` to resume from that revision +— so a deny written during the gap is delivered, not silently dropped. This revision is also +persisted across reconnects, so a NATS blip resumes from the last-seen point instead of re-scanning +the entire keyspace. + +### Why BYO token validity is never checked + +Checking a BYO token requires a round-trip to the provider. The provider does that check anyway and +returns 401 if the token is invalid — the client sees the same rejection it would get going direct, +just routed through the gateway. Adding a gateway-side preflight check would double the latency for +every BYO request on the error path with no security benefit at the gateway layer. + +### Why pricing is absent from the gateway + +The gateway emits token _facts_ (`ai.usage`): counts and model identifiers. Applying prices to +those facts is a downstream concern. Provider pricing changes frequently, varies by contract tier, +and is sometimes retroactively corrected on invoices. A downstream consumer can reprice historical +facts; the gateway's facts cannot be regenerated once the request is gone. + +### Why routing uses the first path segment, not a header + +Path-based routing makes the target provider explicit in every request URL — visible in logs, +traces, and curl output without inspecting headers. 
It also survives transparent proxies and load +balancers that strip custom headers. A `/{provider}/` prefix was preferred over a separate header +because SDKs already let callers set the base URL; swapping in the gateway's URL with a provider +prefix requires no SDK modification. + +--- + +## Trust Boundaries + +**What the gateway verifies (rejects if invalid):** + +- Virtual key signature (Ed25519, stateless — no DB lookup) +- Virtual key format (`bai_v1.{kid}.{payload}.{sig}`, fixed 16-byte payload) +- Tenant not in deny-set (managed traffic only; O(1) HashMap lookup) +- Pool key configured for the requested provider (managed traffic only — else 503) +- Request body size ≤ `MAX_REQUEST_BODY` (declared `Content-Length` + streaming running total) +- Per-credential request rate within ceiling; aggregate BYO rate within ceiling + +**What passes through unchecked:** + +- Request body content and schema — no validation at the gateway layer +- Model name in the request — extracted for billing facts, never validated against an allowlist +- Provider response content — relayed byte-for-byte +- BYO token validity — forwarded as-is; the provider rejects it if invalid +- `vpc_id` in the virtual key — decoded and emitted in billing facts, not used for access control + +**Why these boundaries are where they are:** + +- Body schema validation belongs to the provider — duplicate validation adds latency without a + security benefit at the gateway layer +- Model allowlisting would require a per-provider list coupled to model release cadence +- BYO token validation requires a provider round-trip — the provider does it anyway + +--- + +## Configuration + +All fields configurable via `config.example.toml` and environment (`AI_` prefix, flat merge). +Secret-bearing fields (`pool_keys`, `nats_creds`) are held as `Secret` — stray `Debug` or +`Serialize` output redacts to `"***"` and the value is zeroized on drop (`secret.rs`). 
+ | Field | Default | Runtime Effect | +| ------------------------------- | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `signing_keys` | _(required)_ | Map of kid → base64 Ed25519 public key. Multiple kids enable rotation. Missing → all traffic falls through to BYO treatment. | +| `require_signing_keys` | `false` | When `true`, an empty `signing_keys` is a hard boot failure instead of silent BYO-only mode. Set on managed deployments so a typo'd/absent SSM param fails fast rather than silently serving for free. | +| `pool_keys.<provider>` | _(from `AI_POOL_KEY_<PROVIDER>` env)_ | Real provider API key. Missing for a provider → managed requests to that provider return 503 before any upstream connection. | +| `provider_authorities.<provider>` | _(none)_ | Override or add a provider's `authority` (host:port). Enables config-added providers beyond `KNOWN_PROVIDERS` with zero code change. | +| `snapshot_path` | _(unset)_ | Path for the on-disk deny-set cache. Unset → re-scan NATS on every cold boot. Set → load from disk and enforce before NATS reconnects (edge/tunnel deployments). | +| `rate_limit_rps` | `100` | Per-credential request ceiling (count-min, keyed on raw key hash). `0` disables. Exceeded → 429. Checked before Ed25519 verify. | +| `byo_rate_limit_rps` | `1000` | Aggregate ceiling for all BYO traffic (single shared bucket). `0` disables. Managed traffic exempt. Exceeded → 429. | +| `circuit_breaker_threshold` | `20` | Per-provider upstream failures (5xx / connect; **not** 429) within the window before the breaker opens. While open, requests to that provider fast-fail with 503. `0` disables. | +| `circuit_breaker_window_secs` | `10` | Rolling window over which failures are counted (trips on a burst, not a slow trickle). 
| +| `circuit_breaker_reset_secs` | `30` | How long the breaker stays open before admitting a half-open probe. Probe success closes it; failure reopens it. | +| `connect_timeout_secs` | `10` | TCP connect timeout to the upstream provider. Exceeded → retry up to 2×, then 502. | +| `read_timeout_secs` | `600` | Response read timeout (10 min accommodates long-running LLM streams). | +| `write_timeout_secs` | `60` | Upstream request-write timeout (sending the request to the provider). | +| `idle_timeout_secs` | `90` | Idle timeout on a pooled upstream connection before it's closed. | +| `shutdown_grace_period_secs` | `600` | SIGTERM drain window for in-flight requests (= `read_timeout_secs` so a deploy never truncates a stream). Capped by the orchestrator's stop timeout (ECS Fargate: 120s). | +| `shutdown_runtime_timeout_secs` | `10` | Final runtime-teardown backstop after the drain window. | +| `nats_url` | `nats://localhost:4222` | NATS server for the deny-set watcher. Unreachable → fail-open (deny-set stays empty or stale). | +| `nats_creds` | _(unset)_ | NATS credentials file path. Required for authenticated clusters. | +| `listen_addr` | `0.0.0.0:8080` | Proxy listener address (client traffic). | +| `metrics_listen` | `0.0.0.0:9090` | Internal admin/observability listener: `/metrics` (Prometheus scrape), `/livez`, `/readyz`. Separate from the client listener — not externally reachable. | + +--- + +## Failure Modes + +| Failure | What Actually Happens | Recovery | +| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NATS unreachable at boot | Deny-set starts empty (fail-open). Auth still works — keys from config. 
| Watcher reconnects; seeds from NATS or disk snapshot on connect. | +| NATS disconnects mid-run | Last-known deny-set stays active. New deny entries not applied until reconnect. | Watcher reconnects (1s→30s exponential backoff, reset on success) and resumes from saved revision — no re-scan. | +| NATS history compacted past snapshot cursor | `CursorExpired` → full re-scan from current NATS state. | After re-scan, new cursor set; delta watch resumes normally. | +| Virtual key tampered or forged | Ed25519 verify fails → falls through to BYO treatment. No billing event. No error reveals which part failed. | Billing miss detectable downstream; no security boundary breach. | +| `signing_keys` absent (typo'd/missing SSM) | Default: warn + BYO-only (silently drops all managed billing + deny-set). With `require_signing_keys=true`: hard boot failure. | Set `require_signing_keys=true` on managed deployments so the mis-deploy fails fast and visibly at boot. | +| Pool key missing for provider | Managed request returns 503 before any upstream connection. | Add `AI_POOL_KEY_` env and redeploy. | +| Provider DNS fails | `upstream_peer` returns error → 502 to client. | TTL-cached DNS (60s) serves stale; poisoned-lock guard re-resolves on next request. | +| Provider TCP connect fails | `fail_to_connect` retries up to 2×, then returns 502. Counts as a circuit-breaker failure. | Client SDK retries with backoff. No HTTP-status retries (Pingora-idiomatic). | +| Provider brownout (sustained 5xx) | After `circuit_breaker_threshold` 5xx/connect failures in the window, the breaker opens; requests fast-fail 503 (`circuit_open`) instead of stalling against the read timeout. | Auto: after `circuit_breaker_reset_secs` a half-open probe is admitted — success closes the breaker, failure reopens it. Per-provider, so other providers are unaffected. | +| Provider throttles (429 storm) | Relayed to the client as 429; the client's `Retry-After` backoff applies. 
Does **not** trip the breaker (provider is healthy). | Backpressure via client + the rate guardrails; no gateway-side circuit action. | +| Response body > 128KB before usage chunk | Tail compaction fires: `drain(..half)` discards first half, keeps tail. Usage extracted from retained tail. | No action — SSE usage is always in the final `data:` line, which always lands in the tail. | +| Gateway crash mid-request | In-flight request drops; client receives TCP close. No partial state written. | Client SDK retries. No DB writes in the request path — no cleanup needed. | + +--- + +## Metrics + +Prometheus on the default registry, exposed at `/metrics` on `metrics_listen`. + +| Metric | Type | Labels | What It Measures | +| ----------------------------- | --------- | -------------------- | -------------------------------------------------------------------------------------- | +| `ai_requests_total` | Counter | — | Total admitted requests | +| `ai_rejections_total` | Counter | `reason` | Rejected requests by cause (auth, deny_spend, deny_fraud, rate_limit, circuit_open, …) | +| `ai_upstream_responses_total` | Counter | `provider`, `status` | Upstream responses by provider and status class | +| `ai_tokens_total` | Counter | `kind` | input / output / cache_read / cache_write token counts | +| `ai_ttft_seconds` | Histogram | `provider` | Time to first token (50ms–30s buckets) | +| `ai_upstream_latency_seconds` | Histogram | `provider` | Full request latency (100ms–600s buckets) | +| `ai_active_streams` | Gauge | — | Open SSE streams | +| `ai_requests_in_flight` | Gauge | — | All in-flight requests (streaming + non-streaming) | +| `ai_deny_set_size` | Gauge | — | Current number of denied tenants | +| `ai_nats_connected` | Gauge | — | 1 if NATS watcher is connected, 0 otherwise | + +--- + +## Modules + +| Module | Role | Tested | +| ----------------- | ---------------------------------------------------------------------------------------------------- | -------------- | +| 
`proxy` | `ProxyHttp` impl — request/response pipeline (request_filter through logging) | e2e ✓ | +| `key` | `bai_v1` parse + Ed25519 verify + mint; keyring with multi-kid rotation support | unit ✓ | +| `route` | Data-driven provider table (name / authority / auth) + dialect default routing | unit ✓ | +| `peek` | `ModelScanner` — streaming structural scan for the root-level `model`; O(1) memory | unit ✓ | +| `usage` | Token extraction (OpenAI / Anthropic, body + SSE) | unit ✓ | +| `deny` | Sparse deny-set, default-allow, reason → HTTP status | unit ✓ | +| `ratelimit` | Two-tier guardrail: per-credential + global BYO (count-min sketches, fixed memory, no GC) | unit ✓ | +| `circuit_breaker` | Per-provider lock-free breaker (packed `AtomicU64`, windowed policy) — trips on 5xx/connect, not 429 | unit ✓ + e2e ✓ | +| `state` | Keyring + resolved provider registry + watched deny-set (ArcSwap) + TTL DNS cache | unit ✓ | +| `store_watch` | NATS watcher — gap-free deny-set seeding + delta watch as Pingora `BackgroundService` | e2e ✓ | +| `config` | Figment config; build keyring; pool keys / authorities by provider name | unit ✓ | +| `secret` | Redacting, zeroize-on-drop `Secret` newtype for pool keys and NATS creds | unit ✓ | +| `admin` | `ServeHttp` on the metrics listener: `/livez`, `/readyz`, `/metrics` | e2e ✓ | +| `metrics` | Prometheus counter/histogram/gauge registration and update helpers | compile ✓ | +| `doctor` | Boot-time diagnostics (`beyond-ai doctor`) | compile ✓ | +| `main` | CLI (`run` / `doctor`), rustls init, config load, Pingora server + three services bootstrap | compile ✓ | + +--- + +## Verification + +- **Unit (`cargo test --lib`):** key, route, peek, usage, deny, secret, config. `clippy + --all-targets -D warnings` clean. +- **End-to-end (`tests/e2e.rs`, `mise run test:integration:rs`):** real `beyond-ai` binary + real + nats-server + mock upstream. 
Covers managed key-swap + passthrough fidelity + usage metering + (OpenAI JSON + SSE, **Anthropic `/v1/messages`** with `x-api-key` swap + metering), **BYO + passthrough** (raw token unchanged), the **virtual key in either inbound header** (`Bearer` or + `x-api-key`), and deny-set propagation: spend (write `blackhole.{tenant}` → 402, delete → 200) + and **fraud** (→ 403). Error/edge paths: **missing key → 401**, **oversized `Content-Length` → + 413**, **managed key for an unconfigured provider → 503**, **streaming tail compaction** (>128KB + before the usage chunk still meters), **deny-set fail-open** (kill NATS → stale set retained, + auth still works), and **on-disk snapshot survival** (blackhole a tenant, restart with NATS down + → the hold is still enforced from disk). +- **Live smoke (`tests/smoke.rs`, `mise run test:smoke`):** the real `beyond-ai` binary against the + **real** provider hosts over TLS, one per provider in `KNOWN_PROVIDERS`. Proves real TLS/SNI, + the `/v1` → base-path rewrite landing on a live mount (200, not 404), and BYO auth passthrough. + Every test is `#[ignore]` and skips unless its provider's API key env var is set — CI stays + hermetic; you only hit providers you have keys for. + +--- + +## Benchmarking + +Two harnesses, mirroring the unit/e2e split of the tests. The framing is **Theory of Constraints**: +a proxy's steady-state constraint is upstream I/O, not gateway CPU. The benches **prove the +gateway's added cost is negligible and bounded** — i.e. it never becomes the constraint. + +- **Unit micro (`benches/unit.rs`, `mise run bench:unit`) — `divan`.** Times IO-free hot paths and + measures allocations natively (divan's `AllocProfiler` reports alloc/dealloc/grow count + bytes + beside ns/iter, no `unsafe` needed). 
Coverage: `key` verify/mint; `peek::ModelScanner` over + 0/4KB/256KB bodies with `model` placed last (worst case); `usage` parsers; `route`; `deny` + (`parse_key`/`parse_reason` off-path + `reason()` on-path); `ratelimit::check` (managed tier + only vs. BYO which runs both tiers). + + What the alloc numbers assert: + | Operation | Cost | Allocations | Claim verified | + | ------------------- | -------- | ---------------------------- | ----------------------------- | + | `key/verify` | ~28µs | 0 | Stack-only Ed25519 decode | + | `peek/ModelScanner` | varies | 1 (independent of body size) | O(1) memory | + | `route` | ~ns | 0 | — | + | `deny::reason` | ~1–8ns | 0, flat 0→1M entries | O(1) lookup, O(denied) memory | + | `ratelimit::check` | ~43–83ns | 0 | Fixed-memory count-min | + + **Headline: `key/verify` ≈ 28µs is ~350–650× `ratelimit::check` (~43–83ns), and an even larger + multiple of `route` / `deny::reason`.** This is why the rate guardrail sits before verify in + `proxy::request_filter`. + +- **End-to-end (`benches/e2e.rs`, `mise run bench:e2e`) — `criterion`.** Real `beyond-ai` binary + real + nats-server + mock upstream (reuses `tests/common`). Four decomposed cases: + `reject_missing_key_latency` (401, short-circuit before any upstream connection — transport floor), + `byo_json_latency` (pure passthrough), `managed_json_latency` (verify + deny + key swap), + `managed_sse_latency` (streaming response tap). Plus a concurrent-throughput group. + + All four cases land in ~110–120µs on loopback with ±15–20µs jitter — larger than the gateway's + own CPU cost. This harness cannot resolve the verify cost (that's the unit bench's job). Its value: + catching gross regressions (a buffering mistake, a dropped connection pool, an O(n) path added + would move the band by far more than 20µs) and saved-baseline RPS trend via `--save-baseline`. + +`mise run bench` runs both.
diff --git a/CLAUDE.md b/CLAUDE.md index b7a270e..67e4bef 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -51,24 +51,3 @@ Apply the **Theory of Constraints**: a system's throughput is limited by its sin 5. **Repeat.** The bottleneck has shifted. Go back to step 1. The corollary: if you can't name the current constraint, you aren't ready to optimize. - - - -## Wiki - -This repo uses [agent-wiki](.wiki/): `.wiki/` indexes repo markdown docs and code symbols into a queryable knowledge graph. - -**Read the wiki before grepping the codebase or reading ARCHITECTURE.md.** Pages are pre-indexed — searching them is faster and ~5–10× cheaper than re-deriving from raw files. - -Wiki tools — pick based on what you need: - -- `wiki_query ""` — first move for any specific question. BM25++ over repo docs and code symbols; returns ranked hits with paths, scores, and inline snippets. -- `wiki_answer ""` — returns top-ranked pages with query-relevant passage extracts in one round-trip. Best when you expect the answer exists and want it immediately. -- `wiki_read "path/to/page.md"` (optionally `section: "..."` or `paths: [...]`) — full page, one section, or multiple pages in one call. -- `wiki_search_code ""` — search exported symbols, signatures, and doc comments when you need to locate a declaration or understand an API. -- `wiki_usage_examples ""` — real call sites with surrounding source code. Use before changing a function (to see every calling convention you must preserve) or when learning how an unfamiliar API is actually used. -- `wiki_impact ""` — blast radius: every symbol that transitively calls this one, ranked by hop distance. Use before refactoring or renaming to know what breaks. -- `wiki_callees ""` — outgoing call hierarchy (rust-analyzer equivalent): every function this symbol transitively calls, ranked by hop distance. Use when you need to understand what a function depends on before touching it — its DB calls, service calls, and abstractions. 
-- `wiki_implementors ""` — go-to-implementations (rust-analyzer equivalent): every concrete type that implements a trait or interface. Use when you need to know what's behind a trait object, or how many types a trait change will affect. - - diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..1a9bf79 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,4504 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "asn1-rs" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5493c3bedbacf7fd7382c6346bbd66687d12bbaad3a89a2d2c303ee6cf20b048" +dependencies = [ + "asn1-rs-derive", + "asn1-rs-impl", + "displaydoc", + "nom", + "num-traits", + "rusticata-macros", + "thiserror 1.0.69", + "time", +] + +[[package]] +name = "asn1-rs-derive" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "asn1-rs-impl" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "async-nats" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df5af9ebfb0a14481d3eaf6101e6391261e4f30d25b26a7635ade8a39482ded0" +dependencies = [ + "base64", + "bytes", + "futures-util", + "memchr", + "nkeys", + "nuid", + "once_cell", + "pin-project", + "portable-atomic", + "rand 0.8.6", + "regex", + "ring", + "rustls-native-certs 0.7.3", + "rustls-pki-types", + "rustls-webpki 0.102.8", + "serde", + "serde_json", + "serde_nanos", + "serde_repr", + "thiserror 1.0.69", + "time", + "tokio", + "tokio-rustls", + 
"tokio-stream", + "tokio-util", + "tokio-websockets", + "tracing", + "tryhard", + "url", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "atomic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + +[[package]] +name = "aws-lc-rs" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ec2f1fc3ec205783a5da9a7e6c1509cc69dedf09a1949e412c1e18469326d00" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a2f9779ce85b93ab6170dd940ad0169b5766ff848247aff13bb788b832fe3f4" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "beyond-ai" +version = "0.1.0" +dependencies = [ + "arc-swap", + "arrayvec", + "async-trait", + "base64", + "beyond-slipstream", + "bytes", + "clap", + "criterion", + "divan", + "ed25519-dalek", + "figment", + "getrandom 0.3.4", + "http", + "http-body-util", + "hyper", + "hyper-util", + "memchr", + "pingora", + "pingora-core", + "pingora-limits", + "pingora-proxy", + "prometheus", + "rcgen", + "reqwest", + "rustls", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tokio-rustls", + "tracing", + "tracing-subscriber", + "zeroize", +] + +[[package]] +name = "beyond-slipstream" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b07e54aae9b02cf7d2e9d935bd99cbc4a045f19d00738f069f44ba238a01600" +dependencies = [ + "async-nats", + "async-trait", + "base64", + "crc32fast", + "futures", + "serde_json", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +dependencies = [ + "serde", +] + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.2.63" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cf-rustracing" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6565523d8145e63e0cf1b397a5f1bd4e90d5652a7dffb2de8cec460ff23ef6b1" +dependencies = [ + "backtrace", + "rand 0.10.1", + "tokio", + "trackable", +] + +[[package]] +name = "cf-rustracing-jaeger" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16c0e4d8cce27f6a6eaff58d2b66f063a18b8ed0d6ef0947ae7a263afa3b7c08" +dependencies = [ + "cf-rustracing", + "hostname", + "local-ip-address", + "percent-encoding", + "rand 0.10.1", + "thrift_codec", + "tokio", + "trackable", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "rand_core 0.10.1", +] + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + 
"ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", + "terminal_size", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "combine" +version = "4.6.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ 
+ "cfg-if", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "curve25519-dalek" +version = "4.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "curve25519-dalek-derive", + "digest", + "fiat-crypto", + "rustc_version", + "subtle", + "zeroize", +] + +[[package]] +name = "curve25519-dalek-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "daemonize" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab8bfdaacb3c887a54d41bdf48d3af8873b3f5566469f8ba21b92057509f116e" +dependencies = [ + "libc", +] + +[[package]] +name = "daggy" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70def8d72740e44d9f676d8dab2c933a236663d86dd24319b57a2bed4d694774" +dependencies = [ + "petgraph", +] + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "data-encoding" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "der-parser" +version = "9.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cd0a5c643689626bec213c4d8bd4d96acc8ffdb4ad4bb6bc16abf27d5f4b553" +dependencies = [ + "asn1-rs", + "displaydoc", + "nom", + "num-bigint", + "num-traits", + "rusticata-macros", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", + "serde_core", +] + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = 
"derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "divan" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a405457ec78b8fe08b0e32b4a3570ab5dff6dd16eb9e76a5ee0a9d9cbd898933" +dependencies = [ + "cfg-if", + "clap", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + +[[package]] +name = "divan-macros" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9556bc800956545d6420a640173e5ba7dfa82f38d3ea5a167eb555bc69ac3323" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "ed25519" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" +dependencies = [ + "pkcs8", + "signature", +] + +[[package]] +name = "ed25519-dalek" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e796c081cee67dc755e1a36a0a172b897fab85fc3f6bc48307991f64e4eca9" 
+dependencies = [ + "curve25519-dalek", + "ed25519", + "serde", + "sha2", + "signature", + "subtle", + "zeroize", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "fiat-crypto" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" + +[[package]] +name = "figment" +version = "0.10.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3" +dependencies = [ + "atomic", + "pear", + "serde", + "toml", + "uncased", + "version_check", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "libz-ng-sys", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + 
+[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" 
+dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "rand_core 0.10.1", + "wasip2", + "wasip3", +] + +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" + +[[package]] +name = "h2" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + 
+[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hostname" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd" +dependencies = [ + "cfg-if", + "libc", + "windows-link", +] + +[[package]] +name = "http" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = 
[ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + +[[package]] +name = "ipnet" +version = "2.12.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if", + "combine", + "jni-macros", + "jni-sys", + "log", + "simd_cesu8", + "thiserror 2.0.18", + "walkdir", + "windows-link", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.117", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libz-ng-sys" +version = "1.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be734b33b7bc6a42d92d23e25e69758f866cf564a88d0bf80866fcf5a52c2255" +dependencies = [ + "cmake", + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = 
"local-ip-address" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa08fb2b1ec3ea84575e94b489d06d4ce0cbf052d12acd515838f50e3c3d63e3" +dependencies = [ + "libc", + "neli", + "windows-sys 0.61.2", +] + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5" + +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +dependencies = [ + "hashbrown 0.16.1", +] + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + 
+[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "neli" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87" +dependencies = [ + "bitflags 2.11.1", + "byteorder", + "derive_builder", + "getset", + "libc", + "log", + "neli-proc-macros", + "parking_lot", +] + +[[package]] +name = "neli-proc-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d8d08c6e98f20a62417478ebf7be8e1425ec9acecc6f63e22da633f6b71609" +dependencies = [ + "either", + "proc-macro2", + "quote", + "serde", + "syn 2.0.117", +] + +[[package]] +name = "nix" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset", +] + +[[package]] +name = "nkeys" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879011babc47a1c7fdf5a935ae3cfe94f34645ca0cac1c7f6424b36fc743d1bf" +dependencies = [ + "data-encoding", + "ed25519", + "ed25519-dalek", + "getrandom 0.2.17", + "log", + "rand 0.8.6", + "signatory", +] + +[[package]] +name = "no_debug" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f23a60c850e1144fc1dd9435152e0cfdc7dd18725350b4243584118013a52a4" + +[[package]] +name = "nom" +version = "7.1.3" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "nuid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc895af95856f929163a0aa20c26a78d26bfdc839f51b9d5aa7a5b79e52b7e83" +dependencies = [ + "rand 0.8.6", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "oid-registry" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8d8034d9489cdaf79228eb9f6a3b8d7bb32ba00d6645ebd48eef4077ceb5bd9" +dependencies = [ + "asn1-rs", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "ouroboros" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = 
"parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "pear" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64", + "serde_core", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap 2.14.0", +] + +[[package]] +name = "pin-project" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924" +dependencies = 
[ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pingora" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "844a13b16e556293f4ea96dc5ac0923ac6f36855a9dfc13b640d0da183f6b5b7" +dependencies = [ + "pingora-cache", + "pingora-core", + "pingora-http", + "pingora-load-balancing", + "pingora-proxy", + "pingora-timeout", +] + +[[package]] +name = "pingora-cache" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59d8c4c939a3a193a3da0e061aa7acf7432431f92ee62a26f5a9e5167a0ade2" +dependencies = [ + "ahash", + "async-trait", + "blake2", + "bstr", + "bytes", + "cf-rustracing", + "cf-rustracing-jaeger", + "hex", + "http", + "httparse", + "httpdate", + "indexmap 1.9.3", + "log", + "lru", + "once_cell", + "parking_lot", + "pingora-core", + "pingora-error", + "pingora-header-serde", + "pingora-http", + "pingora-lru", + "pingora-timeout", + "rand 0.8.6", + "regex", + "rmp", + "rmp-serde", + "serde", + "strum", + "tokio", +] + +[[package]] +name = "pingora-core" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08973c4853cef4c682f7a592907e81a32dcad69476c4846e5de079f16448b177" +dependencies = [ + "ahash", + "async-trait", + "brotli", + "bstr", + "bytes", + "chrono", + "clap", + "daemonize", + "daggy", + "derivative", + "flate2", + "futures", + "h2", + "http", + "httparse", + "httpdate", + "libc", + "log", + "nix", + "once_cell", + "openssl-probe 0.1.6", + 
"ouroboros", + "parking_lot", + "percent-encoding", + "pingora-error", + "pingora-http", + "pingora-pool", + "pingora-runtime", + "pingora-rustls", + "pingora-timeout", + "prometheus", + "rand 0.8.6", + "regex", + "serde", + "serde_yaml", + "sfv", + "socket2", + "strum", + "strum_macros", + "tokio", + "tokio-stream", + "tokio-test", + "unicase", + "windows-sys 0.59.0", + "x509-parser", + "zstd", +] + +[[package]] +name = "pingora-error" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9fa97a500e7e5c27a7b8609b9294c8922c9656322285268bfad9520f12feb38" + +[[package]] +name = "pingora-header-serde" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2705feb8b50d4e734e0c7d3879aa040e655a45656276323ff530e254585dd816" +dependencies = [ + "bytes", + "http", + "httparse", + "pingora-error", + "pingora-http", + "thread_local", + "zstd", + "zstd-safe", +] + +[[package]] +name = "pingora-http" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbb52d4651b687fab6abf669539cfd97b7cd94b301fde8f57c63354f9c9cc5e2" +dependencies = [ + "bytes", + "http", + "pingora-error", +] + +[[package]] +name = "pingora-ketama" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0286fb5a0359dca1e2e137dfe14ca4d94f676635a5eae4616bb3d8d4ce06d120" +dependencies = [ + "crc32fast", +] + +[[package]] +name = "pingora-limits" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7568624fc0e2f11fa32d27053ac862048b40bad98140b07a11d82f1b4989700" +dependencies = [ + "ahash", +] + +[[package]] +name = "pingora-load-balancing" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2606e9e22e72927a69772cefe56b0d41d251c3ffdfcd548a6020fe157fb79ad" +dependencies = [ + "arc-swap", + "async-trait", + "derivative", + "fnv", + "futures", + "http", 
+ "log", + "pingora-core", + "pingora-error", + "pingora-http", + "pingora-ketama", + "pingora-runtime", + "rand 0.8.6", + "tokio", +] + +[[package]] +name = "pingora-lru" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91bb5030596a3d442c0866ac68afe29c14ba558e77c726dcdf7016b0dbb359d9" +dependencies = [ + "arrayvec", + "hashbrown 0.17.1", + "parking_lot", + "rand 0.8.6", +] + +[[package]] +name = "pingora-pool" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f034be36772f318370d058913db43dbd22c3763ad974c995ba2e4afb2bb52a" +dependencies = [ + "crossbeam-queue", + "log", + "lru", + "parking_lot", + "pingora-timeout", + "thread_local", + "tokio", +] + +[[package]] +name = "pingora-proxy" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e1e070a98a70d0d05f2fdcfb706237e06a043b2fbc9261e8772a3459cc2175e" +dependencies = [ + "async-trait", + "bytes", + "clap", + "futures", + "h2", + "http", + "log", + "once_cell", + "pingora-cache", + "pingora-core", + "pingora-error", + "pingora-http", + "rand 0.8.6", + "regex", + "tokio", +] + +[[package]] +name = "pingora-runtime" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e371315b1c44c2e5a8788fdc61577527b785e121e6ff49144755f40d86511430" +dependencies = [ + "once_cell", + "rand 0.8.6", + "thread_local", + "tokio", +] + +[[package]] +name = "pingora-rustls" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "239b663618bb822ddeddaf6d8384177a8ab226cb22febc627a72c2fd55e7bb75" +dependencies = [ + "log", + "no_debug", + "pingora-error", + "ring", + "rustls", + "rustls-native-certs 0.7.3", + "rustls-pemfile", + "rustls-pki-types", + "tokio-rustls", +] + +[[package]] +name = "pingora-timeout" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6a853fee5ce510a7f5db2561f99c752724112ed13fc3820e70d462d278d704ea" +dependencies = [ + "once_cell", + "parking_lot", + "pin-project-lite", + "thread_local", + "tokio", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "version_check", + "yansi", +] + +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror 1.0.69", 
+] + +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "aws-lc-rs", + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.4", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.59.0", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = 
"rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + +[[package]] +name 
= "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "rcgen" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75e669e5202259b5314d1ea5397316ad400819437857b90861765f24c4cf80a2" +dependencies = [ + "pem", + "ring", + "rustls-pki-types", + "time", + "yasna", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.1", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = 
"reqwest" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" +dependencies = [ + "base64", + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "rustls-platform-verifier", + "serde", + "serde_json", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rmp" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "rmp-serde" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155" +dependencies = [ + "rmp", + "serde", +] + +[[package]] +name = "rust_decimal" +version = "1.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c5108e3d4d903e21aac27f12ba5377b6b34f9f44b325e4894c7924169d06995" +dependencies = [ + "arrayvec", + "num-traits", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rusticata-macros" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" +dependencies = [ + "nom", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +dependencies = [ + "aws-lc-rs", + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki 0.103.13", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe 0.1.6", + "rustls-pemfile", + "rustls-pki-types", + "schannel", + "security-framework 2.11.1", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe 0.2.1", + "rustls-pki-types", + "schannel", + "security-framework 3.7.0", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-platform-verifier" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs 0.8.3", + "rustls-platform-verifier-android", + "rustls-webpki 0.103.13", + "security-framework 3.7.0", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.11.1", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags 2.11.1", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = 
"serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_nanos" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a93142f0367a4cc53ae0fead1bcda39e85beccfad3dcd717656cacab94b12985" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 
2.14.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sfv" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fa1f336066b758b7c9df34ed049c0e693a426afe2b27ff7d5b14f410ab1a132" +dependencies = [ + "base64", + "indexmap 2.14.0", + "rust_decimal", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "signatory" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31" +dependencies = [ + "pkcs8", + "rand_core 0.6.4", + "signature", + "zeroize", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "simd_cesu8" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.117", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "terminal_size" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" +dependencies = [ + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "thrift_codec" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83d957f535b242b91aa9f47bde08080f9a6fef276477e55b0079979d002759d5" +dependencies = [ + "byteorder", + "trackable", +] + +[[package]] +name = "time" +version 
= "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.52.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + 
"tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6d24790a10a7af737693a3e8f1d03faef7e6ca0cc99aae5066f533766de545" +dependencies = [ + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-websockets" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-sink", + "http", + "httparse", + "rand 0.8.6", + "ring", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tokio-util", + "webpki-roots 0.26.11", +] + +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap 2.14.0", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" +dependencies = [ + "bitflags 2.11.1", + "bytes", + "futures-util", + "http", + "http-body", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", + "url", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + +[[package]] +name = "trackable" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b15bd114abb99ef8cee977e517c8f37aee63f184f2d08e3e6ceca092373369ae" +dependencies = [ + "trackable_derive", +] + +[[package]] +name = "trackable_derive" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebeb235c5847e2f82cfe0f07eb971d1e5f6804b18dac2ae16349cc604380f82f" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "tryhard" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fe58ebd5edd976e0fe0f8a14d2a04b7c81ef153ea9a54eebc42e67c2c23b4e5" +dependencies = [ + "pin-project-lite", + "tokio", +] + +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.14.0", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.1", + "hashbrown 0.15.5", + "indexmap 2.14.0", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-root-certs" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck 0.5.0", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck 0.5.0", + "indexmap 2.14.0", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.1", + "indexmap 2.14.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.14.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "x509-parser" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcbc162f30700d6f3f82a24bf7cc62ffe7caea42c0b2cba8bf7f3ae50cf51f69" +dependencies = [ + "asn1-rs", + "data-encoding", + "der-parser", + "lazy_static", + "nom", + "oid-registry", + "rusticata-macros", + "thiserror 1.0.69", + "time", +] + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "yasna" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" +dependencies = [ + "time", +] + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + 
+[[package]] +name = "zerofrom" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..c73b00c --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,97 @@ +[package] +name = "beyond-ai" +version = "0.1.0" +edition = "2024" +license = "MIT" +rust-version = "1.85" +description = "Beyond AI gateway — egress L7 proxy to LLM providers" + +[lib] +name = "beyond_ai" +path = "src/lib.rs" + +[[bin]] +name = "beyond-ai" +path = "src/main.rs" + +# `[lints]` binds every target in the crate — lib, bin, tests, benches. That matters: a crate-level +# `#![deny(...)]` attribute only covers the unit it's written in, so the binary root (`main.rs`) +# would otherwise escape the library's denies. Declaring them here closes that gap structurally. +[lints.rust] +unsafe_code = "forbid" +unused_must_use = "deny" + +# Panic surface: a stray `.unwrap()`/`.expect()`/`panic!`/`todo!` in a request path is a worker +# crash, not an error response. Deny them so a new one is a hard CI failure (mise `check:rs` runs +# clippy with `-D warnings`). These are clippy *restriction* lints (allow-by-default); naming them +# here turns them on. The handful of genuine boot-time invariants carry a local +# `#[allow(clippy::expect_used)]` with a SAFETY-style note; test/bench targets allow them wholesale +# at the file head (asserting a precondition with `.unwrap()` is the point of a test). 
+[lints.clippy] +unwrap_used = "deny" +expect_used = "deny" +panic = "deny" +todo = "deny" +unimplemented = "deny" + +# Release builds wrap arithmetic silently by default; turn that into a panic so an overflow on a +# size/count never goes unnoticed. Negligible cost for a proxy (arithmetic isn't the bottleneck). +[profile.release] +overflow-checks = true + +[dependencies] +# slipstream is published — consume it from crates.io, aliased to `store` so the code's +# `use store::...` is unchanged. No path deps into the `beyond` repo: this crate builds standalone. +store = { package = "beyond-slipstream", version = "0.1.0" } + +pingora = { version = "0.8", default-features = false, features = ["rustls"] } +pingora-core = "0.8" +pingora-limits = "0.8" +pingora-proxy = "0.8" + +arc-swap = "1" +arrayvec = "0.7" +async-trait = "0.1" +base64 = "0.22" +bytes = "1" +clap = { version = "4", features = ["derive", "env"] } +ed25519-dalek = "2.2" +figment = { version = "0.10", features = ["toml", "env"] } +getrandom = "0.3" +# The types Pingora's `ServeHttp` trait speaks (`Response<Vec<u8>>`); pin to the same 1.x already in +# the tree via Pingora so the admin app can name them directly. +http = "1" +memchr = "2" +prometheus = "0.13" +rustls = { version = "0.23", default-features = false, features = ["ring"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +thiserror = "2" +tokio = { version = "1", features = ["full"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } +zeroize = "1" + +[dev-dependencies] +# Bench harnesses. Best tool per job: `divan` for the unit micro-bench (it measures allocations +# natively via AllocProfiler, alongside timing); `criterion` for the e2e macro-bench (`async_tokio` +# drives the round-trips, and its saved-baseline comparison tracks latency/RPS over time). 
+criterion = { version = "0.5", features = ["async_tokio"] } +divan = "0.1" +http-body-util = "0.1" +# `http2` + hyper-util's `server-auto` let the mock upstream serve H1 *and* H2 on one TLS listener +# (protocol chosen by ALPN), so the concurrency bench can drive the gateway's H2 path. `rcgen` mints a +# throwaway self-signed cert for that listener; `tokio-rustls` terminates TLS in front of hyper. +hyper = { version = "1", features = ["server", "http1", "http2"] } +hyper-util = { version = "0.1", features = ["tokio", "server-auto"] } +rcgen = "0.13" +reqwest = { version = "0.13", default-features = false, features = ["json", "rustls"] } +tokio-rustls = "0.26" + +[[bench]] +name = "unit" +harness = false + +[[bench]] +name = "e2e" +harness = false diff --git a/README.md b/README.md new file mode 100644 index 0000000..e852c19 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +# beyond/ai + +Route LLM traffic through one internal proxy. Apps use their stock OpenAI or Anthropic SDK unchanged — the gateway authenticates, swaps in the real provider key, and meters every token. + +## Quick Start + +```sh +cp config.example.toml config.toml +# Set at minimum: signing_keys and one pool key +AI_POOL_KEY_OPENAI=sk-... cargo run --release +``` + +Point any OpenAI-wire SDK at `http://ai.internal` with a virtual key: + +```python +from openai import OpenAI +client = OpenAI(base_url="http://ai.internal/v1", api_key="bai_v1.1..") +``` + +Or pass your own provider key directly (BYO — forwarded unchanged, no swap): + +```python +client = OpenAI(base_url="http://ai.internal/v1", api_key="sk-your-openai-key") +``` + +## What It Does + +- **Managed keys** (`bai_v1…`) — Ed25519-verified, stateless. Swaps to the pool key. Attributes usage to tenant + VPC. Deny-set checked (spend/fraud). +- **BYO keys** — any other token passes through to the provider untouched. No key-swap, no deny-set, no attribution, no `ai.usage` billing event (aggregate throughput metrics still count it). 
+- **10 providers, zero config** — openai, anthropic, openrouter, fireworks, groq, deepseek, together, cerebras, mistral, xai. Add more in `config.toml` under `[provider_authorities]`. +- **Never buffers** — request and response stream through; a SIMD scanner extracts `model` in O(1) memory. 64KB tail taps usage without holding the body. +- **Token facts, not pricing** — emits `ai.usage` token-count events as structured logs (stdout → logfwd/OTLP → ClickHouse). A closed downstream consumer prices; slipstream carries only the deny-set. +- **Rate guardrail** — per-key request ceiling (`rate_limit_rps`). Circuit breaker against runaway keys. Deny-set owns spend control. +- **Fail-open NATS** — auth works without NATS. A NATS outage stales the deny-set; existing allows stay allowed. + +## Providers + +The provider is the **first path segment** of the base URL — no header, nothing tool-specific. Bare +`/v1` defaults to OpenAI (and `/v1/messages` to Anthropic), so the two big providers are a host-only +swap; everything else is `/{provider}/…` using that provider's own path (forwarded verbatim). + +```python +# OpenAI (default) — change only the host +client = OpenAI(base_url="http://ai.internal/v1", api_key="bai_v1...") + +# Groq — its native base path is /openai/v1, so the gateway path is /groq/openai/v1 +client = OpenAI(base_url="http://ai.internal/groq/openai/v1", api_key="bai_v1...") + +# Fireworks mounts at /inference/v1 → /fireworks/inference/v1; OpenRouter at /api/v1 → /openrouter/api/v1 +``` + +An unknown first segment is a 404. See `route::KNOWN_PROVIDERS` for each provider's native base path. + +## Config + +All config keys are overridable by `AI_`-prefixed env vars (`AI_NATS_URL`, `AI_POOL_KEY_OPENAI`, …). See `config.example.toml` for the full reference. 
+ +Required to serve managed traffic: + +| Key | Source | Purpose | +| -------------------- | ------------- | ------------------------------------------------------- | +| `signing_keys` | `config.toml` | Ed25519 public keys by `kid` — verifies `bai_v1` tokens | +| `AI_POOL_KEY_<PROVIDER>` | env (SSM) | Provider key swapped in for managed requests (e.g. `AI_POOL_KEY_OPENAI`) | + +Optional: + +| Key | Default | Purpose | +| ------------------------ | --------- | ------------------------------------------------------------------------ | +| `snapshot_path` | unset | On-disk deny-set snapshot — set on durable nodes, leave unset on Fargate | +| `rate_limit_rps` | `100` | Per-key request ceiling; `0` disables | +| `[provider_authorities]` | built-ins | Override or add upstream hosts | + +## Running Tests + +```sh +mise run test:unit:rs # pure-logic unit tests (no network) +mise run test:integration:rs # gateway + mock upstream + NATS +mise run test:smoke # live providers — needs API keys in env, bills real (tiny) requests +mise run bench # unit micro-benchmarks + end-to-end throughput +``` + +## Architecture + +[ARCHITECTURE.md](ARCHITECTURE.md) — request flow, module map, key invariants. diff --git a/benches/e2e.rs b/benches/e2e.rs new file mode 100644 index 0000000..4e39b43 --- /dev/null +++ b/benches/e2e.rs @@ -0,0 +1,360 @@ +// Bench target: `.unwrap()`/`.expect()` set up the harness; not production code. See tests/e2e.rs. +#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +//! A-1 end-to-end bench: the real `beyond-ai` binary + real `nats-server` + a mock upstream, +//! driven over real HTTP. Run with `mise run bench:e2e` (needs `nats-server` on PATH — mise +//! provides it). This is the macro counterpart to `unit.rs`: it measures the *whole* request path +//! (TCP accept → Pingora filters → key verify → key swap → body stream → upstream → usage tap), +//! not a single function. +//! +//! Reuses the e2e test harness (`tests/common`) verbatim so the bench and the integration tests +//! 
exercise the same stack. Allocations are deliberately *not* measured here — the gateway is a
+//! separate process, so its heap is invisible to this binary; allocation regressions belong to the
+//! in-process `unit` bench.
+//!
+//! The stack starts **once** and stays warm for the whole run; each iteration is one (or, for the
+//! throughput group, N concurrent) HTTP round-trip(s) against that live gateway.
+
+#[path = "../tests/common/mod.rs"]
+mod common;
+
+use std::time::Duration;
+
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use tokio::runtime::Runtime;
+use tokio::task::JoinSet;
+
+use beyond_ai::key::{VirtualKey, mint};
+use common::*;
+
+/// JSON chat-completions body sent by every request in this file (warm-up, managed, BYO, reject
+/// and throughput alike), so the variants differ only in their headers.
+const MANAGED_BODY: &str = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}"#;
+
+/// A plausible BYO provider token (anything not starting with `bai_` is BYO — passed through
+/// unchanged, no verify/deny/swap). The mock upstream accepts any token.
+const BYO_KEY: &str = "sk-byo-provider-token-1234567890";
+
+/// Concurrency level for the throughput group — enough in-flight requests to expose per-request
+/// overhead and connection-pool behavior without saturating a laptop.
+const CONCURRENCY: u64 = 32;
+
+/// A live, warmed-up stack. Field order matters only for drop (children are killed on drop); we
+/// hold every piece so nothing is torn down mid-bench.
+struct Stack {
+    // RAII guards: held only so their `Drop` (kill subprocess / abort task / clean tempdir) fires
+    // when the bench ends. Never read directly — the requests go through `url`/`client`.
+    #[allow(dead_code)]
+    gw: Gateway,
+    #[allow(dead_code)]
+    mock: MockUpstream,
+    #[allow(dead_code)]
+    nats: Nats,
+    // HTTP client used for every request against the gateway (cloned per task in the throughput
+    // bench).
+    client: reqwest::Client,
+    // The key minted in `start_stack_with`; sent as the `Bearer` token on managed requests.
+    vkey: String,
+    // Gateway base URL as returned by `Gateway::url`; request paths are appended to it.
+    url: String,
+}
+
+/// Starts a warmed-up stack whose mock upstream runs in `Mode::Json`.
+async fn start_stack() -> Stack {
+    start_stack_with(Mode::Json).await
+}
+
+/// Brings up the whole stack with the mock upstream in `mode` and returns it once it is warm.
+///
+/// Starts NATS, the mock upstream and the gateway (handing the gateway the NATS port, the mock's
+/// authority and the `b64`-encoded public key), mints a key for tenant 42 / VPC 7 using the matching
+/// `sk`, then repeats a managed `POST /v1/chat/completions` until the gateway answers 200.
+async fn start_stack_with(mode: Mode) -> Stack {
+    let nats = Nats::start().await;
+    let (pubkey, sk) = test_keypair(1);
+    let mock = MockUpstream::start(mode).await;
+    let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await;
+    // NOTE(review): the literal `1` is presumably the key id (`kid`) pairing with
+    // `test_keypair(1)` — confirm against `key::mint`.
+    let vkey = mint(
+        &VirtualKey {
+            tenant_id: 42,
+            vpc_id: 7,
+        },
+        1,
+        &sk,
+    );
+    let client = reqwest::Client::new();
+    let url = gw.url();
+
+    // Warm until the gateway answers 200 — the watcher connects to NATS and the DNS cache fills on
+    // the first call, neither of which we want inside the timed loop.
+    {
+        // Clones for the probe closure, so the originals can still be moved into `Stack` below.
+        let (c, u, k) = (client.clone(), url.clone(), vkey.clone());
+        wait_for_status(200, move || {
+            // Cloned again per call: each returned future owns its own copies.
+            let (c, u, k) = (c.clone(), u.clone(), k.clone());
+            async move {
+                c.post(format!("{u}/v1/chat/completions"))
+                    .header("authorization", format!("Bearer {k}"))
+                    .header("content-type", "application/json")
+                    .body(MANAGED_BODY)
+                    .send()
+                    .await
+                    .map(|r| r.status().as_u16())
+                    // A send error is reported as status 0, which is never the awaited 200.
+                    .unwrap_or(0)
+            }
+        })
+        .await;
+    }
+
+    Stack {
+        gw,
+        mock,
+        nats,
+        client,
+        vkey,
+        url,
+    }
+}
+
+/// One full managed round-trip: key swap + body relay + non-streaming usage tap. Drains the
+/// response body so the connection is returned to the pool (otherwise reqwest would open a new
+/// socket every iteration and we'd be benching `connect`, not the gateway).
+async fn managed_roundtrip(s: &Stack) {
+    let resp = s
+        .client
+        .post(format!("{}/v1/chat/completions", s.url))
+        .header("authorization", format!("Bearer {}", s.vkey))
+        .header("content-type", "application/json")
+        .body(MANAGED_BODY)
+        .send()
+        .await
+        .expect("request");
+    // `assert_eq!`, not `debug_assert_eq!`: benches build with debug assertions off, so a debug
+    // assert would be compiled out and a failing response would be timed as if it were a success.
+    // One integer compare is noise next to an HTTP round-trip.
+    assert_eq!(resp.status().as_u16(), 200);
+    let _ = resp.bytes().await.expect("body");
+}
+
+/// One **BYO** round-trip: a non-`bai_` token, passed straight through — no key verify, no deny-set
+/// check, no key swap. Isolates the passthrough path's overhead from the managed path's auth work.
+async fn byo_roundtrip(s: &Stack) {
+    let resp = s
+        .client
+        .post(format!("{}/v1/chat/completions", s.url))
+        .header("authorization", format!("Bearer {BYO_KEY}"))
+        .header("content-type", "application/json")
+        .body(MANAGED_BODY)
+        .send()
+        .await
+        .expect("request");
+    // Checked in optimized builds too (see `managed_roundtrip`): a BYO request that stops being
+    // passed through must fail the bench, not skew its numbers.
+    assert_eq!(resp.status().as_u16(), 200);
+    let _ = resp.bytes().await.expect("body");
+}
+
+/// One **rejected** request: no API key ⇒ 401, short-circuited in `request_filter` **before** any
+/// upstream connection. Benched to prove a flood of rejects costs far less than a proxied request —
+/// the rate-guardrail/flood rationale (a reject must not consume the upstream-connection
+/// constraint). The gap between this and `managed_json_latency` is the gateway's reject headroom.
+async fn reject_roundtrip(s: &Stack) {
+    let resp = s
+        .client
+        .post(format!("{}/v1/chat/completions", s.url))
+        .header("content-type", "application/json")
+        .body(MANAGED_BODY)
+        .send()
+        .await
+        .expect("request");
+    // Checked in optimized builds too: if the gateway ever proxied this request (200) the bench
+    // would otherwise report a proxied round-trip under the "reject" name.
+    assert_eq!(resp.status().as_u16(), 401);
+    let _ = resp.bytes().await.expect("body");
+}
+
+/// Registers the `e2e` criterion group: four single-request latency benches (managed JSON, BYO
+/// JSON, managed SSE, reject) and one throughput bench that keeps `CONCURRENCY` managed requests in
+/// flight per iteration. Both stacks are started once up front and dropped after the group finishes.
+fn bench_e2e(c: &mut Criterion) {
+    let rt = Runtime::new().expect("tokio runtime");
+    let stack = rt.block_on(start_stack());
+    // A second stack whose mock streams SSE, so the response-tap (tail buffer + compaction) hot path
+    // is actually exercised — it's a near no-op for the single-shot JSON body.
+    let sse_stack = rt.block_on(start_stack_with(Mode::Sse));
+
+    let mut group = c.benchmark_group("e2e");
+    // Real round-trips are sub-millisecond on loopback but still ~100× a micro-bench; trim the
+    // sample count so the suite stays in the seconds, not minutes.
+    group.sample_size(50);
+    group.measurement_time(Duration::from_secs(10));
+
+    // Single-request latency through the full proxy: managed (verify + deny + key swap), BYO
+    // (pure passthrough), SSE relay (exercises the streaming response tap), and the reject
+    // fast-path (401, no upstream). Compared against each other these isolate where time goes.
+    group.bench_function("managed_json_latency", |b| {
+        b.to_async(&rt).iter(|| managed_roundtrip(&stack));
+    });
+    group.bench_function("byo_json_latency", |b| {
+        b.to_async(&rt).iter(|| byo_roundtrip(&stack));
+    });
+    group.bench_function("managed_sse_latency", |b| {
+        b.to_async(&rt).iter(|| managed_roundtrip(&sse_stack));
+    });
+    group.bench_function("reject_missing_key_latency", |b| {
+        b.to_async(&rt).iter(|| reject_roundtrip(&stack));
+    });
+
+    // Throughput: CONCURRENCY requests in flight per iteration. `Throughput::Elements` makes
+    // criterion report requests/sec.
+    group.throughput(Throughput::Elements(CONCURRENCY));
+    group.bench_function("managed_json_throughput", |b| {
+        b.to_async(&rt).iter(|| async {
+            let mut set = JoinSet::new();
+            for _ in 0..CONCURRENCY {
+                // Spawned tasks must own their data, so each gets its own clones.
+                let client = stack.client.clone();
+                let url = stack.url.clone();
+                let vkey = stack.vkey.clone();
+                set.spawn(async move {
+                    let resp = client
+                        .post(format!("{url}/v1/chat/completions"))
+                        .header("authorization", format!("Bearer {vkey}"))
+                        .header("content-type", "application/json")
+                        .body(MANAGED_BODY)
+                        .send()
+                        .await
+                        .expect("request");
+                    let _ = resp.bytes().await.expect("body");
+                });
+            }
+            // The iteration ends only once every spawned request has finished; a panicked task
+            // fails the bench here.
+            while let Some(r) = set.join_next().await {
+                r.expect("task");
+            }
+        });
+    });
+
+    group.finish();
+
+    // Keep the stacks alive until every bench has run, then tear them down explicitly.
+    drop(stack);
+    drop(sse_stack);
+}
+
+/// Concurrency levels swept by `bench_concurrency`. Spans below and above hyper's default
+/// `SETTINGS_MAX_CONCURRENT_STREAMS` (200) so an H2 stream-concurrency cliff (if any) shows up against
+/// H1's connection pool.
+const SWEEP: &[u64] = &[1, 8, 32, 128, 512];
+
+/// Fire `conc` managed requests at `url` concurrently and drain each body (returns the connection to
+/// the pool). This is one bench iteration; `Throughput::Elements(conc)` makes criterion report req/s.
+async fn drive(client: &reqwest::Client, url: &str, vkey: &str, conc: u64) { + let mut set = JoinSet::new(); + for _ in 0..conc { + let (c, u, k) = (client.clone(), url.to_string(), vkey.to_string()); + set.spawn(async move { + let resp = c + .post(format!("{u}/v1/chat/completions")) + .header("authorization", format!("Bearer {k}")) + .header("content-type", "application/json") + .body(MANAGED_BODY) + .send() + .await + .expect("request"); + let _ = resp.bytes().await.expect("body"); + }); + } + while let Some(r) = set.join_next().await { + r.expect("task"); + } +} + +/// Warm a gateway until it answers 200, then return the protocol it used to reach the upstream — read +/// from the `x-mock-proto` header the TLS mock stamps and the gateway relays. This is the proof the +/// "h2"/"h1" bench labels reflect what actually negotiated, not just what we configured. +async fn warm_and_proto(client: &reqwest::Client, url: &str, vkey: &str) -> String { + { + let (c, u, k) = (client.clone(), url.to_string(), vkey.to_string()); + wait_for_status(200, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { + c.post(format!("{u}/v1/chat/completions")) + .header("authorization", format!("Bearer {k}")) + .header("content-type", "application/json") + .body(MANAGED_BODY) + .send() + .await + .map(|r| r.status().as_u16()) + .unwrap_or(0) + } + }) + .await; + } + let resp = client + .post(format!("{url}/v1/chat/completions")) + .header("authorization", format!("Bearer {vkey}")) + .header("content-type", "application/json") + .body(MANAGED_BODY) + .send() + .await + .expect("warm request"); + resp.headers() + .get("x-mock-proto") + .and_then(|v| v.to_str().ok()) + .unwrap_or("unknown") + .to_string() +} + +/// H2-vs-H1 to the upstream, under concurrency. One TLS+H2 mock; two gateways against it — one with +/// `upstream_http2 = true` (ALPN H2H1 → h2), one `false` (ALPN H1). 
Same client→gateway transport +/// (plain H1) for both, so the only variable is the gateway→upstream protocol. The sweep exposes +/// whether H2 multiplexing wins or hits its stream-concurrency cap vs H1's connection pool. +fn bench_concurrency(c: &mut Criterion) { + let rt = Runtime::new().expect("tokio runtime"); + let nats = rt.block_on(Nats::start()); + let mock = rt.block_on(MockUpstream::start_tls(Mode::Json)); + let (pubkey, sk) = test_keypair(1); + let vkey = mint( + &VirtualKey { + tenant_id: 42, + vpc_id: 7, + }, + 1, + &sk, + ); + + // Two gateways at the same self-signed TLS mock; ALPN is the only difference. Rate limits OFF + // (both tiers): the sweep drives one credential well past the 100 rps default, and a rate-limited + // 429 short-circuits *before* the upstream — it would measure the reject path, not H2-vs-H1. + let gw_h2 = rt.block_on( + Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .tls_upstream() + .upstream_http2(true) + .rate_limit_rps(0) + .byo_rate_limit_rps(0) + .start(), + ); + let gw_h1 = rt.block_on( + Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .tls_upstream() + .upstream_http2(false) + .rate_limit_rps(0) + .byo_rate_limit_rps(0) + .start(), + ); + let client = reqwest::Client::new(); + let (url_h2, url_h1) = (gw_h2.url(), gw_h1.url()); + + // Prove the gateways actually negotiated what we asked for before trusting the labels. 
+ let proto_h2 = rt.block_on(warm_and_proto(&client, &url_h2, &vkey)); + let proto_h1 = rt.block_on(warm_and_proto(&client, &url_h1, &vkey)); + assert_eq!( + proto_h2, "h2", + "upstream_http2=true should negotiate h2 to the mock" + ); + assert_eq!( + proto_h1, "http/1.1", + "upstream_http2=false should stay http/1.1 to the mock" + ); + eprintln!("e2e_concurrency: confirmed gw_h2→upstream=h2, gw_h1→upstream=http/1.1"); + + let mut group = c.benchmark_group("e2e_concurrency"); + group.sample_size(10); + group.measurement_time(Duration::from_secs(6)); + for &conc in SWEEP { + group.throughput(Throughput::Elements(conc)); + group.bench_with_input(BenchmarkId::new("h2", conc), &conc, |b, &conc| { + b.to_async(&rt) + .iter(|| drive(&client, &url_h2, &vkey, conc)); + }); + group.bench_with_input(BenchmarkId::new("h1", conc), &conc, |b, &conc| { + b.to_async(&rt) + .iter(|| drive(&client, &url_h1, &vkey, conc)); + }); + } + group.finish(); + + drop(gw_h2); + drop(gw_h1); + drop(mock); + drop(nats); +} + +criterion_group!(benches, bench_e2e, bench_concurrency); +criterion_main!(benches); diff --git a/benches/unit.rs b/benches/unit.rs new file mode 100644 index 0000000..e35a195 --- /dev/null +++ b/benches/unit.rs @@ -0,0 +1,232 @@ +// Bench target: `.unwrap()`/`.expect()` set up fixtures; not production code. See tests/e2e.rs. +#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +//! Unit bench: the pure, IO-free hot paths. Timing **and** allocations come from `divan` — its +//! `AllocProfiler` (installed as the global allocator below) reports alloc count + bytes per +//! sample right beside ns/iter, so the design's allocation claims are visible in one table. +//! Run with `mise run bench:unit` (or `cargo bench --bench unit`). +//! +//! The headline invariant to watch: managed-key **verify** is 0 allocs — it decodes onto the +//! stack (see `key.rs`). `peek` should hold a flat, tiny alloc count independent of body size +//! (the O(1)-memory claim). 
A regression shows up as a non-zero / grown number in the alloc +//! columns the moment this runs. +//! +//! Fixtures are built *outside* the closure handed to `Bencher::bench` (or in `args`), so only the +//! measured call is timed and counted — setup allocations don't pollute the numbers. + +use std::hint::black_box; + +use divan::Bencher; +use divan::counter::BytesCount; + +#[global_allocator] +static ALLOC: divan::AllocProfiler = divan::AllocProfiler::system(); + +fn main() { + divan::main(); +} + +mod key { + use super::*; + use beyond_ai::key::{Keyring, VirtualKey, mint}; + use ed25519_dalek::SigningKey; + + const ID: VirtualKey = VirtualKey { + tenant_id: 42, + vpc_id: 7, + }; + + /// Stateless verify — must not touch the heap (stack-only base64 decode + signature check). + #[divan::bench] + fn verify(bencher: Bencher) { + let sk = SigningKey::from_bytes(&[1u8; 32]); + let mut ring = Keyring::new(); + ring.insert(1, sk.verifying_key()); + let token = mint(&ID, 1, &sk); + bencher.bench(|| ring.verify(black_box(&token))); + } + + /// Reference mint path (allocates the output string + base64 segments) — tracked so the Go + /// control-plane parity implementation has a baseline. + #[divan::bench] + fn mint_key(bencher: Bencher) { + let sk = SigningKey::from_bytes(&[1u8; 32]); + bencher.bench(|| mint(black_box(&ID), 1, &sk)); + } +} + +mod route { + use super::*; + use beyond_ai::route::{Dialect, dialect_default}; + + // Dialect → default provider name: the per-request routing decision (sans override). 0-alloc. 
+ #[divan::bench(args = [Dialect::OpenAI, Dialect::Anthropic])] + fn dialect_default_name(bencher: Bencher, dialect: Dialect) { + bencher.bench(|| dialect_default(black_box(dialect))); + } +} + +mod deny { + use super::*; + use beyond_ai::deny::{self, DenyReason, DenySet}; + + // --- ingest path: parse a watched NATS key/value into the set (off the request hot path) --- + + #[divan::bench] + fn parse_key() -> Option { + deny::parse_key(black_box("blackhole.123456789")) + } + + #[divan::bench] + fn parse_reason_bare() -> beyond_ai::deny::DenyReason { + deny::parse_reason(black_box(b"spend")) + } + + #[divan::bench] + fn parse_reason_json() -> beyond_ai::deny::DenyReason { + deny::parse_reason(black_box(br#"{"reason":"fraud","exp":123}"#)) + } + + // --- request hot path: the lookup run on EVERY managed request (`proxy::request_filter`) --- + + /// Build a deny-set holding `n` cut-off tenants (ids `0..n`). Built outside the timed closure. + fn populated(n: u64) -> DenySet { + (0..n).map(|t| (t, DenyReason::Spend)).collect() + } + + /// The common case: tenant **absent** from the set (default-allow). The headline invariant is + /// that this is O(1) and **0-alloc regardless of set size** — so the args span an empty set and + /// a large one (1M cut-off tenants); the ns/iter and the (absent) alloc columns must stay flat. + /// A regression to anything size-dependent shows up as the big-`n` row diverging from the small. + #[divan::bench(args = [0, 1_000_000])] + fn reason_miss(bencher: Bencher, n: u64) { + let set = populated(n); + // A tenant id past the populated range → guaranteed miss (the allow path). + bencher.bench(|| set.reason(black_box(n + 1))); + } + + /// The deny case: tenant present. Same O(1) hash lookup, returning the reason — proves the + /// enforce path costs the same as the allow path (no surprise on the rejection branch). 
+ #[divan::bench(args = [1, 1_000_000])] + fn reason_hit(bencher: Bencher, n: u64) { + let set = populated(n); + bencher.bench(|| set.reason(black_box(n / 2))); + } +} + +mod ratelimit { + use super::*; + use beyond_ai::ratelimit::RateLimit; + + /// Guardrail charged on **every request before verify** (`proxy::request_filter`). Managed: a + /// seeded hash of the raw credential + the per-credential sketch `observe` (the BYO global tier is + /// skipped). Fixed memory regardless of key cardinality, so this must be flat and low-alloc. + #[divan::bench] + fn check_managed(bencher: Bencher) { + let rl = RateLimit::new(1_000_000, 1_000_000).expect("enabled"); + let cred = "bai_v1.1.AAAAAAAAAAAAAAAAAAAAAA.signature-base64url-payload-here"; + bencher.bench(|| rl.check(black_box(cred), black_box(true))); + } + + /// A longer BYO provider token — exercises both tiers (global BYO bucket + per-credential sketch) + /// against a realistic raw token length: the full per-request BYO cost. + #[divan::bench] + fn check_byo(bencher: Bencher) { + let rl = RateLimit::new(1_000_000, 1_000_000).expect("enabled"); + let token = "sk-some-byo-provider-token-of-realistic-length-abcdef0123456789"; + bencher.bench(|| rl.check(black_box(token), black_box(false))); + } +} + +mod usage { + use super::*; + use beyond_ai::usage::{self, Usage}; + + const OAI: &[u8] = br#"{"usage":{"prompt_tokens":12,"completion_tokens":34,"prompt_tokens_details":{"cached_tokens":4}}}"#; + const ANT: &[u8] = br#"{"usage":{"input_tokens":100,"output_tokens":50,"cache_read_input_tokens":10,"cache_creation_input_tokens":7}}"#; + const OAI_SSE: &[u8] = b"data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\ndata: {\"choices\":[],\"usage\":{\"prompt_tokens\":5,\"completion_tokens\":9}}\n\ndata: [DONE]\n\n"; + const ANT_SSE: &[u8] = b"event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"usage\":{\"input_tokens\":20,\"output_tokens\":0}}}\n\nevent: message_delta\ndata: 
{\"type\":\"message_delta\",\"usage\":{\"output_tokens\":15}}\n\n"; + + #[divan::bench] + fn openai_body() -> Option { + usage::openai_body(black_box(OAI)) + } + + #[divan::bench] + fn anthropic_body() -> Option { + usage::anthropic_body(black_box(ANT)) + } + + #[divan::bench] + fn openai_stream() -> Option { + usage::openai_stream(black_box(OAI_SSE)) + } + + #[divan::bench] + fn anthropic_stream() -> Option { + usage::anthropic_stream(black_box(ANT_SSE)) + } +} + +mod peek { + use super::*; + use beyond_ai::peek::ModelScanner; + + /// A realistic chat body with `padding` bytes of message content, the root `model` placed + /// **last** so the scanner must walk the whole body (worst case for the streaming scan). + fn body_with_model_last(padding: usize) -> Vec { + let content = "x".repeat(padding); + format!(r#"{{"messages":[{{"role":"user","content":"{content}"}}],"stream":true,"model":"claude-opus-4-8"}}"#) + .into_bytes() + } + + /// Sizes span a tiny request, a typical prompt, and a large one (e.g. a pasted document / + /// base64 image) that exercises the SIMD fast-skip over uninteresting string content. The + /// `BytesCount` makes divan report bytes/sec; the alloc columns should stay flat across sizes. + #[divan::bench(args = [0, 4 * 1024, 256 * 1024])] + fn scan_model_last(bencher: Bencher, padding: usize) { + let body = body_with_model_last(padding); + bencher.counter(BytesCount::of_slice(&body)).bench(|| { + let mut scanner = ModelScanner::new(); + scanner.feed(black_box(&body)); + scanner.take_model() + }); + } + + use beyond_ai::peek::plan_stream_usage_injection; + + /// A streaming body whose large `content` value precedes the root `stream` field — the worst + /// case for the injection planner: it must walk past `padding` bytes of uninteresting string + /// content (the SIMD fast-skip target) before it can decide. 
+ fn streaming_body(padding: usize) -> Vec { + let content = "x".repeat(padding); + format!(r#"{{"messages":[{{"role":"user","content":"{content}"}}],"model":"gpt-4o","stream":true}}"#) + .into_bytes() + } + + /// The common case: a non-streaming body (no `stream` field). The planner must prove absence, + /// which today means a full structural walk — the case the `memmem` pre-filter short-circuits. + fn non_streaming_body(padding: usize) -> Vec { + let content = "x".repeat(padding); + format!(r#"{{"messages":[{{"role":"user","content":"{content}"}}],"model":"gpt-4o"}}"#) + .into_bytes() + } + + /// Plan injection on a **streaming** body (must walk past the big content value to find `stream`). + #[divan::bench(args = [0, 4 * 1024, 256 * 1024])] + fn plan_inject_streaming(bencher: Bencher, padding: usize) { + let body = streaming_body(padding); + bencher + .counter(BytesCount::of_slice(&body)) + .bench(|| plan_stream_usage_injection(black_box(&body))); + } + + /// Plan injection on a **non-streaming** body (no `stream` key — the majority case). + #[divan::bench(args = [0, 4 * 1024, 256 * 1024])] + fn plan_inject_non_streaming(bencher: Bencher, padding: usize) { + let body = non_streaming_body(padding); + bencher + .counter(BytesCount::of_slice(&body)) + .bench(|| plan_stream_usage_injection(black_box(&body))); + } +} diff --git a/config.example.toml b/config.example.toml new file mode 100644 index 0000000..ed7f7af --- /dev/null +++ b/config.example.toml @@ -0,0 +1,89 @@ +# Beyond AI gateway — example config. Every key is overridable by an `AI_`-prefixed env var +# (e.g. `AI_NATS_URL`, `AI_POOL_KEY_OPENAI`, `AI_READ_TIMEOUT_SECS`). Values below are defaults. + +listen = "0.0.0.0:8080" # client (app) traffic; internal-only, fronted as ai.internal +metrics_listen = "0.0.0.0:9090" # internal admin: /metrics (Prometheus), /livez, /readyz + +# NATS / slipstream — carries ONLY the deny-set (`blackhole.*`). 
Auth + keys come from this file, +# so the gateway authenticates + serves managed traffic even if NATS is down. +nats_url = "nats://localhost:4222" +# nats_creds = "" # ECS via SOPS +# nats_creds_file = "/path/to/nats.creds" +config_bucket = "ai-gateway" + +# Optional on-disk deny-set snapshot (slipstream append-log + resume cursor). Set this ONLY on +# durable storage (edge/tunnel nodes): a restart then seeds the deny-set from disk and resumes the +# NATS watch from the saved revision — enforcing immediately, even before NATS reconnects, and +# skipping the boot scan. Leave unset on ephemeral hosts (e.g. Fargate); the gateway seeds from a +# NATS scan each boot. It's a pure cache: deleting the file just forces a rescan. +# snapshot_path = "/var/lib/beyond-ai/denyset.snap" + +# Upstream timeouts. read/idle are generous: LLM streams can run for minutes. +connect_timeout_secs = 10 +read_timeout_secs = 600 +write_timeout_secs = 60 +idle_timeout_secs = 90 + +# Graceful shutdown. On SIGTERM, in-flight requests drain for up to grace_period_secs before the +# runtimes are torn down (then runtime_timeout_secs is the final teardown backstop). +# Default = read_timeout_secs so a deploy NEVER truncates an in-flight stream — the gateway is a +# transparent proxy and must not mangle a paid-for generation (a half-delivered SSE can't be cleanly +# retried). Pingora stops accepting new connections at SIGTERM, so this only waits out the longest +# existing stream, not new work; slower rollouts are the deliberate price. +# The orchestrator must grant the same window or it caps us (it SIGKILLs at its own stop timeout): +# set k8s terminationGracePeriodSeconds (or the EC2 agent's ECS_CONTAINER_STOP_TIMEOUT) to match. +# NOTE: ECS Fargate caps stopTimeout at 120s — there, streams past 120s are still cut (a Fargate limit). 
+shutdown_grace_period_secs = 600 +shutdown_runtime_timeout_secs = 10 + +# upstream_tls = true # set false only for a plaintext mock (tests) + +# Per-credential request-rate ceiling (requests/sec) — a blast-radius circuit breaker, not a spend +# control (the deny-set owns spend). Caps how fast one credential (managed virtual key ≈ a tenant+app, +# or a BYO token) can drive the gateway, bounding a leaked/runaway key during the deny-set's reaction +# lag and a failure flood that never bills. Generous by default so legitimate traffic never trips it; +# set 0 to disable. Tune from the `ai_rejections_total{reason="rate_limit"}` metric. +rate_limit_rps = 100 + +# Per-provider circuit breaker. Trips when `circuit_breaker_threshold` upstream FAILURES occur within +# `circuit_breaker_window_secs`; while open, requests to that provider fast-fail with 503 +# (ai_rejections_total{reason="circuit_open"}) instead of piling up against read_timeout_secs and +# exhausting connection/in-flight slots for every provider. After `circuit_breaker_reset_secs` a probe +# is allowed — success closes it, failure reopens it. A FAILURE is a 5xx response or a connect failure +# (the provider is broken); a 429 is NOT a failure (a healthy provider throttling our pool key — the +# rate limiter and the client's Retry-After own that). Applies to all traffic (managed + BYO). Set +# threshold 0 to disable. Defaults are generous so normal background 5xx noise never trips it. +circuit_breaker_threshold = 20 +circuit_breaker_window_secs = 10 +circuit_breaker_reset_secs = 30 + +# Aggregate request-rate ceiling (requests/sec) for ALL BYO traffic combined — one shared bucket. +# BYO is unverified and upstream-bound: a flood of *distinct* random BYO tokens slips past the +# per-credential ceiling and would open junk-auth connections to providers from our egress IPs, +# getting them rate-limited or banned. This bounds that aggregate regardless of token variation. 
+# Managed traffic is EXEMPT (verified before any upstream connect, can't be forged), so this bucket +# never sheds core tenant load. Generous by default; set 0 to disable. Tune from the +# `ai_rejections_total{reason="rate_limit_byo_global"}` metric. +byo_rate_limit_rps = 1000 + +# Optional per-provider upstream authority (host:port), BY PROVIDER NAME. For a known provider this +# overrides its built-in default; for an unknown name it ADDS a new OpenAI-wire provider, then +# reachable at `/{name}/…` (the provider is the first path segment of the request). Known providers +# (zero-config defaults): openai, anthropic, openrouter, fireworks, groq, deepseek, together, +# cerebras, mistral, xai. +# [provider_authorities] +# openai = "api.openai.com:443" +# my-self-hosted = "llm.internal:8443" + +# Managed Beyond pool keys, BY PROVIDER NAME. Inject via SSM-backed env in production +# (AI_POOL_KEY_OPENAI, AI_POOL_KEY_GROQ, …) rather than this file; env wins over any value here. +# A provider with no pool key can't serve managed traffic (→ 503); BYO is unaffected. +# [pool_keys] +# openai = "sk-..." +# anthropic = "sk-ant-..." +# fireworks = "fw-..." + +# Trusted Ed25519 signing PUBLIC keys (kid -> base64). Multiple for zero-downtime rotation. +# Managed virtual keys (bai_…) are verified against these; BYO raw tokens skip verification. +[signing_keys] +# 1 = "" diff --git a/mise.toml b/mise.toml new file mode 100644 index 0000000..be93405 --- /dev/null +++ b/mise.toml @@ -0,0 +1,52 @@ +[tools] +dprint = "latest" +rust = { version = "1.92", components = "rustfmt,clippy", targets = "aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu" } +yamlfmt = "latest" +cargo-binstall = "latest" +"cargo:cross" = "latest" +# nats-server for the e2e harness (real JetStream KV backing slipstream). 
+"ubi:nats-io/nats-server" = { version = "latest", exe = "nats-server" } + +[tasks."build:rs"] +run = "cargo build" + +[tasks."build:rs:release"] +run = "cargo build --release" + +[tasks."check:rs"] +run = "cargo clippy --all-targets -- -D warnings" + +[tasks."check:fmt"] +run = "dprint check" + +[tasks."format"] +run = "dprint fmt && cargo fmt" + +[tasks."test:unit:rs"] +description = "Unit tests for the pure-logic modules (key/route/peek/usage/deny/config/resolver)." +run = "cargo test --lib" + +# Integration tests (gateway driven against a mock upstream + NATS) — to be added; see ARCHITECTURE.md. +[tasks."test:integration:rs"] +description = "End-to-end gateway tests against a mock provider + docker-compose NATS (TODO)." +run = "cargo test --test '*'" + +[tasks."test:smoke"] +description = "Live smoke tests against REAL providers. Auto-loads .env if present; set the API keys you have (ANTHROPIC_API_KEY, OPENAI_API_KEY, GROQ_API_KEY, …) there or in the environment. Each test skips if its key is unset. Bills real (tiny, max_tokens-capped) requests." +run = """ +# Auto-load .env if it exists (export every assignment), so the keys reach the test process. +if [ -f .env ]; then set -a; . ./.env; set +a; fi +cargo test -p beyond-ai --test smoke -- --ignored --nocapture +""" + +[tasks."bench:unit"] +description = "divan micro-benchmarks of the IO-free hot paths (key/peek/usage/route/deny): timing + native allocation counts." +run = "cargo bench --bench unit" + +[tasks."bench:e2e"] +description = "A-1 end-to-end bench: real beyond-ai binary + nats-server + mock upstream over HTTP." +run = "cargo bench --bench e2e" + +[tasks."bench"] +description = "Run both bench harnesses (unit micro + e2e macro)." +depends = ["bench:unit", "bench:e2e"] diff --git a/src/admin.rs b/src/admin.rs new file mode 100644 index 0000000..8b52d3d --- /dev/null +++ b/src/admin.rs @@ -0,0 +1,96 @@ +//! Admin / observability HTTP surface served on the metrics listener: `/livez`, `/readyz`, +//! 
`/metrics`. +//! +//! Matches the Beyond service convention (cf. `auth`, `objects`): the body is `{"status", +//! "version"}` and there are two probes. **Both always return HTTP 200** once the process is +//! answering, because the gateway is **fail-open by design** — auth + key swap come from boot +//! config, and a NATS outage degrades only the (stale) deny-set, never the ability to serve. So +//! readiness must *not* gate on NATS: a cold boot with NATS down can still serve correctly, and a +//! non-200 would pull a healthy gateway out of the load balancer for no reason. +//! +//! `readyz` does, however, carry a distinct *body* signal that `livez` doesn't: when the deny-set +//! watcher is disconnected from NATS, `readyz` reports `"status":"degraded"` (still 200). This lets +//! an operator alert on "readyz has been degraded for >N minutes" — the spend/fraud enforcement is +//! stale — without ever risking an LB eviction. `livez` is pure liveness: 200/`"ok"` whenever the +//! process can answer. (The `ai_nats_connected` gauge is the same signal in Prometheus; the body +//! flag is for orchestrators that probe HTTP but don't scrape.) +//! +//! Implemented as a Pingora `ServeHttp` app so all three paths share the one (internal) metrics +//! port — Pingora's built-in prometheus service only serves `/metrics`, so we hand-route all three. + +use crate::metrics::Metrics; +use async_trait::async_trait; +use http::Response; +use pingora_core::apps::http_app::ServeHttp; +use pingora_core::protocols::http::ServerSession; +use prometheus::{Encoder, TextEncoder}; +use std::sync::Arc; + +/// Compile-time service version, surfaced in every health body (matches the sibling services). +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +pub struct AdminApp { + /// Read-only handle to the metric gauges. Used by `/readyz` to reflect NATS connectivity in the + /// health body (never to gate the HTTP status — see module docs). 
+ pub metrics: Arc, +} + +impl AdminApp { + /// Build a `{"status","version"}` JSON health response. `status` is `"ok"`/`"degraded"` so a + /// human or a probe can read intent without parsing the code. Header values are all static or + /// integer, so the builder can't fail — `expect` documents that invariant. + #[allow(clippy::expect_used)] // builder inputs are all static/integer; cannot fail + fn health(status: u16, health: &str) -> Response> { + let body = serde_json::json!({ "status": health, "version": VERSION }) + .to_string() + .into_bytes(); + Response::builder() + .status(status) + .header(http::header::CONTENT_TYPE, "application/json") + .header(http::header::CONTENT_LENGTH, body.len()) + .body(body) + .expect("static health response is always valid") + } + + /// Encode the default Prometheus registry as text (same output as Pingora's built-in service). + #[allow(clippy::expect_used)] // builder inputs are encoder-derived/integer; cannot fail + fn metrics() -> Response> { + let encoder = TextEncoder::new(); + // Pre-size for a typical scrape: the gateway's fixed metric set renders to a few KiB of + // text, so one allocation up front avoids the handful of reallocs `Vec::new` would incur as + // the encoder appends. 8 KiB comfortably covers the current set with headroom. + let mut buffer = Vec::with_capacity(8 * 1024); + // `encode` only errors if the writer fails; a `Vec` never does, so the result is infallible + // here — discard it explicitly (the crate denies `unused_must_use`). 
+ let _ = encoder.encode(&prometheus::gather(), &mut buffer); + Response::builder() + .status(200) + .header(http::header::CONTENT_TYPE, encoder.format_type()) + .header(http::header::CONTENT_LENGTH, buffer.len()) + .body(buffer) + .expect("metrics response is always valid") + } +} + +#[async_trait] +impl ServeHttp for AdminApp { + async fn response(&self, session: &mut ServerSession) -> Response> { + match session.req_header().uri.path() { + // Pure liveness: 200/ok whenever the process can answer. + "/livez" => Self::health(200, "ok"), + // Readiness: always 200 (fail-open — never pull a serving gateway from the LB), but the + // body reports `degraded` when the deny-set watcher is disconnected from NATS, so an + // operator can alert on stale spend/fraud enforcement without an eviction. + "/readyz" => { + let health = if self.metrics.nats_connected.get() == 1 { + "ok" + } else { + "degraded" + }; + Self::health(200, health) + } + "/metrics" => Self::metrics(), + _ => Self::health(404, "not_found"), + } + } +} diff --git a/src/circuit_breaker.rs b/src/circuit_breaker.rs new file mode 100644 index 0000000..af8c1e1 --- /dev/null +++ b/src/circuit_breaker.rs @@ -0,0 +1,935 @@ +//! Lock-free circuit breaker for protecting external service calls. +//! +//! This implementation is provably race-free through: +//! 1. Atomic words for all mutable state (no multi-variable coordination) +//! 2. Compare-and-swap loops for all state transitions +//! 3. Monotonic timestamps for timeout detection +//! +//! # States +//! +//! ```text +//! failure_threshold reached +//! ┌─────────┐ ──────────────────────────► ┌────────┐ +//! │ Closed │ │ Open │ +//! └─────────┘ ◄────────────────────────── └────────┘ +//! ▲ success in half-open │ +//! │ │ reset_timeout elapsed +//! │ ┌─────────────┐ │ +//! └─────── │ Half-Open │ ◄──────────────┘ +//! success└─────────────┘ +//! │ +//! │ failure +//! ▼ +//! back to Open +//! ``` +//! +//! # Failure Policies +//! +//! 
Two failure detection policies are supported: +//! +//! - **Consecutive**: Opens after N failures in a row. Any success resets the count. +//! Good for detecting complete backend failures. +//! +//! - **Windowed**: Opens after N failures within a time window. Failures outside +//! the window are forgotten. Good for detecting degraded backends with partial failures. +//! +//! # Example +//! +//! ```rust +//! use beyond_ai::circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, FailurePolicy}; +//! use std::time::Duration; +//! +//! // Consecutive failures (default) +//! let cb = CircuitBreaker::new(CircuitBreakerConfig::default()); +//! +//! // Windowed failures (better for edge proxies) +//! let cb = CircuitBreaker::new( +//! CircuitBreakerConfig::windowed(3, Duration::from_secs(10)) +//! .reset_timeout(Duration::from_secs(30)) +//! ); +//! +//! // Before calling external service +//! if cb.allow().is_err() { +//! // return Err("service temporarily unavailable"); +//! } +//! +//! // match call_external_service().await { +//! // Ok(result) => { +//! // cb.record_success(); +//! // Ok(result) +//! // } +//! // Err(e) if is_connectivity_error(&e) => { +//! // cb.record_failure(); +//! // Err(e) +//! // } +//! // Err(e) => Err(e), // Don't count business logic errors +//! // } +//! ``` + +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; + +/// How failures are counted before opening the circuit. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FailurePolicy { + /// N consecutive failures opens the circuit. Any success resets the count. + Consecutive { + /// Number of consecutive failures before opening. + threshold: u32, + }, + /// N failures within the window opens the circuit. + /// Failures older than the window are forgotten. + Windowed { + /// Number of failures within the window before opening. + threshold: u32, + /// Time window for counting failures. 
+ window: Duration, + }, +} + +impl Default for FailurePolicy { + fn default() -> Self { + FailurePolicy::Consecutive { threshold: 5 } + } +} + +/// Circuit breaker configuration. +#[derive(Debug, Clone)] +pub struct CircuitBreakerConfig { + /// How failures are counted. + pub failure_policy: FailurePolicy, + /// Time to wait in open state before transitioning to half-open. + pub reset_timeout: Duration, + /// Number of probe requests allowed in half-open state. + pub half_open_permits: u32, +} + +impl Default for CircuitBreakerConfig { + fn default() -> Self { + Self { + failure_policy: FailurePolicy::default(), + reset_timeout: Duration::from_secs(30), + half_open_permits: 3, + } + } +} + +impl CircuitBreakerConfig { + /// Create a config with consecutive failure detection. + pub fn consecutive(threshold: u32) -> Self { + Self { + failure_policy: FailurePolicy::Consecutive { threshold }, + ..Default::default() + } + } + + /// Create a config with windowed failure detection. + pub fn windowed(threshold: u32, window: Duration) -> Self { + Self { + failure_policy: FailurePolicy::Windowed { threshold, window }, + ..Default::default() + } + } + + /// Set the reset timeout (time in open state before half-open). + pub fn reset_timeout(mut self, timeout: Duration) -> Self { + self.reset_timeout = timeout; + self + } + + /// Set the number of half-open permits. + pub fn half_open_permits(mut self, permits: u32) -> Self { + self.half_open_permits = permits; + self + } + + /// Get the failure threshold from the policy. + #[allow(dead_code)] + fn threshold(&self) -> u32 { + match &self.failure_policy { + FailurePolicy::Consecutive { threshold } => *threshold, + FailurePolicy::Windowed { threshold, .. } => *threshold, + } + } +} + +/// Lock-free circuit breaker. 
+/// +/// All state is packed into a single 64-bit atomic: +/// - Bits 62-63: State (0=closed, 1=open, 2=half-open) +/// - Bits 48-61: Failure count (14 bits, max 16383) +/// - Bits 32-47: Half-open permits remaining (16 bits) +/// - Bits 0-31: Timestamp of last state change (seconds since epoch, wraps every 136 years) +/// +/// For windowed mode, a second atomic tracks the window start timestamp. +/// +/// This packing ensures all state transitions are atomic via single CAS operations. +pub struct CircuitBreaker { + /// Packed state word. + state: AtomicU64, + /// Window start timestamp (only used in windowed mode). + /// Stores seconds since epoch when the first failure in the current window occurred. + /// 0 means no active window. + window_start: AtomicU64, + /// Configuration (immutable after construction). + config: CircuitBreakerConfig, + /// Clock function for getting current time in seconds. + clock: fn() -> u64, +} + +impl std::fmt::Debug for CircuitBreaker { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CircuitBreaker") + .field("state", &self.state) + .field("window_start", &self.window_start) + .field("config", &self.config) + .finish_non_exhaustive() + } +} + +// State encoding constants +const STATE_CLOSED: u64 = 0; +const STATE_OPEN: u64 = 1; +const STATE_HALF_OPEN: u64 = 2; + +const STATE_SHIFT: u32 = 62; +const STATE_MASK: u64 = 0b11; + +const FAILURE_SHIFT: u32 = 48; +const FAILURE_MASK: u64 = 0x3FFF; // 14 bits + +const PERMIT_SHIFT: u32 = 32; +const PERMIT_MASK: u64 = 0xFFFF; // 16 bits + +const TIMESTAMP_MASK: u64 = 0xFFFF_FFFF; // 32 bits + +impl CircuitBreaker { + /// Create a new circuit breaker with the given configuration. + pub fn new(config: CircuitBreakerConfig) -> Self { + Self::with_clock(config, Self::system_clock) + } + + /// Create a circuit breaker with a custom clock (for testing). 
+ pub fn with_clock(config: CircuitBreakerConfig, clock: fn() -> u64) -> Self { + let initial = Self::pack(STATE_CLOSED, 0, 0, clock()); + Self { + state: AtomicU64::new(initial), + window_start: AtomicU64::new(0), + config, + clock, + } + } + + /// System clock returning seconds since epoch (32-bit, wrapping). + #[inline] + fn system_clock() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs() & TIMESTAMP_MASK) + .unwrap_or(0) + } + + /// Get current time from the configured clock. + #[inline] + fn now_secs(&self) -> u64 { + (self.clock)() + } + + /// Pack state components into a single u64. + #[inline] + fn pack(state: u64, failures: u64, permits: u64, timestamp: u64) -> u64 { + ((state & STATE_MASK) << STATE_SHIFT) + | ((failures & FAILURE_MASK) << FAILURE_SHIFT) + | ((permits & PERMIT_MASK) << PERMIT_SHIFT) + | (timestamp & TIMESTAMP_MASK) + } + + /// Unpack a u64 into state components. + #[inline] + fn unpack(packed: u64) -> (u64, u64, u64, u64) { + let state = (packed >> STATE_SHIFT) & STATE_MASK; + let failures = (packed >> FAILURE_SHIFT) & FAILURE_MASK; + let permits = (packed >> PERMIT_SHIFT) & PERMIT_MASK; + let timestamp = packed & TIMESTAMP_MASK; + (state, failures, permits, timestamp) + } + + /// Check if a request should be allowed through the circuit. + /// + /// Returns `Ok(())` if the request is allowed, `Err(CircuitOpen)` if the + /// circuit is open and the request should be rejected. + /// + /// In half-open state, this atomically decrements the permit count. 
+ pub fn allow(&self) -> Result<(), CircuitOpen> { + loop { + let packed = self.state.load(Ordering::Acquire); + let (state, failures, permits, timestamp) = Self::unpack(packed); + + match state { + STATE_CLOSED => return Ok(()), + + STATE_OPEN => { + let now = self.now_secs(); + let elapsed = now.wrapping_sub(timestamp); + + if elapsed >= self.config.reset_timeout.as_secs() { + // Timeout elapsed, try to transition to half-open + let new_packed = Self::pack( + STATE_HALF_OPEN, + 0, + u64::from(self.config.half_open_permits), + now, + ); + + match self.state.compare_exchange_weak( + packed, + new_packed, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => continue, // Transitioned, retry allow() + Err(_) => continue, // Someone else modified, retry + } + } + return Err(CircuitOpen); + } + + STATE_HALF_OPEN => { + if permits == 0 { + return Err(CircuitOpen); + } + + // Try to claim a permit + let new_packed = Self::pack(STATE_HALF_OPEN, failures, permits - 1, timestamp); + + match self.state.compare_exchange_weak( + packed, + new_packed, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => return Ok(()), + Err(_) => continue, // CAS failed, retry + } + } + + _ => { + // Invalid state, reset to closed + let new_packed = Self::pack(STATE_CLOSED, 0, 0, self.now_secs()); + let _ = self.state.compare_exchange( + packed, + new_packed, + Ordering::AcqRel, + Ordering::Acquire, + ); + return Ok(()); + } + } + } + } + + /// Record a successful request. + /// + /// In closed state, resets the failure counter (and window for windowed mode). + /// In half-open state, closes the circuit (service is healthy again). 
+ pub fn record_success(&self) { + // Reset window start for windowed mode + self.window_start.store(0, Ordering::Release); + + loop { + let packed = self.state.load(Ordering::Acquire); + let (state, _, _, _) = Self::unpack(packed); + + let new_packed = match state { + STATE_CLOSED => { + // Reset failure count, keep closed + Self::pack(STATE_CLOSED, 0, 0, self.now_secs()) + } + STATE_HALF_OPEN => { + // Success in half-open: close the circuit + Self::pack(STATE_CLOSED, 0, 0, self.now_secs()) + } + STATE_OPEN => return, // Shouldn't record success while open + _ => return, + }; + + match self.state.compare_exchange_weak( + packed, + new_packed, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => return, + Err(_) => continue, + } + } + } + + /// Record a failed request. + /// + /// In closed state, increments the failure counter and opens the circuit + /// if the threshold is reached. + /// In half-open state, reopens the circuit immediately. + pub fn record_failure(&self) { + match &self.config.failure_policy { + FailurePolicy::Consecutive { threshold } => { + self.record_failure_consecutive(*threshold); + } + FailurePolicy::Windowed { threshold, window } => { + self.record_failure_windowed(*threshold, window.as_secs()); + } + } + } + + /// Record failure with consecutive failure tracking. 
+ fn record_failure_consecutive(&self, threshold: u32) { + loop { + let packed = self.state.load(Ordering::Acquire); + let (state, failures, _, _) = Self::unpack(packed); + let now = self.now_secs(); + + let new_packed = match state { + STATE_CLOSED => { + let new_failures = failures + 1; + if new_failures >= u64::from(threshold) { + Self::pack(STATE_OPEN, 0, 0, now) + } else { + Self::pack(STATE_CLOSED, new_failures, 0, now) + } + } + STATE_HALF_OPEN => Self::pack(STATE_OPEN, 0, 0, now), + STATE_OPEN => return, + _ => return, + }; + + match self.state.compare_exchange_weak( + packed, + new_packed, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => return, + Err(_) => continue, + } + } + } + + /// Record failure with windowed failure tracking. + fn record_failure_windowed(&self, threshold: u32, window_secs: u64) { + let now = self.now_secs(); + + // Handle window timing + let window_start = self.window_start.load(Ordering::Acquire); + let (new_window_start, reset_count) = if window_start == 0 { + // First failure, start new window + (now, true) + } else if now.wrapping_sub(window_start) >= window_secs { + // Window expired, start new window + (now, true) + } else { + // Within window, continue counting + (window_start, false) + }; + + // Update window start if needed (best-effort, races are acceptable) + if new_window_start != window_start { + let _ = self.window_start.compare_exchange( + window_start, + new_window_start, + Ordering::Release, + Ordering::Relaxed, + ); + } + + // Now update the main state + loop { + let packed = self.state.load(Ordering::Acquire); + let (state, failures, _, _) = Self::unpack(packed); + + let new_packed = match state { + STATE_CLOSED => { + let new_failures = if reset_count { 1 } else { failures + 1 }; + if new_failures >= u64::from(threshold) { + // Reset window when opening circuit + self.window_start.store(0, Ordering::Release); + Self::pack(STATE_OPEN, 0, 0, now) + } else { + Self::pack(STATE_CLOSED, new_failures, 0, now) 
+ } + } + STATE_HALF_OPEN => { + self.window_start.store(0, Ordering::Release); + Self::pack(STATE_OPEN, 0, 0, now) + } + STATE_OPEN => return, + _ => return, + }; + + match self.state.compare_exchange_weak( + packed, + new_packed, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => return, + Err(_) => continue, + } + } + } + + /// Get the current circuit state for observability. + pub fn state(&self) -> CircuitState { + let packed = self.state.load(Ordering::Acquire); + let (state, failures, permits, _) = Self::unpack(packed); + + match state { + STATE_CLOSED => CircuitState::Closed { + failure_count: failures as u32, + }, + STATE_OPEN => CircuitState::Open, + STATE_HALF_OPEN => CircuitState::HalfOpen { + permits_remaining: permits as u32, + }, + _ => CircuitState::Closed { failure_count: 0 }, + } + } + + /// Reset the circuit breaker to closed state. + pub fn reset(&self) { + self.window_start.store(0, Ordering::Release); + let packed = Self::pack(STATE_CLOSED, 0, 0, self.now_secs()); + self.state.store(packed, Ordering::Release); + } + + /// Force the circuit to a specific state (for testing/admin). + #[cfg(test)] + pub fn force_state(&self, new_state: CircuitState) { + let now = self.now_secs(); + let packed = match new_state { + CircuitState::Closed { failure_count } => { + Self::pack(STATE_CLOSED, u64::from(failure_count), 0, now) + } + CircuitState::Open => Self::pack(STATE_OPEN, 0, 0, now), + CircuitState::HalfOpen { permits_remaining } => { + Self::pack(STATE_HALF_OPEN, 0, u64::from(permits_remaining), now) + } + }; + self.state.store(packed, Ordering::Release); + } +} + +/// Error returned when the circuit is open. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct CircuitOpen; + +impl std::fmt::Display for CircuitOpen { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "circuit breaker is open") + } +} + +impl std::error::Error for CircuitOpen {} + +/// Observable circuit state. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CircuitState { + /// Circuit is closed, requests flow through normally. + Closed { + /// Number of failures since last success/reset. + failure_count: u32, + }, + /// Circuit is open, requests are rejected immediately. + Open, + /// Circuit is half-open, limited probe requests allowed. + HalfOpen { + /// Number of probe requests still allowed. + permits_remaining: u32, + }, +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + use std::thread; + + // ========================================================================= + // Consecutive mode tests + // ========================================================================= + + #[test] + fn test_initial_state_is_closed() { + let cb = CircuitBreaker::new(CircuitBreakerConfig::default()); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 0 }); + } + + #[test] + fn test_allow_when_closed() { + let cb = CircuitBreaker::new(CircuitBreakerConfig::default()); + assert!(cb.allow().is_ok()); + } + + #[test] + fn test_consecutive_failures_increment() { + let cb = CircuitBreaker::new(CircuitBreakerConfig::consecutive(5)); + + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 1 }); + + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 3 }); + } + + #[test] + fn test_consecutive_success_resets_failures() { + let cb = CircuitBreaker::new(CircuitBreakerConfig::consecutive(5)); + + cb.record_failure(); + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 3 }); + + cb.record_success(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 0 }); + } + + #[test] + fn test_consecutive_opens_at_threshold() { + let cb = CircuitBreaker::new(CircuitBreakerConfig::consecutive(3)); + + cb.record_failure(); + cb.record_failure(); + assert!(matches!(cb.state(), CircuitState::Closed { .. 
})); + + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + } + + #[test] + fn test_rejects_when_open() { + let cb = CircuitBreaker::new( + CircuitBreakerConfig::consecutive(1).reset_timeout(Duration::from_secs(3600)), + ); + + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + assert!(cb.allow().is_err()); + } + + #[test] + fn test_half_open_after_timeout() { + let cb = CircuitBreaker::new( + CircuitBreakerConfig::consecutive(1) + .reset_timeout(Duration::from_millis(1)) + .half_open_permits(2), + ); + + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + + thread::sleep(Duration::from_millis(10)); + + assert!(cb.allow().is_ok()); + assert!(matches!(cb.state(), CircuitState::HalfOpen { .. })); + } + + #[test] + fn test_half_open_permits_decrement() { + let cb = CircuitBreaker::new( + CircuitBreakerConfig::consecutive(1) + .reset_timeout(Duration::from_millis(1)) + .half_open_permits(3), + ); + + cb.record_failure(); + thread::sleep(Duration::from_millis(10)); + + assert!(cb.allow().is_ok()); + assert_eq!( + cb.state(), + CircuitState::HalfOpen { + permits_remaining: 2 + } + ); + + assert!(cb.allow().is_ok()); + assert_eq!( + cb.state(), + CircuitState::HalfOpen { + permits_remaining: 1 + } + ); + + assert!(cb.allow().is_ok()); + assert_eq!( + cb.state(), + CircuitState::HalfOpen { + permits_remaining: 0 + } + ); + + assert!(cb.allow().is_err()); + } + + #[test] + fn test_half_open_success_closes() { + let cb = CircuitBreaker::new( + CircuitBreakerConfig::consecutive(1) + .reset_timeout(Duration::from_millis(1)) + .half_open_permits(3), + ); + + cb.record_failure(); + thread::sleep(Duration::from_millis(10)); + let _ = cb.allow(); + + cb.record_success(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 0 }); + } + + #[test] + fn test_half_open_failure_reopens() { + let cb = CircuitBreaker::new( + CircuitBreakerConfig::consecutive(1) + .reset_timeout(Duration::from_millis(1)) + 
.half_open_permits(3), + ); + + cb.record_failure(); + thread::sleep(Duration::from_millis(10)); + let _ = cb.allow(); + + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + } + + // ========================================================================= + // Windowed mode tests + // ========================================================================= + + #[test] + fn test_windowed_opens_at_threshold() { + let cb = CircuitBreaker::new(CircuitBreakerConfig::windowed(3, Duration::from_secs(10))); + + cb.record_failure(); + cb.record_failure(); + assert!(matches!(cb.state(), CircuitState::Closed { .. })); + + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + } + + #[test] + fn test_windowed_resets_after_window() { + // Note: window uses second-level precision, so use 1 second window + let cb = CircuitBreaker::new(CircuitBreakerConfig::windowed(3, Duration::from_secs(1))); + + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 2 }); + + // Wait for window to expire (1 second + buffer) + thread::sleep(Duration::from_millis(1100)); + + // This failure starts a new window + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 1 }); + + // Two more to hit threshold + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + } + + #[test] + fn test_windowed_success_resets_window() { + let cb = CircuitBreaker::new(CircuitBreakerConfig::windowed(3, Duration::from_secs(10))); + + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 2 }); + + // Success resets the failure count + cb.record_success(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 0 }); + + // Need 3 fresh failures to open + cb.record_failure(); + cb.record_failure(); + assert!(matches!(cb.state(), CircuitState::Closed { .. 
})); + + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + } + + #[test] + fn test_windowed_half_open_recovery() { + let cb = CircuitBreaker::new( + CircuitBreakerConfig::windowed(2, Duration::from_secs(10)) + .reset_timeout(Duration::from_millis(1)), + ); + + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + + thread::sleep(Duration::from_millis(10)); + + assert!(cb.allow().is_ok()); + assert!(matches!(cb.state(), CircuitState::HalfOpen { .. })); + + cb.record_success(); + assert_eq!(cb.state(), CircuitState::Closed { failure_count: 0 }); + } + + // ========================================================================= + // Concurrency tests + // ========================================================================= + + #[test] + fn test_concurrent_failures_open_exactly_once() { + for _ in 0..100 { + let cb = Arc::new(CircuitBreaker::new( + CircuitBreakerConfig::consecutive(10).reset_timeout(Duration::from_secs(3600)), + )); + + let handles: Vec<_> = (0..20) + .map(|_| { + let cb = Arc::clone(&cb); + thread::spawn(move || { + cb.record_failure(); + }) + }) + .collect(); + + for h in handles { + h.join().unwrap(); + } + + assert_eq!(cb.state(), CircuitState::Open); + } + } + + #[test] + fn test_concurrent_allow_in_half_open_respects_permits() { + for _ in 0..100 { + let cb = Arc::new(CircuitBreaker::new( + CircuitBreakerConfig::consecutive(1) + .reset_timeout(Duration::from_millis(1)) + .half_open_permits(5), + )); + + cb.record_failure(); + thread::sleep(Duration::from_millis(10)); + + let allowed = Arc::new(std::sync::atomic::AtomicU32::new(0)); + + let handles: Vec<_> = (0..20) + .map(|_| { + let cb = Arc::clone(&cb); + let allowed = Arc::clone(&allowed); + thread::spawn(move || { + if cb.allow().is_ok() { + allowed.fetch_add(1, Ordering::SeqCst); + } + }) + }) + .collect(); + + for h in handles { + h.join().unwrap(); + } + + let total_allowed = allowed.load(Ordering::SeqCst); + assert!( + 
total_allowed <= 5, + "allowed {} requests but only 5 permits", + total_allowed + ); + } + } + + #[test] + fn test_concurrent_windowed_failures() { + for _ in 0..50 { + let cb = Arc::new(CircuitBreaker::new(CircuitBreakerConfig::windowed( + 10, + Duration::from_secs(60), + ))); + + let handles: Vec<_> = (0..20) + .map(|_| { + let cb = Arc::clone(&cb); + thread::spawn(move || { + cb.record_failure(); + }) + }) + .collect(); + + for h in handles { + h.join().unwrap(); + } + + assert_eq!(cb.state(), CircuitState::Open); + } + } + + // ========================================================================= + // Pack/unpack tests + // ========================================================================= + + #[test] + fn test_pack_unpack_roundtrip() { + let test_cases = [ + (STATE_CLOSED, 0, 0, 0), + (STATE_OPEN, 0, 0, 12345), + (STATE_HALF_OPEN, 100, 50, 999999), + (STATE_CLOSED, FAILURE_MASK, PERMIT_MASK, TIMESTAMP_MASK), + ]; + + for (state, failures, permits, timestamp) in test_cases { + let packed = CircuitBreaker::pack(state, failures, permits, timestamp); + let (s, f, p, t) = CircuitBreaker::unpack(packed); + assert_eq!(s, state, "state mismatch"); + assert_eq!(f, failures, "failures mismatch"); + assert_eq!(p, permits, "permits mismatch"); + assert_eq!(t, timestamp, "timestamp mismatch"); + } + } + + // ========================================================================= + // Builder API tests + // ========================================================================= + + #[test] + fn test_builder_consecutive() { + let config = CircuitBreakerConfig::consecutive(5) + .reset_timeout(Duration::from_secs(60)) + .half_open_permits(10); + + assert_eq!( + config.failure_policy, + FailurePolicy::Consecutive { threshold: 5 } + ); + assert_eq!(config.reset_timeout, Duration::from_secs(60)); + assert_eq!(config.half_open_permits, 10); + } + + #[test] + fn test_builder_windowed() { + let config = CircuitBreakerConfig::windowed(3, Duration::from_secs(10)) + 
.reset_timeout(Duration::from_secs(30)) + .half_open_permits(5); + + assert_eq!( + config.failure_policy, + FailurePolicy::Windowed { + threshold: 3, + window: Duration::from_secs(10) + } + ); + assert_eq!(config.reset_timeout, Duration::from_secs(30)); + assert_eq!(config.half_open_permits, 5); + } +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..c454ad9 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,482 @@ +//! Layered configuration (PATTERNS.md: Figment defaults → TOML → `AI_`-prefixed env). +//! +//! Auth + key material come from config (signing public keys, managed pool keys), so the gateway +//! is fully functional from boot config alone — NATS is only needed for the deny-set. + +use crate::error::{GatewayError, Result}; +use crate::key::{Keyring, Kid}; +use crate::secret::Secret; +use figment::Figment; +use figment::providers::{Env, Format, Toml}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::Path; + +#[derive(Debug, Clone, Serialize, Deserialize)] +// `default` so every field is optional. We deliberately do NOT set serde's `deny_unknown_fields`: +// config is merged from `Env::prefixed("AI_")`, a namespace shared with foreign variables the +// platform injects (e.g. `AI_AGENT`, `AI_LOG`), so rejecting unknown keys at the serde layer would +// fail load on a valid environment. Typo protection is instead enforced one layer down, against the +// *TOML file only* (`reject_unknown_toml_keys`): the file is ours alone — not a shared namespace — +// so an unrecognized key there is unambiguously a mistake, and a silent one (it loads its default +// and the setting does nothing), worth a hard, visible boot failure. +#[serde(default)] +pub struct AiConfig { + /// Downstream listener for client (app) traffic. Internal-only in production (Service Connect + /// fronts it as `ai.internal`); no public ingress, so plain HTTP here is fine. + pub listen: String, + /// Prometheus metrics listener. 
+ pub metrics_listen: String, + + /// NATS / slipstream connection (cf. `_envcommon/ecs-service.hcl`: `tls://connect.ngs.global`). + /// Used only for the watched deny-set (`blackhole.*`). + pub nats_url: String, + /// Base64 `.creds` (ECS via SOPS) — takes priority over `nats_creds_file`. Held in `Secret` so + /// it can't leak through the `Debug`/`Serialize` this struct derives (a stray `?config` log). + pub nats_creds: Option, + pub nats_creds_file: Option, + /// slipstream bucket holding `blackhole.*` (the deny-set — the only thing in NATS). + pub config_bucket: String, + + /// Optional path to an on-disk deny-set snapshot (slipstream's append-log + resume cursor). When + /// set **and on durable storage** (the edge/tunnel deployment model), a restart seeds the + /// deny-set from this file and *resumes the NATS watch from the saved revision* — skipping the + /// boot scan and surviving a restart with enforcement intact even before NATS reconnects. Unset + /// (the default, e.g. ephemeral/Fargate) ⇒ seed from a NATS scan each boot, unchanged. The file + /// is a pure cache: delete it (or point at scratch) and the gateway falls back to scanning. + pub snapshot_path: Option, + + /// Trusted Ed25519 signing **public** keys: `kid` (as string — TOML/JSON map keys are strings) + /// → base64 public key. Multiple allowed for zero-downtime rotation. Config, not NATS. + pub signing_keys: HashMap, + + /// Fail the boot if `signing_keys` is empty, instead of degrading to BYO-only. Empty signing + /// keys is a *legitimate* mode (a BYO-only deployment) but is far more often a mis-deploy — a + /// typo'd/absent SSM param — that looks healthy while silently dropping **all** managed billing + /// and deny-set enforcement. A managed deployment should set this `true` so a bad deploy fails + /// fast and visibly at boot rather than serving for free. Default `false` to keep BYO-only and + /// the test/e2e harnesses (which run keyless) working out of the box. 
+ pub require_signing_keys: bool, + + /// Managed Beyond pool keys, **by provider name** (`openai`, `anthropic`, `fireworks`, …). + /// From the `[pool_keys]` TOML table or SSM-injected `AI_POOL_KEY_` env (the env form is + /// the production path — see `load_with_path`). A provider with no pool key here can't serve + /// managed traffic (→ 503); BYO is unaffected. Values are `Secret` so a key can't leak through + /// the `Debug`/`Serialize` this struct derives; read the plaintext via `expose` at the use site. + pub pool_keys: HashMap, + + /// Per-provider upstream authority (`host:port`), **by provider name**. For a known provider + /// (see `route::KNOWN_PROVIDERS`) this *overrides* its default; for an unknown name it *adds* a + /// new OpenAI-wire provider, then reachable at `/{name}/…` (the provider is the request's first + /// path segment). Empty = every known provider uses its built-in default. (The e2e harness points + /// providers at a mock here.) + pub provider_authorities: HashMap, + + /// Upstream timeouts (seconds). Streaming responses are long, so read/idle are generous. + pub connect_timeout_secs: u64, + pub read_timeout_secs: u64, + pub write_timeout_secs: u64, + pub idle_timeout_secs: u64, + + /// Graceful-shutdown drain window (seconds): after SIGTERM, how long Pingora lets **in-flight + /// requests finish** before tearing the runtimes down. Maps to Pingora's `grace_period_seconds` + /// (left unset, Pingora silently defaults to 300s — this knob makes the window explicit). + /// + /// **Default to `read_timeout_secs` so we never truncate a response.** The gateway is a + /// transparent man-in-the-middle: cutting an in-flight stream on deploy corrupts a generation the + /// caller is paying for and can't cleanly retry (a half-delivered SSE isn't idempotent). 
The + /// longest a request can live is `read_timeout_secs`, so a drain window of at least that + /// guarantees every accepted request finishes — Pingora stops *accepting* new connections the + /// instant SIGTERM lands, so this only ever waits out the existing longest stream, not new work. + /// Slower rollouts are the deliberate price of not mangling responses. + /// + /// **The orchestrator must grant the same window**, or it caps us: the platform SIGKILLs at its + /// own stop timeout regardless of this value. Set k8s `terminationGracePeriodSeconds` (or the EC2 + /// agent's `ECS_CONTAINER_STOP_TIMEOUT`) to match. Note **ECS Fargate caps `stopTimeout` at 120s** + /// — there, full coverage of a 600s stream is impossible and the longest streams will still be + /// cut at 120s; that's a Fargate limitation, not a reason to default to truncating. + pub shutdown_grace_period_secs: u64, + /// Final runtime-teardown timeout (seconds) **after** the drain window: how long Pingora waits for + /// the tokio runtimes to exit before forcing the process down. Maps to Pingora's + /// `graceful_shutdown_timeout_seconds` (unset ⇒ a silent 5s default). A few seconds is enough to + /// flush logs/metrics; this is a backstop against a wedged runtime hanging shutdown forever, not a + /// second drain window (that's `shutdown_grace_period_secs`). + pub shutdown_runtime_timeout_secs: u64, + + /// TLS to the upstream provider. Real providers are HTTPS (true); the e2e harness sets false + /// to talk to a plaintext mock. + pub upstream_tls: bool, + + /// Prefer HTTP/2 (with HTTP/1.1 fallback) to the upstream. `true` ⇒ peer ALPN `H2H1`: every + /// provider that offers `h2` over TLS is reached over a multiplexed H2 connection (fewer sockets + /// and TLS handshakes from our egress IPs), and any host that doesn't offer it negotiates down to + /// H1. `false` ⇒ ALPN `H1` (one connection per in-flight request, pooled). 
The knob exists so an + /// operator can fall back to H1 without a code redeploy if a provider's h2 stack misbehaves, and + /// so the e2e concurrency bench can compare the two. Only consulted over TLS — a plaintext upstream + /// (the mock) has no ALPN and is always H1 regardless. + pub upstream_http2: bool, + + /// Verify the upstream's TLS certificate (and that it matches the SNI). `true` everywhere in + /// production. The **only** intended `false` is the e2e concurrency bench, whose TLS mock presents + /// a self-signed cert — turning verification off there lets us exercise the real TLS+ALPN+H2 path + /// against a local mock without a CA. Never set this `false` against a real provider. + pub upstream_verify_cert: bool, + + /// Per-credential request-rate ceiling (requests/sec). A blast-radius guardrail (see `ratelimit`), + /// not a spend control: it caps how fast a single credential (managed virtual key ≈ a `(tenant, + /// app)`, or a BYO token) can drive the gateway, bounding a leaked/runaway key during the + /// deny-set's reaction lag and a failure flood that never bills. `0` disables it. The default is + /// generous — a circuit breaker, not a quota; tune from `ai_rejections_total{reason="rate_limit"}`. + pub rate_limit_rps: u32, + + /// Aggregate request-rate ceiling (requests/sec) for **all BYO traffic combined** — a single + /// shared bucket. BYO is unverified and upstream-bound, so a flood of *distinct* random BYO tokens + /// slips past the per-credential ceiling and would open junk-auth connections to providers from + /// our egress IPs (getting them rate-limited or banned). This bounds that aggregate regardless of + /// token variation. Managed traffic is **exempt** (it's Ed25519-verified before any upstream + /// connect and can't be forged), so this shared bucket never sheds core tenant load. `0` disables + /// it. Generous by default; tune from `ai_rejections_total{reason="rate_limit_byo_global"}`. 
+ /// + /// Before changing this (or reaching for per-IP limiting), read the **design-decision** block in + /// the `ratelimit` module docs: it records why this is a global cap and not per-source-IP, what it + /// deliberately doesn't cover, and why the real fix for egress-reputation pain is a + /// provider-feedback circuit breaker rather than a bigger number here. + pub byo_rate_limit_rps: u32, + + /// Per-provider circuit breaker: number of upstream **failures within `circuit_breaker_window_secs`** + /// that trips the breaker open for that provider. A failure is a **5xx response or a connect + /// failure** — i.e. the *provider is broken*. A `429` is deliberately **not** a failure: it means + /// the provider is healthy and throttling our pool key (a velocity/spend signal the rate limiter + /// and the client's `Retry-After` backoff own), so tripping on it would convert a self-healing + /// throttle into a self-inflicted outage. While open, requests to that provider fast-fail with a + /// `503` (`ai_rejections_total{reason="circuit_open"}`) instead of piling up against + /// `read_timeout_secs` and exhausting connection/in-flight slots for *every* provider. After + /// `circuit_breaker_reset_secs` a probe request is allowed; success closes it, failure reopens it. + /// Applies to **all** traffic to the provider (managed + BYO) — a down provider is down regardless + /// of whose key is used. `0` disables the breaker entirely. Default is generous so normal + /// background 5xx noise never trips it. + pub circuit_breaker_threshold: u32, + /// Rolling window (seconds) over which `circuit_breaker_threshold` failures are counted. Failures + /// older than the window are forgotten — so it trips on a *burst* of failures, not on a slow trickle + /// spread across a healthy day. + pub circuit_breaker_window_secs: u64, + /// How long the breaker stays open before allowing a half-open probe request (seconds). 
Long enough + /// to let a provider recover, short enough that recovery is detected promptly. + pub circuit_breaker_reset_secs: u64, +} + +impl Default for AiConfig { + fn default() -> Self { + Self { + listen: "0.0.0.0:8080".to_string(), + metrics_listen: "0.0.0.0:9090".to_string(), + nats_url: "nats://localhost:4222".to_string(), + nats_creds: None, + nats_creds_file: None, + config_bucket: "ai-gateway".to_string(), + snapshot_path: None, + signing_keys: HashMap::new(), + require_signing_keys: false, + pool_keys: HashMap::new(), + provider_authorities: HashMap::new(), + connect_timeout_secs: 10, + // Generous: LLM streams can run for minutes; a tight read timeout would kill them. + read_timeout_secs: 600, + write_timeout_secs: 60, + idle_timeout_secs: 90, + // Drain for the full request lifetime (= read_timeout_secs) so a deploy never truncates + // an in-flight stream — we're a transparent proxy and must not mangle a paid-for + // generation. Pingora stops accepting new connections at SIGTERM, so this only waits out + // the longest existing stream. The orchestrator's stop timeout must match (see field + // docs; ECS Fargate's 120s cap is a hard limit there). Then a short teardown backstop. + shutdown_grace_period_secs: 600, + shutdown_runtime_timeout_secs: 10, + upstream_tls: true, + // Prefer H2 to providers by default (all of `KNOWN_PROVIDERS` offer it; H1 fallback is + // automatic). Flip to false for an all-H1 upstream without recompiling. + upstream_http2: true, + // Verify upstream certs by default; only the bench's self-signed TLS mock turns this off. + upstream_verify_cert: true, + // Generous per-credential circuit breaker, on by default. Won't touch legitimate + // steady-state traffic; caps a runaway/leaked key or a retry-storm flood. Set 0 to disable. 
+ rate_limit_rps: 100, + // Generous aggregate BYO ceiling, on by default — well above any expected legitimate BYO + // throughput, low enough that a junk-auth flood can't get our egress IPs flagged by the + // providers. Tune from the metric; set 0 to disable. (Managed traffic is exempt.) + byo_rate_limit_rps: 1_000, + // Per-provider breaker: trip after 20 upstream failures (5xx/connect) within 10s, stay + // open 30s, then probe. Generous enough that a provider's occasional background 5xx never + // trips it — only a sustained brownout does. Set threshold 0 to disable. + circuit_breaker_threshold: 20, + circuit_breaker_window_secs: 10, + circuit_breaker_reset_secs: 30, + } + } +} + +impl AiConfig { + pub fn load_with_path(path: Option<&Path>) -> Result { + let toml_path = path.unwrap_or_else(|| Path::new("config.toml")); + // Catch a typo'd key in the operator's own TOML *before* any of it merges — a misspelled + // `require_signing_keys` would otherwise load its default and silently drop all managed + // billing while the gateway looks healthy. Only the TOML file is checked (see the + // `deny_unknown_fields` note on `AiConfig`); the env layer must stay lenient. + reject_unknown_toml_keys(toml_path)?; + + let mut fig = Figment::from(figment::providers::Serialized::defaults(AiConfig::default())); + fig = fig.merge(Toml::file(toml_path)); + // Flat mapping: `AI_READ_TIMEOUT_SECS` → `read_timeout_secs`. (No `.split('_')` — these are + // flat fields, not nested tables.) Unknown `AI_*` vars are tolerated (see the + // `deny_unknown_fields` note on `AiConfig`) — which is also why pool keys are collected + // separately below rather than via this flat merge. + fig = fig.merge(Env::prefixed("AI_")); + let mut cfg: AiConfig = fig + .extract() + .map_err(|e| GatewayError::Config(e.to_string()))?; + cfg.merge_pool_key_env(std::env::vars()); + cfg.validate()?; + Ok(cfg) + } + + /// Reject nonsensical values that would otherwise fail silently at runtime. 
A `0` connect/read + /// timeout (a typo'd SSM param) becomes a `Duration::from_secs(0)` deadline that fails every + /// upstream call immediately — surfacing only as a 502 cascade, not a loud boot failure. Catch it + /// here so a mis-deploy fails fast and visibly. Write/idle are not load-bearing for correctness + /// (Pingora treats them as best-effort), so they're left unconstrained. + fn validate(&self) -> Result<()> { + if self.connect_timeout_secs == 0 { + return Err(GatewayError::Config( + "connect_timeout_secs must be > 0 (a 0 connect timeout fails every upstream connect)" + .to_string(), + )); + } + if self.read_timeout_secs == 0 { + return Err(GatewayError::Config( + "read_timeout_secs must be > 0 (a 0 read timeout aborts every response before it arrives)" + .to_string(), + )); + } + Ok(()) + } + + /// The per-provider circuit-breaker config, or `None` when disabled (`circuit_breaker_threshold + /// == 0`). Windowed policy: a degrading backend trips on a *burst* of failures, not a slow + /// trickle (see `circuit_breaker` crate docs). Each provider gets its own breaker built from this + /// (see `state::build_providers`). + pub fn circuit_breaker_config(&self) -> Option { + if self.circuit_breaker_threshold == 0 { + return None; + } + Some( + crate::circuit_breaker::CircuitBreakerConfig::windowed( + self.circuit_breaker_threshold, + std::time::Duration::from_secs(self.circuit_breaker_window_secs), + ) + .reset_timeout(std::time::Duration::from_secs( + self.circuit_breaker_reset_secs, + )), + ) + } + + /// Fold `AI_POOL_KEY_` environment variables into `pool_keys` (provider name lowercased). + /// This is the production secret path (SSM-injected env); a flat figment merge can't target a + /// map field, and env must win over any `[pool_keys]` value baked into a config file. 
+ fn merge_pool_key_env(&mut self, vars: impl Iterator) { + for (k, v) in vars { + if let Some(name) = k.strip_prefix("AI_POOL_KEY_") { + self.pool_keys + .insert(name.to_ascii_lowercase(), Secret::new(v)); + } + } + } + + /// Build the trusted keyring from the configured signing public keys. + pub fn build_keyring(&self) -> Result { + let mut ring = Keyring::new(); + for (kid_str, b64) in &self.signing_keys { + let kid: Kid = kid_str + .parse() + .map_err(|_| GatewayError::Config(format!("invalid signing key id {kid_str}")))?; + let vk = crate::key::verifying_key_from_value(b64.as_bytes()).ok_or_else(|| { + GatewayError::Config(format!("invalid signing public key for kid {kid}")) + })?; + ring.insert(kid, vk); + } + Ok(ring) + } +} + +/// The set of top-level keys a config file may set, derived from `AiConfig` itself by serializing +/// its defaults — so it tracks the struct automatically and can never drift from the field list. +fn known_config_keys() -> std::collections::BTreeSet { + use figment::Provider as _; + figment::providers::Serialized::defaults(AiConfig::default()) + .data() + .map(|profiles| { + profiles + .into_values() + .flat_map(|dict| dict.into_keys()) + .collect() + }) + .unwrap_or_default() +} + +/// Fail the load if the TOML file at `path` carries any key that isn't an `AiConfig` field. A +/// missing file is fine (the gateway runs on defaults + env), so an unreadable/absent file yields no +/// keys and passes. See the `deny_unknown_fields` note on `AiConfig` for why this is scoped to the +/// TOML file and not the env layer. 
+fn reject_unknown_toml_keys(path: &Path) -> Result<()> { + use figment::Provider as _; + let known = known_config_keys(); + let unknown: std::collections::BTreeSet = Toml::file(path) + .data() + .map(|profiles| { + profiles + .into_values() + .flat_map(|dict| dict.into_keys()) + .filter(|k| !known.contains(k)) + .collect() + }) + .unwrap_or_default(); + if unknown.is_empty() { + return Ok(()); + } + let unknown: Vec = unknown.into_iter().collect(); + Err(GatewayError::Config(format!( + "unknown key(s) in {}: {} — check for a typo (known keys: {})", + path.display(), + unknown.join(", "), + known.into_iter().collect::>().join(", "), + ))) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn defaults_are_sane() { + let c = AiConfig::default(); + // Read timeout must comfortably exceed a long stream. + assert!(c.read_timeout_secs >= 300); + assert_eq!(c.config_bucket, "ai-gateway"); + } + + #[test] + fn loads_without_a_file() { + let c = AiConfig::load_with_path(None).unwrap(); + assert_eq!(c.listen, "0.0.0.0:8080"); + } + + #[test] + fn validate_rejects_zero_connect_and_read_timeouts() { + // A 0 connect/read timeout (a typo'd SSM param) must fail boot loudly, not degrade into a + // 502 cascade at runtime. + assert!( + AiConfig { + connect_timeout_secs: 0, + ..Default::default() + } + .validate() + .is_err() + ); + assert!( + AiConfig { + read_timeout_secs: 0, + ..Default::default() + } + .validate() + .is_err() + ); + // Defaults are valid. + assert!(AiConfig::default().validate().is_ok()); + } + + /// Write `body` to a uniquely-named temp TOML file (the literal `label` keeps parallel tests + /// from colliding) and return its path; the caller removes it. 
+ fn temp_toml(label: &str, body: &str) -> std::path::PathBuf { + use std::io::Write as _; + let path = std::env::temp_dir().join(format!("beyond-ai-cfg-{label}.toml")); + let mut f = std::fs::File::create(&path).unwrap(); + f.write_all(body.as_bytes()).unwrap(); + path + } + + #[test] + fn rejects_typod_toml_key() { + // A misspelled key in the operator's own TOML is a silent footgun (loads its default, the + // setting does nothing) — load must fail loudly and name the offending key, not boot healthy. + let path = temp_toml( + "typo", + "listen = \"0.0.0.0:1234\"\nreqiure_signing_keys = true\n", + ); + let err = AiConfig::load_with_path(Some(&path)).unwrap_err(); + let _ = std::fs::remove_file(&path); + match err { + GatewayError::Config(msg) => assert!( + msg.contains("reqiure_signing_keys"), + "error must name the typo'd key, got: {msg}" + ), + other => panic!("expected Config error, got {other:?}"), + } + } + + #[test] + fn accepts_known_toml_keys() { + // Every key here is a real `AiConfig` field (including the `[signing_keys]` table) — load + // must succeed and apply the values. + let path = temp_toml( + "known", + "listen = \"0.0.0.0:1234\"\nrequire_signing_keys = true\nrate_limit_rps = 7\n\n[signing_keys]\n1 = \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\"\n", + ); + let c = AiConfig::load_with_path(Some(&path)).unwrap(); + let _ = std::fs::remove_file(&path); + assert_eq!(c.listen, "0.0.0.0:1234"); + assert!(c.require_signing_keys); + assert_eq!(c.rate_limit_rps, 7); + assert!(c.signing_keys.contains_key("1")); + } + + #[test] + fn build_keyring_rejects_non_numeric_kid() { + // `kid` is parsed as `u32`; a non-numeric map key must fail boot (loud) rather than + // silently drop a trusted signing key (which would 401 every token under it). 
+ let c = AiConfig { + signing_keys: HashMap::from([("not-a-number".to_string(), "AAAA".to_string())]), + ..Default::default() + }; + assert!(c.build_keyring().is_err()); + } + + #[test] + fn build_keyring_rejects_invalid_public_key() { + // A value that is neither raw 32 bytes nor base64 of 32 bytes must fail boot, not install a + // bogus key that can never verify anything. + let c = AiConfig { + signing_keys: HashMap::from([("1".to_string(), "!!! not base64 !!!".to_string())]), + ..Default::default() + }; + assert!(c.build_keyring().is_err()); + } + + #[test] + fn pool_key_env_merges_and_overrides() { + // `AI_POOL_KEY_` → `pool_keys[name]` (lowercased), and env wins over a config-file + // value (the production secret path). A non-pool `AI_*` var is ignored. + let mut c = AiConfig { + pool_keys: HashMap::from([("openai".to_string(), Secret::new("from-file"))]), + ..Default::default() + }; + c.merge_pool_key_env( + [ + ("AI_POOL_KEY_OPENAI".to_string(), "from-env".to_string()), + ("AI_POOL_KEY_GROQ".to_string(), "gsk-x".to_string()), + ("AI_LOG".to_string(), "debug".to_string()), + ] + .into_iter(), + ); + assert_eq!(c.pool_keys.get("openai").unwrap().expose(), "from-env"); + assert_eq!(c.pool_keys.get("groq").unwrap().expose(), "gsk-x"); + assert!(!c.pool_keys.contains_key("log")); + } +} diff --git a/src/deny.rs b/src/deny.rs new file mode 100644 index 0000000..af9964f --- /dev/null +++ b/src/deny.rs @@ -0,0 +1,155 @@ +//! Sparse per-tenant deny-set — the gateway's *entire* spend/fraud surface. +//! +//! Design (deliberate, see plan): the gateway only ever asks "is this tenant cut off?" and +//! default-**allows** on a miss. We hold **only the exceptions** (the cut-off tenants), so memory +//! is `O(denied)`, not `O(tenants)` — this scales to millions of tenants because `denied` stays a +//! tiny slice (a few MB even at 1M entries; a tenant id is 8 bytes). The gateway never decides +//! 
*why* a tenant is denied — the control plane writes/removes entries; we just enforce + log. +//! +//! TTL/auto-restore is handled by slipstream, not here: spend holds are written with a TTL to the +//! next budget reset, so they expire into a `Del` event that removes them; fraud holds have no TTL +//! (sticky). This struct only reflects current membership. + +use std::collections::HashMap; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DenyReason { + /// Over budget. Typically written with a TTL to the next reset → auto-restores. + Spend, + /// Abuse / fraud. Sticky (no TTL) until a human clears it. + Fraud, + /// Reason not recognized in the entry value — still denied (fail safe on the enforce side). + Unknown, +} + +impl DenyReason { + /// HTTP status to return. 402 Payment Required for spend, 403 Forbidden for fraud/other — + /// gives the client (and our own dashboards) a meaningful signal without leaking detail. + pub fn http_status(self) -> u16 { + match self { + DenyReason::Spend => 402, + DenyReason::Fraud | DenyReason::Unknown => 403, + } + } +} + +#[derive(Debug, Default, Clone)] +pub struct DenySet { + denied: HashMap, +} + +impl DenySet { + pub fn new() -> Self { + Self::default() + } + + /// Default-allow: absence from the set = allowed. This is the safe-for-availability default — + /// a tenant we've never heard of is served, not blocked. 
+ pub fn is_denied(&self, tenant_id: u64) -> bool { + self.denied.contains_key(&tenant_id) + } + + pub fn reason(&self, tenant_id: u64) -> Option { + self.denied.get(&tenant_id).copied() + } + + pub fn insert(&mut self, tenant_id: u64, reason: DenyReason) { + self.denied.insert(tenant_id, reason); + } + + pub fn remove(&mut self, tenant_id: u64) { + self.denied.remove(&tenant_id); + } + + pub fn len(&self) -> usize { + self.denied.len() + } + + pub fn is_empty(&self) -> bool { + self.denied.is_empty() + } +} + +impl FromIterator<(u64, DenyReason)> for DenySet { + fn from_iter>(iter: I) -> Self { + Self { + denied: iter.into_iter().collect(), + } + } +} + +/// Parse a slipstream deny key `blackhole.{tenant_id}` → tenant id. Returns `None` for keys that +/// don't match (so an unrelated watched key never corrupts the set). +pub fn parse_key(key: &str) -> Option { + key.strip_prefix("blackhole.")?.parse().ok() +} + +/// Parse the entry value into a reason. Accepts either a bare token (`spend`/`fraud`) or a JSON +/// object `{"reason":"spend", ...}`. Anything else → `Unknown` (still denied — fail safe). +pub fn parse_reason(value: &[u8]) -> DenyReason { + let s = std::str::from_utf8(value).unwrap_or("").trim(); + // The JSON branch must own its extracted reason (it's borrowed from a temporary `Value`); the + // bare-token branch matches the borrowed `&str` directly — no allocation on the common path. 
+ let json_reason: Option; + let token: &str = if s.starts_with('{') { + json_reason = serde_json::from_slice::(value) + .ok() + .and_then(|v| v.get("reason").and_then(|r| r.as_str()).map(str::to_owned)); + json_reason.as_deref().unwrap_or("") + } else { + s + }; + match token { + "spend" => DenyReason::Spend, + "fraud" => DenyReason::Fraud, + _ => DenyReason::Unknown, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_allows_unknown_tenants() { + let set = DenySet::new(); + assert!(!set.is_denied(12345)); + } + + #[test] + fn insert_remove_and_reason() { + let mut set = DenySet::new(); + set.insert(1, DenyReason::Spend); + set.insert(2, DenyReason::Fraud); + assert!(set.is_denied(1)); + assert_eq!(set.reason(1), Some(DenyReason::Spend)); + assert_eq!(set.reason(2).unwrap().http_status(), 403); + set.remove(1); + assert!(!set.is_denied(1)); // restored + assert_eq!(set.len(), 1); + } + + #[test] + fn key_parsing() { + assert_eq!(parse_key("blackhole.42"), Some(42)); + assert_eq!(parse_key("blackhole.notanumber"), None); + assert_eq!(parse_key("signkey.1"), None); + } + + #[test] + fn reason_parsing_bare_and_json() { + assert_eq!(parse_reason(b"spend"), DenyReason::Spend); + assert_eq!(parse_reason(b" fraud "), DenyReason::Fraud); + assert_eq!( + parse_reason(br#"{"reason":"spend","exp":123}"#), + DenyReason::Spend + ); + assert_eq!(parse_reason(b"weird"), DenyReason::Unknown); + } + + #[test] + fn spend_is_402_fraud_is_403() { + assert_eq!(DenyReason::Spend.http_status(), 402); + assert_eq!(DenyReason::Fraud.http_status(), 403); + } +} diff --git a/src/doctor.rs b/src/doctor.rs new file mode 100644 index 0000000..3effe4e --- /dev/null +++ b/src/doctor.rs @@ -0,0 +1,227 @@ +//! Diagnostics (PATTERNS.md `doctor` pattern): fast prerequisite checks, exit 0/1. +//! +//! The point is to catch a misconfiguration *before* traffic lands on the instance, where it would +//! 
otherwise surface as a first-request failure (a 401 from an empty keyring, a 503 from a missing +//! pool key, a 502 from an unresolvable provider). We check the things boot does lazily or never: +//! NATS reachability, the signing keyring, managed pool keys, and provider DNS. + +use crate::config::AiConfig; +use crate::route; +use std::collections::BTreeMap; +use std::time::Duration; + +pub struct CheckResult { + pub name: &'static str, + pub passed: bool, + pub message: String, + pub hint: Option, +} + +fn pass(name: &'static str, message: impl Into) -> CheckResult { + CheckResult { + name, + passed: true, + message: message.into(), + hint: None, + } +} + +fn fail(name: &'static str, message: impl Into, hint: &str) -> CheckResult { + CheckResult { + name, + passed: false, + message: message.into(), + hint: Some(hint.to_string()), + } +} + +pub async fn run_checks(config: &AiConfig) -> Vec { + let mut out = Vec::new(); + + // NATS / slipstream reachability — without it we can't load signing keys or the deny-set. + match store::nats_connect( + &config.nats_url, + config.nats_creds.as_ref().map(|s| s.expose()), + config.nats_creds_file.as_deref(), + ) + .await + { + Ok(_) => out.push(pass("nats", format!("connected to {}", config.nats_url))), + Err(e) => out.push(fail( + "nats", + e.to_string(), + "check AI_NATS_URL and credentials", + )), + } + + out.push(check_signing_keys(config)); + out.push(check_pool_keys(config)); + out.extend(check_provider_dns(config).await); + + out +} + +/// The signing keyring is what authenticates managed traffic. An empty or invalid keyring isn't a +/// hard boot failure (the gateway still serves BYO), but it silently turns *every* `bai_…` key into a +/// 401 — a footgun worth surfacing loudly here. `build_keyring` already rejects a non-numeric kid or +/// an unparseable public key, so a success means every configured key installed. 
+fn check_signing_keys(config: &AiConfig) -> CheckResult { + match config.build_keyring() { + Ok(ring) if ring.is_empty() => fail( + "signing_keys", + "no signing keys configured — all managed (bai_…) traffic will 401, only BYO works", + "set [signing_keys] (kid → base64 Ed25519 public key) in config or AI_ env", + ), + Ok(ring) => pass( + "signing_keys", + format!("{} signing key(s) loaded", ring.len()), + ), + Err(e) => fail( + "signing_keys", + e.to_string(), + "every kid must be numeric and every value a base64 (or raw 32-byte) Ed25519 public key", + ), + } +} + +/// Pool keys back managed traffic (swapped in per provider). Cross-check against the keyring: if +/// signing keys are present the operator *intends* to serve managed traffic, so zero pool keys means +/// every managed request 503s — a real misconfiguration. A pure-BYO deployment (no signing keys) with +/// no pool keys is legitimate, so that case passes with a note instead of failing. +fn check_pool_keys(config: &AiConfig) -> CheckResult { + let mut names: Vec<&str> = config.pool_keys.keys().map(String::as_str).collect(); + names.sort_unstable(); + let managed_intended = !config.signing_keys.is_empty(); + match (names.is_empty(), managed_intended) { + (true, true) => fail( + "pool_keys", + "signing keys are configured (managed traffic expected) but no pool keys are set — \ + every managed request will 503", + "set AI_POOL_KEY_ (e.g. AI_POOL_KEY_OPENAI) for each provider you serve", + ), + (true, false) => pass( + "pool_keys", + "none configured (BYO-only deployment — no signing keys either)", + ), + (false, _) => pass("pool_keys", format!("pool keys for: {}", names.join(", "))), + } +} + +/// Resolve every provider authority the gateway might dial (known providers + config overrides/adds), +/// so a DNS or typo'd-authority problem shows up here rather than as a 502 on the first request. Each +/// lookup is bounded so one black-holed host can't hang the doctor. 
We don't connect (no auth, no TLS +/// handshake) — reachability of the *name* is the prerequisite; live auth is proven by the smoke test. +async fn check_provider_dns(config: &AiConfig) -> Vec { + // Effective authority per provider name: the known default unless config overrides it, plus any + // config-only provider. A BTreeMap dedups and keeps the output stable/ordered. + let mut authorities: BTreeMap<&str, String> = BTreeMap::new(); + for spec in route::KNOWN_PROVIDERS { + authorities.insert(spec.name, spec.authority.to_string()); + } + for (name, authority) in &config.provider_authorities { + authorities.insert(name.as_str(), authority.clone()); + } + + let mut results = Vec::with_capacity(authorities.len()); + for (name, authority) in authorities { + // `CheckResult.name` is `&'static str`: a known provider lends its static name; a config-only + // provider (non-'static) reports under a generic label, with the real name in the message. + let check_name: &'static str = route::KNOWN_PROVIDERS + .iter() + .find(|s| s.name == name) + .map_or("provider_dns", |s| s.name); + let lookup = tokio::time::timeout( + Duration::from_secs(3), + tokio::net::lookup_host(authority.clone()), + ) + .await; + let res = match lookup { + Ok(Ok(mut addrs)) => match addrs.next() { + Some(addr) => pass(check_name, format!("{name} → {authority} ({addr})")), + None => fail( + check_name, + format!("{name}: {authority} resolved to no addresses"), + "check the provider authority (host:port) in provider_authorities", + ), + }, + Ok(Err(e)) => fail( + check_name, + format!("{name}: {authority}: {e}"), + "check the provider authority (host:port) and DNS", + ), + Err(_) => fail( + check_name, + format!("{name}: {authority}: DNS lookup timed out (>3s)"), + "the upstream host may be unreachable or DNS is slow", + ), + }; + results.push(res); + } + results +} + +pub fn print_results(title: &str, results: &[CheckResult]) { + println!("== {title} =="); + for r in results { + let mark = if 
r.passed { "ok" } else { "FAIL" }; + println!("[{mark}] {}: {}", r.name, r.message); + if let (false, Some(hint)) = (r.passed, &r.hint) { + println!(" hint: {hint}"); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::secret::Secret; + use std::collections::HashMap; + + #[test] + fn signing_keys_empty_fails() { + // No keys ⇒ every managed token 401s; doctor must flag it, not pass silently. + let c = AiConfig::default(); + assert!(!check_signing_keys(&c).passed); + } + + #[test] + fn signing_keys_valid_passes() { + let c = AiConfig { + // 32 zero bytes, base64 — a structurally valid Ed25519 public key. + signing_keys: HashMap::from([( + "1".to_string(), + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA".to_string(), + )]), + ..Default::default() + }; + assert!(check_signing_keys(&c).passed); + } + + #[test] + fn pool_keys_missing_with_signing_keys_fails() { + // Signing keys present (managed intended) but no pool keys ⇒ every managed request 503s. + let c = AiConfig { + signing_keys: HashMap::from([( + "1".to_string(), + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA".to_string(), + )]), + ..Default::default() + }; + assert!(!check_pool_keys(&c).passed); + } + + #[test] + fn pool_keys_absent_byo_only_passes() { + // No signing keys and no pool keys is a legitimate BYO-only deployment — must not fail. + assert!(check_pool_keys(&AiConfig::default()).passed); + } + + #[test] + fn pool_keys_present_passes() { + let c = AiConfig { + pool_keys: HashMap::from([("openai".to_string(), Secret::new("sk-x"))]), + ..Default::default() + }; + assert!(check_pool_keys(&c).passed); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..9f841ea --- /dev/null +++ b/src/error.rs @@ -0,0 +1,15 @@ +//! Structured error type (PATTERNS.md convention: `thiserror` enum, `From` for foreign errors). 
+ +#[derive(Debug, thiserror::Error)] +pub enum GatewayError { + #[error("configuration error: {0}")] + Config(String), + + #[error("store error: {0}")] + Store(#[from] store::KvError), + + #[error("dns resolution error: {0}")] + Dns(String), +} + +pub type Result = std::result::Result; diff --git a/src/key.rs b/src/key.rs new file mode 100644 index 0000000..4cdce56 --- /dev/null +++ b/src/key.rs @@ -0,0 +1,363 @@ +//! Stateless virtual API key: `bai_v1.{kid}.{payload}.{sig}`. +//! +//! The gateway authenticates every request from a `{payload}` it can verify **without a +//! lookup**: tenant/app identity lives *inside* the token, signed with Ed25519. We hold only +//! the *public* keys (by `kid`), so a compromised — or third-party / OSS — gateway can verify +//! but **cannot mint** new tenant keys; the private signing key lives only in the control plane. +//! +//! Why signed-token instead of opaque-token + registry lookup: at millions of tenants we don't +//! want a per-request lookup (latency + a state dependency) just to learn *who* is calling. +//! Identity is stateless here; the only per-request state is the sparse deny-set (see `deny`), +//! which is a membership check, not an identity lookup. +//! +//! Why deterministic (no nonce/timestamp in the payload): `mint(tenant, app)` is reproducible, +//! so the control plane can re-derive a tenant's key on demand and store nothing. Revocation is +//! handled out-of-band by the deny-set, not by per-key expiry. + +use base64::Engine; +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +use ed25519_dalek::{Signature, Signer, SigningKey, Verifier, VerifyingKey}; +use std::collections::HashMap; + +/// Wire prefix + version. Bumping the version is a breaking change to the token format; +/// the version is inside the signed bytes so it cannot be downgraded by an attacker. +pub const PREFIX: &str = "bai_v1"; + +/// Signing-key identifier. 
Lets the control plane rotate signing keys: new tokens are minted +/// under a new `kid` while the gateway still trusts the public keys of older, un-retired `kid`s. +pub type Kid = u32; + +/// The identity carried by (and the entire contents of) a virtual key. +/// +/// `tenant_id`/`vpc_id` are `u64` to match the platform's id width (cf. ClickHouse +/// `tenant_id UInt64` / `vpc_id UInt64`) and to keep the payload a fixed 16 bytes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct VirtualKey { + pub tenant_id: u64, + pub vpc_id: u64, +} + +impl VirtualKey { + /// Fixed 16-byte little-endian payload: `tenant_id ++ vpc_id`. Fixed layout (not JSON) so + /// the encoding is deterministic byte-for-byte — required for `mint` to be reproducible. + fn encode_payload(&self) -> [u8; 16] { + let mut out = [0u8; 16]; + out[..8].copy_from_slice(&self.tenant_id.to_le_bytes()); + out[8..].copy_from_slice(&self.vpc_id.to_le_bytes()); + out + } + + fn decode_payload(bytes: &[u8]) -> Option { + if bytes.len() != 16 { + return None; + } + Some(Self { + tenant_id: u64::from_le_bytes(bytes[..8].try_into().ok()?), + vpc_id: u64::from_le_bytes(bytes[8..].try_into().ok()?), + }) + } +} + +#[derive(Debug, thiserror::Error, PartialEq, Eq)] +pub enum KeyError { + #[error("malformed virtual key")] + Malformed, + #[error("unsupported key version")] + BadVersion, + #[error("unknown signing key id {0}")] + UnknownKid(Kid), + #[error("signature verification failed")] + BadSignature, +} + +/// The set of trusted Ed25519 public keys, indexed by `kid`. Built once at boot from config +/// (`signing_keys`); multiple kids may be trusted at once for zero-downtime rotation via redeploy. 
+#[derive(Debug, Default, Clone)] +pub struct Keyring { + keys: HashMap, +} + +impl Keyring { + pub fn new() -> Self { + Self::default() + } + + pub fn insert(&mut self, kid: Kid, key: VerifyingKey) { + self.keys.insert(kid, key); + } + + pub fn get(&self, kid: Kid) -> Option<&VerifyingKey> { + self.keys.get(&kid) + } + + pub fn remove(&mut self, kid: Kid) { + self.keys.remove(&kid); + } + + pub fn len(&self) -> usize { + self.keys.len() + } + + pub fn is_empty(&self) -> bool { + self.keys.is_empty() + } + + /// Verify a virtual key string and extract its identity. Stateless: the only input besides + /// the token is the public keyring. + pub fn verify(&self, token: &str) -> Result { + // Split into exactly 4 parts: `bai_v1`, kid, payload, sig. `splitn(4, '.')` rejects any + // token with fewer separators; a payload/sig never contains '.' (base64url has none). + let mut parts = token.splitn(4, '.'); + let prefix = parts.next().ok_or(KeyError::Malformed)?; + let kid_str = parts.next().ok_or(KeyError::Malformed)?; + let payload_b64 = parts.next().ok_or(KeyError::Malformed)?; + let sig_b64 = parts.next().ok_or(KeyError::Malformed)?; + + if prefix != PREFIX { + // Distinguish "wrong version of our token" from "not our token at all" only loosely; + // both are unauthenticated. A `bai_vN` with N != 1 reports BadVersion for clarity. + return if prefix.starts_with("bai_v") { + Err(KeyError::BadVersion) + } else { + Err(KeyError::Malformed) + }; + } + + let kid: Kid = kid_str.parse().map_err(|_| KeyError::Malformed)?; + + // Decode the fixed-size fields straight onto the stack — no per-request heap allocation on + // the verify hot path. The payload is always 16 bytes, the signature 64. 
`decode_slice` + // sizes its bounds check against a (ceil) estimate, so the buffers are a few bytes larger + // than the exact decoded length; we slice to what was actually written and the fixed-size + // checks below reject anything off (an oversized field overruns the estimate → Malformed). + let mut payload_buf = [0u8; 24]; // ≥ estimate for a 22-char (16-byte) payload + let plen = URL_SAFE_NO_PAD + .decode_slice(payload_b64, &mut payload_buf) + .map_err(|_| KeyError::Malformed)?; + let payload = &payload_buf[..plen]; + + let mut sig_buf = [0u8; 72]; // ≥ estimate for an 86-char (64-byte) signature + let slen = URL_SAFE_NO_PAD + .decode_slice(sig_b64, &mut sig_buf) + .map_err(|_| KeyError::Malformed)?; + let sig_arr: [u8; 64] = sig_buf[..slen] + .try_into() + .map_err(|_| KeyError::Malformed)?; + let signature = Signature::from_bytes(&sig_arr); + + // Resolve the public key *before* the cryptographic check so an unknown kid is a distinct, + // cheap rejection (no signature math on keys we don't trust). + let vk = self.get(kid).ok_or(KeyError::UnknownKid(kid))?; + + // The signed message binds version + kid + payload, so none can be swapped independently. + // Build it into a stack buffer (≤ 40 bytes) — no allocation per verify. A payload longer + // than the buffer can hold can't be a valid 16-byte payload anyway, so it's `Malformed` + // rather than a panic on this per-request hot path. + let mut signed_buf = [0u8; SIGNED_BYTES_CAP]; + let signed = + write_signed_bytes(&mut signed_buf, kid, payload_b64).ok_or(KeyError::Malformed)?; + vk.verify(signed, &signature) + .map_err(|_| KeyError::BadSignature)?; + + VirtualKey::decode_payload(payload).ok_or(KeyError::Malformed) + } +} + +/// Upper bound on `bai_v1.{kid}.{payload}`: `PREFIX` (6) + `.` + a `u32` kid (≤ 10 digits) + `.` +/// + a 16-byte base64url payload (22 chars) = 40 bytes. 64 leaves headroom. 
+const SIGNED_BYTES_CAP: usize = 64; + +/// Write the signature-covered bytes `bai_v1.{kid}.{payload}` into `buf`, returning the written +/// slice — or `None` if they don't fit in `SIGNED_BYTES_CAP`. Binding kid + payload here is what +/// stops an attacker from re-pointing a valid signature at a different kid or a tampered payload. +/// For a well-formed key the length is bounded (≤ 40 bytes; see `SIGNED_BYTES_CAP`), so `None` +/// means the input was malformed — `write!` returns `WriteZero` rather than panicking or +/// truncating, keeping the verify hot path allocation- *and* panic-free. +fn write_signed_bytes<'a>( + buf: &'a mut [u8; SIGNED_BYTES_CAP], + kid: Kid, + payload_b64: &str, +) -> Option<&'a [u8]> { + use std::io::Write; + let mut cur = std::io::Cursor::new(&mut buf[..]); + write!(cur, "{PREFIX}.{kid}.{payload_b64}").ok()?; + let n = cur.position() as usize; + Some(&buf[..n]) +} + +/// Parse an Ed25519 public key from a slipstream `signkey.*` value: accept raw 32 bytes or +/// base64 (standard or url-safe) of 32 bytes, so the control plane can store whichever form. +pub fn verifying_key_from_value(bytes: &[u8]) -> Option { + if let Ok(arr) = <[u8; 32]>::try_from(bytes) { + return VerifyingKey::from_bytes(&arr).ok(); + } + let s = std::str::from_utf8(bytes).ok()?.trim(); + for decoded in [ + base64::engine::general_purpose::STANDARD.decode(s).ok(), + URL_SAFE_NO_PAD.decode(s).ok(), + ] + .into_iter() + .flatten() + { + if let Ok(arr) = <[u8; 32]>::try_from(decoded.as_slice()) { + return VerifyingKey::from_bytes(&arr).ok(); + } + } + None +} + +/// Mint a virtual key. Lives here for tests + determinism checks and as the reference +/// implementation; production minting is the Go control plane (`crypto/ed25519`), which must +/// produce byte-identical output for the same inputs. 
+#[allow(clippy::expect_used)] // payload is a fixed 22-char base64 of 16 bytes; always fits the cap +pub fn mint(vk: &VirtualKey, kid: Kid, signing_key: &SigningKey) -> String { + let payload_b64 = URL_SAFE_NO_PAD.encode(vk.encode_payload()); + let mut signed_buf = [0u8; SIGNED_BYTES_CAP]; + // mint builds the payload itself (a fixed 22-char base64 of 16 bytes) from controlled inputs, + // so it always fits; this `expect` is a true invariant assertion, not a fallible runtime path. + let signed = write_signed_bytes(&mut signed_buf, kid, &payload_b64) + .expect("minted signed bytes fit in SIGNED_BYTES_CAP"); + let sig: Signature = signing_key.sign(signed); + let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); + format!("{PREFIX}.{kid}.{payload_b64}.{sig_b64}") +} + +#[cfg(test)] +mod tests { + use super::*; + + // Deterministic test keypair from a fixed seed — avoids an RNG dep and keeps tests reproducible. + fn test_keypair(seed: u8) -> (SigningKey, VerifyingKey) { + let sk = SigningKey::from_bytes(&[seed; 32]); + let vk = sk.verifying_key(); + (sk, vk) + } + + fn ring_with(kid: Kid, vk: VerifyingKey) -> Keyring { + let mut r = Keyring::new(); + r.insert(kid, vk); + r + } + + #[test] + fn mint_then_verify_roundtrips_identity() { + let (sk, vk) = test_keypair(1); + let ring = ring_with(7, vk); + let id = VirtualKey { + tenant_id: 42, + vpc_id: 99, + }; + + let token = mint(&id, 7, &sk); + assert_eq!(ring.verify(&token).unwrap(), id); + } + + #[test] + fn mint_is_deterministic() { + let (sk, _) = test_keypair(2); + let id = VirtualKey { + tenant_id: 1, + vpc_id: 2, + }; + // Ed25519 is deterministic (RFC 8032) and the payload has no nonce, so two mints match. 
+ assert_eq!(mint(&id, 1, &sk), mint(&id, 1, &sk)); + } + + #[test] + fn tampered_payload_is_rejected() { + let (sk, vk) = test_keypair(3); + let ring = ring_with(1, vk); + let token = mint( + &VirtualKey { + tenant_id: 10, + vpc_id: 20, + }, + 1, + &sk, + ); + + // Flip a byte in the payload segment; the signature no longer covers it. + let mut parts: Vec<&str> = token.split('.').collect(); + let mut payload = URL_SAFE_NO_PAD.decode(parts[2]).unwrap(); + payload[0] ^= 0xff; + let tampered_payload = URL_SAFE_NO_PAD.encode(&payload); + parts[2] = &tampered_payload; + let tampered = parts.join("."); + + assert_eq!(ring.verify(&tampered), Err(KeyError::BadSignature)); + } + + #[test] + fn tampered_signature_is_rejected() { + let (sk, vk) = test_keypair(4); + let ring = ring_with(1, vk); + let token = mint( + &VirtualKey { + tenant_id: 5, + vpc_id: 6, + }, + 1, + &sk, + ); + + let mut sig = URL_SAFE_NO_PAD + .decode(token.rsplit('.').next().unwrap()) + .unwrap(); + sig[0] ^= 0xff; + let bad_sig = URL_SAFE_NO_PAD.encode(&sig); + let base = &token[..token.rfind('.').unwrap()]; + let tampered = format!("{base}.{bad_sig}"); + + assert_eq!(ring.verify(&tampered), Err(KeyError::BadSignature)); + } + + #[test] + fn unknown_kid_is_rejected_without_crypto() { + let (sk, vk) = test_keypair(5); + let ring = ring_with(1, vk); // trusts kid=1 only + let token = mint( + &VirtualKey { + tenant_id: 1, + vpc_id: 1, + }, + 2, + &sk, + ); // minted under kid=2 + assert_eq!(ring.verify(&token), Err(KeyError::UnknownKid(2))); + } + + #[test] + fn signature_from_a_different_kid_is_rejected() { + // A valid signature minted under kid=2 must not verify when presented as kid=1, even if + // the gateway trusts both — because kid is part of the signed bytes. 
+ let (sk1, vk1) = test_keypair(6); + let (sk2, vk2) = test_keypair(7); + let mut ring = Keyring::new(); + ring.insert(1, vk1); + ring.insert(2, vk2); + + let id = VirtualKey { + tenant_id: 3, + vpc_id: 4, + }; + let token2 = mint(&id, 2, &sk2); + // Re-label the kid segment as 1 while keeping kid=2's signature. + let parts: Vec<&str> = token2.split('.').collect(); + let relabeled = format!("{}.1.{}.{}", parts[0], parts[2], parts[3]); + assert_eq!(ring.verify(&relabeled), Err(KeyError::BadSignature)); + let _ = sk1; + } + + #[test] + fn malformed_and_version_errors() { + let (_, vk) = test_keypair(8); + let ring = ring_with(1, vk); + assert_eq!(ring.verify("garbage"), Err(KeyError::Malformed)); + assert_eq!(ring.verify("bai_v1.1.only-three"), Err(KeyError::Malformed)); + assert_eq!(ring.verify("bai_v2.1.aaaa.bbbb"), Err(KeyError::BadVersion)); + assert_eq!( + ring.verify("sk-openai.1.aaaa.bbbb"), + Err(KeyError::Malformed) + ); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..bfdf419 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,32 @@ +//! Beyond AI gateway library. +//! +//! `src/main.rs` wires these modules into a Pingora `ProxyHttp` service. The load-bearing logic +//! (virtual-key verification, deny-set, usage parsing, routing, request peek) lives in modules +//! free of Pingora/IO so it is unit-tested without a running proxy or live providers. + +// Lint gates (`unsafe_code = "forbid"`, `unused_must_use = "deny"`) live in `[workspace.lints]` so +// they apply to *both* crate roots — this lib and the `main.rs` binary — not just whichever unit +// carries a crate-level `#![deny]`. A dropped `Result` (e.g. an unchecked `write_response_*`) is +// therefore a hard error, and `unsafe` is forbidden, everywhere in the crate. 
+// +// `unwrap_used`/`expect_used`/`panic` are denied in production code (see `[workspace.lints.clippy]`) +// but a unit test's whole job is to assert a precondition holds — `.unwrap()` *is* the assertion — so +// allow them in `#[cfg(test)]` modules. +#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used, clippy::panic))] + +pub mod admin; +pub mod circuit_breaker; +pub mod config; +pub mod deny; +pub mod doctor; +pub mod error; +pub mod key; +pub mod metrics; +pub mod peek; +pub mod proxy; +pub mod ratelimit; +pub mod route; +pub mod secret; +pub mod state; +pub mod store_watch; +pub mod usage; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..6d77699 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,167 @@ +//! Beyond AI gateway binary: clap `Run`/`Doctor`, Pingora server bootstrap, services. + +// See `lib.rs`: deny the panic surface in production, allow it in `#[cfg(test)]` assertions. +#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used, clippy::panic))] + +use beyond_ai::admin::AdminApp; +use beyond_ai::config::AiConfig; +use beyond_ai::doctor; +use beyond_ai::metrics::Metrics; +use beyond_ai::proxy::AiProxy; +use beyond_ai::state::GatewayState; +use beyond_ai::store_watch::WatcherService; +use clap::{Parser, Subcommand}; +use pingora_core::apps::http_app::HttpServer; +use pingora_core::server::Server; +use pingora_core::server::configuration::ServerConf; +use pingora_core::services::background::background_service; +use pingora_core::services::listening::Service as ListeningService; +use pingora_proxy::http_proxy_service; +use std::path::Path; +use std::process::exit; +use tracing_subscriber::EnvFilter; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; + +#[derive(Parser)] +#[command( + name = "beyond-ai", + about = "Beyond AI gateway — egress proxy to LLM providers" +)] +struct Cli { + /// Path to config file (defaults to ./config.toml). 
+ #[arg(short, long, env = "AI_CONFIG_PATH", global = true)] + config: Option<std::path::PathBuf>, + + #[command(subcommand)] + command: Option<Commands>, +} + +#[derive(Subcommand)] +enum Commands { + /// Run prerequisite diagnostics and exit. + Doctor, + /// Start the gateway (default). + Run, +} + +// Load the gateway config from `path`; on failure print the error to stderr and exit with status 1. +fn load_config(path: Option<&Path>) -> AiConfig { + match AiConfig::load_with_path(path) { + Ok(c) => c, + Err(e) => { + eprintln!("failed to load config: {e}"); + exit(1); + } + } +} + +fn init_tracing() { + // JSON to stdout; the `ai.usage` target carries billing facts that logfwd/OTLP ships to + // ClickHouse. `AI_LOG` overrides the level filter. + let filter = EnvFilter::try_from_env("AI_LOG").unwrap_or_else(|_| EnvFilter::new("info")); + tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer().json()) + .with(filter) + .init(); +} + +// Boot path: every `.expect()` here is a fatal start-up invariant (no runtime to build, no Pingora +// server) — a panic before we serve a single request is the correct, visible failure. +#[allow(clippy::expect_used)] +fn main() { + // rustls 0.23 requires a process-wide crypto provider for the TLS connections to providers. + // Idempotent: an `Err` means a provider is already installed (e.g. a second init in tests), + // which is fine to ignore — the provider we want is in place either way. + let _ = rustls::crypto::ring::default_provider().install_default(); + + let cli = Cli::parse(); + + // Doctor runs before any server setup (minimal current-thread runtime), exits 0/1.
+ if matches!(cli.command, Some(Commands::Doctor)) { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("runtime"); + let config = load_config(cli.config.as_deref()); + let results = rt.block_on(doctor::run_checks(&config)); + doctor::print_results("Beyond AI Gateway Doctor", &results); + exit(if results.iter().all(|r| r.passed) { + 0 + } else { + 1 + }); + } + + init_tracing(); + let config = load_config(cli.config.as_deref()); + let listen = config.listen.clone(); + let metrics_listen = config.metrics_listen.clone(); + // Capture the shutdown knobs before `config` is moved into the gateway state below. + let grace_period_secs = config.shutdown_grace_period_secs; + let runtime_timeout_secs = config.shutdown_runtime_timeout_secs; + let metrics = match Metrics::new() { + Ok(m) => m, + Err(e) => { + eprintln!("failed to register metrics: {e}"); + exit(1); + } + }; + let state = match GatewayState::new(config, metrics) { + Ok(s) => s, + Err(e) => { + eprintln!("failed to build gateway state: {e}"); + exit(1); + } + }; + + // Make the graceful-shutdown drain window explicit instead of inheriting Pingora's silent + // defaults (300s grace / 5s runtime teardown). `grace_period_seconds` is how long in-flight + // requests get to finish after SIGTERM before teardown; `graceful_shutdown_timeout_seconds` is + // the final runtime-exit backstop. See the `AiConfig` field docs for the read_timeout / + // orchestrator-stopTimeout tradeoffs. + let conf = ServerConf { + grace_period_seconds: Some(grace_period_secs), + graceful_shutdown_timeout_seconds: Some(runtime_timeout_secs), + ..ServerConf::default() + }; + let mut server = Server::new_with_opt_and_conf(None, conf); + server.bootstrap(); + + // Client (app) traffic. 
+ let mut proxy_svc = http_proxy_service( + &server.configuration, + AiProxy { + state: state.clone(), + }, + ); + proxy_svc.add_tcp(&listen); + server.add_service(proxy_svc); + + // slipstream watchers + NATS connectivity (connects on Pingora's runtime; see WatcherService). + server.add_service(background_service( + "ai-watchers", + WatcherService { + state: state.clone(), + }, + )); + + // Metrics listener now also serves /livez + /readyz for the ECS/k8s probes. Pingora's built-in + // prometheus service only does /metrics, so we hand-route all three in one small ServeHttp. + let mut admin = ListeningService::new( + "ai-admin".to_string(), + HttpServer::new_app(AdminApp { + metrics: state.metrics.clone(), + }), + ); + admin.add_tcp(&metrics_listen); + server.add_service(admin); + + tracing::info!( + %listen, + %metrics_listen, + grace_period_secs, + runtime_timeout_secs, + "starting beyond-ai" + ); + server.run_forever(); +} diff --git a/src/metrics.rs b/src/metrics.rs new file mode 100644 index 0000000..b3ac19e --- /dev/null +++ b/src/metrics.rs @@ -0,0 +1,258 @@ +//! Prometheus metrics (PATTERNS.md: `Arc<Metrics>`). +//! +//! Registered on the **default** registry so Pingora's built-in `prometheus_http_service` +//! exposes them with no extra wiring. `Metrics::new` is called exactly once (in `main`). + +use prometheus::{ + Histogram, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, Opts, + default_registry, +}; +use std::sync::Arc; + +pub struct Metrics { + pub requests_total: IntCounter, + /// Labeled by reason ("auth", "deny_spend", "deny_fraud") so we can see *why* we rejected. + pub rejections_total: IntCounterVec, + /// Upstream responses by provider + status class ("1xx"/"2xx"/"3xx"/"4xx"/"5xx"). A provider + /// degrading (429/5xx) is otherwise invisible until it surfaces as latency or missing usage + /// events — this is the per-provider error-rate signal an oncall pages on.
+ pub upstream_responses_total: IntCounterVec, + /// Upstream **connect** retries by provider (see `proxy::fail_to_connect`). A partially-down + /// provider TCP layer (or an egress-IP ban) silently retries up to `MAX_CONNECT_RETRIES` times + /// per request; without this, the extra latency looks like a slow provider, not a connect + /// problem. Pairs with a `warn!` on the same path so the dashboard spike has a log to grep. + pub connect_retries_total: IntCounterVec, + /// Labeled by kind: input|output|cache_read|cache_write. Cache tokens are also in the `ai.usage` + /// billing log, but that ships with lag — the Prometheus counter is the alerting surface for + /// "cache hit rate fell off a cliff after a deploy" (cache write ≈ 3× input, cache read ≈ 0.1×, + /// so a regression is a real cost event, not just a latency one). + pub tokens_total: IntCounterVec, + /// The four `tokens_total` children, resolved once at boot. The label set (`input`/`output`/ + /// `cache_read`/`cache_write`) is fixed and known at compile time, so we pay the + /// `with_label_values` map lookup once here instead of four times per metered response. + pub tokens_input: IntCounter, + pub tokens_output: IntCounter, + pub tokens_cache_read: IntCounter, + pub tokens_cache_write: IntCounter, + /// Labeled by provider: TTFT varies by an order of magnitude across providers (Groq/Cerebras + /// <100ms vs. a large Anthropic/xAI model at seconds), so an unlabeled histogram can't tell you + /// *which* provider's first-token time regressed. + pub ttft_seconds: HistogramVec, + /// Labeled by provider, same rationale as `ttft_seconds`: full-request duration is dominated by + /// the model's generation time, which is per-provider. + pub upstream_latency_seconds: HistogramVec, + pub active_streams: IntGauge, + /// Total in-flight requests (streaming + non-streaming), incremented once a request is admitted + /// in `request_filter` and decremented in `logging`. 
`active_streams` only covers SSE; under a + /// burst or a stalled upstream this is what distinguishes "high rps, fast upstreams" from + /// "connections piling up" — the difference between a perf blip and a connection-exhaustion + /// incident. + pub requests_in_flight: IntGauge, + /// Current deny-set cardinality (denied tenants). The set is `O(denied)` and fed from NATS; a + /// fraud event or a control-plane bug that mass-denies tenants would otherwise grow it invisibly + /// until it shows up as memory pressure. Updated on every seed and every applied delta. + pub deny_set_size: IntGauge, + /// NATS connectivity for the deny-set watcher (1 = connected, 0 = disconnected). The gateway is + /// fail-open — it serves on the last-known set when NATS is down — so staleness is otherwise + /// silent; this is the metric to alert "deny-set has been stale for >N minutes" on. + pub nats_connected: IntGauge, +} + +/// TTFT buckets (seconds). Tuned for LLM latency: sub-second prompts up through the multi-second +/// first-token times of large models. The default prometheus buckets top out at 10s, but TTFT for a +/// busy model can exceed that, so the tail goes to 30s. +const TTFT_BUCKETS: &[f64] = &[0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0]; + +/// Full-request duration buckets (seconds). A streaming completion runs far longer than the +/// default 10s ceiling (`read_timeout_secs` defaults to 600), so the tail reaches 600s — without +/// these, every long stream lands in `+Inf` and the p99/p999 tail is unrecoverable. +const LATENCY_BUCKETS: &[f64] = &[ + 0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0, 600.0, +]; + +impl Metrics { + /// Build and register every metric on the default registry.
Fallible: registering a name that + /// already exists (a second `Metrics::new()` against the process-wide default registry) returns + /// `prometheus::Error::AlreadyReg` rather than panicking, so a double-init surfaces as an error + /// the caller can report instead of crashing the process. + pub fn new() -> prometheus::Result<Arc<Self>> { + let r = default_registry(); + + let requests_total = + IntCounter::with_opts(Opts::new("ai_requests_total", "Total requests handled"))?; + let rejections_total = IntCounterVec::new( + Opts::new("ai_rejections_total", "Requests rejected before upstream"), + &["reason"], + )?; + let upstream_responses_total = IntCounterVec::new( + Opts::new( + "ai_upstream_responses_total", + "Upstream responses by provider and status class", + ), + &["provider", "status"], + )?; + let connect_retries_total = IntCounterVec::new( + Opts::new( + "ai_connect_retries_total", + "Upstream connect retries by provider", + ), + &["provider"], + )?; + let tokens_total = + IntCounterVec::new(Opts::new("ai_tokens_total", "Tokens metered"), &["kind"])?; + // Resolve the fixed-label children once. Created against the (about-to-be-registered) vec, so + // they export normally; the hot path then bumps a direct handle, no per-call label lookup.
+ let tokens_input = tokens_total.with_label_values(&["input"]); + let tokens_output = tokens_total.with_label_values(&["output"]); + let tokens_cache_read = tokens_total.with_label_values(&["cache_read"]); + let tokens_cache_write = tokens_total.with_label_values(&["cache_write"]); + let ttft_seconds = HistogramVec::new( + HistogramOpts::new("ai_ttft_seconds", "Time to first byte from upstream") + .buckets(TTFT_BUCKETS.to_vec()), + &["provider"], + )?; + let upstream_latency_seconds = HistogramVec::new( + HistogramOpts::new( + "ai_upstream_latency_seconds", + "Full upstream request duration", + ) + .buckets(LATENCY_BUCKETS.to_vec()), + &["provider"], + )?; + let active_streams = IntGauge::with_opts(Opts::new( + "ai_active_streams", + "In-flight streaming responses", + ))?; + let requests_in_flight = IntGauge::with_opts(Opts::new( + "ai_requests_in_flight", + "In-flight requests (streaming + non-streaming)", + ))?; + let deny_set_size = + IntGauge::with_opts(Opts::new("ai_deny_set_size", "Currently denied tenants"))?; + let nats_connected = IntGauge::with_opts(Opts::new( + "ai_nats_connected", + "Deny-set watcher NATS connectivity (1=connected, 0=disconnected)", + ))?; + + r.register(Box::new(requests_total.clone()))?; + r.register(Box::new(rejections_total.clone()))?; + r.register(Box::new(upstream_responses_total.clone()))?; + r.register(Box::new(connect_retries_total.clone()))?; + r.register(Box::new(tokens_total.clone()))?; + r.register(Box::new(ttft_seconds.clone()))?; + r.register(Box::new(upstream_latency_seconds.clone()))?; + r.register(Box::new(active_streams.clone()))?; + r.register(Box::new(requests_in_flight.clone()))?; + r.register(Box::new(deny_set_size.clone()))?; + r.register(Box::new(nats_connected.clone()))?; + + Ok(Arc::new(Self { + requests_total, + rejections_total, + upstream_responses_total, + connect_retries_total, + tokens_total, + tokens_input, + tokens_output, + tokens_cache_read, + tokens_cache_write, + ttft_seconds, + 
upstream_latency_seconds, + active_streams, + requests_in_flight, + deny_set_size, + nats_connected, + })) + } +} + +/// Per-provider metric handles, resolved once at boot and held on the [`Provider`](crate::route::Provider). +/// +/// Every per-provider metric (`ttft_seconds`, `upstream_latency_seconds`, `upstream_responses_total`, +/// `connect_retries_total`) is keyed on the provider name — a label known at boot from the provider +/// registry. Resolving the child handles here lets the response path bump a direct counter/histogram +/// instead of doing a string-keyed `with_label_values` map lookup on every response. +pub struct ProviderMetrics { + pub ttft_seconds: Histogram, + pub upstream_latency_seconds: Histogram, + pub connect_retries_total: IntCounter, + /// Responses by status class, indexed `[1xx, 2xx, 3xx, 4xx, 5xx]` (see [`Self::record_response`]). + responses: [IntCounter; 5], +} + +impl ProviderMetrics { + /// Resolve the child handles for `provider` from the shared label vecs. Called once per provider + /// at boot (see `state::build_providers`). + pub fn resolve(m: &Metrics, provider: &str) -> Self { + ProviderMetrics { + ttft_seconds: m.ttft_seconds.with_label_values(&[provider]), + upstream_latency_seconds: m.upstream_latency_seconds.with_label_values(&[provider]), + connect_retries_total: m.connect_retries_total.with_label_values(&[provider]), + responses: [ + m.upstream_responses_total + .with_label_values(&[provider, "1xx"]), + m.upstream_responses_total + .with_label_values(&[provider, "2xx"]), + m.upstream_responses_total + .with_label_values(&[provider, "3xx"]), + m.upstream_responses_total + .with_label_values(&[provider, "4xx"]), + m.upstream_responses_total + .with_label_values(&[provider, "5xx"]), + ], + } + } + + /// Count one upstream response, bucketed by status class (`1xx`/`2xx`/`3xx`/`4xx`/`5xx`). + /// A `1xx` (e.g. 
`100 Continue`, `101 Switching Protocols`) gets its own bucket rather than + /// falling through to `5xx` — providers don't normally emit it, but a misbucketed informational + /// status would otherwise read as a phantom upstream-error spike on the dashboard. + pub fn record_response(&self, status: u16) { + let idx = match status { + 100..=199 => 0, + 200..=299 => 1, + 300..=399 => 2, + 400..=499 => 3, + _ => 4, + }; + self.responses[idx].inc(); + } + + /// Standalone, **unregistered** handles for tests that build a `Provider` without a live registry. + #[cfg(test)] + pub fn disconnected() -> Self { + let counter = || IntCounter::new("t", "t").expect("valid counter opts"); + let hist = + || Histogram::with_opts(HistogramOpts::new("t", "t")).expect("valid histogram opts"); + ProviderMetrics { + ttft_seconds: hist(), + upstream_latency_seconds: hist(), + connect_retries_total: counter(), + responses: [counter(), counter(), counter(), counter(), counter()], + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn record_response_buckets_by_status_class() { + // Lock the index mapping: a 1xx must land in its own bucket, never the 5xx fallback (which + // would read as a phantom upstream-error spike on the provider dashboard). + let pm = ProviderMetrics::disconnected(); + pm.record_response(100); // 1xx + pm.record_response(204); // 2xx + pm.record_response(301); // 3xx + pm.record_response(404); // 4xx + pm.record_response(503); // 5xx + for (idx, status) in [100u16, 204, 301, 404, 503].iter().enumerate() { + assert_eq!( + pm.responses[idx].get(), + 1, + "status {status} landed in the wrong class bucket" + ); + } + } +} diff --git a/src/peek.rs b/src/peek.rs new file mode 100644 index 0000000..c8ac81b --- /dev/null +++ b/src/peek.rs @@ -0,0 +1,509 @@ +//! Streaming, 100%-accurate extraction of the **root-level `model`** from a JSON request body. +//! +//! Both OpenAI and Anthropic require `model` as a top-level field of the request object. 
We extract +//! it with a structural state machine fed the body chunks *as they stream through* — the body is +//! never buffered or reordered. This is exact (not a byte-heuristic): it tracks nesting depth and +//! string/escape state, so a `"model"` appearing inside a nested object (e.g. a message) or inside +//! a string value is correctly ignored, and field order is irrelevant. Memory does not grow with +//! the body: only root-level *keys* and the `model` value are accumulated (their own length is not +//! capped). Large uninteresting string content +//! (system prompts, base64 images) is skipped with a SIMD-accelerated `memchr2` search to the next +//! `"`/`\`, not inspected byte-by-byte — so even a multi-MB request is walked cheaply. + +/// What, if anything, the scanner accumulates for the string it is currently inside. +#[derive(Clone, Copy, PartialEq, Default)] +enum Cap { + /// Not captured: the string's bytes are skipped. + #[default] + No, + /// A root-level object key. + Key, + /// The string value that follows a root-level `model` key. + ModelValue, +} + +/// Incremental scanner that extracts the root-level `model` string from a JSON body fed in chunks. +#[derive(Default)] +pub struct ModelScanner { + /// The extracted model, once its closing quote has been seen. + model: Option<String>, + /// Set when the model has been captured; further `feed` calls return immediately. + done: bool, + /// Nesting depth: number of currently-open `{`/`[`. Root object contents are at depth 1. + depth: u32, + root_is_object: bool, + in_string: bool, + escaped: bool, + /// Whether the next root-level string is a key (`{`/`,` → key; `:` → value). + expect_key: bool, + /// The most recent root-level key was exactly `model`. + last_key_is_model: bool, + /// What (if anything) we're accumulating into `cur` for the current string. + cap: Cap, + /// Raw bytes of the string currently being captured (a root-level key or the `model` value). + cur: Vec<u8>, +} + +impl ModelScanner { + pub fn new() -> Self { + Self::default() + } + + /// Take the extracted model, if found. (Available as soon as the value is seen.)
+ pub fn take_model(&mut self) -> Option<String> { + self.model.take() + } + + /// True while the scanner is directly inside the root-level JSON object (not a nested value). + #[inline] + fn at_root_object(&self) -> bool { + self.depth == 1 && self.root_is_object + } + + /// Feed the next chunk of the body. All scanner state lives in `self`, so chunks may be split + /// anywhere — inside a string, or right after a `\`. A no-op once the model has been captured. + pub fn feed(&mut self, bytes: &[u8]) { + if self.done { + return; + } + let mut i = 0; + let n = bytes.len(); + while i < n { + if self.in_string { + // Fast path: the content of a string we don't accumulate (a big base64 image, a long + // prompt, anything nested) — jump straight to the next `"` or `\` with a + // SIMD-accelerated search instead of inspecting every byte. + if self.cap == Cap::No && !self.escaped { + match memchr::memchr2(b'"', b'\\', &bytes[i..]) { + Some(rel) => i += rel, + None => return, // rest of this chunk is skippable string content + } + } + let b = bytes[i]; + i += 1; + if self.escaped { + // The byte after a `\` is stored as-is: escapes are not decoded, so `\"` and `\\` + // come out right, but `\n` or `\uXXXX` in a captured key/value is kept as `n`/`uXXXX`. + self.escaped = false; + if self.cap != Cap::No { + self.cur.push(b); + } + } else if b == b'\\' { + self.escaped = true; + } else if b == b'"' { + self.in_string = false; + match self.cap { + Cap::Key => self.last_key_is_model = self.cur == b"model", + Cap::ModelValue => { + // A valid JSON string value is UTF-8; if a malformed/adversarial body + // smuggles non-UTF-8 bytes here we record "unknown" rather than emitting + // a `U+FFFD`-corrupted model into the billing log. Either way we're done. + self.model = Some( + String::from_utf8(std::mem::take(&mut self.cur)) + .unwrap_or_else(|_| "unknown".to_string()), + ); + self.done = true; + return; + } + Cap::No => {} + } + self.cap = Cap::No; + self.cur.clear(); + } else if self.cap != Cap::No { + self.cur.push(b); + } + continue; + } + + let b = bytes[i]; + i += 1; + match b { + b'"' => { + self.in_string = true; + self.cur.clear(); + // Decide whether this string is worth accumulating — only root-object keys and + // the `model` value matter.
+ self.cap = if self.at_root_object() { + if self.expect_key { + Cap::Key + } else if self.last_key_is_model { + Cap::ModelValue + } else { + Cap::No + } + } else { + Cap::No + }; + } + b'{' => { + if self.depth == 0 { + self.root_is_object = true; + self.expect_key = true; + } + self.depth += 1; + } + b'[' => { + if self.depth == 0 { + self.root_is_object = false; + } + self.depth += 1; + } + b'}' | b']' => self.depth = self.depth.saturating_sub(1), + b':' if self.depth == 1 => self.expect_key = false, + b',' if self.depth == 1 => { + self.expect_key = true; + self.last_key_is_model = false; + } + _ => {} + } + } + } +} + +/// Decide whether an OpenAI **chat** request body needs `stream_options.include_usage` injected, +/// and where. Returns `Some(offset)` — the byte index just after the root object's opening `{`, where +/// the caller splices `"stream_options":{"include_usage":true},` — **only** when the body is a JSON +/// object with a root-level `"stream": true` and **no** root-level `"stream_options"` key. Otherwise +/// `None` (not a stream, options already set, or not an object) → forward unchanged. +/// +/// Why this exists: OpenAI only emits a usage chunk on a stream when the request carries +/// `stream_options.include_usage = true`. A stock client that omits it would stream with no usage, +/// so managed traffic couldn't be metered. We can't ask for it via a header and can't set it in a +/// client SDK we don't control, so the gateway injects it — for every OpenAI streaming chat request, +/// out of the box. +/// +/// Structural (depth + string + escape aware), so a `"stream"` inside a message object or inside a +/// string value never triggers injection — only the genuine root-level field. The returned offset is +/// always inside a non-empty object (a root `"stream"` is present), so the caller always follows the +/// fragment with a comma. 
+pub fn plan_stream_usage_injection(body: &[u8]) -> Option<usize> { + let n = body.len(); + // Cheap pre-filter: injection is only ever needed when a root-level `"stream"` key is present. + // If the substring `"stream"` doesn't occur *anywhere*, the structural answer is unconditionally + // `None`, so a single SIMD `memmem` pass lets us skip the whole walk — the common case, since + // most requests aren't streaming. (The needle includes the closing quote, so it does *not* match + // inside `"stream_options"`: a body carrying only stream_options is rejected right here, which + // is still the correct `None`.) + memchr::memmem::find(body, b"\"stream\"")?; + let mut i = 0; + while i < n && body[i].is_ascii_whitespace() { + i += 1; + } + // Must be a JSON object; anything else (array, scalar, garbage) we never rewrite. + if i >= n || body[i] != b'{' { + return None; + } + let insert_at = i + 1; + + let mut depth = 0u32; + let mut in_string = false; + let mut escaped = false; + let mut expect_key = false; + let mut capturing_key = false; + // Start index (just past the opening `"`) of the root-level key currently being scanned. The + // body is fully in hand, so we slice the key out of it at the closing quote — no accumulation + // buffer, zero-copy. (Escaped keys are sliced raw; since neither `stream` nor `stream_options` + // contains an escape, an escaped key simply doesn't match either needle — the correct answer.) + let mut key_start = 0usize; + // The current root-level key is exactly `stream` (so the next literal is its value). + let mut last_key_is_stream = false; + let mut stream_true = false; + + let mut j = i; + while j < n { + if in_string { + // Fast path: inside a string we're not capturing (any non-root-key string — message + // content, system prompts, base64 images), jump straight to the next `"`/`\` with a + // SIMD search instead of inspecting every byte. Mirrors the skip in `ModelScanner::feed`.
+ if !capturing_key && !escaped { + match memchr::memchr2(b'"', b'\\', &body[j..]) { + Some(rel) => j += rel, + None => break, // rest of the body is skippable string content + } + } + let b = body[j]; + if escaped { + escaped = false; + } else if b == b'\\' { + escaped = true; + } else if b == b'"' { + in_string = false; + if capturing_key { + capturing_key = false; + // Only root-level (`depth == 1`) keys matter. + if depth == 1 { + let key = &body[key_start..j]; + // A root `stream_options` means the client already controls usage — the + // answer is `None` regardless of anything else in the body, so stop now + // rather than walking the remainder for a result we already know. + if key == b"stream_options" { + return None; + } + last_key_is_stream = key == b"stream"; + } + } + } + j += 1; + continue; + } + let b = body[j]; + match b { + b'"' => { + // A root-level key starts only where one is expected (just after `{` or `,`). + if depth == 1 && expect_key { + capturing_key = true; + key_start = j + 1; // first key byte is just past this opening quote + } else { + capturing_key = false; + } + in_string = true; + } + b'{' => { + depth += 1; + if depth == 1 { + expect_key = true; + } + } + b'[' => depth += 1, + b'}' | b']' => depth = depth.saturating_sub(1), + b':' if depth == 1 => expect_key = false, + b',' if depth == 1 => { + expect_key = true; + last_key_is_stream = false; + } + // The value of a root-level `stream` key: a bare `true` literal. + b't' if depth == 1 && last_key_is_stream => { + if body[j..].starts_with(b"true") { + stream_true = true; + } + last_key_is_stream = false; + } + _ => {} + } + j += 1; + } + + // `stream_options` would have already returned `None` above, so reaching here means it's absent. 
+ if stream_true { Some(insert_at) } else { None } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn scan(body: &[u8]) -> Option { + let mut s = ModelScanner::new(); + s.feed(body); + s.take_model() + } + + #[test] + fn extracts_model_from_sse_first_chunk() { + // The response-side model tap feeds SSE through this same scanner. `data: ` is non-structural + // noise at depth 0, so the scanner reads the first chunk's root `model` — the provider's + // resolved/billed id — and stops. This is what makes the billing model authoritative. + let sse = b"data: {\"id\":\"chatcmpl-x\",\"object\":\"chat.completion.chunk\",\"model\":\"gpt-4o-2024-08-06\",\"choices\":[]}\n\n"; + assert_eq!(scan(sse).as_deref(), Some("gpt-4o-2024-08-06")); + } + + /// Apply `plan_stream_usage_injection` and return the rewritten body (or unchanged if no plan), + /// so tests assert the *resulting* JSON — the thing the upstream actually receives. + fn inject(body: &str) -> String { + match plan_stream_usage_injection(body.as_bytes()) { + Some(at) => { + let frag = br#""stream_options":{"include_usage":true},"#; + let mut out = Vec::with_capacity(body.len() + frag.len()); + out.extend_from_slice(&body.as_bytes()[..at]); + out.extend_from_slice(frag); + out.extend_from_slice(&body.as_bytes()[at..]); + String::from_utf8(out).unwrap() + } + None => body.to_string(), + } + } + + #[test] + fn injects_when_streaming_and_absent() { + let out = inject(r#"{"model":"gpt-4o","stream":true,"messages":[]}"#); + assert_eq!( + out, + r#"{"stream_options":{"include_usage":true},"model":"gpt-4o","stream":true,"messages":[]}"# + ); + // The result must be valid JSON with the option set. 
+ let v: serde_json::Value = serde_json::from_str(&out).unwrap(); + assert_eq!( + v["stream_options"]["include_usage"], + serde_json::json!(true) + ); + } + + #[test] + fn stream_can_be_the_only_or_last_key() { + assert!(plan_stream_usage_injection(br#"{"stream":true}"#).is_some()); + let v: serde_json::Value = + serde_json::from_str(&inject(r#"{"model":"x","stream":true}"#)).unwrap(); + assert_eq!( + v["stream_options"]["include_usage"], + serde_json::json!(true) + ); + } + + #[test] + fn skips_when_options_already_present() { + // Client already asked for usage (in any form) — never touch it. + assert_eq!( + plan_stream_usage_injection( + br#"{"stream":true,"stream_options":{"include_usage":false}}"# + ), + None + ); + // Order-independent: options before stream. + assert_eq!( + plan_stream_usage_injection(br#"{"stream_options":{},"stream":true}"#), + None + ); + } + + #[test] + fn skips_when_not_streaming() { + assert_eq!( + plan_stream_usage_injection(br#"{"model":"x","stream":false}"#), + None + ); + assert_eq!(plan_stream_usage_injection(br#"{"model":"x"}"#), None); + } + + #[test] + fn ignores_nested_or_in_string_stream() { + // `stream` inside a message object is not the root field. + assert_eq!( + plan_stream_usage_injection( + br#"{"messages":[{"role":"u","stream":true}],"model":"x"}"# + ), + None + ); + // `stream` mentioned inside a string value must not trigger. + assert_eq!( + plan_stream_usage_injection(br#"{"system":"set stream:true please","model":"x"}"#), + None + ); + } + + #[test] + fn injects_with_large_content_before_stream() { + // Exercises the SIMD fast-skip in the planner: a large content value must be skipped, and + // the genuine root `stream` after it still triggers injection. 
+ let big = "x".repeat(64 * 1024); + let body = format!(r#"{{"messages":[{{"content":"{big}"}}],"stream":true}}"#); + let v: serde_json::Value = serde_json::from_str(&inject(&body)).unwrap(); + assert_eq!( + v["stream_options"]["include_usage"], + serde_json::json!(true) + ); + } + + #[test] + fn skips_word_stream_inside_large_value() { + // The word `stream` (even `"stream"`) buried in a big string value must not trigger — the + // memmem pre-filter passes, but the structural walk correctly skips over the string content. + let big = "x".repeat(64 * 1024); + let body = format!(r#"{{"system":"{big} \"stream\":true","model":"x"}}"#); + assert_eq!(plan_stream_usage_injection(body.as_bytes()), None); + } + + #[test] + fn stream_options_after_large_content_suppresses() { + // The early-return-on-stream_options path: stream_options appearing (in any order, after a + // big value) must suppress injection even though `stream:true` is also present. + let big = "x".repeat(64 * 1024); + let body = format!( + r#"{{"content":"{big}","stream":true,"stream_options":{{"include_usage":false}}}}"# + ); + assert_eq!(plan_stream_usage_injection(body.as_bytes()), None); + } + + #[test] + fn tolerates_whitespace_and_non_objects() { + assert!(plan_stream_usage_injection(b" { \"stream\" : true }").is_some()); + assert_eq!(plan_stream_usage_injection(b"[1,2,3]"), None); + assert_eq!(plan_stream_usage_injection(b"not json"), None); + } + + #[test] + fn simple() { + assert_eq!( + scan(br#"{"model":"gpt-4o","messages":[]}"#).as_deref(), + Some("gpt-4o") + ); + } + + #[test] + fn model_last_after_huge_array() { + let body = br#"{"messages":[{"role":"user","content":"...lots of text..."}],"stream":true,"model":"claude-opus-4-8"}"#; + assert_eq!(scan(body).as_deref(), Some("claude-opus-4-8")); + } + + #[test] + fn nested_model_is_ignored() { + // `"model"` inside a message object must NOT win over the real root-level one. 
+ let body = br#"{"messages":[{"role":"x","model":"NESTED"}],"model":"real"}"#; + assert_eq!(scan(body).as_deref(), Some("real")); + } + + #[test] + fn model_word_inside_a_string_value_is_ignored() { + let body = br#"{"system":"use the model called \"gpt\" please","model":"real"}"#; + assert_eq!(scan(body).as_deref(), Some("real")); + } + + #[test] + fn whitespace_tolerant() { + assert_eq!( + scan(br#"{ "model" : "m1" , "x":1 }"#).as_deref(), + Some("m1") + ); + } + + #[test] + fn vendor_prefixed_value() { + assert_eq!( + scan(br#"{"model":"openrouter/meta-llama/llama-3.1"}"#).as_deref(), + Some("openrouter/meta-llama/llama-3.1") + ); + } + + #[test] + fn split_across_feeds() { + let mut s = ModelScanner::new(); + for part in [ + &b"{\"messages\":[],\"mod"[..], + &b"el\":\"gp"[..], + &b"t-4o\"}"[..], + ] { + s.feed(part); + } + assert_eq!(s.take_model().as_deref(), Some("gpt-4o")); + } + + #[test] + fn absent_is_none() { + assert_eq!(scan(br#"{"messages":[]}"#), None); + assert_eq!(scan(b"not json"), None); + } + + #[test] + fn large_skipped_value_then_model() { + // Exercises the SIMD fast-skip: a ~256KB content string (with an escaped quote) then the + // real model. Must skip the bulk and still find the root model exactly. + let big = "x".repeat(256 * 1024); + let body = + format!(r#"{{"messages":[{{"content":"{big}\"still in string"}}],"model":"gpt-4o"}}"#); + assert_eq!(scan(body.as_bytes()).as_deref(), Some("gpt-4o")); + } + + #[test] + fn nested_object_value_then_root_model() { + // A root key whose value is an object, followed by the real model. 
+ let body = br#"{"response_format":{"type":"json_object"},"model":"gpt-4o"}"#; + assert_eq!(scan(body).as_deref(), Some("gpt-4o")); + } + + #[test] + fn escaped_quote_inside_model_value_does_not_terminate_it() { + // An escaped `"` *inside the model value itself* exercises the `Cap::ModelValue` escape + // path (line ~72): the backslash-escaped quote must be kept in the accumulated value rather + // than ending the string early. (Model ids never really contain quotes, but a structural + // regression here would truncate the model — and thus mislabel usage — for any value that + // happens to contain an escape.) + assert_eq!( + scan(br#"{"model":"gpt-4\"o"}"#).as_deref(), + Some("gpt-4\"o") + ); + } +} diff --git a/src/proxy.rs b/src/proxy.rs new file mode 100644 index 0000000..9083ea7 --- /dev/null +++ b/src/proxy.rs @@ -0,0 +1,943 @@ +//! The Pingora `ProxyHttp` passthrough service. +//! +//! Flow: pick the provider from the **first path segment** (`/{provider}/…`) → verify the virtual +//! key (stateless) → deny-set check (O(1), default-allow) → swap the auth +//! header to the pool key (managed only) → **stream the request body straight through** (never +//! buffered; original framing preserved) while feeding it to a structural scanner that extracts the +//! exact root-level `model` → relay the response **without buffering** → tap usage from a bounded +//! tail → emit a usage fact. Whether the call is streaming is derived from the *response* +//! Content-Type. +//! +//! Verified end-to-end (`tests/e2e.rs`): a real `beyond-ai` binary against real nats-server + a +//! mock upstream — passthrough fidelity, key swap, usage metering (non-streaming + SSE), BYO +//! passthrough, and deny-set propagation all pass. +//! +//! We never read the request body in `request_filter`: Pingora's body-forward phase reads the +//! downstream body itself, so draining it earlier would make Pingora send `Content-Length` bytes +//! with no body and the upstream would hang. 
We let the body flow through `request_body_filter` +//! (the supported hook), feeding each chunk to a streaming structural scanner (`peek::ModelScanner`, +//! O(1) memory) — never withholding or buffering it. +//! +//! One deliberate exception to the no-buffer rule: a **managed** OpenAI chat/responses request is +//! buffered and gets `stream_options.include_usage` injected when it streams without it — otherwise +//! OpenAI emits no usage chunk and the request couldn't be metered. We can't set that option in a +//! client SDK we don't control, so the gateway guarantees it, out of the box. Scoped to exactly that +//! path (managed + OpenAI dialect + streaming-capable); BYO and everything else stay pure passthrough. +//! +//! Auth branches on key format: `bai_…` is a managed virtual key (verify → deny-check → swap to +//! the pool key); anything else is a **BYO** request — the user's own provider token, passed +//! through unchanged (no swap, no Beyond identity, no deny-set). +//! +//! Routing is by the **first path segment** = provider name (`route`, data-driven): `/{provider}/…` +//! selects the provider and the rest of the path is forwarded **verbatim** (the gateway holds no +//! per-provider mount knowledge). A bare path with no provider prefix that starts with `/v1` is the +//! drop-in default — dialect picks openai/anthropic (`dialect_for_path`) — so an OpenAI/Anthropic +//! client works by changing only the host. An unknown first segment is a 404. Model isn't used for +//! routing (the body isn't read pre-connect); it's still captured from the body for usage. 
+ +use crate::route::{self, Dialect, Provider}; +use crate::state::{GatewayState, RequestId}; +use crate::{peek, usage}; +use async_trait::async_trait; +use bytes::Bytes; +use pingora::http::ResponseHeader; +use pingora_core::Result; +use pingora_core::protocols::ALPN; +use pingora_core::upstreams::peer::HttpPeer; +use pingora_proxy::{ProxyHttp, Session}; +use std::borrow::Cow; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tracing::{info, warn}; + +/// Response header carrying the per-request id (`{instance}-{seq}`). Set on both the proxied +/// response and every reject body so a client can quote it and an oncall can grep for it. +const REQUEST_ID_HEADER: &str = "x-beyond-request-id"; + +/// Reject requests whose declared Content-Length exceeds this. The body itself is **not** buffered +/// (it streams straight through); this is purely an abuse guard checked up front via the header. +const MAX_REQUEST_BODY: usize = 100 * 1024 * 1024; + +/// Bounded tail of the response kept for usage extraction. The usage event is the final SSE chunk +/// / the whole non-streaming body; keeping a tail means we never buffer a long stream. +const USAGE_TAIL_CAP: usize = 64 * 1024; + +/// Max upstream **connect** retries before surfacing the failure to the client. +/// +/// We retry connect failures only (the idiomatic Pingora pattern, same as edge). Retrying on a +/// received **5xx/429 response** is deliberately *not* done: Pingora 0.8 has no clean +/// post-response retry hook for a streaming passthrough (edge doesn't do it either), the upstream +/// may have started streaming, and the provider SDKs already back off on 429/5xx + `Retry-After`. +const MAX_CONNECT_RETRIES: u8 = 2; + +pub struct AiProxy { + pub state: Arc, +} + +/// Per-request context. `None` until `request_filter` admits the request; short-circuited +/// requests (auth/deny failures) leave it `None`, so later filters no-op. 
+pub struct RequestCtx { + tenant_id: u64, + vpc_id: u64, + dialect: Dialect, + /// The resolved upstream provider (authority/host + precomputed managed auth value), shared from + /// the boot-time registry — a cheap `Arc` clone, nothing re-allocated per request. + provider: Arc, + /// The path (+ query) to send upstream: the client path with the `/{provider}` segment stripped + /// (provider-prefixed request) or unchanged (bare-path default). Forwarded **verbatim** — the + /// gateway does no per-provider path rewriting. Applied as the upstream URI in + /// `upstream_request_filter`. + forward_path: String, + /// Whether this is a **managed** request (`bai_…` key → swap to the pool key). `false` for + /// **BYO** — we leave the user's own auth header untouched (passthrough). + managed: bool, + /// Model the client *requested*, extracted from the request body. This is the billing-log + /// **fallback** — the authoritative value is the model the provider echoes in its response (see + /// `resp_model_scanner`), because a client may send an alias (`gpt-4o`) that the provider resolves + /// to and bills under a pinned id (`gpt-4o-2024-08-06`). + model: String, + model_scanner: peek::ModelScanner, + /// Extracts the model the **provider** reports in its response (the resolved/billed id), fed the + /// response stream in `response_body_filter`. Preferred over `model` in the `ai.usage` event so + /// the billed model is authoritative, not the requested alias. Works for SSE too: the scanner + /// skips the `data: ` prefix and reads the first chunk's root `model`. Falls back to `model` when + /// the response carries none (e.g. an error body). + resp_model_scanner: peek::ModelScanner, + /// Whether the upstream response is an SSE stream — set in `response_filter` from the response + /// Content-Type (we don't read the request to learn this). + streaming: bool, + /// Bounded tail of the response, for the usage tap. 
+ resp_tail: Vec, + /// Running total of request-body bytes seen, to enforce `MAX_REQUEST_BODY` even when the client + /// uses chunked transfer encoding (no `Content-Length` to check up front). + body_bytes_fed: usize, + /// Upstream HTTP status, set in `response_filter` once the response head arrives. Drives the + /// circuit-breaker outcome recorded once in `logging`: `5xx` → failure, any other response → + /// success (the provider answered — a `429` is a healthy throttle, not a breaker trip), and a + /// `None` here with an upstream error → failure (connect/read failed before any response). + upstream_status: Option, + /// Managed OpenAI chat/responses request: buffer the body and inject + /// `stream_options.include_usage` if it streams without it, so the usage chunk (hence the + /// billable token count) is guaranteed. The single, deliberate exception to "never buffer the + /// request body" — scoped to the managed OpenAI streaming-capable path and bounded by + /// `MAX_REQUEST_BODY`. BYO and every other request still stream straight through. + inject_eligible: bool, + /// Accumulated request body — populated only when `inject_eligible`; otherwise stays empty and + /// the body is never buffered. + req_buf: Vec, + start: Instant, + /// Connect-retry counter (see `fail_to_connect`). + attempt: u8, + /// Process-unique id for this request (`{instance}-{seq}`), echoed in the `x-beyond-request-id` + /// response header and the `ai.usage` event so a client report ties back to a log line. + request_id: RequestId, +} + +impl AiProxy { + /// Write a small JSON error and signal `request_filter` to short-circuit. The body is built with + /// `serde_json` (not `format!`) so a `typ`/`msg` containing `"` or `\` can never break out of the + /// JSON structure — keeps this safe if a future caller passes a non-literal message. 
+ /// + /// Every rejection logs one structured `warn` line (the rejection counter only says *how many*, + /// not *which request* — this is what an oncall greps when a `deny_fraud`/`rate_limit` spike + /// shows on the dashboard) and echoes the `request_id` in a response header so a client report + /// quoting that id lands on this line. + async fn reject( + session: &mut Session, + request_id: &str, + status: u16, + typ: &str, + msg: &str, + ) -> Result { + warn!(request_id, status, error_type = typ, "request rejected"); + let body = Bytes::from( + serde_json::json!({ "error": { "type": typ, "message": msg } }).to_string(), + ); + let mut resp = ResponseHeader::build(status, None)?; + resp.insert_header("content-type", "application/json")?; + resp.insert_header("content-length", body.len().to_string())?; + resp.insert_header(REQUEST_ID_HEADER, request_id)?; + session.write_response_header(Box::new(resp), false).await?; + session.write_response_body(Some(body), true).await?; + Ok(true) + } +} + +fn extract_virtual_key(session: &Session) -> Option<&str> { + let h = session.req_header(); + // Anthropic SDK sends `x-api-key`; OpenAI SDK sends `Authorization: Bearer`. One neutral + // virtual key works in either, so check both. Borrowed from the header — no per-request copy. + if let Some(v) = h.headers.get("x-api-key").and_then(|v| v.to_str().ok()) { + return Some(v); + } + h.headers + .get("authorization") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.strip_prefix("Bearer ")) +} + +/// Upper bound on a model id we'll record. Real ids are short (`claude-opus-4-8`, +/// `accounts/fireworks/models/…`); anything longer is junk or an attempt to bloat the billing log. +const MAX_MODEL_LEN: usize = 128; + +/// Sanitize the model id extracted from the (client-controlled) request body before it lands in the +/// `ai.usage` billing log. 
`tracing`'s JSON layer escapes the value, but a downstream consumer +/// (logfwd/OTLP → ClickHouse) may re-handle it, so we refuse anything that could break out of a JSON +/// string or a line-oriented log: control bytes, `"`, `\`, `DEL`. A violating or over-long value is +/// recorded as `"unknown"` (matching `peek`'s non-UTF-8 fallback) rather than the raw bytes — a +/// mislabeled-but-safe usage row beats a corrupted or injected one. +fn sanitize_model(model: String) -> Cow<'static, str> { + let bad = model.len() > MAX_MODEL_LEN + || model + .bytes() + .any(|b| b < 0x20 || b == b'"' || b == b'\\' || b == 0x7f); + if bad { + Cow::Borrowed("unknown") + } else { + Cow::Owned(model) + } +} + +fn dialect_for_path(path: &str) -> Dialect { + // Anthropic Messages vs OpenAI Chat Completions/Embeddings. Embeddings are OpenAI-dialect only. + if path.starts_with("/v1/messages") { + Dialect::Anthropic + } else { + Dialect::OpenAI + } +} + +/// Whether the **forwarded** (provider-native) path targets an OpenAI streaming-capable endpoint — +/// chat completions or the Responses API. Checked by *suffix*, so it holds regardless of the +/// provider's mount prefix (`/v1/chat/completions`, `/openai/v1/chat/completions`, +/// `/inference/v1/chat/completions`, …). Only these get buffered for `stream_options.include_usage` +/// injection — embeddings and everything else never stream, so there's nothing to meter. +fn is_streamable_path(forward_path: &str) -> bool { + forward_path.ends_with("/chat/completions") || forward_path.ends_with("/responses") +} + +/// Splice `stream_options.include_usage` into a buffered OpenAI chat body when it streams without it +/// (see `peek::plan_stream_usage_injection`); otherwise return it unchanged. This is what guarantees +/// a usage chunk — hence a billable token count — from a stock client that never set the option. 
fn maybe_inject_stream_usage(body: Vec<u8>) -> Vec<u8> {
    const FRAG: &[u8] = br#""stream_options":{"include_usage":true},"#;
    // No injection planned → hand the caller's buffer straight back, no copy.
    let Some(at) = peek::plan_stream_usage_injection(&body) else {
        return body;
    };
    // Rebuild as head + fragment + tail; `at` is the offset just past the root `{`.
    let (head, tail) = body.split_at(at);
    [head, FRAG, tail].concat()
}

#[async_trait]
impl ProxyHttp for AiProxy {
    type CTX = Option<RequestCtx>;

    fn new_ctx(&self) -> Self::CTX {
        None
    }

    async fn request_filter(&self, session: &mut Session, ctx: &mut Self::CTX) -> Result<bool> {
        self.state.metrics.requests_total.inc();
        let start = Instant::now();
        // One id per request, generated before any reject path so even a 400/401 carries it (in the
        // log line and the `x-beyond-request-id` header). Moved into `ctx` at the end for the
        // admitted path. Cheap: a counter bump + a short `format!` (see `next_request_id`).
        let request_id = self.state.next_request_id();

        // 1. Route by the **first path segment** = provider; forward the rest of the path verbatim
        // (native passthrough — the gateway holds no per-provider mount knowledge). A path with no
        // provider segment that starts with `/v1` is the drop-in default: dialect picks
        // openai/anthropic and the path is forwarded as-is. Anything else → unknown provider (404).
        // We resolve before auth (an unknown route is cheap) and compute owned values inside the
        // block so the session borrow ends before any `&mut session` reject below.
        let (provider_opt, forward_path) = {
            let uri = &session.req_header().uri;
            let path = uri.path();
            let query = uri.query();
            // `nth(1)`: `/openai/v1/…` → "openai"; `/v1/…` → "v1"; "/" or "" → "".
+ let first = path.split('/').nth(1).unwrap_or(""); + let with_query = |p: &str| match query { + Some(q) => format!("{p}?{q}"), + None => p.to_string(), + }; + if let Some(p) = self.state.provider(first) { + // Provider-prefixed: strip the leading `/{first}` segment, forward the remainder. + let rest = &path[1 + first.len()..]; + ( + Some(p.clone()), + with_query(if rest.is_empty() { "/" } else { rest }), + ) + } else if path.starts_with(route::DEFAULT_PREFIX) { + // Bare default: dialect picks the provider; forward the path unchanged. + let name = route::dialect_default(dialect_for_path(path)); + (self.state.provider(name).cloned(), with_query(path)) + } else { + (None, String::new()) + } + }; + let Some(provider) = provider_opt else { + return Self::reject( + session, + &request_id, + 404, + "invalid_request_error", + "unknown provider", + ) + .await; + }; + // Dialect now comes from the resolved provider (usage parsing + injection eligibility). + let dialect = provider.dialect; + + // 2. Extract the presented key — a managed virtual key (`bai_…`) or a raw BYO provider token. + let Some(raw_key) = extract_virtual_key(session) else { + return Self::reject( + session, + &request_id, + 401, + "authentication_error", + "missing API key", + ) + .await; + }; + + // 3. Rate guardrails (see `ratelimit`), charged on the *raw presented key* **before** any + // verification or upstream connect. Keying on the credential we already hold (rather than the + // verified tenant id) is what lets this sit ahead of the Ed25519 verify: a single leaked, + // runaway, or forged key can't drive unbounded crypto work (per-credential tier), and a flood + // of distinct random BYO tokens can't drive junk-auth connects to providers from our egress + // IPs (global BYO tier — managed traffic is exempt, see `ratelimit`). The `check` borrow of + // `raw_key` ends as the call returns, so the `&mut session` reject is free to run on the + // over-limit path (where `raw_key` is unused afterward). 
+ if let Some(rl) = &self.state.rate_limit { + if let Some(reason) = rl.check(raw_key, raw_key.starts_with("bai_")) { + self.state + .metrics + .rejections_total + .with_label_values(&[reason.label()]) + .inc(); + return Self::reject( + session, + &request_id, + 429, + "rate_limit_error", + "rate limit exceeded", + ) + .await; + } + } + + // 4. Reject oversized bodies up front (Content-Length) so we never buffer a huge upload. + let declared_len = session + .req_header() + .headers + .get("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()); + if let Some(len) = declared_len { + if len > MAX_REQUEST_BODY { + return Self::reject( + session, + &request_id, + 413, + "invalid_request_error", + "request body too large", + ) + .await; + } + } + + // 5. Identity + key handling. `bai_…` → managed (stateless verify → deny-check → swap to the + // pool key). Anything else → BYO: the user's own provider token, passed through unchanged + // (no Beyond identity, so no deny-set and no per-tenant attribution). + let (tenant_id, vpc_id, managed) = if raw_key.starts_with("bai_") { + let Ok(identity) = self.state.keyring.verify(raw_key) else { + self.state + .metrics + .rejections_total + .with_label_values(&["auth"]) + .inc(); + return Self::reject( + session, + &request_id, + 401, + "authentication_error", + "invalid API key", + ) + .await; + }; + // Deny-set: O(1), default-allow. The gateway never learns *why*, only the reason code. + if let Some(reason) = self.state.deny.load().reason(identity.tenant_id) { + // Distinct label per reason — `Unknown` is *not* folded into `deny_fraud`. An + // `Unknown` arises when the control plane writes a reason string this gateway + // doesn't recognize (a control-plane deploy ahead of a gateway deploy), which would + // otherwise spike the fraud counter and mask the real fraud signal. A `deny_unknown` + // label surfaces it as the deployment-coordination issue it is. 
+ let label = match reason { + crate::deny::DenyReason::Spend => "deny_spend", + crate::deny::DenyReason::Fraud => "deny_fraud", + crate::deny::DenyReason::Unknown => "deny_unknown", + }; + self.state + .metrics + .rejections_total + .with_label_values(&[label]) + .inc(); + return Self::reject( + session, + &request_id, + reason.http_status(), + "access_denied", + "tenant is over limit or suspended", + ) + .await; + } + // The actual `Bearer …`/`x-api-key` value is precomputed in the provider registry and + // applied in `upstream_request_filter`; here we only confirm a pool key exists. + if provider.pool_auth_value.is_none() { + return Self::reject( + session, + &request_id, + 503, + "api_error", + "no provider key available", + ) + .await; + } + (identity.tenant_id, identity.vpc_id, true) + } else { + (0, 0, false) + }; + + // Mark OpenAI managed chat/responses streams for body buffering + `stream_options` injection + // (handled in `request_body_filter`). Scoped tight: managed only (BYO stays pure + // passthrough), OpenAI dialect only, streaming-capable paths only — so everything else still + // streams through untouched. Checked on the forwarded path (suffix), so it's prefix-agnostic. + let inject_eligible = + managed && dialect == Dialect::OpenAI && is_streamable_path(&forward_path); + + // Circuit breaker (per provider, all traffic — a down provider is down regardless of whose + // key is used). Checked here, after every other rejection, so claiming a half-open probe + // permit corresponds to an *actual* upstream attempt — and balanced by exactly one + // `record_*` in `logging` (which runs once per admitted request), so a permit can't leak. + // When open, fast-fail 503 instead of piling the request against `read_timeout_secs` and + // exhausting connection/in-flight slots for every provider. 5xx/connect failures trip it; + // 429 never does (that's a healthy provider throttling — see `logging`). 
+ if let Some(breaker) = &provider.breaker { + if breaker.allow().is_err() { + self.state + .metrics + .rejections_total + .with_label_values(&["circuit_open"]) + .inc(); + return Self::reject( + session, + &request_id, + 503, + "api_error", + "provider temporarily unavailable", + ) + .await; + } + } + + *ctx = Some(RequestCtx { + tenant_id, + vpc_id, + dialect, + provider, + forward_path, + managed, + model: String::new(), + model_scanner: peek::ModelScanner::new(), + resp_model_scanner: peek::ModelScanner::new(), + streaming: false, + inject_eligible, + // Only the inject-eligible path ever buffers the request body (to splice + // `stream_options` after the root `{`; the `stream` key can appear anywhere in the root + // object, so the decision needs the whole body — buffering is inherent here, not + // incidental). When it does, pre-size from the declared Content-Length so accumulation is + // a single allocation instead of a geometric realloc chain; capped at `MAX_REQUEST_BODY` + // so a lying header can't pre-allocate unbounded memory. Every other request leaves this + // empty and never buffers. + req_buf: match (inject_eligible, declared_len) { + (true, Some(len)) => Vec::with_capacity(len.min(MAX_REQUEST_BODY)), + _ => Vec::new(), + }, + // Grown lazily by the response tap (`response_body_filter`), not pre-reserved: a + // non-streaming response — the common case — is a few hundred bytes, so reserving the + // full 64KB cap up front would waste an allocation on every request to hold ~200B. A + // long stream grows it geometrically to the bounded 2×cap and compacts; that handful of + // reallocs is lost in the network noise of a stream we're already relaying chunk by chunk. + resp_tail: Vec::new(), + body_bytes_fed: 0, + upstream_status: None, + start, + attempt: 0, + request_id, + }); + // Admitted: count it in-flight. 
Balanced by the decrement in `logging`, which runs exactly + // once per admitted request (rejected requests leave `ctx` None and never reach that path, + // so the gauge can't leak). `active_streams` only covers SSE; this covers every request. + self.state.metrics.requests_in_flight.inc(); + Ok(false) + } + + async fn upstream_peer( + &self, + _session: &mut Session, + ctx: &mut Self::CTX, + ) -> Result> { + // `ctx` is set by `request_filter` for every admitted request; a missing ctx here means an + // unadmitted request reached `upstream_peer` (a Pingora ordering change or future refactor). + // Surface it as an error rather than panicking the worker. + let Some(rc) = ctx.as_ref() else { + return Err(pingora_core::Error::new_str( + "upstream_peer reached without request context", + )); + }; + + // Resolve via the TTL cache (async, non-blocking) rather than `HttpPeer::new`'s eager + // blocking `getaddrinfo`. SNI/Host = the configured host; TLS on for real providers (the + // e2e harness flips `upstream_tls=false` for a plaintext mock). + let addr = match self.state.resolve(&rc.provider.authority).await { + Ok(a) => a, + Err(e) => { + // DNS failures are rare and usually mean a misconfigured `provider_authorities` + // override — so keep the diagnostic (provider name + authority + the resolver error, + // already formatted into `e`) instead of discarding it behind an opaque static string. + // `error_because` chains `e` as the cause so it shows in the Pingora error log. 
+ warn!( + request_id = %rc.request_id, + provider = rc.provider.name.as_str(), + authority = rc.provider.authority.as_str(), + error = %e, + "upstream dns resolution failed", + ); + return Err(pingora_core::Error::because( + pingora_core::ErrorType::ConnectError, + "upstream dns resolution failed", + e, + )); + } + }; + let mut peer = HttpPeer::new( + addr, + self.state.config.upstream_tls, + rc.provider.host.clone(), + ); + // Prefer HTTP/2 to the provider (config `upstream_http2`, default on), fall back to HTTP/1.1. + // Every provider in `KNOWN_PROVIDERS` negotiates `h2` over TLS (verified by handshake), and H2 + // multiplexes many concurrent requests/streams over one connection — fewer sockets and TLS + // handshakes from our egress IPs (which also eases the egress-reputation pressure `ratelimit` + // guards). `H2H1` is strictly ≥ `H1` on compatibility: ALPN negotiates down to H1 for any host + // that doesn't offer h2, and a plaintext upstream (the mock, `upstream_tls=false`) has no ALPN + // at all and stays H1. The negotiated protocol is then visible per-request as + // `upstream_request.version` (see `upstream_request_filter`), which is what lets the + // body-injection path frame correctly. The knob lets an operator force all-H1 without a code + // redeploy, and lets the e2e bench compare the two head-to-head. + peer.options.alpn = if self.state.config.upstream_http2 { + ALPN::H2H1 + } else { + ALPN::H1 + }; + // Cert verification is on everywhere except the bench's self-signed TLS mock (see config). 
+ if !self.state.config.upstream_verify_cert { + peer.options.verify_cert = false; + peer.options.verify_hostname = false; + } + peer.options.connection_timeout = + Some(Duration::from_secs(self.state.config.connect_timeout_secs)); + peer.options.read_timeout = Some(Duration::from_secs(self.state.config.read_timeout_secs)); + peer.options.write_timeout = + Some(Duration::from_secs(self.state.config.write_timeout_secs)); + peer.options.idle_timeout = Some(Duration::from_secs(self.state.config.idle_timeout_secs)); + Ok(Box::new(peer)) + } + + async fn upstream_request_filter( + &self, + _session: &mut Session, + upstream_request: &mut pingora::http::RequestHeader, + ctx: &mut Self::CTX, + ) -> Result<()> { + let Some(rc) = ctx.as_ref() else { + return Ok(()); + }; + + // Managed: swap the virtual key for the real pool key (precomputed at boot) in the scheme + // the upstream wants — removing *both* inbound auth headers first so the virtual key never + // leaks upstream. BYO (`!managed`): leave the user's own auth header exactly as presented. + if rc.managed { + if let Some(av) = &rc.provider.pool_auth_value { + upstream_request.remove_header("authorization"); + upstream_request.remove_header("x-api-key"); + upstream_request.insert_header(rc.provider.auth.header(), av.expose())?; + } + } + + // Point Host at the upstream. + upstream_request.insert_header("host", rc.provider.host.as_str())?; + + // Forward the provider-native path (computed in `request_filter`): the client path with the + // `/{provider}` segment stripped, or unchanged for a bare-path default. We send it verbatim — + // no per-provider rewriting. Only set the URI when it actually differs from the inbound path + // (i.e. a `/{provider}` prefix was stripped); the bare-path case needs no change, so we skip + // the parse + realloc. The body's framing (Content-Length / chunked) is preserved. 
+ if rc.forward_path + != upstream_request + .uri + .path_and_query() + .map(|pq| pq.as_str()) + .unwrap_or("") + && let Ok(uri) = rc.forward_path.parse() + { + upstream_request.set_uri(uri); + } + + // Injection-eligible (OpenAI managed stream): the body is rewritten in `request_body_filter`, + // changing its length, and we can't know the new length here (headers go out before the body + // filter runs). So drop the client's `Content-Length`; how the now-unknown length is framed + // depends on the **negotiated upstream protocol**, which is reliably readable here as + // `upstream_request.version`: pingora-proxy sets it to HTTP/2 before this filter on the H2 path + // (`proxy_h2.rs`) and to HTTP/1.1 on the H1 path (`proxy_h1.rs`). + // + // - **H1**: a body with neither `content-length` nor `transfer-encoding` is framed as + // *zero-length* by pingora's H1 client (RFC 9112 §6.3) — the injected body would be + // silently dropped. So we must set `transfer-encoding: chunked`. + // - **H2**: bodies are delimited by `END_STREAM`, and `transfer-encoding` is a forbidden + // connection-specific header — the `h2` crate *rejects the whole request* + // (`UserError::MalformedHeaders`) if it's present. So we must NOT set it; removing + // `content-length` is sufficient and correct. + if rc.inject_eligible { + upstream_request.remove_header("content-length"); + if upstream_request.version != http::Version::HTTP_2 { + upstream_request.insert_header("transfer-encoding", "chunked")?; + } + } + Ok(()) + } + + async fn request_body_filter( + &self, + _session: &mut Session, + body: &mut Option, + end_of_stream: bool, + ctx: &mut Self::CTX, + ) -> Result<()> { + let Some(rc) = ctx.as_mut() else { + return Ok(()); + }; + // Feed the body through the structural scanner as it passes (never withheld, never + // buffered) to extract the exact root-level `model`. Body framing is untouched. 
+ if let Some(chunk) = body.as_ref() { + // Enforce the body cap on the *streamed* size too: the up-front `Content-Length` check in + // `request_filter` can't see a chunked-encoded body (no declared length). We don't buffer + // — we just count — and abort the proxied request once the running total crosses the cap. + // Aborting (vs. a clean 413) is acceptable here: headers are already away to the upstream, + // and this is an abuse guard, not a normal client path. + rc.body_bytes_fed = rc.body_bytes_fed.saturating_add(chunk.len()); + if rc.body_bytes_fed > MAX_REQUEST_BODY { + self.state + .metrics + .rejections_total + .with_label_values(&["body_too_large"]) + .inc(); + return Err(pingora_core::Error::new_str("request body exceeds limit")); + } + rc.model_scanner.feed(chunk); + // Eligible requests are buffered so we can splice the root object before any byte reaches + // the upstream (injection inserts near the front, so we can't have forwarded it already). + if rc.inject_eligible { + rc.req_buf.extend_from_slice(chunk); + } + } + + if rc.inject_eligible { + if end_of_stream { + // Emit the whole (possibly rewritten) body in one shot; `transfer-encoding: chunked` + // (set in `upstream_request_filter`) makes the changed length fine. + let buf = std::mem::take(&mut rc.req_buf); + *body = Some(Bytes::from(maybe_inject_stream_usage(buf))); + } else { + // Withhold — the bytes are buffered above; nothing goes upstream until end-of-stream. + *body = None; + } + } + + if end_of_stream && rc.model.is_empty() { + if let Some(m) = rc.model_scanner.take_model() { + rc.model = sanitize_model(m).into_owned(); + } + } + Ok(()) + } + + async fn response_filter( + &self, + _session: &mut Session, + upstream_response: &mut ResponseHeader, + ctx: &mut Self::CTX, + ) -> Result<()> { + if let Some(rc) = ctx.as_mut() { + // Headers arrived ≈ time-to-first-byte. 
Per-provider handle resolved once at boot (see + // `ProviderMetrics`) — first-token latency is per-provider, so an unlabeled histogram + // can't tell you which one regressed. + rc.provider + .metrics + .ttft_seconds + .observe(rc.start.elapsed().as_secs_f64()); + + // Per-provider response counter, bucketed by status class — the signal that a provider + // is degrading (429/5xx) before it shows up only as latency or a missing usage event. + let status = upstream_response.status.as_u16(); + rc.provider.metrics.record_response(status); + // Remember the status for the circuit-breaker outcome resolved in `logging` (a response + // arrived, so the provider is reachable — even a 429/5xx is a real answer, not a connect + // failure). `logging` decides failure-vs-success from this. + rc.upstream_status = Some(status); + + // Derive streaming from the response, not the request: SSE ⇒ use the streaming usage + // parser; otherwise the body is a single JSON object. + rc.streaming = upstream_response + .headers + .get("content-type") + .and_then(|v| v.to_str().ok()) + .is_some_and(|ct| ct.contains("event-stream")); + // Track concurrent SSE streams. Incremented here (response head is in), decremented in + // `logging` once the stream completes — so the gauge reflects in-flight streams, not a + // counter that only ever climbs. Non-streaming responses don't touch it. + if rc.streaming { + self.state.metrics.active_streams.inc(); + } + + // Echo the request id so a client (or an oncall reading a captured response) can quote it + // and land on this request's log line. `insert_header` only fails on an invalid value; + // our id is `[0-9a-f-]`, always valid — but surface a failure rather than silently drop. 
+ upstream_response.insert_header(REQUEST_ID_HEADER, rc.request_id.as_str())?; + } + Ok(()) + } + + fn response_body_filter( + &self, + _session: &mut Session, + body: &mut Option, + _end_of_stream: bool, + ctx: &mut Self::CTX, + ) -> Result> + where + Self::CTX: Send + Sync, + { + // Passive tap: copy each chunk into a bounded tail for usage parsing, but never withhold it + // — chunks pass straight through, so the stream is relayed with no added buffering. + // + // We let the tail grow to 2× the cap, then compact once with a single `copy_within` that + // keeps the last cap bytes. This bounds memory the same way the old per-chunk `drain` did, + // but moves bytes O(stream_len / cap) times instead of once per chunk — for a long stream of + // small chunks that's the difference between one memmove per 64 KB and one per chunk. + if let (Some(rc), Some(chunk)) = (ctx.as_mut(), body.as_ref()) { + // Tap the provider-reported (resolved/billed) model from the response *head* — the + // scanner stops at the first root `model`, so this is O(1) and cheap (it finds the model + // in the first chunk and ignores the rest). Kept separate from the tail because the model + // is at the start of the response while the usage event is at the end. + rc.resp_model_scanner.feed(chunk); + + rc.resp_tail.extend_from_slice(chunk); + if rc.resp_tail.len() > 2 * USAGE_TAIL_CAP { + let keep_from = rc.resp_tail.len() - USAGE_TAIL_CAP; + rc.resp_tail.copy_within(keep_from.., 0); + rc.resp_tail.truncate(USAGE_TAIL_CAP); + } + } + Ok(None) + } + + fn fail_to_connect( + &self, + _session: &mut Session, + _peer: &HttpPeer, + ctx: &mut Self::CTX, + mut e: Box, + ) -> Box { + if let Some(rc) = ctx.as_mut() { + // Retry transient connect failures a couple of times (Pingora re-invokes upstream_peer). + if rc.attempt < MAX_CONNECT_RETRIES { + rc.attempt += 1; + // Surface the retry. 
Without this, a partially-down provider TCP layer (or an + // egress-IP ban — connect is where that first bites) shows up only as extra latency + // on `upstream_latency_seconds`, indistinguishable from a slow model. The counter is + // the dashboard signal; the `warn!` carries the request_id to grep. + rc.provider.metrics.connect_retries_total.inc(); + warn!( + request_id = %rc.request_id, + provider = rc.provider.name.as_str(), + attempt = rc.attempt, + error = %e, + "upstream connect failed; retrying", + ); + e.set_retry(true); + } + } + e + } + + async fn logging( + &self, + _session: &mut Session, + e: Option<&pingora_core::Error>, + ctx: &mut Self::CTX, + ) { + let Some(rc) = ctx.as_mut() else { return }; + + // Balance the in-flight gauge incremented at admission. `logging` runs exactly once per + // admitted request — including on upstream errors and client disconnects — so the gauge + // always returns to baseline and can't drift upward. + self.state.metrics.requests_in_flight.dec(); + + // An upstream error (DNS/connect timeout, read timeout, abort) lands here with `Some(e)` but + // no `ai.usage` row (no parseable body) — and the earlier `warn!` in `upstream_peer` only + // fires for DNS, not connect/read failures. Log it with the full identity so "why did tenant + // 42 get 502s for 5 minutes" is one grep on the request_id, not a reconstruction. + if let Some(e) = e { + warn!( + request_id = %rc.request_id, + tenant_id = rc.tenant_id, + vpc_id = rc.vpc_id, + provider = rc.provider.name.as_str(), + error = %e, + "upstream request errored", + ); + } + + // Resolve the circuit-breaker outcome exactly once per admitted request (every request that + // claimed a permit in `request_filter` records here, so a half-open probe permit can't leak). + // Failure = the provider is *broken*: a 5xx response, or no response at all paired with an + // upstream error (connect/read failure). 
Success = the provider *answered* — 2xx/3xx, and + // deliberately **4xx/429 too**: a 429 is a healthy provider throttling our pool key, which the + // rate limiter and the client's `Retry-After` own, NOT a reason to cut all traffic to it. + if let Some(breaker) = &rc.provider.breaker { + match rc.upstream_status { + Some(s) if s >= 500 => breaker.record_failure(), + Some(_) => breaker.record_success(), + None if e.is_some() => breaker.record_failure(), + // No response and no error ⇒ client went away before the upstream answered; don't + // blame the provider — record success so the probe permit resolves. + None => breaker.record_success(), + } + } + + // The buffer may transiently hold up to 2× the cap before compaction; the usage event is + // always in the last cap bytes, so slice to that bounded tail before parsing. + let tail_start = rc.resp_tail.len().saturating_sub(USAGE_TAIL_CAP); + let tail = &rc.resp_tail[tail_start..]; + + // Extract usage facts from the tail (shape depends on dialect + streaming). + let usage = match (rc.dialect, rc.streaming) { + (Dialect::OpenAI, true) => usage::openai_stream(tail), + (Dialect::OpenAI, false) => usage::openai_body(tail), + (Dialect::Anthropic, true) => usage::anthropic_stream(tail), + (Dialect::Anthropic, false) => usage::anthropic_body(tail), + } + .unwrap_or_default(); + + let m = &self.state.metrics; + // Pre-resolved fixed-label children (see `Metrics`) — no per-call `with_label_values` lookup. + m.tokens_input.inc_by(usage.input_tokens); + m.tokens_output.inc_by(usage.output_tokens); + // Cache tokens, too — these are in the `ai.usage` billing log below, but that ships with lag; + // the counter is the alerting surface for a cache-hit-rate cliff after a deploy. 
+ m.tokens_cache_read.inc_by(usage.cache_read_tokens); + m.tokens_cache_write.inc_by(usage.cache_write_tokens); + rc.provider + .metrics + .upstream_latency_seconds + .observe(rc.start.elapsed().as_secs_f64()); + // Balance the `active_streams` increment from `response_filter`. `logging` runs exactly once + // per request (including on upstream errors / client disconnects), so a stream that opened is + // always accounted closed here — the gauge can't leak upward. + if rc.streaming { + m.active_streams.dec(); + } + + // Emit the usage *fact* on a dedicated target — **managed only**. The event is an + // identity-keyed billing record (logfwd/OTLP ships `ai.usage` → ClickHouse → a closed + // pricing consumer); BYO carries no Beyond identity, so a BYO event would be a billing row + // with `tenant_id=0` — unbillable, unattributable, and a footgun for any consumer that sums + // without filtering it out. Aggregate gateway throughput (incl. BYO) is already covered by + // the Prometheus metrics above, which is the right tool for non-billing observability. + if rc.managed { + // Emit BOTH models. `model` is the one the *provider* resolved + billed (echoed in its + // response) — the key for pricing AND for reconciling against the provider's invoice, + // which itemizes by the pinned snapshot. `requested_model` is the alias the client sent — + // product analytics ("what they asked for") and a fallback rate when a snapshot is newer + // than the downstream price table. They're equal when the response carried no model (e.g. + // an error body), where `model` falls back to the request alias. Both sanitized. + let billed = rc.resp_model_scanner.take_model().map(sanitize_model); + // Borrow the requested model as the fallback rather than cloning it — it's still read as + // `requested_model` below, so a clone would be pure waste on every managed response. 
+ let billed_model = billed.as_deref().unwrap_or(&rc.model); + info!( + target: "ai.usage", + request_id = %rc.request_id, + tenant_id = rc.tenant_id, + vpc_id = rc.vpc_id, + provider = rc.provider.name.as_str(), + model = billed_model, + requested_model = %rc.model, + stream = rc.streaming, + input_tokens = usage.input_tokens, + output_tokens = usage.output_tokens, + cache_read_tokens = usage.cache_read_tokens, + cache_write_tokens = usage.cache_write_tokens, + latency_ms = rc.start.elapsed().as_millis() as u64, + "usage" + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanitize_model_passes_real_ids() { + for id in [ + "gpt-4o", + "claude-opus-4-8", + "openrouter/meta-llama/llama-3.1", + "accounts/fireworks/models/llama-v3p1-70b-instruct", + "gpt-4o-mini-2024-07-18", + ] { + assert_eq!(sanitize_model(id.to_string()), id); + } + } + + #[test] + fn sanitize_model_rejects_json_and_log_injection() { + // A `"` would close the JSON string; `\` could escape; a newline breaks line-oriented log + // shipping. Any of them ⇒ recorded as "unknown" rather than injected into the billing log. + for evil in [ + r#"real","injected":"x"#, + r#"a\b"#, + "line1\nline2", + "ctrl\u{0}byte", + ] { + assert_eq!(sanitize_model(evil.to_string()), "unknown"); + } + } + + #[test] + fn sanitize_model_rejects_overlong() { + let long = "a".repeat(MAX_MODEL_LEN + 1); + assert_eq!(sanitize_model(long), "unknown"); + // Exactly at the cap is fine. + let ok = "a".repeat(MAX_MODEL_LEN); + assert_eq!(sanitize_model(ok.clone()), ok); + } +} diff --git a/src/ratelimit.rs b/src/ratelimit.rs new file mode 100644 index 0000000..19caa9d --- /dev/null +++ b/src/ratelimit.rs @@ -0,0 +1,233 @@ +//! Request-rate guardrails — blast-radius circuit breakers, **not** a spend control. +//! +//! The deny-set (see `deny`) is the spend/fraud authority, but it's *cumulative* and reacts on a +//! 
lag: it only learns of spend after usage facts round-trip through the control plane, and it's +//! structurally blind to request floods that never bill — auth failures (rejected here, never reach +//! upstream), provider 4xx, and BYO traffic (on the caller's own key, no Beyond identity). Two tiers +//! cap velocity, both charged in `proxy::request_filter` *before* the Ed25519 verify and the upstream +//! connect, so a flood can't drive unbounded crypto/socket work: +//! +//! 1. **Per-credential** — keyed by the raw presented credential (the whole `bai_…` virtual key or +//! BYO token). Catches a single leaked/runaway key. Granularity is per-credential: managed virtual +//! keys are deterministic per `(tenant, app)`, so this is effectively a per-(tenant, app) ceiling — +//! one credential's runaway can't throttle another. A flood of *distinct* credentials slips past +//! it (every random string is its own bucket), which is what tier 2 exists for. +//! +//! 2. **Global BYO aggregate** — a single bucket for *all* BYO traffic combined. BYO is unverified +//! and upstream-bound: a flood of distinct random BYO tokens would otherwise open junk-auth +//! connections to providers from our egress IPs, getting them rate-limited or banned (we put +//! ourselves in the firing line). This bounds that aggregate regardless of how the tokens vary. +//! **Managed traffic is exempt** — it's Ed25519-verified before any upstream connect and can't be +//! forged (the signing key lives only in the control plane), so a random `bai_` flood fails verify +//! and never reaches a provider (CPU only, no egress impact). Exempting it means this shared bucket +//! only ever sheds BYO load under a flood, never the core managed tenants. +//! +//! Both tiers are deliberately generous: ceilings well above legitimate steady state, so they never +//! trip in normal operation. Tune from `ai_rejections_total{reason="rate_limit"}` (per-credential) +//! 
and `{reason="rate_limit_byo_global"}` (BYO aggregate). +//! +//! ## Design decision: why a global BYO cap and not per-source-IP (READ BEFORE CHANGING) +//! +//! The threat that shaped tier 2 is **egress-IP reputation**, not gateway CPU. We are an egress proxy: +//! BYO requests connect outward to OpenAI/Anthropic/OpenRouter/… *from our IPs* carrying the caller's +//! token. A flood of distinct **junk** BYO tokens makes those providers see a torrent of failed-auth +//! connections from us and rate-limit or ban our egress IPs — taking down BYO for *everyone*, and +//! degrading managed traffic that shares the same egress. That blast radius is why this lives here and +//! is on by default, rather than being pushed entirely to the mesh/ingress. +//! +//! **Per-source-IP limiting was considered and rejected** as the primary control. It's the surgical +//! answer in principle (throttle only the noisy source), but it depends on the calling task's real IP +//! being visible here — and in production we front this with ECS Service Connect, where it is unclear +//! whether the peer address is the client task or a collapsed mesh/proxy hop. If it's collapsed, +//! per-IP keying is worse than nothing: it either does nothing (all sources share one IP, so no single +//! key trips) or throttles every tenant at once. We refused to hinge an egress-protection control on +//! an unverified topology assumption. The global BYO cap is **topology-independent** — it bounds the +//! aggregate no matter how source identity is mangled. (If/when we confirm real per-task IPs reach us, +//! a per-IP tier is a reasonable *addition* in front of this — not a replacement.) +//! +//! ## What this deliberately does NOT cover (the residual — don't assume it's solved) +//! +//! - **The BYO cap is a shared bucket.** A flood large enough to hit `byo_rate_limit_rps` *does* shed +//! legitimate BYO callers along with the attacker — they're indistinguishable at admit time (we +//! 
reject before we know a token is junk). The trust segmentation (managed exempt) bounds the blast +//! radius to BYO only; it does not make the BYO shedding selective. +//! - **The default ceiling is a guess.** `byo_rate_limit_rps = 1000` was picked without real BYO +//! traffic numbers — high enough to clear plausible legitimate use, low enough that a junk flood +//! can't realistically get us banned. It is meant to be tuned from the metric, not trusted as-is. +//! - **A more selective control is the next step, not this.** The surgical fix for egress reputation +//! is a **provider-feedback circuit breaker**: watch upstream responses and back BYO off a provider +//! when we see a burst of `401`s (junk auth) from it, instead of capping all BYO blindly. That reacts +//! to the actual signal (providers rejecting us) and spares legitimate BYO. It's a real feature, not +//! a guardrail, so it's intentionally out of scope here. If you're here because the blunt cap hurt, +//! build that — don't just raise the number. +//! +//! Backed by pingora-limits' `Rate`: count-min-sketch estimators with **fixed memory regardless of +//! key cardinality** (no per-credential entry, no background GC), matching the deny-set's O(denied) +//! ethos. A sketch can *over*estimate a key's rate on hash collision but never under, so a cap is +//! always enforced; `SLOTS` is sized wide enough that overestimation stays negligible. + +use pingora_limits::rate::Rate; +use std::hash::{BuildHasher, RandomState}; +use std::time::Duration; + +/// Count-min sketch dimensions for the per-credential tier. The estimator can only *over*estimate a +/// key's rate (never under — so the cap always holds); the additive error is bounded by +/// `(e / SLOTS) × N`, where `N` is total req/s across *all* credentials on the node. 
Sized for a +/// single high-volume node: at `SLOTS = 65536` that error stays ≤ ~5 even at ~100k req/s aggregate — +/// far under the per-credential ceiling, so a legitimate caller near its limit isn't false-throttled. +/// `HASHES = 5` sets the tail confidence (≈ `e^-5` ≈ 0.7% of checks may exceed that bound; the +/// estimate is the min over the 5 rows). Memory is `2 × HASHES × SLOTS × 8 B` ≈ **5 MB, fixed** +/// regardless of credential cardinality (no per-key entry, no GC). To resize: `SLOTS ≈ e × peak_N / +/// tolerable_error`. +const SLOTS: usize = 65536; +const HASHES: usize = 5; + +/// The rate window. Every ceiling is expressed per this interval, i.e. requests/second. +const WINDOW: Duration = Duration::from_secs(1); + +/// The single sketch key the global BYO tier counts everything under (one shared bucket). +const BYO_GLOBAL_KEY: u8 = 0; + +/// Why a request was throttled — carried out so the caller can label the rejection metric and an +/// operator can tell *which* ceiling tripped (and thus which knob to tune). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Throttled { + /// A single credential exceeded its per-credential ceiling. + PerCredential, + /// Aggregate BYO traffic exceeded the global ceiling. + ByoGlobal, +} + +impl Throttled { + /// The `ai_rejections_total{reason=…}` label. `PerCredential` keeps the original `"rate_limit"` + /// label so existing dashboards/alerts are unbroken. + pub fn label(self) -> &'static str { + match self { + Throttled::PerCredential => "rate_limit", + Throttled::ByoGlobal => "rate_limit_byo_global", + } + } +} + +pub struct RateLimit { + /// `(sketch, max_per_window)` for the per-credential tier. `None` disables it. + per_cred: Option<(Rate, isize)>, + /// `(sketch, max_per_window)` for the global BYO aggregate tier. `None` disables it. + byo_global: Option<(Rate, isize)>, + /// Process-random hash state. 
The raw credential is reduced to the per-credential sketch key + /// through this, so the SipHash key is per-process and secret. Without it the digest would be + /// precomputable (`DefaultHasher` keys on zeros), letting an attacker craft two tokens that + /// collide into the same slots and inflate another caller's counter — false throttling. Random + /// seeding makes that collision search infeasible. + hasher: RandomState, +} + +impl RateLimit { + /// `per_cred_rps` is the per-credential ceiling; `byo_global_rps` is the aggregate BYO ceiling. + /// Either tier is disabled by passing `0`. Returns `None` (no limiter at all) only when both are + /// `0`, so the hot path can skip it entirely. + pub fn new(per_cred_rps: u32, byo_global_rps: u32) -> Option { + if per_cred_rps == 0 && byo_global_rps == 0 { + return None; + } + Some(Self { + per_cred: (per_cred_rps != 0).then(|| { + ( + Rate::new_with_estimator_config(WINDOW, HASHES, SLOTS), + per_cred_rps as isize, + ) + }), + // One bucket, so the default estimator is plenty — no need for the wide sketch. + byo_global: (byo_global_rps != 0).then(|| (Rate::new(WINDOW), byo_global_rps as isize)), + // `RandomState::new()` draws a fresh SipHash key from the OS RNG per process. + hasher: RandomState::new(), + }) + } + + /// Charge one request. `managed` is `true` for a verified-path (`bai_…`) credential, `false` for + /// BYO. Returns `None` when within budget, or `Some(reason)` once a ceiling is crossed — the very + /// request that crosses the line is the first one rejected (`observe` returns the running total). + /// The credential itself is never stored; only its seeded digest feeds the per-credential sketch. + /// + /// `#[must_use]`: `observe` has already incremented the counters by the time this returns, so a + /// caller that drops the result has *charged* the request but skipped enforcement — the limiter is + /// silently bypassed. 
The crate's `#![deny(unused_must_use)]` only bites with this attribute + /// present, so it's load-bearing, not decorative. + #[must_use = "the throttle decision must be enforced — dropping it charges the request but lets it through"] + pub fn check(&self, raw_credential: &str, managed: bool) -> Option { + // Global BYO backstop first: BYO is unverified and upstream-bound, so this is the ceiling that + // protects our egress IPs from a distinct-token flood. Managed traffic skips it (verified, + // can't be forged, already bounded per-credential) so it never shares this bucket. + if !managed { + if let Some((rate, max)) = &self.byo_global { + if rate.observe(&BYO_GLOBAL_KEY, 1) > *max { + return Some(Throttled::ByoGlobal); + } + } + } + // Per-credential ceiling: a single leaked/runaway key (managed or BYO), capped before verify. + if let Some((rate, max)) = &self.per_cred { + let key = self.hasher.hash_one(raw_credential); + if rate.observe(&key, 1) > *max { + return Some(Throttled::PerCredential); + } + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn both_zero_disables() { + assert!(RateLimit::new(0, 0).is_none()); + } + + #[test] + fn per_credential_allows_up_to_ceiling_then_rejects() { + let rl = RateLimit::new(5, 0).unwrap(); + let cred = "bai_v1.1.payload.sig"; + for _ in 0..5 { + assert_eq!(rl.check(cred, true), None); + } + // 6th request in the same 1s window crosses the per-credential ceiling. 
+ assert_eq!(rl.check(cred, true), Some(Throttled::PerCredential)); + } + + #[test] + fn credentials_have_independent_budgets() { + let rl = RateLimit::new(2, 0).unwrap(); + assert_eq!(rl.check("token-1", false), None); + assert_eq!(rl.check("token-1", false), None); + assert_eq!(rl.check("token-1", false), Some(Throttled::PerCredential)); // token-1 exhausted + assert_eq!(rl.check("token-2", false), None); // a different credential is unaffected + } + + #[test] + fn byo_global_caps_distinct_tokens_but_exempts_managed() { + // Per-credential disabled, global BYO ceiling = 3. A flood of *distinct* BYO tokens (which + // would each slip past per-credential keying) is still bounded by the shared bucket. + let rl = RateLimit::new(0, 3).unwrap(); + assert_eq!(rl.check("byo-aaaa", false), None); + assert_eq!(rl.check("byo-bbbb", false), None); + assert_eq!(rl.check("byo-cccc", false), None); + assert_eq!(rl.check("byo-dddd", false), Some(Throttled::ByoGlobal)); // 4th distinct token + + // Managed traffic is exempt from the BYO bucket — a distinct `bai_…` flood is never throttled + // here (it's bounded by verify failing, not by this ceiling). + for i in 0..10 { + assert_eq!(rl.check(&format!("bai_v1.1.p{i}.s{i}"), true), None); + } + } + + #[test] + fn byo_global_does_not_touch_managed_budget() { + // With only the global BYO tier on, managed requests pass freely while BYO is being capped. + let rl = RateLimit::new(0, 1).unwrap(); + assert_eq!(rl.check("byo-1", false), None); + assert_eq!(rl.check("byo-2", false), Some(Throttled::ByoGlobal)); // BYO bucket exhausted + assert_eq!(rl.check("bai_v1.1.p.s", true), None); // managed unaffected + } +} diff --git a/src/route.rs b/src/route.rs new file mode 100644 index 0000000..e59c5bf --- /dev/null +++ b/src/route.rs @@ -0,0 +1,305 @@ +//! Provider routing and per-provider wire details — **data-driven**. +//! +//! The provider is the **first path segment** of the request (`/{provider}/…`); the rest of the path +//! 
/// The default API prefix OpenAI/Anthropic clients use. A request with no provider segment that
/// starts with this is routed to a default provider by [`dialect_for_path`](crate::proxy) (the
/// bare-path drop-in case); anything else with an unknown first segment is a 404.
pub const DEFAULT_PREFIX: &str = "/v1";

/// Which API surface the client called. Drives usage parsing and the bare-path default provider.
/// On a provider-prefixed request it's the selected provider's own [`Provider::dialect`]; on a
/// bare-path request it's derived from the path (`proxy::dialect_for_path`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dialect {
    OpenAI,
    Anthropic,
}

/// How the upstream expects the API key. OpenAI-wire providers use
/// `Authorization: Bearer <key>`; Anthropic uses the `x-api-key` header. The gateway swaps the
/// client's virtual key for the real pool key in whichever header the upstream wants (see
/// `proxy`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AuthScheme {
    Bearer,
    XApiKey,
}

impl AuthScheme {
    /// The (lower-case) request header the upstream expects the key in.
    pub const fn header(self) -> &'static str {
        match self {
            AuthScheme::Bearer => "authorization",
            AuthScheme::XApiKey => "x-api-key",
        }
    }

    /// Format `key` as the upstream wants it for [`Self::header`]: `Bearer <key>` for
    /// [`AuthScheme::Bearer`], the bare key for [`AuthScheme::XApiKey`].
    pub fn format(self, key: &str) -> String {
        match self {
            AuthScheme::Bearer => format!("Bearer {key}"),
            AuthScheme::XApiKey => key.to_string(),
        }
    }
}

/// Static wire facts for a known provider. Adding a provider = one row in [`KNOWN_PROVIDERS`].
///
/// Derives the same set as [`Dialect`] / [`AuthScheme`] so a spec can be logged, compared and
/// copied like the enums it contains.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ProviderSpec {
    /// Provider name; the first path segment that selects it (`/{name}/…`).
    pub name: &'static str,
    /// Default upstream `host:port` (TLS:443). Overridable per-provider via config.
    pub authority: &'static str,
    /// The provider's wire format — drives usage parsing and `stream_options` injection
    /// eligibility. (We forward the client's path verbatim, so the *path* doesn't tell us the
    /// wire format; the provider does.)
    pub dialect: Dialect,
    /// Which header/format the upstream expects its API key in.
    pub auth: AuthScheme,
}
+pub const KNOWN_PROVIDERS: &[ProviderSpec] = &[ + // docs: https://platform.openai.com/docs/api-reference/authentication — base https://api.openai.com/v1, Bearer. + // Client path: /openai/v1/… (or bare /v1/… as the default). + ProviderSpec { + name: "openai", + authority: "api.openai.com:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://docs.claude.com/en/api/messages — base https://api.anthropic.com, Messages at /v1/messages, + // auth is `x-api-key` (NOT Bearer). The required `anthropic-version` header is the client's; we pass it through. + // Client path: /anthropic/v1/messages (or bare /v1/messages as the default). + ProviderSpec { + name: "anthropic", + authority: "api.anthropic.com:443", + dialect: Dialect::Anthropic, + auth: AuthScheme::XApiKey, + }, + // docs: https://openrouter.ai/docs/quickstart — base https://openrouter.ai/api/v1, Bearer. + // Client path: /openrouter/api/v1/chat/completions. + ProviderSpec { + name: "openrouter", + authority: "openrouter.ai:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://docs.fireworks.ai/tools-sdks/openai-compatibility — base https://api.fireworks.ai/inference/v1, Bearer. + // Client path: /fireworks/inference/v1/chat/completions. + ProviderSpec { + name: "fireworks", + authority: "api.fireworks.ai:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://console.groq.com/docs/openai — base https://api.groq.com/openai/v1, Bearer. + // Client path: /groq/openai/v1/chat/completions. + ProviderSpec { + name: "groq", + authority: "api.groq.com:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://api-docs.deepseek.com/ — base https://api.deepseek.com/v1 (the `/v1` is an OpenAI-compat alias, + // not API versioning); /v1/chat/completions is officially supported. Bearer. Client path: /deepseek/v1/…. 
+ ProviderSpec { + name: "deepseek", + authority: "api.deepseek.com:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://docs.together.ai/docs/openai-api-compatibility — base https://api.together.ai/v1, Bearer. + // Canonical host is `api.together.ai`; the legacy `api.together.xyz` is still live but no longer documented. + // Client path: /together/v1/…. + ProviderSpec { + name: "together", + authority: "api.together.ai:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://inference-docs.cerebras.ai/resources/openai — base https://api.cerebras.ai/v1, Bearer. + // Client path: /cerebras/v1/…. + ProviderSpec { + name: "cerebras", + authority: "api.cerebras.ai:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://docs.mistral.ai/api/ — base https://api.mistral.ai/v1, Bearer. Client path: /mistral/v1/…. + ProviderSpec { + name: "mistral", + authority: "api.mistral.ai:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, + // docs: https://docs.x.ai/docs/api-reference — base https://api.x.ai/v1, Bearer. Reasoning models are slow: + // the generous read/idle timeouts (see `config`) matter here. Client path: /xai/v1/…. + ProviderSpec { + name: "xai", + authority: "api.x.ai:443", + dialect: Dialect::OpenAI, + auth: AuthScheme::Bearer, + }, +]; + +/// The default provider name for a dialect — used only for the **bare-path** request (no provider +/// segment), where the dialect is derived from the path. A provider-prefixed request names its +/// provider directly. +pub fn dialect_default(d: Dialect) -> &'static str { + match d { + Dialect::OpenAI => "openai", + Dialect::Anthropic => "anthropic", + } +} + +/// A *resolved* provider: static wire facts + the boot-resolved upstream authority/host + (for +/// managed traffic) the precomputed pool auth header value. 
Built once at boot (see +/// `state::build_providers`); the request hot path holds an `Arc` (cheap clone) and +/// borrows these fields, so nothing is re-allocated or re-formatted per request. +pub struct Provider { + pub name: String, + /// Upstream `host:port`. + pub authority: String, + /// Bare upstream host (SNI / `Host` header) = authority without the port. + pub host: String, + /// The provider's wire format (usage parsing + injection eligibility). See [`ProviderSpec::dialect`]. + pub dialect: Dialect, + pub auth: AuthScheme, + /// Precomputed managed auth header value (`Bearer <key>` / bare key). `None` ⇒ no pool key is + /// configured for this provider ⇒ managed requests to it are rejected (503). Kept in `Secret` + /// for the redacting-`Debug` + zeroize-on-drop hygiene of the underlying key. + pub pool_auth_value: Option, + /// Per-provider metric handles, resolved once here so the response path bumps a direct + /// counter/histogram instead of a string-keyed label lookup per response. + pub metrics: ProviderMetrics, + /// Per-provider circuit breaker, shared across all callers to this provider. `None` when the + /// breaker is disabled (`circuit_breaker_threshold == 0`). Checked before connect and fed the + /// 5xx/connect outcome — see `proxy`. Lock-free, so the hot path reads it without contention. + pub breaker: Option, +} + +impl Provider { + /// Resolve a provider from its name, upstream authority, dialect, auth scheme, (optional) pool + /// key, and pre-resolved per-provider metric handles. Derives the bare host and precomputes the + /// managed auth header value once. 
+ pub fn resolve( + name: &str, + authority: String, + dialect: Dialect, + auth: AuthScheme, + pool_key: Option<&str>, + metrics: ProviderMetrics, + breaker: Option, + ) -> Self { + let host = authority + .split(':') + .next() + .unwrap_or(&authority) + .to_string(); + let pool_auth_value = pool_key.map(|k| Secret::new(auth.format(k))); + Provider { + name: name.to_string(), + authority, + host, + dialect, + auth, + pool_auth_value, + metrics, + breaker, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn dialect_defaults() { + assert_eq!(dialect_default(Dialect::OpenAI), "openai"); + assert_eq!(dialect_default(Dialect::Anthropic), "anthropic"); + } + + #[test] + fn known_provider_names_are_unique() { + let mut names: Vec<_> = KNOWN_PROVIDERS.iter().map(|p| p.name).collect(); + names.sort_unstable(); + let before = names.len(); + names.dedup(); + assert_eq!( + before, + names.len(), + "duplicate provider name in KNOWN_PROVIDERS" + ); + } + + #[test] + fn anthropic_is_the_only_anthropic_dialect() { + // Dialect drives usage parsing + injection; getting Anthropic's wire wrong mis-meters it. + for spec in KNOWN_PROVIDERS { + let want = if spec.name == "anthropic" { + Dialect::Anthropic + } else { + Dialect::OpenAI + }; + assert_eq!(spec.dialect, want, "{} dialect", spec.name); + } + } + + #[test] + fn auth_scheme_formats_and_headers() { + assert_eq!(AuthScheme::Bearer.header(), "authorization"); + assert_eq!(AuthScheme::XApiKey.header(), "x-api-key"); + assert_eq!(AuthScheme::Bearer.format("k"), "Bearer k"); + // Anthropic wants the bare key (no `Bearer`). Getting this wrong → upstream 401. 
+ assert_eq!(AuthScheme::XApiKey.format("k"), "k"); + } + + #[test] + fn resolve_derives_host_and_pool_auth() { + let p = Provider::resolve( + "openai", + "api.openai.com:443".to_string(), + Dialect::OpenAI, + AuthScheme::Bearer, + Some("sk-x"), + ProviderMetrics::disconnected(), + None, + ); + assert_eq!(p.host, "api.openai.com"); + assert_eq!(p.dialect, Dialect::OpenAI); + assert_eq!(p.pool_auth_value.as_ref().unwrap().expose(), "Bearer sk-x"); + + // No pool key ⇒ no managed auth value (managed requests to it would 503). + let a = Provider::resolve( + "anthropic", + "api.anthropic.com:443".to_string(), + Dialect::Anthropic, + AuthScheme::XApiKey, + None, + ProviderMetrics::disconnected(), + None, + ); + assert!(a.pool_auth_value.is_none()); + } +} diff --git a/src/secret.rs b/src/secret.rs new file mode 100644 index 0000000..5fcd8d8 --- /dev/null +++ b/src/secret.rs @@ -0,0 +1,77 @@ +//! A string secret that won't leak into logs and is scrubbed on drop. +//! +//! Hygiene, not a hard control: provider keys are necessarily long-lived in RAM (held for the +//! process life, copied into Pingora's request headers we don't own), so zeroize-on-drop only +//! helps at rotation/shutdown. The real protections are SSM-at-rest + never logging + rotation. +//! What this newtype buys: a redacting `Debug` (so a stray `{:?}` or `tracing` field can't print a +//! key) and a best-effort scrub when the value is dropped. + +use std::fmt; +use zeroize::Zeroize; + +#[derive(Clone)] +pub struct Secret(String); + +impl Secret { + pub fn new(s: impl Into) -> Self { + Self(s.into()) + } + + /// Borrow the plaintext. Call sites should keep the exposure as narrow as possible. 
+ pub fn expose(&self) -> &str { + &self.0 + } +} + +impl From for Secret { + fn from(s: String) -> Self { + Self(s) + } +} + +impl fmt::Debug for Secret { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("Secret(***)") + } +} + +// Deserialize transparently from a plain string so config (`AI_POOL_KEY_*`, `nats_creds`) can load a +// secret straight into `Option`. +impl<'de> serde::Deserialize<'de> for Secret { + fn deserialize>(d: D) -> Result { + Ok(Self(String::deserialize(d)?)) + } +} + +// Serialize **redacting** — same threat model as `Debug`: a stray `serde_json::to_string(&config)` +// in a log line must not leak the key. This is sound for our only serialize path (figment's +// `Serialized::defaults`, where every secret field defaults to `None` and is skipped); a `Secret` +// is for holding a key, never for round-tripping config back out. Read the plaintext via `expose`. +impl serde::Serialize for Secret { + fn serialize(&self, s: S) -> Result { + s.serialize_str("***") + } +} + +impl Drop for Secret { + fn drop(&mut self) { + self.0.zeroize(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn debug_redacts() { + let s = Secret::new("sk-supersecret"); + assert_eq!(format!("{s:?}"), "Secret(***)"); + assert!(!format!("{s:?}").contains("supersecret")); + } + + #[test] + fn expose_returns_plaintext() { + assert_eq!(Secret::new("abc").expose(), "abc"); + } +} diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..26a7446 --- /dev/null +++ b/src/state.rs @@ -0,0 +1,329 @@ +//! Shared gateway state. +//! +//! Only the **deny-set** is dynamic (watched from NATS, behind `ArcSwap` for lock-free reads). +//! Everything else — the signing keyring and the resolved provider registry (upstreams + pool auth +//! values) — is built once at boot from config (SSM/env), so the auth + key paths have **no runtime +//! dependency on NATS**. 
+ +use crate::config::AiConfig; +use crate::deny::DenySet; +use crate::error::{GatewayError, Result}; +use crate::key::Keyring; +use crate::metrics::{Metrics, ProviderMetrics}; +use crate::ratelimit::RateLimit; +use crate::route::{self, AuthScheme, Dialect, Provider}; +use arc_swap::ArcSwap; +use arrayvec::ArrayString; +use std::collections::HashMap; +use std::fmt::Write as _; +use std::net::SocketAddr; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use tracing::warn; + +/// How long a resolved upstream address is reused before re-resolving. +const DNS_TTL: Duration = Duration::from_secs(60); + +/// A process-unique request id, `{instance:x}-{seq:x}`. Two `u64`s in hex (≤16 chars each) plus the +/// `-` separator never exceed 33 bytes, so it lives inline on the stack — no per-request heap +/// allocation on the admitted path (it's minted for every request, including fast rejects). +pub type RequestId = ArrayString<33>; + +/// Build the resolved provider registry from the static known set + config: every known provider +/// (its authority overridable by `provider_authorities`), plus any config-only OpenAI-wire provider +/// (a `provider_authorities` entry whose name isn't known). Each provider's pool key (if any) is +/// looked up by name and its managed auth header value precomputed. +fn build_providers(config: &AiConfig, metrics: &Metrics) -> HashMap> { + // One independent breaker per provider, all built from the same config (the breaker holds + // atomics so it can't be cloned — we mint a fresh one per provider). `None` ⇒ breaker disabled. 
+ let cb_config = config.circuit_breaker_config(); + let breaker = || { + cb_config + .clone() + .map(crate::circuit_breaker::CircuitBreaker::new) + }; + + let mut providers = HashMap::new(); + for spec in route::KNOWN_PROVIDERS { + let authority = config + .provider_authorities + .get(spec.name) + .cloned() + .unwrap_or_else(|| spec.authority.to_string()); + let pool_key = config.pool_keys.get(spec.name).map(|s| s.expose()); + providers.insert( + spec.name.to_string(), + Arc::new(Provider::resolve( + spec.name, + authority, + spec.dialect, + spec.auth, + pool_key, + ProviderMetrics::resolve(metrics, spec.name), + breaker(), + )), + ); + } + // Config-only providers (name not in the known set): assume OpenAI-wire (Bearer). A non-OpenAI + // wire format would need real code, so we don't pretend to support it from config alone. + for (name, authority) in &config.provider_authorities { + if !providers.contains_key(name) { + let pool_key = config.pool_keys.get(name).map(|s| s.expose()); + providers.insert( + name.clone(), + Arc::new(Provider::resolve( + name, + authority.clone(), + Dialect::OpenAI, + AuthScheme::Bearer, + pool_key, + ProviderMetrics::resolve(metrics, name), + breaker(), + )), + ); + } + } + providers +} + +pub struct GatewayState { + pub config: AiConfig, + pub metrics: Arc, + + /// Trusted Ed25519 public keys by kid — from config (rotate via redeploy). Static for life. + pub keyring: Keyring, + /// Resolved providers by name (upstream authority/host + precomputed managed auth value). Built + /// once at boot from `route::KNOWN_PROVIDERS` + config; the request path clones the `Arc`. + providers: HashMap>, + + /// Sparse deny-set — the ONE thing watched from NATS. Default-allow on miss; fail-open. + pub deny: ArcSwap, + + /// Per-key request-rate guardrail (see `ratelimit`). `None` when `rate_limit_rps == 0`. Fixed + /// memory regardless of tenant count, so it lives in the static state with no GC. 
+ pub rate_limit: Option, + + /// TTL cache of resolved upstream addresses, so `upstream_peer` neither blocks on a synchronous + /// `getaddrinfo` nor re-resolves the same provider host every request. `ArcSwap` so the common + /// case — a cache hit, on every admitted request after warmup — is a lock-free atomic load; the + /// only writes are the ~10 providers' entries refreshed once per `DNS_TTL`, applied via `rcu`. + dns_cache: ArcSwap>, + + /// Per-process instance token (8 OS-random bytes), the high half of every `request_id`. + /// Random rather than a uuid dep, so log lines from two gateways don't collide when aggregated — + /// and random rather than the boot wall-clock, which collides when a rapid scale-up boots several + /// instances within the same nanosecond. + instance_id: u64, + /// Monotonic per-request counter, the low half of `request_id`. A relaxed `fetch_add` — the only + /// requirement is uniqueness within the process, not cross-request ordering. + request_seq: AtomicU64, +} + +impl GatewayState { + pub fn new(config: AiConfig, metrics: Arc) -> Result> { + let keyring = config.build_keyring()?; + // No signing keys ⇒ every `bai_…` fails verify and falls through to BYO treatment: no key + // swap, no deny-set, no `ai.usage` billing. That's a *valid* mode (a BYO-only deployment), + // but a far more common cause is a missing/typo'd `signing_keys` (SSM param, env) — which + // looks healthy while silently dropping all billing. A managed deployment sets + // `require_signing_keys = true` so this mis-deploy is a hard, visible boot failure; otherwise + // we warn loudly and continue (BYO-only is legitimate and the test/e2e harnesses run keyless). + if config.signing_keys.is_empty() { + if config.require_signing_keys { + return Err(GatewayError::Config( + "require_signing_keys is set but no signing_keys are configured — refusing to \ + boot into silent BYO-only mode (no key swap, no deny-set, no billing). 
Check \ + the signing_keys config / SSM param." + .to_string(), + )); + } + warn!( + "no signing_keys configured — all managed (bai_) traffic will be treated as BYO \ + (no key swap, no deny-set, no billing). Expected only for a BYO-only deployment." + ); + } + let providers = build_providers(&config, &metrics); + let rate_limit = RateLimit::new(config.rate_limit_rps, config.byo_rate_limit_rps); + + // 8 OS-random bytes as the instance token, so two gateways' request_ids never collide when + // aggregated — including when a rapid scale-up boots several instances within the same + // nanosecond (which a wall-clock token can't distinguish). If the OS RNG is somehow + // unavailable, fall back to the boot wall-clock rather than panicking — a degraded-uniqueness + // id beats failing to start. + let instance = { + let mut buf = [0u8; 8]; + match getrandom::fill(&mut buf) { + Ok(()) => u64::from_le_bytes(buf), + Err(_) => SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos() as u64) + .unwrap_or(0), + } + }; + + Ok(Arc::new(Self { + metrics, + keyring, + providers, + deny: ArcSwap::from_pointee(DenySet::new()), + rate_limit, + dns_cache: ArcSwap::from_pointee(HashMap::new()), + instance_id: instance, + request_seq: AtomicU64::new(0), + config, + })) + } + + /// A process-unique request id (`{instance}-{seq}`) for log correlation and the + /// `x-beyond-request-id` response header. Deliberately *not* a uuid: a per-process instance + /// token (computed once at boot) plus a relaxed atomic counter is unique across the fleet, costs + /// one `fetch_add` + a hex format into a stack buffer (no heap allocation), and needs no + /// randomness per request. + pub fn next_request_id(&self) -> RequestId { + let seq = self.request_seq.fetch_add(1, Ordering::Relaxed); + let mut id = RequestId::new(); + // Can't overflow: two `u64`s in hex + `-` is ≤33 bytes, exactly the buffer's capacity. 
The + // `write!` is infallible here, but if a future format change ever exceeded the cap we'd + // rather emit a truncated id than panic on a correlation aid — so swallow the result. + let _ = write!(id, "{:x}-{seq:x}", self.instance_id); + id + } + + /// The resolved provider for `name` (the request's first path segment, or the bare-path dialect + /// default), or `None` if no such provider is registered — which `request_filter` turns into a + /// 404. + pub fn provider(&self, name: &str) -> Option<&Arc> { + self.providers.get(name) + } + + /// Resolve a `host:port` authority to a `SocketAddr`, cached for `DNS_TTL`. Uses + /// `tokio::net::lookup_host` (runs `getaddrinfo` on the blocking pool — async-safe) instead of + /// `HttpPeer::new`'s eager blocking resolve. + pub async fn resolve(&self, authority: &str) -> Result { + // Cache hit (the common case after warmup): a lock-free `ArcSwap` load — no mutex, no + // syscall — so concurrent workers never serialize on a DNS lookup that's already resolved. + if let Some((addr, at)) = self.dns_cache.load().get(authority) { + if at.elapsed() < DNS_TTL { + return Ok(*addr); + } + } + let addr = tokio::net::lookup_host(authority) + .await + .map_err(|e| GatewayError::Dns(format!("{authority}: {e}")))? + .next() + .ok_or_else(|| GatewayError::Dns(format!("{authority}: no addresses")))?; + // rcu the new/refreshed entry in. Two concurrent misses for the same host may both resolve + // and both rcu; that's harmless (same answer, last writer wins) and far cheaper than holding + // a lock across `getaddrinfo`. The clone-on-write copies a ~10-entry map — trivial, and only + // on the rare miss/refresh path, never on a hit. + // + // Sweep entries that are long dead while we're already paying for the clone. 
The cache keys + // are provider authorities, which come entirely from the boot-time registry (so in practice + // the map is bounded by the provider count, not by traffic) — this sweep is belt-and- + // suspenders against authorities ever becoming dynamic, and it's a *TTL* drop, not an + // eviction *policy*: there's no capacity contest here, so LRU/SIEVE would be machinery for a + // problem we don't have. We keep anything within `2 × DNS_TTL` so a still-live provider whose + // entry just expired (and is about to be refreshed) is never dropped out from under a + // concurrent resolve. + let now = Instant::now(); + self.dns_cache.rcu(|cur| { + let mut next = HashMap::clone(cur); + next.retain(|_, (_, at)| now.duration_since(*at) < DNS_TTL * 2); + next.insert(authority.to_string(), (addr, now)); + next + }); + Ok(addr) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::route::AuthScheme; + use crate::secret::Secret; + + /// One process-wide `Metrics` (it registers on the default Prometheus registry, which rejects a + /// second registration), shared by every test that needs a `GatewayState`. + fn test_metrics() -> Arc { + use std::sync::OnceLock; + static M: OnceLock> = OnceLock::new(); + M.get_or_init(|| Metrics::new().expect("register metrics once")) + .clone() + } + + #[test] + fn registry_resolves_known_overrides_and_additions() { + let config = AiConfig { + // Override a known provider's authority + give it a pool key; add a config-only one. + // `custom2` is a config-only provider with **no** pool key — the condition that makes a + // managed request to it 503 (no managed auth value to swap in). 
+ provider_authorities: HashMap::from([ + ("openai".to_string(), "127.0.0.1:9".to_string()), + ("custom".to_string(), "llm.internal:8443".to_string()), + ("custom2".to_string(), "other.internal:8443".to_string()), + ]), + pool_keys: HashMap::from([ + ("openai".to_string(), Secret::new("sk-openai")), + ("custom".to_string(), Secret::new("sk-custom")), + ]), + ..Default::default() + }; + let providers = build_providers(&config, &test_metrics()); + + // Known provider: authority overridden, pool auth precomputed in the right scheme. + let openai = providers.get("openai").unwrap(); + assert_eq!(openai.authority, "127.0.0.1:9"); + assert_eq!(openai.auth, AuthScheme::Bearer); + assert_eq!( + openai.pool_auth_value.as_ref().unwrap().expose(), + "Bearer sk-openai" + ); + + // Known provider, no override: built-in default + no pool key ⇒ no managed auth value. + let anthropic = providers.get("anthropic").unwrap(); + assert_eq!(anthropic.authority, "api.anthropic.com:443"); + assert_eq!(anthropic.auth, AuthScheme::XApiKey); + assert!(anthropic.pool_auth_value.is_none()); + + // Config-only provider: added as OpenAI-wire (Bearer), reachable by name. + let custom = providers.get("custom").unwrap(); + assert_eq!(custom.host, "llm.internal"); + assert_eq!( + custom.pool_auth_value.as_ref().unwrap().expose(), + "Bearer sk-custom" + ); + + // Config-only provider with no pool key: registered (reachable by name) but with no managed + // auth value — this `None` is exactly what `request_filter` turns into a 503 for a managed + // request. (BYO to it still works; it just can't serve the pooled path.) + let custom2 = providers.get("custom2").unwrap(); + assert!( + custom2.pool_auth_value.is_none(), + "a provider with no configured pool key must have no managed auth value (→ 503)" + ); + } + + #[tokio::test] + async fn resolve_caches_hit_and_errors_on_bad_host() { + // `resolve` is on the request hot path (every admitted request hits `upstream_peer`). 
Cover + // the three outcomes: a successful resolve, a cache hit returning the same address without a + // fresh lookup, and a lookup failure surfacing as `GatewayError::Dns` (not a panic/hang). + let config = AiConfig::default(); + let state = GatewayState::new(config, test_metrics()).unwrap(); + + // An IP literal resolves through `lookup_host` without real DNS — deterministic, offline-safe. + let addr = state.resolve("127.0.0.1:9").await.unwrap(); + assert_eq!(addr, "127.0.0.1:9".parse().unwrap()); + + // Second call is served from the TTL cache: same answer, and the entry is now present. + assert_eq!(state.resolve("127.0.0.1:9").await.unwrap(), addr); + assert!(state.dns_cache.load().contains_key("127.0.0.1:9")); + + // A guaranteed-NXDOMAIN host (RFC 6761 reserves `.invalid`) → a Dns error, never a panic. + assert!(matches!( + state.resolve("nonexistent.invalid:80").await, + Err(GatewayError::Dns(_)) + )); + } +} diff --git a/src/store_watch.rs b/src/store_watch.rs new file mode 100644 index 0000000..72c3602 --- /dev/null +++ b/src/store_watch.rs @@ -0,0 +1,459 @@ +//! slipstream deny-set watcher — the gateway's **only** use of NATS. +//! +//! Seeds the deny-set at boot, then streams deltas. **Fail-open**: a NATS blip keeps the last-known +//! set (we never clear), so an outage degrades to a stale deny-set, not "reject everything". Auth +//! and pool/signing keys come from config, so they're unaffected by NATS being down — only +//! spend/fraud enforcement goes stale. +//! +//! Seeding has two modes, chosen by `config.snapshot_path`: +//! +//! - **Unset (ephemeral, e.g. Fargate):** scan `blackhole.*` from NATS on first connect. The resume +//! revision is kept *in memory* across reconnects, so a NATS blip resumes the watch from where it +//! left off (gap-free) rather than re-scanning. +//! - **Set (edge/tunnel, durable disk):** load slipstream's on-disk snapshot (entries + a saved +//! 
watch cursor), seed from it, and resume the watch from that cursor — a restart skips the scan +//! and enforces immediately, even before NATS reconnects. Every applied delta is appended back to +//! the snapshot so the file tracks the live set. +//! +//! Either way the watch resumes from a **revision** (`watch_prefix_from`), never a bare +//! `watch_prefix`: the latter uses NATS `DeliverPolicy::New` (no replay), so a deny entry written in +//! the window between seeding and the subscription attaching would be silently lost. Resuming from +//! the seeded revision closes that window with no gap and no double-apply (it starts strictly after +//! the seeded revision). If the backend compacted past the cursor (`CursorExpired`), we drop back to +//! a fresh scan, which re-establishes a valid baseline. +//! +//! Runs as a Pingora `BackgroundService` so the NATS client is created on the serving runtime +//! (async-nats ties its tasks to the runtime it's built on; connecting earlier would break it). + +use crate::deny::{self, DenySet}; +use crate::state::GatewayState; +use async_trait::async_trait; +use pingora_core::server::ShutdownWatch; +use pingora_core::services::background::BackgroundService; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; +use store::snapshot::SnapshotWriter; +use store::{ + Connection, KvEntry, KvError, KvStore, KvUpdate, NatsConnection, NatsConnectionConfig, + StoreConfig, WatchCursor, +}; +use tracing::{error, info, warn}; + +const BLACKHOLE_PREFIX: &str = "blackhole."; + +/// Compact the on-disk snapshot once it grows past this many bytes of appended deltas. The deny-set +/// is low-churn, so this is rarely hit; it just bounds the log if a tenant flaps. +const SNAPSHOT_COMPACT_THRESHOLD: u64 = 1024 * 1024; + +/// Reconnect backoff bounds: start at 1s, double to a 30s ceiling. Generous enough to stop log +/// spam during a long NATS outage, tight enough that recovery is near-immediate once it returns. 
+const RECONNECT_BACKOFF_BASE: Duration = Duration::from_secs(1); +const RECONNECT_BACKOFF_MAX: Duration = Duration::from_secs(30); + +pub struct WatcherService { + pub state: Arc, +} + +#[async_trait] +impl BackgroundService for WatcherService { + async fn start(&self, mut shutdown: ShutdownWatch) { + // Resume position + on-disk snapshot writer persist across reconnects: a NATS blip resumes + // the watch from `cursor` instead of re-scanning, and `seeded` stays true so we don't reseed. + let mut cursor = WatchCursor::none(); + let mut writer: Option = None; + let mut seeded = false; + + if let Some(path) = self.state.config.snapshot_path.clone() { + let path = PathBuf::from(path); + // Snapshot I/O is synchronous (whole-file read/rewrite) — offload it so we never stall + // the serving runtime this BackgroundService shares with the proxy. + let load_path = path.clone(); + match tokio::task::spawn_blocking(move || store::snapshot::load(&load_path)).await { + Ok(Ok(Some(snap))) => { + let set = denyset_from_entries(snap.entries.values()); + info!(count = set.len(), "seeded deny-set from on-disk snapshot"); + self.state.metrics.deny_set_size.set(set.len() as i64); + self.state.deny.store(Arc::new(set)); + // A snapshot without a saved cursor can't safely resume (a bare watch would + // race), so only treat it as seeded when it carries a resume point; otherwise + // fall through to a NATS scan on connect. 
+ if !snap.cursor.is_none() { + cursor = snap.cursor; + seeded = true; + } + } + Ok(Ok(None)) => info!("no on-disk snapshot yet; will seed from a NATS scan"), + Ok(Err(e)) => warn!(error = %e, "snapshot load failed; will seed from a NATS scan"), + Err(e) => warn!(error = %e, "snapshot load task panicked; seeding from NATS"), + } + let open_path = path.clone(); + match tokio::task::spawn_blocking(move || { + SnapshotWriter::open(&open_path, SNAPSHOT_COMPACT_THRESHOLD) + }) + .await + { + Ok(Ok(w)) => writer = Some(w), + Ok(Err(e)) => warn!(error = %e, "snapshot writer open failed; running without it"), + Err(e) => warn!(error = %e, "snapshot writer open task panicked"), + } + } + + // Reconnect backoff: 1s doubling to a 30s cap, reset on every successful connect. A fixed + // 2s retry hammered the log at a constant rate through a long outage (minutes to hours), + // burying other signals during the very incident an oncall is reading these logs for. The + // gateway serves correctly on the stale set throughout — this is purely about log volume + // and not pointlessly spinning on a down NATS. + let mut backoff = RECONNECT_BACKOFF_BASE; + loop { + // Connect, but bail immediately if Pingora signals shutdown mid-connect (e.g. NATS is + // down and `connect` is retrying its own backoff) rather than blocking teardown. + let store = tokio::select! { + _ = shutdown.changed() => { + info!( + in_flight = self.state.metrics.requests_in_flight.get(), + "shutdown signaled; deny-set watcher exiting" + ); + return; + } + outcome = connect(&self.state) => match outcome { + Ok(store) => store, + Err(e) => { + self.state.metrics.nats_connected.set(0); + error!(error = %e, backoff_secs = backoff.as_secs(), "slipstream connect failed; retrying"); + // Reconnect backoff, also interruptible by shutdown. + tokio::select! 
{ + _ = shutdown.changed() => return, + _ = tokio::time::sleep(backoff) => { + backoff = (backoff * 2).min(RECONNECT_BACKOFF_MAX); + continue; + } + } + } + }, + }; + + backoff = RECONNECT_BACKOFF_BASE; + self.state.metrics.nats_connected.set(1); + info!("slipstream connected; watching deny-set"); + // `watch_deny` returns `true` when it exited because shutdown was signaled — stop the + // reconnect loop cleanly instead of trying to reconnect a shutting-down process. + if watch_deny( + &self.state, + store, + &mut cursor, + &mut writer, + &mut seeded, + &mut shutdown, + ) + .await + { + info!("shutdown signaled; deny-set watcher exiting"); + return; + } + self.state.metrics.nats_connected.set(0); + warn!("deny-set watch exited; reconnecting"); + tokio::select! { + _ = shutdown.changed() => return, + _ = tokio::time::sleep(backoff) => { + backoff = (backoff * 2).min(RECONNECT_BACKOFF_MAX); + } + } + } + } +} + +/// Build a `DenySet` from KV entries, dropping any whose key isn't a `blackhole.{tenant}`. +fn denyset_from_entries<'a>(entries: impl Iterator) -> DenySet { + entries + .filter_map(|e| Some((deny::parse_key(&e.key)?, deny::parse_reason(&e.value)))) + .collect() +} + +/// Rewrite the on-disk snapshot from a fresh scan: truncate, write one `Put` per live entry, and +/// checkpoint the cursor. Returns the reopened writer, or `None` if the rewrite failed (the gateway +/// then runs snapshot-less — the in-memory deny-set is unaffected). Synchronous file I/O, so it runs +/// on a blocking thread off the serving runtime. +async fn rebuild_snapshot( + path: PathBuf, + entries: Vec, + cursor: WatchCursor, +) -> Option { + let res = tokio::task::spawn_blocking( + move || -> Result { + // Remove the old log so we don't replay a deleted-but-uncompacted key on a later load. 
+ // A failed removal is *not* ignorable: if `SnapshotWriter::open` then appends to the + // surviving file, a compacted-away `Delete` can't undo its stale `Put`, and a later + // `load()` resurrects a tenant we no longer deny — the exact corruption this rebuild + // exists to prevent. `NotFound` is the expected, benign case (first boot, or scratch + // storage); any other error aborts the rebuild so we run snapshot-less rather than on + // poisoned state. + match std::fs::remove_file(&path) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} + Err(e) => return Err(e.into()), + } + let mut w = SnapshotWriter::open(&path, SNAPSHOT_COMPACT_THRESHOLD)?; + for e in &entries { + w.write_update(&KvUpdate::Put(e.clone()))?; + } + w.checkpoint(&cursor)?; + Ok(w) + }, + ) + .await; + match res { + Ok(Ok(w)) => Some(w), + Ok(Err(e)) => { + warn!(error = %e, "snapshot rebuild failed; running without on-disk snapshot"); + None + } + Err(e) => { + warn!(error = %e, "snapshot rebuild task panicked"); + None + } + } +} + +async fn connect(state: &GatewayState) -> crate::error::Result> { + let cfg = &state.config; + // `expose().to_string()` lifts the creds out of our `Secret` into the plain `String` the store's + // config requires. This doesn't widen the leak surface: `NatsConnectionConfig` has a hand-written + // redacting `Debug` (prints `creds: [redacted]`), so a stray `{:?}` on it — in a span, an error + // context, a reconnect log — can't print the credential. The plaintext copy is necessarily + // un-zeroized for the connection's life (we hand ownership to the store); same trade-off the pool + // keys make once they reach Pingora's headers (see `secret`). Redaction, not zeroization, is the + // control here. 
+ let conn = NatsConnection::new(NatsConnectionConfig { + url: cfg.nats_url.clone(), + creds: cfg.nats_creds.as_ref().map(|s| s.expose().to_string()), + creds_file: cfg.nats_creds_file.clone(), + }); + conn.connect().await?; + let store = conn + .store_with_config(StoreConfig { + name: cfg.config_bucket.clone(), + ..Default::default() + }) + .await?; + Ok(store) +} + +/// Seed (if needed) and stream deny-set deltas until the watch ends or shutdown is signaled. +/// Returns `true` iff it exited because `shutdown` fired — the caller then stops reconnecting. +async fn watch_deny( + state: &Arc, + store: Arc, + cursor: &mut WatchCursor, + writer: &mut Option, + seeded: &mut bool, + shutdown: &mut ShutdownWatch, +) -> bool { + // Seed once, on the first connect that lacks a usable resume point (cold boot with no snapshot, + // or after a `CursorExpired` reset). A NATS scan is a point-in-time read of the live set; the + // highest revision among its entries is the baseline the watch resumes strictly after. An empty + // set ⇒ revision 0 ⇒ resume from the start of history (the deny bucket is low-churn, so a full + // replay is cheap and still gap-free). + if !*seeded { + match store.reader().scan(BLACKHOLE_PREFIX).await { + Ok(entries) => { + let baseline_rev = entries + .iter() + .filter_map(|e| e.version.as_u64()) + .max() + .unwrap_or(0); + let set = denyset_from_entries(entries.iter()); + info!( + count = set.len(), + revision = baseline_rev, + "seeded deny-set from scan" + ); + state.metrics.deny_set_size.set(set.len() as i64); + state.deny.store(Arc::new(set)); + *cursor = WatchCursor::from_u64(baseline_rev); + // Persist the freshly-scanned baseline so a later restart can skip the scan. 
We + // *rebuild* the file (not append): this path runs on a cold boot or after a + // `CursorExpired` reset, and a stale prior log could otherwise contain a `Put` for a + // tenant deleted while we were offline — whose `Delete` was compacted away — which a + // later `load()` would replay and resurrect (wrongly re-denying a tenant). A clean + // rewrite from the live scan makes the on-disk state exactly match NATS. + if writer.is_some() { + if let Some(path) = state.config.snapshot_path.clone() { + *writer = + rebuild_snapshot(PathBuf::from(path), entries, cursor.clone()).await; + } + } + *seeded = true; + } + Err(e) => { + // No baseline yet — serve whatever's already in memory (fail-open) and let the + // reconnect loop retry the scan. + warn!(error = %e, "deny-set scan failed; serving current set, will retry"); + return false; + } + } + } + + // Stream deltas, resuming from the seeded revision. Never a bare `watch_prefix` (DeliverPolicy:: + // New) — that would drop anything written in the seed→subscribe window. + let Some(watcher) = store.watcher() else { + warn!("store has no watcher; deny-set will not update"); + return false; + }; + let (tx, mut rx) = tokio::sync::mpsc::channel::(256); + let w = watcher.clone(); + let start_cursor = cursor.clone(); + let watch = tokio::spawn(async move { + w.watch_prefix_from(BLACKHOLE_PREFIX, &start_cursor, tx) + .await + }); + + // Updates are rcu (clone-on-write); the set is tiny (O(denied)). Each applied delta also + // advances the in-memory cursor (so a reconnect resumes from here) and is appended to the + // on-disk snapshot if one is configured. We `select!` on shutdown so a quiet stream (no deltas + // arriving) doesn't pin the task open through teardown; `select!` can only switch at an await + // point — between updates — so we never abort mid-`persist_update`, leaving the snapshot intact. + loop { + let update = tokio::select! 
{ + _ = shutdown.changed() => { + watch.abort(); + return true; + } + update = rx.recv() => match update { + Some(u) => u, + None => break, + }, + }; + state.deny.rcu(|cur| { + let mut set = (**cur).clone(); + match &update { + KvUpdate::Put(e) => { + if let Some(t) = deny::parse_key(&e.key) { + set.insert(t, deny::parse_reason(&e.value)); + } + } + // Delete/Purge = restore (explicit delete or TTL expiry). + KvUpdate::Delete { key, .. } | KvUpdate::Purge { key, .. } => { + if let Some(t) = deny::parse_key(key) { + set.remove(t); + } + } + } + Arc::new(set) + }); + // Reflect the new cardinality. A lock-free load of the set we just swapped in — cheap, and + // the deltas are low-churn, so this is far off any hot path. + state + .metrics + .deny_set_size + .set(state.deny.load().len() as i64); + *cursor = WatchCursor::from_version(update.version().clone()); + persist_update(writer, &update, cursor).await; + } + + // The watch ended (NATS dropped, or the cursor was compacted away). Inspect why so a compacted + // cursor forces a fresh scan on the next connect instead of resuming from a dead revision. + match watch.await { + Ok(Ok(())) => {} + Ok(Err(KvError::CursorExpired)) => { + warn!("deny-set resume cursor expired (history compacted past it); will rescan"); + *seeded = false; + *cursor = WatchCursor::none(); + } + Ok(Err(e)) => warn!(error = %e, "deny-set watch ended"), + Err(e) => warn!(error = %e, "deny-set watch task panicked"), + } + false +} + +/// Append one applied delta to the on-disk snapshot (if configured) and checkpoint the cursor. +/// `write_update`/`checkpoint` are buffered/`write(2)` and cheap; `compact` reads+rewrites the whole +/// file, so it's offloaded off the serving runtime when the log crosses its threshold. 
+async fn persist_update(
+    writer: &mut Option<SnapshotWriter>,
+    update: &KvUpdate,
+    cursor: &WatchCursor,
+) {
+    // No snapshot configured, or it was disabled by an earlier failure: nothing to persist.
+    let Some(w) = writer.as_mut() else {
+        return;
+    };
+    // Never checkpoint past a delta that didn't reach the log: the on-disk cursor would then claim
+    // a revision whose `Put`/`Delete` is missing from the file. Drop the writer instead (same policy
+    // as a failed compaction below) so the last good checkpoint stays the newest one on disk.
+    if let Err(e) = w.write_update(update) {
+        warn!(error = %e, "snapshot write failed; disabling snapshot writer");
+        *writer = None;
+        return;
+    }
+    let needs_compact = match w.checkpoint(cursor) {
+        Ok(b) => b,
+        Err(e) => {
+            // The delta itself was written; only the recorded resume point is stale.
+            warn!(error = %e, "snapshot checkpoint failed");
+            false
+        }
+    };
+    if !needs_compact {
+        return;
+    }
+    // Move the writer into a blocking task for the rewrite, then take it back. If it fails we
+    // drop the writer (None) and run snapshot-less until the next restart reopens the file —
+    // the deny-set itself is unaffected (it lives in the ArcSwap, fed by NATS).
+    if let Some(mut w) = writer.take() {
+        match tokio::task::spawn_blocking(move || w.compact().map(|()| w)).await {
+            Ok(Ok(w)) => *writer = Some(w),
+            Ok(Err(e)) => {
+                warn!(error = %e, "snapshot compaction failed; disabling snapshot writer")
+            }
+            Err(e) => warn!(error = %e, "snapshot compaction task panicked"),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::deny::DenyReason;
+    use store::VersionToken;
+
+    /// Build a KV entry at revision 1 with the given key and raw value.
+    fn entry(key: &str, value: &[u8]) -> KvEntry {
+        KvEntry {
+            key: key.to_string(),
+            value: value.to_vec(),
+            version: VersionToken::from_u64(1),
+        }
+    }
+
+    #[test]
+    fn denyset_from_entries_seeds_and_skips_malformed() {
+        // This is the seeding core: every boot turns raw KV entries into the live deny-set. A bug
+        // here (or a foreign key bleeding through the `filter_map`) means the deny-set is silently
+        // wrong at boot — denied tenants served, or unrelated keys denying real tenants.
+        let entries = [
+            entry("blackhole.42", b"spend"),
+            entry("blackhole.99", b"fraud"),
+            // Not a `blackhole.{tenant}` key — must be dropped, never inserted as tenant 0 or junk.
+            entry("signkey.1", b"spend"),
+            // `blackhole.` with a non-numeric tail — `parse_key` rejects it, so it's dropped too.
+            entry("blackhole.notanumber", b"spend"),
+            // Unrecognized reason value still denies (fail-safe) under `DenyReason::Unknown`.
+            entry("blackhole.7", b"mystery"),
+        ];
+
+        let set = denyset_from_entries(entries.iter());
+
+        assert_eq!(
+            set.len(),
+            3,
+            "only the three valid blackhole keys are seeded"
+        );
+        assert_eq!(set.reason(42), Some(DenyReason::Spend));
+        assert_eq!(set.reason(99), Some(DenyReason::Fraud));
+        assert_eq!(set.reason(7), Some(DenyReason::Unknown));
+        // The malformed keys produced no entries (and crucially no spurious tenant 0).
+        assert!(!set.is_denied(0));
+        assert!(!set.is_denied(1));
+    }
+
+    #[test]
+    fn denyset_from_entries_empty_is_allow_all() {
+        let set = denyset_from_entries([].iter());
+        assert!(set.is_empty());
+        assert!(!set.is_denied(42)); // default-allow on a cold/empty scan
+    }
+}
diff --git a/src/usage.rs b/src/usage.rs
new file mode 100644
index 0000000..22443a0
--- /dev/null
+++ b/src/usage.rs
@@ -0,0 +1,269 @@
+//! Token-usage extraction — the "passive tap" the gateway emits as billing *facts*.
+//!
+//! We never compute price here (pricing is a closed downstream consumer); we only extract raw
+//! token counts. Two shapes per provider: the non-streaming JSON body, and the terminal event of
+//! an SSE stream. For streaming we scan the relayed bytes for the usage event but never block the
+//! relay on it (see `proxy`).
+
+use serde::Deserialize;
+
+/// Raw token counts extracted from one provider response. All-zero is the `Default`.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub struct Usage {
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+    pub cache_read_tokens: u64,
+    pub cache_write_tokens: u64,
+}
+
+// Typed views of just the fields we meter. Deserializing into these (rather than a
+// `serde_json::Value` DOM) lets serde skip every field we don't read without allocating a node for
+// it — no `Map`/`String`/`Number` tree to build and drop per body or per SSE line. Every field is
+// `#[serde(default)]` so a missing or partial `usage` block reads as zeros, matching the prior
+// pointer-with-`unwrap_or(0)` behavior.
+
+/// OpenAI `usage` block (chat/completions + responses). `prompt`/`completion` map to in/out; cached
+/// input rides in `prompt_tokens_details.cached_tokens`. No cache-write concept on the OpenAI wire.
+#[derive(Deserialize, Default)]
+struct OpenAiUsage {
+    #[serde(default)]
+    prompt_tokens: u64,
+    #[serde(default)]
+    completion_tokens: u64,
+    // `Option`, not a bare struct: `#[serde(default)]` only covers a *missing* field, so an explicit
+    // `"prompt_tokens_details": null` would otherwise fail the whole parse and drop the usage row.
+    #[serde(default)]
+    prompt_tokens_details: Option<OpenAiPromptDetails>,
+}
+
+/// The `prompt_tokens_details` sub-object; only the cached-input count is metered.
+#[derive(Deserialize, Default)]
+struct OpenAiPromptDetails {
+    #[serde(default)]
+    cached_tokens: u64,
+}
+
+impl From<OpenAiUsage> for Usage {
+    fn from(u: OpenAiUsage) -> Self {
+        Usage {
+            input_tokens: u.prompt_tokens,
+            output_tokens: u.completion_tokens,
+            // Absent or `null` details read as zero cached tokens.
+            cache_read_tokens: u.prompt_tokens_details.map_or(0, |d| d.cached_tokens),
+            cache_write_tokens: 0,
+        }
+    }
+}
+
+/// Anthropic `usage` block (`/v1/messages` body + streaming events).
+#[derive(Deserialize, Default)]
+struct AnthropicUsage {
+    #[serde(default)]
+    input_tokens: u64,
+    #[serde(default)]
+    output_tokens: u64,
+    #[serde(default)]
+    cache_read_input_tokens: u64,
+    #[serde(default)]
+    cache_creation_input_tokens: u64,
+}
+
+/// OpenAI non-streaming: top-level `usage`. `None` (absent/`null`) ⇒ no usage to meter.
+pub fn openai_body(body: &[u8]) -> Option<Usage> {
+    #[derive(Deserialize)]
+    struct Body {
+        usage: Option<OpenAiUsage>,
+    }
+    serde_json::from_slice::<Body>(body)
+        .ok()?
+        .usage
+        .map(Usage::from)
+}
+
+/// Anthropic non-streaming: top-level `usage.{input,output,cache_*}`.
+pub fn anthropic_body(body: &[u8]) -> Option<Usage> {
+    #[derive(Deserialize)]
+    struct Body {
+        usage: Option<AnthropicUsage>,
+    }
+    // A body that fails to parse, or has no (or a `null`) `usage`, yields `None`.
+    let u = serde_json::from_slice::<Body>(body).ok()?.usage?;
+    Some(Usage {
+        input_tokens: u.input_tokens,
+        output_tokens: u.output_tokens,
+        cache_read_tokens: u.cache_read_input_tokens,
+        cache_write_tokens: u.cache_creation_input_tokens,
+    })
+}
+
+/// Iterate the raw JSON payloads carried on `data:` lines of an SSE byte stream. `[DONE]` and the
+/// `data:` framing are stripped; each caller deserializes the payload into its own typed view.
+fn sse_data_lines(sse: &[u8]) -> impl Iterator<Item = &[u8]> + '_ {
+    sse.split(|&b| b == b'\n').filter_map(|line| {
+        let line = line.strip_prefix(b"data:")?;
+        // SSE strips *all* leading spaces after the field colon (not exactly one) — OpenAI/Anthropic
+        // emit `data: ` (one space), but a config-added OpenAI-wire provider that pads with more
+        // would otherwise leave whitespace in the payload and fail the JSON parse → silent zero usage.
+        let line = line.trim_ascii_start();
+        (line != b"[DONE]").then_some(line)
+    })
+}
+
+/// OpenAI streaming (requires `stream_options.include_usage`): the penultimate chunk carries a
+/// top-level `usage` object. Last one with usage wins.
+pub fn openai_stream(sse: &[u8]) -> Option<Usage> {
+    #[derive(Deserialize)]
+    struct Chunk {
+        usage: Option<OpenAiUsage>,
+    }
+    let mut found = None;
+    for line in sse_data_lines(sse) {
+        // Unparseable lines are skipped rather than aborting the scan.
+        if let Ok(chunk) = serde_json::from_slice::<Chunk>(line) {
+            if let Some(u) = chunk.usage {
+                found = Some(Usage::from(u));
+            }
+        }
+    }
+    found
+}
+
+/// Anthropic streaming: input + cache tokens arrive in `message_start.message.usage`; output
+/// accumulates in `message_delta.usage.output_tokens` (last delta is the cumulative total).
+pub fn anthropic_stream(sse: &[u8]) -> Option<Usage> {
+    #[derive(Deserialize)]
+    struct Message {
+        usage: Option<AnthropicUsage>,
+    }
+    #[derive(Deserialize)]
+    struct Chunk {
+        // `message_start` nests usage under `message`; `message_delta` carries it top-level.
+        message: Option<Message>,
+        usage: Option<AnthropicUsage>,
+    }
+    let mut usage = Usage::default();
+    // Distinguishes "stream carried a usage block that happened to be all zeros" from "no usage".
+    let mut saw_any = false;
+    for line in sse_data_lines(sse) {
+        let Ok(chunk) = serde_json::from_slice::<Chunk>(line) else {
+            continue;
+        };
+        if let Some(u) = chunk.message.and_then(|m| m.usage) {
+            usage.input_tokens = u.input_tokens;
+            usage.cache_read_tokens = u.cache_read_input_tokens;
+            usage.cache_write_tokens = u.cache_creation_input_tokens;
+            saw_any = true;
+        }
+        if let Some(u) = chunk.usage {
+            // message_delta carries the running output token count.
+            if u.output_tokens > 0 {
+                usage.output_tokens = u.output_tokens;
+            }
+            saw_any = true;
+        }
+    }
+    saw_any.then_some(usage)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn openai_nonstreaming() {
+        let body = br#"{"usage":{"prompt_tokens":12,"completion_tokens":34,
+            "prompt_tokens_details":{"cached_tokens":4}}}"#;
+        assert_eq!(
+            openai_body(body).unwrap(),
+            Usage {
+                input_tokens: 12,
+                output_tokens: 34,
+                cache_read_tokens: 4,
+                cache_write_tokens: 0
+            }
+        );
+    }
+
+    #[test]
+    fn anthropic_nonstreaming() {
+        let body = br#"{"usage":{"input_tokens":100,"output_tokens":50,
+            "cache_read_input_tokens":10,"cache_creation_input_tokens":7}}"#;
+        assert_eq!(
+            anthropic_body(body).unwrap(),
+            Usage {
+                input_tokens: 100,
+                output_tokens: 50,
+                cache_read_tokens: 10,
+                cache_write_tokens: 7
+            }
+        );
+    }
+
+    #[test]
+    fn openai_streaming_terminal_usage() {
+        let sse = b"data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n\
+                    data: {\"choices\":[],\"usage\":{\"prompt_tokens\":5,\"completion_tokens\":9}}\n\n\
+                    data: [DONE]\n\n";
+        assert_eq!(
+            openai_stream(sse).unwrap(),
+            Usage {
+                input_tokens: 5,
+                output_tokens: 9,
+                cache_read_tokens: 0,
+                cache_write_tokens: 0
+            }
+        );
+    }
+
+    #[test]
+    fn anthropic_streaming_accumulates() {
+        let sse = b"event: message_start\n\
+                    data: {\"type\":\"message_start\",\"message\":{\"usage\":{\"input_tokens\":20,\"output_tokens\":0}}}\n\n\
+                    event: message_delta\n\
+                    data: {\"type\":\"message_delta\",\"usage\":{\"output_tokens\":15}}\n\n";
+        assert_eq!(
+            anthropic_stream(sse).unwrap(),
+            Usage {
+                input_tokens: 20,
+                output_tokens: 15,
+                cache_read_tokens: 0,
+                cache_write_tokens: 0
+            }
+        );
+    }
+
+    #[test]
+    fn anthropic_streaming_includes_cache_tokens() {
+        // Cache tokens ride in `message_start.message.usage` alongside input_tokens. The earlier
+        // accumulation test omits them; this guards the `cache_read`/`cache_creation` pointers so a
+        // regression can't silently zero cache billing.
+        let sse = b"event: message_start\n\
+                    data: {\"type\":\"message_start\",\"message\":{\"usage\":{\"input_tokens\":20,\"output_tokens\":0,\"cache_read_input_tokens\":12,\"cache_creation_input_tokens\":8}}}\n\n\
+                    event: message_delta\n\
+                    data: {\"type\":\"message_delta\",\"usage\":{\"output_tokens\":15}}\n\n";
+        assert_eq!(
+            anthropic_stream(sse).unwrap(),
+            Usage {
+                input_tokens: 20,
+                output_tokens: 15,
+                cache_read_tokens: 12,
+                cache_write_tokens: 8
+            }
+        );
+    }
+
+    #[test]
+    fn tolerates_extra_leading_spaces_after_data_colon() {
+        // SSE strips all leading spaces, not just one. A provider padding `data:    {…}` must still
+        // parse — the alternative is a silent zero-usage row for that request. The payload below is
+        // deliberately padded with several spaces after the colon.
+        let sse =
+            b"data:    {\"choices\":[],\"usage\":{\"prompt_tokens\":3,\"completion_tokens\":7}}\n\n";
+        assert_eq!(
+            openai_stream(sse).unwrap(),
+            Usage {
+                input_tokens: 3,
+                output_tokens: 7,
+                cache_read_tokens: 0,
+                cache_write_tokens: 0
+            }
+        );
+    }
+
+    #[test]
+    fn no_usage_returns_none() {
+        assert!(openai_stream(b"data: {\"choices\":[]}\n\n").is_none());
+        // Assert `None` itself: comparing an `unwrap_or(0)` to zero would also pass if a body with
+        // no `usage` wrongly produced `Some(Usage::default())`.
+        assert!(anthropic_body(b"{}").is_none());
+    }
+}
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
new file mode 100644
index 0000000..a5d4f1e
--- /dev/null
+++ b/tests/common/mod.rs
@@ -0,0 +1,716 @@
+//! e2e harness: a real `beyond-ai` binary, a real `nats-server` (JetStream KV backing the deny-set),
+//! and a mock HTTP upstream that records what the gateway forwarded.
+//!
+//! Requires `nats-server` on PATH — run via `mise run test:integration:rs`.
+//! Signing keys + pool keys are passed via the gateway's *config* (not NATS); NATS carries only the
+//! deny-set. Every component picks a free port and cleans up on drop, so tests run in parallel.
+
+#![allow(dead_code)]
+// Test harness: `.unwrap()`/`.expect()`/`panic!` are assertions, not production code. See e2e.rs.
+#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
+
+use std::io::Write;
+use std::net::TcpListener as StdTcpListener;
+use std::process::{Child, Command};
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use base64::Engine;
+use bytes::Bytes;
+use http_body_util::{BodyExt, Full};
+use hyper::service::service_fn;
+use hyper::{Request, Response};
+use hyper_util::rt::{TokioExecutor, TokioIo};
+use hyper_util::server::conn::auto;
+use store::Connection;
+use tokio::net::TcpListener;
+use tokio::time::{sleep, timeout};
+use tokio_rustls::TlsAcceptor;
+
+/// Hand out a TCP port no other `free_port()` call in this test binary has returned.
+///
+/// Tests run as concurrent threads in **one** process, and closing a `bind(:0)` listener lets the OS
+/// immediately re-hand that ephemeral port to the next `bind(:0)` — so two `free_port()` calls (a
+/// gateway's `listen` + `metrics` ports, or two tests at once) can collide, and a component then
+/// fails to bind → a *different* test flakes. A process-global reservation set makes every returned
+/// port distinct within the run; binding fresh listeners on collision forces the OS off the just-used
+/// port (it can't re-hand a port still held open) so the loop makes progress.
+///
+/// A residual TOCTOU window remains between returning a port and a *subprocess* (nats/gateway) binding
+/// it, vs. other OS processes — unavoidable when the bind happens in another process. In-process
+/// servers must instead bind `:0` and read the port back (see `MockUpstream`), which has no window.
+pub fn free_port() -> u16 {
+    use std::collections::HashSet;
+    use std::sync::OnceLock;
+    static USED: OnceLock<Mutex<HashSet<u16>>> = OnceLock::new();
+    let used = USED.get_or_init(|| Mutex::new(HashSet::new()));
+
+    let mut held = Vec::new();
+    for _ in 0..1000 {
+        let listener = StdTcpListener::bind("127.0.0.1:0").unwrap();
+        let port = listener.local_addr().unwrap().port();
+        // A poisoned lock only means another test panicked mid-insert; the set is still usable.
+        if used.lock().unwrap_or_else(|p| p.into_inner()).insert(port) {
+            return port; // `listener` drops here, freeing the port for the (sub)process to bind.
+        }
+        // Already handed out: keep this listener open so the next bind gets a different port, then
+        // try again. The held listeners all drop at return, releasing those ports back to the OS.
+        held.push(listener);
+    }
+    panic!("could not find an unused free port after 1000 attempts");
+}
+
+/// Base64 (standard) — used to put an Ed25519 public key into the gateway's `signing_keys` config.
+pub fn b64(bytes: &[u8]) -> String {
+    base64::engine::general_purpose::STANDARD.encode(bytes)
+}
+
+/// Deterministic Ed25519 keypair: (raw 32-byte public key, signing key).
+pub fn test_keypair(seed: u8) -> (Vec<u8>, ed25519_dalek::SigningKey) {
+    let sk = ed25519_dalek::SigningKey::from_bytes(&[seed; 32]);
+    (sk.verifying_key().to_bytes().to_vec(), sk)
+}
+
+/// Poll `127.0.0.1:{port}` every 50 ms until a TCP connect succeeds; panic (naming `what`) if the
+/// port is still not accepting after 20 s.
+async fn wait_for_port(port: u16, what: &str) {
+    timeout(Duration::from_secs(20), async {
+        loop {
+            if tokio::net::TcpStream::connect(("127.0.0.1", port))
+                .await
+                .is_ok()
+            {
+                return;
+            }
+            sleep(Duration::from_millis(50)).await;
+        }
+    })
+    .await
+    .unwrap_or_else(|_| panic!("{what} did not come up on port {port}"));
+}
+
+// --- nats-server (JetStream) ------------------------------------------------
+
+/// A `nats-server` child process with JetStream enabled, listening on `port` and storing its state
+/// under `store_dir`. Killed, reaped, and cleaned up on drop.
+pub struct Nats {
+    child: Child,
+    pub port: u16,
+    store_dir: std::path::PathBuf,
+}
+
+impl Nats {
+    /// Spawn `nats-server` on a free port and wait until it accepts connections.
+    pub async fn start() -> Self {
+        let port = free_port();
+        let store_dir = std::env::temp_dir().join(format!("beyond-ai-nats-{port}"));
+        // The directory name is keyed only by port, so a crashed earlier run that drew the same port
+        // may have left JetStream state behind; start from an empty directory.
+        let _ = std::fs::remove_dir_all(&store_dir);
+        let _ = std::fs::create_dir_all(&store_dir);
+        let child = Command::new("nats-server")
+            .args(["-js", "-a", "127.0.0.1", "-p"])
+            .arg(port.to_string())
+            .arg("-sd")
+            // Pass the path as an `OsStr` rather than unwrapping a UTF-8 conversion.
+            .arg(&store_dir)
+            .stdout(std::process::Stdio::null())
+            .stderr(std::process::Stdio::null())
+            .spawn()
+            .expect("spawn nats-server (on PATH? run via mise)");
+        // Build the guard *before* waiting so a startup timeout still kills the child via `Drop`.
+        let nats = Nats {
+            child,
+            port,
+            store_dir,
+        };
+        wait_for_port(port, "nats-server").await;
+        nats
+    }
+}
+
+impl Nats {
+    /// Kill the server mid-test (for fail-open coverage). Idempotent with `Drop`.
+    pub fn stop(&mut self) {
+        let _ = self.child.kill();
+        // Reap the child so it doesn't linger as a zombie for the rest of the test run.
+        let _ = self.child.wait();
+    }
+}
+
+impl Drop for Nats {
+    fn drop(&mut self) {
+        // Kill *and* wait: the server must have exited before its store directory is removed, and
+        // an un-waited child would stay a zombie until the test process ends.
+        self.stop();
+        let _ = std::fs::remove_dir_all(&self.store_dir);
+    }
+}
+
+/// Put `key = value` into the `ai-gateway` KV bucket of the NATS server on `nats_port`.
+pub async fn put_kv(nats_port: u16, key: &str, value: &[u8]) {
+    open_writer(nats_port).await.put(key, value).await.unwrap();
+}
+
+/// Delete `key` from the `ai-gateway` KV bucket of the NATS server on `nats_port`.
+pub async fn del_kv(nats_port: u16, key: &str) {
+    open_writer(nats_port).await.delete(key).await.unwrap();
+}
+
+/// Connect to the local NATS server (no credentials) and return a writer for the `ai-gateway` bucket.
+// NOTE(review): the `Arc<…>` type argument was lost when this patch text was extracted;
+// `dyn store::KvWriter` is an assumption — confirm against the `store` crate's `writer()` signature.
+async fn open_writer(nats_port: u16) -> std::sync::Arc<dyn store::KvWriter> {
+    let conn = store::NatsConnection::new(store::NatsConnectionConfig {
+        url: format!("nats://127.0.0.1:{nats_port}"),
+        creds: None,
+        creds_file: None,
+    });
+    conn.connect().await.unwrap();
+    let kv = conn
+        .store_with_config(store::StoreConfig {
+            name: "ai-gateway".into(),
+            ..Default::default()
+        })
+        .await
+        .unwrap();
+    kv.writer().expect("bucket is writable")
+}
+
+// --- mock upstream provider -------------------------------------------------
+
+/// What the mock upstream replies with.
+#[derive(Clone, Copy)]
+pub enum Mode {
+    /// OpenAI-shaped non-streaming JSON body.
+    Json,
+    /// OpenAI-shaped SSE stream with a terminal usage chunk.
+    Sse,
+    /// Anthropic-shaped non-streaming JSON body (`usage.input_tokens`).
+    AnthropicJson,
+    /// OpenAI-shaped SSE stream with >128 KiB of content *before* the usage chunk — forces the
+    /// proxy's response-tail compaction path.
+    SseLarge,
+    /// Always reply with this HTTP status and a small JSON error body — for circuit-breaker tests
+    /// (5xx trips the breaker; 4xx/429 do not).
+    Status(u16),
+}
+
+/// The most recent request the mock received: its path, the headers the tests assert on, and the
+/// raw body.
+#[derive(Default, Clone)]
+pub struct Captured {
+    pub path: String,
+    pub authorization: Option<String>,
+    pub x_api_key: Option<String>,
+    pub host: Option<String>,
+    pub body: Vec<u8>,
+}
+
+/// An in-process mock provider: the accept-loop task, the last captured request, and a hit counter.
+pub struct MockUpstream {
+    pub port: u16,
+    captured: Arc<Mutex<Option<Captured>>>,
+    hits: Arc<std::sync::atomic::AtomicUsize>,
+    task: tokio::task::JoinHandle<()>,
+}
+
+const CANNED_JSON: &str = r#"{"id":"chatcmpl-mock","object":"chat.completion","model":"gpt-4o-2024-08-06","choices":[{"index":0,"message":{"role":"assistant","content":"hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":7,"total_tokens":18}}"#;
+
+const CANNED_SSE: &str = "data: {\"id\":\"chatcmpl-mock\",\"object\":\"chat.completion.chunk\",\"model\":\"gpt-4o-2024-08-06\",\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\ndata: {\"choices\":[],\"usage\":{\"prompt_tokens\":5,\"completion_tokens\":9}}\n\ndata: [DONE]\n\n";
+
+const CANNED_ANTHROPIC_JSON: &str = r#"{"id":"msg_mock","type":"message","model":"claude-opus-4-8","content":[{"type":"text","text":"hi"}],"usage":{"input_tokens":13,"output_tokens":7}}"#;
+
+/// An OpenAI SSE stream whose first chunk carries ~130 KiB of content, pushing the proxy's response
+/// tail past `2 × USAGE_TAIL_CAP` (128 KiB) so it compacts at least once before the trailing usage
+/// chunk arrives. The usage event must survive in the retained 64 KiB tail.
+fn large_sse() -> String {
+    let filler = "x".repeat(130 * 1024);
+    format!(
+        "data: {{\"id\":\"chatcmpl-mock\",\"object\":\"chat.completion.chunk\",\"model\":\"gpt-4o-2024-08-06\",\"choices\":[{{\"delta\":{{\"content\":\"{filler}\"}}}}]}}\n\n\
+         data: {{\"choices\":[],\"usage\":{{\"prompt_tokens\":5,\"completion_tokens\":9}}}}\n\n\
+         data: [DONE]\n\n"
+    )
+}
+
+/// The canned `(content-type, body)` for a mode. `SseLarge` allocates; the rest are static.
+fn canned_body(mode: Mode) -> (&'static str, Bytes) {
+    match mode {
+        Mode::Json => (
+            "application/json",
+            Bytes::from_static(CANNED_JSON.as_bytes()),
+        ),
+        Mode::Sse => (
+            "text/event-stream",
+            Bytes::from_static(CANNED_SSE.as_bytes()),
+        ),
+        Mode::AnthropicJson => (
+            "application/json",
+            Bytes::from_static(CANNED_ANTHROPIC_JSON.as_bytes()),
+        ),
+        Mode::SseLarge => ("text/event-stream", Bytes::from(large_sse())),
+        // The status is applied by `mock_handle`; the body is a stock error shape.
+        Mode::Status(_) => (
+            "application/json",
+            Bytes::from_static(br#"{"error":{"message":"mock"}}"#),
+        ),
+    }
+}
+
+/// The protocol the gateway used to *reach the mock* — derived from the version hyper parsed off the
+/// wire. Echoed back in `x-mock-proto`; since the gateway relays response headers untouched, the bench
+/// client reads this to prove which protocol the gateway→upstream hop negotiated (H2 vs H1).
+fn proto_label(version: hyper::Version) -> &'static str {
+    match version {
+        hyper::Version::HTTP_2 => "h2",
+        _ => "http/1.1",
+    }
+}
+
+/// Shared request handler for both the plaintext and TLS listeners: record what the gateway forwarded,
+/// then return the canned body tagged with the negotiated protocol.
+async fn mock_handle(
+    req: Request<hyper::body::Incoming>,
+    cap: Arc<Mutex<Option<Captured>>>,
+    hits: Arc<std::sync::atomic::AtomicUsize>,
+    mode: Mode,
+) -> Result<Response<Full<Bytes>>, std::convert::Infallible> {
+    hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+    let version = req.version();
+    let path = req.uri().path().to_string();
+    // Pull the headers we record before consuming the body (which moves `req`).
+    let (authorization, x_api_key, host) = {
+        let h = req.headers();
+        let get = |k: &str| h.get(k).and_then(|v| v.to_str().ok()).map(String::from);
+        (get("authorization"), get("x-api-key"), get("host"))
+    };
+    // A body that fails to read is recorded as empty rather than failing the mock.
+    let body = req
+        .into_body()
+        .collect()
+        .await
+        .map(|b| b.to_bytes().to_vec())
+        .unwrap_or_default();
+    // Only the most recent request is kept; each new one overwrites the last.
+    *cap.lock().unwrap() = Some(Captured {
+        path,
+        authorization,
+        x_api_key,
+        host,
+        body,
+    });
+    let (ct, payload) = canned_body(mode);
+    let status = match mode {
+        Mode::Status(s) => s,
+        _ => 200,
+    };
+    Ok(Response::builder()
+        .status(status)
+        .header("content-type", ct)
+        .header("x-mock-proto", proto_label(version))
+        .body(Full::new(payload))
+        .unwrap())
+}
+
+impl MockUpstream {
+    /// Start a plaintext HTTP/1.1 mock that answers every request per `mode`.
+    pub async fn start(mode: Mode) -> Self {
+        // Bind `:0` and read the port back, keeping the listener open the whole time — no
+        // free_port()→rebind window for another test to slip into (this is an in-process server).
+        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let port = listener.local_addr().unwrap().port();
+        let captured: Arc<Mutex<Option<Captured>>> = Arc::new(Mutex::new(None));
+        let hits = Arc::new(std::sync::atomic::AtomicUsize::new(0));
+        let cap = captured.clone();
+        let hit_counter = hits.clone();
+        let task = tokio::spawn(async move {
+            loop {
+                let Ok((stream, _)) = listener.accept().await else {
+                    break;
+                };
+                let io = TokioIo::new(stream);
+                let cap = cap.clone();
+                let hit_counter = hit_counter.clone();
+                // One task per connection so a slow client can't stall the accept loop.
+                tokio::spawn(async move {
+                    let svc = service_fn(move |req| {
+                        mock_handle(req, cap.clone(), hit_counter.clone(), mode)
+                    });
+                    let _ = hyper::server::conn::http1::Builder::new()
+                        .serve_connection(io, svc)
+                        .await;
+                });
+            }
+        });
+        MockUpstream {
+            port,
+            captured,
+            hits,
+            task,
+        }
+    }
+
+    /// Like [`start`], but terminates **TLS** and serves H1 *and* H2 on the one listener (protocol
+    /// chosen by ALPN, via hyper-util's auto builder). Presents a throwaway self-signed cert, so the
+    /// gateway must be pointed at it with `upstream_tls = true` and `upstream_verify_cert = false`.
+    /// This is what lets the concurrency bench drive the gateway's real TLS+ALPN+H2 path against a
+    /// local mock. Returns the mock; reach it at `authority()` (host `127.0.0.1`).
+    pub async fn start_tls(mode: Mode) -> Self {
+        // rustls 0.23 needs a process crypto provider; both ring and aws-lc are compiled in (so there's
+        // no default), pick ring to match the gateway. Idempotent across multiple mocks in one process.
+        let _ = rustls::crypto::ring::default_provider().install_default();
+
+        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let port = listener.local_addr().unwrap().port();
+
+        let ck = rcgen::generate_simple_self_signed(vec![
+            "127.0.0.1".to_string(),
+            "localhost".to_string(),
+        ])
+        .expect("self-signed cert");
+        let certs = vec![ck.cert.der().clone()];
+        let key = rustls::pki_types::PrivateKeyDer::Pkcs8(ck.key_pair.serialize_der().into());
+        let mut tls = rustls::ServerConfig::builder()
+            .with_no_client_auth()
+            .with_single_cert(certs, key)
+            .expect("server tls config");
+        // Offer both so the gateway's ALPN preference decides: H2H1 → h2, H1 → http/1.1.
+        tls.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
+        let acceptor = TlsAcceptor::from(Arc::new(tls));
+
+        let captured: Arc<Mutex<Option<Captured>>> = Arc::new(Mutex::new(None));
+        let hits = Arc::new(std::sync::atomic::AtomicUsize::new(0));
+        let cap = captured.clone();
+        let hit_counter = hits.clone();
+        let task = tokio::spawn(async move {
+            loop {
+                let Ok((stream, _)) = listener.accept().await else {
+                    break;
+                };
+                let acceptor = acceptor.clone();
+                let cap = cap.clone();
+                let hit_counter = hit_counter.clone();
+                tokio::spawn(async move {
+                    // A failed handshake just drops this connection; the listener keeps accepting.
+                    let Ok(tls_stream) = acceptor.accept(stream).await else {
+                        return;
+                    };
+                    let io = TokioIo::new(tls_stream);
+                    let svc = service_fn(move |req| {
+                        mock_handle(req, cap.clone(), hit_counter.clone(), mode)
+                    });
+                    // Auto builder: serves H2 or H1 per the negotiated ALPN.
+                    let _ = auto::Builder::new(TokioExecutor::new())
+                        .serve_connection(io, svc)
+                        .await;
+                });
+            }
+        });
+        MockUpstream {
+            port,
+            captured,
+            hits,
+            task,
+        }
+    }
+
+    /// The `host:port` the gateway should be pointed at to reach this mock.
+    pub fn authority(&self) -> String {
+        format!("127.0.0.1:{}", self.port)
+    }
+
+    /// A copy of the most recent request the mock received, or `None` if it has seen none.
+    pub fn captured(&self) -> Option<Captured> {
+        self.captured.lock().unwrap().clone()
+    }
+
+    /// Total requests the mock has received — used to prove an open circuit breaker stops requests
+    /// from reaching the upstream at all.
+    pub fn hits(&self) -> usize {
+        self.hits.load(std::sync::atomic::Ordering::Relaxed)
+    }
+}
+
+impl Drop for MockUpstream {
+    fn drop(&mut self) {
+        // Abort the accept loop, which owns (and so closes) the listener.
+        self.task.abort();
+    }
+}
+
+// --- the real beyond-ai binary ----------------------------------------------
+
+/// A running `beyond-ai` child process, the ports it was started with, and the path of its config.
+pub struct Gateway {
+    child: Child,
+    pub port: u16,
+    pub metrics_port: u16,
+    config_path: std::path::PathBuf,
+}
+
+/// The managed pool key configured for a provider. Each provider gets a distinct value so a test
+/// can assert the gateway swapped in the *right* one.
+fn pool_key(provider: &str) -> &'static str { + match provider { + "openai" => "sk-pool-secret", + "anthropic" => "sk-anthropic-pool", + "fireworks" => "sk-fireworks-pool", + "openrouter" => "sk-openrouter-pool", + _ => "sk-unknown-pool", + } +} + +/// Builds a gateway config, choosing which providers are *configured* (authority → the mock + a +/// pool key). A managed request to a provider absent from this list has no pool key → 503. +pub struct GatewayBuilder { + nats_port: u16, + authority: String, + signkey_b64: String, + providers: Vec<&'static str>, + snapshot_path: Option, + real_upstreams: bool, + pool_key_overrides: Vec<(String, String)>, + rate_limit_rps: Option, + byo_rate_limit_rps: Option, + /// Point at a TLS mock (`MockUpstream::start_tls`): `upstream_tls = true` + skip cert verification + /// (the mock is self-signed), while still routing via `provider_authorities`. For the H2 bench. + tls_upstream: bool, + /// Override the gateway's `upstream_http2` (H2H1 vs H1 ALPN). `None` ⇒ leave the gateway default. + upstream_http2: Option, + /// Override the per-provider circuit-breaker threshold (failures in the window before opening). + /// `None` ⇒ leave the gateway default; `Some(0)` disables the breaker. + circuit_breaker_threshold: Option, +} + +impl GatewayBuilder { + /// Set which providers are configured. Defaults to `["openai", "fireworks"]`. + pub fn providers(mut self, providers: &[&'static str]) -> Self { + self.providers = providers.to_vec(); + self + } + + /// Point the gateway at the **real** provider hosts over TLS (the `route::KNOWN_PROVIDERS` + /// defaults), instead of the plaintext mock. Used by the live smoke tests (`tests/smoke.rs`): + /// no authority overrides, no pool keys, no signing keys — smoke traffic is BYO (the caller's + /// real provider token, passed through), so none of that is needed. 
+ pub fn real_upstreams(mut self) -> Self { + self.real_upstreams = true; + self + } + + /// Set the managed pool key for a provider by name — in `real_upstreams` mode this is the *real* + /// provider key the gateway swaps in for a managed (`bai_…`) request. Combine with a signing key + /// (the `signkey_b64` passed to `builder`) to smoke-test the full managed path against the real + /// provider. + pub fn pool_key(mut self, provider: &str, key: &str) -> Self { + self.pool_key_overrides + .push((provider.to_string(), key.to_string())); + self + } + + /// Point the gateway at an on-disk deny-set snapshot. Pass the same path to two `start()` calls + /// to model a restart that reloads from disk. + pub fn snapshot_path(mut self, path: impl Into) -> Self { + self.snapshot_path = Some(path.into()); + self + } + + /// Override the per-credential request-rate ceiling (requests/sec). The harness default leaves + /// the gateway's own generous default (100) in place; set a small value to exercise the 429 path. + pub fn rate_limit_rps(mut self, rps: u32) -> Self { + self.rate_limit_rps = Some(rps); + self + } + + /// Override the aggregate BYO request-rate ceiling (requests/sec). `0` disables that tier so a + /// per-credential 429 test isn't perturbed by the shared BYO bucket. + pub fn byo_rate_limit_rps(mut self, rps: u32) -> Self { + self.byo_rate_limit_rps = Some(rps); + self + } + + /// Talk to the upstream over TLS without verifying its cert — for a `MockUpstream::start_tls` + /// target (self-signed). The gateway still routes via `provider_authorities` (the mock), but with + /// real TLS + ALPN, so the H2 path is exercised. Used by the concurrency bench. + pub fn tls_upstream(mut self) -> Self { + self.tls_upstream = true; + self + } + + /// Force the gateway's upstream ALPN: `true` ⇒ H2H1 (prefer H2), `false` ⇒ H1 only. The bench + /// starts one gateway each way against the same TLS mock to compare them. 
+ pub fn upstream_http2(mut self, on: bool) -> Self { + self.upstream_http2 = Some(on); + self + } + + /// Set the per-provider circuit-breaker failure threshold (a tight window/reset are written too, + /// so the breaker trips fast in-test). `0` disables it. + pub fn circuit_breaker_threshold(mut self, threshold: u32) -> Self { + self.circuit_breaker_threshold = Some(threshold); + self + } + + pub async fn start(self) -> Gateway { + let port = free_port(); + let metrics_port = free_port(); + let config_path = std::env::temp_dir().join(format!("beyond-ai-config-{port}.toml")); + let nats_port = self.nats_port; + // Scalars first, `[…]` tables last (TOML ordering). + let tls = self.real_upstreams || self.tls_upstream; + let mut cfg = format!( + "listen = \"127.0.0.1:{port}\"\n\ + metrics_listen = \"127.0.0.1:{metrics_port}\"\n\ + nats_url = \"nats://127.0.0.1:{nats_port}\"\n\ + config_bucket = \"ai-gateway\"\n\ + upstream_tls = {tls}\n" + ); + // TLS mock is self-signed → don't verify its cert (production always verifies). + if self.tls_upstream { + cfg.push_str("upstream_verify_cert = false\n"); + } + if let Some(h2) = self.upstream_http2 { + cfg.push_str(&format!("upstream_http2 = {h2}\n")); + } + if let Some(path) = &self.snapshot_path { + cfg.push_str(&format!("snapshot_path = \"{path}\"\n")); + } + if let Some(rps) = self.rate_limit_rps { + cfg.push_str(&format!("rate_limit_rps = {rps}\n")); + } + if let Some(rps) = self.byo_rate_limit_rps { + cfg.push_str(&format!("byo_rate_limit_rps = {rps}\n")); + } + if let Some(threshold) = self.circuit_breaker_threshold { + // Tight window + reset so the test trips and recovers quickly. + cfg.push_str(&format!( + "circuit_breaker_threshold = {threshold}\n\ + circuit_breaker_window_secs = 60\n\ + circuit_breaker_reset_secs = 1\n" + )); + } + if self.real_upstreams { + // Real-host smoke mode: built-in provider defaults (no authority overrides). 
For a + // *managed* smoke we still write the caller-supplied pool key(s) — the real provider key + // the gateway swaps in — and the signing key the minted virtual key verifies against. + // With neither set, this is a BYO smoke (the caller's token passes through). + if !self.pool_key_overrides.is_empty() { + cfg.push_str("\n[pool_keys]\n"); + for (p, k) in &self.pool_key_overrides { + cfg.push_str(&format!("{p} = \"{k}\"\n")); + } + } + if !self.signkey_b64.is_empty() { + cfg.push_str(&format!("\n[signing_keys]\n1 = \"{}\"\n", self.signkey_b64)); + } + } else { + // Every configured provider points at the one mock upstream... + cfg.push_str("\n[provider_authorities]\n"); + for p in &self.providers { + cfg.push_str(&format!("{p} = \"{}\"\n", self.authority)); + } + // ...with a distinct pool key per provider so key-swap assertions can tell them apart. + cfg.push_str("\n[pool_keys]\n"); + for p in &self.providers { + cfg.push_str(&format!("{p} = \"{}\"\n", pool_key(p))); + } + cfg.push_str(&format!("\n[signing_keys]\n1 = \"{}\"\n", self.signkey_b64)); + } + std::fs::File::create(&config_path) + .unwrap() + .write_all(cfg.as_bytes()) + .unwrap(); + + let child = Command::new(env!("CARGO_BIN_EXE_beyond-ai")) + .arg("run") + .arg("-c") + .arg(&config_path) + .env( + "AI_LOG", + std::env::var("AI_LOG").unwrap_or_else(|_| "warn".into()), + ) + .spawn() + .expect("spawn beyond-ai"); + let gw = Gateway { + child, + port, + metrics_port, + config_path, + }; + wait_for_port(port, "beyond-ai").await; + gw + } +} + +impl Gateway { + /// Start the gateway pointed at `nats` (deny-set) + the mock upstream, configuring the OpenAI + /// and Fireworks providers. Signing key + pool key come from config (mirrors production: NATS + /// holds only the deny-set). For other provider sets use [`Gateway::builder`]. 
+ pub async fn start(nats_port: u16, openai_authority: &str, signkey_b64: &str) -> Self { + Gateway::builder(nats_port, openai_authority, signkey_b64) + .start() + .await + } + + /// A configurable gateway (which providers exist, etc.). Defaults match [`Gateway::start`]. + pub fn builder(nats_port: u16, authority: &str, signkey_b64: &str) -> GatewayBuilder { + GatewayBuilder { + nats_port, + authority: authority.to_string(), + signkey_b64: signkey_b64.to_string(), + providers: vec!["openai", "fireworks"], + snapshot_path: None, + real_upstreams: false, + pool_key_overrides: Vec::new(), + rate_limit_rps: None, + byo_rate_limit_rps: None, + tls_upstream: false, + upstream_http2: None, + circuit_breaker_threshold: None, + } + } + + pub fn url(&self) -> String { + format!("http://127.0.0.1:{}", self.port) + } + + pub async fn metrics(&self) -> String { + reqwest::get(format!("http://127.0.0.1:{}/metrics", self.metrics_port)) + .await + .unwrap() + .text() + .await + .unwrap() + } + + /// GET a path on the admin/metrics listener, returning `(status, body)`. Used to probe + /// `/livez` and `/readyz` (which live on `metrics_port`, alongside `/metrics`). 
+ pub async fn admin_get(&self, path: &str) -> (u16, String) { + let resp = reqwest::get(format!("http://127.0.0.1:{}{path}", self.metrics_port)) + .await + .unwrap(); + let status = resp.status().as_u16(); + (status, resp.text().await.unwrap()) + } +} + +impl Drop for Gateway { + fn drop(&mut self) { + let _ = self.child.kill(); + let _ = std::fs::remove_file(&self.config_path); + } +} + +// --- assertions ------------------------------------------------------------- + +pub fn parse_metric(metrics: &str, name: &str, label_value: &str) -> f64 { + metrics + .lines() + .find(|l| l.starts_with(name) && l.contains(label_value)) + .and_then(|l| l.rsplit(' ').next()) + .and_then(|v| v.parse().ok()) + .unwrap_or(0.0) +} + +pub async fn wait_for_metric(gw: &Gateway, name: &str, label: &str, min: f64) { + let r = timeout(Duration::from_secs(5), async { + loop { + if parse_metric(&gw.metrics().await, name, label) >= min { + return; + } + sleep(Duration::from_millis(150)).await; + } + }) + .await; + assert!(r.is_ok(), "metric {name}{{{label}}} never reached {min}"); +} + +pub async fn wait_for_status(want: u16, mut f: F) +where + F: FnMut() -> Fut, + Fut: std::future::Future, +{ + let r = timeout(Duration::from_secs(10), async { + loop { + if f().await == want { + return; + } + sleep(Duration::from_millis(150)).await; + } + }) + .await; + assert!(r.is_ok(), "status never became {want}"); +} diff --git a/tests/e2e.rs b/tests/e2e.rs new file mode 100644 index 0000000..a12207d --- /dev/null +++ b/tests/e2e.rs @@ -0,0 +1,929 @@ +//! End-to-end: real `beyond-ai` binary + real nats-server + mock upstream. +//! Run via `mise run test:integration:rs` (needs `nats-server` on PATH). +//! +//! Signing key + pool key come from the gateway's *config*; NATS carries only the deny-set. + +// Test target: `.unwrap()`/`.expect()`/`panic!` are assertions, not production code — allow the +// panic-surface restriction lints denied workspace-wide in `[workspace.lints.clippy]`. 
+#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +mod common; + +use beyond_ai::key::{VirtualKey, mint}; +use common::*; + +fn body_for(model: &str) -> String { + format!(r#"{{"model":"{model}","messages":[{{"role":"user","content":"hi"}}]}}"#) +} + +async fn post_status(client: &reqwest::Client, url: &str, key: &str, body: String) -> u16 { + client + .post(format!("{url}/v1/chat/completions")) + .header("authorization", format!("Bearer {key}")) + .header("content-type", "application/json") + .body(body) + .send() + .await + .map(|r| r.status().as_u16()) + .unwrap_or(0) +} + +/// POST to an arbitrary gateway path with a Bearer key — exercises provider routing by the first +/// path segment (`/{provider}/…`) vs the bare-path default. +async fn post_path_status( + client: &reqwest::Client, + url: &str, + path: &str, + key: &str, + body: String, +) -> u16 { + client + .post(format!("{url}{path}")) + .header("authorization", format!("Bearer {key}")) + .header("content-type", "application/json") + .body(body) + .send() + .await + .map(|r| r.status().as_u16()) + .unwrap_or(0) +} + +/// POST with the virtual key in the `x-api-key` header (Anthropic-SDK style) instead of `Bearer`. +async fn post_status_xapikey( + client: &reqwest::Client, + url: &str, + path: &str, + key: &str, + body: String, +) -> u16 { + client + .post(format!("{url}{path}")) + .header("x-api-key", key) + .header("content-type", "application/json") + .body(body) + .send() + .await + .map(|r| r.status().as_u16()) + .unwrap_or(0) +} + +/// Send a hand-written HTTP/1.1 request and return the response status. Used to declare a +/// Content-Length the body guard must reject *without* actually transferring that many bytes +/// (the guard fires on the header, before any body is read). 
+async fn raw_status(port: u16, request: &str) -> u16 { + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + let mut s = tokio::net::TcpStream::connect(("127.0.0.1", port)) + .await + .unwrap(); + s.write_all(request.as_bytes()).await.unwrap(); + s.flush().await.unwrap(); + let mut buf = vec![0u8; 256]; + let n = s.read(&mut buf).await.unwrap(); + String::from_utf8_lossy(&buf[..n]) + .split_whitespace() + .nth(1) + .and_then(|c| c.parse().ok()) + .unwrap_or(0) +} + +#[tokio::test] +async fn managed_swaps_key_relays_body_and_meters_usage() { + let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(1); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let vkey = mint( + &VirtualKey { + tenant_id: 42, + vpc_id: 7, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + + { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(200, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { post_status(&c, &u, &k, body_for("gpt-4o")).await } + }) + .await; + } + + let resp = client + .post(format!("{}/v1/chat/completions", gw.url())) + .header("authorization", format!("Bearer {vkey}")) + .header("content-type", "application/json") + .body(body_for("gpt-4o")) + .send() + .await + .unwrap(); + assert_eq!(resp.status(), 200); + assert!(resp.text().await.unwrap().contains("\"chatcmpl-mock\"")); + + // Managed: the mock saw the real pool key, never the virtual key. + let cap = mock.captured().expect("mock received a request"); + assert_eq!(cap.path, "/v1/chat/completions"); + assert_eq!(cap.authorization.as_deref(), Some("Bearer sk-pool-secret")); + assert!(!cap.body.is_empty()); + + wait_for_metric(&gw, "ai_tokens_total", "input", 11.0).await; + + // Bad managed key → 401. 
+ assert_eq!( + post_status( + &client, + &gw.url(), + "bai_v1.1.bogus.bogus", + body_for("gpt-4o") + ) + .await, + 401 + ); +} + +#[tokio::test] +async fn byo_passes_user_token_through_unchanged() { + let nats = Nats::start().await; + let (pubkey, _sk) = test_keypair(1); // gateway still needs a signing key in config to boot + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let client = reqwest::Client::new(); + // A raw provider token (not `bai_`) → BYO → forwarded verbatim. + { + let (c, u) = (client.clone(), gw.url()); + wait_for_status(200, move || { + let (c, u) = (c.clone(), u.clone()); + async move { post_status(&c, &u, "sk-user-byo", body_for("gpt-4o")).await } + }) + .await; + } + let cap = mock.captured().expect("mock received a request"); + assert_eq!( + cap.authorization.as_deref(), + Some("Bearer sk-user-byo"), + "BYO token must pass through unchanged (no swap)" + ); +} + +#[tokio::test] +async fn fireworks_path_prefix_strips_and_swaps_pool_key() { + let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(4); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let vkey = mint( + &VirtualKey { + tenant_id: 5, + vpc_id: 6, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + // Fireworks is selected by the `/fireworks` path segment; the client uses its native base path + // (`/inference/v1`). The gateway strips `/fireworks` and forwards the rest VERBATIM, and a + // managed key swaps to the Fireworks-specific pool key. 
+ { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(200, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { + post_path_status( + &c, + &u, + "/fireworks/inference/v1/chat/completions", + &k, + body_for("accounts/fireworks/models/llama-v3p1-70b-instruct"), + ) + .await + } + }) + .await; + } + + let cap = mock.captured().expect("mock received a request"); + assert_eq!( + cap.authorization.as_deref(), + Some("Bearer sk-fireworks-pool"), + "managed Fireworks request must swap to the Fireworks pool key" + ); + // The `/fireworks` segment is stripped; the provider-native remainder is forwarded verbatim + // (the gateway does no per-provider path rewriting). + assert_eq!( + cap.path, "/inference/v1/chat/completions", + "first segment (provider) stripped; remainder forwarded verbatim" + ); +} + +#[tokio::test] +async fn openai_prefix_matches_bare_default() { + // `/openai/v1/chat/completions` (explicit prefix) must reach OpenAI identically to bare + // `/v1/chat/completions` (dialect default): same pool-key swap, same upstream path after strip. 
+ let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(8); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let vkey = mint( + &VirtualKey { + tenant_id: 1, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(200, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { + post_path_status( + &c, + &u, + "/openai/v1/chat/completions", + &k, + body_for("gpt-4o"), + ) + .await + } + }) + .await; + } + let cap = mock.captured().expect("mock received a request"); + assert_eq!(cap.authorization.as_deref(), Some("Bearer sk-pool-secret")); + assert_eq!( + cap.path, "/v1/chat/completions", + "`/openai` stripped → same upstream path as the bare `/v1` default" + ); +} + +#[tokio::test] +async fn unknown_provider_segment_returns_404() { + // An unrecognized first path segment that isn't the bare `/v1` default is a routing miss — 404 + // from the gateway (before any auth), not a confusing upstream error. Provider resolution is the + // very first step, so this fires regardless of the key. 
+ let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(9); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let vkey = mint( + &VirtualKey { + tenant_id: 1, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(404, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { + post_path_status(&c, &u, "/bogus/v1/chat/completions", &k, body_for("gpt-4o")).await + } + }) + .await; +} + +#[tokio::test] +async fn streaming_relays_sse_and_meters_usage() { + let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(3); + let mock = MockUpstream::start(Mode::Sse).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let vkey = mint( + &VirtualKey { + tenant_id: 7, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + let body = r#"{"model":"gpt-4o","stream":true,"messages":[{"role":"user","content":"hi"}]}"# + .to_string(); + + { + let (c, u, k, b) = (client.clone(), gw.url(), vkey.clone(), body.clone()); + wait_for_status(200, move || { + let (c, u, k, b) = (c.clone(), u.clone(), k.clone(), b.clone()); + async move { post_status(&c, &u, &k, b).await } + }) + .await; + } + + let resp = client + .post(format!("{}/v1/chat/completions", gw.url())) + .header("authorization", format!("Bearer {vkey}")) + .header("content-type", "application/json") + .body(body) + .send() + .await + .unwrap(); + assert_eq!(resp.status(), 200); + assert!(resp.text().await.unwrap().contains("[DONE]")); + wait_for_metric(&gw, "ai_tokens_total", "input", 5.0).await; + + // The client streamed without `stream_options`, so the managed OpenAI path must have buffered the + // body and spliced `stream_options.include_usage` in before forwarding — otherwise OpenAI emits no + // usage chunk and the request is unbillable. 
The metric above can't prove this (the mock returns a + // usage chunk unconditionally), so assert the *forwarded body* the mock actually received carries + // the injected fragment. This is the only coverage that the splice in `request_body_filter` ran. + let cap = mock.captured().expect("mock received a request"); + let needle = br#""stream_options":{"include_usage":true}"#; + assert!( + cap.body.windows(needle.len()).any(|w| w == needle), + "managed OpenAI streaming body must have stream_options.include_usage injected; got: {}", + String::from_utf8_lossy(&cap.body) + ); +} + +#[tokio::test] +async fn blackhole_denies_then_restores() { + let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(2); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let vkey = mint( + &VirtualKey { + tenant_id: 99, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + + let probe = |want: u16| { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + async move { + wait_for_status(want, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { post_status(&c, &u, &k, body_for("gpt-4o")).await } + }) + .await + } + }; + + probe(200).await; // ready + allowed + put_kv(nats.port, "blackhole.99", b"spend").await; + probe(402).await; // denied once the watch delta lands + del_kv(nats.port, "blackhole.99").await; + probe(200).await; // restored +} + +#[tokio::test] +async fn blackhole_fraud_returns_403() { + // The spend path (402) is covered above; fraud takes the separate `DenyReason::Fraud` branch + // and must surface as 403 (not 402, not 200) end-to-end. 
+ let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(20); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let vkey = mint( + &VirtualKey { + tenant_id: 1234, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + + let probe = |want: u16| { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + async move { + wait_for_status(want, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { post_status(&c, &u, &k, body_for("gpt-4o")).await } + }) + .await + } + }; + + probe(200).await; // ready + allowed + put_kv(nats.port, "blackhole.1234", b"fraud").await; + probe(403).await; // fraud → forbidden +} + +#[tokio::test] +async fn oversized_content_length_is_rejected_413() { + let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(21); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + let vkey = mint( + &VirtualKey { + tenant_id: 1, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + + // Wait until the gateway is serving. + { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(200, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { post_status(&c, &u, &k, body_for("gpt-4o")).await } + }) + .await; + } + + // Declare a body of 200 MiB + 1 (> the 100 MiB guard) but send no body — the guard rejects on + // the Content-Length header in request_filter before any body is read. 
+ let req = format!( + "POST /v1/chat/completions HTTP/1.1\r\n\ + Host: x\r\n\ + Authorization: Bearer {vkey}\r\n\ + Content-Type: application/json\r\n\ + Content-Length: 209715201\r\n\ + Connection: close\r\n\r\n" + ); + assert_eq!(raw_status(gw.port, &req).await, 413); +} + +#[tokio::test] +async fn per_credential_rate_limit_returns_429() { + // Every other rejection code is covered e2e (401/402/403/413/503) — 429 was the gap. A + // misconfigured ceiling (e.g. `rate_limit_rps` env typo'd to 0) would silently disable the + // guardrail, so prove the full enforcement path: a burst on one credential trips 429, charged on + // the raw key in `request_filter` before any verify/upstream connect. BYO (so no key material + // needed); the global BYO tier is disabled so this isolates the per-credential ceiling. + let nats = Nats::start().await; + let (pubkey, _sk) = test_keypair(40); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .rate_limit_rps(5) + .byo_rate_limit_rps(0) + .start() + .await; + let client = reqwest::Client::new(); + + // Wait until the gateway serves, using a *different* credential so the flood token's budget is + // untouched by readiness probing. + { + let (c, u) = (client.clone(), gw.url()); + wait_for_status(200, move || { + let (c, u) = (c.clone(), u.clone()); + async move { post_status(&c, &u, "sk-byo-warmup", body_for("gpt-4o")).await } + }) + .await; + } + + // Burst one credential well past its 5 rps ceiling within a single window. The first few are + // served (200); once the ceiling is crossed the rest are throttled (429). 
+ let mut saw_200 = false; + let mut saw_429 = false; + for _ in 0..50 { + match post_status(&client, &gw.url(), "sk-byo-flood", body_for("gpt-4o")).await { + 200 => saw_200 = true, + 429 => saw_429 = true, + other => panic!("unexpected status under rate limit: {other}"), + } + } + assert!( + saw_200, + "the first requests under the ceiling must be served" + ); + assert!( + saw_429, + "a burst past the per-credential ceiling must yield 429" + ); + wait_for_metric(&gw, "ai_rejections_total", "rate_limit", 1.0).await; +} + +#[tokio::test] +async fn managed_key_via_x_api_key_header_is_accepted() { + // Anthropic SDKs present the key in `x-api-key`, not `Authorization: Bearer`. A managed virtual + // key must be extracted from either header; here it arrives via x-api-key on the OpenAI path and + // must still swap to the OpenAI pool key in the Bearer scheme the upstream wants. + let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(22); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + let vkey = mint( + &VirtualKey { + tenant_id: 8, + vpc_id: 8, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(200, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { + post_status_xapikey(&c, &u, "/v1/chat/completions", &k, body_for("gpt-4o")).await + } + }) + .await; + } + let cap = mock.captured().expect("mock received a request"); + assert_eq!(cap.authorization.as_deref(), Some("Bearer sk-pool-secret")); +} + +#[tokio::test] +async fn managed_key_for_unconfigured_provider_returns_503() { + // The default gateway configures OpenAI + Fireworks pool keys, but NOT Anthropic. A managed key + // routed to Anthropic (via the `/anthropic` path segment) has no pool key → 503, surfaced in + // request_filter before any upstream connect. 
+ let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(23); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + let vkey = mint( + &VirtualKey { + tenant_id: 11, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(503, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { + post_path_status( + &c, + &u, + "/anthropic/v1/messages", + &k, + body_for("claude-opus-4-8"), + ) + .await + } + }) + .await; +} + +#[tokio::test] +async fn anthropic_dialect_swaps_key_relays_and_meters() { + // The Anthropic path (`/v1/messages`) drives a different dialect, a different auth scheme + // (x-api-key, not Bearer), and a different usage parser than the OpenAI tests above. + let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(24); + let mock = MockUpstream::start(Mode::AnthropicJson).await; + let gw = Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .providers(&["anthropic"]) + .start() + .await; + let vkey = mint( + &VirtualKey { + tenant_id: 77, + vpc_id: 2, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + { + let (c, u, k) = (client.clone(), gw.url(), vkey.clone()); + wait_for_status(200, move || { + let (c, u, k) = (c.clone(), u.clone(), k.clone()); + async move { + post_status_xapikey(&c, &u, "/v1/messages", &k, body_for("claude-opus-4-8")).await + } + }) + .await; + } + + let resp = client + .post(format!("{}/v1/messages", gw.url())) + .header("x-api-key", &vkey) + .header("content-type", "application/json") + .body(body_for("claude-opus-4-8")) + .send() + .await + .unwrap(); + assert_eq!(resp.status(), 200); + + let cap = mock.captured().expect("mock received a request"); + assert_eq!(cap.path, "/v1/messages"); + // Anthropic wants the key in x-api-key, and the inbound virtual key must not leak upstream. 
+ assert_eq!(cap.x_api_key.as_deref(), Some("sk-anthropic-pool")); + assert_eq!(cap.authorization, None); + + wait_for_metric(&gw, "ai_tokens_total", "input", 13.0).await; +} + +#[tokio::test] +async fn missing_api_key_returns_401() { + // A request with neither Authorization nor x-api-key takes the "missing API key" branch — a + // different path than the malformed-key (invalid) branch the managed test exercises. + let nats = Nats::start().await; + let (pubkey, _sk) = test_keypair(25); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + let client = reqwest::Client::new(); + + let (c, u) = (client.clone(), gw.url()); + wait_for_status(401, move || { + let (c, u) = (c.clone(), u.clone()); + async move { + c.post(format!("{u}/v1/chat/completions")) + .header("content-type", "application/json") + .body(body_for("gpt-4o")) + .send() + .await + .map(|r| r.status().as_u16()) + .unwrap_or(0) + } + }) + .await; +} + +#[tokio::test] +async fn deny_set_is_fail_open_when_nats_drops() { + // After NATS goes away the last-known deny-set must be *retained* (fail-open), and auth/keys — + // which come from config, not NATS — must keep working. 
+ let mut nats = Nats::start().await; + let (pubkey, sk) = test_keypair(26); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + + let denied = mint( + &VirtualKey { + tenant_id: 555, + vpc_id: 1, + }, + 1, + &sk, + ); + let allowed = mint( + &VirtualKey { + tenant_id: 556, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + + let probe = |key: String, want: u16| { + let (c, u) = (client.clone(), gw.url()); + async move { + wait_for_status(want, move || { + let (c, u, k) = (c.clone(), u.clone(), key.clone()); + async move { post_status(&c, &u, &k, body_for("gpt-4o")).await } + }) + .await + } + }; + + probe(denied.clone(), 200).await; // ready + allowed + put_kv(nats.port, "blackhole.555", b"spend").await; + probe(denied.clone(), 402).await; // deny delta landed + + nats.stop(); // NATS disappears + + probe(denied.clone(), 402).await; // stale deny retained, not cleared + probe(allowed.clone(), 200).await; // un-denied tenant still served without NATS +} + +#[tokio::test] +async fn streaming_tail_compaction_preserves_usage_event() { + // The usage chunk trails 130 KiB of content, forcing the proxy's response-tail compaction + // (resp_tail grows past 2× the 64 KiB cap). The usage event must survive in the retained tail. 
+ let nats = Nats::start().await; + let (pubkey, sk) = test_keypair(27); + let mock = MockUpstream::start(Mode::SseLarge).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + let vkey = mint( + &VirtualKey { + tenant_id: 21, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + let body = r#"{"model":"gpt-4o","stream":true,"messages":[{"role":"user","content":"hi"}]}"# + .to_string(); + + { + let (c, u, k, b) = (client.clone(), gw.url(), vkey.clone(), body.clone()); + wait_for_status(200, move || { + let (c, u, k, b) = (c.clone(), u.clone(), k.clone(), b.clone()); + async move { post_status(&c, &u, &k, b).await } + }) + .await; + } + + let resp = client + .post(format!("{}/v1/chat/completions", gw.url())) + .header("authorization", format!("Bearer {vkey}")) + .header("content-type", "application/json") + .body(body) + .send() + .await + .unwrap(); + assert_eq!(resp.status(), 200); + let _ = resp.bytes().await.unwrap(); // drain the >128 KiB stream + + wait_for_metric(&gw, "ai_tokens_total", "input", 5.0).await; +} + +#[tokio::test] +async fn on_disk_snapshot_enforces_across_restart_without_nats() { + // With a configured snapshot path, the deny-set is persisted to disk as deltas arrive. A restart + // must seed from that file and enforce immediately — even with NATS unreachable — proving the + // gateway reads the snapshot rather than re-scanning NATS on every boot. 
+ let mut nats = Nats::start().await; + let (pubkey, sk) = test_keypair(28); + let mock = MockUpstream::start(Mode::Json).await; + let snap = std::env::temp_dir().join(format!("beyond-ai-snap-{}.log", nats.port)); + let _ = std::fs::remove_file(&snap); + let snap_str = snap.to_str().unwrap().to_string(); + + let denied = mint( + &VirtualKey { + tenant_id: 8800, + vpc_id: 1, + }, + 1, + &sk, + ); + let allowed = mint( + &VirtualKey { + tenant_id: 8801, + vpc_id: 1, + }, + 1, + &sk, + ); + let client = reqwest::Client::new(); + + let probe = |gw_url: String, key: String, want: u16| { + let c = client.clone(); + async move { + wait_for_status(want, move || { + let (c, u, k) = (c.clone(), gw_url.clone(), key.clone()); + async move { post_status(&c, &u, &k, body_for("gpt-4o")).await } + }) + .await + } + }; + + // --- First run: blackhole the tenant; the delta is persisted to the snapshot. --- + { + let gw = Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .snapshot_path(&snap_str) + .start() + .await; + probe(gw.url(), denied.clone(), 200).await; // ready + allowed + put_kv(nats.port, "blackhole.8800", b"fraud").await; + probe(gw.url(), denied.clone(), 403).await; // applied in-memory AND appended to the snapshot + // Let the watcher's apply→persist step flush the checkpoint to disk before we kill it. + tokio::time::sleep(std::time::Duration::from_millis(400)).await; + // gw drops here → process killed. + } + + // NATS goes away entirely: a restart has nothing to scan and must rely on the snapshot. + nats.stop(); + + // --- Restart against the same snapshot path, NATS down. --- + let gw2 = Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .snapshot_path(&snap_str) + .start() + .await; + // Seeded from disk: the fraud hold is enforced even though NATS is unreachable. + probe(gw2.url(), denied, 403).await; + // And an un-denied tenant is still served (auth/keys are from config, not NATS). 
+ probe(gw2.url(), allowed, 200).await; + + let _ = std::fs::remove_file(&snap); +} + +#[tokio::test] +async fn health_endpoints_report_ready_on_the_metrics_listener() { + // /livez and /readyz live on the metrics listener (alongside /metrics) and must both 200 with a + // `{status:"ok"}` body once the process is up. Readiness is intentionally *not* gated on NATS — + // the gateway is fail-open, so it can serve from config alone. We stop NATS before probing to + // prove readiness doesn't depend on it: a NATS-less gateway is still ready. + let mut nats = Nats::start().await; + let (pubkey, _sk) = test_keypair(30); + let mock = MockUpstream::start(Mode::Json).await; + let gw = Gateway::start(nats.port, &mock.authority(), &b64(&pubkey)).await; + nats.stop(); + + let (live_status, live_body) = gw.admin_get("/livez").await; + assert_eq!( + live_status, 200, + "livez should be 200 once the process answers" + ); + assert!( + live_body.contains("\"status\":\"ok\""), + "livez body: {live_body}" + ); + + let (ready_status, ready_body) = gw.admin_get("/readyz").await; + assert_eq!( + ready_status, 200, + "readyz should be 200 even with NATS down (fail-open): {ready_body}" + ); + assert!( + ready_body.contains("\"status\":\"ok\""), + "readyz body: {ready_body}" + ); + + // An unknown admin path is a clean 404, not a hang or a 200. + let (nf_status, _) = gw.admin_get("/nope").await; + assert_eq!(nf_status, 404); +} + +#[tokio::test] +async fn circuit_breaker_opens_on_5xx_and_sheds() { + // A provider returning 5xx is *broken*: after `threshold` failures the per-provider breaker + // opens and the gateway fast-fails with 503 — without connecting upstream — instead of piling + // requests against `read_timeout_secs`. BYO traffic (no minting needed); the breaker gates all + // traffic to the provider. 
+ let nats = Nats::start().await; + let (pubkey, _sk) = test_keypair(1); + let mock = MockUpstream::start(Mode::Status(500)).await; + let gw = Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .circuit_breaker_threshold(3) + .start() + .await; + let client = reqwest::Client::new(); + + // While closed the gateway relays the mock's 500; once the breaker trips it returns its own 503. + // Poll until we observe the trip (each failure is recorded in `logging`, which lags the response + // slightly — polling absorbs that). + { + let (c, u) = (client.clone(), gw.url()); + wait_for_status(503, move || { + let (c, u) = (c.clone(), u.clone()); + async move { post_status(&c, &u, "sk-byo-test", body_for("gpt-4o")).await } + }) + .await; + } + + // The trip is visible as circuit_open rejections — the breaker shed requests before the upstream. + assert!( + parse_metric(&gw.metrics().await, "ai_rejections_total", "circuit_open") >= 1.0, + "expected ai_rejections_total{{reason=\"circuit_open\"}} >= 1 after the breaker tripped" + ); +} + +#[tokio::test] +async fn circuit_breaker_does_not_trip_on_429() { + // A 429 is a *healthy* provider throttling our pool key — the rate limiter and the client's + // Retry-After own that, NOT the breaker. So a 429 storm must never open the circuit: every + // request is relayed (429) and reaches the upstream; none is shed (503). + let nats = Nats::start().await; + let (pubkey, _sk) = test_keypair(1); + let mock = MockUpstream::start(Mode::Status(429)).await; + let gw = Gateway::builder(nats.port, &mock.authority(), &b64(&pubkey)) + .circuit_breaker_threshold(3) + // Don't let the BYO rate limiter shed these — we want every request to reach the upstream. + .byo_rate_limit_rps(0) + .start() + .await; + let client = reqwest::Client::new(); + + // Warm up until the gateway is serving and relaying the mock's 429 (the readiness pattern the + // other e2e tests use — avoids racing the first request against gateway startup under load). 
+ { + let (c, u) = (client.clone(), gw.url()); + wait_for_status(429, move || { + let (c, u) = (c.clone(), u.clone()); + async move { post_status(&c, &u, "sk-byo-test", body_for("gpt-4o")).await } + }) + .await; + } + + // Well past the failure threshold (3): all relayed as 429, never the breaker's 503. + for _ in 0..10 { + assert_eq!( + post_status(&client, &gw.url(), "sk-byo-test", body_for("gpt-4o")).await, + 429 + ); + } + + assert_eq!( + parse_metric(&gw.metrics().await, "ai_rejections_total", "circuit_open"), + 0.0, + "a 429 storm must not open the circuit breaker" + ); + assert!( + mock.hits() >= 10, + "every request should have reached the upstream (got {} hits)", + mock.hits() + ); +} diff --git a/tests/smoke.rs b/tests/smoke.rs new file mode 100644 index 0000000..622e56a --- /dev/null +++ b/tests/smoke.rs @@ -0,0 +1,251 @@ +//! Live smoke tests against **real** providers — the proof docs and the mock can't give: +//! a real TLS/SNI handshake to the provider host, the base-path rewrite landing on a real mount +//! (200, not 404), the **managed** path (verify → deny-check → pool-key swap), and a real +//! (non-canned) response body. +//! +//! These exercise the **production** path, not BYO: the test generates an Ed25519 keypair, configures +//! the *real* provider key (from the env var) as the gateway's pool key, mints a `bai_…` virtual key, +//! and sends that. So the gateway verifies the virtual key, runs the deny-set check, and swaps in the +//! real provider key before forwarding — the same flow a real managed tenant takes. The real key only +//! ever lives in the gateway's config; the client presents the minted virtual key. +//! +//! Two safety layers so this never runs — or bills — by accident: +//! 1. Every test is `#[ignore]`, so a plain `cargo test` skips the whole file. +//! 2. When explicitly run, each test still **skips** (early-returns) unless its provider's API +//! key env var is set — so you only ever hit the providers you have keys for. 
+//!
+//! Run them:
+//! ANTHROPIC_API_KEY=sk-ant-… mise run test:smoke
+//! # or directly:
+//! ANTHROPIC_API_KEY=sk-ant-… cargo test -p beyond-ai --test smoke -- --ignored --nocapture
+//!
+//! Model ids are the cheapest small model per provider as of 2026-05; adjust if a provider retires
+//! one (a model-not-found is a stale id here, not a gateway bug).
+
+// Test target: `.unwrap()`/`.expect()`/`panic!` are assertions, not production code. See e2e.rs.
+#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
+
+mod common;
+
+use beyond_ai::key::{VirtualKey, mint};
+use common::*;
+
+/// The provider's API key from the environment, or `None` (→ the test logs a skip and returns).
+///
+/// Returns `Some(value)` only when `var` is set, is valid Unicode, and is not blank after
+/// trimming. The value is returned as stored (untrimmed). An unset, non-Unicode, empty, or
+/// whitespace-only variable yields `None`.
+fn env_key(var: &str) -> Option<String> {
+    std::env::var(var).ok().filter(|v| !v.trim().is_empty())
+}
+
+/// A gateway wired to the **real** provider hosts over TLS, with `provider`'s pool key set to the
+/// caller's real key and a signing key installed — so a minted virtual key for `provider` verifies
+/// and swaps to the real key. Returns the gateway plus the minted `bai_…` key to present as a client.
+/// (Its own nats-server backs the deny-set, empty here — this tenant isn't denied.)
+///
+/// * `nats` — the running test NATS server; only its `port` is read here.
+/// * `provider` — the provider name whose pool key is configured.
+/// * `real_key` — the live provider API key; it is handed to the gateway config only, never
+///   returned to the caller.
+async fn managed_gateway(nats: &Nats, provider: &str, real_key: &str) -> (Gateway, String) {
+    let (pubkey, sk) = test_keypair(7);
+    // The upstream-authority argument is a placeholder ("unused"): `.real_upstreams()` is what
+    // points this gateway at the real provider hosts.
+    // NOTE(review): presumably the builder ignores that argument in this mode — confirm in `common`.
+    let gw = Gateway::builder(nats.port, "unused", &b64(&pubkey))
+        .real_upstreams()
+        .pool_key(provider, real_key)
+        .start()
+        .await;
+    // Mint the client-facing virtual key with the secret half of the keypair whose public half
+    // was installed in the gateway above.
+    let vkey = mint(
+        &VirtualKey {
+            tenant_id: 1,
+            vpc_id: 1,
+        },
+        1,
+        &sk,
+    );
+    (gw, vkey)
+}
+
+/// Drive one OpenAI-wire provider through the gateway as a managed request. The provider is selected
+/// by the first path segment; `chat_path` is the full gateway path — `/{provider}/{native-base}/
+/// chat/completions` (the provider's own base path after the selector, forwarded verbatim).
+async fn smoke_openai_wire(provider: &str, key_env: &str, model: &str, chat_path: &str) {
+    // No key in the environment → log a skip and return before starting NATS or the gateway.
+    let real_key = match env_key(key_env) {
+        Some(found) => found,
+        None => {
+            eprintln!("smoke[{provider}]: {key_env} unset — skipping");
+            return;
+        }
+    };
+
+    let nats = Nats::start().await;
+    let (gw, vkey) = managed_gateway(&nats, provider, &real_key).await;
+    let http = reqwest::Client::new();
+
+    // A single short prompt with `max_tokens` 16, sent to the gateway path for this provider.
+    let endpoint = format!("{}{chat_path}", gw.url());
+    let payload = format!(
+        r#"{{"model":"{model}","max_tokens":16,"messages":[{{"role":"user","content":"Reply with the single word: ping"}}]}}"#
+    );
+    // The client presents the minted virtual key as a Bearer token.
+    let request = http
+        .post(endpoint)
+        .header("authorization", format!("Bearer {vkey}"))
+        .header("content-type", "application/json")
+        .body(payload);
+    let response = request.send().await.expect("request to gateway");
+
+    // Capture status and body before asserting so a failure message can include the body.
+    let status = response.status();
+    let text = response.text().await.unwrap_or_default();
+    assert!(
+        status.is_success(),
+        "smoke[{provider}] model={model} path={chat_path}: expected 2xx, got {status}.\n\
+         404 ⇒ wrong native path / provider segment; 401 ⇒ pool-key swap/verify; 403 ⇒ deny-set; \
+         a model error ⇒ stale model id. body: {text}"
+    );
+    assert!(
+        text.contains("\"choices\""),
+        "smoke[{provider}]: {status} but no `choices` in body: {text}"
+    );
+    eprintln!("smoke[{provider}]: OK ({status}) — verified, swapped, real 2xx");
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_anthropic() {
+    // No key in the environment → log a skip and return before starting NATS or the gateway.
+    let real_key = match env_key("ANTHROPIC_API_KEY") {
+        Some(found) => found,
+        None => {
+            eprintln!("smoke[anthropic]: ANTHROPIC_API_KEY unset — skipping");
+            return;
+        }
+    };
+
+    let nats = Nats::start().await;
+    let (gw, vkey) = managed_gateway(&nats, "anthropic", &real_key).await;
+    let http = reqwest::Client::new();
+
+    // The first path segment of `/anthropic/v1/messages` selects provider `anthropic`; the
+    // gateway strips it and forwards `/v1/messages` upstream. The minted virtual key travels in
+    // `x-api-key` (the Anthropic SDK's header); the gateway verifies it and swaps in the real
+    // key, again in `x-api-key` rather than Bearer. The required `anthropic-version` header
+    // passes through. This is the *only* test covering the x-api-key auth scheme together with
+    // a real TLS handshake to api.anthropic.com via the full managed path.
+    let endpoint = format!("{}/anthropic/v1/messages", gw.url());
+    let payload = r#"{"model":"claude-haiku-4-5","max_tokens":16,"messages":[{"role":"user","content":"Reply with the single word: ping"}]}"#;
+    let request = http
+        .post(endpoint)
+        .header("x-api-key", &vkey)
+        .header("anthropic-version", "2023-06-01")
+        .header("content-type", "application/json")
+        .body(payload);
+    let response = request.send().await.expect("request to gateway");
+
+    // Capture status and body before asserting so a failure message can include the body.
+    let status = response.status();
+    let text = response.text().await.unwrap_or_default();
+    assert!(
+        status.is_success(),
+        "smoke[anthropic]: expected 2xx, got {status}. body: {text}"
+    );
+    assert!(
+        text.contains("\"content\""),
+        "smoke[anthropic]: {status} but no `content` in body: {text}"
+    );
+    eprintln!("smoke[anthropic]: OK ({status}) — verified, swapped to x-api-key, real 2xx");
+}
+
+// --- OpenAI-wire providers. They all share one code path; running several checks each
+// host/base-path/auth row in `route::KNOWN_PROVIDERS` against the real endpoint. ---
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_openai() {
+    let (provider, key_env) = ("openai", "OPENAI_API_KEY");
+    let model = "gpt-4o-mini";
+    let chat_path = "/openai/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_groq() {
+    // Groq's native mount is `/openai/v1`, so the client path is `/groq/openai/v1/...`; the
+    // gateway strips `/groq` and forwards the remainder verbatim. This is the highest-value
+    // non-`/v1` native-path case.
+    let (provider, key_env) = ("groq", "GROQ_API_KEY");
+    let model = "llama-3.1-8b-instant";
+    let chat_path = "/groq/openai/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_fireworks() {
+    // Fireworks' native mount is `/inference/v1`, so the client path is
+    // `/fireworks/inference/v1/...`.
+    let (provider, key_env) = ("fireworks", "FIREWORKS_API_KEY");
+    let model = "accounts/fireworks/models/llama-v3p1-8b-instruct";
+    let chat_path = "/fireworks/inference/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_openrouter() {
+    // OpenRouter's native mount is `/api/v1`, so the client path is `/openrouter/api/v1/...`.
+    let (provider, key_env) = ("openrouter", "OPENROUTER_API_KEY");
+    let model = "openai/gpt-4o-mini";
+    let chat_path = "/openrouter/api/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_deepseek() {
+    let (provider, key_env) = ("deepseek", "DEEPSEEK_API_KEY");
+    let model = "deepseek-chat";
+    let chat_path = "/deepseek/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_together() {
+    let (provider, key_env) = ("together", "TOGETHER_API_KEY");
+    let model = "meta-llama/Llama-3.1-8B-Instruct-Turbo";
+    let chat_path = "/together/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_cerebras() {
+    let (provider, key_env) = ("cerebras", "CEREBRAS_API_KEY");
+    let model = "llama3.1-8b";
+    let chat_path = "/cerebras/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_mistral() {
+    let (provider, key_env) = ("mistral", "MISTRAL_API_KEY");
+    let model = "mistral-small-latest";
+    let chat_path = "/mistral/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}
+
+#[tokio::test]
+#[ignore = "live provider smoke; run via `mise run test:smoke` with API keys set"]
+async fn smoke_xai() {
+    let (provider, key_env) = ("xai", "XAI_API_KEY");
+    let model = "grok-3-mini";
+    let chat_path = "/xai/v1/chat/completions";
+    smoke_openai_wire(provider, key_env, model, chat_path).await;
+}