matdev83 · matdev83 · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026
diff --git a/.kiro/steering/routing-and-orchestration.md b/.kiro/steering/routing-and-orchestration.md
@@ -59,6 +59,7 @@ The current selector language includes these core-owned behaviors:
 - parallel groups (`!`) that race multiple B-legs,
 - per-leg `[handicap=N]` start delays in parallel groups,
 - global and per-leaf `{ttft_timeout=N}` / `[ttft_timeout=N]` budgets,
+- per-leaf query generation params that override matching per-request body/call options,
 - model aliases that rewrite full selector strings before parsing.
 
 Mixing incompatible selector forms must fail early. In particular, parallel `!` groups cannot be mixed with `^`, weights, or `[first]` in the same arm.

diff --git a/README.md b/README.md
@@ -47,9 +47,9 @@ go run ./cmd/lipstd --config ./config/config.yaml
 ## Configuration and operations
 
 - **Config** - Runtime config is typed and loaded from YAML. [`config/config.yaml`](config/config.yaml) documents access/auth templates, server timeouts, logging, diagnostics, observability, routing, continuity, and provider rows. [`config/config.multi-instance.example.yaml`](config/config.multi-instance.example.yaml) shows multiple backend instances of the same adapter.
-- **Routing** - Default selectors come from `routing.default_route` or the first enabled backend plus registry default model ids. `model_aliases` rewrite full selector strings before parsing. Route selectors support ordered failover, weights, first-request annotations, parallel `!` races, per-leg `[handicap=N]`, and global/per-leg TTFT budgets.
+- **Routing** - Default selectors come from `routing.default_route` or the first enabled backend plus registry default model ids. `model_aliases` rewrite full selector strings before parsing. Route selectors support ordered failover, weights, first-request annotations, parallel `!` races, per-leg `[handicap=N]`, global/per-leg TTFT budgets, and per-leaf query generation parameters. Route query parameters such as `?reasoning_effort=xhigh` are explicit routing directives: when present, they override matching per-request body/canonical generation options; absent parameters leave request values unchanged.
 - **Continuity** - `continuity.store: memory` is the default. `continuity.store: sqlite` with `continuity.sqlite_path` persists A-leg rows and attempt lineage through [`internal/core/continuity/sqlitestore`](internal/core/continuity/sqlitestore). In-memory `ttl` and `max_legs` tuning does not apply to SQLite.
-- **Security** - Multi-user or non-loopback deployments need explicit auth/access posture. Local API keys must be at least 16 Unicode code points after trimming. Diagnostics, pprof, metrics, model-catalog diagnostics, and secure-session summaries require a shared secret when exposed beyond loopback.
+- **Security** - Multi-user or non-loopback deployments need explicit auth/access posture. Local API keys must be at least 16 Unicode code points after trimming. Diagnostics, pprof, metrics, model-catalog diagnostics, and secure-session summaries require a shared secret when exposed beyond loopback. On Unix, OpenAI Codex `auth.json` and managed-OAuth account files must be `0600` (files readable or writable by group or other are rejected at load); symlinked managed-OAuth account files are skipped. See [`docs/openai-codex-backend.md`](docs/openai-codex-backend.md#token-file-permissions).
 - **Observability** - Optional Prometheus metrics and OpenTelemetry tracing are configured under `observability`. Access logs use bounded-cardinality route groups by default; raw paths are opt-in.
 - **HTTP clients** - The shared upstream client honors `HTTP_PROXY` / `HTTPS_PROXY` by default. Set `http_client.trust_environment_proxy: false` when process environment is not trusted.
 - **Resource bounds** - `lipapi.Call.Validate`, `lipapi.Collect` limits, pending wire event caps, and B2BUA store caps protect memory and request size boundaries.

diff --git a/config/config.yaml b/config/config.yaml
@@ -304,6 +304,8 @@ plugins:
       # managed_oauth_storage_path: var/openai_codex_oauth_accounts
       # managed_oauth_selection_strategy: first-available  # first-available | round-robin | session-affinity
       # managed_oauth_allow_auth_json_fallback: true
+      # transport: https  # default; websocket/auto require experimental_websocket: true
+      # experimental_websocket: false
       # gpt55_downgrade_disabled: false
       # OPENAI_CODEX_ACCESS_TOKEN / OPENAI_CODEX_API_KEY (+ _N variants) env vars
     - id: ollama

diff --git a/config/examples/opencode-codex.yaml b/config/examples/opencode-codex.yaml
@@ -0,0 +1,112 @@
+# OPENCODE + OPENAI-CODEX LIVE TEST
+# Routes OpenCode (Responses API) to the ChatGPT Codex backend using the Codex CLI auth file.
+# Credentials: auto-discovered from ~/.codex/auth.json (tokens.access_token / refresh_token / account_id).
+# Override with OPENAI_CODEX_ACCESS_TOKEN env var or config.access_token if needed.
+#
+# Start:  go run ./cmd/lipstd serve --config ./config/examples/opencode-codex.yaml
+# Point OpenCode at: http://127.0.0.1:8080/v1  (Responses API, @ai-sdk/openai), model gpt-5.5
+server:
+  address: "127.0.0.1:8080"
+
+routing:
+  max_attempts: 3
+  default_route: "openai-codex:gpt-5.5"
+
+continuity:
+  in_memory: true
+  store: memory
+
+logging:
+  level: info
+  format: text
+
+diagnostics:
+  enabled: true
+  health_path: "/healthz"
+  attempts_path: "/admin/attempts"
+  inventory_path: "/debug/inventory"
+  route_trace_path: "/debug/route_trace"
+
+hooks:
+  tool_reactor_error_policy: fail_open
+
+plugins:
+  frontends:
+    - id: openai-responses
+      enabled: true
+      config: {}
+    - id: openai-legacy
+      enabled: true
+      config: {}
+    - id: anthropic
+      enabled: true
+      config: {}
+    - id: gemini
+      enabled: true
+      config: {}
+  backends:
+    - id: openai-responses
+      enabled: false
+      config: {}
+    - id: openai-legacy
+      enabled: false
+      config: {}
+    - id: anthropic
+      enabled: false
+      config: {}
+    - id: gemini
+      enabled: false
+      config: {}
+    - id: bedrock
+      enabled: false
+      config: {}
+    - id: acp
+      enabled: false
+      config: {}
+    - id: openrouter
+      enabled: false
+      config: {}
+    - id: nvidia
+      enabled: false
+      config: {}
+    - id: opencode-go
+      enabled: false
+      config: {}
+    - id: opencode-zen
+      enabled: false
+      config: {}
+    - id: ollama
+      enabled: false
+      config: {}
+    - id: ollama-cloud
+      enabled: false
+      config: {}
+    - id: llamacpp
+      enabled: false
+      config: {}
+    - id: lmstudio
+      enabled: false
+      config: {}
+    - id: vllm
+      enabled: false
+      config: {}
+    - id: openai-codex
+      enabled: true
+      config:
+        # base_url defaults to https://chatgpt.com/backend-api/codex — leave unset for live ChatGPT.
+        # access_token left empty so the connector auto-discovers ~/.codex/auth.json.
+        # account_id is read from auth.json tokens.account_id; override here only if needed.
+        default_reasoning_effort: "medium"
+  features:
+    - id: submit-noop
+      enabled: true
+      config: {}
+    - id: parts-noop
+      enabled: true
+      config: {}
+    - id: tool-reactor-noop
+      enabled: true
+      config: {}
+    - id: codex-client-compat
+      enabled: true
+      config: {}
diff --git a/docs/openai-codex-backend.md b/docs/openai-codex-backend.md
@@ -1,6 +1,6 @@
 # OpenAI Codex backend
 
-The `openai-codex` backend connects to the ChatGPT Codex Responses API (`https://chatgpt.com/backend-api/codex/responses`). Route selectors use the `openai-codex` prefix, for example `openai-codex:gpt-5.3-codex`.
+The `openai-codex` backend connects to the ChatGPT Codex Responses API (`https://chatgpt.com/backend-api/codex/responses`). Route selectors use the `openai-codex` prefix, for example `openai-codex:gpt-5.5`.
 
 ## Enable
 
@@ -21,6 +21,12 @@ plugins:
 - Environment: `OPENAI_CODEX_ACCESS_TOKEN`, then numbered `_2`, `_3`, …; falls back to `OPENAI_CODEX_API_KEY` (+ `_N` variants) when access-token vars are unset.
 - When neither `access_token` nor `auth_json_path` is set, the connector reads `~/.codex/auth.json` if present (Codex CLI default).
 
+## Token file permissions
+
+On Unix, the `auth.json` file and managed-OAuth account files in `managed_oauth_storage_path` must be owner-only (`0600`). Files readable or writable by group or other are rejected at load time with an error mentioning `group/other accessible`; fix with `chmod 600 <file>`. This mirrors the Codex CLI `auth.json` guard and fails closed on multi-user hosts. On Windows (ACL-based permissions, no meaningful Unix mode bits) this check is a no-op.
+
+Symlinked account files inside the managed-OAuth storage directory are skipped during discovery, so a symlink planted in that directory cannot cause the proxy to read a target outside it. Use real files for managed accounts.
+
 ## Optional settings
 
 | Field | Purpose |
@@ -32,6 +38,9 @@ plugins:
 | `oauth_client_id` | OAuth client id (OpenAI Codex CLI default) |
 | `account_id` | `ChatGPT-Account-Id` header |
 | `default_reasoning_effort` | Default reasoning effort for requests |
+| `transport` | `https` (default), `auto`, or `websocket` |
+| `experimental_websocket` | Required opt-in for `transport: auto` or `transport: websocket` |
+| `websocket_fallback_cooldown_seconds` | Auto-mode cooldown after a pre-output WebSocket failure (default 300) |
 | `models` | Static model inventory (inline or file), same shape as other backends |
 | `managed_oauth_enabled` | Load OAuth accounts from JSON files in `managed_oauth_storage_path` |
 | `managed_oauth_storage_path` | Directory of `*.json` account files |
@@ -46,7 +55,11 @@ plugins:
 | `gpt55_downgrade_target_model` | Target model for free-plan downgrade (default `gpt-5.4`) |
 | `plan_type_hint` | Optional plan hint for proactive downgrade tests/local overrides |
 
-Without `models`, the connector exposes a built-in Codex model list.
+Without `models`, the connector exposes a built-in Codex model list: `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, and `gpt-5.3-codex-spark`.
+
+## Transport
+
+The default transport is HTTPS/SSE. WebSocket support is experimental and only enabled when `experimental_websocket: true` is set. With that opt-in, `transport: auto` tries `wss://chatgpt.com/backend-api/codex/responses` first and falls back to HTTPS/SSE only if WebSocket fails before the first canonical event. After that first event, stream errors are surfaced and not retried. Use `transport: websocket` to fail instead of falling back during debugging. After a pre-output WebSocket failure, auto mode skips WebSocket for `websocket_fallback_cooldown_seconds` to avoid repeated retry latency.
 
 ## Client compatibility (OpenCode / Pi / Droid / Hermes)
 
@@ -66,7 +79,84 @@ The request-part hook detects client markers from extensions, headers, prompts,
 
 ```yaml
 routes:
-  default: "openai-codex:gpt-5.3-codex"
+  default: "openai-codex:gpt-5.5"
 ```
 
 Query (URI) parameters such as `?reasoning_effort=high` are supported in route selectors.
+
+## Per-request routing
+
+A client can override the configured default route per request by putting a full route
+selector in the request body `model` field, with optional URI parameters:
+
+```json
+{ "model": "openai-codex:gpt-5.5?reasoning_effort=low", "input": "ping" }
+```
+
+The `openai-codex:` prefix selects the backend, the model name selects any model (the
+builtin inventory lists `gpt-5.5`, `gpt-5.4`, `gpt-5.4-mini`, and `gpt-5.3-codex-spark`;
+arbitrary model strings can still be routed even if not listed), and the `reasoning_effort`
+URI parameter is converted into the canonical call options and then into the Codex payload
+`reasoning.effort` field. An explicit `X-LIP-Route` header, when present, takes precedence
+over the body `model`. A bare model name without a backend prefix still falls back to the
+configured default route.
+
+URI parameters are explicit routing directives and **override** any corresponding value
+set elsewhere: a `?reasoning_effort=xhigh` on the selector wins over a `reasoning_effort`
+field in the request body and over the backend's `default_reasoning_effort`. A parameter
+absent from the selector leaves the other value in effect. The same override rule applies
+to `temperature`, `top_p`, `max_output_tokens`, and `parallel_tool_calls` when present in
+the selector.
+
+## Unsupported generation parameters
+
+The Codex Responses API does not support `temperature`, `top_p`, or `max_output_tokens`.
+Plain calls that set any of these fail at payload-build time with an explicit error.
+The `openai_codex.ignore_unsupported_gen_params` canonical-call extension (bool, `true`)
+opts in to dropping them instead — the `codex-client-compat` feature sets this for
+detected compatibility clients (OpenCode, pi, Factory Droid, Hermes) so optional tuning
+params are not forwarded upstream and do not fail the request. `reasoning_effort` and
+`parallel_tool_calls` are honored.
+
+## Model name normalization
+
+Clients that use a `provider/model` namespace (for example OpenCode's `openai/gpt-5.4-mini`)
+have the leading `openai/` prefix stripped before the model reaches the Codex upstream, which
+rejects org-prefixed model names. A bare model name such as `gpt-5.4-mini` is sent unchanged.
+
+## System messages
+
+The Codex Responses API rejects `system`-role items in `input` ("System messages are not
+allowed"). System content must be carried in the `instructions` field. The connector folds
+system-role messages from the conversation into `instructions` (deduplicated against explicit
+instructions, including the `codex-client-compat` bridge) and omits them from `input`, so
+clients that send a system prompt (for example OpenCode) interoperate without a capability
+mismatch.
+
+## Tool schemas
+
+The Codex Responses API requires function-tool parameter schemas to be
+"strict-compatible" when sent with `strict:true`: every object must declare
+`additionalProperties:false` and list all of its properties in `required`.
+Clients that emit looser schemas (for example OpenCode's `apply_patch`, which
+omits `additionalProperties`) would otherwise be rejected with
+`invalid_function_parameters`. The connector inspects each tool schema and
+sends `strict:false` for any schema that is not strict-compatible, while
+keeping `strict:true` for strict-compatible and parameterless schemas. This is
+a safe relaxation — it only disables strict validation and never causes an
+upstream rejection. The Hermes compatibility bridge keeps its existing
+`tool_strict:false` behavior (all tools relaxed).
+
+## Tool call history
+
+When a client sends a prior assistant tool call and its result back (for example
+OpenCode following up after executing a tool), the Chat Completions frontend
+encodes the assistant tool call as a `PartJSON` item in the Chat Completions
+shape (`type:"function"` with a nested `function:{name,arguments}` object and
+`id` as the call id). The connector translates that into a Codex Responses
+`function_call` input item (using the Chat Completions `id` as the `call_id`)
+and the matching `tool`-role result into a `function_call_output` item with the
+same `call_id`, so the upstream sees a correctly linked call/output pair. The
+`codex-client-compat` bridge recognizes Chat Completions-style tool calls when
+matching tool results, so results that belong to a known call are preserved
+rather than treated as orphaned.
diff --git a/go.mod b/go.mod
@@ -11,6 +11,7 @@ require (
 	github.com/aws/aws-sdk-go-v2/service/bedrock v1.64.0
 	github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.54.0
 	github.com/aws/smithy-go v1.27.2
+	github.com/gorilla/websocket v1.5.3
 	github.com/jellydator/ttlcache/v3 v3.4.1
 	github.com/openai/openai-go/v3 v3.41.0
 	github.com/prometheus/client_golang v1.23.2
@@ -63,7 +64,6 @@ require (
 	github.com/google/s2a-go v0.1.8 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
-	github.com/gorilla/websocket v1.5.3 // indirect
 	github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
 	github.com/invopop/jsonschema v0.13.0 // indirect
 	github.com/jinzhu/inflection v1.0.0 // indirect

diff --git a/internal/core/diag/debug_summary.go b/internal/core/diag/debug_summary.go
@@ -0,0 +1,54 @@
+package diag
+
+import (
+	"log/slog"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+)
+
+const envDebugTurns = "LIP_CODEX_DEBUG_TURNS" // environment variable that gates verbose per-turn diagnostics
+
+var debugTurnsEnabled = sync.OnceValue(func() bool { // memoized: the environment is consulted on the first call only
+	return strings.TrimSpace(os.Getenv(envDebugTurns)) != "" // any non-blank value enables the gate, including "0" or "false"
+})
+
+// DebugTurnsEnabled reports whether verbose per-turn diagnostics are enabled for
+// this process, i.e. whether LIP_CODEX_DEBUG_TURNS held a non-blank value when it
+// was first read. The result is cached, so later environment changes are ignored.
+func DebugTurnsEnabled() bool {
+	return debugTurnsEnabled()
+}
+
+// LoggerOrDefault returns log unchanged when it is non-nil, otherwise slog.Default().
+func LoggerOrDefault(log *slog.Logger) *slog.Logger {
+	if log != nil {
+		return log
+	}
+	return slog.Default() // looked up on each call, so it tracks whatever default is installed at that moment
+}
+
+// StableCounts renders counts as "key=value" strings sorted by key; a nil or empty map yields an empty, non-nil slice.
+func StableCounts(counts map[string]int) []string {
+	keys := make([]string, 0, len(counts))
+	for k := range counts {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys) // map iteration order is unspecified; sorting makes the output deterministic
+	out := make([]string, 0, len(keys))
+	for _, k := range keys {
+		out = append(out, k+"="+strconv.Itoa(counts[k])) // values are formatted in base 10
+	}
+	return out
+}
+
+// AppendLimited appends the whitespace-trimmed value unless it is empty or values already holds max or more entries.
+func AppendLimited(values []string, value string, max int) []string { // NOTE(review): parameter max shadows the builtin max (Go 1.21+) in this body
+	value = strings.TrimSpace(value)
+	if value == "" || len(values) >= max { // with max <= 0 nothing is ever appended
+		return values
+	}
+	return append(values, value) // the trimmed form is what gets stored
+}
diff --git a/internal/core/routing/parser_test.go b/internal/core/routing/parser_test.go
@@ -110,7 +110,7 @@ func TestParseFirstSingleArm(t *testing.T) {
 // Task 14.5: parity with composite routing examples (failover |, weighted ^, [first], [weight=], per-leg query).
 func TestParseParity_pythonLIPCompositeSelector(t *testing.T) {
 	t.Parallel()
-	s := "[first]openai-codex:gpt-5.3-codex?reasoning_effort=high^[weight=4]openai-codex:gpt-5.3-codex?reasoning_effort=low|[weight=2]openai-codex:gpt-5.3-codex?reasoning_effort=medium"
+	s := "[first]openai-codex:gpt-5.3-codex-spark?reasoning_effort=high^[weight=4]openai-codex:gpt-5.3-codex-spark?reasoning_effort=low|[weight=2]openai-codex:gpt-5.3-codex-spark?reasoning_effort=medium"
 	sel, err := Parse(s)
 	if err != nil {
 		t.Fatal(err)
@@ -127,7 +127,7 @@ func TestParseParity_pythonLIPCompositeSelector(t *testing.T) {
 	if !b0.IsFirst || b0.Weight != 1 {
 		t.Fatalf("branch0: IsFirst=%v Weight=%d", b0.IsFirst, b0.Weight)
 	}
-	if b0.Target.Backend != "openai-codex" || b0.Target.Model != "gpt-5.3-codex" {
+	if b0.Target.Backend != "openai-codex" || b0.Target.Model != "gpt-5.3-codex-spark" {
 		t.Fatalf("branch0 target: %#v", b0.Target)
 	}
 	if b0.Target.Params.Get("reasoning_effort") != "high" {

diff --git a/internal/core/routing/routeprefix.go b/internal/core/routing/routeprefix.go
@@ -0,0 +1,30 @@
+package routing
+
+import (
+	"slices"
+	"strings"
+)
+
+// FilterRoutePrefixes trims, drops invalid (empty, colon- or slash-bearing),
+// de-duplicates, and sorts backend route-selector prefixes. Shared by runtime bundle
+// composition and frontend PrefixSet construction so the validation rule lives
+// in one place. A prefix is the "<prefix>:" segment of a route selector; it must
+// not itself contain ":" (which would make it a full selector) or "/" (which
+// collides with provider-namespace model syntax).
+func FilterRoutePrefixes(prefixes []string) []string {
+	seen := make(map[string]struct{}, len(prefixes)) // set of prefixes already accepted
+	out := make([]string, 0, len(prefixes))
+	for _, prefix := range prefixes {
+		prefix = strings.TrimSpace(prefix) // validation and de-duplication both use the trimmed form
+		if prefix == "" || strings.Contains(prefix, ":") || strings.Contains(prefix, "/") {
+			continue
+		}
+		if _, dup := seen[prefix]; dup { // exact, case-sensitive match
+			continue
+		}
+		seen[prefix] = struct{}{}
+		out = append(out, prefix)
+	}
+	slices.Sort(out) // ascending order, so the result does not depend on input order
+	return out // non-nil even when every entry was rejected
+}