CaddyGlow · saxyguy81 · May 10, 2026
diff --git a/ccproxy/auth/dependencies.py b/ccproxy/auth/dependencies.py
@@ -68,10 +68,13 @@ async def _build_bearer_auth_manager(
     expected_token: str | None,
     *,
     require_credentials: bool,
+    api_key: str | None = None,
 ) -> AuthManager | None:
     """Create a bearer auth manager when credentials satisfy expectations."""
 
     token = credentials.credentials if credentials and credentials.credentials else None
+    if token is None and api_key:
+        token = api_key
 
     if token is None:
         if require_credentials:
@@ -118,6 +121,7 @@ async def _build_bearer_auth_manager(
 
 
 async def get_auth_manager(
+    request: Request,
     credentials: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer_scheme)],
     settings: SettingsDep,
 ) -> AuthManager:
@@ -127,6 +131,7 @@ async def get_auth_manager(
         credentials,
         _expected_token(settings),
         require_credentials=True,
+        api_key=request.headers.get("x-api-key"),
     )
     # require_credentials ensures auth_manager is never None here.
     assert auth_manager is not None
@@ -183,6 +188,7 @@ async def get_conditional_auth_manager(
         credentials,
         expected_token,
         require_credentials=True,
+        api_key=request.headers.get("x-api-key"),
     )
 
 

diff --git a/ccproxy/llms/formatters/openai_to_anthropic/requests.py b/ccproxy/llms/formatters/openai_to_anthropic/requests.py
@@ -404,7 +404,7 @@ async def convert__openai_chat_to_anthropic_message__request(
 def convert__openai_responses_to_anthropic_message__request(
     request: openai_models.ResponseRequest,
 ) -> anthropic_models.CreateMessageRequest:
-    model = request.model
+    model = request.model or ""
     stream = bool(request.stream)
     max_out = request.max_output_tokens
 
@@ -618,8 +618,7 @@ def convert__openai_responses_to_anthropic_message__request(
             else request.instructions
         )
 
-    # Skip thinking config for ResponseRequest as it doesn't have the required fields
-    thinking_cfg = None
+    thinking_cfg = derive_thinking_config(model, request)
     if thinking_cfg is not None:
         payload_data["thinking"] = thinking_cfg
         budget = thinking_cfg.get("budget_tokens", 0)
@@ -631,21 +630,28 @@ def convert__openai_responses_to_anthropic_message__request(
 
 
 def derive_thinking_config(
-    model: str, request: openai_models.ChatCompletionRequest
+    model: str,
+    request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest,
 ) -> dict[str, Any] | None:
     """Derive Anthropic thinking config from OpenAI fields and model name.
 
     Rules:
     - If model matches o1/o3 families, enable thinking by default with model-specific budget
-    - Map reasoning_effort: low=1000, medium=5000, high=10000
+    - Map effort: minimal/low=1024, medium=5000, high=10000, xhigh=20000, max=32000
     - o3*: 10000; o1-mini: 3000; other o1*: 5000
     - If thinking is enabled, return {"type":"enabled","budget_tokens":N}
     - Otherwise return None
     """
     # Explicit reasoning_effort mapping
-    effort = getattr(request, "reasoning_effort", None)
-    effort = effort.strip().lower() if isinstance(effort, str) else ""
-    effort_budgets = {"low": 1000, "medium": 5000, "high": 10000}
+    effort = _extract_reasoning_effort(request)
+    effort_budgets = {
+        "minimal": 1024,
+        "low": 1024,
+        "medium": 5000,
+        "high": 10000,
+        "xhigh": 20000,
+        "max": 32000,
+    }
 
     budget: int | None = None
     if effort in effort_budgets:
@@ -667,6 +673,22 @@ def derive_thinking_config(
     return {"type": "enabled", "budget_tokens": budget}
 
 
+def _extract_reasoning_effort(
+    request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest,
+) -> str:
+    """Return the request's reasoning effort, lower-cased; empty string when unset."""
+    value = getattr(request, "reasoning_effort", None)
+    if isinstance(value, str) and value.strip():
+        return value.strip().lower()  # the flat field wins over ``reasoning``
+    # Fall back to ``reasoning.effort``; the container may be a dict or an object.
+    container = getattr(request, "reasoning", None)
+    if isinstance(container, dict):
+        value = container.get("effort")
+    elif container is not None:
+        value = getattr(container, "effort", None)
+    return value.strip().lower() if isinstance(value, str) else ""
+
+
 __all__ = [
     "convert__openai_chat_to_anthropic_message__request",
     "convert__openai_responses_to_anthropic_message__request",

diff --git a/ccproxy/llms/models/openai.py b/ccproxy/llms/models/openai.py
@@ -233,9 +233,9 @@ class ChatCompletionRequest(LlmBaseModel):
     n: int | None = Field(default=1)
     parallel_tool_calls: bool | None = Field(default=None)
     presence_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
-    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = Field(
-        default=None
-    )
+    reasoning_effort: (
+        Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None
+    ) = Field(default=None)
     response_format: ResponseFormat | None = Field(default=None)
     seed: int | None = Field(default=None)
     stop: str | list[str] | None = Field(default=None)
@@ -262,7 +262,7 @@ class ChatCompletionRequest(LlmBaseModel):
 
 
 class ResponseMessageReasoning(LlmBaseModel):
-    effort: Literal["minimal", "low", "medium", "high"] | None = None
+    effort: Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None = None
     summary: Literal["auto", "detailed", "concise"] | None = None
 
 

diff --git a/ccproxy/plugins/claude_shared/model_defaults.py b/ccproxy/plugins/claude_shared/model_defaults.py
@@ -14,6 +14,22 @@
         root="claude-sonnet-4-6",
         parent=None,
     ),
+    ModelCard(
+        id="sonnet[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-sonnet-4-6",
+        parent=None,
+    ),
+    ModelCard(
+        id="claude-sonnet-4-6[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-sonnet-4-6",
+        parent=None,
+    ),
     ModelCard(
         id="claude-haiku-4-5-20251001",
         created=1722816000,
@@ -22,6 +38,38 @@
         root="claude-haiku-4-5-20251001",
         parent=None,
     ),
+    ModelCard(
+        id="claude-opus-4-7",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
+    ModelCard(
+        id="opus[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
+    ModelCard(
+        id="opus-4-7[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
+    ModelCard(
+        id="claude-opus-4-7[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
     ModelCard(
         id="claude-opus-4-6",
         created=1722816000,
@@ -30,6 +78,22 @@
         root="claude-opus-4-6",
         parent=None,
     ),
+    ModelCard(
+        id="opus-4-6[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-6",
+        parent=None,
+    ),
+    ModelCard(
+        id="claude-opus-4-6[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-6",
+        parent=None,
+    ),
     ModelCard(
         id="claude-opus-4-20250514",
         created=1716336000,
@@ -125,21 +189,30 @@
     ),
     ModelMappingRule(
         match="o1",
-        target="claude-opus-4-6",
+        target="claude-opus-4-7",
         kind="prefix",
     ),
     ModelMappingRule(
         match="o3-mini",
-        target="claude-opus-4-6",
+        target="claude-opus-4-7",
         kind="exact",
     ),
     ModelMappingRule(
         match="gpt-5",
         target="claude-sonnet-4-6",
         kind="prefix",
     ),
+    ModelMappingRule(match="sonnet[1m]", target="claude-sonnet-4-6"),
+    ModelMappingRule(match="claude-sonnet-4-6[1m]", target="claude-sonnet-4-6"),
+    ModelMappingRule(match="opus[1m]", target="claude-opus-4-7"),
+    ModelMappingRule(match="opus-4-7[1m]", target="claude-opus-4-7"),
+    ModelMappingRule(match="claude-opus-4-7[1m]", target="claude-opus-4-7"),
+    ModelMappingRule(match="opus-4-6[1m]", target="claude-opus-4-6"),
+    ModelMappingRule(match="claude-opus-4-6[1m]", target="claude-opus-4-6"),
+    ModelMappingRule(match="opus-4-7", target="claude-opus-4-7"),
+    ModelMappingRule(match="claude-opus-4-7", target="claude-opus-4-7"),
     ModelMappingRule(match="sonnet", target="claude-sonnet-4-6"),
-    ModelMappingRule(match="opus", target="claude-opus-4-6"),
+    ModelMappingRule(match="opus", target="claude-opus-4-7"),
     ModelMappingRule(match="haiku", target="claude-haiku-4-5-20251001"),
     ModelMappingRule(
         match="claude-3-5-sonnet-latest",

diff --git a/ccproxy/plugins/codex/adapter.py b/ccproxy/plugins/codex/adapter.py
@@ -32,6 +32,13 @@
 logger = get_plugin_logger()
 
 
+_CODEX_MODEL_REASONING_ALIASES = {  # client-facing model alias -> implied effort
+    "gpt-5.5-high": "high",
+    "gpt-5.5-xhigh": "xhigh",
+    "gpt-5.5-max": "max",
+}
+
+
 class CodexAdapter(BaseHTTPAdapter):
     """Simplified Codex adapter."""
 
@@ -65,6 +72,7 @@ async def handle_request(
         endpoint = ctx.metadata.get("endpoint", "")
         body = await request.body()
         body = await self._map_request_model(ctx, body)
+        body = self._apply_model_alias_reasoning_effort(ctx, body)
         headers = extract_request_headers(request)
 
         # Determine client streaming intent from body flag (fallback to False)
@@ -294,6 +302,36 @@ async def prepare_provider_request(
 
         return json.dumps(body_data).encode(), filtered_headers
 
+    def _apply_model_alias_reasoning_effort(self, ctx: Any, body: bytes) -> bytes:
+        """Apply reasoning effort implied by client-facing Codex model aliases.
+
+        A non-empty ``reasoning.effort`` (or ``reasoning_effort`` when the body has
+        no ``reasoning`` object) is kept; unparseable or non-object JSON is untouched.
+        """
+        metadata = getattr(ctx, "metadata", None)
+        client_model = (
+            metadata.get("_last_client_model") if isinstance(metadata, dict) else None
+        )
+        if not isinstance(client_model, str):
+            return body
+        effort = _CODEX_MODEL_REASONING_ALIASES.get(client_model)
+        if effort is None:
+            return body
+        try:
+            body_data = json.loads(body.decode()) if body else {}
+        except (ValueError, RecursionError):  # bad bytes, bad JSON, or too deeply nested
+            return body
+        if not isinstance(body_data, dict):
+            return body
+        reasoning = body_data.get("reasoning")
+        if isinstance(reasoning, dict):
+            # ``setdefault`` would keep an explicit null effort, so test truthiness.
+            if not reasoning.get("effort"):
+                body_data["reasoning"] = {**reasoning, "effort": effort}
+        elif not body_data.get("reasoning_effort"):
+            body_data["reasoning_effort"] = effort
+        return self._encode_json_body(body_data)
+
     def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
         """Apply Codex-specific payload sanitization shared by all request paths."""
 
@@ -314,17 +352,43 @@ def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
         ):
             body_data.pop(key, None)
 
-        list_input = body_data.get("input", [])
-        # Remove any input types that Codex does not support
-        body_data["input"] = [
-            input for input in list_input if input.get("type") != "item_reference"
-        ]
+        input_value = body_data.get("input", [])
+        # Remove any input types that Codex does not support. Public Responses API
+        # input may be a plain string, but the Codex backend expects message items.
+        if isinstance(input_value, list):
+            body_data["input"] = [
+                input_item
+                for input_item in input_value
+                if not (
+                    isinstance(input_item, dict)
+                    and input_item.get("type") == "item_reference"
+                )
+            ]
+        elif isinstance(input_value, str):
+            body_data["input"] = [
+                {
+                    "type": "message",
+                    "role": "user",
+                    "content": [{"type": "input_text", "text": input_value}],
+                }
+            ]
 
         # Remove any prefixed metadata fields that shouldn't be sent to the API
         body_data = self._remove_metadata_fields(body_data)
+        self._normalize_reasoning_effort(body_data)
 
         return body_data
 
+    def _normalize_reasoning_effort(self, body_data: dict[str, Any]) -> None:
+        """Rewrite a ``max`` effort to ``xhigh`` in place, wherever it is set."""
+        alias, accepted = "max", "xhigh"
+        if body_data.get("reasoning_effort") == alias:
+            body_data["reasoning_effort"] = accepted
+        # The nested form is only touched when ``reasoning`` is a plain dict.
+        nested = body_data.get("reasoning")
+        if isinstance(nested, dict) and nested.get("effort") == alias:
+            nested["effort"] = accepted
+
     async def prepare_provider_headers(self, headers: dict[str, str]) -> dict[str, str]:
         token_value = await self._resolve_access_token()
 
@@ -501,6 +565,7 @@ async def handle_streaming(
         # Extract body and headers
         body = await request.body()
         body = await self._map_request_model(ctx, body)
+        body = self._apply_model_alias_reasoning_effort(ctx, body)
         headers = extract_request_headers(request)
 
         # Ensure format adapters are available when required