From abbf14832b6f6196da38f657518da99f54a4fc37 Mon Sep 17 00:00:00 2001
From: smhanan <hanan@apple.com>
Date: Sat, 9 May 2026 23:51:45 -0700
Subject: [PATCH] feat(models): add provider model and effort aliases

---
 ccproxy/auth/dependencies.py                  |  6 ++
 .../openai_to_anthropic/requests.py           | 38 +++++++--
 ccproxy/llms/models/openai.py                 |  8 +-
 .../plugins/claude_shared/model_defaults.py   | 79 ++++++++++++++++++-
 ccproxy/plugins/codex/adapter.py              | 75 ++++++++++++++++--
 ccproxy/plugins/codex/detection_service.py    | 19 ++++-
 ccproxy/plugins/codex/model_defaults.py       | 45 +++++++++++
 ccproxy/plugins/max_tokens/token_limits.json  | 40 ++++++++++
 .../integration/test_claude_api_basic.py      |  2 +
 tests/plugins/codex/unit/test_adapter.py      | 76 ++++++++++++++++++
 tests/plugins/max_tokens/test_service.py      | 19 +++++
 tests/unit/auth/test_auth.py                  | 26 ++++++
 .../test_openai_to_anthropic_helpers.py       | 37 +++++++++
 .../test_openai_to_openai_reasoning.py        | 30 +++++++
 tests/unit/plugins/test_codex_detection.py    | 11 +++
 tests/unit/utils/test_model_mapper.py         | 37 ++++++++-
 16 files changed, 521 insertions(+), 27 deletions(-)

diff --git a/ccproxy/auth/dependencies.py b/ccproxy/auth/dependencies.py
index 1ea0708c..c7321209 100644
--- a/ccproxy/auth/dependencies.py
+++ b/ccproxy/auth/dependencies.py
@@ -68,10 +68,13 @@ async def _build_bearer_auth_manager(
     expected_token: str | None,
     *,
     require_credentials: bool,
+    api_key: str | None = None,
 ) -> AuthManager | None:
     """Create a bearer auth manager when credentials satisfy expectations."""
 
     token = credentials.credentials if credentials and credentials.credentials else None
+    if token is None and api_key:
+        token = api_key
 
     if token is None:
         if require_credentials:
@@ -118,6 +121,7 @@ async def _build_bearer_auth_manager(
 
 
 async def get_auth_manager(
+    request: Request,
     credentials: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer_scheme)],
     settings: SettingsDep,
 ) -> AuthManager:
@@ -127,6 +131,7 @@ async def get_auth_manager(
         credentials,
         _expected_token(settings),
         require_credentials=True,
+        api_key=request.headers.get("x-api-key"),
     )
     # require_credentials ensures auth_manager is never None here.
     assert auth_manager is not None
@@ -183,6 +188,7 @@ async def get_conditional_auth_manager(
         credentials,
         expected_token,
         require_credentials=True,
+        api_key=request.headers.get("x-api-key"),
     )
 
 
diff --git a/ccproxy/llms/formatters/openai_to_anthropic/requests.py b/ccproxy/llms/formatters/openai_to_anthropic/requests.py
index 5540289c..e9c900e6 100644
--- a/ccproxy/llms/formatters/openai_to_anthropic/requests.py
+++ b/ccproxy/llms/formatters/openai_to_anthropic/requests.py
@@ -404,7 +404,7 @@ async def convert__openai_chat_to_anthropic_message__request(
 def convert__openai_responses_to_anthropic_message__request(
     request: openai_models.ResponseRequest,
 ) -> anthropic_models.CreateMessageRequest:
-    model = request.model
+    model = request.model or ""
     stream = bool(request.stream)
     max_out = request.max_output_tokens
 
@@ -618,8 +618,7 @@ def convert__openai_responses_to_anthropic_message__request(
             else request.instructions
         )
 
-    # Skip thinking config for ResponseRequest as it doesn't have the required fields
-    thinking_cfg = None
+    thinking_cfg = derive_thinking_config(model, request)
     if thinking_cfg is not None:
         payload_data["thinking"] = thinking_cfg
         budget = thinking_cfg.get("budget_tokens", 0)
@@ -631,21 +630,28 @@ def convert__openai_responses_to_anthropic_message__request(
 
 
 def derive_thinking_config(
-    model: str, request: openai_models.ChatCompletionRequest
+    model: str,
+    request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest,
 ) -> dict[str, Any] | None:
     """Derive Anthropic thinking config from OpenAI fields and model name.
 
     Rules:
     - If model matches o1/o3 families, enable thinking by default with model-specific budget
-    - Map reasoning_effort: low=1000, medium=5000, high=10000
+    - Map effort: minimal/low=1024, medium=5000, high=10000, xhigh=20000, max=32000
     - o3*: 10000; o1-mini: 3000; other o1*: 5000
     - If thinking is enabled, return {"type":"enabled","budget_tokens":N}
     - Otherwise return None
     """
     # Explicit reasoning_effort mapping
-    effort = getattr(request, "reasoning_effort", None)
-    effort = effort.strip().lower() if isinstance(effort, str) else ""
-    effort_budgets = {"low": 1000, "medium": 5000, "high": 10000}
+    effort = _extract_reasoning_effort(request)
+    effort_budgets = {
+        "minimal": 1024,
+        "low": 1024,
+        "medium": 5000,
+        "high": 10000,
+        "xhigh": 20000,
+        "max": 32000,
+    }
 
     budget: int | None = None
     if effort in effort_budgets:
@@ -667,6 +673,22 @@ def derive_thinking_config(
     return {"type": "enabled", "budget_tokens": budget}
 
 
+def _extract_reasoning_effort(
+    request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest,
+) -> str:
+    """Return the lowercased effort from ``reasoning_effort`` or ``reasoning``."""
+    candidate = getattr(request, "reasoning_effort", None)
+    if not (isinstance(candidate, str) and candidate.strip()):
+        # No usable flat field: consult the nested ``reasoning`` value, which
+        # may be a dict or an object exposing an ``effort`` attribute.
+        nested = getattr(request, "reasoning", None)
+        if isinstance(nested, dict):
+            candidate = nested.get("effort")
+        elif nested is not None:
+            candidate = getattr(nested, "effort", None)
+    return candidate.strip().lower() if isinstance(candidate, str) else ""
+
+
 __all__ = [
     "convert__openai_chat_to_anthropic_message__request",
     "convert__openai_responses_to_anthropic_message__request",
diff --git a/ccproxy/llms/models/openai.py b/ccproxy/llms/models/openai.py
index e510d0d4..72117cf2 100644
--- a/ccproxy/llms/models/openai.py
+++ b/ccproxy/llms/models/openai.py
@@ -233,9 +233,9 @@ class ChatCompletionRequest(LlmBaseModel):
     n: int | None = Field(default=1)
     parallel_tool_calls: bool | None = Field(default=None)
     presence_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
-    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = Field(
-        default=None
-    )
+    reasoning_effort: (
+        Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None
+    ) = Field(default=None)
     response_format: ResponseFormat | None = Field(default=None)
     seed: int | None = Field(default=None)
     stop: str | list[str] | None = Field(default=None)
@@ -262,7 +262,7 @@ class ChatCompletionRequest(LlmBaseModel):
 
 
 class ResponseMessageReasoning(LlmBaseModel):
-    effort: Literal["minimal", "low", "medium", "high"] | None = None
+    effort: Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None = None
     summary: Literal["auto", "detailed", "concise"] | None = None
 
 
diff --git a/ccproxy/plugins/claude_shared/model_defaults.py b/ccproxy/plugins/claude_shared/model_defaults.py
index 5f22adda..cf506651 100644
--- a/ccproxy/plugins/claude_shared/model_defaults.py
+++ b/ccproxy/plugins/claude_shared/model_defaults.py
@@ -14,6 +14,22 @@
         root="claude-sonnet-4-6",
         parent=None,
     ),
+    ModelCard(
+        id="sonnet[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-sonnet-4-6",
+        parent=None,
+    ),
+    ModelCard(
+        id="claude-sonnet-4-6[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-sonnet-4-6",
+        parent=None,
+    ),
     ModelCard(
         id="claude-haiku-4-5-20251001",
         created=1722816000,
@@ -22,6 +38,38 @@
         root="claude-haiku-4-5-20251001",
         parent=None,
     ),
+    ModelCard(
+        id="claude-opus-4-7",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
+    ModelCard(
+        id="opus[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
+    ModelCard(
+        id="opus-4-7[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
+    ModelCard(
+        id="claude-opus-4-7[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-7",
+        parent=None,
+    ),
     ModelCard(
         id="claude-opus-4-6",
         created=1722816000,
@@ -30,6 +78,22 @@
         root="claude-opus-4-6",
         parent=None,
     ),
+    ModelCard(
+        id="opus-4-6[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-6",
+        parent=None,
+    ),
+    ModelCard(
+        id="claude-opus-4-6[1m]",
+        created=1722816000,
+        owned_by="anthropic",
+        permission=[],
+        root="claude-opus-4-6",
+        parent=None,
+    ),
     ModelCard(
         id="claude-opus-4-20250514",
         created=1716336000,
@@ -125,12 +189,12 @@
     ),
     ModelMappingRule(
         match="o1",
-        target="claude-opus-4-6",
+        target="claude-opus-4-7",
         kind="prefix",
     ),
     ModelMappingRule(
         match="o3-mini",
-        target="claude-opus-4-6",
+        target="claude-opus-4-7",
         kind="exact",
     ),
     ModelMappingRule(
@@ -138,8 +202,17 @@
         target="claude-sonnet-4-6",
         kind="prefix",
     ),
+    ModelMappingRule(match="sonnet[1m]", target="claude-sonnet-4-6"),
+    ModelMappingRule(match="claude-sonnet-4-6[1m]", target="claude-sonnet-4-6"),
+    ModelMappingRule(match="opus[1m]", target="claude-opus-4-7"),
+    ModelMappingRule(match="opus-4-7[1m]", target="claude-opus-4-7"),
+    ModelMappingRule(match="claude-opus-4-7[1m]", target="claude-opus-4-7"),
+    ModelMappingRule(match="opus-4-6[1m]", target="claude-opus-4-6"),
+    ModelMappingRule(match="claude-opus-4-6[1m]", target="claude-opus-4-6"),
+    ModelMappingRule(match="opus-4-7", target="claude-opus-4-7"),
+    ModelMappingRule(match="claude-opus-4-7", target="claude-opus-4-7"),
     ModelMappingRule(match="sonnet", target="claude-sonnet-4-6"),
-    ModelMappingRule(match="opus", target="claude-opus-4-6"),
+    ModelMappingRule(match="opus", target="claude-opus-4-7"),
     ModelMappingRule(match="haiku", target="claude-haiku-4-5-20251001"),
     ModelMappingRule(
         match="claude-3-5-sonnet-latest",
diff --git a/ccproxy/plugins/codex/adapter.py b/ccproxy/plugins/codex/adapter.py
index 99cecdea..97f721be 100644
--- a/ccproxy/plugins/codex/adapter.py
+++ b/ccproxy/plugins/codex/adapter.py
@@ -32,6 +32,13 @@
 logger = get_plugin_logger()
 
 
+_CODEX_MODEL_REASONING_ALIASES: dict[str, str] = {  # client model id -> effort
+    "gpt-5.5-high": "high",
+    "gpt-5.5-xhigh": "xhigh",
+    "gpt-5.5-max": "max",
+}
+
+
 class CodexAdapter(BaseHTTPAdapter):
     """Simplified Codex adapter."""
 
@@ -65,6 +72,7 @@ async def handle_request(
         endpoint = ctx.metadata.get("endpoint", "")
         body = await request.body()
         body = await self._map_request_model(ctx, body)
+        body = self._apply_model_alias_reasoning_effort(ctx, body)
         headers = extract_request_headers(request)
 
         # Determine client streaming intent from body flag (fallback to False)
@@ -294,6 +302,36 @@ async def prepare_provider_request(
 
         return json.dumps(body_data).encode(), filtered_headers
 
+    def _apply_model_alias_reasoning_effort(self, ctx: Any, body: bytes) -> bytes:
+        """Apply reasoning effort implied by client-facing Codex model aliases.
+
+        An effort already present in the body wins; a null or empty value counts
+        as absent. Bodies that do not decode to a JSON object pass through as-is.
+        """
+        metadata = getattr(ctx, "metadata", None)
+        client_model = None
+        if isinstance(metadata, dict):
+            client_model = metadata.get("_last_client_model")
+        if not isinstance(client_model, str):
+            return body
+        effort = _CODEX_MODEL_REASONING_ALIASES.get(client_model)
+        if effort is None:
+            return body
+
+        try:
+            body_data = json.loads(body.decode()) if body else {}
+        except ValueError:  # JSONDecodeError and UnicodeDecodeError subclass it
+            return body
+        if not isinstance(body_data, dict):
+            return body
+
+        reasoning = body_data.get("reasoning")
+        if isinstance(reasoning, dict):
+            reasoning["effort"] = reasoning.get("effort") or effort
+        elif not body_data.get("reasoning_effort"):
+            body_data["reasoning_effort"] = effort
+        return self._encode_json_body(body_data)
+
     def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
         """Apply Codex-specific payload sanitization shared by all request paths."""
 
@@ -314,17 +352,43 @@ def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
         ):
             body_data.pop(key, None)
 
-        list_input = body_data.get("input", [])
-        # Remove any input types that Codex does not support
-        body_data["input"] = [
-            input for input in list_input if input.get("type") != "item_reference"
-        ]
+        input_value = body_data.get("input", [])
+        # Remove any input types that Codex does not support. Public Responses API
+        # input may be a plain string, but the Codex backend expects message items.
+        if isinstance(input_value, list):
+            body_data["input"] = [
+                input_item
+                for input_item in input_value
+                if not (
+                    isinstance(input_item, dict)
+                    and input_item.get("type") == "item_reference"
+                )
+            ]
+        elif isinstance(input_value, str):
+            body_data["input"] = [
+                {
+                    "type": "message",
+                    "role": "user",
+                    "content": [{"type": "input_text", "text": input_value}],
+                }
+            ]
 
         # Remove any prefixed metadata fields that shouldn't be sent to the API
         body_data = self._remove_metadata_fields(body_data)
+        self._normalize_reasoning_effort(body_data)
 
         return body_data
 
+    def _normalize_reasoning_effort(self, body_data: dict[str, Any]) -> None:
+        """Rewrite effort ``max`` to ``xhigh`` in place (flat and nested forms)."""
+        nested = body_data.get("reasoning")
+        slots: list[tuple[dict[str, Any], str]] = [(body_data, "reasoning_effort")]
+        if isinstance(nested, dict):
+            slots.append((nested, "effort"))
+        for holder, key in slots:
+            if holder.get(key) == "max":
+                holder[key] = "xhigh"
+
     async def prepare_provider_headers(self, headers: dict[str, str]) -> dict[str, str]:
         token_value = await self._resolve_access_token()
 
@@ -501,6 +565,7 @@ async def handle_streaming(
         # Extract body and headers
         body = await request.body()
         body = await self._map_request_model(ctx, body)
+        body = self._apply_model_alias_reasoning_effort(ctx, body)
         headers = extract_request_headers(request)
 
         # Ensure format adapters are available when required
diff --git a/ccproxy/plugins/codex/detection_service.py b/ccproxy/plugins/codex/detection_service.py
index 7738aab6..12e1c3d5 100644
--- a/ccproxy/plugins/codex/detection_service.py
+++ b/ccproxy/plugins/codex/detection_service.py
@@ -123,7 +123,7 @@ async def initialize_detection(self) -> CodexCacheData:
                 category="plugin",
             )
             # Return fallback data
-            fallback_data = self._get_fallback_data()
+            fallback_data = self._get_fallback_data(version=current_version)
             self._cached_data = fallback_data
             return fallback_data
 
@@ -509,7 +509,7 @@ def _save_to_cache(self, data: CodexCacheData) -> None:
                 category="plugin",
             )
 
-    def _get_fallback_data(self) -> CodexCacheData:
+    def _get_fallback_data(self, version: str | None = None) -> CodexCacheData:
         """Get fallback data when detection fails."""
         logger.warning("using_fallback_codex_data", category="plugin")
 
@@ -519,7 +519,20 @@ def _get_fallback_data(self) -> CodexCacheData:
         )
         with package_data_file.open("r") as f:
             fallback_data_dict = json.load(f)
-            return CodexCacheData.model_validate(fallback_data_dict)
+            fallback_data = CodexCacheData.model_validate(fallback_data_dict)
+
+        if version and version != "unknown":
+            headers = fallback_data.headers.as_dict()
+            headers["version"] = version
+            fallback_data = fallback_data.model_copy(
+                update={
+                    "codex_version": version,
+                    "headers": DetectedHeaders(headers),
+                },
+                deep=True,
+            )
+
+        return fallback_data
 
     def _safe_fallback_data(self) -> CodexCacheData | None:
         """Best-effort fallback data loader for partial detection caches."""
diff --git a/ccproxy/plugins/codex/model_defaults.py b/ccproxy/plugins/codex/model_defaults.py
index 16005583..50e7ba58 100644
--- a/ccproxy/plugins/codex/model_defaults.py
+++ b/ccproxy/plugins/codex/model_defaults.py
@@ -6,6 +6,46 @@
 
 
 DEFAULT_CODEX_MODEL_CARDS: list[ModelCard] = [
+    ModelCard(
+        id="gpt-5.5",
+        created=1778198400,
+        owned_by="openai",
+        permission=[],
+        root="gpt-5.5",
+        parent=None,
+    ),
+    ModelCard(
+        id="gpt-5.5-high",
+        created=1778198400,
+        owned_by="openai",
+        permission=[],
+        root="gpt-5.5-high",
+        parent=None,
+    ),
+    ModelCard(
+        id="gpt-5.5-xhigh",
+        created=1778198400,
+        owned_by="openai",
+        permission=[],
+        root="gpt-5.5-xhigh",
+        parent=None,
+    ),
+    ModelCard(
+        id="gpt-5.5-max",
+        created=1778198400,
+        owned_by="openai",
+        permission=[],
+        root="gpt-5.5-max",
+        parent=None,
+    ),
+    ModelCard(
+        id="gpt-5.4",
+        created=1778198400,
+        owned_by="openai",
+        permission=[],
+        root="gpt-5.4",
+        parent=None,
+    ),
     ModelCard(
         id="gpt-5.3-codex",
         created=1723075200,
@@ -26,6 +66,11 @@
 
 
 DEFAULT_CODEX_MODEL_MAPPINGS: list[ModelMappingRule] = [
+    ModelMappingRule(match="gpt-5.5", target="gpt-5.5", kind="exact"),
+    ModelMappingRule(match="gpt-5.5-", target="gpt-5.5", kind="prefix"),
+    ModelMappingRule(match="gpt-5.4", target="gpt-5.4", kind="exact"),
+    ModelMappingRule(match="gpt-5.3-codex", target="gpt-5.3-codex", kind="exact"),
+    ModelMappingRule(match="gpt-5.2-codex", target="gpt-5.2-codex", kind="exact"),
     ModelMappingRule(match="gpt-5-codex", target="gpt-5.3-codex", kind="prefix"),
     ModelMappingRule(match="gpt-", target="gpt-5.3-codex", kind="prefix"),
     ModelMappingRule(match="o3-", target="gpt-5.3-codex", kind="prefix"),
diff --git a/ccproxy/plugins/max_tokens/token_limits.json b/ccproxy/plugins/max_tokens/token_limits.json
index ea7c57b8..64acfc1b 100644
--- a/ccproxy/plugins/max_tokens/token_limits.json
+++ b/ccproxy/plugins/max_tokens/token_limits.json
@@ -1,4 +1,44 @@
 {
+  "claude-opus-4-7": {
+    "max_output_tokens": 128000,
+    "max_input_tokens": 1000000
+  },
+  "opus[1m]": {
+    "max_output_tokens": 128000,
+    "max_input_tokens": 1000000
+  },
+  "opus-4-7[1m]": {
+    "max_output_tokens": 128000,
+    "max_input_tokens": 1000000
+  },
+  "claude-opus-4-7[1m]": {
+    "max_output_tokens": 128000,
+    "max_input_tokens": 1000000
+  },
+  "claude-opus-4-6": {
+    "max_output_tokens": 128000,
+    "max_input_tokens": 1000000
+  },
+  "opus-4-6[1m]": {
+    "max_output_tokens": 128000,
+    "max_input_tokens": 1000000
+  },
+  "claude-opus-4-6[1m]": {
+    "max_output_tokens": 128000,
+    "max_input_tokens": 1000000
+  },
+  "claude-sonnet-4-6": {
+    "max_output_tokens": 64000,
+    "max_input_tokens": 1000000
+  },
+  "sonnet[1m]": {
+    "max_output_tokens": 64000,
+    "max_input_tokens": 1000000
+  },
+  "claude-sonnet-4-6[1m]": {
+    "max_output_tokens": 64000,
+    "max_input_tokens": 1000000
+  },
   "claude-opus-4-1-20250805": {
     "max_output_tokens": 32000,
     "max_input_tokens": 200000
diff --git a/tests/plugins/claude_api/integration/test_claude_api_basic.py b/tests/plugins/claude_api/integration/test_claude_api_basic.py
index 6fc65c40..6c721a00 100644
--- a/tests/plugins/claude_api/integration/test_claude_api_basic.py
+++ b/tests/plugins/claude_api/integration/test_claude_api_basic.py
@@ -38,6 +38,8 @@ async def test_models_endpoint_available_when_enabled(
     # Verify Claude models are present
     model_ids = {model["id"] for model in models}
     assert "claude-3-5-sonnet-20241022" in model_ids
+    assert "sonnet[1m]" in model_ids
+    assert "opus[1m]" in model_ids
 
 
 @pytest.mark.asyncio
diff --git a/tests/plugins/codex/unit/test_adapter.py b/tests/plugins/codex/unit/test_adapter.py
index dededd10..45fbf528 100644
--- a/tests/plugins/codex/unit/test_adapter.py
+++ b/tests/plugins/codex/unit/test_adapter.py
@@ -616,6 +616,82 @@ def test_sanitize_provider_body_strips_metadata(
         assert cleaned["stream"] is True
         assert cleaned["store"] is False
 
+    def test_sanitize_provider_body_normalizes_string_input(
+        self, adapter: CodexAdapter
+    ) -> None:
+        """A bare string ``input`` comes back wrapped as one user message item."""
+        prompt = "Reply exactly OK"
+        body = {"model": "gpt-5.5", "input": prompt}
+        expected_item = {
+            "type": "message",
+            "role": "user",
+            "content": [{"type": "input_text", "text": prompt}],
+        }
+
+        cleaned = adapter._sanitize_provider_body(body)
+
+        assert cleaned["input"] == [expected_item]
+        assert cleaned["stream"] is True
+        assert cleaned["store"] is False
+
+    def test_apply_model_alias_reasoning_effort_for_chat_alias(
+        self, adapter: CodexAdapter
+    ) -> None:
+        """The ``gpt-5.5-xhigh`` alias adds ``xhigh`` and leaves the model as-is."""
+        payload = {
+            "model": "gpt-5.5",
+            "messages": [{"role": "user", "content": "Hello"}],
+        }
+        ctx = Mock(
+            metadata={
+                "_last_client_model": "gpt-5.5-xhigh",
+                "_last_provider_model": "gpt-5.5",
+            }
+        )
+        body = json.dumps(payload).encode()
+
+        result = adapter._apply_model_alias_reasoning_effort(ctx, body)
+        result_data = json.loads(result.decode())
+
+        assert result_data["model"] == "gpt-5.5"
+        assert result_data["reasoning_effort"] == "xhigh"
+
+    def test_apply_model_alias_reasoning_effort_preserves_explicit_effort(
+        self, adapter: CodexAdapter
+    ) -> None:
+        """An effort already in the request survives a ``gpt-5.5-max`` alias."""
+        payload = {
+            "model": "gpt-5.5",
+            "messages": [{"role": "user", "content": "Hello"}],
+            "reasoning_effort": "high",
+        }
+        ctx = Mock(
+            metadata={
+                "_last_client_model": "gpt-5.5-max",
+                "_last_provider_model": "gpt-5.5",
+            }
+        )
+        body = json.dumps(payload).encode()
+
+        result = adapter._apply_model_alias_reasoning_effort(ctx, body)
+        result_data = json.loads(result.decode())
+
+        assert result_data["reasoning_effort"] == "high"
+
+    def test_sanitize_provider_body_clamps_max_reasoning_effort(
+        self, adapter: CodexAdapter
+    ) -> None:
+        """A nested ``max`` effort leaves sanitization as ``xhigh``."""
+        user_message = {"type": "message", "role": "user", "content": []}
+        body = {
+            "model": "gpt-5.5",
+            "input": [user_message],
+            "reasoning": {"effort": "max", "summary": "auto"},
+        }
+
+        cleaned = adapter._sanitize_provider_body(body)
+        assert cleaned["reasoning"] == {"effort": "xhigh", "summary": "auto"}
+
     def test_get_instructions_default(self, adapter: CodexAdapter) -> None:
         """Test default instructions when no detection service data."""
         instructions = adapter._get_instructions()
diff --git a/tests/plugins/max_tokens/test_service.py b/tests/plugins/max_tokens/test_service.py
index 7027b96c..8abfc1e2 100644
--- a/tests/plugins/max_tokens/test_service.py
+++ b/tests/plugins/max_tokens/test_service.py
@@ -49,6 +49,25 @@ def test_get_max_output_tokens_variant_models(
         # We just verify that models in the cache can be retrieved
         assert len(service.token_limits_data.models) > 0
 
+    def test_get_token_limits_for_1m_claude_aliases(
+        self, service: TokenLimitsService
+    ) -> None:
+        """Each 1M alias reports its output cap and a 1000000-token input cap."""
+        # Output caps differ by family (sonnet vs opus); the four aliases
+        # checked here all share the same input cap.
+        output_caps = {
+            "sonnet[1m]": 64000,
+            "claude-sonnet-4-6[1m]": 64000,
+            "opus[1m]": 128000,
+            "claude-opus-4-7[1m]": 128000,
+        }
+        input_cap = 1000000
+
+        for alias, output_cap in output_caps.items():
+            assert service.get_max_output_tokens(alias) == output_cap
+            assert service.token_limits_data.get_max_input_tokens(alias) == input_cap
+        assert len(output_caps) == 4
+
     def test_get_max_output_tokens_unknown_model(
         self, service: TokenLimitsService
     ) -> None:
diff --git a/tests/unit/auth/test_auth.py b/tests/unit/auth/test_auth.py
index 6bc86f49..6e59e26b 100644
--- a/tests/unit/auth/test_auth.py
+++ b/tests/unit/auth/test_auth.py
@@ -17,6 +17,7 @@
 
 # from ccproxy.auth.credentials_adapter import CredentialsAuthManager
 from ccproxy.auth.dependencies import (
+    _build_bearer_auth_manager,
     get_access_token,
     require_auth,
 )
@@ -148,6 +149,31 @@ async def test_get_access_token_dependency(self) -> None:
         assert token == "sk-test-token-123"
         mock_manager.get_access_token.assert_called_once()
 
+    async def test_x_api_key_header_authenticates_when_token_matches(self) -> None:
+        """A matching ``api_key`` authenticates when no bearer credentials exist."""
+        shared_secret = "sk-test-token-123"
+
+        manager = await _build_bearer_auth_manager(
+            None, shared_secret, require_credentials=True, api_key=shared_secret
+        )
+
+        assert isinstance(manager, BearerTokenAuthManager)
+        issued = await manager.get_access_token()
+        assert issued == shared_secret
+
+    async def test_x_api_key_header_rejects_invalid_token(self) -> None:
+        """A mismatched ``api_key`` is rejected with HTTP 401."""
+        expected_token, presented = "sk-test-token-123", "wrong-token"
+
+        with pytest.raises(HTTPException) as exc_info:
+            await _build_bearer_auth_manager(
+                None, expected_token, require_credentials=True, api_key=presented
+            )
+
+        rejection = exc_info.value
+        assert rejection.status_code == status.HTTP_401_UNAUTHORIZED
+        assert "Invalid authentication credentials" in str(rejection.detail)
+
 
 @pytest.mark.auth
 class TestAPIEndpointsWithAuth:
diff --git a/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py b/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py
index 767c07ae..15804cfe 100644
--- a/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py
+++ b/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py
@@ -88,3 +88,40 @@ async def test_openai_responses_request_to_anthropic_messages_basic() -> None:
     assert anth_req.max_tokens == 64
     assert anth_req.system == "sys"
     assert anth_req.messages and anth_req.messages[0].role == "user"
+
+
+@pytest.mark.asyncio
+async def test_openai_chat_to_anthropic_supports_max_thinking_effort() -> None:
+    """Effort ``max`` enables thinking with a 32000-token budget."""
+    req = openai_models.ChatCompletionRequest(
+        model="claude-opus-4-7",
+        messages=[{"role": "user", "content": "Think carefully"}],  # type: ignore[list-item]
+        max_completion_tokens=128,
+        reasoning_effort="max",
+    )
+    anth_req = await convert__openai_chat_to_anthropic_message__request(req)
+    thinking = anth_req.thinking
+
+    assert anth_req.model == "claude-opus-4-7"
+    assert thinking is not None
+    assert (thinking.type, thinking.budget_tokens) == ("enabled", 32000)
+    assert anth_req.max_tokens == 32000 + 128
+    assert anth_req.temperature == 1.0
+
+
+def test_openai_responses_to_anthropic_maps_reasoning_effort() -> None:
+    """Nested ``reasoning.effort`` of ``xhigh`` yields a 20000-token budget."""
+    resp_req = openai_models.ResponseRequest(
+        model="claude-opus-4-7",
+        input="Think carefully",
+        max_output_tokens=128,
+        reasoning={"effort": "xhigh"},
+    )
+    anth_req = convert__openai_responses_to_anthropic_message__request(resp_req)
+    thinking = anth_req.thinking
+
+    assert anth_req.model == "claude-opus-4-7"
+    assert thinking is not None
+    assert (thinking.type, thinking.budget_tokens) == ("enabled", 20000)
+    assert anth_req.max_tokens == 20000 + 128
+    assert anth_req.temperature == 1.0
diff --git a/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py b/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py
index e5c6d660..a8a8f87e 100644
--- a/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py
+++ b/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py
@@ -189,6 +189,36 @@ async def test_chat_request_to_responses_maps_reasoning_effort() -> None:
     assert response_request.reasoning == {"effort": "high", "summary": "auto"}
 
 
+@pytest.mark.asyncio
+async def test_chat_request_to_responses_allows_xhigh_reasoning_effort() -> None:
+    """``xhigh`` effort survives chat-to-responses conversion unchanged."""
+    user_turn = openai_models.ChatMessage(role="user", content="Hello")
+    chat_request = openai_models.ChatCompletionRequest(
+        model="gpt-test",
+        messages=[user_turn],
+        reasoning_effort="xhigh",
+    )
+
+    converted = await convert__openai_chat_to_openai_responses__request(chat_request)
+
+    assert converted.reasoning == {"effort": "xhigh", "summary": "auto"}
+
+
+@pytest.mark.asyncio
+async def test_chat_request_to_responses_allows_max_reasoning_effort() -> None:
+    """``max`` effort survives chat-to-responses conversion unchanged."""
+    user_turn = openai_models.ChatMessage(role="user", content="Hello")
+    chat_request = openai_models.ChatCompletionRequest(
+        model="gpt-test",
+        messages=[user_turn],
+        reasoning_effort="max",
+    )
+
+    converted = await convert__openai_chat_to_openai_responses__request(chat_request)
+
+    assert converted.reasoning == {"effort": "max", "summary": "auto"}
+
+
 @pytest.mark.asyncio
 async def test_chat_request_to_responses_defaults_reasoning(monkeypatch: Any) -> None:
     monkeypatch.delenv("LLM__OPENAI_THINKING_XML", raising=False)
diff --git a/tests/unit/plugins/test_codex_detection.py b/tests/unit/plugins/test_codex_detection.py
index 36f1b5cd..bf968bd1 100644
--- a/tests/unit/plugins/test_codex_detection.py
+++ b/tests/unit/plugins/test_codex_detection.py
@@ -55,6 +55,17 @@ def test_codex_detection_ignores_content_encoding_header() -> None:
     assert "content-encoding" in CodexDetectionService.ignores_header
 
 
+def test_codex_fallback_data_uses_detected_cli_version() -> None:
+    """Fallback data reports the supplied version in both version fields."""
+    detected = "0.129.0"
+    service = CodexDetectionService(
+        settings=MagicMock(spec=Settings), cli_service=MagicMock()
+    )
+    fallback = service._get_fallback_data(version=detected)
+    assert fallback.codex_version == detected
+    assert fallback.headers.get("version") == detected
+
+
 def test_codex_detection_merges_partial_prompt_cache_with_fallback() -> None:
     settings = MagicMock(spec=Settings)
     cli_service = MagicMock()
diff --git a/tests/unit/utils/test_model_mapper.py b/tests/unit/utils/test_model_mapper.py
index 9477e4a2..ba4d00de 100644
--- a/tests/unit/utils/test_model_mapper.py
+++ b/tests/unit/utils/test_model_mapper.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
 from ccproxy.models.provider import ModelMappingRule
-from ccproxy.plugins.claude_shared.model_defaults import DEFAULT_CLAUDE_MODEL_MAPPINGS
+from ccproxy.plugins.claude_shared.model_defaults import (
+    DEFAULT_CLAUDE_MODEL_CARDS,
+    DEFAULT_CLAUDE_MODEL_MAPPINGS,
+)
 from ccproxy.plugins.codex.model_defaults import DEFAULT_CODEX_MODEL_MAPPINGS
 from ccproxy.utils.model_mapper import (
     ModelMapper,
@@ -60,13 +63,39 @@ def test_default_claude_mapping_prefers_latest_sonnet_and_opus() -> None:
 
     assert mapper.map("gpt-4o").mapped == "claude-sonnet-4-6"
     assert mapper.map("gpt-5").mapped == "claude-sonnet-4-6"
-    assert mapper.map("o1-preview").mapped == "claude-opus-4-6"
-    assert mapper.map("o3-mini").mapped == "claude-opus-4-6"
+    assert mapper.map("o1-preview").mapped == "claude-opus-4-7"
+    assert mapper.map("o3-mini").mapped == "claude-opus-4-7"
+    assert mapper.map("opus-4-7").mapped == "claude-opus-4-7"
+    assert mapper.map("sonnet[1m]").mapped == "claude-sonnet-4-6"
+    assert mapper.map("claude-sonnet-4-6[1m]").mapped == "claude-sonnet-4-6"
+    assert mapper.map("opus[1m]").mapped == "claude-opus-4-7"
+    assert mapper.map("opus-4-7[1m]").mapped == "claude-opus-4-7"
+    assert mapper.map("claude-opus-4-7[1m]").mapped == "claude-opus-4-7"
+    assert mapper.map("opus-4-6[1m]").mapped == "claude-opus-4-6"
+    assert mapper.map("claude-opus-4-6[1m]").mapped == "claude-opus-4-6"
     assert mapper.map("sonnet").mapped == "claude-sonnet-4-6"
-    assert mapper.map("opus").mapped == "claude-opus-4-6"
+    assert mapper.map("opus").mapped == "claude-opus-4-7"
+
+
+def test_default_claude_model_cards_expose_1m_aliases() -> None:
+    """Default Claude cards advertise the ``[1m]`` alias ids checked below."""
+    advertised = {card.id for card in DEFAULT_CLAUDE_MODEL_CARDS}
+
+    for short_name in ("sonnet", "opus"):
+        assert f"{short_name}[1m]" in advertised
+    for full_name in ("claude-sonnet-4-6", "claude-opus-4-7", "claude-opus-4-6"):
+        assert f"{full_name}[1m]" in advertised
 
 
 def test_default_codex_mapping_keeps_latest_codex_model() -> None:
     mapper = ModelMapper(DEFAULT_CODEX_MODEL_MAPPINGS)
 
+    assert mapper.map("gpt-5.5").mapped == "gpt-5.5"
+    assert mapper.map("gpt-5.5-high").mapped == "gpt-5.5"
+    assert mapper.map("gpt-5.5-xhigh").mapped == "gpt-5.5"
+    assert mapper.map("gpt-5.5-max").mapped == "gpt-5.5"
+    assert mapper.map("gpt-5.5-custom").mapped == "gpt-5.5"
+    assert mapper.map("gpt-5.4").mapped == "gpt-5.4"
+    assert mapper.map("gpt-5.3-codex").mapped == "gpt-5.3-codex"
+    assert mapper.map("gpt-5.2-codex").mapped == "gpt-5.2-codex"
     assert mapper.map("gpt-5-codex").mapped == "gpt-5.3-codex"