From abbf14832b6f6196da38f657518da99f54a4fc37 Mon Sep 17 00:00:00 2001 From: smhanan Date: Sat, 9 May 2026 23:51:45 -0700 Subject: [PATCH] feat(models): add provider model and effort aliases --- ccproxy/auth/dependencies.py | 6 ++ .../openai_to_anthropic/requests.py | 38 +++++++-- ccproxy/llms/models/openai.py | 8 +- .../plugins/claude_shared/model_defaults.py | 79 ++++++++++++++++++- ccproxy/plugins/codex/adapter.py | 75 ++++++++++++++++-- ccproxy/plugins/codex/detection_service.py | 19 ++++- ccproxy/plugins/codex/model_defaults.py | 45 +++++++++++ ccproxy/plugins/max_tokens/token_limits.json | 40 ++++++++++ .../integration/test_claude_api_basic.py | 2 + tests/plugins/codex/unit/test_adapter.py | 76 ++++++++++++++++++ tests/plugins/max_tokens/test_service.py | 19 +++++ tests/unit/auth/test_auth.py | 26 ++++++ .../test_openai_to_anthropic_helpers.py | 37 +++++++++ .../test_openai_to_openai_reasoning.py | 30 +++++++ tests/unit/plugins/test_codex_detection.py | 11 +++ tests/unit/utils/test_model_mapper.py | 37 ++++++++- 16 files changed, 521 insertions(+), 27 deletions(-) diff --git a/ccproxy/auth/dependencies.py b/ccproxy/auth/dependencies.py index 1ea0708c..c7321209 100644 --- a/ccproxy/auth/dependencies.py +++ b/ccproxy/auth/dependencies.py @@ -68,10 +68,13 @@ async def _build_bearer_auth_manager( expected_token: str | None, *, require_credentials: bool, + api_key: str | None = None, ) -> AuthManager | None: """Create a bearer auth manager when credentials satisfy expectations.""" token = credentials.credentials if credentials and credentials.credentials else None + if token is None and api_key: + token = api_key if token is None: if require_credentials: @@ -118,6 +121,7 @@ async def _build_bearer_auth_manager( async def get_auth_manager( + request: Request, credentials: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer_scheme)], settings: SettingsDep, ) -> AuthManager: @@ -127,6 +131,7 @@ async def get_auth_manager( credentials, _expected_token(settings), require_credentials=True, + api_key=request.headers.get("x-api-key"), ) # require_credentials ensures auth_manager is never None here. assert auth_manager is not None @@ -183,6 +188,7 @@ async def get_conditional_auth_manager( credentials, expected_token, require_credentials=True, + api_key=request.headers.get("x-api-key"), ) diff --git a/ccproxy/llms/formatters/openai_to_anthropic/requests.py b/ccproxy/llms/formatters/openai_to_anthropic/requests.py index 5540289c..e9c900e6 100644 --- a/ccproxy/llms/formatters/openai_to_anthropic/requests.py +++ b/ccproxy/llms/formatters/openai_to_anthropic/requests.py @@ -404,7 +404,7 @@ async def convert__openai_chat_to_anthropic_message__request( def convert__openai_responses_to_anthropic_message__request( request: openai_models.ResponseRequest, ) -> anthropic_models.CreateMessageRequest: - model = request.model + model = request.model or "" stream = bool(request.stream) max_out = request.max_output_tokens @@ -618,8 +618,7 @@ def convert__openai_responses_to_anthropic_message__request( else request.instructions ) - # Skip thinking config for ResponseRequest as it doesn't have the required fields - thinking_cfg = None + thinking_cfg = derive_thinking_config(model, request) if thinking_cfg is not None: payload_data["thinking"] = thinking_cfg budget = thinking_cfg.get("budget_tokens", 0) @@ -631,21 +630,28 @@ def convert__openai_responses_to_anthropic_message__request( def derive_thinking_config( - model: str, request: openai_models.ChatCompletionRequest + model: str, + request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest, ) -> dict[str, Any] | None: """Derive Anthropic thinking config from OpenAI fields and model name. Rules: - If model matches o1/o3 families, enable thinking by default with model-specific budget - - Map reasoning_effort: low=1000, medium=5000, high=10000 + - Map effort: minimal/low=1024, medium=5000, high=10000, xhigh=20000, max=32000 - o3*: 10000; o1-mini: 3000; other o1*: 5000 - If thinking is enabled, return {"type":"enabled","budget_tokens":N} - Otherwise return None """ # Explicit reasoning_effort mapping - effort = getattr(request, "reasoning_effort", None) - effort = effort.strip().lower() if isinstance(effort, str) else "" - effort_budgets = {"low": 1000, "medium": 5000, "high": 10000} + effort = _extract_reasoning_effort(request) + effort_budgets = { + "minimal": 1024, + "low": 1024, + "medium": 5000, + "high": 10000, + "xhigh": 20000, + "max": 32000, + } budget: int | None = None if effort in effort_budgets: @@ -667,6 +673,22 @@ def derive_thinking_config( return {"type": "enabled", "budget_tokens": budget} +def _extract_reasoning_effort( + request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest, +) -> str: + effort = getattr(request, "reasoning_effort", None) + if isinstance(effort, str) and effort.strip(): + return effort.strip().lower() + + reasoning = getattr(request, "reasoning", None) + if isinstance(reasoning, dict): + effort = reasoning.get("effort") + elif reasoning is not None: + effort = getattr(reasoning, "effort", None) + + return effort.strip().lower() if isinstance(effort, str) else "" + + __all__ = [ "convert__openai_chat_to_anthropic_message__request", "convert__openai_responses_to_anthropic_message__request", diff --git a/ccproxy/llms/models/openai.py b/ccproxy/llms/models/openai.py index e510d0d4..72117cf2 100644 --- a/ccproxy/llms/models/openai.py +++ b/ccproxy/llms/models/openai.py @@ -233,9 +233,9 @@ class ChatCompletionRequest(LlmBaseModel): n: int | None = Field(default=1) parallel_tool_calls: bool | None = Field(default=None) presence_penalty: float | None = Field(default=None, ge=-2.0, le=2.0) - reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = Field( - default=None - ) + reasoning_effort: ( + Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None + ) = Field(default=None) response_format: ResponseFormat | None = Field(default=None) seed: int | None = Field(default=None) stop: str | list[str] | None = Field(default=None) @@ -262,7 +262,7 @@ class ChatCompletionRequest(LlmBaseModel): class ResponseMessageReasoning(LlmBaseModel): - effort: Literal["minimal", "low", "medium", "high"] | None = None + effort: Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None = None summary: Literal["auto", "detailed", "concise"] | None = None diff --git a/ccproxy/plugins/claude_shared/model_defaults.py b/ccproxy/plugins/claude_shared/model_defaults.py index 5f22adda..cf506651 100644 --- a/ccproxy/plugins/claude_shared/model_defaults.py +++ b/ccproxy/plugins/claude_shared/model_defaults.py @@ -14,6 +14,22 @@ root="claude-sonnet-4-6", parent=None, ), + ModelCard( + id="sonnet[1m]", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-sonnet-4-6", + parent=None, + ), + ModelCard( + id="claude-sonnet-4-6[1m]", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-sonnet-4-6", + parent=None, + ), ModelCard( id="claude-haiku-4-5-20251001", created=1722816000, @@ -22,6 +38,38 @@ root="claude-haiku-4-5-20251001", parent=None, ), + ModelCard( + id="claude-opus-4-7", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-opus-4-7", + parent=None, + ), + ModelCard( + id="opus[1m]", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-opus-4-7", + parent=None, + ), + ModelCard( + id="opus-4-7[1m]", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-opus-4-7", + parent=None, + ), + ModelCard( + id="claude-opus-4-7[1m]", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-opus-4-7", + parent=None, + ), ModelCard( id="claude-opus-4-6", created=1722816000, @@ -30,6 +78,22 @@ root="claude-opus-4-6", parent=None, ), + ModelCard( + id="opus-4-6[1m]", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-opus-4-6", + parent=None, + ), + ModelCard( + id="claude-opus-4-6[1m]", + created=1722816000, + owned_by="anthropic", + permission=[], + root="claude-opus-4-6", + parent=None, + ), ModelCard( id="claude-opus-4-20250514", created=1716336000, @@ -125,12 +189,12 @@ ), ModelMappingRule( match="o1", - target="claude-opus-4-6", + target="claude-opus-4-7", kind="prefix", ), ModelMappingRule( match="o3-mini", - target="claude-opus-4-6", + target="claude-opus-4-7", kind="exact", ), ModelMappingRule( @@ -138,8 +202,17 @@ target="claude-sonnet-4-6", kind="prefix", ), + ModelMappingRule(match="sonnet[1m]", target="claude-sonnet-4-6"), + ModelMappingRule(match="claude-sonnet-4-6[1m]", target="claude-sonnet-4-6"), + ModelMappingRule(match="opus[1m]", target="claude-opus-4-7"), + ModelMappingRule(match="opus-4-7[1m]", target="claude-opus-4-7"), + ModelMappingRule(match="claude-opus-4-7[1m]", target="claude-opus-4-7"), + ModelMappingRule(match="opus-4-6[1m]", target="claude-opus-4-6"), + ModelMappingRule(match="claude-opus-4-6[1m]", target="claude-opus-4-6"), + ModelMappingRule(match="opus-4-7", target="claude-opus-4-7"), + ModelMappingRule(match="claude-opus-4-7", target="claude-opus-4-7"), ModelMappingRule(match="sonnet", target="claude-sonnet-4-6"), - ModelMappingRule(match="opus", target="claude-opus-4-6"), + ModelMappingRule(match="opus", target="claude-opus-4-7"), ModelMappingRule(match="haiku", target="claude-haiku-4-5-20251001"), ModelMappingRule( match="claude-3-5-sonnet-latest", diff --git a/ccproxy/plugins/codex/adapter.py b/ccproxy/plugins/codex/adapter.py index 99cecdea..97f721be 100644 --- a/ccproxy/plugins/codex/adapter.py +++ b/ccproxy/plugins/codex/adapter.py @@ -32,6 +32,13 @@ logger = get_plugin_logger() +_CODEX_MODEL_REASONING_ALIASES = { + "gpt-5.5-high": "high", + "gpt-5.5-xhigh": "xhigh", + "gpt-5.5-max": "max", +} + + class CodexAdapter(BaseHTTPAdapter): """Simplified Codex adapter.""" @@ -65,6 +72,7 @@ async def handle_request( endpoint = ctx.metadata.get("endpoint", "") body = await request.body() body = await self._map_request_model(ctx, body) + body = self._apply_model_alias_reasoning_effort(ctx, body) headers = extract_request_headers(request) # Determine client streaming intent from body flag (fallback to False) @@ -294,6 +302,36 @@ async def prepare_provider_request( return json.dumps(body_data).encode(), filtered_headers + def _apply_model_alias_reasoning_effort(self, ctx: Any, body: bytes) -> bytes: + """Apply reasoning effort implied by client-facing Codex model aliases.""" + + metadata = getattr(ctx, "metadata", None) + client_model = None + if isinstance(metadata, dict): + client_model = metadata.get("_last_client_model") + if not isinstance(client_model, str): + return body + + effort = _CODEX_MODEL_REASONING_ALIASES.get(client_model) + if effort is None: + return body + + try: + body_data = json.loads(body.decode()) if body else {} + except Exception: + return body + if not isinstance(body_data, dict): + return body + + if isinstance(body_data.get("reasoning"), dict): + reasoning = dict(body_data["reasoning"]) + reasoning.setdefault("effort", effort) + body_data["reasoning"] = reasoning + elif not body_data.get("reasoning_effort"): + body_data["reasoning_effort"] = effort + + return self._encode_json_body(body_data) + def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]: """Apply Codex-specific payload sanitization shared by all request paths.""" @@ -314,17 +352,43 @@ def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]: ): body_data.pop(key, None) - list_input = body_data.get("input", []) - # Remove any input types that Codex does not support - body_data["input"] = [ - input for input in list_input if input.get("type") != "item_reference" - ] + input_value = body_data.get("input", []) + # Remove any input types that Codex does not support. Public Responses API + # input may be a plain string, but the Codex backend expects message items. + if isinstance(input_value, list): + body_data["input"] = [ + input_item + for input_item in input_value + if not ( + isinstance(input_item, dict) + and input_item.get("type") == "item_reference" + ) + ] + elif isinstance(input_value, str): + body_data["input"] = [ + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": input_value}], + } + ] # Remove any prefixed metadata fields that shouldn't be sent to the API body_data = self._remove_metadata_fields(body_data) + self._normalize_reasoning_effort(body_data) return body_data + def _normalize_reasoning_effort(self, body_data: dict[str, Any]) -> None: + """Clamp client-facing effort aliases to values accepted by Codex backend.""" + + if body_data.get("reasoning_effort") == "max": + body_data["reasoning_effort"] = "xhigh" + + reasoning = body_data.get("reasoning") + if isinstance(reasoning, dict) and reasoning.get("effort") == "max": + reasoning["effort"] = "xhigh" + async def prepare_provider_headers(self, headers: dict[str, str]) -> dict[str, str]: token_value = await self._resolve_access_token() @@ -501,6 +565,7 @@ async def handle_streaming( # Extract body and headers body = await request.body() body = await self._map_request_model(ctx, body) + body = self._apply_model_alias_reasoning_effort(ctx, body) headers = extract_request_headers(request) # Ensure format adapters are available when required diff --git a/ccproxy/plugins/codex/detection_service.py b/ccproxy/plugins/codex/detection_service.py index 7738aab6..12e1c3d5 100644 --- a/ccproxy/plugins/codex/detection_service.py +++ b/ccproxy/plugins/codex/detection_service.py @@ -123,7 +123,7 @@ async def initialize_detection(self) -> CodexCacheData: category="plugin", ) # Return fallback data - fallback_data = self._get_fallback_data() + fallback_data = self._get_fallback_data(version=current_version) self._cached_data = fallback_data return fallback_data @@ -509,7 +509,7 @@ def _save_to_cache(self, data: CodexCacheData) -> None: category="plugin", ) - def _get_fallback_data(self) -> CodexCacheData: + def _get_fallback_data(self, version: str | None = None) -> CodexCacheData: """Get fallback data when detection fails.""" logger.warning("using_fallback_codex_data", category="plugin") @@ -519,7 +519,20 @@ def _get_fallback_data(self) -> CodexCacheData: ) with package_data_file.open("r") as f: fallback_data_dict = json.load(f) - return CodexCacheData.model_validate(fallback_data_dict) + fallback_data = CodexCacheData.model_validate(fallback_data_dict) + + if version and version != "unknown": + headers = fallback_data.headers.as_dict() + headers["version"] = version + fallback_data = fallback_data.model_copy( + update={ + "codex_version": version, + "headers": DetectedHeaders(headers), + }, + deep=True, + ) + + return fallback_data def _safe_fallback_data(self) -> CodexCacheData | None: """Best-effort fallback data loader for partial detection caches.""" diff --git a/ccproxy/plugins/codex/model_defaults.py b/ccproxy/plugins/codex/model_defaults.py index 16005583..50e7ba58 100644 --- a/ccproxy/plugins/codex/model_defaults.py +++ b/ccproxy/plugins/codex/model_defaults.py @@ -6,6 +6,46 @@ DEFAULT_CODEX_MODEL_CARDS: list[ModelCard] = [ + ModelCard( + id="gpt-5.5", + created=1778198400, + owned_by="openai", + permission=[], + root="gpt-5.5", + parent=None, + ), + ModelCard( + id="gpt-5.5-high", + created=1778198400, + owned_by="openai", + permission=[], + root="gpt-5.5-high", + parent=None, + ), + ModelCard( + id="gpt-5.5-xhigh", + created=1778198400, + owned_by="openai", + permission=[], + root="gpt-5.5-xhigh", + parent=None, + ), + ModelCard( + id="gpt-5.5-max", + created=1778198400, + owned_by="openai", + permission=[], + root="gpt-5.5-max", + parent=None, + ), + ModelCard( + id="gpt-5.4", + created=1778198400, + owned_by="openai", + permission=[], + root="gpt-5.4", + parent=None, + ), ModelCard( id="gpt-5.3-codex", created=1723075200, @@ -26,6 +66,11 @@ DEFAULT_CODEX_MODEL_MAPPINGS: list[ModelMappingRule] = [ + ModelMappingRule(match="gpt-5.5", target="gpt-5.5", kind="exact"), + ModelMappingRule(match="gpt-5.5-", target="gpt-5.5", kind="prefix"), + ModelMappingRule(match="gpt-5.4", target="gpt-5.4", kind="exact"), + ModelMappingRule(match="gpt-5.3-codex", target="gpt-5.3-codex", kind="exact"), + ModelMappingRule(match="gpt-5.2-codex", target="gpt-5.2-codex", kind="exact"), ModelMappingRule(match="gpt-5-codex", target="gpt-5.3-codex", kind="prefix"), ModelMappingRule(match="gpt-", target="gpt-5.3-codex", kind="prefix"), ModelMappingRule(match="o3-", target="gpt-5.3-codex", kind="prefix"), diff --git a/ccproxy/plugins/max_tokens/token_limits.json b/ccproxy/plugins/max_tokens/token_limits.json index ea7c57b8..64acfc1b 100644 --- a/ccproxy/plugins/max_tokens/token_limits.json +++ b/ccproxy/plugins/max_tokens/token_limits.json @@ -1,4 +1,44 @@ { + "claude-opus-4-7": { + "max_output_tokens": 128000, + "max_input_tokens": 1000000 + }, + "opus[1m]": { + "max_output_tokens": 128000, + "max_input_tokens": 1000000 + }, + "opus-4-7[1m]": { + "max_output_tokens": 128000, + "max_input_tokens": 1000000 + }, + "claude-opus-4-7[1m]": { + "max_output_tokens": 128000, + "max_input_tokens": 1000000 + }, + "claude-opus-4-6": { + "max_output_tokens": 128000, + "max_input_tokens": 1000000 + }, + "opus-4-6[1m]": { + "max_output_tokens": 128000, + "max_input_tokens": 1000000 + }, + "claude-opus-4-6[1m]": { + "max_output_tokens": 128000, + "max_input_tokens": 1000000 + }, + "claude-sonnet-4-6": { + "max_output_tokens": 64000, + "max_input_tokens": 1000000 + }, + "sonnet[1m]": { + "max_output_tokens": 64000, + "max_input_tokens": 1000000 + }, + "claude-sonnet-4-6[1m]": { + "max_output_tokens": 64000, + "max_input_tokens": 1000000 + }, "claude-opus-4-1-20250805": { "max_output_tokens": 32000, "max_input_tokens": 200000 diff --git a/tests/plugins/claude_api/integration/test_claude_api_basic.py b/tests/plugins/claude_api/integration/test_claude_api_basic.py index 6fc65c40..6c721a00 100644 --- a/tests/plugins/claude_api/integration/test_claude_api_basic.py +++ b/tests/plugins/claude_api/integration/test_claude_api_basic.py @@ -38,6 +38,8 @@ async def test_models_endpoint_available_when_enabled( # Verify Claude models are present model_ids = {model["id"] for model in models} assert "claude-3-5-sonnet-20241022" in model_ids + assert "sonnet[1m]" in model_ids + assert "opus[1m]" in model_ids @pytest.mark.asyncio diff --git a/tests/plugins/codex/unit/test_adapter.py b/tests/plugins/codex/unit/test_adapter.py index dededd10..45fbf528 100644 --- a/tests/plugins/codex/unit/test_adapter.py +++ b/tests/plugins/codex/unit/test_adapter.py @@ -616,6 +616,82 @@ def test_sanitize_provider_body_strips_metadata( assert cleaned["stream"] is True assert cleaned["store"] is False + def test_sanitize_provider_body_normalizes_string_input( + self, adapter: CodexAdapter + ) -> None: + """Responses API string input should be normalized for Codex backend.""" + body = {"model": "gpt-5.5", "input": "Reply exactly OK"} + + cleaned = adapter._sanitize_provider_body(body) + + assert cleaned["input"] == [ + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": "Reply exactly OK"}], + } + ] + assert cleaned["stream"] is True + assert cleaned["store"] is False + + def test_apply_model_alias_reasoning_effort_for_chat_alias( + self, adapter: CodexAdapter + ) -> None: + """GPT-5.5 effort aliases should set effort while using the base model.""" + ctx = Mock() + ctx.metadata = { + "_last_client_model": "gpt-5.5-xhigh", + "_last_provider_model": "gpt-5.5", + } + body = json.dumps( + { + "model": "gpt-5.5", + "messages": [{"role": "user", "content": "Hello"}], + } + ).encode() + + result = adapter._apply_model_alias_reasoning_effort(ctx, body) + result_data = json.loads(result.decode()) + + assert result_data["model"] == "gpt-5.5" + assert result_data["reasoning_effort"] == "xhigh" + + def test_apply_model_alias_reasoning_effort_preserves_explicit_effort( + self, adapter: CodexAdapter + ) -> None: + """Explicit request effort should win over model-alias defaults.""" + ctx = Mock() + ctx.metadata = { + "_last_client_model": "gpt-5.5-max", + "_last_provider_model": "gpt-5.5", + } + body = json.dumps( + { + "model": "gpt-5.5", + "messages": [{"role": "user", "content": "Hello"}], + "reasoning_effort": "high", + } + ).encode() + + result = adapter._apply_model_alias_reasoning_effort(ctx, body) + result_data = json.loads(result.decode()) + + assert result_data["reasoning_effort"] == "high" + + def test_sanitize_provider_body_clamps_max_reasoning_effort( + self, adapter: CodexAdapter + ) -> None: + """Codex backend currently accepts xhigh but rejects max.""" + body = { + "model": "gpt-5.5", + "input": [{"type": "message", "role": "user", "content": []}], + "reasoning": {"effort": "max", "summary": "auto"}, + } + + cleaned = adapter._sanitize_provider_body(body) + + assert cleaned["reasoning"] == {"effort": "xhigh", "summary": "auto"} + def test_get_instructions_default(self, adapter: CodexAdapter) -> None: """Test default instructions when no detection service data.""" instructions = adapter._get_instructions() diff --git a/tests/plugins/max_tokens/test_service.py b/tests/plugins/max_tokens/test_service.py index 7027b96c..8abfc1e2 100644 --- a/tests/plugins/max_tokens/test_service.py +++ b/tests/plugins/max_tokens/test_service.py @@ -49,6 +49,25 @@ def test_get_max_output_tokens_variant_models( # We just verify that models in the cache can be retrieved assert len(service.token_limits_data.models) > 0 + def test_get_token_limits_for_1m_claude_aliases( + self, service: TokenLimitsService + ) -> None: + """Claude 1M aliases should expose their local token limits.""" + assert service.get_max_output_tokens("sonnet[1m]") == 64000 + assert service.token_limits_data.get_max_input_tokens("sonnet[1m]") == 1000000 + assert service.get_max_output_tokens("claude-sonnet-4-6[1m]") == 64000 + assert ( + service.token_limits_data.get_max_input_tokens("claude-sonnet-4-6[1m]") + == 1000000 + ) + assert service.get_max_output_tokens("opus[1m]") == 128000 + assert service.token_limits_data.get_max_input_tokens("opus[1m]") == 1000000 + assert service.get_max_output_tokens("claude-opus-4-7[1m]") == 128000 + assert ( + service.token_limits_data.get_max_input_tokens("claude-opus-4-7[1m]") + == 1000000 + ) + def test_get_max_output_tokens_unknown_model( self, service: TokenLimitsService ) -> None: diff --git a/tests/unit/auth/test_auth.py b/tests/unit/auth/test_auth.py index 6bc86f49..6e59e26b 100644 --- a/tests/unit/auth/test_auth.py +++ b/tests/unit/auth/test_auth.py @@ -17,6 +17,7 @@ # from ccproxy.auth.credentials_adapter import CredentialsAuthManager from ccproxy.auth.dependencies import ( + _build_bearer_auth_manager, get_access_token, require_auth, ) @@ -148,6 +149,31 @@ async def test_get_access_token_dependency(self) -> None: assert token == "sk-test-token-123" mock_manager.get_access_token.assert_called_once() + async def test_x_api_key_header_authenticates_when_token_matches(self) -> None: + """Test x-api-key header support for Anthropic-compatible clients.""" + auth_manager = await _build_bearer_auth_manager( + None, + "sk-test-token-123", + require_credentials=True, + api_key="sk-test-token-123", + ) + + assert isinstance(auth_manager, BearerTokenAuthManager) + assert await auth_manager.get_access_token() == "sk-test-token-123" + + async def test_x_api_key_header_rejects_invalid_token(self) -> None: + """Test x-api-key still enforces the configured shared secret.""" + with pytest.raises(HTTPException) as exc_info: + await _build_bearer_auth_manager( + None, + "sk-test-token-123", + require_credentials=True, + api_key="wrong-token", + ) + + assert exc_info.value.status_code == status.HTTP_401_UNAUTHORIZED + assert "Invalid authentication credentials" in str(exc_info.value.detail) + @pytest.mark.auth class TestAPIEndpointsWithAuth: diff --git a/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py b/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py index 767c07ae..15804cfe 100644 --- a/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py +++ b/tests/unit/llms/formatters/test_openai_to_anthropic_helpers.py @@ -88,3 +88,40 @@ async def test_openai_responses_request_to_anthropic_messages_basic() -> None: assert anth_req.max_tokens == 64 assert anth_req.system == "sys" assert anth_req.messages and anth_req.messages[0].role == "user" + + +@pytest.mark.asyncio +async def test_openai_chat_to_anthropic_supports_max_thinking_effort() -> None: + req = openai_models.ChatCompletionRequest( + model="claude-opus-4-7", + messages=[{"role": "user", "content": "Think carefully"}], # type: ignore[list-item] + max_completion_tokens=128, + reasoning_effort="max", + ) + + anth_req = await convert__openai_chat_to_anthropic_message__request(req) + + assert anth_req.model == "claude-opus-4-7" + assert anth_req.thinking is not None + assert anth_req.thinking.type == "enabled" + assert anth_req.thinking.budget_tokens == 32000 + assert anth_req.max_tokens == 32064 + assert anth_req.temperature == 1.0 + + +def test_openai_responses_to_anthropic_maps_reasoning_effort() -> None: + resp_req = openai_models.ResponseRequest( + model="claude-opus-4-7", + input="Think carefully", + max_output_tokens=128, + reasoning={"effort": "xhigh"}, + ) + + anth_req = convert__openai_responses_to_anthropic_message__request(resp_req) + + assert anth_req.model == "claude-opus-4-7" + assert anth_req.thinking is not None + assert anth_req.thinking.type == "enabled" + assert anth_req.thinking.budget_tokens == 20000 + assert anth_req.max_tokens == 20064 + assert anth_req.temperature == 1.0 diff --git a/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py b/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py index e5c6d660..a8a8f87e 100644 --- a/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py +++ b/tests/unit/llms/formatters/test_openai_to_openai_reasoning.py @@ -189,6 +189,36 @@ async def test_chat_request_to_responses_maps_reasoning_effort() -> None: assert response_request.reasoning == {"effort": "high", "summary": "auto"} +@pytest.mark.asyncio +async def test_chat_request_to_responses_allows_xhigh_reasoning_effort() -> None: + chat_request = openai_models.ChatCompletionRequest( + model="gpt-test", + messages=[openai_models.ChatMessage(role="user", content="Hello")], + reasoning_effort="xhigh", + ) + + response_request = await convert__openai_chat_to_openai_responses__request( + chat_request + ) + + assert response_request.reasoning == {"effort": "xhigh", "summary": "auto"} + + +@pytest.mark.asyncio +async def test_chat_request_to_responses_allows_max_reasoning_effort() -> None: + chat_request = openai_models.ChatCompletionRequest( + model="gpt-test", + messages=[openai_models.ChatMessage(role="user", content="Hello")], + reasoning_effort="max", + ) + + response_request = await convert__openai_chat_to_openai_responses__request( + chat_request + ) + + assert response_request.reasoning == {"effort": "max", "summary": "auto"} + + @pytest.mark.asyncio async def test_chat_request_to_responses_defaults_reasoning(monkeypatch: Any) -> None: monkeypatch.delenv("LLM__OPENAI_THINKING_XML", raising=False) diff --git a/tests/unit/plugins/test_codex_detection.py b/tests/unit/plugins/test_codex_detection.py index 36f1b5cd..bf968bd1 100644 --- a/tests/unit/plugins/test_codex_detection.py +++ b/tests/unit/plugins/test_codex_detection.py @@ -55,6 +55,17 @@ def test_codex_detection_ignores_content_encoding_header() -> None: assert "content-encoding" in CodexDetectionService.ignores_header +def test_codex_fallback_data_uses_detected_cli_version() -> None: + settings = MagicMock(spec=Settings) + cli_service = MagicMock() + service = CodexDetectionService(settings=settings, cli_service=cli_service) + + fallback = service._get_fallback_data(version="0.129.0") + + assert fallback.codex_version == "0.129.0" + assert fallback.headers.get("version") == "0.129.0" + + def test_codex_detection_merges_partial_prompt_cache_with_fallback() -> None: settings = MagicMock(spec=Settings) cli_service = MagicMock() diff --git a/tests/unit/utils/test_model_mapper.py b/tests/unit/utils/test_model_mapper.py index 9477e4a2..ba4d00de 100644 --- a/tests/unit/utils/test_model_mapper.py +++ b/tests/unit/utils/test_model_mapper.py @@ -1,7 +1,10 @@ from __future__ import annotations from ccproxy.models.provider import ModelMappingRule -from ccproxy.plugins.claude_shared.model_defaults import DEFAULT_CLAUDE_MODEL_MAPPINGS +from ccproxy.plugins.claude_shared.model_defaults import ( + DEFAULT_CLAUDE_MODEL_CARDS, + DEFAULT_CLAUDE_MODEL_MAPPINGS, +) from ccproxy.plugins.codex.model_defaults import DEFAULT_CODEX_MODEL_MAPPINGS from ccproxy.utils.model_mapper import ( ModelMapper, @@ -60,13 +63,39 @@ def test_default_claude_mapping_prefers_latest_sonnet_and_opus() -> None: assert mapper.map("gpt-4o").mapped == "claude-sonnet-4-6" assert mapper.map("gpt-5").mapped == "claude-sonnet-4-6" - assert mapper.map("o1-preview").mapped == "claude-opus-4-6" - assert mapper.map("o3-mini").mapped == "claude-opus-4-6" + assert mapper.map("o1-preview").mapped == "claude-opus-4-7" + assert mapper.map("o3-mini").mapped == "claude-opus-4-7" + assert mapper.map("opus-4-7").mapped == "claude-opus-4-7" + assert mapper.map("sonnet[1m]").mapped == "claude-sonnet-4-6" + assert mapper.map("claude-sonnet-4-6[1m]").mapped == "claude-sonnet-4-6" + assert mapper.map("opus[1m]").mapped == "claude-opus-4-7" + assert mapper.map("opus-4-7[1m]").mapped == "claude-opus-4-7" + assert mapper.map("claude-opus-4-7[1m]").mapped == "claude-opus-4-7" + assert mapper.map("opus-4-6[1m]").mapped == "claude-opus-4-6" + assert mapper.map("claude-opus-4-6[1m]").mapped == "claude-opus-4-6" assert mapper.map("sonnet").mapped == "claude-sonnet-4-6" - assert mapper.map("opus").mapped == "claude-opus-4-6" + assert mapper.map("opus").mapped == "claude-opus-4-7" + + +def test_default_claude_model_cards_expose_1m_aliases() -> None: + model_ids = {card.id for card in DEFAULT_CLAUDE_MODEL_CARDS} + + assert "sonnet[1m]" in model_ids + assert "opus[1m]" in model_ids + assert "claude-sonnet-4-6[1m]" in model_ids + assert "claude-opus-4-7[1m]" in model_ids + assert "claude-opus-4-6[1m]" in model_ids def test_default_codex_mapping_keeps_latest_codex_model() -> None: mapper = ModelMapper(DEFAULT_CODEX_MODEL_MAPPINGS) + assert mapper.map("gpt-5.5").mapped == "gpt-5.5" + assert mapper.map("gpt-5.5-high").mapped == "gpt-5.5" + assert mapper.map("gpt-5.5-xhigh").mapped == "gpt-5.5" + assert mapper.map("gpt-5.5-max").mapped == "gpt-5.5" + assert mapper.map("gpt-5.5-custom").mapped == "gpt-5.5" + assert mapper.map("gpt-5.4").mapped == "gpt-5.4" + assert mapper.map("gpt-5.3-codex").mapped == "gpt-5.3-codex" + assert mapper.map("gpt-5.2-codex").mapped == "gpt-5.2-codex" assert mapper.map("gpt-5-codex").mapped == "gpt-5.3-codex"