Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ccproxy/auth/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,13 @@ async def _build_bearer_auth_manager(
expected_token: str | None,
*,
require_credentials: bool,
api_key: str | None = None,
) -> AuthManager | None:
"""Create a bearer auth manager when credentials satisfy expectations."""

token = credentials.credentials if credentials and credentials.credentials else None
if token is None and api_key:
token = api_key

if token is None:
if require_credentials:
Expand Down Expand Up @@ -118,6 +121,7 @@ async def _build_bearer_auth_manager(


async def get_auth_manager(
request: Request,
credentials: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer_scheme)],
settings: SettingsDep,
) -> AuthManager:
Expand All @@ -127,6 +131,7 @@ async def get_auth_manager(
credentials,
_expected_token(settings),
require_credentials=True,
api_key=request.headers.get("x-api-key"),
)
# require_credentials ensures auth_manager is never None here.
assert auth_manager is not None
Expand Down Expand Up @@ -183,6 +188,7 @@ async def get_conditional_auth_manager(
credentials,
expected_token,
require_credentials=True,
api_key=request.headers.get("x-api-key"),
)


Expand Down
38 changes: 30 additions & 8 deletions ccproxy/llms/formatters/openai_to_anthropic/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ async def convert__openai_chat_to_anthropic_message__request(
def convert__openai_responses_to_anthropic_message__request(
request: openai_models.ResponseRequest,
) -> anthropic_models.CreateMessageRequest:
model = request.model
model = request.model or ""
stream = bool(request.stream)
max_out = request.max_output_tokens

Expand Down Expand Up @@ -618,8 +618,7 @@ def convert__openai_responses_to_anthropic_message__request(
else request.instructions
)

# Skip thinking config for ResponseRequest as it doesn't have the required fields
thinking_cfg = None
thinking_cfg = derive_thinking_config(model, request)
if thinking_cfg is not None:
payload_data["thinking"] = thinking_cfg
budget = thinking_cfg.get("budget_tokens", 0)
Expand All @@ -631,21 +630,28 @@ def convert__openai_responses_to_anthropic_message__request(


def derive_thinking_config(
model: str, request: openai_models.ChatCompletionRequest
model: str,
request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest,
) -> dict[str, Any] | None:
"""Derive Anthropic thinking config from OpenAI fields and model name.

Rules:
- If model matches o1/o3 families, enable thinking by default with model-specific budget
- Map reasoning_effort: low=1000, medium=5000, high=10000
- Map effort: minimal/low=1024, medium=5000, high=10000, xhigh=20000, max=32000
- o3*: 10000; o1-mini: 3000; other o1*: 5000
- If thinking is enabled, return {"type":"enabled","budget_tokens":N}
- Otherwise return None
"""
# Explicit reasoning_effort mapping
effort = getattr(request, "reasoning_effort", None)
effort = effort.strip().lower() if isinstance(effort, str) else ""
effort_budgets = {"low": 1000, "medium": 5000, "high": 10000}
effort = _extract_reasoning_effort(request)
effort_budgets = {
"minimal": 1024,
"low": 1024,
"medium": 5000,
"high": 10000,
"xhigh": 20000,
"max": 32000,
}

budget: int | None = None
if effort in effort_budgets:
Expand All @@ -667,6 +673,22 @@ def derive_thinking_config(
return {"type": "enabled", "budget_tokens": budget}


def _extract_reasoning_effort(
request: openai_models.ChatCompletionRequest | openai_models.ResponseRequest,
) -> str:
effort = getattr(request, "reasoning_effort", None)
if isinstance(effort, str) and effort.strip():
return effort.strip().lower()

reasoning = getattr(request, "reasoning", None)
if isinstance(reasoning, dict):
effort = reasoning.get("effort")
elif reasoning is not None:
effort = getattr(reasoning, "effort", None)

return effort.strip().lower() if isinstance(effort, str) else ""


__all__ = [
"convert__openai_chat_to_anthropic_message__request",
"convert__openai_responses_to_anthropic_message__request",
Expand Down
8 changes: 4 additions & 4 deletions ccproxy/llms/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,9 @@ class ChatCompletionRequest(LlmBaseModel):
n: int | None = Field(default=1)
parallel_tool_calls: bool | None = Field(default=None)
presence_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = Field(
default=None
)
reasoning_effort: (
Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None
) = Field(default=None)
response_format: ResponseFormat | None = Field(default=None)
seed: int | None = Field(default=None)
stop: str | list[str] | None = Field(default=None)
Expand All @@ -262,7 +262,7 @@ class ChatCompletionRequest(LlmBaseModel):


class ResponseMessageReasoning(LlmBaseModel):
effort: Literal["minimal", "low", "medium", "high"] | None = None
effort: Literal["minimal", "low", "medium", "high", "xhigh", "max"] | None = None
summary: Literal["auto", "detailed", "concise"] | None = None


Expand Down
79 changes: 76 additions & 3 deletions ccproxy/plugins/claude_shared/model_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,22 @@
root="claude-sonnet-4-6",
parent=None,
),
ModelCard(
id="sonnet[1m]",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-sonnet-4-6",
parent=None,
),
ModelCard(
id="claude-sonnet-4-6[1m]",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-sonnet-4-6",
parent=None,
),
ModelCard(
id="claude-haiku-4-5-20251001",
created=1722816000,
Expand All @@ -22,6 +38,38 @@
root="claude-haiku-4-5-20251001",
parent=None,
),
ModelCard(
id="claude-opus-4-7",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-opus-4-7",
parent=None,
),
ModelCard(
id="opus[1m]",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-opus-4-7",
parent=None,
),
ModelCard(
id="opus-4-7[1m]",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-opus-4-7",
parent=None,
),
ModelCard(
id="claude-opus-4-7[1m]",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-opus-4-7",
parent=None,
),
ModelCard(
id="claude-opus-4-6",
created=1722816000,
Expand All @@ -30,6 +78,22 @@
root="claude-opus-4-6",
parent=None,
),
ModelCard(
id="opus-4-6[1m]",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-opus-4-6",
parent=None,
),
ModelCard(
id="claude-opus-4-6[1m]",
created=1722816000,
owned_by="anthropic",
permission=[],
root="claude-opus-4-6",
parent=None,
),
ModelCard(
id="claude-opus-4-20250514",
created=1716336000,
Expand Down Expand Up @@ -125,21 +189,30 @@
),
ModelMappingRule(
match="o1",
target="claude-opus-4-6",
target="claude-opus-4-7",
kind="prefix",
),
ModelMappingRule(
match="o3-mini",
target="claude-opus-4-6",
target="claude-opus-4-7",
kind="exact",
),
ModelMappingRule(
match="gpt-5",
target="claude-sonnet-4-6",
kind="prefix",
),
ModelMappingRule(match="sonnet[1m]", target="claude-sonnet-4-6"),
ModelMappingRule(match="claude-sonnet-4-6[1m]", target="claude-sonnet-4-6"),
ModelMappingRule(match="opus[1m]", target="claude-opus-4-7"),
ModelMappingRule(match="opus-4-7[1m]", target="claude-opus-4-7"),
ModelMappingRule(match="claude-opus-4-7[1m]", target="claude-opus-4-7"),
ModelMappingRule(match="opus-4-6[1m]", target="claude-opus-4-6"),
ModelMappingRule(match="claude-opus-4-6[1m]", target="claude-opus-4-6"),
ModelMappingRule(match="opus-4-7", target="claude-opus-4-7"),
ModelMappingRule(match="claude-opus-4-7", target="claude-opus-4-7"),
ModelMappingRule(match="sonnet", target="claude-sonnet-4-6"),
ModelMappingRule(match="opus", target="claude-opus-4-6"),
ModelMappingRule(match="opus", target="claude-opus-4-7"),
ModelMappingRule(match="haiku", target="claude-haiku-4-5-20251001"),
ModelMappingRule(
match="claude-3-5-sonnet-latest",
Expand Down
75 changes: 70 additions & 5 deletions ccproxy/plugins/codex/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@
logger = get_plugin_logger()


# Client-facing Codex model alias names mapped to the reasoning effort each one
# implies. `_apply_model_alias_reasoning_effort` looks up the client-requested
# model name in this table.
_CODEX_MODEL_REASONING_ALIASES = {
"gpt-5.5-high": "high",
"gpt-5.5-xhigh": "xhigh",
"gpt-5.5-max": "max",
}


class CodexAdapter(BaseHTTPAdapter):
"""Simplified Codex adapter."""

Expand Down Expand Up @@ -65,6 +72,7 @@ async def handle_request(
endpoint = ctx.metadata.get("endpoint", "")
body = await request.body()
body = await self._map_request_model(ctx, body)
body = self._apply_model_alias_reasoning_effort(ctx, body)
headers = extract_request_headers(request)

# Determine client streaming intent from body flag (fallback to False)
Expand Down Expand Up @@ -294,6 +302,36 @@ async def prepare_provider_request(

return json.dumps(body_data).encode(), filtered_headers

def _apply_model_alias_reasoning_effort(self, ctx: Any, body: bytes) -> bytes:
"""Apply reasoning effort implied by client-facing Codex model aliases."""

metadata = getattr(ctx, "metadata", None)
client_model = None
if isinstance(metadata, dict):
client_model = metadata.get("_last_client_model")
if not isinstance(client_model, str):
return body

effort = _CODEX_MODEL_REASONING_ALIASES.get(client_model)
if effort is None:
return body

try:
body_data = json.loads(body.decode()) if body else {}
except Exception:
return body
if not isinstance(body_data, dict):
return body

if isinstance(body_data.get("reasoning"), dict):
reasoning = dict(body_data["reasoning"])
reasoning.setdefault("effort", effort)
body_data["reasoning"] = reasoning
elif not body_data.get("reasoning_effort"):
body_data["reasoning_effort"] = effort

return self._encode_json_body(body_data)

def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
"""Apply Codex-specific payload sanitization shared by all request paths."""

Expand All @@ -314,17 +352,43 @@ def _sanitize_provider_body(self, body_data: dict[str, Any]) -> dict[str, Any]:
):
body_data.pop(key, None)

list_input = body_data.get("input", [])
# Remove any input types that Codex does not support
body_data["input"] = [
input for input in list_input if input.get("type") != "item_reference"
]
input_value = body_data.get("input", [])
# Remove any input types that Codex does not support. Public Responses API
# input may be a plain string, but the Codex backend expects message items.
if isinstance(input_value, list):
body_data["input"] = [
input_item
for input_item in input_value
if not (
isinstance(input_item, dict)
and input_item.get("type") == "item_reference"
)
]
elif isinstance(input_value, str):
body_data["input"] = [
{
"type": "message",
"role": "user",
"content": [{"type": "input_text", "text": input_value}],
}
]

# Remove any prefixed metadata fields that shouldn't be sent to the API
body_data = self._remove_metadata_fields(body_data)
self._normalize_reasoning_effort(body_data)

return body_data

def _normalize_reasoning_effort(self, body_data: dict[str, Any]) -> None:
"""Clamp client-facing effort aliases to values accepted by Codex backend."""

if body_data.get("reasoning_effort") == "max":
body_data["reasoning_effort"] = "xhigh"

reasoning = body_data.get("reasoning")
if isinstance(reasoning, dict) and reasoning.get("effort") == "max":
reasoning["effort"] = "xhigh"

async def prepare_provider_headers(self, headers: dict[str, str]) -> dict[str, str]:
token_value = await self._resolve_access_token()

Expand Down Expand Up @@ -501,6 +565,7 @@ async def handle_streaming(
# Extract body and headers
body = await request.body()
body = await self._map_request_model(ctx, body)
body = self._apply_model_alias_reasoning_effort(ctx, body)
headers = extract_request_headers(request)

# Ensure format adapters are available when required
Expand Down
Loading
Loading