
Commit 857067a

Author: Mateusz
fix(codex): preserve usage through legacy response conversion

Parent: 781192a

5 files changed: 160 additions & 33 deletions

dev/scripts/demo_codex_usage_reporting_fix.py

Lines changed: 24 additions & 1 deletion
@@ -31,6 +31,7 @@
 )
 from src.core.interfaces.response_processor_interface import ProcessedResponse
 from src.core.services.translation_service import TranslationService
+from src.core.transport.fastapi.response_adapters import domain_response_to_fastapi


 class _FakeTransportWithProviderUsage:
@@ -161,6 +162,26 @@ async def _run_demo() -> None:
                 "Non-streaming total_tokens is zero; expected > 0"
             )

+        legacy_fastapi_response = domain_response_to_fastapi(
+            non_stream_result
+        )
+        legacy_body = (
+            legacy_fastapi_response.body.tobytes()
+            if isinstance(legacy_fastapi_response.body, memoryview)
+            else legacy_fastapi_response.body
+        )
+        legacy_payload = json.loads(legacy_body.decode("utf-8"))
+        legacy_usage = legacy_payload.get("usage")
+        print("[legacy-non-stream] usage:", legacy_usage)
+        if not isinstance(legacy_usage, dict):
+            raise RuntimeError(
+                "Legacy OpenAI-compatible payload is missing usage"
+            )
+        if int(legacy_usage.get("total_tokens", 0)) <= 0:
+            raise RuntimeError(
+                "Legacy OpenAI-compatible total_tokens is zero; expected > 0"
+            )
+
         streaming_request = ChatRequest(
             model="openai-codex:gpt-5-codex",
             messages=[
@@ -215,7 +236,9 @@ async def _run_demo() -> None:
                 "Streaming total_tokens is zero; expected > 0"
             )

-        print("SUCCESS: Codex usage reporting is non-zero for both flows.")
+        print(
+            "SUCCESS: Codex usage reporting is non-zero for connector and legacy OpenAI frontend flows."
+        )
     finally:
         await backend.shutdown()
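A note on the memoryview branch above: Starlette/FastAPI responses normally expose body as bytes, but the demo defensively handles a memoryview as well before decoding. Below is a minimal, self-contained sketch of that normalization idiom; response_json is a hypothetical helper written for illustration, not part of this repository.

import json

from fastapi.responses import JSONResponse


def response_json(response) -> dict:
    # Normalize the raw body before decoding: usually bytes, but the
    # demo above guards for a memoryview on some code paths.
    body = (
        response.body.tobytes()
        if isinstance(response.body, memoryview)
        else response.body
    )
    return json.loads(body.decode("utf-8"))


# The same check the demo performs on the legacy payload.
resp = JSONResponse({"usage": {"total_tokens": 26}})
assert response_json(resp)["usage"]["total_tokens"] == 26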
src/core/app/controllers/chat_controller.py

Lines changed: 17 additions & 21 deletions
@@ -707,11 +707,7 @@ def _inject_reasoning_aliases(payload: object) -> object:

            raw_usage = metadata.get(
                "usage",
-                {
-                    "prompt_tokens": 0,
-                    "completion_tokens": 0,
-                    "total_tokens": 0,
-                },
+                None,
            )
            usage_summary = None
            if isinstance(raw_usage, UsageSummary):
@@ -720,15 +716,15 @@ def _inject_reasoning_aliases(payload: object) -> object:
                usage_summary = UsageSummary.from_dict(raw_usage)

            # Create the response using Pydantic model
-            response = ChatResponse(
+            chat_response = ChatResponse(
                id=response_id,
                created=created_val,
                model=model_name,
                choices=[choice],
                usage=usage_summary,
            )

-            return _inject_reasoning_aliases(response.model_dump())
+            return _inject_reasoning_aliases(chat_response.model_dump())

        if metadata:
            meta_role = metadata.get("role")  # type: ignore[arg-type]
@@ -781,7 +777,11 @@ def _inject_reasoning_aliases(payload: object) -> object:
            choice = ChatCompletionChoice(
                index=0,
                message=message,
-                finish_reason=finish_reason,  # type: ignore[arg-type]
+                finish_reason=(
+                    finish_reason
+                    if isinstance(finish_reason, str)
+                    else None
+                ),
            )

            from src.core.domain.usage_summary import UsageSummary
@@ -793,15 +793,15 @@ def _inject_reasoning_aliases(payload: object) -> object:
            elif isinstance(raw_usage, dict):
                usage_summary = UsageSummary.from_dict(raw_usage)

-            response = ChatResponse(
+            chat_response = ChatResponse(
                id=response_id,
                created=created_val,
                model=model_name,
                choices=[choice],
                usage=usage_summary,
            )

-            return response.model_dump()
+            return chat_response.model_dump()

        # Check if content is a JSON string of tool calls (common backend response format)
        if isinstance(content, str):
@@ -855,26 +855,22 @@ def _inject_reasoning_aliases(payload: object) -> object:
            if metadata:
                raw_usage = metadata.get("usage")
            else:
-                raw_usage = {
-                    "prompt_tokens": 0,
-                    "completion_tokens": 0,
-                    "total_tokens": 0,
-                }
+                raw_usage = None
            usage_summary = None
            if isinstance(raw_usage, UsageSummary):
                usage_summary = raw_usage
            elif isinstance(raw_usage, dict):
                usage_summary = UsageSummary.from_dict(raw_usage)

-            response = ChatResponse(
+            chat_response = ChatResponse(
                id=response_id,
                created=created_val,
                model=model_name,
                choices=[choice],
                usage=usage_summary,
            )

-            return response.model_dump()
+            return chat_response.model_dump()
        except (ValueError, TypeError) as e:
            if logger.isEnabledFor(TRACE_LEVEL):
                logger.log(
@@ -964,7 +960,7 @@ def _inject_reasoning_aliases(payload: object) -> object:
            )

            # Create the response using Pydantic model
-            response = ChatResponse(
+            chat_response = ChatResponse(
                id=content.get("id", f"chatcmpl-{_uuid.uuid4().hex[:16]}"),
                created=int(_time.time()),
                model=content.get(
@@ -974,7 +970,7 @@ def _inject_reasoning_aliases(payload: object) -> object:
                usage=UsageSummary.from_dict(openai_usage),
            )

-            return response
+            return chat_response

        import json as _json
        import time
@@ -1021,7 +1017,7 @@ def _inject_reasoning_aliases(payload: object) -> object:
        from src.core.domain.usage_summary import UsageSummary

        # Create the response using Pydantic model
-        response = ChatResponse(
+        chat_response = ChatResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:16]}",
            created=int(time.time()),
            model=getattr(domain_request, "model", "gpt-4"),
@@ -1035,7 +1031,7 @@ def _inject_reasoning_aliases(payload: object) -> object:
            ),
        )

-        return chat_response
+        return chat_response
    except Exception as e:
        if logger.isEnabledFor(logging.WARNING):
            logger.warning(
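The recurring edit in this file replaces a zeroed default usage dict with None. Under the old code, a response whose metadata carried no usage was serialized with a fabricated all-zero usage block, which downstream consumers could not tell apart from a genuine zero-token measurement; now absent usage stays absent and only provider-reported counts reach the payload. A minimal sketch of the default-value change, using plain dicts as stand-ins for the domain types:

def old_usage(metadata: dict) -> dict:
    # Old behavior: a missing "usage" key became a fabricated all-zero block.
    return metadata.get(
        "usage",
        {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    )


def new_usage(metadata: dict) -> dict | None:
    # New behavior: a missing "usage" key stays None, so serialization
    # cannot mistake absence for a real measurement.
    return metadata.get("usage", None)


assert old_usage({})["total_tokens"] == 0  # looks like real data
assert new_usage({}) is None               # absence preserved
assert new_usage({"usage": {"total_tokens": 26}}) == {"total_tokens": 26}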

src/core/domain/usage_summary.py

Lines changed: 16 additions & 3 deletions
@@ -48,9 +48,22 @@ def from_dict(cls, data: dict[str, Any]) -> UsageSummary:
        Returns:
            UsageSummary instance
        """
-        prompt_tokens = data.get("prompt_tokens")
-        completion_tokens = data.get("completion_tokens")
-        total_tokens = data.get("total_tokens")
+        prompt_tokens = data.get("prompt_tokens")
+        if not isinstance(prompt_tokens, int):
+            prompt_tokens = data.get("input_tokens")
+
+        completion_tokens = data.get("completion_tokens")
+        if not isinstance(completion_tokens, int):
+            completion_tokens = data.get("output_tokens")
+
+        total_tokens = data.get("total_tokens")
+        if not isinstance(total_tokens, int):
+            resolved_prompt = prompt_tokens if isinstance(prompt_tokens, int) else 0
+            resolved_completion = (
+                completion_tokens if isinstance(completion_tokens, int) else 0
+            )
+            computed_total = resolved_prompt + resolved_completion
+            total_tokens = computed_total if computed_total > 0 else None

        # Extract extensions
        # If "extensions" key exists, use it directly; otherwise extract all non-standard fields
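This from_dict change is the heart of the fix: Codex replies report usage with Responses-API field names (input_tokens/output_tokens), which the old reader ignored, so legacy-converted payloads lost their counts. The standalone sketch below mirrors the alias-and-compute logic from the diff so it can be exercised outside the repo; resolve_tokens is an illustrative stand-in for the relevant portion of UsageSummary.from_dict, not the actual method.

def resolve_tokens(data: dict) -> tuple[int | None, int | None, int | None]:
    # Canonical OpenAI names win; Responses-API aliases are the fallback.
    prompt = data.get("prompt_tokens")
    if not isinstance(prompt, int):
        prompt = data.get("input_tokens")

    completion = data.get("completion_tokens")
    if not isinstance(completion, int):
        completion = data.get("output_tokens")

    # Only synthesize a total when at least one component is a real count.
    total = data.get("total_tokens")
    if not isinstance(total, int):
        computed = (prompt if isinstance(prompt, int) else 0) + (
            completion if isinstance(completion, int) else 0
        )
        total = computed if computed > 0 else None

    return prompt, completion, total


assert resolve_tokens({"input_tokens": 11, "output_tokens": 5}) == (11, 5, 16)
assert resolve_tokens({"prompt_tokens": None}) == (None, None, None)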

tests/unit/core/app/controllers/test_chat_controller_content.py

Lines changed: 68 additions & 0 deletions
@@ -4,8 +4,12 @@

 import json
 from typing import Any
+from unittest.mock import AsyncMock, Mock

+import pytest
 from src.core.app.controllers.chat_controller import ChatController
+from src.core.domain.responses import ResponseEnvelope
+from src.core.domain.usage_summary import UsageSummary


 class TestCoerceMessageContentToText:
@@ -145,3 +149,67 @@ def test_coerce_message_content_to_text_prevents_stack_overflow(self) -> None:
         assert len(result) > 0
         # The result should contain some indication of the circular reference
         assert "Circular reference detected" in result
+
+
+class TestEnsureOpenAIChatSchemaUsage:
+    @pytest.mark.asyncio
+    async def test_tool_calls_schema_preserves_metadata_usage(self) -> None:
+        processor = AsyncMock()
+        processor.process_request = AsyncMock(
+            return_value=ResponseEnvelope(
+                content='[{"type": "function", "id": "call_1", "function": {"name": "do_work", "arguments": "{}"}}]',
+                metadata={
+                    "tool_calls": [
+                        {
+                            "id": "call_1",
+                            "type": "function",
+                            "function": {"name": "do_work", "arguments": "{}"},
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 19,
+                        "output_tokens": 7,
+                        "total_tokens": 26,
+                    },
+                },
+            )
+        )
+
+        controller = ChatController(
+            request_processor=processor,
+            translation_service=None,
+            wire_capture=None,
+            metrics_initializer=None,
+        )
+
+        request = Mock()
+        request.body = AsyncMock(return_value=b"{}")
+        request.headers = {}
+        request.cookies = {}
+        request.url = Mock()
+        request.url.path = "/v1/chat/completions"
+        request.state = Mock()
+        request.app = Mock()
+        request.app.state = Mock()
+        request.app.state.service_provider = None
+
+        from src.core.domain.chat import ChatMessage, ChatRequest
+
+        request_data = ChatRequest(
+            model="openai-codex:gpt-5-codex",
+            messages=[ChatMessage(role="user", content="hello")],
+            stream=False,
+        )
+
+        response = await controller.handle_chat_completion(request, request_data)
+        body = (
+            response.body.tobytes()
+            if isinstance(response.body, memoryview)
+            else response.body
+        )
+        payload = json.loads(body.decode("utf-8"))
+
+        usage = UsageSummary.from_dict(payload["usage"])
+        assert usage.prompt_tokens == 19
+        assert usage.completion_tokens == 7
+        assert usage.total_tokens == 26

tests/unit/core/domain/test_usage_summary.py

Lines changed: 35 additions & 8 deletions
@@ -103,14 +103,41 @@ def test_usage_summary_from_dict(self) -> None:
         assert summary.total_tokens == 150
         assert summary.extensions == {"cost": 0.002}

-    def test_usage_summary_from_dict_with_none(self) -> None:
-        """Test creating UsageSummary from dictionary with None values."""
-        data = {
-            "prompt_tokens": None,
-            "completion_tokens": None,
-            "total_tokens": None,
-            "extensions": {},
-        }
+    def test_usage_summary_from_dict_supports_responses_api_fields(self) -> None:
+        """Responses API usage fields should populate canonical token counts."""
+        data: dict[str, int] = {
+            "input_tokens": 17,
+            "output_tokens": 9,
+            "total_tokens": 26,
+        }
+
+        summary = UsageSummary.from_dict(data)
+
+        assert summary.prompt_tokens == 17
+        assert summary.completion_tokens == 9
+        assert summary.total_tokens == 26
+        assert summary.extensions == {"input_tokens": 17, "output_tokens": 9}
+
+    def test_usage_summary_from_dict_computes_total_for_responses_api_fields(
+        self,
+    ) -> None:
+        """Responses API usage should compute total_tokens when omitted."""
+        data: dict[str, int] = {"input_tokens": 11, "output_tokens": 5}
+
+        summary = UsageSummary.from_dict(data)
+
+        assert summary.prompt_tokens == 11
+        assert summary.completion_tokens == 5
+        assert summary.total_tokens == 16
+
+    def test_usage_summary_from_dict_with_none(self) -> None:
+        """Test creating UsageSummary from dictionary with None values."""
+        data: dict[str, object] = {
+            "prompt_tokens": None,
+            "completion_tokens": None,
+            "total_tokens": None,
+            "extensions": {},
+        }
         summary = UsageSummary.from_dict(data)
         assert summary.prompt_tokens is None
         assert summary.completion_tokens is None
