
Commit c6129b0

Mateusz committed
fix(codex): use Responses API format for non-streaming translation so usage is preserved
- Change executor to_domain_response call from 'openai' to 'openai-responses' so Responses API fields (input_tokens/output_tokens) are properly normalized
- Add input_tokens/output_tokens fallback mapping in UsageSummary.from_dict
- Compute total_tokens when missing from input+output
- Update test assertion for new format parameter
- Update demo script to validate both streaming and legacy frontend paths
1 parent: b6e4a7e

4 files changed

Lines changed: 39 additions & 7 deletions
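For context, the two usage shapes involved look roughly like this (field names follow the OpenAI Chat Completions and Responses APIs; the token counts are made up for illustration):

# Chat Completions usage, which UsageSummary.from_dict already understood:
chat_completions_usage = {"prompt_tokens": 17, "completion_tokens": 9, "total_tokens": 26}

# Responses API usage, which Codex returns and which was previously not mapped
# onto prompt/completion tokens:
responses_usage = {"input_tokens": 17, "output_tokens": 9, "total_tokens": 26}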


dev/scripts/demo_codex_usage_reporting_fix.py

Lines changed: 29 additions & 5 deletions
@@ -69,7 +69,7 @@ async def _cancel() -> None:
         )
 
 
-class _FakeTransportWithoutProviderUsage:
+class _FakeTransportWithStreamingUsage:
     async def initiate_streaming_request(
         self,
         url: str,
@@ -82,12 +82,27 @@ async def _iterator() -> AsyncIterator[ProcessedResponse]:
                 content={
                     "choices": [
                         {
-                            "delta": {"content": "This should carry token usage."},
-                            "finish_reason": "stop",
+                            "delta": {"content": "Hello from Codex streaming. "},
+                            "finish_reason": None,
                         }
                     ]
                 }
             )
+            yield ProcessedResponse(
+                content={
+                    "choices": [
+                        {
+                            "delta": {"content": "This carries token usage."},
+                            "finish_reason": "stop",
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 17,
+                        "output_tokens": 9,
+                        "total_tokens": 26,
+                    },
+                }
+            )
 
         async def _cancel() -> None:
             return None
@@ -193,7 +208,7 @@ async def _run_demo() -> None:
         stream=True,
     )
 
-    response_executor._transport = _FakeTransportWithoutProviderUsage()  # type: ignore[attr-defined]
+    response_executor._transport = _FakeTransportWithStreamingUsage()  # type: ignore[attr-defined]
 
     stream_result = await backend.chat_completions(
         request_data=streaming_request,
@@ -218,15 +233,24 @@ async def _run_demo() -> None:
 
     usage_payloads: list[dict[str, Any]] = []
     async for chunk in stream_content:
+        # Check explicit usage field
         if chunk.usage is not None:
             usage_payloads.append(chunk.usage.model_dump())
+        # Check metadata
         usage_metadata = chunk.metadata.get("usage")
         if isinstance(usage_metadata, dict):
            usage_payloads.append(dict(usage_metadata))
+        # Check content dict (where Codex SSE would embed usage)
+        if isinstance(chunk.content, dict):
+            content_usage = chunk.content.get("usage")
+            if isinstance(content_usage, dict):
+                usage_payloads.append(dict(content_usage))
 
     print("[stream] usage payloads:", usage_payloads)
     if not usage_payloads:
-        raise RuntimeError("Streaming usage is missing")
+        raise RuntimeError(
+            "Streaming usage is missing; no chunks carried usage data"
+        )
 
     max_total = max(
         int(p.get("total_tokens", 0)) for p in usage_payloads
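The demo now checks three places for usage on each chunk. As a standalone illustration of that fallback order, here is a minimal sketch that operates on plain dicts rather than the ProcessedResponse chunks above (collect_usage and the sample chunks are hypothetical and exist only for this example):

from typing import Any


def collect_usage(chunks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Mirror the demo's fallback order: explicit usage, then metadata, then content."""
    payloads: list[dict[str, Any]] = []
    for chunk in chunks:
        usage = chunk.get("usage")
        if isinstance(usage, dict):  # explicit usage field on the chunk
            payloads.append(dict(usage))
        meta_usage = chunk.get("metadata", {}).get("usage")
        if isinstance(meta_usage, dict):  # usage tucked into chunk metadata
            payloads.append(dict(meta_usage))
        content = chunk.get("content")
        if isinstance(content, dict) and isinstance(content.get("usage"), dict):
            payloads.append(dict(content["usage"]))  # usage embedded in the content dict
    return payloads


sample_chunks = [
    {"content": {"choices": [{"delta": {"content": "Hello from Codex streaming. "}}]}},
    {"content": {"usage": {"input_tokens": 17, "output_tokens": 9, "total_tokens": 26}}},
]
print(collect_usage(sample_chunks))
# -> [{'input_tokens': 17, 'output_tokens': 9, 'total_tokens': 26}]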

src/connectors/openai_codex/executor.py

Lines changed: 2 additions & 1 deletion
@@ -583,10 +583,11 @@ async def _execute_non_streaming(
         )
 
         # Parse response using translation service with renderer override
+        # Codex uses OpenAI Responses API format which has 'output' not 'choices'
         with OverrideRenderer(renderer_key):
             domain_response = (
                 self._base_connector.translation_service.to_domain_response(
-                    response_json, "openai"
+                    response_json, "openai-responses"
                 )
             )
 
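The comment added above is about the top-level response shape, not just the usage keys: a Chat Completions body carries 'choices', while a Responses API body carries 'output' items. A rough, abbreviated sketch of the two shapes (values are illustrative, not taken from the repo):

# Chat Completions shape (the old "openai" translation path):
chat_completions_body = {
    "choices": [
        {"message": {"role": "assistant", "content": "Hi"}, "finish_reason": "stop"}
    ],
}

# Responses API shape (the "openai-responses" translation path Codex needs):
responses_body = {
    "output": [
        {
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "Hi"}],
        }
    ],
}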

src/core/domain/usage_summary.py

Lines changed: 7 additions & 0 deletions
@@ -49,8 +49,15 @@ def from_dict(cls, data: dict[str, Any]) -> UsageSummary:
             UsageSummary instance
         """
         prompt_tokens = data.get("prompt_tokens")
+        if not isinstance(prompt_tokens, int):
+            prompt_tokens = data.get("input_tokens")
         completion_tokens = data.get("completion_tokens")
+        if not isinstance(completion_tokens, int):
+            completion_tokens = data.get("output_tokens")
         total_tokens = data.get("total_tokens")
+        if not isinstance(total_tokens, int):
+            computed = (prompt_tokens or 0) + (completion_tokens or 0)
+            total_tokens = computed if computed > 0 else None
 
         # Extract extensions
         # If "extensions" key exists, use it directly; otherwise extract all non-standard fields

tests/unit/connectors/openai_codex/test_executor.py

Lines changed: 1 addition & 1 deletion
@@ -391,7 +391,7 @@ async def post_side_effect(*args, **kwargs):
     compatibility_layer.detect_incompatible_tool_calls.assert_called()
     compatibility_layer.append_incompatible_tool_steering.assert_called_once()
     mock_base_connector.translation_service.to_domain_response.assert_called_once_with(
-        second_response.json.return_value, "openai"
+        second_response.json.return_value, "openai-responses"
     )
 
 @pytest.mark.asyncio
