Skip to content

Commit da0e3c3

Browse files
committed
fixes #103: responses api max_output_tokens bug
1 parent 8754203 commit da0e3c3

9 files changed

Lines changed: 132 additions & 331 deletions

File tree

DOCKER.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,12 @@ Set options in `.env` or pass environment variables:
2525
- `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none
2626
- `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current
2727
- `CHATGPT_LOCAL_FAST_MODE`: `true|false` to enable fast mode by default for supported models
28-
- `CHATGPT_LOCAL_DEBUG_MODEL`: force model override (e.g., `gpt-5.4`)
2928
- `CHATGPT_LOCAL_CLIENT_ID`: OAuth client id override (rarely needed)
3029
- `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS`: `true|false` to add reasoning model variants to `/v1/models`
3130
- `CHATGPT_LOCAL_ENABLE_WEB_SEARCH`: `true|false` to enable default web search tool
3231

3332
## Logs
34-
Set `VERBOSE=true` to include extra logging for debugging issues in upstream or chat app requests. Please include and use these logs when submitting bug reports.
33+
Set `VERBOSE=true` to enable extra logging for troubleshooting upstream or chat app requests. Please include these logs when submitting bug reports.
3534

3635
## Test
3736

chatmock/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ def cmd_serve(
284284
default_web_search=default_web_search,
285285
)
286286

287-
app.run(host=host, debug=False, use_reloader=False, port=port, threaded=True)
287+
app.run(host=host, use_reloader=False, port=port, threaded=True)
288288
return 0
289289

290290

chatmock/responses_api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def normalize_responses_payload(
8888

8989
normalized = dict(payload)
9090
normalized["model"] = normalized_model
91+
normalized.pop("max_output_tokens", None)
9192

9293
if "input" in normalized:
9394
normalized["input"] = canonicalize_responses_input(normalized.get("input"))

chatmock/routes_ollama.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ def ollama_chat() -> Response:
250250
input_items = convert_chat_messages_to_responses_input(messages)
251251

252252
model_reasoning = extract_reasoning_from_model_name(model)
253-
normalized_model = normalize_model_name(model)
253+
normalized_model = normalize_model_name(model, current_app.config.get("DEBUG_MODEL"))
254254
service_tier_resolution = resolve_service_tier(
255255
normalized_model,
256256
request_fast_mode=payload.get("fast_mode"),
@@ -306,7 +306,7 @@ def ollama_chat() -> Response:
306306
base_tools_only = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
307307
safe_choice = payload.get("tool_choice", "auto")
308308
upstream2, err2 = start_upstream_request(
309-
normalize_model_name(model),
309+
normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
310310
input_items,
311311
instructions=BASE_INSTRUCTIONS,
312312
tools=base_tools_only,
@@ -570,7 +570,7 @@ def _gen():
570570
full_text = f"<think>{rtxt}</think>" + (full_text or "")
571571

572572
out_json = {
573-
"model": normalize_model_name(model),
573+
"model": normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
574574
"created_at": created_at,
575575
"message": {"role": "assistant", "content": full_text, **({"tool_calls": tool_calls} if tool_calls else {})},
576576
"done": True,

chatmock/routes_openai.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ def chat_completions() -> Response:
109109
reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
110110
reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
111111
reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
112-
debug_model = current_app.config.get("DEBUG_MODEL")
113112

114113
raw = request.get_data(cache=True, as_text=True) or ""
115114
if verbose:
@@ -129,7 +128,7 @@ def chat_completions() -> Response:
129128
return jsonify(err), 400
130129

131130
requested_model = payload.get("model")
132-
model = normalize_model_name(requested_model, debug_model)
131+
model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
133132
messages = payload.get("messages")
134133
if messages is None and isinstance(payload.get("prompt"), str):
135134
messages = [{"role": "user", "content": payload.get("prompt") or ""}]
@@ -413,7 +412,6 @@ def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
413412
def completions() -> Response:
414413
verbose = bool(current_app.config.get("VERBOSE"))
415414
verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
416-
debug_model = current_app.config.get("DEBUG_MODEL")
417415
reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
418416
reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
419417

@@ -432,7 +430,7 @@ def completions() -> Response:
432430
return jsonify(err), 400
433431

434432
requested_model = payload.get("model")
435-
model = normalize_model_name(requested_model, debug_model)
433+
model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
436434
prompt = payload.get("prompt")
437435
if isinstance(prompt, list):
438436
prompt = "".join([p if isinstance(p, str) else "" for p in prompt])

gui.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def run_server(
1919
reasoning_summary: str = "auto",
2020
reasoning_compat: str = "think-tags",
2121
fast_mode: bool = False,
22+
debug_model: str | None = None,
2223
expose_reasoning_models: bool = False,
2324
default_web_search: bool = False,
2425
) -> None:
@@ -27,10 +28,11 @@ def run_server(
2728
reasoning_summary=reasoning_summary,
2829
reasoning_compat=reasoning_compat,
2930
fast_mode=fast_mode,
31+
debug_model=debug_model,
3032
expose_reasoning_models=expose_reasoning_models,
3133
default_web_search=default_web_search,
3234
)
33-
app.run(host=host, port=port, debug=False, use_reloader=False, threaded=True)
35+
app.run(host=host, port=port, use_reloader=False, threaded=True)
3436

3537

3638
class ServerProcess(QtCore.QObject):
@@ -45,6 +47,7 @@ def __init__(self) -> None:
4547
self._summary = "auto"
4648
self._compat = "think-tags"
4749
self._fast_mode = False
50+
self._debug_model: str | None = None
4851
self._expose_reasoning_models = False
4952
self._default_web_search = False
5053

@@ -59,6 +62,7 @@ def start(
5962
summary: str,
6063
compat: str,
6164
fast_mode: bool,
65+
debug_model: str | None,
6266
expose_reasoning_models: bool,
6367
default_web_search: bool,
6468
) -> None:
@@ -68,6 +72,7 @@ def start(
6872
self._effort, self._summary = effort, summary
6973
self._compat = compat
7074
self._fast_mode = fast_mode
75+
self._debug_model = debug_model
7176
self._expose_reasoning_models = expose_reasoning_models
7277
self._default_web_search = default_web_search
7378
self._proc = QtCore.QProcess()
@@ -80,6 +85,8 @@ def start(
8085
"--summary", summary,
8186
"--compat", compat,
8287
]
88+
if isinstance(debug_model, str) and debug_model.strip():
89+
args.extend(["--debug-model", debug_model.strip()])
8390
if fast_mode:
8491
args.append("--fast-mode")
8592
if expose_reasoning_models:
@@ -317,6 +324,12 @@ def __init__(self) -> None:
317324
self.port_edit.setValidator(QtGui.QIntValidator(1, 65535, self))
318325
self.port_edit.setMaximumWidth(100)
319326
form.addWidget(self.port_edit, 0, 3)
327+
form.addWidget(QtWidgets.QLabel("Debug Model"), 1, 0)
328+
self.debug_model_edit = QtWidgets.QLineEdit("")
329+
self.debug_model_edit.setClearButtonEnabled(True)
330+
self.debug_model_edit.setPlaceholderText("Optional override, e.g. gpt-5.4")
331+
self.debug_model_edit.setSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed)
332+
form.addWidget(self.debug_model_edit, 1, 1, 1, 3)
320333
form.setColumnStretch(1, 1)
321334
srv_layout.addLayout(form)
322335

@@ -473,6 +486,7 @@ def _start_server(self) -> None:
473486
summary = self.summary.currentText().strip()
474487
compat = self.compat.currentText().strip()
475488
fast_mode = self.fast_mode.isChecked()
489+
debug_model = self.debug_model_edit.text().strip() or None
476490
expose_reasoning_models = self.expose_reasoning_models.isChecked()
477491
default_web_search = self.enable_web_search.isChecked()
478492
self.status.setText(f"Starting server at http://{host}:{port} …")
@@ -484,6 +498,7 @@ def _start_server(self) -> None:
484498
summary,
485499
compat,
486500
fast_mode,
501+
debug_model,
487502
expose_reasoning_models,
488503
default_web_search,
489504
)
@@ -536,6 +551,7 @@ def main() -> None:
536551
p.add_argument("--summary", default="auto")
537552
p.add_argument("--compat", default="think-tags")
538553
p.add_argument("--fast-mode", action="store_true")
554+
p.add_argument("--debug-model")
539555
p.add_argument("--expose-reasoning-models", action="store_true")
540556
p.add_argument("--enable-web-search", action="store_true")
541557
args, _ = p.parse_known_args()
@@ -546,6 +562,7 @@ def main() -> None:
546562
args.summary,
547563
args.compat,
548564
args.fast_mode,
565+
args.debug_model,
549566
args.expose_reasoning_models,
550567
args.enable_web_search,
551568
)

scripts/test_responses_cached_tokens.py

Lines changed: 0 additions & 176 deletions
This file was deleted.

0 commit comments

Comments
 (0)