Skip to content

Commit c9914bc

Browse files
authored
Merge pull request #234 from igerber/ai-local-response
Migrate AI local review to OpenAI Responses API
2 parents ea66c57 + 5af76f1 commit c9914bc

3 files changed

Lines changed: 395 additions & 23 deletions

File tree

.claude/commands/ai-review-local.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
---
22
description: Run AI code review locally using OpenAI API before opening a PR
3-
argument-hint: "[--context minimal|standard|deep] [--include-files <files>] [--token-budget <n>] [--force-fresh] [--full-registry] [--model <model>] [--dry-run]"
3+
argument-hint: "[--context minimal|standard|deep] [--include-files <files>] [--token-budget <n>] [--force-fresh] [--full-registry] [--model <model>] [--timeout <seconds>] [--dry-run]"
44
---
55

66
# Local AI Code Review
77

8-
Run a structured code review using the OpenAI Chat Completions API. Reviews changes
8+
Run a structured code review using the OpenAI Responses API. Reviews changes
99
against the same methodology criteria used by the CI reviewer, but adapted for local
1010
pre-PR use. Designed for iterative review/revision cycles before submitting a PR.
1111

@@ -23,8 +23,15 @@ pre-PR use. Designed for iterative review/revision cycles before submitting a PR
2323
- `--force-fresh`: Skip delta-diff mode, run a full fresh review even if previous state exists
2424
- `--full-registry`: Include the entire REGISTRY.md instead of selective sections
2525
- `--model <name>`: Override the OpenAI model (default: `gpt-5.4`)
26+
- `--timeout <seconds>`: HTTP request timeout (default: 300). Use 900 for reasoning models.
2627
- `--dry-run`: Print the compiled prompt without calling the API
2728

29+
**Reasoning models** (`gpt-5.4-pro`, `o3`, `o4-mini`, etc.): Reviews may take 10-15
30+
minutes. For deep reviews with reasoning models, combine `--token-budget` with `--model`:
31+
```
32+
/ai-review-local --model gpt-5.4-pro --token-budget 500000 --context deep
33+
```
34+
2835
## Constraints
2936

3037
This skill does not modify source code files. It may:
@@ -320,12 +327,19 @@ python3 .claude/scripts/openai_review.py \
320327
[--token-budget "$token_budget"] \
321328
[--full-registry] \
322329
[--model <model>] \
330+
[--timeout <seconds>] \
323331
[--dry-run]
324332
```
325333

326334
Note: `--force-fresh` is a skill-only flag — it controls whether delta diffs are
327335
generated in Step 4 and is NOT passed to the script.
328336

337+
**Reasoning model handling:** If the model contains `-pro` or starts with `o1`/`o3`/`o4`
338+
(e.g., `gpt-5.4-pro`, `o3`, `o4-mini`):
339+
- Pass `--timeout 900` to the script (unless the user explicitly specified `--timeout`)
340+
- Run the Bash command with `run_in_background: true` (bypasses the 600s Bash tool timeout cap)
341+
- After the background command completes, continue to Step 6
342+
329343
If `--dry-run`: display the prompt output and stop. Report the estimated token count,
330344
cost estimate, and model that would be used.
331345

@@ -451,6 +465,9 @@ runs `--force-fresh` or when a rebase invalidates the tracked commit.
451465
# Use a different model with full registry
452466
/ai-review-local --model gpt-4.1 --full-registry
453467

468+
# Deep review with reasoning model (may take 10-15 minutes)
469+
/ai-review-local --model gpt-5.4-pro --token-budget 500000 --context deep
470+
454471
# Limit token budget for faster/cheaper reviews
455472
/ai-review-local --token-budget 100000
456473
```

.claude/scripts/openai_review.py

Lines changed: 102 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python3
2-
"""Local AI code review using OpenAI Chat Completions API.
2+
"""Local AI code review using OpenAI Responses API.
33
44
Compiles a review prompt from the project's review criteria, methodology registry,
55
and code diffs, then sends it to the OpenAI API for structured feedback.
@@ -854,6 +854,7 @@ def apply_token_budget(
854854
# MAINTENANCE: Update when OpenAI changes pricing.
855855
PRICING = {
856856
"gpt-5.4": (2.50, 15.00),
857+
"gpt-5.4-pro": (30.00, 180.00),
857858
"gpt-4.1": (2.00, 8.00),
858859
"gpt-4.1-mini": (0.40, 1.60),
859860
"o3": (2.00, 8.00),
@@ -1093,31 +1094,63 @@ def compile_prompt(
10931094
# OpenAI API call
10941095
# ---------------------------------------------------------------------------
10951096

1096-
ENDPOINT = "https://api.openai.com/v1/chat/completions"
1097+
ENDPOINT = "https://api.openai.com/v1/responses"
10971098
DEFAULT_MODEL = "gpt-5.4"
10981099
DEFAULT_TIMEOUT = 300 # seconds
10991100
DEFAULT_MAX_TOKENS = 16384
1101+
REASONING_MAX_TOKENS = 32768
1102+
1103+
1104+
def _is_reasoning_model(model: str) -> bool:
1105+
"""Return True for models that use internal chain-of-thought reasoning."""
1106+
return model.startswith(("o1", "o3", "o4")) or "-pro" in model
11001107

11011108

11021109
def estimate_tokens(text: str) -> int:
11031110
"""Rough token estimate (~4 chars per token). May vary +/- 50% for code."""
11041111
return len(text) // 4
11051112

11061113

1114+
def _extract_response_text(result: dict) -> str:
1115+
"""Extract review text from a Responses API JSON payload.
1116+
1117+
Tries the top-level ``output_text`` convenience field first (populated by
1118+
the Python SDK but typically null in raw HTTP responses), then walks
1119+
``output[].content[]`` items. Returns an empty string when no text is
1120+
found so the caller can decide how to handle it.
1121+
"""
1122+
text = result.get("output_text") or ""
1123+
if text:
1124+
return text
1125+
for item in result.get("output", []):
1126+
if item.get("type") == "message":
1127+
for block in item.get("content", []):
1128+
if block.get("type") == "output_text":
1129+
text += block.get("text", "")
1130+
return text
1131+
1132+
11071133
def call_openai(
1108-
prompt: str, model: str, api_key: str
1134+
prompt: str,
1135+
model: str,
1136+
api_key: str,
1137+
timeout: int = DEFAULT_TIMEOUT,
11091138
) -> "tuple[str, dict]":
1110-
"""Call the OpenAI Chat Completions API.
1139+
"""Call the OpenAI Responses API.
11111140
11121141
Returns (content, usage) where usage is the API response's usage dict
1113-
containing prompt_tokens and completion_tokens.
1142+
containing input_tokens and output_tokens.
11141143
"""
1115-
payload = {
1144+
reasoning = _is_reasoning_model(model)
1145+
max_tokens = REASONING_MAX_TOKENS if reasoning else DEFAULT_MAX_TOKENS
1146+
1147+
payload: dict = {
11161148
"model": model,
1117-
"messages": [{"role": "user", "content": prompt}],
1118-
"temperature": 0,
1119-
"max_completion_tokens": DEFAULT_MAX_TOKENS,
1149+
"input": prompt,
1150+
"max_output_tokens": max_tokens,
11201151
}
1152+
if not reasoning:
1153+
payload["temperature"] = 0
11211154

11221155
data = json.dumps(payload).encode("utf-8")
11231156
req = urllib.request.Request(
@@ -1131,7 +1164,7 @@ def call_openai(
11311164
)
11321165

11331166
try:
1134-
with urllib.request.urlopen(req, timeout=DEFAULT_TIMEOUT) as resp:
1167+
with urllib.request.urlopen(req, timeout=timeout) as resp:
11351168
result = json.loads(resp.read().decode("utf-8"))
11361169
except urllib.error.HTTPError as e:
11371170
body = ""
@@ -1165,7 +1198,7 @@ def call_openai(
11651198
sys.exit(1)
11661199
except TimeoutError:
11671200
print(
1168-
f"Error: Request timed out (>{DEFAULT_TIMEOUT}s). "
1201+
f"Error: Request timed out (>{timeout}s). "
11691202
"Try a smaller diff or disable --full-registry.",
11701203
file=sys.stderr,
11711204
)
@@ -1174,14 +1207,39 @@ def call_openai(
11741207
print(f"Error: Network error — {e.reason}", file=sys.stderr)
11751208
sys.exit(1)
11761209

1177-
choices = result.get("choices", [])
1178-
if not choices:
1179-
print("Error: Empty response from OpenAI API.", file=sys.stderr)
1210+
content = _extract_response_text(result)
1211+
1212+
# Treat truncated responses as errors — partial reviews may suppress findings.
1213+
status = result.get("status")
1214+
if content.strip() and status == "incomplete":
1215+
detail = result.get("incomplete_details") or ""
1216+
print(
1217+
"Error: Review was truncated (status='incomplete'). "
1218+
"Output may be missing findings.",
1219+
file=sys.stderr,
1220+
)
1221+
if detail:
1222+
print(f"Detail: {detail}", file=sys.stderr)
1223+
print(
1224+
"Try reducing diff size, disabling --full-registry, or "
1225+
"lowering --context to 'minimal'.",
1226+
file=sys.stderr,
1227+
)
11801228
sys.exit(1)
11811229

1182-
content = choices[0].get("message", {}).get("content", "")
11831230
if not content.strip():
1184-
print("Error: Empty review content from OpenAI API.", file=sys.stderr)
1231+
# No usable content — report the best diagnostic we have.
1232+
status = result.get("status", "<missing>")
1233+
detail = result.get("incomplete_details") or result.get("error") or ""
1234+
if status not in ("completed", "<missing>"):
1235+
print(
1236+
f"Error: OpenAI response status is '{status}' with no review content.",
1237+
file=sys.stderr,
1238+
)
1239+
else:
1240+
print("Error: Empty review content from OpenAI API.", file=sys.stderr)
1241+
if detail:
1242+
print(f"Detail: {detail}", file=sys.stderr)
11851243
sys.exit(1)
11861244

11871245
usage = result.get("usage", {})
@@ -1204,7 +1262,7 @@ def _read_file(path: str, label: str) -> str:
12041262

12051263
def main() -> None:
12061264
parser = argparse.ArgumentParser(
1207-
description="Run local AI code review via OpenAI Chat Completions API."
1265+
description="Run local AI code review via OpenAI Responses API."
12081266
)
12091267
parser.add_argument(
12101268
"--review-criteria",
@@ -1282,6 +1340,12 @@ def main() -> None:
12821340
help=f"Max estimated input tokens before dropping context "
12831341
f"(default: {DEFAULT_TOKEN_BUDGET:,})",
12841342
)
1343+
parser.add_argument(
1344+
"--timeout",
1345+
type=int,
1346+
default=DEFAULT_TIMEOUT,
1347+
help=f"HTTP request timeout in seconds (default: {DEFAULT_TIMEOUT})",
1348+
)
12851349
parser.add_argument(
12861350
"--delta-diff",
12871351
default=None,
@@ -1531,7 +1595,8 @@ def main() -> None:
15311595
)
15321596

15331597
# Cost estimate
1534-
cost_str = estimate_cost(est_tokens, DEFAULT_MAX_TOKENS, args.model)
1598+
max_out = REASONING_MAX_TOKENS if _is_reasoning_model(args.model) else DEFAULT_MAX_TOKENS
1599+
cost_str = estimate_cost(est_tokens, max_out, args.model)
15351600

15361601
# Dry-run: print prompt and exit
15371602
if args.dry_run:
@@ -1549,6 +1614,12 @@ def main() -> None:
15491614
sys.exit(0)
15501615

15511616
# Call OpenAI API
1617+
if _is_reasoning_model(args.model) and args.timeout == DEFAULT_TIMEOUT:
1618+
print(
1619+
f"Note: {args.model} is a reasoning model. Consider --timeout 900 "
1620+
"for large reviews.",
1621+
file=sys.stderr,
1622+
)
15521623
print(f"Sending review to {args.model}...", file=sys.stderr)
15531624
print(f"Estimated input tokens: ~{est_tokens:,}", file=sys.stderr)
15541625
if cost_str:
@@ -1559,7 +1630,9 @@ def main() -> None:
15591630
if delta_diff_text:
15601631
print("Mode: Delta-diff (changes since last review)", file=sys.stderr)
15611632

1562-
review_content, usage = call_openai(prompt, args.model, api_key)
1633+
review_content, usage = call_openai(
1634+
prompt, args.model, api_key, timeout=args.timeout
1635+
)
15631636

15641637
# Write review output
15651638
os.makedirs(os.path.dirname(args.output), exist_ok=True)
@@ -1603,8 +1676,8 @@ def main() -> None:
16031676
)
16041677

16051678
# Print completion summary with actual usage
1606-
actual_input = usage.get("prompt_tokens", 0)
1607-
actual_output = usage.get("completion_tokens", 0)
1679+
actual_input = usage.get("input_tokens", 0)
1680+
actual_output = usage.get("output_tokens", 0)
16081681
actual_cost = estimate_cost(actual_input, actual_output, args.model)
16091682

16101683
print(f"\nAI Review complete.", file=sys.stderr)
@@ -1615,6 +1688,14 @@ def main() -> None:
16151688
f"{actual_output:,} output",
16161689
file=sys.stderr,
16171690
)
1691+
reasoning_tokens = usage.get("output_tokens_details", {}).get(
1692+
"reasoning_tokens", 0
1693+
)
1694+
if reasoning_tokens:
1695+
print(
1696+
f" (includes {reasoning_tokens:,} reasoning tokens)",
1697+
file=sys.stderr,
1698+
)
16181699
if actual_cost:
16191700
print(f"Actual cost: {actual_cost}", file=sys.stderr)
16201701
else:

0 commit comments

Comments (0)