Skip to content

Commit afec25e

Browse files
feat(retry): opt-in claude -p retry backend with silent fallback
The log-driven LaTeX retry currently reuses the main backend's LLM, which for MiniMax means the same model that wrote the broken section also gets to fix it — often not enough for structural equation bugs. New flag: --retry-backend {same,hybrid} (default: same). - 'same' preserves current behavior; users without claude delegation set up see no change at all. - 'hybrid' swaps the fix pass to claude -p via the local proxy (model defaults to claude-opus-4-5, override with --retry-model). Before using it we probe {claude-proxy}/health with a 1.5s timeout; if the proxy isn't running we log a `warning` progress event and silently fall back to the main backend. No exception, no abort — "retry quality is a nice-to-have, not a dependency" is an explicit design choice. The probe helper (probe_claude_proxy) lives in config.py so other callers can reuse it. writeup()/run_pipeline() now carry retry_cfg/retry_model kwargs end-to-end; _retry_failing_sections uses them if supplied, otherwise the existing cfg/model. Also: sanitize the paper title alongside abstract (both bypass the per-section pipeline). Cheap, symmetric, covers the one prose-special case abstract fix didn't. Tests: 9 new cases pin the flag surface, the probe-fail-fallback contract, explicit --retry-model wins, and the real probe returns False quickly against a dead port. Total suite now 68 passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6d5731b commit afec25e

5 files changed

Lines changed: 191 additions & 12 deletions

File tree

skills/hermes-sci/package/hermes_sci/cli.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import sys
1515
from typing import Any, Optional
1616

17-
from .config import Backend, apply_env, resolve_backend
17+
from .config import Backend, apply_env, probe_claude_proxy, resolve_backend
1818
from .ideation import ideate as ideate_fn, save_ideas
1919
from .orchestrator import run_pipeline
2020
from .progress import _resolve_builtin as _resolve_progress
@@ -47,6 +47,44 @@ def _add_common(p: argparse.ArgumentParser) -> None:
4747
p.add_argument("-v", "--verbose", action="store_true")
4848

4949

50+
def _add_retry_common(p: argparse.ArgumentParser) -> None:
51+
"""Retry-backend knobs for writeup/pipeline only (not ideate/review)."""
52+
p.add_argument("--retry-backend", choices=("same", "hybrid"), default="same",
53+
help="backend for log-driven LaTeX retry after pdflatex "
54+
"fails. 'same' (default) reuses --backend; 'hybrid' "
55+
"routes the retry through claude -p for a stronger "
56+
"fix pass. If the claude proxy is unreachable we "
57+
"silently fall back to 'same' — users without "
58+
"delegation keep working.")
59+
p.add_argument("--retry-model", default=None,
60+
help="model to use on retry (default: --model, or "
61+
"'claude-opus-4-5' when --retry-backend=hybrid)")
62+
63+
64+
def _build_retry_cfg(args, primary_cfg, progress_cb):
65+
"""Build a retry BackendConfig if opted in + reachable; else return None.
66+
67+
Emits a progress warning when hybrid was requested but the proxy probe
68+
fails, so the user sees WHY the retry is downgrading."""
69+
if getattr(args, "retry_backend", "same") != "hybrid":
70+
return None, None
71+
if not probe_claude_proxy(args.claude_proxy):
72+
from .progress import Progress, emit
73+
emit(progress_cb, Progress(
74+
kind="warning", stage="compile",
75+
message=f"--retry-backend=hybrid requested but claude proxy at "
76+
f"{args.claude_proxy} is unreachable — retry will use "
77+
f"the primary backend instead",
78+
))
79+
return None, None
80+
retry_cfg = resolve_backend(
81+
backend="hybrid", model_override=None,
82+
claude_proxy_url=args.claude_proxy,
83+
)
84+
retry_model = args.retry_model or "claude-opus-4-5"
85+
return retry_cfg, retry_model
86+
87+
5088
def cmd_ideate(args) -> int:
5189
cfg = resolve_backend(backend=args.backend, model_override=args.model,
5290
claude_proxy_url=args.claude_proxy)
@@ -84,6 +122,7 @@ def cmd_writeup(args) -> int:
84122
results_arg = pathlib.Path(args.results_md).read_text(encoding="utf-8")
85123
out = pathlib.Path(args.output)
86124
progress = _resolve_progress(args.progress)
125+
retry_cfg, retry_model = _build_retry_cfg(args, cfg, progress)
87126
r = writeup_fn(
88127
cfg, idea=idea, out_dir=out, results=results_arg,
89128
model=args.model, skip_compile=args.skip_compile,
@@ -93,6 +132,8 @@ def cmd_writeup(args) -> int:
93132
concurrency=args.concurrency,
94133
annotate_unverified_claims=args.annotate_unverified,
95134
progress=progress,
135+
retry_cfg=retry_cfg,
136+
retry_model=retry_model,
96137
)
97138
print(json.dumps(r, indent=2, ensure_ascii=False))
98139
return 0 if r.get("pdf") or args.skip_compile else 1
@@ -145,6 +186,7 @@ def cmd_pipeline(args) -> int:
145186
elif args.results_md:
146187
results_arg = pathlib.Path(args.results_md).read_text(encoding="utf-8")
147188
progress = _resolve_progress(args.progress)
189+
retry_cfg, retry_model = _build_retry_cfg(args, cfg, progress)
148190
r = run_pipeline(
149191
cfg, topic=args.topic, out_dir=pathlib.Path(args.output),
150192
num_ideas=args.num_ideas, results=results_arg,
@@ -158,6 +200,8 @@ def cmd_pipeline(args) -> int:
158200
concurrency=args.concurrency,
159201
annotate_unverified_claims=args.annotate_unverified,
160202
progress=progress,
203+
retry_cfg=retry_cfg,
204+
retry_model=retry_model,
161205
)
162206
print(json.dumps(r, indent=2, ensure_ascii=False))
163207
return 0 if not r.get("error") else 1
@@ -198,6 +242,7 @@ def build_parser() -> argparse.ArgumentParser:
198242
help="override concurrency limit (default: auto — 1 during "
199243
"MiniMax peak 15:00-17:30 Asia/Shanghai, 7 off-peak)")
200244
pw.add_argument("-o", "--output", required=True, help="output directory")
245+
_add_retry_common(pw)
201246
pw.set_defaults(func=cmd_writeup)
202247

203248
pv = sub.add_parser("validate-results",
@@ -229,6 +274,7 @@ def build_parser() -> argparse.ArgumentParser:
229274
pp.add_argument("--no-parallel", action="store_true")
230275
pp.add_argument("--concurrency", type=int, default=None)
231276
pp.add_argument("-o", "--output", required=True, help="output directory")
277+
_add_retry_common(pp)
232278
pp.set_defaults(func=cmd_pipeline)
233279
return p
234280

skills/hermes-sci/package/hermes_sci/config.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,26 @@ def resolve_backend(
159159
raise ValueError(f"Unknown backend: {backend}")
160160

161161

162+
def probe_claude_proxy(url: str, timeout_s: float = 1.5) -> bool:
    """Return True iff GET {url}/health responds {"ok": true} fast enough.

    Intended for CLI `--retry-backend hybrid` where we want to silently
    fall back to the main backend if the claude proxy isn't running —
    not every user has delegation wired up, and failing the whole run
    because of an optional-upgrade path would be hostile.

    Args:
        url: base URL of the proxy; a trailing slash is tolerated.
        timeout_s: socket timeout for the probe. Keep it short — this
            runs on the CLI startup path before any real work.
    """
    import http.client
    import json
    import urllib.error
    import urllib.request
    try:
        with urllib.request.urlopen(f"{url.rstrip('/')}/health",
                                    timeout=timeout_s) as r:
            data = json.loads(r.read().decode("utf-8"))
            return bool(data.get("ok"))
    # A probe must never raise: transport errors (URLError/OSError),
    # malformed HTTP framing (HTTPException, e.g. BadStatusLine from a
    # non-HTTP listener on the port — previously escaped this handler),
    # bad JSON/decode (ValueError), and timeouts all just mean "proxy
    # not usable" → False.
    except (urllib.error.URLError, http.client.HTTPException,
            OSError, ValueError, TimeoutError):
        return False
180+
181+
162182
def apply_env(cfg: BackendConfig) -> None:
163183
"""Export env so 3rd-party SDKs (openai, anthropic) pick up the config."""
164184
os.environ["OPENAI_API_KEY"] = cfg.api_key

skills/hermes-sci/package/hermes_sci/orchestrator.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ def run_pipeline(
4848
concurrency: Optional[int] = None,
4949
annotate_unverified_claims: bool = False,
5050
progress: ProgressCallback = _noop_progress,
51+
retry_cfg: Optional[BackendConfig] = None,
52+
retry_model: Optional[str] = None,
5153
) -> dict:
5254
"""Full: ideate → pick best → writeup → review.
5355
@@ -100,7 +102,8 @@ def run_pipeline(
100102
critique=critique, coherence=coherence, parallel=parallel,
101103
concurrency=concurrency,
102104
annotate_unverified_claims=annotate_unverified_claims,
103-
progress=progress)
105+
progress=progress,
106+
retry_cfg=retry_cfg, retry_model=retry_model)
104107
report["stages"]["writeup"] = w
105108

106109
# Stage 4: review (only if PDF was produced)

skills/hermes-sci/package/hermes_sci/writeup.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -413,15 +413,16 @@ def context_fn(section_key: str) -> str:
413413
log.info("inserted table labels in %s: %s", k, sorted(after - before))
414414
cleaned[k] = v2
415415

416-
# Abstract comes from idea metadata (ideation step), not from the
417-
# per-section LLM pass, so it also bypasses the per-section sanitize
418-
# pipeline. Run it through explicitly so prose specials like `_` or a
419-
# truncated inline equation don't crash pdflatex before \section{} even
420-
# starts.
416+
# Title + abstract come from idea metadata (ideation step), not from
417+
# the per-section LLM pass, so they bypass the SANITIZE_PIPELINE. Run
418+
# them through explicitly — otherwise a prose `_` in the title or a
419+
# truncated inline equation in the abstract crashes pdflatex before
420+
# any \section{} even starts.
421+
title = _sanitize_latex(str(idea.get("Title") or "Untitled Research"))
421422
abstract = _sanitize_latex(str(idea.get("Abstract") or ""))
422423

423424
return Paper(
424-
title=str(idea.get("Title") or "Untitled Research"),
425+
title=title,
425426
abstract=abstract,
426427
sections=cleaned,
427428
)
@@ -472,10 +473,20 @@ def _run_latex(out_dir: pathlib.Path, pdflatex: str) -> tuple[int, list[str]]:
472473

473474
def _retry_failing_sections(
474475
cfg: BackendConfig, paper: Paper, errors: list[str], model: Optional[str],
476+
retry_cfg: Optional[BackendConfig] = None,
477+
retry_model: Optional[str] = None,
475478
) -> Paper:
476479
"""Ask the LLM to fix LaTeX of each section given the error list.
477480
Runs sections in parallel via asyncio.gather — one gather cap saves ~7x
478-
vs sequential when all sections need fixing."""
481+
vs sequential when all sections need fixing.
482+
483+
If `retry_cfg` is supplied, uses that backend/model for the fix pass
484+
instead of the main one. Intended for swapping to a stronger model on
485+
retry (e.g. `claude-opus-4-5` via hybrid) when the main backend is
486+
cheap/fast MiniMax. Caller is responsible for probing reachability
487+
before handing us a hybrid cfg — we don't swallow connection errors."""
488+
fix_cfg = retry_cfg or cfg
489+
fix_model = retry_model or model
479490
err_blob = "\n".join(f"- {e}" for e in errors[:12])
480491

481492
async def _fix_one(key: str, body: str) -> tuple[str, str]:
@@ -486,7 +497,8 @@ async def _fix_one(key: str, body: str) -> tuple[str, str]:
486497
"clean, return it unchanged verbatim."
487498
)
488499
try:
489-
text = await acomplete(cfg, system=RETRY_SYSTEM, user=user, model=model,
500+
text = await acomplete(fix_cfg, system=RETRY_SYSTEM, user=user,
501+
model=fix_model,
490502
temperature=0.1, max_tokens=2500)
491503
fixed = _sanitize_latex(text.strip())
492504
if not fixed or not _looks_like_latex(fixed):
@@ -498,7 +510,7 @@ async def _fix_one(key: str, body: str) -> tuple[str, str]:
498510
log.warning("retry LLM failed for %s: %s", key, e)
499511
return key, body
500512

501-
limit = recommended_concurrency(cfg)
513+
limit = recommended_concurrency(fix_cfg)
502514

503515
async def _fix_all():
504516
sem = asyncio.Semaphore(limit)
@@ -569,6 +581,8 @@ def writeup(
569581
audit: bool = True,
570582
annotate_unverified_claims: bool = False,
571583
progress: ProgressCallback = _noop_progress,
584+
retry_cfg: Optional[BackendConfig] = None,
585+
retry_model: Optional[str] = None,
572586
) -> dict:
573587
"""End-to-end: idea → paper.tex → paper.pdf with Phase-2 quality passes.
574588
@@ -662,7 +676,9 @@ def writeup(
662676
meta={"duration_s": time.time() - t_c,
663677
"error": str(first_err)}))
664678
return result
665-
fixed = _retry_failing_sections(cfg, paper, errs, model)
679+
fixed = _retry_failing_sections(cfg, paper, errs, model,
680+
retry_cfg=retry_cfg,
681+
retry_model=retry_model)
666682
tex2 = render_tex(fixed)
667683
try:
668684
pdf = compile_pdf(tex2, out_dir, progress=progress)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""`--retry-backend hybrid` flag: opt-in, silent fallback when proxy down."""
2+
from __future__ import annotations
3+
4+
from unittest import mock
5+
6+
from hermes_sci.cli import _build_retry_cfg, build_parser
7+
from hermes_sci.progress import Progress, noop
8+
9+
10+
def _parse_writeup(*extra):
    """Parse a minimal `writeup` invocation plus any extra flags."""
    argv = ["writeup", "--ideas-json", "i.json", "-o", "out"]
    argv.extend(extra)
    return build_parser().parse_args(argv)
15+
16+
17+
def test_flag_default_is_same():
    """Without the flag, parsing yields the no-change defaults."""
    args = _parse_writeup()
    assert args.retry_backend == "same"
    assert args.retry_model is None
21+
22+
23+
def test_flag_accepts_hybrid():
    """`--retry-backend hybrid` is an accepted choice."""
    args = _parse_writeup("--retry-backend", "hybrid")
    assert args.retry_backend == "hybrid"
26+
27+
28+
def test_flag_rejects_unknown_value():
    """argparse exits on any value outside the (same, hybrid) choices."""
    import pytest
    parser = build_parser()
    bad_argv = ["writeup", "--ideas-json", "i.json", "-o", "out",
                "--retry-backend", "anthropic"]
    with pytest.raises(SystemExit):
        parser.parse_args(bad_argv)
34+
35+
36+
def test_retry_model_accepted():
    """An explicit --retry-model round-trips through the parser."""
    args = _parse_writeup("--retry-backend", "hybrid",
                          "--retry-model", "claude-sonnet-4-5")
    assert args.retry_model == "claude-sonnet-4-5"
40+
41+
42+
def test_build_retry_cfg_default_returns_none():
    """retry-backend=same → no retry cfg built, retry keeps using main cfg."""
    args = _parse_writeup()
    cfg, model = _build_retry_cfg(args, primary_cfg=object(), progress_cb=noop)
    assert cfg is None
    assert model is None
47+
48+
49+
def test_build_retry_cfg_hybrid_probe_fail_falls_back_silently():
    """The whole point: users without delegation keep working.

    Proxy probe fails → _build_retry_cfg returns (None, None) + emits a
    `warning` progress event. No exception, no abort."""
    args = _parse_writeup("--retry-backend", "hybrid")
    seen: list[Progress] = []

    with mock.patch("hermes_sci.cli.probe_claude_proxy", return_value=False):
        cfg, model = _build_retry_cfg(args, primary_cfg=object(),
                                      progress_cb=seen.append)

    assert cfg is None
    assert model is None
    assert any(e.kind == "warning" and "unreachable" in e.message
               for e in seen)
65+
66+
67+
def test_build_retry_cfg_hybrid_probe_ok_builds_cfg():
    """Probe OK → a hybrid cfg is built with the opus default model."""
    args = _parse_writeup("--retry-backend", "hybrid")
    sentinel_cfg = object()

    probe_ok = mock.patch("hermes_sci.cli.probe_claude_proxy",
                          return_value=True)
    resolver = mock.patch("hermes_sci.cli.resolve_backend",
                          return_value=sentinel_cfg)
    with probe_ok, resolver as rb:
        cfg, model = _build_retry_cfg(args, primary_cfg=object(),
                                      progress_cb=noop)

    assert cfg is sentinel_cfg
    assert model == "claude-opus-4-5"  # default when --retry-model absent
    # Called with hybrid backend.
    assert rb.call_args.kwargs["backend"] == "hybrid"
80+
81+
82+
def test_build_retry_cfg_hybrid_respects_explicit_retry_model():
    """An explicit --retry-model beats the claude-opus-4-5 default."""
    args = _parse_writeup("--retry-backend", "hybrid",
                          "--retry-model", "claude-haiku-4-5")
    probe_ok = mock.patch("hermes_sci.cli.probe_claude_proxy",
                          return_value=True)
    resolver = mock.patch("hermes_sci.cli.resolve_backend",
                          return_value=object())
    with probe_ok, resolver:
        _, model = _build_retry_cfg(args, primary_cfg=object(),
                                    progress_cb=noop)
    assert model == "claude-haiku-4-5"
89+
90+
91+
def test_probe_claude_proxy_false_on_unreachable():
    """Real function — no server on this port, must return False fast."""
    from hermes_sci.config import probe_claude_proxy as probe
    assert probe("http://127.0.0.1:1", timeout_s=0.5) is False

0 commit comments

Comments
 (0)