Skip to content

Commit afec25e

Browse files
feat(retry): opt-in claude -p retry backend with silent fallback
The log-driven LaTeX retry currently reuses the main backend's LLM, which for MiniMax means the same model that wrote the broken section also gets to fix it — often not enough for structural equation bugs. New flag: --retry-backend {same,hybrid} (default: same). - 'same' preserves current behavior; users without claude delegation set up see no change at all. - 'hybrid' swaps the fix pass to claude -p via the local proxy (model defaults to claude-opus-4-5, override with --retry-model). Before using it we probe {claude-proxy}/health with a 1.5s timeout; if the proxy isn't running we log a `warning` progress event and silently fall back to the main backend. No exception, no abort — "retry quality is a nice-to-have, not a dependency" is an explicit design choice. The probe helper (probe_claude_proxy) lives in config.py so other callers can reuse it. writeup()/run_pipeline() now carry retry_cfg/retry_model kwargs end-to-end; _retry_failing_sections uses them if supplied, otherwise the existing cfg/model. Also: sanitize the paper title alongside abstract (both bypass the per-section pipeline). Cheap, symmetric, covers the one prose-special case abstract fix didn't. Tests: 9 new cases pin the flag surface, the probe-fail-fallback contract, explicit --retry-model wins, and the real probe returns False quickly against a dead port. Total suite now 68 passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6d5731b commit afec25e

5 files changed

Lines changed: 191 additions & 12 deletions

File tree

skills/hermes-sci/package/hermes_sci/cli.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import sys
1515
from typing import Any, Optional
1616

17-
from .config import Backend, apply_env, resolve_backend
17+
from .config import Backend, apply_env, probe_claude_proxy, resolve_backend
1818
from .ideation import ideate as ideate_fn, save_ideas
1919
from .orchestrator import run_pipeline
2020
from .progress import _resolve_builtin as _resolve_progress
@@ -47,6 +47,44 @@ def _add_common(p: argparse.ArgumentParser) -> None:
4747
p.add_argument("-v", "--verbose", action="store_true")
4848

4949

50+
def _add_retry_common(p: argparse.ArgumentParser) -> None:
51+
"""Retry-backend knobs for writeup/pipeline only (not ideate/review)."""
52+
p.add_argument("--retry-backend", choices=("same", "hybrid"), default="same",
53+
help="backend for log-driven LaTeX retry after pdflatex "
54+
"fails. 'same' (default) reuses --backend; 'hybrid' "
55+
"routes the retry through claude -p for a stronger "
56+
"fix pass. If the claude proxy is unreachable we "
57+
"silently fall back to 'same' — users without "
58+
"delegation keep working.")
59+
p.add_argument("--retry-model", default=None,
60+
help="model to use on retry (default: --model, or "
61+
"'claude-opus-4-5' when --retry-backend=hybrid)")
62+
63+
64+
def _build_retry_cfg(args, primary_cfg, progress_cb):
65+
"""Build a retry BackendConfig if opted in + reachable; else return None.
66+
67+
Emits a progress warning when hybrid was requested but the proxy probe
68+
fails, so the user sees WHY the retry is downgrading."""
69+
if getattr(args, "retry_backend", "same") != "hybrid":
70+
return None, None
71+
if not probe_claude_proxy(args.claude_proxy):
72+
from .progress import Progress, emit
73+
emit(progress_cb, Progress(
74+
kind="warning", stage="compile",
75+
message=f"--retry-backend=hybrid requested but claude proxy at "
76+
f"{args.claude_proxy} is unreachable — retry will use "
77+
f"the primary backend instead",
78+
))
79+
return None, None
80+
retry_cfg = resolve_backend(
81+
backend="hybrid", model_override=None,
82+
claude_proxy_url=args.claude_proxy,
83+
)
84+
retry_model = args.retry_model or "claude-opus-4-5"
85+
return retry_cfg, retry_model
86+
87+
5088
def cmd_ideate(args) -> int:
5189
cfg = resolve_backend(backend=args.backend, model_override=args.model,
5290
claude_proxy_url=args.claude_proxy)
@@ -84,6 +122,7 @@ def cmd_writeup(args) -> int:
84122
results_arg = pathlib.Path(args.results_md).read_text(encoding="utf-8")
85123
out = pathlib.Path(args.output)
86124
progress = _resolve_progress(args.progress)
125+
retry_cfg, retry_model = _build_retry_cfg(args, cfg, progress)
87126
r = writeup_fn(
88127
cfg, idea=idea, out_dir=out, results=results_arg,
89128
model=args.model, skip_compile=args.skip_compile,
@@ -93,6 +132,8 @@ def cmd_writeup(args) -> int:
93132
concurrency=args.concurrency,
94133
annotate_unverified_claims=args.annotate_unverified,
95134
progress=progress,
135+
retry_cfg=retry_cfg,
136+
retry_model=retry_model,
96137
)
97138
print(json.dumps(r, indent=2, ensure_ascii=False))
98139
return 0 if r.get("pdf") or args.skip_compile else 1
@@ -145,6 +186,7 @@ def cmd_pipeline(args) -> int:
145186
elif args.results_md:
146187
results_arg = pathlib.Path(args.results_md).read_text(encoding="utf-8")
147188
progress = _resolve_progress(args.progress)
189+
retry_cfg, retry_model = _build_retry_cfg(args, cfg, progress)
148190
r = run_pipeline(
149191
cfg, topic=args.topic, out_dir=pathlib.Path(args.output),
150192
num_ideas=args.num_ideas, results=results_arg,
@@ -158,6 +200,8 @@ def cmd_pipeline(args) -> int:
158200
concurrency=args.concurrency,
159201
annotate_unverified_claims=args.annotate_unverified,
160202
progress=progress,
203+
retry_cfg=retry_cfg,
204+
retry_model=retry_model,
161205
)
162206
print(json.dumps(r, indent=2, ensure_ascii=False))
163207
return 0 if not r.get("error") else 1
@@ -198,6 +242,7 @@ def build_parser() -> argparse.ArgumentParser:
198242
help="override concurrency limit (default: auto — 1 during "
199243
"MiniMax peak 15:00-17:30 Asia/Shanghai, 7 off-peak)")
200244
pw.add_argument("-o", "--output", required=True, help="output directory")
245+
_add_retry_common(pw)
201246
pw.set_defaults(func=cmd_writeup)
202247

203248
pv = sub.add_parser("validate-results",
@@ -229,6 +274,7 @@ def build_parser() -> argparse.ArgumentParser:
229274
pp.add_argument("--no-parallel", action="store_true")
230275
pp.add_argument("--concurrency", type=int, default=None)
231276
pp.add_argument("-o", "--output", required=True, help="output directory")
277+
_add_retry_common(pp)
232278
pp.set_defaults(func=cmd_pipeline)
233279
return p
234280

skills/hermes-sci/package/hermes_sci/config.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,26 @@ def resolve_backend(
159159
raise ValueError(f"Unknown backend: {backend}")
160160

161161

162+
def probe_claude_proxy(url: str, timeout_s: float = 1.5) -> bool:
    """Return True iff GET {url}/health responds {"ok": true} fast enough.

    Intended for CLI `--retry-backend hybrid` where we want to silently
    fall back to the main backend if the claude proxy isn't running —
    not every user has delegation wired up, and failing the whole run
    because of an optional-upgrade path would be hostile.

    Args:
        url: base URL of the proxy; a trailing slash is tolerated.
        timeout_s: socket timeout for the probe. Keep it short — this
            runs on the CLI startup path before any real work.
    """
    import http.client
    import json
    import urllib.error
    import urllib.request
    try:
        with urllib.request.urlopen(f"{url.rstrip('/')}/health",
                                    timeout=timeout_s) as r:
            data = json.loads(r.read().decode("utf-8"))
            return bool(data.get("ok"))
    # A probe must never raise: transport errors (URLError/OSError),
    # malformed HTTP framing (HTTPException, e.g. BadStatusLine from a
    # non-HTTP listener on the port — previously escaped this handler),
    # bad JSON/decode (ValueError), and timeouts all just mean "proxy
    # not usable" → False.
    except (urllib.error.URLError, http.client.HTTPException,
            OSError, ValueError, TimeoutError):
        return False
180+
181+
162182
def apply_env(cfg: BackendConfig) -> None:
163183
"""Export env so 3rd-party SDKs (openai, anthropic) pick up the config."""
164184
os.environ["OPENAI_API_KEY"] = cfg.api_key

skills/hermes-sci/package/hermes_sci/orchestrator.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ def run_pipeline(
4848
concurrency: Optional[int] = None,
4949
annotate_unverified_claims: bool = False,
5050
progress: ProgressCallback = _noop_progress,
51+
retry_cfg: Optional[BackendConfig] = None,
52+
retry_model: Optional[str] = None,
5153
) -> dict:
5254
"""Full: ideate → pick best → writeup → review.
5355
@@ -100,7 +102,8 @@ def run_pipeline(
100102
critique=critique, coherence=coherence, parallel=parallel,
101103
concurrency=concurrency,
102104
annotate_unverified_claims=annotate_unverified_claims,
103-
progress=progress)
105+
progress=progress,
106+
retry_cfg=retry_cfg, retry_model=retry_model)
104107
report["stages"]["writeup"] = w
105108

106109
# Stage 4: review (only if PDF was produced)

skills/hermes-sci/package/hermes_sci/writeup.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -413,15 +413,16 @@ def context_fn(section_key: str) -> str:
413413
log.info("inserted table labels in %s: %s", k, sorted(after - before))
414414
cleaned[k] = v2
415415

416-
# Abstract comes from idea metadata (ideation step), not from the
417-
# per-section LLM pass, so it also bypasses the per-section sanitize
418-
# pipeline. Run it through explicitly so prose specials like `_` or a
419-
# truncated inline equation don't crash pdflatex before \section{} even
420-
# starts.
416+
# Title + abstract come from idea metadata (ideation step), not from
417+
# the per-section LLM pass, so they bypass the SANITIZE_PIPELINE. Run
418+
# them through explicitly — otherwise a prose `_` in the title or a
419+
# truncated inline equation in the abstract crashes pdflatex before
420+
# any \section{} even starts.
421+
title = _sanitize_latex(str(idea.get("Title") or "Untitled Research"))
421422
abstract = _sanitize_latex(str(idea.get("Abstract") or ""))
422423

423424
return Paper(
424-
title=str(idea.get("Title") or "Untitled Research"),
425+
title=title,
425426
abstract=abstract,
426427
sections=cleaned,
427428
)
@@ -472,10 +473,20 @@ def _run_latex(out_dir: pathlib.Path, pdflatex: str) -> tuple[int, list[str]]:
472473

473474
def _retry_failing_sections(
474475
cfg: BackendConfig, paper: Paper, errors: list[str], model: Optional[str],
476+
retry_cfg: Optional[BackendConfig] = None,
477+
retry_model: Optional[str] = None,
475478
) -> Paper:
476479
"""Ask the LLM to fix LaTeX of each section given the error list.
477480
Runs sections in parallel via asyncio.gather — one gather cap saves ~7x
478-
vs sequential when all sections need fixing."""
481+
vs sequential when all sections need fixing.
482+
483+
If `retry_cfg` is supplied, uses that backend/model for the fix pass
484+
instead of the main one. Intended for swapping to a stronger model on
485+
retry (e.g. `claude-opus-4-5` via hybrid) when the main backend is
486+
cheap/fast MiniMax. Caller is responsible for probing reachability
487+
before handing us a hybrid cfg — we don't swallow connection errors."""
488+
fix_cfg = retry_cfg or cfg
489+
fix_model = retry_model or model
479490
err_blob = "\n".join(f"- {e}" for e in errors[:12])
480491

481492
async def _fix_one(key: str, body: str) -> tuple[str, str]:
@@ -486,7 +497,8 @@ async def _fix_one(key: str, body: str) -> tuple[str, str]:
486497
"clean, return it unchanged verbatim."
487498
)
488499
try:
489-
text = await acomplete(cfg, system=RETRY_SYSTEM, user=user, model=model,
500+
text = await acomplete(fix_cfg, system=RETRY_SYSTEM, user=user,
501+
model=fix_model,
490502
temperature=0.1, max_tokens=2500)
491503
fixed = _sanitize_latex(text.strip())
492504
if not fixed or not _looks_like_latex(fixed):
@@ -498,7 +510,7 @@ async def _fix_one(key: str, body: str) -> tuple[str, str]:
498510
log.warning("retry LLM failed for %s: %s", key, e)
499511
return key, body
500512

501-
limit = recommended_concurrency(cfg)
513+
limit = recommended_concurrency(fix_cfg)
502514

503515
async def _fix_all():
504516
sem = asyncio.Semaphore(limit)
@@ -569,6 +581,8 @@ def writeup(
569581
audit: bool = True,
570582
annotate_unverified_claims: bool = False,
571583
progress: ProgressCallback = _noop_progress,
584+
retry_cfg: Optional[BackendConfig] = None,
585+
retry_model: Optional[str] = None,
572586
) -> dict:
573587
"""End-to-end: idea → paper.tex → paper.pdf with Phase-2 quality passes.
574588
@@ -662,7 +676,9 @@ def writeup(
662676
meta={"duration_s": time.time() - t_c,
663677
"error": str(first_err)}))
664678
return result
665-
fixed = _retry_failing_sections(cfg, paper, errs, model)
679+
fixed = _retry_failing_sections(cfg, paper, errs, model,
680+
retry_cfg=retry_cfg,
681+
retry_model=retry_model)
666682
tex2 = render_tex(fixed)
667683
try:
668684
pdf = compile_pdf(tex2, out_dir, progress=progress)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""`--retry-backend hybrid` flag: opt-in, silent fallback when proxy down."""
2+
from __future__ import annotations
3+
4+
from unittest import mock
5+
6+
from hermes_sci.cli import _build_retry_cfg, build_parser
7+
from hermes_sci.progress import Progress, noop
8+
9+
10+
def _parse_writeup(*extra):
    """Parse a minimal `writeup` invocation plus any extra flags."""
    argv = ["writeup", "--ideas-json", "i.json", "-o", "out"]
    argv.extend(extra)
    return build_parser().parse_args(argv)
15+
16+
17+
def test_flag_default_is_same():
    """Without the flag, parsing yields the no-change defaults."""
    args = _parse_writeup()
    assert args.retry_backend == "same"
    assert args.retry_model is None
21+
22+
23+
def test_flag_accepts_hybrid():
    """`--retry-backend hybrid` is an accepted choice."""
    args = _parse_writeup("--retry-backend", "hybrid")
    assert args.retry_backend == "hybrid"
26+
27+
28+
def test_flag_rejects_unknown_value():
    """argparse exits on any value outside the (same, hybrid) choices."""
    import pytest
    parser = build_parser()
    bad_argv = ["writeup", "--ideas-json", "i.json", "-o", "out",
                "--retry-backend", "anthropic"]
    with pytest.raises(SystemExit):
        parser.parse_args(bad_argv)
34+
35+
36+
def test_retry_model_accepted():
    """An explicit --retry-model round-trips through the parser."""
    args = _parse_writeup("--retry-backend", "hybrid",
                          "--retry-model", "claude-sonnet-4-5")
    assert args.retry_model == "claude-sonnet-4-5"
40+
41+
42+
def test_build_retry_cfg_default_returns_none():
    """retry-backend=same → no retry cfg built, retry keeps using main cfg."""
    args = _parse_writeup()
    cfg, model = _build_retry_cfg(args, primary_cfg=object(), progress_cb=noop)
    assert cfg is None
    assert model is None
47+
48+
49+
def test_build_retry_cfg_hybrid_probe_fail_falls_back_silently():
    """The whole point: users without delegation keep working.

    Proxy probe fails → _build_retry_cfg returns (None, None) + emits a
    `warning` progress event. No exception, no abort."""
    args = _parse_writeup("--retry-backend", "hybrid")
    seen: list[Progress] = []

    with mock.patch("hermes_sci.cli.probe_claude_proxy", return_value=False):
        cfg, model = _build_retry_cfg(args, primary_cfg=object(),
                                      progress_cb=seen.append)

    assert cfg is None
    assert model is None
    assert any(e.kind == "warning" and "unreachable" in e.message
               for e in seen)
65+
66+
67+
def test_build_retry_cfg_hybrid_probe_ok_builds_cfg():
    """Probe OK → a hybrid cfg is built with the opus default model."""
    args = _parse_writeup("--retry-backend", "hybrid")
    sentinel_cfg = object()

    probe_ok = mock.patch("hermes_sci.cli.probe_claude_proxy",
                          return_value=True)
    resolver = mock.patch("hermes_sci.cli.resolve_backend",
                          return_value=sentinel_cfg)
    with probe_ok, resolver as rb:
        cfg, model = _build_retry_cfg(args, primary_cfg=object(),
                                      progress_cb=noop)

    assert cfg is sentinel_cfg
    assert model == "claude-opus-4-5"  # default when --retry-model absent
    # Called with hybrid backend.
    assert rb.call_args.kwargs["backend"] == "hybrid"
80+
81+
82+
def test_build_retry_cfg_hybrid_respects_explicit_retry_model():
    """An explicit --retry-model beats the claude-opus-4-5 default."""
    args = _parse_writeup("--retry-backend", "hybrid",
                          "--retry-model", "claude-haiku-4-5")
    probe_ok = mock.patch("hermes_sci.cli.probe_claude_proxy",
                          return_value=True)
    resolver = mock.patch("hermes_sci.cli.resolve_backend",
                          return_value=object())
    with probe_ok, resolver:
        _, model = _build_retry_cfg(args, primary_cfg=object(),
                                    progress_cb=noop)
    assert model == "claude-haiku-4-5"
89+
90+
91+
def test_probe_claude_proxy_false_on_unreachable():
    """Real function — no server on this port, must return False fast."""
    from hermes_sci.config import probe_claude_proxy as probe
    assert probe("http://127.0.0.1:1", timeout_s=0.5) is False

0 commit comments

Comments
 (0)