Skip to content

Commit edfc37b

Browse files
committed
style(compiler): trim narrative comments from #75 series
Strip multi-paragraph docstrings down to one-line summaries, remove inline comments that re-stated what the code does, and drop all issue/PR cross-references — those belong in commit messages and PR descriptions, not in code that has to age. Kept the WHYs a cold reader actually needs: the DeepSeek/Qwen prompt requirement on _JSON_RESPONSE_FORMAT, and the .get("content") or raw note about JSON-null distinction.
1 parent fada924 commit edfc37b

1 file changed

Lines changed: 13 additions & 59 deletions

File tree

openkb/agent/compiler.py

Lines changed: 13 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,8 @@
3737
# Prompt templates
3838
# ---------------------------------------------------------------------------
3939

40-
# JSON-mode hint for calls whose prompt asks the LLM to return a JSON object.
41-
# Most providers (OpenAI, DeepSeek, Qwen, Kimi, GLM, MiniMax, Doubao) accept
42-
# this kwarg and switch into a JSON-constrained decoding mode; providers that
43-
# don't will either ignore it or raise BadRequestError (caller's choice).
44-
# DeepSeek/Qwen also require the prompt itself to mention "json", which the
45-
# templates below already satisfy.
40+
# DeepSeek/Qwen require the prompt itself to mention "json" when this kwarg
41+
# is set; the templates below already do.
4642
_JSON_RESPONSE_FORMAT = {"type": "json_object"}
4743

4844
_SYSTEM_TEMPLATE = """\
@@ -293,12 +289,10 @@ async def _llm_call_async(model: str, messages: list[dict], step_name: str, **kw
293289

294290

295291
def _warn_if_truncated(response, step_name: str, max_tokens: int | None) -> None:
296-
"""Surface ``finish_reason == 'length'`` as a visible warning.
292+
"""Emit a warning when the LLM hit the max_tokens cap.
297293
298-
When the LLM hits the ``max_tokens`` cap mid-response, ``json_repair``
299-
will often salvage the truncated prefix and parsing silently succeeds
300-
with a smaller-than-intended payload. Flagging it here lets users
301-
distinguish "LLM emitted a short plan" from "LLM was cut off".
294+
``json_repair`` will silently salvage the truncated prefix, so without
295+
this the caller can't tell a short response from a cut-off one.
302296
"""
303297
try:
304298
finish_reason = response.choices[0].finish_reason
@@ -329,15 +323,7 @@ def _parse_json(text: str) -> list | dict:
329323

330324

331325
def _filter_concept_items(items: list, label: str) -> list[dict]:
332-
"""Keep only dicts that carry a non-empty ``name``; warn about anything else.
333-
334-
The concepts-plan prompt asks for ``[{"name": ..., "title": ...}, ...]``
335-
but LLMs occasionally emit nested lists, bare strings, or dicts that
336-
forgot ``name``. JSON mode constrains syntax, not schema, so all of
337-
these still slip through ``_parse_json``. Without this guard a
338-
name-less dict crashes the ``planned_slugs`` set comprehension
339-
(``c["name"]`` → KeyError) and aborts the whole concepts step.
340-
"""
326+
"""Keep only dicts that carry a non-empty ``name``; warn about anything else."""
341327
if not isinstance(items, list):
342328
logger.warning("concepts plan: %s was %s, expected list — dropping",
343329
label, type(items).__name__)
@@ -358,30 +344,13 @@ def _filter_concept_items(items: list, label: str) -> list[dict]:
358344

359345

360346
def _require_nonempty_content(content, name: str) -> None:
361-
"""Raise if a concept body is missing or whitespace-only.
362-
363-
Under ``response_format=json_object`` the LLM can legally return
364-
``{"content": null}`` or ``{"content": ""}`` — typically a refusal
365-
or a content-policy hit. Without this guard, ``_gen_create`` /
366-
``_gen_update`` would return an empty tuple that ``pending_writes``
367-
accepts as a successful concept, then ``_write_concept`` would commit
368-
an empty Markdown page to disk and ``[OK]`` would print as if all
369-
was well. Raising here makes the failure visible through the
370-
existing ``failure_types`` collector + partial-failure ``[WARN]``.
371-
"""
347+
"""Raise if a concept body is missing or whitespace-only."""
372348
if not isinstance(content, str) or not content.strip():
373349
raise ValueError(f"LLM returned empty content for concept {name!r}")
374350

375351

376352
def _filter_related_slugs(items: list) -> list[str]:
377-
"""Keep only non-empty string slugs; warn about anything else.
378-
379-
``related`` is documented in the prompt as "array of slug strings",
380-
but the same shape drift that motivates ``_filter_concept_items``
381-
applies here. Non-strings reaching ``_sanitize_concept_name`` raise
382-
TypeError inside ``unicodedata.normalize`` and crash the whole
383-
``_compile_concepts`` call.
384-
"""
353+
"""Keep only non-empty string slugs; warn about anything else."""
385354
if not isinstance(items, list):
386355
logger.warning("concepts plan: related was %s, expected list — dropping",
387356
type(items).__name__)
@@ -1034,9 +1003,6 @@ def _write_v1_summary_stripped() -> None:
10341003
try:
10351004
parsed = _parse_json(plan_raw)
10361005
except (json.JSONDecodeError, ValueError) as exc:
1037-
# Surface the first 500 chars at WARNING so operators not running
1038-
# with DEBUG enabled can still diagnose; keep the full raw at
1039-
# DEBUG for the truncation-past-500 case (see issue #71).
10401006
preview = plan_raw[:500] + ("..." if len(plan_raw) > 500 else "")
10411007
logger.warning(
10421008
"Failed to parse concepts plan: %s. Raw output (first 500 chars): %r",
@@ -1055,8 +1021,6 @@ def _write_v1_summary_stripped() -> None:
10551021
return
10561022

10571023
# Fallback: if LLM returns a flat list, treat all items as "create".
1058-
# Validate each item is a dict — without this, a nested list like
1059-
# [[{...}]] crashes _gen_create at `concept.get("title")` (issue #71).
10601024
if isinstance(parsed, list):
10611025
plan = {"create": _filter_concept_items(parsed, "list"),
10621026
"update": [], "related": []}
@@ -1071,10 +1035,7 @@ def _write_v1_summary_stripped() -> None:
10711035
update_items = plan["update"]
10721036
related_items = plan["related"]
10731037

1074-
# Detect "plan had items but the filters dropped them all". Without
1075-
# this, the early-return below looks identical to "LLM legitimately
1076-
# had nothing to add" — the exact silent-loss-looks-like-success bug
1077-
# PR #75's [WARN] mechanism set out to fix.
1038+
# Distinguish "filters dropped everything" from "LLM emitted an empty plan".
10781039
if isinstance(parsed, list):
10791040
original_total = len(parsed)
10801041
else:
@@ -1148,11 +1109,8 @@ async def _gen_create(concept: dict) -> tuple[str, str, bool, str]:
11481109
try:
11491110
parsed = _parse_json(raw)
11501111
brief = parsed.get("brief", "")
1151-
# ``.get("content", raw)`` only uses the default when the key is
1152-
# absent — ``{"content": null}`` (legal under json_object mode
1153-
# for a refused/empty page) returns None. ``or raw`` collapses
1154-
# null/empty to the raw fallback so the validator below sees
1155-
# a consistent string-or-empty.
1112+
# ``or raw``: ``.get("content", raw)`` returns None for
1113+
# ``{"content": null}`` (legal under json_object mode).
11561114
content = parsed.get("content") or raw
11571115
except (json.JSONDecodeError, ValueError):
11581116
brief, content = "", raw
@@ -1220,12 +1178,8 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
12201178
if brief:
12211179
concept_briefs_map[safe_name] = brief
12221180

1223-
# Surface partial/total failure prominently: WARNING logs are easy to
1224-
# miss in long compile output, and the [OK] line at the end of `add`
1225-
# is unconditional. Issue #71: silent loss of all concepts looked
1226-
# like success to the user. Include exception type names inline so
1227-
# the stdout line is self-contained (the per-failure WARNING logs
1228-
# go to stderr, which a stdout-only consumer never sees).
1181+
# Include exception type names inline so the stdout line is
1182+
# self-contained — per-failure WARNINGs go to stderr.
12291183
written = len(pending_writes)
12301184
if written < total:
12311185
reason = (

0 commit comments

Comments
 (0)