From 12d0e4fafd191209721b52b38ee6f836c68e246a Mon Sep 17 00:00:00 2001 From: ssweber <57631333+ssweber@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:49:06 -0400 Subject: [PATCH 1/3] refactor(csv): consolidate tall-instruction padding hydrate/dehydrate Writer dehydrate: replace _conditions_are_blank with _is_padding_row that also recognizes |-only rows as strippable padding. Delete unused _is_blank_row helper. Converter hydrate: add _hydrate_wire_continuations pass that propagates T/| tokens downward into blank padding cells after block finalization. Generalize auto-pad to handle tall AF instructions at any row position, not just row 0. Co-Authored-By: Claude Opus 4.6 --- src/laddercodec/csv/converter.py | 44 ++++++++++++++++++++++++++------ src/laddercodec/csv/writer.py | 27 ++++++++++---------- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/src/laddercodec/csv/converter.py b/src/laddercodec/csv/converter.py index b49a8bc..cb4162d 100644 --- a/src/laddercodec/csv/converter.py +++ b/src/laddercodec/csv/converter.py @@ -34,6 +34,10 @@ search/copy = 2, send/receive = 3, etc.). If the user CSV omits blank continuation rows, they are auto-appended so ``encode_rung()`` receives the correct ``logical_rows``. + +After padding, a wire-continuation hydration pass propagates ``T`` and +``|`` tokens downward into blank cells, restoring vertical wire continuity +that was stripped by the writer's dehydration pass. """ from __future__ import annotations @@ -121,6 +125,27 @@ def _af_visual_rows(token: AfToken) -> int: return 1 +def _hydrate_wire_continuations(condition_rows: list[list[ConditionToken]]) -> None: + """Propagate ``T`` / ``|`` tokens downward into blank cells (in-place). + + After block finalization and auto-padding, padding rows are all-blank. + This pass restores vertical wire continuity: wherever row *R* has a + ``T`` (junction-down) or ``|`` (vertical pass-through), row *R+1* + gets a ``|`` if the cell is currently blank. + + Iterates top-to-bottom so multi-row vertical runs propagate correctly. + Stops one row short of the last row — vertical tokens on the last row + are invalid (they would point to a nonexistent row below). + """ + # range(len - 2): iterate rows 0..N-3, writing to rows 1..N-2. + # Row N-1 (last) is never written to. + for row_idx in range(len(condition_rows) - 2): + for col_idx in range(len(condition_rows[row_idx])): + token = condition_rows[row_idx][col_idx] + if token in ("T", "|") and condition_rows[row_idx + 1][col_idx] == "": + condition_rows[row_idx + 1][col_idx] = cast(ConditionToken, "|") + + def _parse_rung_row(row: RowAst, *, strict: bool) -> _ParsedRow: """Convert one parsed CSV row into tokens plus AF-row classification.""" conds = [condition_node_to_token(n) for n in row.condition_nodes] @@ -612,14 +637,17 @@ def convert_rung( _flush_plain_rows(plain_rows, condition_rows, af_tokens) - # --- Auto-pad for tall instructions --- - af0 = af_tokens[0] if af_tokens else "" - if isinstance(af0, AfInstruction): - spec = get_af_family_for_token(af0) - min_rows = spec.min_csv_rows if spec is not None else 1 - while len(condition_rows) < min_rows: - condition_rows.append(cast(list[ConditionToken], [""] * CONDITION_COLUMNS)) - af_tokens.append("") + # --- Auto-pad for tall instructions (any row, not just row 0) --- + for i in range(len(af_tokens)): + af_i = af_tokens[i] + if isinstance(af_i, AfInstruction): + needed = i + _af_visual_rows(af_i) + while len(condition_rows) < needed: + condition_rows.append(_blank_condition_row()) + af_tokens.append("") + + # --- Hydrate wire continuations --- + _hydrate_wire_continuations(condition_rows) logical_rows = len(condition_rows) return logical_rows, condition_rows, af_tokens, comment diff --git a/src/laddercodec/csv/writer.py b/src/laddercodec/csv/writer.py index d627e6b..80b9ce2 100644 --- a/src/laddercodec/csv/writer.py +++ b/src/laddercodec/csv/writer.py @@ -20,8 +20,10 @@ Tall instruction padding ------------------------ -Non-retained timers have a trailing blank row for visual height. This row -is stripped before writing — the forward path's auto-padding restores it. +Tall AF instructions have trailing padding rows for visual height. Padding +rows — those containing only blank cells and vertical pass-through wires +(``|``) — are stripped before writing. The forward path's auto-padding and +wire-continuation hydration restores them. """ from __future__ import annotations @@ -79,16 +81,15 @@ def _token_to_csv(token: object) -> str: # --------------------------------------------------------------------------- -def _is_blank_row(conditions: Sequence[object], af: object) -> bool: - """Return True if all conditions are blank/empty and AF is blank.""" - if af != "": - return False - return all(c == "" for c in conditions) +def _is_padding_row(conditions: Sequence[object]) -> bool: + """Return True when a row is pure tall-instruction padding. - -def _conditions_are_blank(conditions: Sequence[object]) -> bool: - """Return True when a row has no condition-side content at all.""" - return all(c == "" for c in conditions) + Padding rows contain only blank cells and vertical pass-through + wires (``|``). The ``|`` tokens are reconstructable from ``T`` + tokens in the row above, so they can be stripped on write and + restored on read. + """ + return all(c in ("", "|") for c in conditions) def _append_data_row( @@ -109,7 +110,7 @@ def _emit_blank_continuation( conditions: Sequence[object], ) -> int: """Emit a blank-AF continuation row only when it carries geometry.""" - if _conditions_are_blank(conditions): + if _is_padding_row(conditions): return data_row_count return _append_data_row(rows, data_row_count, conditions, "") @@ -200,7 +201,7 @@ def _emit_counter_block( counter_conditions = condition_rows[start] bridge_conditions = condition_rows[start + 1] - if _conditions_are_blank(counter_conditions): + if all(c == "" for c in counter_conditions): data_row_count = _append_data_row(rows, data_row_count, bridge_conditions, counter) else: data_row_count = _append_data_row(rows, data_row_count, counter_conditions, "") From 994bf231d9a14dba2660e024444d7a9c5eba6236 Mon Sep 17 00:00:00 2001 From: ssweber <57631333+ssweber@users.noreply.github.com> Date: Wed, 8 Apr 2026 07:59:49 -0400 Subject: [PATCH 2/3] refactor(encode): remove AF summary block from encoder Click accepts multi-AF single-rung encodes without the summary block appended to the last AF cell. Confirmed by paste-testing both variants. Removes ~115 lines of complex gating logic that only one golden fixture exercised. Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 4 +- docs/guides/troubleshooting.md | 10 +--- docs/internals/instruction-blobs.md | 13 ----- src/laddercodec/_grid.py | 34 +----------- src/laddercodec/cell.py | 18 ++----- src/laddercodec/encode.py | 49 ------------------ src/laddercodec/encode_multi.py | 2 +- src/laddercodec/instructions/raw.py | 4 +- .../golden/instr-3row-branch.bin | Bin 12288 -> 12288 bytes .../golden/verify_progress.log | 2 +- 10 files changed, 11 insertions(+), 125 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c548fc2..293494a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -72,7 +72,7 @@ src/laddercodec/ - **encode.py** — Encoder. Public API is `encode()` (accepts `Rung` or `list[Rung]`). Internal `encode_rung()` orchestrates: validate → compute metadata → build grid → insert comment → pad to page. Constants, RTF helpers, type aliases (`ConditionToken`, `AfToken`), and `_af_segment()` / `_compute_seg_boundaries()` live here. - **encode_multi.py** — Multi-rung encoder. Internal `encode_rungs()`. Combines N rungs into one buffer with per-rung preambles and data rows. Delegates grid building to `_grid.py`. -- **_grid.py** — Shared grid-building functions used by both encoders. `_validate_rung()` validates dimensions/tokens, `_compute_rung_metadata()` returns a `RungMetadata` dataclass (instruction indices, AF summary, segment boundaries), `_build_rung_grid()` builds the cell grid for one rung. +- **_grid.py** — Shared grid-building functions used by both encoders. `_validate_rung()` validates dimensions/tokens, `_compute_rung_metadata()` returns a `RungMetadata` dataclass (instruction indices, segment boundaries), `_build_rung_grid()` builds the cell grid for one rung. - **binary_helpers.py** — Shared binary serialization primitives. Encoding: `_utf16le_null()`, `_tagged_field()`, `_variant_tagged_field()`. Decoding: `_read_utf16le()`, `_parse_tagged_fields()`, `_parse_tagged_fields_verbose()` (returns tag IDs + handles variant sentinels). Used by all instruction modules and `devtools/inspect_bin.py`. - **decode.py** — Decoder. Public API is `decode()` (auto-detects single vs multi-rung). Returns `Rung` or `list[Rung]`. Walks the variable-length cell grid, parses instruction blobs into Contact/Coil/Timer domain objects, decodes RTF comments to markdown. Falls back to `RawInstruction` for unrecognised cell types. - **decode_program.py** — Program file decoder. Public API is `decode_program()`. Reads `Scr*.tmp` files (Click's internal format, ~17x smaller than clipboard) and returns a `Program` with name, index, and decoded rungs. Same instruction parsing as `decode.py` but different framing. @@ -128,7 +128,7 @@ Quick reference: three flag bytes per cell — segment (+0x19), right (+0x1D), d ## Instruction Blobs -Full spec: [`docs/internals/instruction-blobs.md`](docs/internals/instruction-blobs.md) — blob structure, class names, field layouts, AF summary block. +Full spec: [`docs/internals/instruction-blobs.md`](docs/internals/instruction-blobs.md) — blob structure, class names, field layouts. ## Current Encoder State diff --git a/docs/guides/troubleshooting.md b/docs/guides/troubleshooting.md index 19e60f9..48689e2 100644 --- a/docs/guides/troubleshooting.md +++ b/docs/guides/troubleshooting.md @@ -77,7 +77,7 @@ The cell header is 0x25 (37) bytes. Key fields: After the header: instruction blob (variable length), then a 16-byte tail. -**Size differences** are the most important signal. If a cell is 32 bytes larger in your output, you're probably emitting an AF summary block that shouldn't be there — or vice versa. +**Size differences** are the most important signal. If a cell has unexpected extra bytes, the blob length formula is probably wrong for that instruction type. **Flag differences** (segment, wire_right, wire_down) can cause visual corruption but usually don't crash Click. @@ -95,14 +95,6 @@ Raw `xxd` / hex diffs of the two binaries are nearly useless because: ## Known pitfalls -### AF summary block - -When a single-rung buffer has 2+ AF instructions, the encoder appends a 32-byte summary block to the last AF cell. **This summary must be suppressed when any AF is multi-row** (e.g. a retained timer with a `.reset()` pin row). - -The summary is built by `_build_af_summary()` in `encode.py` and gated by `_compute_rung_metadata()` in `_grid.py`. If your rung has a mix of single-row AFs (coils, copies) and multi-row AFs (timers), check that the summary isn't being emitted. - -Symptom: the last AF instruction cell is ~32 bytes larger than in the native capture. Click crashes on paste because the extra bytes misalign the rest of the grid. - ### Tall instruction visual_rows The byte at cell offset +0x0A (`visual_rows`) is not always the same as the instruction's `cell_params()["visual_rows"]`. For instructions with pin rows (retained timers, counters, shifts, drums), the native binary may use a different value that accounts for the pin rows. diff --git a/docs/internals/instruction-blobs.md b/docs/internals/instruction-blobs.md index 4f905ad..86cb91b 100644 --- a/docs/internals/instruction-blobs.md +++ b/docs/internals/instruction-blobs.md @@ -51,19 +51,6 @@ stores concrete addresses. The `encode()` API exposes this via `show_nicknames=True`, which sets the flag on all math instructions in the buffer. -## AF summary block - -In single-rung buffers, when a rung has 2+ AF instruction cells, the **last** AF instruction cell gets an extra block appended between the blob and tail: - -1. 12 zero bytes (header padding) -2. `uint32 LE` total instruction count -3. `af_count * 8`-byte entries in a diagonal pattern: - - `entry[af_idx] = left_value` (total_instr_count - instr_index for non-last; instrs_on_row for last) - - `entry[af_idx + af_count] = 1` if row has a condition contact -4. Modified 16-byte tail: `tail[3]=1, tail[12]=1, tail[15]=1` - -This block replaces the instruction count that would normally go on an AF data cell (`tail[12] = total_instr_count`) when no AF data cell exists (all rows have AF instructions). - ## Blob boundary detection For unknown instruction types, the blob boundary can be detected using the generic multi-part formula: diff --git a/src/laddercodec/_grid.py b/src/laddercodec/_grid.py index 456f41e..3a277bf 100644 --- a/src/laddercodec/_grid.py +++ b/src/laddercodec/_grid.py @@ -21,7 +21,6 @@ AfToken, ConditionToken, _af_segment, - _build_af_summary, _compute_seg_boundaries, _normalize_af, ) @@ -98,7 +97,6 @@ class RungMetadata: total_instr_count: int cond_instr_indices: dict[tuple[int, int], int] af_instr_indices: dict[int, int] - af_summary_block: bytes seg_boundaries: list[int] | None @@ -106,13 +104,8 @@ def _compute_rung_metadata( logical_rows: int, condition_rows: Sequence[Sequence[ConditionToken]], af_tokens: Sequence[AfToken], - *, - single_rung: bool = True, ) -> RungMetadata: - """Compute instruction indices, summary block, and segment boundaries. - - The AF summary block is only computed for single-rung buffers. - """ + """Compute instruction indices and segment boundaries.""" rung_has_instructions = any( isinstance(t, ConditionInstruction) for row in condition_rows for t in row ) or any(isinstance(af, AfInstruction) for af in af_tokens) @@ -136,26 +129,6 @@ def _compute_rung_metadata( af_instr_indices[row_idx] = idx idx += 1 - # AF summary block — needed on the last AF instruction cell when 2+ AFs - # are all single-row (single-rung only; multi-rung does not use - # af_summary). Suppressed when any AF is multi-row (e.g. retained - # timer with reset pin) — native captures omit it in that case. - af_summary_block = b"" - af_rows = sorted(af_instr_indices.keys()) - any_multi_row_af = any( - tok.cell_params().get("visual_rows", 1) > 1 - for r in af_rows - if isinstance((tok := af_tokens[r]), AfInstruction) - ) - if single_rung and len(af_rows) >= 2 and not any_multi_row_af: - af_entries: list[tuple[int, int, bool]] = [] - for r in af_rows: - cond_count = sum(1 for t in condition_rows[r] if isinstance(t, ConditionInstruction)) - instrs_on_row = cond_count + 1 # +1 for the AF instruction itself - row_has_contact = cond_count > 0 - af_entries.append((af_instr_indices[r], instrs_on_row, row_has_contact)) - af_summary_block = _build_af_summary(total_instr_count, len(af_rows), af_entries) - seg_boundaries = _compute_seg_boundaries(condition_rows) if rung_has_instructions else None return RungMetadata( @@ -163,7 +136,6 @@ def _compute_rung_metadata( total_instr_count=total_instr_count, cond_instr_indices=cond_instr_indices, af_instr_indices=af_instr_indices, - af_summary_block=af_summary_block, seg_boundaries=seg_boundaries, ) @@ -190,7 +162,6 @@ def _build_rung_grid( Returns the concatenated grid rows as a bytearray. """ grid = bytearray() - af_rows = sorted(meta.af_instr_indices.keys()) for local_row in range(logical_rows): g = global_row_start + local_row @@ -257,8 +228,6 @@ def _build_rung_grid( ).to_bytes() ) else: # AF column (col 31) - is_last_af = af_rows and local_row == af_rows[-1] - summary = meta.af_summary_block if is_last_af else b"" if isinstance(af, AfInstruction): from .instructions.math import Math @@ -286,7 +255,6 @@ def _build_rung_grid( blob=blob, row_span=params.get("visual_rows", 1), visual_rows=params.get("visual_rows", 1), - af_summary=summary, ).to_bytes() ) else: diff --git a/src/laddercodec/cell.py b/src/laddercodec/cell.py index e9f80ac..d0bbebf 100644 --- a/src/laddercodec/cell.py +++ b/src/laddercodec/cell.py @@ -67,11 +67,6 @@ class ClickCell: row_span: int = 1 visual_rows: int = 1 - # AF summary block — appended between blob and tail on the LAST AF - # instruction cell when the rung has 2+ AF instructions. Pre-built - # by the encoder; this module just splices it in. - af_summary: bytes = b"" - def build_header(self) -> bytearray: """Build the 0x25 (37-byte) structured header.""" header = bytearray(_INSTR_DATA_OFFSET) @@ -122,14 +117,7 @@ def build_tail(self) -> bytearray: """Build the 16-byte tail with dynamic rung index.""" tail = bytearray(16) - if self.af_summary: - # Modified tail for the last AF instruction cell when a summary - # block is present (single-rung only; multi-rung does not use - # af_summary). Confirmed via native capture (instr-3row-branch). - tail[3] = 0x01 - tail[12] = 0x01 - tail[15] = 0x01 - elif self.blob: + if self.blob: # Instruction cell tail — always has marker, dynamic rung_idx. # Condition-side (col < 31): tail[13] = local_row + 1. # AF-side (col 31): single-rung and multi-rung use different @@ -195,8 +183,8 @@ def to_bytes(self) -> bytes: padding = bytes(CELL_SIZE - _INSTR_DATA_OFFSET - 16) return bytes(header + padding + tail) - # Instruction cell: header(37) + blob [+ af_summary] + tail(16). - return bytes(header + self.blob + self.af_summary + tail) + # Instruction cell: header(37) + blob + tail(16). + return bytes(header + self.blob + tail) # --------------------------------------------------------------------------- diff --git a/src/laddercodec/encode.py b/src/laddercodec/encode.py index ec6aa0d..8178467 100644 --- a/src/laddercodec/encode.py +++ b/src/laddercodec/encode.py @@ -247,55 +247,6 @@ def _normalize_af(token: AfToken) -> str: raise ValueError(f"Unsupported AF token: {token!r}") -def _build_af_summary( - total_instr_count: int, - af_count: int, - af_entries: list[tuple[int, int, bool]], -) -> bytes: - """Build the AF instruction summary block for the last AF cell. - - Appended between the blob and tail on the last AF instruction cell - when the rung has 2+ AF instructions. Confirmed via native capture - (instr-3row-branch). - - Parameters - ---------- - total_instr_count: - Total number of instructions (conditions + AFs) in the rung. - af_count: - Number of AF instruction cells in the rung. - af_entries: - One tuple per AF instruction, ordered by row: - ``(instr_index, instrs_on_row, row_has_contact)``. - ``instrs_on_row`` = count of all instructions on this AF's row. - ``row_has_contact`` = True if a Contact/CompareContact is on this row. - - Structure (40 bytes for 3 AFs):: - - 12 zero bytes - uint32 LE total_instr_count - af_count × 8-byte entries (diagonal pattern): - entry[af_idx] = left_value - entry[af_idx + af_count] = right_value (1 if row has contact) - where left_value: - non-last AF: total_instr_count - instr_index - last AF: instrs_on_row - """ - block = bytearray(12) # zero header - block += total_instr_count.to_bytes(4, "little") - - for af_idx, (instr_index, instrs_on_row, row_has_contact) in enumerate(af_entries): - entry = bytearray(8) - is_last_af = af_idx == af_count - 1 - left = instrs_on_row if is_last_af else total_instr_count - instr_index - entry[af_idx] = left & 0xFF - if row_has_contact: - entry[af_idx + af_count] = 1 - block += entry - - return bytes(block) - - def _compute_seg_boundaries( condition_rows: Sequence[Sequence[ConditionToken]], ) -> list[int]: diff --git a/src/laddercodec/encode_multi.py b/src/laddercodec/encode_multi.py index 658ca5a..88f31e7 100644 --- a/src/laddercodec/encode_multi.py +++ b/src/laddercodec/encode_multi.py @@ -177,7 +177,7 @@ def encode_rungs( for r_idx, (logical_rows, condition_rows, af_tokens) in enumerate(rungs): is_last = r_idx == N - 1 - meta = _compute_rung_metadata(logical_rows, condition_rows, af_tokens, single_rung=False) + meta = _compute_rung_metadata(logical_rows, condition_rows, af_tokens) grid += _build_rung_grid( logical_rows, condition_rows, diff --git a/src/laddercodec/instructions/raw.py b/src/laddercodec/instructions/raw.py index 9850da2..777ed94 100644 --- a/src/laddercodec/instructions/raw.py +++ b/src/laddercodec/instructions/raw.py @@ -66,7 +66,7 @@ def find_blob_boundary(raw: bytes) -> tuple[str, int, int]: """Find the end of the instruction blob in *raw* bytes. *raw* is everything from cell offset +0x25 to the next cell boundary - (i.e. blob + optional af_summary + 16-byte tail). + (i.e. blob + 16-byte tail). Returns ``(class_name, blob_end_offset, part_count)``. The clean blob is ``raw[:blob_end_offset]``. @@ -336,7 +336,7 @@ class RawInstruction(AfInstruction): the blob for CSV readability; also present inside *blob*. blob: Full instruction blob bytes (from cell offset +0x25 to the end - of tagged fields, excluding tail and af_summary). + of tagged fields, excluding tail). part_count: Number of parts (1 = single-row, >1 = multi-row). Derived from the blob during construction. diff --git a/tests/fixtures/ladder_captures/golden/instr-3row-branch.bin b/tests/fixtures/ladder_captures/golden/instr-3row-branch.bin index 57e74c323899bb68161d1b87661cf3df752111f3..882742963a7cb2b97f15577281f998e17d63b8d6 100644 GIT binary patch delta 22 ccmZojXh_&_SecEH0SH(&GphXOo>*W308WkueE Date: Wed, 8 Apr 2026 08:37:15 -0400 Subject: [PATCH 3/3] feat(devtools): add combine_coverage tool and simplify coverage-golden Add combine_coverage.py to stack random coverage CSVs into a single multi-row rung for quick paste smoke-testing. Simplify coverage_golden.py to regenerate all bins unconditionally instead of gating on verify log. Co-Authored-By: Claude Opus 4.6 --- devtools/combine_coverage.py | 88 ++++++++++++ devtools/coverage_golden.py | 126 ++++++++++-------- docs/guides/adding-instructions.md | 25 ++-- .../coverage/golden/verify_progress.log | 2 +- 4 files changed, 180 insertions(+), 61 deletions(-) create mode 100644 devtools/combine_coverage.py diff --git a/devtools/combine_coverage.py b/devtools/combine_coverage.py new file mode 100644 index 0000000..0107243 --- /dev/null +++ b/devtools/combine_coverage.py @@ -0,0 +1,88 @@ +"""Combine random coverage golden CSVs into a single multi-row rung. + +Picks N coverage fixtures at random, stacks their data rows into one +rung, encodes to .bin, and writes both to devtools/. The first data +row gets the ``R`` marker; all others get an empty marker. Comment +rows are discarded. + +Usage:: + + uv run devtools/combine_coverage.py # 4 random fixtures + uv run devtools/combine_coverage.py -n 3 # 3 random fixtures + uv run devtools/combine_coverage.py -n 6 # 6 random fixtures +""" + +import argparse +import csv +import random +import sys +from io import StringIO +from pathlib import Path + +from laddercodec import encode, read_csv +from laddercodec.csv.contract import CSV_HEADER + +COVERAGE_DIR = Path("tests/fixtures/coverage/golden") +OUT_CSV = Path("devtools/combined.csv") +OUT_BIN = Path("devtools/combined.bin") + + +def collect_data_rows(csv_path: Path) -> list[list[str]]: + """Read a coverage CSV and return data rows (no header, no comments).""" + rows = [] + with csv_path.open(newline="") as f: + reader = csv.reader(f) + next(reader) # skip header + for row in reader: + if not row or row[0].strip() == "#": + continue + rows.append(row) + return rows + + +def main() -> None: + parser = argparse.ArgumentParser(description="Combine coverage CSVs") + parser.add_argument("-n", type=int, default=4, help="number of fixtures (default: 4)") + parser.add_argument("--seed", type=int, default=None, help="random seed") + args = parser.parse_args() + + csvs = sorted(COVERAGE_DIR.glob("*.csv")) + if not csvs: + print("No coverage CSVs found", file=sys.stderr) + sys.exit(1) + + rng = random.Random(args.seed) + picked = rng.sample(csvs, min(args.n, len(csvs))) + + print(f"Combining {len(picked)} fixtures:") + all_rows: list[list[str]] = [] + for p in picked: + print(f" {p.stem}") + all_rows.extend(collect_data_rows(p)) + + if not all_rows: + print("No data rows found", file=sys.stderr) + sys.exit(1) + + # First row gets R marker, rest get empty + all_rows[0][0] = "R" + for row in all_rows[1:]: + row[0] = "" + + # Write combined CSV + buf = StringIO() + writer = csv.writer(buf) + writer.writerow(CSV_HEADER) + writer.writerows(all_rows) + OUT_CSV.write_text(buf.getvalue()) + print(f"\nWrote {OUT_CSV} ({len(all_rows)} data rows)") + + # Encode to .bin + rung = read_csv(OUT_CSV) + data = encode(rung) + OUT_BIN.write_bytes(data) + print(f"Wrote {OUT_BIN} ({len(data)} bytes)") + + +if __name__ == "__main__": + main() diff --git a/devtools/coverage_golden.py b/devtools/coverage_golden.py index ed7b6b0..3e81fd8 100644 --- a/devtools/coverage_golden.py +++ b/devtools/coverage_golden.py @@ -1,10 +1,7 @@ -"""Manage coverage CSV/BIN fixtures based on verify_progress.log. +"""Manage coverage golden CSV/BIN fixtures. -Behavior: - - Parse tests/fixtures/coverage/golden/verify_progress.log - - Select only fixture stems marked as ``worked`` - - Generate ``.bin`` files for those stems from their ``.csv`` - - Remove any existing ``.bin`` whose stem is not marked ``worked`` +Regenerates all .bin files from .csv sources and prunes the verify log +for changed/deleted fixtures. Usage: uv run devtools/coverage_golden.py @@ -12,7 +9,7 @@ from __future__ import annotations -import re +import subprocess import sys from pathlib import Path @@ -25,20 +22,6 @@ GOLDEN_DIR = ROOT / "tests" / "fixtures" / "coverage" / "golden" VERIFY_LOG = GOLDEN_DIR / "verify_progress.log" -_WORKED_RE = re.compile(r"^([A-Za-z0-9_]+):\s*worked\s*$") - - -def _worked_stems_from_log(path: Path) -> list[str]: - if not path.exists(): - raise FileNotFoundError(f"Missing verify log: {path}") - - stems: list[str] = [] - for line in path.read_text(encoding="utf-8").splitlines(): - m = _WORKED_RE.match(line.strip()) - if m: - stems.append(m.group(1)) - return sorted(set(stems)) - def _encode_csv(csv_path: Path) -> bytes: program = parse_csv_file(csv_path) @@ -67,51 +50,90 @@ def _encode_csv(csv_path: Path) -> bytes: return encode_rungs(rung_inputs, comments=comments) -def sync() -> int: - worked_stems = _worked_stems_from_log(VERIFY_LOG) - if not worked_stems: - print(f"No 'worked' fixtures found in {VERIFY_LOG}") - return 1 - - print(f"Generating coverage bins for {len(worked_stems)} worked fixture(s):") - - missing_csv: list[str] = [] - generated = 0 - for stem in worked_stems: - csv_path = GOLDEN_DIR / f"{stem}.csv" - if not csv_path.exists(): - missing_csv.append(stem) - continue +def generate() -> None: + csv_files = sorted(GOLDEN_DIR.glob("*.csv")) + if not csv_files: + print(f"No CSV files found in {GOLDEN_DIR}") + sys.exit(1) + print(f"Generating coverage bins from {len(csv_files)} CSV files:") + for csv_path in csv_files: encoded = _encode_csv(csv_path) - bin_path = GOLDEN_DIR / f"{stem}.bin" + bin_path = csv_path.with_suffix(".bin") bin_path.write_bytes(encoded) - generated += 1 print(f" {csv_path.name} -> {bin_path.name} ({len(encoded):,} bytes)") - if missing_csv: - print("\nMissing CSV files for worked fixtures:") - for stem in missing_csv: - print(f" {stem}.csv") - return 1 - - worked_set = set(worked_stems) - stale_bins = sorted(p for p in GOLDEN_DIR.glob("*.bin") if p.stem not in worked_set) - if stale_bins: - print(f"\nRemoving {len(stale_bins)} non-worked .bin fixture(s):") - for p in stale_bins: + print(f"\nDone. {len(csv_files)} fixtures generated.") + + +def prune() -> None: + csv_stems = {p.stem for p in GOLDEN_DIR.glob("*.csv")} + + # --- Delete orphaned .bin (no matching .csv) --- + orphaned = sorted(p for p in GOLDEN_DIR.glob("*.bin") if p.stem not in csv_stems) + if orphaned: + print(f"Deleting {len(orphaned)} orphaned .bin files:") + for p in orphaned: print(f" {p.name}") p.unlink() - print(f"\nDone. Generated {generated} coverage bin fixture(s).") - return 0 + # --- Prune verify_progress.log --- + if not VERIFY_LOG.exists(): + print("No verify_progress.log found — nothing to prune.") + return + + # Find .bin files with uncommitted changes + result = subprocess.run( + ["git", "diff", "--name-only", "--", "tests/fixtures/coverage/golden/*.bin"], + capture_output=True, + text=True, + cwd=str(ROOT), + ) + changed_stems = { + Path(line.strip()).stem for line in result.stdout.strip().split("\n") if line.strip() + } + + lines = VERIFY_LOG.read_text().splitlines(keepends=True) + kept: list[str] = [] + removed: list[str] = [] + + for line in lines: + if line.startswith("#") or not line.strip(): + continue + name = line.split(":")[0].strip() + if name in changed_stems or name not in csv_stems: + removed.append(name) + else: + kept.append(line) + + with open(VERIFY_LOG, "w", encoding="utf-8") as f: + f.write("# verify_progress.log — tracks Click paste verification status\n") + for line in kept: + f.write(line if line.endswith("\n") else line + "\n") + + verified = sum(1 for ln in kept if ln.strip()) + total = len(csv_stems) + unverified = total - verified + + if removed: + print(f"Pruned {len(removed)} entries from verify_progress.log:") + for name in sorted(removed): + print(f" {name}") + + print(f"\nVerification status: {verified}/{total} verified, {unverified} unverified") + if unverified: + verified_names = {ln.split(":")[0].strip() for ln in kept if ln.strip()} + for name in sorted(csv_stems - verified_names): + print(f" UNVERIFIED: {name}") def main() -> None: if len(sys.argv) > 1: print("Usage: uv run devtools/coverage_golden.py") raise SystemExit(2) - raise SystemExit(sync()) + generate() + print() + prune() if __name__ == "__main__": diff --git a/docs/guides/adding-instructions.md b/docs/guides/adding-instructions.md index 07c4b8c..4a1eb31 100644 --- a/docs/guides/adding-instructions.md +++ b/docs/guides/adding-instructions.md @@ -158,25 +158,34 @@ make test && make lint ## Coverage testing -Coverage golden fixtures live in `tests/fixtures/coverage/golden/`. Each fixture is a hand-written CSV (`.csv`) paired with a Click-captured binary (`.bin`). +Coverage golden fixtures live in `tests/fixtures/coverage/golden/`. Each fixture is a hand-written CSV (`.csv`) paired with a generated binary (`.bin`). ### Adding a fixture 1. Create `tests/fixtures/coverage/golden/.csv` by hand — one rung per file, 33-column canonical format. -2. Capture the golden binary via Click paste round-trip using `clicknick-rung guided`. -3. `make test` — each `.csv` with a matching `.bin` gets a parametrized test comparing encoded CSV against captured bytes. +2. Run `make coverage-golden` to generate the `.bin` from the CSV. +3. Paste-verify in Click using `clicknick-rung guided`. +4. `make test` — each `.csv` with a matching `.bin` gets a parametrized test comparing encoded output to golden binaries. -### Regenerating coverage bins from verified fixtures - -When you want to refresh coverage bins from the current encoder, use: +### Regenerating coverage bins ```bash make coverage-golden ``` -This command reads `tests/fixtures/coverage/golden/verify_progress.log`, selects only fixture IDs marked `: worked`, regenerates those `.bin` files, and removes non-worked coverage `.bin` files. +Regenerates all `.bin` files from `.csv` sources and prunes the verify log for changed/deleted fixtures. Same workflow as `make golden` for ladder fixtures. + +### Smoke-testing combined fixtures + +To quickly test multiple instruction types together in a single rung: + +```bash +uv run devtools/combine_coverage.py # 4 random fixtures +uv run devtools/combine_coverage.py -n 6 # 6 random fixtures +uv run devtools/combine_coverage.py --seed 42 # reproducible pick +``` -Use this for bulk refresh/sync. For native Click fidelity checks, keep using capture round-trips for the specific fixture. +This picks N random coverage CSVs, stacks their data rows into one multi-row rung, and writes `devtools/combined.csv` + `devtools/combined.bin` for paste-testing. ## Clicknick compatibility diff --git a/tests/fixtures/coverage/golden/verify_progress.log b/tests/fixtures/coverage/golden/verify_progress.log index 799685d..c3299b5 100644 --- a/tests/fixtures/coverage/golden/verify_progress.log +++ b/tests/fixtures/coverage/golden/verify_progress.log @@ -1,4 +1,4 @@ -# Verify 2026-03-20T19:38:08.081138+00:00 +# verify_progress.log — tracks Click paste verification status cond__and: worked cond__eq: worked cond__fall: worked