Skip to content
Merged
135 changes: 131 additions & 4 deletions .claude/hooks/workflow-context-injector.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# Keep in sync with map_step_runner.py GOAL_HEADING_RE
# Captures the text under a "## Goal" or "## Overview" heading, up to the
# next "##" heading or the end of the input.
# NOTE(review): "." only crosses newlines under re.DOTALL -- presumably the
# call site passes that flag; confirm where this pattern is compiled/used.
GOAL_HEADING_RE = r"## (?:Goal|Overview)\n(.*?)(?=\n##|\Z)"
# NOTE(review): presumably the character cap applied to the reminder text;
# the consumer is outside this view -- confirm.
REMINDER_LIMIT = 700
# Character budget for the whole injected context: reminder + separator +
# personal-rules block. main() subtracts the first two from this value to
# obtain the limit it hands to _build_personal_block.
PERSONAL_BLOCK_BUDGET_TOTAL = 10000
# Inserted between the reminder and the personal-rules block when both exist.
PERSONAL_RULES_SEPARATOR = "\n\n"

# Bash commands that don't need workflow reminders
READONLY_COMMANDS = {
Expand Down Expand Up @@ -601,6 +603,120 @@ def format_reminder(
return base


def _sanitize_fence_content(text: str) -> str:
"""Remove fence tag occurrences from user-supplied content.

Strips case-insensitive literal ``<personal-rules`` and
``</personal-rules>`` so that a malicious or accidental occurrence
inside a rules file cannot close the outer fence early (INV-6/E7).

Postcondition: neither ``<personal-rules`` nor ``</personal-rules>``
appears in the returned string (case-insensitive).
"""
text = re.sub(r"(?i)</personal-rules>", "", text)
text = re.sub(r"(?i)<personal-rules", "", text)
return text


def _load_personal_rules(project_dir: Path) -> tuple[int, str]:
"""Load personal learned rules from ``.map/personal/rules/learned/``.

Reads every ``*.md`` file under the directory in sorted order,
sanitises each file's content through ``_sanitize_fence_content``,
and returns a tuple of ``(count, joined_content)``.

Returns ``(0, "")`` when the directory does not exist or contains
no readable ``.md`` files.

Invariants:
- INV-1: read-only; never writes anything, never opens credential files.
- HC-1: reads only ``*.md`` under the ``learned`` subdirectory.
- Symlink-escape guard: any resolved path that escapes the base
directory is silently skipped.
"""
base = project_dir / ".map" / "personal" / "rules" / "learned"
if not base.is_dir():
return (0, "")

base_resolved = base.resolve()
sanitized_parts: list[str] = []

for md_file in sorted(base.glob("*.md")):
try:
resolved = md_file.resolve()
if not resolved.is_relative_to(base_resolved):
continue
except OSError:
continue

try:
content = md_file.read_text(encoding="utf-8")
except (OSError, UnicodeDecodeError):
continue

sanitized_parts.append(_sanitize_fence_content(content))

count = len(sanitized_parts)
return (count, "\n".join(sanitized_parts))


def _build_personal_block(count: int, content: str, limit: int) -> str:
"""Assemble the ``<personal-rules>`` XML block for context injection.

Returns ``""`` when *count* is zero or negative (HC-3).

Otherwise assembles::

<personal-rules>
[personal-rules: N files]
<content>
</personal-rules>

If the assembled string exceeds *limit*, the content is trimmed from
the END and a ``[... trimmed]`` marker is inserted on its own line
before the closing tag. The opening line, banner, and closing tag
are ALWAYS present (INV-4), even when content must be trimmed to
empty.

Raw bullet markdown in *content* is concatenated unchanged (SC-2).
"""
if count <= 0:
return ""

opening = "<personal-rules>"
banner = f"[personal-rules: {count} files]"
closing = "</personal-rules>"

assembled = opening + "\n" + banner + "\n" + content + "\n" + closing

if len(assembled) <= limit:
return assembled

# Compute fixed overhead for the trimmed variant:
# opening\n banner\n trimmed_content\n [... trimmed]\n closing
trim_marker = "[... trimmed]"
overhead = (
len(opening) + 1 # opening + \n
+ len(banner) + 1 # banner + \n
+ 1 # \n before trim_marker
+ len(trim_marker) + 1 # trim_marker + \n
+ len(closing) # closing (no trailing \n)
)
content_budget = max(0, limit - overhead)
trimmed_content = content[:content_budget]
result = (
opening + "\n"
+ banner + "\n"
+ trimmed_content + "\n"
+ trim_marker + "\n"
+ closing
)

# Degenerate guard: if even the skeleton exceeds limit, emit it anyway
# (correctness of the fence beats the cap in this edge case).
return result


def main() -> None:
if os.environ.get("MAP_INVOKED_BY"):
sys.exit(0)
Expand Down Expand Up @@ -684,23 +800,34 @@ def main() -> None:
suppress_required = True
reminder = format_reminder(state, branch, suppress_required=suppress_required)
if reminder:
project_dir = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd()))
personal_count, personal_content = _load_personal_rules(project_dir)
personal_limit = max(
0,
PERSONAL_BLOCK_BUDGET_TOTAL - len(reminder) - len(PERSONAL_RULES_SEPARATOR),
)
personal_block = _build_personal_block(personal_count, personal_content, personal_limit)
assembled = (
reminder if not personal_block else reminder + PERSONAL_RULES_SEPARATOR + personal_block
)
assert len(assembled) <= PERSONAL_BLOCK_BUDGET_TOTAL
# Per-turn dedup: same reminder + same state_mtime within 5s = same
# turn; squelch to avoid the [MAP] banner repeating across every
# Edit/Write/Bash invocation in a single agent burst.
if _should_squelch_duplicate(branch, reminder):
if _should_squelch_duplicate(branch, assembled):
record_hook_injection_status(
branch, state, "deduped", "duplicate reminder squelched", tool_name
)
print("{}")
sys.exit(0)
_write_dedup_cache(branch, reminder)
_write_dedup_cache(branch, assembled)
record_hook_injection_status(
branch, state, "injected", "reminder emitted", tool_name, len(reminder)
branch, state, "injected", "reminder emitted", tool_name, len(assembled)
)
output = {
"hookSpecificOutput": {
"hookEventName": "PreToolUse",
"additionalContext": reminder,
"additionalContext": assembled,
}
}
print(json.dumps(output))
Expand Down
21 changes: 11 additions & 10 deletions .claude/hooks/workflow-gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,15 @@
# JSON edit, third-party tool) as a security regression on this gate.
TERMINAL_PHASES = {"COMPLETE"} # Workflow closed — gate is permissive.

# MONITOR hot-fix opt-in: when MAP_MONITOR_HOTFIX=1 in env, Edits are
# allowed during MONITOR so the operator can land a 2-line nit without
# spinning a full monitor_failed → ACTOR retry cycle. Opt-in (not default)
# because the unconditional behaviour would silently widen the gate; the
# operator must set the env variable per-session to acknowledge they're
# making a hot-fix and re-running validate_step("2.4") themselves.
# MONITOR hot-fix: Edits during MONITOR are allowed BY DEFAULT. Actor
# routinely needs to append a test or land a small nit while the Monitor
# verdict is being captured, and blocking that forced operators through an
# escape hatch (the former MAP_MONITOR_HOTFIX=1 opt-in). The default is now
# permissive; set MAP_MONITOR_HOTFIX=0 to restore strict read-only MONITOR.
# The operator remains responsible for re-running validate_step("2.4") after
# any MONITOR-phase edit.
HOTFIX_PHASES: set[str] = (
{"MONITOR"} if os.environ.get("MAP_MONITOR_HOTFIX") == "1" else set()
set() if os.environ.get("MAP_MONITOR_HOTFIX") == "0" else {"MONITOR"}
)
ALLOWED_PHASES = EDITING_PHASES | TERMINAL_PHASES | HOTFIX_PHASES

Expand Down Expand Up @@ -284,9 +285,9 @@ def is_editing_phase(branch: str) -> tuple[bool, Optional[str]]:
" - Or call monitor_failed if Actor needs revisions, returning\n"
" to ACTOR phase legitimately.\n"
"\n"
"Hot-fix escape: MAP_MONITOR_HOTFIX=1 env opt-in re-opens Edit\n"
"during MONITOR for trivial one-line nits (operator acknowledges\n"
"they will re-run validate_step 2.4 themselves)."
"Note: MONITOR-phase Edits are allowed by default; set\n"
"MAP_MONITOR_HOTFIX=0 to make MONITOR strictly read-only\n"
"(operator then re-runs validate_step 2.4 themselves)."
)
return False, (
f"Workflow gate: Edit blocked during phase '{current_phase}' "
Expand Down
36 changes: 36 additions & 0 deletions .claude/skills/map-learn/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,42 @@ After writing, count bullets in each modified file. If any file exceeds 50 bulle
⚠ {filename} has {N} rules (recommended max: 50). Consider pruning old or low-value rules.
```

### Personal vs public write-time choice

When writing a NEW rule, choose the target layer at write time:

| Layer | Directory | Loaded by |
|---|---|---|
| **Public** (team-shared) | `.claude/rules/learned/<category>.md` | Claude Code on every session |
| **Personal** (user-local) | `.map/personal/rules/learned/<category>.md` | Active MAP workflows only (see D2 note below) |

Both layers use the **same 6-category → file mapping** from the table above and the **same bullet format**:

```markdown
- **{title}** ({YYYY-MM-DD}): {content} [workflow: {workflow_type}]
```

Only the directory prefix differs. Create the personal directory if it does not exist:

```bash
mkdir -p .map/personal/rules/learned
```

The `.map/personal/` tree is repo-global but gitignored (HC-1), keeping personal rules off version control.

**D2 limitation — personal rules inject only during active MAP workflows:** Unlike `.claude/rules/` files which Claude Code auto-loads on every session, personal rules under `.map/personal/rules/learned/` are injected only when an active MAP workflow is running (i.e., when `.map/<branch>/step_state.json` is present in the branch workspace). They are NOT injected at all in sessions that have no active MAP workflow. This is an informed trade-off (E5): personal rules stay scoped to the workflow context where they are most relevant, but you will not see them in ad-hoc sessions.

### Promoting a personal rule to public

To share a personal rule with the team, **move** it from the personal layer to the public layer:

1. **Locate** the bullet in `.map/personal/rules/learned/<category>.md` (same category → file mapping).
2. **Check idempotency** — a rule is already present iff a bullet with the same exact bold-title token (the text between the leading `**...**` markers) exists in the target public file.
- If the bold-title token is **not** found in the public file: insert the bullet into `.claude/rules/learned/<category>.md`.
- If the bold-title token **is already** found in the public file: skip insertion (do not duplicate).
- In **both** cases: remove the bullet from the personal file. Re-running promote never duplicates and always cleans up the personal copy.
3. **Result:** the rule is now in `.claude/rules/learned/<category>.md` and no longer in `.map/personal/rules/learned/<category>.md`.

---

## Step 4: Summary Report
Expand Down
21 changes: 11 additions & 10 deletions .codex/hooks/workflow-gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,15 @@
# JSON edit, third-party tool) as a security regression on this gate.
TERMINAL_PHASES = {"COMPLETE"} # Workflow closed — gate is permissive.

# MONITOR hot-fix opt-in: when MAP_MONITOR_HOTFIX=1 in env, Edits are
# allowed during MONITOR so the operator can land a 2-line nit without
# spinning a full monitor_failed → ACTOR retry cycle. Opt-in (not default)
# because the unconditional behaviour would silently widen the gate; the
# operator must set the env variable per-session to acknowledge they're
# making a hot-fix and re-running validate_step("2.4") themselves.
# MONITOR hot-fix: Edits during MONITOR are allowed BY DEFAULT. Actor
# routinely needs to append a test or land a small nit while the Monitor
# verdict is being captured, and blocking that forced operators through an
# escape hatch (the former MAP_MONITOR_HOTFIX=1 opt-in). The default is now
# permissive; set MAP_MONITOR_HOTFIX=0 to restore strict read-only MONITOR.
# The operator remains responsible for re-running validate_step("2.4") after
# any MONITOR-phase edit.
HOTFIX_PHASES: set[str] = (
{"MONITOR"} if os.environ.get("MAP_MONITOR_HOTFIX") == "1" else set()
set() if os.environ.get("MAP_MONITOR_HOTFIX") == "0" else {"MONITOR"}
)
ALLOWED_PHASES = EDITING_PHASES | TERMINAL_PHASES | HOTFIX_PHASES

Expand Down Expand Up @@ -284,9 +285,9 @@ def is_editing_phase(branch: str) -> tuple[bool, Optional[str]]:
" - Or call monitor_failed if Actor needs revisions, returning\n"
" to ACTOR phase legitimately.\n"
"\n"
"Hot-fix escape: MAP_MONITOR_HOTFIX=1 env opt-in re-opens Edit\n"
"during MONITOR for trivial one-line nits (operator acknowledges\n"
"they will re-run validate_step 2.4 themselves)."
"Note: MONITOR-phase Edits are allowed by default; set\n"
"MAP_MONITOR_HOTFIX=0 to make MONITOR strictly read-only\n"
"(operator then re-runs validate_step 2.4 themselves)."
)
return False, (
f"Workflow gate: Edit blocked during phase '{current_phase}' "
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ coverage.json
!.map/static-analysis/
!.map/scripts/
.map/scripts/.map/
.map/personal/
# ^ Personal/local learned-rules layer — user-local, never committed or shipped
# (redundant over .map/* above; kept explicit for intent + defense in depth)

# Temporary verification files
mapify_cli_verification_*.json
Expand Down
68 changes: 68 additions & 0 deletions docs/improvement-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,71 @@
- Require complex workflows to consume the prior stage artifact explicitly before proceeding; for example, review should load spec + tests + diff, and code execution should record which test/spec contract it is satisfying. Shipped as `2604.039-followup-3` via `prior_stage_consumption` reports in verification summaries and review bundles plus an explicit validator command.
- Update canonical docs so MAP has a visible default artifact pipeline even if individual commands still differ in internal implementation details.


---

## Phase B run — framework gate findings (2026-05-30)

Discovered while running `/map-efficient` for the personal-rules layer. Each
item: defect, fix approach, and how to test after the fix.

### DONE (this change) — MONITOR-phase Edit gate now permissive by default

`MAP_MONITOR_HOTFIX` defaults to **on**: `.claude/hooks/workflow-gate.py` (and
the `.codex/` copy) allow Edit/Write/MultiEdit during MONITOR by default;
`MAP_MONITOR_HOTFIX=0` restores strict read-only MONITOR. The operator stays
responsible for re-running `validate_step("2.4")` after a MONITOR-phase edit.

Why: Actor routinely appends a test / lands a nit while the Monitor verdict is
being captured. With the old default (off), the gate blocked these legitimate
writes and forced operators to set an escape-hatch env var to get past it.

Tested in `tests/test_workflow_gate.py`:
- `test_allows_edit_during_monitor_phase_by_default` (allow with no env)
- `test_monitor_strict_mode_blocks_edit` (`MAP_MONITOR_HOTFIX=0` blocks; deny
message documents the opt-out + `monitor_failed`)

### OPEN — Strict-scope gate enforcement (`MAP_STRICT_SCOPE`)

Two related defects: phase gates trust a "checkmark" instead of actual repo
state. Fix extends the EXISTING opt-in `MAP_STRICT_SCOPE=1` (already used by
`validate_mutation_boundary` in `validate_step("2.4")`); default off →
non-breaking.

**#4 — `validate_step("2.3")` (ACTOR) doesn't verify Actor wrote anything.**
`map_orchestrator.py::validate_step` closes ACTOR without checking the diff; the
machine can reach MONITOR while edits are pending. `files_changed` is only
reconciled later in `record_subtask_result` (warn-only).
Fix (under `MAP_STRICT_SCOPE=1`): in `validate_step("2.3")`, diff the current
subtask vs its baseline SHA; empty diff → `valid=false`, `reason="actor_no_diff"`.
Subtasks closed via `mark_subtask_complete` (synthetic no-op) are exempt.

**#6 — `validate_step("2.4")` doesn't confirm the MANDATORY `detect_*` gates ran.**
`detect_actor_files_changed_mismatch`, `detect_symbol_blast_radius`,
`detect_cross_subtask_regression_risk` are skill-MANDATORY but unenforced.
Fix (under `MAP_STRICT_SCOPE=1`): each `detect_*` helper writes a receipt keyed
by `(subtask_id, gate_name)` into `step_state.json`; `validate_step("2.4")`
rejects (`valid=false`, `reason="gates_not_run"`, listing missing gates) when
receipts are absent. Mirror the `validate_mutation_boundary` reject path.

**How to test after the fix.** Dual-copy invariant: run `make sync-templates`
before pytest (suite imports from `src/mapify_cli/templates/map/scripts/`).
Strict ON: (1) empty-diff 2.3 → `actor_no_diff`; (2) real edit → pass; (3) no-op
exempt; (4) each `detect_*` writes a receipt; (5) missing receipts → 2.4
`gates_not_run` naming the missing gates; (6) all receipts + clean rec → pass.
Strict OFF (regression guard): (7) empty diff still closes 2.3; (8) missing
receipts don't block 2.4. Then `python3 -m pytest -q` (full suite must stay
green) and `python3 scripts/lint-hooks.py`.

### NOT FIXING (recorded, out of scope here)

- **state ↔ git reconciliation (#1):** orchestrator trusts `step_state.json`
over git; no "working tree disagrees with state" detector. Needs a dedicated
`reconcile` command — not bundled here.
- **idempotency asymmetry (#3):** re-running `validate_step("2.4")` after an
advance hard-errors "Step mismatch" while `2.2` returns a clean no-op.
Smoothing it risks masking genuine out-of-order calls; left until #1 lands.
- **baseline `status` (originally flagged #2):** NOT a bug. `record_test_baseline`
returns `"skipped"` when no harness is found and `"success"` only on a real
`returncode==0` run. The earlier `{"runner":null,...}` was an operator-side
extractor error, not a framework defect.
Loading
Loading