From 5ffe18f1c1db5870607064058ee1e2ebd1e38424 Mon Sep 17 00:00:00 2001 From: WilcoLouwerse Date: Fri, 10 Apr 2026 14:32:07 +0200 Subject: [PATCH 1/5] docs: update Claude developer guides and add repo config Updates across Claude docs (commands, getting-started, global-claude-settings, parallel-agents, testing, workflow, writing-docs, writing-skills, writing-specs), usage-tracker docs (MODELS, QUICKSTART, README, SETUP), global-settings README, adds .gitattributes for line ending normalization, and adds .claude/ config directory. Co-Authored-By: Claude Sonnet 4.6 --- .claude/settings.json | 7 + .gitattributes | 56 + docs/claude/commands.md | 2758 ++++++++++++------------- docs/claude/getting-started.md | 450 ++-- docs/claude/global-claude-settings.md | 402 ++-- docs/claude/parallel-agents.md | 278 +-- docs/claude/testing.md | 4 +- docs/claude/workflow.md | 530 ++--- docs/claude/writing-docs.md | 1108 +++++----- docs/claude/writing-skills.md | 92 +- docs/claude/writing-specs.md | 788 +++---- global-settings/README.md | 2 +- usage-tracker/MODELS.md | 19 +- usage-tracker/QUICKSTART.md | 6 +- usage-tracker/README.md | 2 +- usage-tracker/SETUP.md | 4 +- 16 files changed, 3316 insertions(+), 3190 deletions(-) create mode 100644 .claude/settings.json create mode 100644 .gitattributes diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000..2d603ef --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "additionalDirectories": [ + "/home/wilco/wordpress-docker/.claude/docs" + ] + } +} diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..80cd45f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,56 @@ +# Auto detect text files and perform LF normalization +* text=auto + +# Source code - always use LF +*.py text eol=lf +*.js text eol=lf +*.mjs text eol=lf + +# Configuration files - always use LF +*.json text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.toml text eol=lf +*.xml text 
eol=lf + +# Styles - always use LF +*.css text eol=lf + +# HTML and templates - always use LF +*.svg text eol=lf + +# Documentation - always use LF +*.md text eol=lf +*.txt text eol=lf + +# Shell scripts - always use LF +*.sh text eol=lf + +# Dockerfiles - always use LF +Dockerfile text eol=lf +Dockerfile.* text eol=lf + +# Git files - always use LF +.gitattributes text eol=lf +.gitignore text eol=lf +.gitmodules text eol=lf + +# Windows script files - use CRLF +*.bat text eol=crlf +*.cmd text eol=crlf +*.ps1 text eol=crlf + +# Binary files - do not modify +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.webp binary +*.pdf binary +*.woff binary +*.woff2 binary +*.ttf binary +*.zip binary +*.gz binary +*.tar binary diff --git a/docs/claude/commands.md b/docs/claude/commands.md index be6df09..387549d 100644 --- a/docs/claude/commands.md +++ b/docs/claude/commands.md @@ -1,1379 +1,1379 @@ -# Command Reference - -Complete reference for all commands available in the spec-driven development workflow. - -## OpenSpec Built-in Commands - -These commands are installed per-project when you run `openspec init`. They're available inside each project directory. - ---- - -### `/opsx-new ` - -**Phase:** Spec Building - -Start a new change. Creates the change directory with metadata. - -**Usage:** -``` -/opsx-new add-publication-search -``` - -**What it creates:** -``` -openspec/changes/add-publication-search/ -└── .openspec.yaml # Change metadata (schema, created date) -``` - -**Tips:** -- Use descriptive kebab-case names: `add-dark-mode`, `fix-cors-headers`, `refactor-object-service` -- The name becomes a GitHub Issue label, so keep it readable - ---- - -### `/opsx-ff` - -**Phase:** Spec Building - -Fast-forward: generates ALL artifacts in dependency order (proposal → specs → design → tasks) in one go. 
- -**Usage:** -``` -/opsx-ff -``` - -**What it creates:** -``` -openspec/changes/add-publication-search/ -├── .openspec.yaml -├── proposal.md # Why & what -├── specs/ # Delta specs (ADDED/MODIFIED/REMOVED) -│ └── search/ -│ └── spec.md -├── design.md # How (technical approach) -└── tasks.md # Implementation checklist -``` - -**When to use:** When you have a clear idea of what you want to build and want to generate everything quickly for review. - -**When NOT to use:** When you want to iterate on each artifact step by step, getting feedback between each. Use `/opsx-continue` instead. - -**Model:** Asked at run time — the command asks which model to use and spawns a subagent with that model for artifact generation. Artifact quality (specs, design, tasks) directly determines implementation quality downstream. **Sonnet** for most changes. **Opus** for complex or architectural changes where deeper reasoning improves the design. - ---- - -### `/opsx-continue` - -**Phase:** Spec Building - -Creates the next artifact in the dependency chain. Run repeatedly to build specs incrementally. - -**Usage:** -``` -/opsx-continue # Creates proposal.md (first time) -/opsx-continue # Creates specs/ (second time) -/opsx-continue # Creates design.md (third time) -/opsx-continue # Creates tasks.md (fourth time) -``` - -**Dependency chain:** -``` -proposal (root) - ├── discovery (optional — requires: proposal) - ├── contract (optional — requires: proposal) - ├── specs (requires: proposal) - ├── design (requires: proposal) - ├── migration (optional — requires: design) - ├── test-plan (optional — requires: specs) - └── tasks (requires: specs + design) -``` - -**When to use:** When you want to review and refine each artifact before proceeding to the next. - ---- - -### `/opsx-explore` - -**Phase:** Pre-spec - -Think through ideas and investigate the codebase before starting a formal change. No artifacts are created. 
- -**Usage:** -``` -/opsx-explore -``` - -**When to use:** When you're not sure what approach to take yet and want to investigate first. - -**Comparison with `/app-explore`:** - -| | `/opsx-explore` | `/app-explore` | -|---|---|---| -| **Scope** | Any topic — a change, a bug, an idea | A specific Nextcloud app's configuration | -| **Output** | None — thinking only | Writes to `openspec/app-config.json` | -| **When to use** | Before starting a change (`/opsx-new`) when requirements are unclear | When designing or refining an app's goals, architecture, and features | -| **Phase** | Pre-spec | Design / Configuration | - -Use `/opsx-explore` to think through *what to build*. Use `/app-explore` to document *how an app is designed and configured*. - -**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most exploration sessions. ✅ **Opus** recommended — complex analysis, architecture decisions, and strategic thinking benefit from stronger reasoning. - ---- - -### `/opsx-apply` - -**Phase:** Implementation - -OpenSpec's built-in implementation command. Reads `tasks.md` and works through tasks. - -**Usage:** -``` -/opsx-apply -``` - -**Note:** `/opsx-ralph-start` (not yet built) is planned as a dedicated implementation loop with minimal context loading and deeper GitHub Issues integration. For now, use this command — it already supports `plan.json` and GitHub Issues when a `plan.json` exists. - -**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most implementation work. **Opus** for architecturally complex changes. - ---- - -### `/opsx-verify` - -**Phase:** Review - -OpenSpec's built-in verification. Validates implementation against artifacts. 
- -**Usage:** -``` -/opsx-verify -``` - -**Checks:** -- **Completeness** — All tasks done, all requirements implemented -- **Correctness** — Implementation matches spec intent -- **Coherence** — Design decisions reflected in code -- **Test coverage** — Every new PHP service/controller has a corresponding test file; every new Vue component has a test if the project uses Jest/Vitest -- **Documentation** — New features and API endpoints are described in README.md or docs/ - -**Note:** `/opsx-ralph-review` (not yet built) is planned as a dedicated review command that cross-references shared specs and creates GitHub Issues for findings. For now, use this command — it already supports GitHub Issues sync via `plan.json` when present. - -**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most verification work. **Opus** for complex or large changes. - ---- - -### `/opsx-sync` - -**Phase:** Archive - -Merges delta specs from the change into the main `openspec/specs/` directory. - -**Usage:** -``` -/opsx-sync -``` - -**What it does:** -- **ADDED** requirements → appended to main spec -- **MODIFIED** requirements → replace existing in main spec -- **REMOVED** requirements → deleted from main spec - -Usually done automatically during archive. - ---- - -### `/sync-docs` - -**Phase:** Maintenance - -Check and sync documentation to reflect the current project state. Two targets: **app docs** (`{app}/docs/`) for a specific Nextcloud app's users and admins, and **dev docs** (`.claude/docs/`) for Claude and developers. 
- -**Usage:** -``` -/sync-docs # prompts for target -/sync-docs app # prompts for which app, then syncs its docs/ -/sync-docs app openregister # sync docs for a specific app -/sync-docs dev # sync developer/Claude docs (.claude/docs/) -``` - -Before syncing, runs 4 preliminary checks in parallel — config.yaml rules vs writing-docs.md/writing-specs.md, Sources of Truth accuracy, writing-specs.md vs schema template alignment (`.claude/openspec/schemas/conduction/`), and forked schema drift from the upstream `spec-driven` schema. Reports gaps and asks whether to fix before proceeding. - -**App docs mode** (`{app}/docs/`) — checks the app's `README.md` (root), `docs/features/`, `docs/ARCHITECTURE.md`, `docs/FEATURES.md`, `docs/GOVERNMENT-FEATURES.md`, and any other user-facing `.md` files against the app's current specs. Also loads all company-wide ADRs from `apps-extra/.claude/openspec/architecture/` and any app-level ADRs as auditing context (never as link targets in app docs). Flags outdated descriptions, missing features, stale `[Future]` markers (with full removal checklist), broken links, duplicated content, writing anti-patterns, ADR compliance gaps (screenshots, i18n, API conventions), and missing GEMMA/ZGW/Forum Standaardisatie standards references. Never inserts links into `.claude/` paths. Always shows a diff and asks for confirmation before writing. - -**Dev docs mode** (`.claude/docs/`) — checks `commands.md`, `workflow.md`, `writing-specs.md`, `writing-docs.md`, `testing.md`, `getting-started.md`, `README.md`, plus the conduction schema (`.claude/openspec/schemas/conduction/schema.yaml`) and its `templates/spec.md` for alignment with `writing-specs.md`. Never changes intent without user confirmation. After syncing, runs a Phase 6 review of all commands and skills for stale references, outdated instructions, and redundant inline content — and asks whether to update them. 
- -Both modes enforce the [Documentation Principles](writing-docs.md) — duplication and wrong-audience content are flagged as issues, with direct links to the relevant writing-docs.md sections. - -**When to use:** After a significant batch of changes — new commands, archived features, updated specs, or structural changes to the project. - ---- - -### `/opsx-archive` - -**Phase:** Archive - -Complete a change and preserve it for the historical record. - -**Usage:** -``` -/opsx-archive -``` - -**What it does:** -1. Checks artifact and task completion -2. Syncs delta specs into main specs (if not already done) -3. Moves the change to `openspec/changes/archive/YYYY-MM-DD-/` -4. All artifacts are preserved for audit trail -5. Updates or creates `docs/features/.md` — creates it if no matching feature doc exists -6. Updates the feature overview table in `docs/features/README.md` (creates the file if it doesn't exist) -7. Creates or updates `CHANGELOG.md` — completed tasks become versioned entries (version from `app-config.json`); uses [Keep a Changelog](https://keepachangelog.com/) format - ---- - -### `/opsx-bulk-archive` - -**Phase:** Archive - -Archive multiple completed changes at once. - -**Usage:** -``` -/opsx-bulk-archive -``` - -**When to use:** When you have several changes that are all complete and want to clean up. - ---- - -### `/opsx-apply-loop` - -**Phase:** Full Lifecycle (experimental) - -Automated apply→verify loop for a single change in a specific app. Runs the implementation loop inside an isolated Docker container, optionally runs targeted tests on the host, then archives and syncs to GitHub. - -**Usage:** -``` -/opsx-apply-loop # asks which app + change to run -/opsx-apply-loop procest add-sla-tracking # run a specific app/change -/opsx-apply-loop openregister seed-data # run in a different app -``` - -**What it does:** -1. Selects app and change (scans across all apps, or uses provided arguments) -2. 
Checks for a GitHub tracking issue (runs `/opsx-plan-to-issues` first if missing) -3. Creates a `feature//` branch in the app's git repo -4. Checks the Nextcloud environment is running -5. Reads `test-plan.md` (if present) and classifies which test commands to include in the loop -6. Asks whether to include a test cycle (tests run **outside the container** against the live Nextcloud app) -7. Builds and starts an isolated Docker container — mounts the app directory + shared `.claude/` skills (read-only); no git, no GitHub -8. Inside the container: runs `/opsx-apply` → `/opsx-verify` in a loop (max 5 iterations) - - CRITICAL issues retrigger the loop; WARNING issues also retrigger but never block archive - - At max iterations with only warnings remaining, archive still proceeds - - Seed data (ADR-016) is created/updated during apply as required -9. Captures container logs to `.claude/logs/`, then removes container -10. **If test cycle enabled:** runs targeted single-agent test commands on the host (max 3 test iterations); failures loop back into apply→verify -11. **If test cycle enabled and deferred tests exist:** asks about multi-agent/broad tests from the test-plan that were excluded from the loop; runs them once if confirmed, with one final apply→verify if they fail -12. Runs `/opsx-archive` on the host (after tests pass or tests skipped) -13. Commits all changes in the app repo with a generated commit message -14. Syncs GitHub: updates issue checkboxes, posts a completion comment, prompts to close -15. Asks about test scenario conversion (deferred from archive) -16. Shows a final report with iterations used, tasks completed, and what's next - -**When to use:** When you want hands-off implementation of a single change in one app. Prefer `/opsx-pipeline` for running multiple changes across apps in parallel. - -**Container design:** The container mounts the app directory at `/workspace` and the shared `.claude/` at `/workspace/.claude` (read-only). 
This gives the container's Claude session access to all shared skills without requiring git or GitHub. Each app is isolated — the container only touches one app directory. - -**Container limitations:** GitHub operations, `docker compose exec`, browser tests, and git commands are not available inside the container — all handled on the host after the container exits. Tests always run on the host against the live Nextcloud environment. - -**Cap impact:** High — runs apply + verify sequentially (up to 5 iterations), optionally followed by targeted tests (up to 3 test iterations). Each iteration is a full implementation + verification pass. - -**Model:** Sonnet recommended for most changes; Opus for complex architectural work. Asked at run time. - -**Requires:** -- Docker running -- `gh` CLI authenticated on the host -- Nextcloud containers up (auto-started if not running — uses `docker compose -f` pointed at the docker-dev root's `.github/docker-compose.yml`) -- **Container authentication** — the Docker container cannot use interactive OAuth, so it needs an explicit token. One of these environment variables must be set in your shell (see [Getting Started — Container authentication](getting-started.md#prerequisites) for full setup instructions): - 1. `CLAUDE_CODE_AUTH_TOKEN` (preferred) — uses your existing Claude Max/Pro subscription at no extra cost. Generate with `claude setup-token`, then `export CLAUDE_CODE_AUTH_TOKEN="..."` in `~/.bashrc`. - 2. `ANTHROPIC_API_KEY` (fallback) — uses prepaid API credits from console.anthropic.com (costs money). `export ANTHROPIC_API_KEY="sk-ant-api03-..."` in `~/.bashrc`. - ---- - -### `/opsx-pipeline` - -**Phase:** Full Lifecycle (experimental) - -Process one or more OpenSpec changes through the full lifecycle in parallel — each change gets its own subagent, git worktree, feature branch, and PR. 
- -**Usage:** -``` -/opsx-pipeline all # process all open proposals across all repos -/opsx-pipeline procest # all open proposals in one app -/opsx-pipeline sla-tracking routing # specific changes by name -``` - -**What it does:** -1. Discovers open proposals (changes with `proposal.md` but not yet archived) -2. Presents an execution plan and asks for confirmation -3. Creates a git worktree and feature branch per change -4. Launches up to 5 parallel subagents — each runs ff → apply → verify → archive -5. Monitors progress and queues remaining changes as slots free up -6. Creates a PR per completed change to `development` -7. Reports full results including tasks completed, quality checks, and PR links - -**Subagent lifecycle per change:** -``` -ff (artifacts) → plan-to-issues → apply (implement + tests + docs) → verify → archive → push + PR -``` - -**When to use:** When you have multiple open proposals ready to implement and want to run them hands-off. - -**Cap impact:** High — up to 5 agents running full implementations in parallel. Each agent may run for 10-30 minutes depending on change complexity. - -**Model:** Asked at run time with three options: one model for all sub-agents, choose per change, or auto-select by reading each proposal. **Haiku** for simple changes (config, text, minor fixes). **Sonnet** for standard feature work. **Opus** for complex architectural changes. The model applies per implementation sub-agent — choose based on change complexity and available quota. - -**Requires:** `gh` CLI authenticated; quality checks must pass per app (`composer check:strict` / `npm run lint`) - ---- - -### `/opsx-onboard` - -**Phase:** Setup - -Get an overview of the current project's OpenSpec setup and active changes. - -**Usage:** -``` -/opsx-onboard -``` - ---- - -## App Management Commands - -Commands for creating, configuring, and maintaining Nextcloud apps. 
These work together in a lifecycle: `/app-design` → `/app-create` → `/app-explore` → `/app-apply` → `/app-verify`. - -For a full guide on the lifecycle, when to use each command, and how they relate to the OpenSpec workflow, see [App Lifecycle](app-lifecycle.md). - ---- - -### `/app-design` - -**Phase:** Setup / Design - -Full upfront design for a new Nextcloud app — architecture research, competitor analysis, feature matrix, ASCII wireframes, and OpenSpec setup. Run this **before** `/app-create` for brand-new apps. - -**Usage:** -``` -/app-design -/app-design my-new-app -``` - -**What it does:** -1. Researches the problem domain and existing solutions -2. Produces architecture decisions, feature matrix, and ASCII wireframes -3. Sets up the `openspec/` structure with initial design docs - -**Output:** Design documentation ready to hand off to `/app-create`. - -**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for general design sessions. ✅ **Opus** recommended — competitive research, architecture decisions, and full design document creation benefit from stronger reasoning. - ---- - -### `/app-create` - -**Phase:** Setup - -Bootstrap a new Nextcloud app from the ConductionNL template, or onboard an existing repo. Always creates an `openspec/` configuration folder that tracks all app decisions. - -**Usage:** -``` -/app-create -/app-create my-new-app -``` - -**What it does:** -1. Asks whether a local folder already exists — if yes, uses it as the base; if no, clones the template -2. Collects basic identity: app ID, name, goal, one-line summary, Nextcloud category -3. Asks about dependencies (OpenRegister, additional CI apps) -4. Creates `openspec/app-config.json` and `openspec/README.md` -5. Replaces all template placeholders (`AppTemplate`, `app-template`, etc.) across all files -6. Creates the GitHub repository and branches (`main`, `development`, `beta`) -7. 
Optionally sets branch protection and team access -8. Optionally installs dependencies and enables the app in the local Nextcloud environment - -**Output:** Fully scaffolded app directory with correct identity, CI/CD workflows, and GitHub repo. - -**Requires:** `gh` CLI authenticated (`gh auth login`) - ---- - -### `/app-explore` - -**Phase:** Design / Configuration - -Enter exploration mode for a Nextcloud app. Think through its goals, architecture, features, and Architectural Decision Records (ADRs). Updates `openspec/` files to capture decisions. - -**Usage:** -``` -/app-explore -/app-explore openregister -``` - -**What it does:** -- Loads `openspec/app-config.json` for full context -- Acts as a **thinking partner** — draws diagrams, asks questions, challenges assumptions -- Captures decisions into `openspec/app-config.json` -- Never writes application code — only `openspec/` files - -**Feature lifecycle:** -``` -idea → planned → in-progress → done -``` -When a feature moves to `planned` (has user stories + acceptance criteria), suggests `/opsx-ff {feature-name}` to create an OpenSpec change from it. - -**Important:** Run this before implementing anything. Features defined here become the inputs for `/opsx-ff`. - -**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for general app exploration. ✅ **Opus** recommended — feature strategy, ADRs, and competitive analysis benefit from stronger reasoning. - ---- - -### `/app-apply` - -**Phase:** Configuration - -Applies `openspec/app-config.json` decisions back into the actual app files. The counterpart to `/app-explore`. - -**Usage:** -``` -/app-apply -/app-apply openregister -``` - -**What it does:** -1. Loads `openspec/app-config.json` -2. Compares current file values against config — builds a list of pending changes -3. Shows a clear diff summary of what would change -4. 
Asks for confirmation before applying any changes -5. Updates only the tracked values in each file (IDs, names, namespaces, CI parameters) — never touches feature code -6. Optionally runs `composer check:strict` to verify PHP changes are clean - -**In scope:** `appinfo/info.xml`, CI/CD workflow parameters, PHP namespaces and app ID constants, `composer.json`/`package.json` names, `webpack.config.js` app ID, `src/App.vue` OpenRegister gate, `README.md` header. - -**Out of scope:** Feature code, business logic, Vue components, PHP controllers. Use `/opsx-ff {feature-name}` for those. - ---- - -### `/app-verify` - -**Phase:** Review / Audit - -Read-only audit. Checks every tracked app file against `openspec/app-config.json` and reports drift — without making any changes. - -**Usage:** -``` -/app-verify -/app-verify openregister -``` - -**What it does:** -- Loads `openspec/app-config.json` and reads every tracked file -- Reports each check as **CRITICAL** (will break CI or runtime), **WARNING** (wrong metadata), or **INFO** (cosmetic drift) -- Shows exact current value vs expected value for every failing check -- Recommends `/app-apply` if issues are found - -**When to use:** After `/app-apply` to confirm changes landed, or at any time to check for drift. - ---- - -### `/clean-env` - -**Phase:** Setup / Reset - -Fully resets the OpenRegister Docker development environment. - -**Usage:** -``` -/clean-env -``` - -**What it does:** -1. Stops all Docker containers from the OpenRegister docker-compose -2. Removes all containers and volumes (full data reset) -3. Starts containers fresh -4. Waits for Nextcloud to become ready -5. Installs core apps: openregister, opencatalogi, softwarecatalog, nldesign, mydash - -**Important:** Destructive — removes all database data and volumes. Only use when a full reset is intended. - -After completion, verify at `http://localhost:8080` (admin/admin). 
- -**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. - ---- - -## Team Role Commands - -Specialist agents representing different roles on the development team. Useful for getting a focused perspective on a change — architecture review, QA, product sign-off, etc. - -| Command | Role | Focus | -|---------|------|-------| -| `/team-architect` | Architect | API design, data models, cross-app dependencies | -| `/team-backend` | Backend Developer | PHP implementation, entities, services, tests | -| `/team-frontend` | Frontend Developer | Vue components, state management, UX | -| `/team-po` | Product Owner | Business value, acceptance criteria, priority | -| `/team-qa` | QA Engineer | Test coverage, edge cases, regression risk | -| `/team-reviewer` | Code Reviewer | Standards, conventions, security, code quality | -| `/team-sm` | Scrum Master | Progress tracking, blockers, sprint health | - -**Usage:** -``` -/team-architect # review the API design for the active change -/team-qa # get QA perspective on test coverage -``` - -**Model for `/team-architect`:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Opus** recommended — best multi-framework reasoning across NLGov, BIO2/NIS2, WCAG, Haven, AVG/GDPR. **Sonnet** not recommended — may miss nuances in complex compliance scenarios. - ---- - -## Softwarecatalogus Commands (`/swc:*`) - -Commands specific to the VNG Softwarecatalogus client project. See `Softwarecatalogus/` (never commit to this repo). - ---- - -### `/swc-test` - -**Phase:** Testing - -Run automated tests for the GEMMA Softwarecatalogus — API tests (Postman/Newman), browser tests (persona agents), or both. 
- -**Usage:** -``` -/swc-test # choose mode interactively -/swc-test api # API tests only -/swc-test browser # browser tests only -/swc-test personas # all 8 persona agents -/swc-test all # everything -``` - ---- - -### `/swc-update` - -**Phase:** Maintenance - -Sync GitHub issues from VNG-Realisatie/Softwarecatalogus, auto-generate acceptance criteria, and update test infrastructure to reflect current issue state. - -**Usage:** -``` -/swc-update -``` - ---- - -## Custom Conduction Commands - -These commands are workspace-level and available from any project within `apps-extra/`. They extend OpenSpec with GitHub Issues integration and Ralph Wiggum loops. - ---- - -### `/create-pr` - -**Phase:** Git / Delivery - -Create a Pull Request from a branch in any repo. Handles the full flow interactively. - -**Usage:** -``` -/create-pr -``` - -**What it does:** -1. **Selects the repository** — scans for available git repos in the workspace, asks you to pick one (never assumes the current directory) -2. **Confirms the source branch** — shows the current branch, lets you override -3. **Recommends a target branch** based on the branching strategy; checks GitHub for an existing open PR on the same branch pair — if found, offers to view or update it instead -4. **Checks for uncommitted or unpushed changes** — if any are found, offers to commit, stash, or continue; offers to push unpushed commits before continuing -5. **Verifies global settings version** *(claude-code-config repo only)* — delegates to `/verify-global-settings-version`; pauses and offers a fix if a VERSION bump is missing -6. **Discovers CI checks from `.github/workflows/`** — reads the repo's workflow files to determine exactly which checks CI will run, then mirrors them locally (never hardcodes a list) -7. **Installs missing dependencies** (`vendor/`, `node_modules/`) if needed before running checks -8. **Runs all discovered checks** — nothing skipped; slow checks (e.g. 
test suites) ask for confirmation first; shows a pass/fail table when done -9. **Reads all commits and diffs** on the branch to draft a PR title and description from the actual changes -10. **Shows the draft in chat** for review — you can ask to change or shorten it; the loop repeats until you approve -11. **Pushes the branch and creates the PR** via `gh pr create` -12. Reports the PR URL and next steps - -**Branching strategy:** - -| Source | Recommended target | -|---|---| -| `feature/*`, `bugfix/*` | `development` | -| `development` | `beta` | -| `beta` | `main` | -| `hotfix/*` | `main` (or `beta`/`development`) | - -**Model:** Checked at run time — the command reads your active model from context and stops automatically if you're on Haiku (or anything weaker than Sonnet). Involves parsing CI workflows, detecting branch-protection rules, and reasoning about code diffs where mistakes have real consequences. **Sonnet** for most PRs. **Opus** when the repo uses reusable CI workflows, branch-protection rulesets, or a complex branching strategy — that's where it pays off most. - -**Requires:** `gh` CLI authenticated (`gh auth login`) - ---- - -### `/verify-global-settings-version` - -**Phase:** Git / Delivery - -Checks whether `global-settings/VERSION` has been correctly bumped after any changes to files in the `global-settings/` directory. Run this before creating a PR on the `ConductionNL/claude-code-config` repo. - -**Usage:** -``` -/verify-global-settings-version -``` - -**What it does:** -1. Fetches `origin/main` to get the latest published version -2. Diffs `global-settings/` between the current branch and `origin/main` -3. Compares the branch `VERSION` against the `origin/main` `VERSION` -4. 
Reports one of four outcomes: - - ✅ No changes to `global-settings/` — no bump needed - - ✅ Changes found and `VERSION` correctly bumped - - ❌ Changes found but `VERSION` not bumped — suggests the next semver and the command to apply it - - ⚠️ `VERSION` bumped but no other files changed — flags as unusual - -**When to use:** -- Standalone: any time you modify a file in `global-settings/` and want to confirm the bump is in place before committing -- Automatically: called by `/create-pr` when the selected repo is `ConductionNL/claude-code-config` — no need to run it separately in that flow - -**Semver rules for `global-settings/`:** -- `1.0.0 → 1.1.0` — new permissions, guards, or behavior added -- `1.0.0 → 2.0.0` — breaking change requiring manual migration - -**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. - ---- - -### `/opsx-plan-to-issues` - -**Phase:** Planning → GitHub - -Converts an OpenSpec change's `tasks.md` into structured `plan.json` and creates corresponding GitHub Issues. - -**Usage:** -``` -/opsx-plan-to-issues -``` - -**Prerequisites:** -- A change with completed `tasks.md` -- GitHub MCP server active or `gh` CLI authenticated -- Git remote pointing to a ConductionNL repository - -**What it does:** - -1. **Finds the active change** in the current project's `openspec/changes/` -2. **Detects the GitHub repo** from `git remote get-url origin` -3. **Parses tasks.md** into structured JSON -4. **Creates GitHub Issues:** - - One **tracking issue** (epic) with: - - Title: `[OpenSpec] ` - - Body: proposal summary + task checklist - - Labels: `openspec`, `tracking` - - One **issue per task** with: - - Title: `[] ` - - Body: description, acceptance criteria, spec ref, affected files - - Labels: `openspec`, `` -5. 
**Saves `plan.json`** with all issue numbers linked - -**Output example:** -``` -Created tracking issue: https://github.com/ConductionNL/opencatalogi/issues/42 -Created 5 task issues: #43, #44, #45, #46, #47 -Saved plan.json at: openspec/changes/add-search/plan.json - -Run /opsx-ralph-start to begin implementation. -``` - -**The plan.json it creates:** -```json -{ - "change": "add-search", - "project": "opencatalogi", - "repo": "ConductionNL/opencatalogi", - "created": "2026-02-15T10:00:00Z", - "tracking_issue": 42, - "tasks": [ - { - "id": 1, - "title": "Create SearchController", - "description": "Add new controller for search API endpoint", - "github_issue": 43, - "status": "pending", - "spec_ref": "openspec/specs/search/spec.md#requirement-search-api", - "acceptance_criteria": [ - "GIVEN a search query WHEN GET /api/search?q=test THEN returns matching results" - ], - "files_likely_affected": [ - "lib/Controller/SearchController.php" - ], - "labels": ["openspec", "add-search"] - } - ] -} -``` - ---- - -### `/opsx-ralph-start` *(not yet implemented)* - -**Phase:** Implementation - -Starts a Ralph Wiggum implementation loop driven by `plan.json`. This is the core of our minimal-context coding approach. - -**Usage:** -``` -/opsx-ralph-start -``` - -**Prerequisites:** -- A `plan.json` in the active change (created by `/opsx-plan-to-issues`) - -**What it does per iteration:** - -1. **Reads plan.json** — finds the next task with `"status": "pending"` -2. **Sets status to `"in_progress"`** in plan.json -3. **Reads ONLY the referenced spec section** — uses `spec_ref` to load just the relevant requirement, NOT the entire spec file -4. **Implements the task** — following acceptance criteria and coding standards -5. **Verifies** — checks acceptance criteria are met -6. **Updates progress:** - - Sets task to `"completed"` in plan.json - - Checks off boxes in tasks.md - - Closes the GitHub issue with a summary comment - - Updates the tracking issue checklist -7. 
**Loops** — picks up the next pending task, or stops if all done - -**Why minimal context matters:** - -Each iteration loads only: -- `plan.json` (the task list — typically 1-2 KB) -- One spec section via `spec_ref` (the specific requirement — a few paragraphs) -- The affected files - -It does NOT load: -- proposal.md -- design.md -- Other spec files -- The full tasks.md - -This prevents context window bloat and keeps each iteration fast and focused. - -**Resuming after interruption:** - -If the loop is interrupted (context limit, error, etc.), simply run `/opsx-ralph-start` again. It reads `plan.json`, finds the first non-completed task, and continues from there. - ---- - -### `/opsx-ralph-review` *(not yet implemented)* - -**Phase:** Review - -Verifies the completed implementation against all spec requirements and shared conventions. Creates a structured review report. - -**Usage:** -``` -/opsx-ralph-review -``` - -**Prerequisites:** -- All tasks in plan.json should be `"completed"` - -**What it does:** - -1. **Loads full context** — proposal, all delta specs, tasks, plan.json -2. **Checks completeness:** - - All tasks completed? - - All GitHub issues closed? - - All task checkboxes checked? -3. **Checks spec compliance:** - - For each ADDED requirement: does the implementation exist? - - For each MODIFIED requirement: is the old behavior changed? - - For each REMOVED requirement: is the deprecated code gone? - - Do GIVEN/WHEN/THEN scenarios match the code behavior? -4. **Cross-references shared specs:** - - `nextcloud-app/spec.md` — correct app structure, DI, route ordering - - `api-patterns/spec.md` — URL patterns, CORS, error responses - - `nl-design/spec.md` — design tokens, accessibility - - `docker/spec.md` — environment compatibility -5. **Categorizes findings:** - - **CRITICAL** — Spec MUST/SHALL requirement not met - - **WARNING** — SHOULD requirement not met or partial compliance - - **SUGGESTION** — Improvement opportunity -6. 
**Generates `review.md`** in the change directory -7. **Creates GitHub Issue** if CRITICAL/WARNING findings exist - -**Output example:** -``` -Review: add-search -Tasks completed: 5/5 -GitHub issues closed: 5/5 -Spec compliance: PASS (with warnings) - -Findings: -- 0 CRITICAL -- 2 WARNING - - Missing CORS headers on /api/search (api-patterns spec) - - No pagination metadata in response (api-patterns spec) -- 1 SUGGESTION - - Consider adding rate limiting - -Review saved: openspec/changes/add-search/review.md -GitHub issue created: #48 [Review] add-search: 0 critical, 2 warnings -``` - ---- - -## OpenSpec CLI Commands - -These are terminal commands (not Claude slash commands) for managing specs directly. - -| Command | Description | -|---------|-------------| -| `openspec init --tools claude` | Initialize OpenSpec in a project | -| `openspec list --changes` | List all active changes | -| `openspec list --specs` | List all specs | -| `openspec show ` | View details of a change or spec | -| `openspec status --change ` | Show artifact completion status | -| `openspec validate --all` | Validate all specs and changes | -| `openspec validate --strict` | Strict validation (errors on warnings) | -| `openspec update` | Regenerate AI tool config after CLI upgrade | -| `openspec schema which` | Show which schema is being used | -| `openspec config list` | Show all configuration | - -Add `--json` to any command for machine-readable output. - ---- - -## Testing Commands - -For detailed guidance on when to use each command, typical testing workflows, and situational advice, see [testing.md](testing.md). - -> **Note on agentic browser testing:** `/test-app`, `/test-counsel`, and `/feature-counsel` use Playwright MCP browsers to explore live applications. Results may include false positives (elements not found due to timing) or false negatives (bugs missed due to exploration order). Always verify critical findings manually. 
- ---- - -### `/test-app` - -**Phase:** Testing - -Run automated browser tests for any Nextcloud app in this workspace. Explores every page, button, and form guided by the app's documentation and specs. - -**Usage:** -``` -/test-app -/test-app procest -``` - -**Modes:** -- **Quick (1 agent)** — One agent walks through the entire app. Fast, good for smoke testing. Low cap impact. -- **Full (6 agents)** — Six parallel agents each with a different perspective: Functional, UX, Performance, Accessibility, Security, API. More thorough. High cap impact. - -**What it does:** -1. Selects the app (from argument or prompt) -2. Chooses Quick or Full mode -3. Checks `{APP}/test-scenarios/` for active scenarios — asks whether to include them -4. Reads `{APP}/docs/features/` to understand what to test -5. Asks which model to use for agents (Haiku default, Sonnet, or Opus) -6. Launches agents, each reading docs, logging in, and testing from their perspective -7. Agents execute any included test scenario steps before free exploration -8. Writes per-perspective results to `{APP}/test-results/` and a summary to `{APP}/test-results/README.md` - -**Model:** Asked at run time (applies to all sub-agents). **Haiku** (default) — fastest, lowest quota cost. **Sonnet** — more nuanced analysis, larger context window. **Opus** — deepest coverage; significant quota cost in Full mode. See [parallel-agents.md](parallel-agents.md) for context window sizes, subscription quota limits, and how they differ. - -**Cap impact:** See [parallel-agents.md](parallel-agents.md). - ---- - -### `/test-counsel` - -**Phase:** Testing - -Test a Nextcloud app from 8 persona perspectives simultaneously: Henk, Fatima, Sem, Noor, Annemarie, Mark, Priya, Jan-Willem. 
- -**Usage:** -``` -/test-counsel -``` - -**What it does:** -- Launches 8 parallel browser agents — one per persona (model is user-selected at run time; Haiku is the default) -- Each agent reads its persona card and relevant test scenarios before testing -- Tests from the perspective of that persona's role, technical level, and priorities -- Produces a combined report with findings per persona -- Writes results to `{APP}/test-results/` - -**Model:** Asked at run time (applies to all 8 agents). **Haiku** (default) — fastest, lowest quota cost. **Sonnet** — more nuanced persona findings, larger context window. **Opus** — deepest analysis; significant quota cost with 8 agents. See [parallel-agents.md](parallel-agents.md) for context window sizes, subscription quota limits, and how they differ. - -**Cap impact:** Very high — 8 parallel agents. Open a fresh Claude window before running. See [parallel-agents.md](parallel-agents.md). - ---- - -### `/feature-counsel` - -**Phase:** Discovery / Ideation - -Analyse a Nextcloud app's OpenSpec from 8 persona perspectives and suggest additional features or improvements. - -**Usage:** -``` -/feature-counsel -``` - -**What it does:** -- Reads the app's OpenSpec, specs, and existing features -- Each of the 8 personas considers what's missing from their perspective -- Produces a consolidated list of suggested features and improvements -- Does not test the live app — reads specs and docs only - -**Model:** Asked at run time (applies to all 8 agents). No browser required — agents read specs and docs only. **Sonnet** (default) — recommended; no context window concern without browser snapshots, and better reasoning produces more useful suggestions. **Haiku** — faster, lower quota, good for a quick broad pass. **Opus** — deepest reasoning for complex architectural gaps; use with full mode (8 agents) sparingly. - -**Cap impact:** Very high — 8 parallel agents. See [parallel-agents.md](parallel-agents.md). 
- ---- - -### Commands (Single-Agent) - ---- - -### `/test-functional` - -**Phase:** Testing - -Feature correctness via browser — executes GIVEN/WHEN/THEN scenarios from specs against the live app. - -**Usage:** -``` -/test-functional -``` - ---- - -### `/test-api` - -**Phase:** Testing - -REST API endpoint testing. Checks endpoints, authentication, pagination, and error responses. - -**Usage:** -``` -/test-api -``` - ---- - -### `/test-accessibility` - -**Phase:** Testing - -WCAG 2.1 AA compliance using axe-core, plus manual keyboard and focus checks. - -**Usage:** -``` -/test-accessibility -``` - ---- - -### `/test-performance` - -**Phase:** Testing - -Load times, API response times, and network request analysis via browser timing APIs. - -**Usage:** -``` -/test-performance -``` - ---- - -### `/test-security` - -**Phase:** Testing - -OWASP Top 10, Nextcloud roles, authorization, XSS, CSRF, sensitive data exposure. - -**Usage:** -``` -/test-security -``` - ---- - -### `/test-regression` - -**Phase:** Testing - -Cross-feature regression — verifies changes don't break unrelated flows. - -**Usage:** -``` -/test-regression -``` - ---- - -### `/test-persona-*` - -**Phase:** Testing - -Single-persona deep dive. 
Use when you want one persona's full assessment without launching all eight: - -| Command | Persona | Role | -|---------|---------|------| -| `/test-persona-henk` | **Henk Bakker** | Elderly citizen — low digital literacy | -| `/test-persona-fatima` | **Fatima El-Amrani** | Low-literate migrant citizen | -| `/test-persona-sem` | **Sem de Jong** | Young digital native | -| `/test-persona-noor` | **Noor Yilmaz** | Municipal CISO / functional admin | -| `/test-persona-annemarie` | **Annemarie de Vries** | VNG standards architect | -| `/test-persona-mark` | **Mark Visser** | MKB software vendor | -| `/test-persona-priya` | **Priya Ganpat** | ZZP developer / integrator | -| `/test-persona-janwillem` | **Jan-Willem van der Berg** | Small business owner | - -**Usage:** -``` -/test-persona-henk -/test-persona-priya -``` - -**Use when:** You know which persona is most affected by a change, or when you've run `/test-counsel` and want a deeper single-perspective follow-up. One agent instead of eight — lower cap cost. - -**Cap impact:** Low — single agent. See [parallel-agents.md](parallel-agents.md). - ---- - -## Test Scenario Commands - -Test scenarios are reusable, Gherkin-style descriptions of user journeys that can be executed by any test command. They live in `{APP}/test-scenarios/TS-NNN-slug.md` and are automatically discovered by `/test-app`, `/test-counsel`, and `/test-persona-*` when they run. - -> **Test scenario vs test case**: A scenario is a high-level, user-centered description of *what* to verify and *for whom* — one concrete flow, written in Given-When-Then. It is broader than a click-by-click test case but more specific than a spec requirement. - ---- - -### `/test-scenario-create` - -**Phase:** Testing - -Guided wizard for creating a well-structured test scenario for a Nextcloud app. - -**Usage:** -``` -/test-scenario-create -/test-scenario-create openregister -``` - -**What it does:** -1. Determines the next ID (`TS-NNN`) by scanning existing scenarios -2. 
Asks for title, goal, category (functional/api/security/accessibility/performance/ux/integration), and priority -3. Shows relevant personas and asks which this scenario targets -4. Suggests which test commands should automatically include it -5. Auto-suggests tags based on category and title -6. Guides through Gherkin steps (Given/When/Then), test data, and acceptance criteria -7. Generates persona-specific notes for each linked persona -8. Saves to `{APP}/test-scenarios/TS-NNN-slug.md` - -**Scenario categories and suggested personas:** - -| Category | Suggested personas | -|---|---| -| functional | Mark Visser, Sem de Jong | -| api | Priya Ganpat, Annemarie de Vries | -| security | Noor Yilmaz | -| accessibility | Henk Bakker, Fatima El-Amrani | -| ux | Henk Bakker, Jan-Willem, Mark Visser | -| performance | Sem de Jong, Priya Ganpat | -| integration | Priya Ganpat, Annemarie de Vries | - ---- - -### `/test-scenario-run` - -**Phase:** Testing - -Execute one or more test scenarios against the live Nextcloud environment using a browser agent. - -**Usage:** -``` -/test-scenario-run # list and choose -/test-scenario-run TS-001 # run specific scenario -/test-scenario-run openregister TS-001 # run from specific app -/test-scenario-run --tag smoke # run all smoke-tagged scenarios -/test-scenario-run --all openregister # run all scenarios for an app -/test-scenario-run --persona priya-ganpat # run all Priya's scenarios -``` - -**What it does:** -1. Discovers scenario files in `{APP}/test-scenarios/` -2. Filters by tag, persona, or ID as specified -3. Asks which environment to test against (local or custom URL) -4. Asks whether to use Haiku (default, cost-efficient) or Sonnet (for complex flows) -5. Launches a browser agent per scenario (parallelised up to 5 for multiple) -6. Agent verifies preconditions, follows Given-When-Then steps, checks each acceptance criterion -7. Writes results to `{APP}/test-results/scenarios/` -8. 
Synthesises a summary report for multiple runs - -**Model:** Asked at run time. **Haiku** (default) — fast, cost-efficient. **Sonnet** — for complex multi-step flows or ambiguous UI states where Haiku may misread the interface. Cap cost scales with the number of scenarios run in parallel. - -**Cap impact:** Low for single scenario; medium for multiple. See [parallel-agents.md](parallel-agents.md). - -**Result statuses**: ✅ PASS / ❌ FAIL / ⚠️ PARTIAL / ⛔ BLOCKED - ---- - -### `/test-scenario-edit` - -**Phase:** Testing - -Edit an existing test scenario — update any field (metadata or content) interactively. - -**Usage:** -``` -/test-scenario-edit # list all scenarios, pick one -/test-scenario-edit TS-001 # open specific scenario -/test-scenario-edit openregister TS-001 # open from specific app -``` - -**What it does:** -1. Locates the scenario file -2. Shows a summary of current values (status, priority, category, personas, tags, spec refs) -3. Asks what scope to edit: metadata only / content only / both / status only / tags only -4. Walks through each field in scope, showing the current value and asking for the new one -5. Supports `+tag` / `-tag` syntax for incremental tag changes, same for personas -6. Regenerates persona notes if the personas list changed -7. Optionally renames the file if the title changed -8. Writes the updated file and shows a diff-style summary - ---- - -### How existing test commands use scenarios - -| Command | Behaviour when scenarios exist | -|---|---| -| `/test-app` | Asks to include active scenarios before launching agents. Agents execute scenario steps before free exploration. | -| `/test-counsel` | Asks to include scenarios, grouped by persona. Each persona agent receives only the scenarios tagged with their slug. | -| `/test-persona-*` | Scans for scenarios matching that persona's slug. Asks to run them before free exploration in Step 2. 
| - ---- - -## Tender & Ecosystem Intelligence Commands - -These commands support the competitive analysis and ecosystem gap-finding workflow. They operate on the `concurrentie-analyse/intelligence.db` SQLite database and require the database to exist before running. - ---- - -### `/tender-scan` - -**Phase:** Intelligence Gathering - -Scrape TenderNed for new tenders, import them into SQLite, and classify unclassified tenders by software category using a local Qwen model. - -**Usage:** -``` -/tender-scan -``` - -**What it does:** -1. Runs `concurrentie-analyse/tenders/scrape_tenderned.py` to fetch fresh data -2. Imports new tenders into the intelligence database -3. Classifies unclassified tenders using Qwen via `localhost:11434` -4. Reports new tenders found, classified, and any new gaps detected - -**Requires:** Local Qwen model running on Ollama (`http://localhost:11434`) - ---- - -### `/tender-status` - -**Phase:** Intelligence Monitoring - -Show a dashboard of the tender intelligence database — totals by source, category, status, gaps, and recent activity. - -**Usage:** -``` -/tender-status -``` - -**What it does:** -- Queries `concurrentie-analyse/intelligence.db` for live stats -- Shows tenders by source, status, and category (top 15) -- Highlights categories with Conduction coverage vs gaps -- Shows top integration systems and ecosystem gaps - -**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. - ---- - -### `/tender-gap-report` - -**Phase:** Gap Analysis - -Generate a gap analysis report — software categories that appear in government tenders but have no Conduction product. - -**Usage:** -``` -/tender-gap-report -``` - -**What it does:** -1. Queries the database for categories with tenders but no `conduction_product` -2. 
Generates a markdown report at `concurrentie-analyse/reports/gap-report-{date}.md` -3. Includes top 5 gaps with tender details, organisations, and key requirements -4. Cross-references with `application-roadmap.md` to flag already-tracked gaps -5. Recommends which gaps to investigate first - ---- - -### `/ecosystem-investigate ` - -**Phase:** Competitive Research - -Deep-dive research into a software category — find and analyze open-source competitors using GitHub, G2, Capterra, AlternativeTo, and TEC. - -**Usage:** -``` -/ecosystem-investigate bookkeeping -``` - -**What it does:** -1. Loads category context and related tenders from the intelligence database -2. Uses the browser pool (browser-1 through browser-5) to scrape 5-10 competitors from multiple source types -3. Creates competitor profiles in `concurrentie-analyse/{category}/{competitor-slug}/` -4. Inserts competitors and feature data into the database with provenance tracking -5. Presents a comparison table and recommendation for Nextcloud ecosystem fit - -**Model:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most categories. **Opus** for high-stakes or complex categories where strategic depth matters. - ---- - -### `/ecosystem-propose-app ` - -**Phase:** Product Planning - -Generate a full app proposal for a software category gap, using tender requirements and competitor research as input. - -**Usage:** -``` -/ecosystem-propose-app bookkeeping -``` - -**What it does:** -1. Gathers all tenders, requirements, competitors, and integrations for the category -2. Generates a structured proposal following the template in `concurrentie-analyse/application-roadmap.md` -3. Appends the proposal to `application-roadmap.md` -4. Inserts the proposal into the `app_proposals` database table -5. Optionally bootstraps the app with `/app-create` - -**Model:** Checked at run time — stops if on Haiku. 
Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most proposals. **Opus** for high-stakes proposals where architectural fit and market analysis need extra depth. - ---- - -### `/intelligence-update [source]` - -**Phase:** Intelligence Maintenance - -Pull latest data from external sources into the intelligence database. Syncs sources that are past their scheduled interval. - -**Usage:** -``` -/intelligence-update # sync all sources that are due -/intelligence-update all # force sync every source -/intelligence-update wikidata-software # sync one specific source -``` - -**Sources and intervals:** - -| Source | Interval | -|--------|----------| -| `tenderned` | 24h | -| `wikidata-software` | 7 days | -| `wikipedia-comparisons` | 7 days | -| `awesome-selfhosted` | 7 days | -| `github-issues` | 7 days | -| `dpg-registry` | 7 days | -| `developers-italia` | 7 days | -| `gemma-release` | yearly | - -**What it does:** -1. Checks `source_syncs` table for overdue sources -2. Runs `concurrentie-analyse/scripts/sync/sync_{source}.py` for each -3. Updates sync status, records count, and error messages -4. Displays a summary table of all sources with their sync status - -**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. - ---- - -### Tender Intelligence Workflow - -``` -/tender-scan (fetch & classify new tenders) - │ - ▼ -/tender-status (review dashboard) - │ - ▼ -/tender-gap-report (identify gaps) - │ - ▼ -/ecosystem-investigate (research competitors for top gap) - │ - ▼ -/ecosystem-propose-app (generate app proposal) - │ - ▼ -/app-design (design the new app) -``` - -**Keep data fresh:** Run `/intelligence-update` weekly and `/tender-scan` daily to keep the database current. 
- ---- - -## Command Flow Cheat Sheet - -``` -/opsx-explore (optional: investigate first) - │ - ▼ -/opsx-new (start a change) - │ - ▼ -/opsx-ff (generate all specs at once) - │ OR -/opsx-continue (generate specs one by one) - │ - ▼ - [Human review & edit specs] - │ - ▼ -/feature-counsel (optional: 8-persona feedback on specs) - │ - ▼ -/opsx-plan-to-issues (optional: tasks → JSON + GitHub Issues) - │ - ▼ -/opsx-apply (implement tasks) - │ - ▼ -/opsx-verify (verify implementation against specs) - │ - ▼ -/test-functional (confirm feature behaves as specced) -/test-counsel (user acceptance — all 8 personas) -/test-app (optional: full technical sweep) - │ - ▼ -/create-pr (create PR on GitHub) - │ - ▼ -/opsx-archive (complete & preserve) -``` - -See [testing.md](testing.md) for situational testing guidance and recommended testing order. - -For the app lifecycle flow (`/app-design` → `/app-create` → `/app-explore` → `/app-apply` → `/app-verify`), see [app-lifecycle.md](app-lifecycle.md). +# Command Reference + +Complete reference for all commands available in the spec-driven development workflow. + +## OpenSpec Built-in Commands + +These commands are installed per-project when you run `openspec init`. They're available inside each project directory. + +--- + +### `/opsx-new ` + +**Phase:** Spec Building + +Start a new change. Creates the change directory with metadata. + +**Usage:** +``` +/opsx-new add-publication-search +``` + +**What it creates:** +``` +openspec/changes/add-publication-search/ +└── .openspec.yaml # Change metadata (schema, created date) +``` + +**Tips:** +- Use descriptive kebab-case names: `add-dark-mode`, `fix-cors-headers`, `refactor-object-service` +- The name becomes a GitHub Issue label, so keep it readable + +--- + +### `/opsx-ff` + +**Phase:** Spec Building + +Fast-forward: generates ALL artifacts in dependency order (proposal → specs → design → tasks) in one go. 
+ +**Usage:** +``` +/opsx-ff +``` + +**What it creates:** +``` +openspec/changes/add-publication-search/ +├── .openspec.yaml +├── proposal.md # Why & what +├── specs/ # Delta specs (ADDED/MODIFIED/REMOVED) +│ └── search/ +│ └── spec.md +├── design.md # How (technical approach) +└── tasks.md # Implementation checklist +``` + +**When to use:** When you have a clear idea of what you want to build and want to generate everything quickly for review. + +**When NOT to use:** When you want to iterate on each artifact step by step, getting feedback between each. Use `/opsx-continue` instead. + +**Model:** Asked at run time — the command asks which model to use and spawns a subagent with that model for artifact generation. Artifact quality (specs, design, tasks) directly determines implementation quality downstream. **Sonnet** for most changes. **Opus** for complex or architectural changes where deeper reasoning improves the design. + +--- + +### `/opsx-continue` + +**Phase:** Spec Building + +Creates the next artifact in the dependency chain. Run repeatedly to build specs incrementally. + +**Usage:** +``` +/opsx-continue # Creates proposal.md (first time) +/opsx-continue # Creates specs/ (second time) +/opsx-continue # Creates design.md (third time) +/opsx-continue # Creates tasks.md (fourth time) +``` + +**Dependency chain:** +``` +proposal (root) + ├── discovery (optional — requires: proposal) + ├── contract (optional — requires: proposal) + ├── specs (requires: proposal) + ├── design (requires: proposal) + ├── migration (optional — requires: design) + ├── test-plan (optional — requires: specs) + └── tasks (requires: specs + design) +``` + +**When to use:** When you want to review and refine each artifact before proceeding to the next. + +--- + +### `/opsx-explore` + +**Phase:** Pre-spec + +Think through ideas and investigate the codebase before starting a formal change. No artifacts are created. 
+ +**Usage:** +``` +/opsx-explore +``` + +**When to use:** When you're not sure what approach to take yet and want to investigate first. + +**Comparison with `/app-explore`:** + +| | `/opsx-explore` | `/app-explore` | +|---|---|---| +| **Scope** | Any topic — a change, a bug, an idea | A specific Nextcloud app's configuration | +| **Output** | None — thinking only | Writes to `openspec/app-config.json` | +| **When to use** | Before starting a change (`/opsx-new`) when requirements are unclear | When designing or refining an app's goals, architecture, and features | +| **Phase** | Pre-spec | Design / Configuration | + +Use `/opsx-explore` to think through *what to build*. Use `/app-explore` to document *how an app is designed and configured*. + +**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most exploration sessions. ✅ **Opus** recommended — complex analysis, architecture decisions, and strategic thinking benefit from stronger reasoning. + +--- + +### `/opsx-apply` + +**Phase:** Implementation + +OpenSpec's built-in implementation command. Reads `tasks.md` and works through tasks. + +**Usage:** +``` +/opsx-apply +``` + +**Note:** `/opsx-ralph-start` (not yet built) is planned as a dedicated implementation loop with minimal context loading and deeper GitHub Issues integration. For now, use this command — it already supports `plan.json` and GitHub Issues when a `plan.json` exists. + +**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most implementation work. **Opus** for architecturally complex changes. + +--- + +### `/opsx-verify` + +**Phase:** Review + +OpenSpec's built-in verification. Validates implementation against artifacts. 
+ +**Usage:** +``` +/opsx-verify +``` + +**Checks:** +- **Completeness** — All tasks done, all requirements implemented +- **Correctness** — Implementation matches spec intent +- **Coherence** — Design decisions reflected in code +- **Test coverage** — Every new PHP service/controller has a corresponding test file; every new Vue component has a test if the project uses Jest/Vitest +- **Documentation** — New features and API endpoints are described in README.md or docs/ + +**Note:** `/opsx-ralph-review` (not yet built) is planned as a dedicated review command that cross-references shared specs and creates GitHub Issues for findings. For now, use this command — it already supports GitHub Issues sync via `plan.json` when present. + +**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most verification work. **Opus** for complex or large changes. + +--- + +### `/opsx-sync` + +**Phase:** Archive + +Merges delta specs from the change into the main `openspec/specs/` directory. + +**Usage:** +``` +/opsx-sync +``` + +**What it does:** +- **ADDED** requirements → appended to main spec +- **MODIFIED** requirements → replace existing in main spec +- **REMOVED** requirements → deleted from main spec + +Usually done automatically during archive. + +--- + +### `/sync-docs` + +**Phase:** Maintenance + +Check and sync documentation to reflect the current project state. Two targets: **app docs** (`{app}/docs/`) for a specific Nextcloud app's users and admins, and **dev docs** (`.claude/docs/`) for Claude and developers. 
+ +**Usage:** +``` +/sync-docs # prompts for target +/sync-docs app # prompts for which app, then syncs its docs/ +/sync-docs app openregister # sync docs for a specific app +/sync-docs dev # sync developer/Claude docs (.claude/docs/) +``` + +Before syncing, runs 4 preliminary checks in parallel — config.yaml rules vs writing-docs.md/writing-specs.md, Sources of Truth accuracy, writing-specs.md vs schema template alignment (`.claude/openspec/schemas/conduction/`), and forked schema drift from the upstream `spec-driven` schema. Reports gaps and asks whether to fix before proceeding. + +**App docs mode** (`{app}/docs/`) — checks the app's `README.md` (root), `docs/features/`, `docs/ARCHITECTURE.md`, `docs/FEATURES.md`, `docs/GOVERNMENT-FEATURES.md`, and any other user-facing `.md` files against the app's current specs. Also loads all company-wide ADRs from `apps-extra/.claude/openspec/architecture/` and any app-level ADRs as auditing context (never as link targets in app docs). Flags outdated descriptions, missing features, stale `[Future]` markers (with full removal checklist), broken links, duplicated content, writing anti-patterns, ADR compliance gaps (screenshots, i18n, API conventions), and missing GEMMA/ZGW/Forum Standaardisatie standards references. Never inserts links into `.claude/` paths. Always shows a diff and asks for confirmation before writing. + +**Dev docs mode** (`.claude/docs/`) — checks `commands.md`, `workflow.md`, `writing-specs.md`, `writing-docs.md`, `testing.md`, `getting-started.md`, `README.md`, plus the conduction schema (`.claude/openspec/schemas/conduction/schema.yaml`) and its `templates/spec.md` for alignment with `writing-specs.md`. Never changes intent without user confirmation. After syncing, runs a Phase 6 review of all commands and skills for stale references, outdated instructions, and redundant inline content — and asks whether to update them. 
+ +Both modes enforce the [Documentation Principles](writing-docs.md) — duplication and wrong-audience content are flagged as issues, with direct links to the relevant writing-docs.md sections. + +**When to use:** After a significant batch of changes — new commands, archived features, updated specs, or structural changes to the project. + +--- + +### `/opsx-archive` + +**Phase:** Archive + +Complete a change and preserve it for the historical record. + +**Usage:** +``` +/opsx-archive +``` + +**What it does:** +1. Checks artifact and task completion +2. Syncs delta specs into main specs (if not already done) +3. Moves the change to `openspec/changes/archive/YYYY-MM-DD-/` +4. All artifacts are preserved for audit trail +5. Updates or creates `docs/features/.md` — creates it if no matching feature doc exists +6. Updates the feature overview table in `docs/features/README.md` (creates the file if it doesn't exist) +7. Creates or updates `CHANGELOG.md` — completed tasks become versioned entries (version from `app-config.json`); uses [Keep a Changelog](https://keepachangelog.com/) format + +--- + +### `/opsx-bulk-archive` + +**Phase:** Archive + +Archive multiple completed changes at once. + +**Usage:** +``` +/opsx-bulk-archive +``` + +**When to use:** When you have several changes that are all complete and want to clean up. + +--- + +### `/opsx-apply-loop` + +**Phase:** Full Lifecycle (experimental) + +Automated apply→verify loop for a single change in a specific app. Runs the implementation loop inside an isolated Docker container, optionally runs targeted tests on the host, then archives and syncs to GitHub. + +**Usage:** +``` +/opsx-apply-loop # asks which app + change to run +/opsx-apply-loop procest add-sla-tracking # run a specific app/change +/opsx-apply-loop openregister seed-data # run in a different app +``` + +**What it does:** +1. Selects app and change (scans across all apps, or uses provided arguments) +2. 
Checks for a GitHub tracking issue (runs `/opsx-plan-to-issues` first if missing)
+3. Creates a `feature/<change-name>` branch in the app's git repo
+4. Checks the Nextcloud environment is running
+5. Reads `test-plan.md` (if present) and classifies which test commands to include in the loop
+6. Asks whether to include a test cycle (tests run **outside the container** against the live Nextcloud app)
+7. Builds and starts an isolated Docker container — mounts the app directory + shared `.claude/` skills (read-only); no git, no GitHub
+8. Inside the container: runs `/opsx-apply` → `/opsx-verify` in a loop (max 5 iterations)
+   - CRITICAL issues retrigger the loop; WARNING issues also retrigger but never block archive
+   - At max iterations with only warnings remaining, archive still proceeds
+   - Seed data (ADR-016) is created/updated during apply as required
+9. Captures container logs to `.claude/logs/`, then removes container
+10. **If test cycle enabled:** runs targeted single-agent test commands on the host (max 3 test iterations); failures loop back into apply→verify
+11. **If test cycle enabled and deferred tests exist:** asks about multi-agent/broad tests from the test-plan that were excluded from the loop; runs them once if confirmed, with one final apply→verify if they fail
+12. Runs `/opsx-archive` on the host (after tests pass or tests skipped)
+13. Commits all changes in the app repo with a generated commit message
+14. Syncs GitHub: updates issue checkboxes, posts a completion comment, prompts to close
+15. Asks about test scenario conversion (deferred from archive)
+16. Shows a final report with iterations used, tasks completed, and what's next
+
+**When to use:** When you want hands-off implementation of a single change in one app. Prefer `/opsx-pipeline` for running multiple changes across apps in parallel.
+
+**Container design:** The container mounts the app directory at `/workspace` and the shared `.claude/` at `/workspace/.claude` (read-only). 
This gives the container's Claude session access to all shared skills without requiring git or GitHub. Each app is isolated — the container only touches one app directory. + +**Container limitations:** GitHub operations, `docker compose exec`, browser tests, and git commands are not available inside the container — all handled on the host after the container exits. Tests always run on the host against the live Nextcloud environment. + +**Cap impact:** High — runs apply + verify sequentially (up to 5 iterations), optionally followed by targeted tests (up to 3 test iterations). Each iteration is a full implementation + verification pass. + +**Model:** Sonnet recommended for most changes; Opus for complex architectural work. Asked at run time. + +**Requires:** +- Docker running +- `gh` CLI authenticated on the host +- Nextcloud containers up (auto-started if not running — uses `docker compose -f` pointed at the docker-dev root's `.github/docker-compose.yml`) +- **Container authentication** — the Docker container cannot use interactive OAuth, so it needs an explicit token. One of these environment variables must be set in your shell (see [Getting Started — Container authentication](getting-started.md#prerequisites) for full setup instructions): + 1. `CLAUDE_CODE_AUTH_TOKEN` (preferred) — uses your existing Claude Max/Pro subscription at no extra cost. Generate with `claude setup-token`, then `export CLAUDE_CODE_AUTH_TOKEN="..."` in `~/.bashrc`. + 2. `ANTHROPIC_API_KEY` (fallback) — uses prepaid API credits from console.anthropic.com (costs money). `export ANTHROPIC_API_KEY="sk-ant-api03-..."` in `~/.bashrc`. + +--- + +### `/opsx-pipeline` + +**Phase:** Full Lifecycle (experimental) + +Process one or more OpenSpec changes through the full lifecycle in parallel — each change gets its own subagent, git worktree, feature branch, and PR. 
+ +**Usage:** +``` +/opsx-pipeline all # process all open proposals across all repos +/opsx-pipeline procest # all open proposals in one app +/opsx-pipeline sla-tracking routing # specific changes by name +``` + +**What it does:** +1. Discovers open proposals (changes with `proposal.md` but not yet archived) +2. Presents an execution plan and asks for confirmation +3. Creates a git worktree and feature branch per change +4. Launches up to 5 parallel subagents — each runs ff → apply → verify → archive +5. Monitors progress and queues remaining changes as slots free up +6. Creates a PR per completed change to `development` +7. Reports full results including tasks completed, quality checks, and PR links + +**Subagent lifecycle per change:** +``` +ff (artifacts) → plan-to-issues → apply (implement + tests + docs) → verify → archive → push + PR +``` + +**When to use:** When you have multiple open proposals ready to implement and want to run them hands-off. + +**Cap impact:** High — up to 5 agents running full implementations in parallel. Each agent may run for 10-30 minutes depending on change complexity. + +**Model:** Asked at run time with three options: one model for all sub-agents, choose per change, or auto-select by reading each proposal. **Haiku** for simple changes (config, text, minor fixes). **Sonnet** for standard feature work. **Opus** for complex architectural changes. The model applies per implementation sub-agent — choose based on change complexity and available quota. + +**Requires:** `gh` CLI authenticated; quality checks must pass per app (`composer check:strict` / `npm run lint`) + +--- + +### `/opsx-onboard` + +**Phase:** Setup + +Get an overview of the current project's OpenSpec setup and active changes. + +**Usage:** +``` +/opsx-onboard +``` + +--- + +## App Management Commands + +Commands for creating, configuring, and maintaining Nextcloud apps. 
These work together in a lifecycle: `/app-design` → `/app-create` → `/app-explore` → `/app-apply` → `/app-verify`. + +For a full guide on the lifecycle, when to use each command, and how they relate to the OpenSpec workflow, see [App Lifecycle](app-lifecycle.md). + +--- + +### `/app-design` + +**Phase:** Setup / Design + +Full upfront design for a new Nextcloud app — architecture research, competitor analysis, feature matrix, ASCII wireframes, and OpenSpec setup. Run this **before** `/app-create` for brand-new apps. + +**Usage:** +``` +/app-design +/app-design my-new-app +``` + +**What it does:** +1. Researches the problem domain and existing solutions +2. Produces architecture decisions, feature matrix, and ASCII wireframes +3. Sets up the `openspec/` structure with initial design docs + +**Output:** Design documentation ready to hand off to `/app-create`. + +**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for general design sessions. ✅ **Opus** recommended — competitive research, architecture decisions, and full design document creation benefit from stronger reasoning. + +--- + +### `/app-create` + +**Phase:** Setup + +Bootstrap a new Nextcloud app from the ConductionNL template, or onboard an existing repo. Always creates an `openspec/` configuration folder that tracks all app decisions. + +**Usage:** +``` +/app-create +/app-create my-new-app +``` + +**What it does:** +1. Asks whether a local folder already exists — if yes, uses it as the base; if no, clones the template +2. Collects basic identity: app ID, name, goal, one-line summary, Nextcloud category +3. Asks about dependencies (OpenRegister, additional CI apps) +4. Creates `openspec/app-config.json` and `openspec/README.md` +5. Replaces all template placeholders (`AppTemplate`, `app-template`, etc.) across all files +6. Creates the GitHub repository and branches (`main`, `development`, `beta`) +7. 
Optionally sets branch protection and team access +8. Optionally installs dependencies and enables the app in the local Nextcloud environment + +**Output:** Fully scaffolded app directory with correct identity, CI/CD workflows, and GitHub repo. + +**Requires:** `gh` CLI authenticated (`gh auth login`) + +--- + +### `/app-explore` + +**Phase:** Design / Configuration + +Enter exploration mode for a Nextcloud app. Think through its goals, architecture, features, and Architectural Decision Records (ADRs). Updates `openspec/` files to capture decisions. + +**Usage:** +``` +/app-explore +/app-explore openregister +``` + +**What it does:** +- Loads `openspec/app-config.json` for full context +- Acts as a **thinking partner** — draws diagrams, asks questions, challenges assumptions +- Captures decisions into `openspec/app-config.json` +- Never writes application code — only `openspec/` files + +**Feature lifecycle:** +``` +idea → planned → in-progress → done +``` +When a feature moves to `planned` (has user stories + acceptance criteria), suggests `/opsx-ff {feature-name}` to create an OpenSpec change from it. + +**Important:** Run this before implementing anything. Features defined here become the inputs for `/opsx-ff`. + +**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for general app exploration. ✅ **Opus** recommended — feature strategy, ADRs, and competitive analysis benefit from stronger reasoning. + +--- + +### `/app-apply` + +**Phase:** Configuration + +Applies `openspec/app-config.json` decisions back into the actual app files. The counterpart to `/app-explore`. + +**Usage:** +``` +/app-apply +/app-apply openregister +``` + +**What it does:** +1. Loads `openspec/app-config.json` +2. Compares current file values against config — builds a list of pending changes +3. Shows a clear diff summary of what would change +4. 
Asks for confirmation before applying any changes +5. Updates only the tracked values in each file (IDs, names, namespaces, CI parameters) — never touches feature code +6. Optionally runs `composer check:strict` to verify PHP changes are clean + +**In scope:** `appinfo/info.xml`, CI/CD workflow parameters, PHP namespaces and app ID constants, `composer.json`/`package.json` names, `webpack.config.js` app ID, `src/App.vue` OpenRegister gate, `README.md` header. + +**Out of scope:** Feature code, business logic, Vue components, PHP controllers. Use `/opsx-ff {feature-name}` for those. + +--- + +### `/app-verify` + +**Phase:** Review / Audit + +Read-only audit. Checks every tracked app file against `openspec/app-config.json` and reports drift — without making any changes. + +**Usage:** +``` +/app-verify +/app-verify openregister +``` + +**What it does:** +- Loads `openspec/app-config.json` and reads every tracked file +- Reports each check as **CRITICAL** (will break CI or runtime), **WARNING** (wrong metadata), or **INFO** (cosmetic drift) +- Shows exact current value vs expected value for every failing check +- Recommends `/app-apply` if issues are found + +**When to use:** After `/app-apply` to confirm changes landed, or at any time to check for drift. + +--- + +### `/clean-env` + +**Phase:** Setup / Reset + +Fully resets the OpenRegister Docker development environment. + +**Usage:** +``` +/clean-env +``` + +**What it does:** +1. Stops all Docker containers from the OpenRegister docker-compose +2. Removes all containers and volumes (full data reset) +3. Starts containers fresh +4. Waits for Nextcloud to become ready +5. Installs core apps: openregister, opencatalogi, softwarecatalog, nldesign, mydash + +**Important:** Destructive — removes all database data and volumes. Only use when a full reset is intended. + +After completion, verify at `http://localhost:8080` (admin/admin). 
+ +**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. + +--- + +## Team Role Commands + +Specialist agents representing different roles on the development team. Useful for getting a focused perspective on a change — architecture review, QA, product sign-off, etc. + +| Command | Role | Focus | +|---------|------|-------| +| `/team-architect` | Architect | API design, data models, cross-app dependencies | +| `/team-backend` | Backend Developer | PHP implementation, entities, services, tests | +| `/team-frontend` | Frontend Developer | Vue components, state management, UX | +| `/team-po` | Product Owner | Business value, acceptance criteria, priority | +| `/team-qa` | QA Engineer | Test coverage, edge cases, regression risk | +| `/team-reviewer` | Code Reviewer | Standards, conventions, security, code quality | +| `/team-sm` | Scrum Master | Progress tracking, blockers, sprint health | + +**Usage:** +``` +/team-architect # review the API design for the active change +/team-qa # get QA perspective on test coverage +``` + +**Model for `/team-architect`:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Opus** recommended — best multi-framework reasoning across NLGov, BIO2/NIS2, WCAG, Haven, AVG/GDPR. **Sonnet** not recommended — may miss nuances in complex compliance scenarios. + +--- + +## Softwarecatalogus Commands (`/swc:*`) + +Commands specific to the VNG Softwarecatalogus client project. See `Softwarecatalogus/` (never commit to this repo). + +--- + +### `/swc-test` + +**Phase:** Testing + +Run automated tests for the GEMMA Softwarecatalogus — API tests (Postman/Newman), browser tests (persona agents), or both. 
+ +**Usage:** +``` +/swc-test # choose mode interactively +/swc-test api # API tests only +/swc-test browser # browser tests only +/swc-test personas # all 8 persona agents +/swc-test all # everything +``` + +--- + +### `/swc-update` + +**Phase:** Maintenance + +Sync GitHub issues from VNG-Realisatie/Softwarecatalogus, auto-generate acceptance criteria, and update test infrastructure to reflect current issue state. + +**Usage:** +``` +/swc-update +``` + +--- + +## Custom Conduction Commands + +These commands are workspace-level and available from any project within `apps-extra/`. They extend OpenSpec with GitHub Issues integration and Ralph Wiggum loops. + +--- + +### `/create-pr` + +**Phase:** Git / Delivery + +Create a Pull Request from a branch in any repo. Handles the full flow interactively. + +**Usage:** +``` +/create-pr +``` + +**What it does:** +1. **Selects the repository** — scans for available git repos in the workspace, asks you to pick one (never assumes the current directory) +2. **Confirms the source branch** — shows the current branch, lets you override +3. **Recommends a target branch** based on the branching strategy; checks GitHub for an existing open PR on the same branch pair — if found, offers to view or update it instead +4. **Checks for uncommitted or unpushed changes** — if any are found, offers to commit, stash, or continue; offers to push unpushed commits before continuing +5. **Verifies global settings version** *(claude-code-config repo only)* — delegates to `/verify-global-settings-version`; pauses and offers a fix if a VERSION bump is missing +6. **Discovers CI checks from `.github/workflows/`** — reads the repo's workflow files to determine exactly which checks CI will run, then mirrors them locally (never hardcodes a list) +7. **Installs missing dependencies** (`vendor/`, `node_modules/`) if needed before running checks +8. **Runs all discovered checks** — nothing skipped; slow checks (e.g. 
test suites) ask for confirmation first; shows a pass/fail table when done +9. **Reads all commits and diffs** on the branch to draft a PR title and description from the actual changes +10. **Shows the draft in chat** for review — you can ask to change or shorten it; the loop repeats until you approve +11. **Pushes the branch and creates the PR** via `gh pr create` +12. Reports the PR URL and next steps + +**Branching strategy:** + +| Source | Recommended target | +|---|---| +| `feature/*`, `bugfix/*` | `development` | +| `development` | `beta` | +| `beta` | `main` | +| `hotfix/*` | `main` (or `beta`/`development`) | + +**Model:** Checked at run time — the command reads your active model from context and stops automatically if you're on Haiku (or anything weaker than Sonnet). Involves parsing CI workflows, detecting branch-protection rules, and reasoning about code diffs where mistakes have real consequences. **Sonnet** for most PRs. **Opus** when the repo uses reusable CI workflows, branch-protection rulesets, or a complex branching strategy — that's where it pays off most. + +**Requires:** `gh` CLI authenticated (`gh auth login`) + +--- + +### `/verify-global-settings-version` + +**Phase:** Git / Delivery + +Checks whether `global-settings/VERSION` has been correctly bumped after any changes to files in the `global-settings/` directory. Run this before creating a PR on the `ConductionNL/claude-code-config` repo. + +**Usage:** +``` +/verify-global-settings-version +``` + +**What it does:** +1. Fetches `origin/main` to get the latest published version +2. Diffs `global-settings/` between the current branch and `origin/main` +3. Compares the branch `VERSION` against the `origin/main` `VERSION` +4. 
Reports one of four outcomes:
+   - ✅ No changes to `global-settings/` — no bump needed
+   - ✅ Changes found and `VERSION` correctly bumped
+   - ❌ Changes found but `VERSION` not bumped — suggests the next semver and the command to apply it
+   - ⚠️ `VERSION` bumped but no other files changed — flags as unusual
+
+**When to use:**
+- Standalone: any time you modify a file in `global-settings/` and want to confirm the bump is in place before committing
+- Automatically: called by `/create-pr` when the selected repo is `ConductionNL/claude-code-config` — no need to run it separately in that flow
+
+**Semver rules for `global-settings/`:**
+- `1.0.0 → 1.1.0` — new permissions, guards, or behavior added
+- `1.0.0 → 2.0.0` — breaking change requiring manual migration
+
+**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill.
+
+---
+
+### `/opsx-plan-to-issues`
+
+**Phase:** Planning → GitHub
+
+Converts an OpenSpec change's `tasks.md` into structured `plan.json` and creates corresponding GitHub Issues.
+
+**Usage:**
+```
+/opsx-plan-to-issues
+```
+
+**Prerequisites:**
+- A change with completed `tasks.md`
+- GitHub MCP server active or `gh` CLI authenticated
+- Git remote pointing to a ConductionNL repository
+
+**What it does:**
+
+1. **Finds the active change** in the current project's `openspec/changes/`
+2. **Detects the GitHub repo** from `git remote get-url origin`
+3. **Parses tasks.md** into structured JSON
+4. **Creates GitHub Issues:**
+   - One **tracking issue** (epic) with:
+     - Title: `[OpenSpec] <change-name>`
+     - Body: proposal summary + task checklist
+     - Labels: `openspec`, `tracking`
+   - One **issue per task** with:
+     - Title: `[<change-name>] <task-title>`
+     - Body: description, acceptance criteria, spec ref, affected files
+     - Labels: `openspec`, `<change-name>`
+5. 
**Saves `plan.json`** with all issue numbers linked + +**Output example:** +``` +Created tracking issue: https://github.com/ConductionNL/opencatalogi/issues/42 +Created 5 task issues: #43, #44, #45, #46, #47 +Saved plan.json at: openspec/changes/add-search/plan.json + +Run /opsx-ralph-start to begin implementation. +``` + +**The plan.json it creates:** +```json +{ + "change": "add-search", + "project": "opencatalogi", + "repo": "ConductionNL/opencatalogi", + "created": "2026-02-15T10:00:00Z", + "tracking_issue": 42, + "tasks": [ + { + "id": 1, + "title": "Create SearchController", + "description": "Add new controller for search API endpoint", + "github_issue": 43, + "status": "pending", + "spec_ref": "openspec/specs/search/spec.md#requirement-search-api", + "acceptance_criteria": [ + "GIVEN a search query WHEN GET /api/search?q=test THEN returns matching results" + ], + "files_likely_affected": [ + "lib/Controller/SearchController.php" + ], + "labels": ["openspec", "add-search"] + } + ] +} +``` + +--- + +### `/opsx-ralph-start` *(not yet implemented)* + +**Phase:** Implementation + +Starts a Ralph Wiggum implementation loop driven by `plan.json`. This is the core of our minimal-context coding approach. + +**Usage:** +``` +/opsx-ralph-start +``` + +**Prerequisites:** +- A `plan.json` in the active change (created by `/opsx-plan-to-issues`) + +**What it does per iteration:** + +1. **Reads plan.json** — finds the next task with `"status": "pending"` +2. **Sets status to `"in_progress"`** in plan.json +3. **Reads ONLY the referenced spec section** — uses `spec_ref` to load just the relevant requirement, NOT the entire spec file +4. **Implements the task** — following acceptance criteria and coding standards +5. **Verifies** — checks acceptance criteria are met +6. **Updates progress:** + - Sets task to `"completed"` in plan.json + - Checks off boxes in tasks.md + - Closes the GitHub issue with a summary comment + - Updates the tracking issue checklist +7. 
**Loops** — picks up the next pending task, or stops if all done + +**Why minimal context matters:** + +Each iteration loads only: +- `plan.json` (the task list — typically 1-2 KB) +- One spec section via `spec_ref` (the specific requirement — a few paragraphs) +- The affected files + +It does NOT load: +- proposal.md +- design.md +- Other spec files +- The full tasks.md + +This prevents context window bloat and keeps each iteration fast and focused. + +**Resuming after interruption:** + +If the loop is interrupted (context limit, error, etc.), simply run `/opsx-ralph-start` again. It reads `plan.json`, finds the first non-completed task, and continues from there. + +--- + +### `/opsx-ralph-review` *(not yet implemented)* + +**Phase:** Review + +Verifies the completed implementation against all spec requirements and shared conventions. Creates a structured review report. + +**Usage:** +``` +/opsx-ralph-review +``` + +**Prerequisites:** +- All tasks in plan.json should be `"completed"` + +**What it does:** + +1. **Loads full context** — proposal, all delta specs, tasks, plan.json +2. **Checks completeness:** + - All tasks completed? + - All GitHub issues closed? + - All task checkboxes checked? +3. **Checks spec compliance:** + - For each ADDED requirement: does the implementation exist? + - For each MODIFIED requirement: is the old behavior changed? + - For each REMOVED requirement: is the deprecated code gone? + - Do GIVEN/WHEN/THEN scenarios match the code behavior? +4. **Cross-references shared specs:** + - `nextcloud-app/spec.md` — correct app structure, DI, route ordering + - `api-patterns/spec.md` — URL patterns, CORS, error responses + - `nl-design/spec.md` — design tokens, accessibility + - `docker/spec.md` — environment compatibility +5. **Categorizes findings:** + - **CRITICAL** — Spec MUST/SHALL requirement not met + - **WARNING** — SHOULD requirement not met or partial compliance + - **SUGGESTION** — Improvement opportunity +6. 
**Generates `review.md`** in the change directory
+7. **Creates GitHub Issue** if CRITICAL/WARNING findings exist
+
+**Output example:**
+```
+Review: add-search
+Tasks completed: 5/5
+GitHub issues closed: 5/5
+Spec compliance: PASS (with warnings)
+
+Findings:
+- 0 CRITICAL
+- 2 WARNING
+  - Missing CORS headers on /api/search (api-patterns spec)
+  - No pagination metadata in response (api-patterns spec)
+- 1 SUGGESTION
+  - Consider adding rate limiting
+
+Review saved: openspec/changes/add-search/review.md
+GitHub issue created: #48 [Review] add-search: 0 critical, 2 warnings
+```
+
+---
+
+## OpenSpec CLI Commands
+
+These are terminal commands (not Claude slash commands) for managing specs directly.
+
+| Command | Description |
+|---------|-------------|
+| `openspec init --tools claude` | Initialize OpenSpec in a project |
+| `openspec list --changes` | List all active changes |
+| `openspec list --specs` | List all specs |
+| `openspec show <item>` | View details of a change or spec |
+| `openspec status --change <change-name>` | Show artifact completion status |
+| `openspec validate --all` | Validate all specs and changes |
+| `openspec validate --strict` | Strict validation (errors on warnings) |
+| `openspec update` | Regenerate AI tool config after CLI upgrade |
+| `openspec schema which` | Show which schema is being used |
+| `openspec config list` | Show all configuration |
+
+Add `--json` to any command for machine-readable output.
+
+---
+
+## Testing Commands
+
+For detailed guidance on when to use each command, typical testing workflows, and situational advice, see [testing.md](testing.md).
+
+> **Note on agentic browser testing:** `/test-app`, `/test-counsel`, and `/feature-counsel` use Playwright MCP browsers to explore live applications. Results may include false positives (elements not found due to timing) or false negatives (bugs missed due to exploration order). Always verify critical findings manually. 
+ +--- + +### `/test-app` + +**Phase:** Testing + +Run automated browser tests for any Nextcloud app in this workspace. Explores every page, button, and form guided by the app's documentation and specs. + +**Usage:** +``` +/test-app +/test-app procest +``` + +**Modes:** +- **Quick (1 agent)** — One agent walks through the entire app. Fast, good for smoke testing. Low cap impact. +- **Full (6 agents)** — Six parallel agents each with a different perspective: Functional, UX, Performance, Accessibility, Security, API. More thorough. High cap impact. + +**What it does:** +1. Selects the app (from argument or prompt) +2. Chooses Quick or Full mode +3. Checks `{APP}/test-scenarios/` for active scenarios — asks whether to include them +4. Reads `{APP}/docs/features/` to understand what to test +5. Asks which model to use for agents (Haiku default, Sonnet, or Opus) +6. Launches agents, each reading docs, logging in, and testing from their perspective +7. Agents execute any included test scenario steps before free exploration +8. Writes per-perspective results to `{APP}/test-results/` and a summary to `{APP}/test-results/README.md` + +**Model:** Asked at run time (applies to all sub-agents). **Haiku** (default) — fastest, lowest quota cost. **Sonnet** — more nuanced analysis, larger context window. **Opus** — deepest coverage; significant quota cost in Full mode. See [parallel-agents.md](parallel-agents.md) for context window sizes, subscription quota limits, and how they differ. + +**Cap impact:** See [parallel-agents.md](parallel-agents.md). + +--- + +### `/test-counsel` + +**Phase:** Testing + +Test a Nextcloud app from 8 persona perspectives simultaneously: Henk, Fatima, Sem, Noor, Annemarie, Mark, Priya, Jan-Willem. 
+ +**Usage:** +``` +/test-counsel +``` + +**What it does:** +- Launches 8 parallel browser agents — one per persona (model is user-selected at run time; Haiku is the default) +- Each agent reads its persona card and relevant test scenarios before testing +- Tests from the perspective of that persona's role, technical level, and priorities +- Produces a combined report with findings per persona +- Writes results to `{APP}/test-results/` + +**Model:** Asked at run time (applies to all 8 agents). **Haiku** (default) — fastest, lowest quota cost. **Sonnet** — more nuanced persona findings, larger context window. **Opus** — deepest analysis; significant quota cost with 8 agents. See [parallel-agents.md](parallel-agents.md) for context window sizes, subscription quota limits, and how they differ. + +**Cap impact:** Very high — 8 parallel agents. Open a fresh Claude window before running. See [parallel-agents.md](parallel-agents.md). + +--- + +### `/feature-counsel` + +**Phase:** Discovery / Ideation + +Analyse a Nextcloud app's OpenSpec from 8 persona perspectives and suggest additional features or improvements. + +**Usage:** +``` +/feature-counsel +``` + +**What it does:** +- Reads the app's OpenSpec, specs, and existing features +- Each of the 8 personas considers what's missing from their perspective +- Produces a consolidated list of suggested features and improvements +- Does not test the live app — reads specs and docs only + +**Model:** Asked at run time (applies to all 8 agents). No browser required — agents read specs and docs only. **Sonnet** (default) — recommended; no context window concern without browser snapshots, and better reasoning produces more useful suggestions. **Haiku** — faster, lower quota, good for a quick broad pass. **Opus** — deepest reasoning for complex architectural gaps; use with full mode (8 agents) sparingly. + +**Cap impact:** Very high — 8 parallel agents. See [parallel-agents.md](parallel-agents.md). 
+ +--- + +### Commands (Single-Agent) + +--- + +### `/test-functional` + +**Phase:** Testing + +Feature correctness via browser — executes GIVEN/WHEN/THEN scenarios from specs against the live app. + +**Usage:** +``` +/test-functional +``` + +--- + +### `/test-api` + +**Phase:** Testing + +REST API endpoint testing. Checks endpoints, authentication, pagination, and error responses. + +**Usage:** +``` +/test-api +``` + +--- + +### `/test-accessibility` + +**Phase:** Testing + +WCAG 2.1 AA compliance using axe-core, plus manual keyboard and focus checks. + +**Usage:** +``` +/test-accessibility +``` + +--- + +### `/test-performance` + +**Phase:** Testing + +Load times, API response times, and network request analysis via browser timing APIs. + +**Usage:** +``` +/test-performance +``` + +--- + +### `/test-security` + +**Phase:** Testing + +OWASP Top 10, Nextcloud roles, authorization, XSS, CSRF, sensitive data exposure. + +**Usage:** +``` +/test-security +``` + +--- + +### `/test-regression` + +**Phase:** Testing + +Cross-feature regression — verifies changes don't break unrelated flows. + +**Usage:** +``` +/test-regression +``` + +--- + +### `/test-persona-*` + +**Phase:** Testing + +Single-persona deep dive. 
Use when you want one persona's full assessment without launching all eight: + +| Command | Persona | Role | +|---------|---------|------| +| `/test-persona-henk` | **Henk Bakker** | Elderly citizen — low digital literacy | +| `/test-persona-fatima` | **Fatima El-Amrani** | Low-literate migrant citizen | +| `/test-persona-sem` | **Sem de Jong** | Young digital native | +| `/test-persona-noor` | **Noor Yilmaz** | Municipal CISO / functional admin | +| `/test-persona-annemarie` | **Annemarie de Vries** | VNG standards architect | +| `/test-persona-mark` | **Mark Visser** | MKB software vendor | +| `/test-persona-priya` | **Priya Ganpat** | ZZP developer / integrator | +| `/test-persona-janwillem` | **Jan-Willem van der Berg** | Small business owner | + +**Usage:** +``` +/test-persona-henk +/test-persona-priya +``` + +**Use when:** You know which persona is most affected by a change, or when you've run `/test-counsel` and want a deeper single-perspective follow-up. One agent instead of eight — lower cap cost. + +**Cap impact:** Low — single agent. See [parallel-agents.md](parallel-agents.md). + +--- + +## Test Scenario Commands + +Test scenarios are reusable, Gherkin-style descriptions of user journeys that can be executed by any test command. They live in `{APP}/test-scenarios/TS-NNN-slug.md` and are automatically discovered by `/test-app`, `/test-counsel`, and `/test-persona-*` when they run. + +> **Test scenario vs test case**: A scenario is a high-level, user-centered description of *what* to verify and *for whom* — one concrete flow, written in Given-When-Then. It is broader than a click-by-click test case but more specific than a spec requirement. + +--- + +### `/test-scenario-create` + +**Phase:** Testing + +Guided wizard for creating a well-structured test scenario for a Nextcloud app. + +**Usage:** +``` +/test-scenario-create +/test-scenario-create openregister +``` + +**What it does:** +1. Determines the next ID (`TS-NNN`) by scanning existing scenarios +2. 
Asks for title, goal, category (functional/api/security/accessibility/performance/ux/integration), and priority +3. Shows relevant personas and asks which this scenario targets +4. Suggests which test commands should automatically include it +5. Auto-suggests tags based on category and title +6. Guides through Gherkin steps (Given/When/Then), test data, and acceptance criteria +7. Generates persona-specific notes for each linked persona +8. Saves to `{APP}/test-scenarios/TS-NNN-slug.md` + +**Scenario categories and suggested personas:** + +| Category | Suggested personas | +|---|---| +| functional | Mark Visser, Sem de Jong | +| api | Priya Ganpat, Annemarie de Vries | +| security | Noor Yilmaz | +| accessibility | Henk Bakker, Fatima El-Amrani | +| ux | Henk Bakker, Jan-Willem, Mark Visser | +| performance | Sem de Jong, Priya Ganpat | +| integration | Priya Ganpat, Annemarie de Vries | + +--- + +### `/test-scenario-run` + +**Phase:** Testing + +Execute one or more test scenarios against the live Nextcloud environment using a browser agent. + +**Usage:** +``` +/test-scenario-run # list and choose +/test-scenario-run TS-001 # run specific scenario +/test-scenario-run openregister TS-001 # run from specific app +/test-scenario-run --tag smoke # run all smoke-tagged scenarios +/test-scenario-run --all openregister # run all scenarios for an app +/test-scenario-run --persona priya-ganpat # run all Priya's scenarios +``` + +**What it does:** +1. Discovers scenario files in `{APP}/test-scenarios/` +2. Filters by tag, persona, or ID as specified +3. Asks which environment to test against (local or custom URL) +4. Asks whether to use Haiku (default, cost-efficient) or Sonnet (for complex flows) +5. Launches a browser agent per scenario (parallelised up to 5 for multiple) +6. Agent verifies preconditions, follows Given-When-Then steps, checks each acceptance criterion +7. Writes results to `{APP}/test-results/scenarios/` +8. 
Synthesises a summary report for multiple runs + +**Model:** Asked at run time. **Haiku** (default) — fast, cost-efficient. **Sonnet** — for complex multi-step flows or ambiguous UI states where Haiku may misread the interface. Cap cost scales with the number of scenarios run in parallel. + +**Cap impact:** Low for single scenario; medium for multiple. See [parallel-agents.md](parallel-agents.md). + +**Result statuses**: ✅ PASS / ❌ FAIL / ⚠️ PARTIAL / ⛔ BLOCKED + +--- + +### `/test-scenario-edit` + +**Phase:** Testing + +Edit an existing test scenario — update any field (metadata or content) interactively. + +**Usage:** +``` +/test-scenario-edit # list all scenarios, pick one +/test-scenario-edit TS-001 # open specific scenario +/test-scenario-edit openregister TS-001 # open from specific app +``` + +**What it does:** +1. Locates the scenario file +2. Shows a summary of current values (status, priority, category, personas, tags, spec refs) +3. Asks what scope to edit: metadata only / content only / both / status only / tags only +4. Walks through each field in scope, showing the current value and asking for the new one +5. Supports `+tag` / `-tag` syntax for incremental tag changes, same for personas +6. Regenerates persona notes if the personas list changed +7. Optionally renames the file if the title changed +8. Writes the updated file and shows a diff-style summary + +--- + +### How existing test commands use scenarios + +| Command | Behaviour when scenarios exist | +|---|---| +| `/test-app` | Asks to include active scenarios before launching agents. Agents execute scenario steps before free exploration. | +| `/test-counsel` | Asks to include scenarios, grouped by persona. Each persona agent receives only the scenarios tagged with their slug. | +| `/test-persona-*` | Scans for scenarios matching that persona's slug. Asks to run them before free exploration in Step 2. 
| + +--- + +## Tender & Ecosystem Intelligence Commands + +These commands support the competitive analysis and ecosystem gap-finding workflow. They operate on the `concurrentie-analyse/intelligence.db` SQLite database and require the database to exist before running. + +--- + +### `/tender-scan` + +**Phase:** Intelligence Gathering + +Scrape TenderNed for new tenders, import them into SQLite, and classify unclassified tenders by software category using a local Qwen model. + +**Usage:** +``` +/tender-scan +``` + +**What it does:** +1. Runs `concurrentie-analyse/tenders/scrape_tenderned.py` to fetch fresh data +2. Imports new tenders into the intelligence database +3. Classifies unclassified tenders using Qwen via `localhost:11434` +4. Reports new tenders found, classified, and any new gaps detected + +**Requires:** Local Qwen model running on Ollama (`http://localhost:11434`) + +--- + +### `/tender-status` + +**Phase:** Intelligence Monitoring + +Show a dashboard of the tender intelligence database — totals by source, category, status, gaps, and recent activity. + +**Usage:** +``` +/tender-status +``` + +**What it does:** +- Queries `concurrentie-analyse/intelligence.db` for live stats +- Shows tenders by source, status, and category (top 15) +- Highlights categories with Conduction coverage vs gaps +- Shows top integration systems and ecosystem gaps + +**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. + +--- + +### `/tender-gap-report` + +**Phase:** Gap Analysis + +Generate a gap analysis report — software categories that appear in government tenders but have no Conduction product. + +**Usage:** +``` +/tender-gap-report +``` + +**What it does:** +1. Queries the database for categories with tenders but no `conduction_product` +2. 
Generates a markdown report at `concurrentie-analyse/reports/gap-report-{date}.md` +3. Includes top 5 gaps with tender details, organisations, and key requirements +4. Cross-references with `application-roadmap.md` to flag already-tracked gaps +5. Recommends which gaps to investigate first + +--- + +### `/ecosystem-investigate ` + +**Phase:** Competitive Research + +Deep-dive research into a software category — find and analyze open-source competitors using GitHub, G2, Capterra, AlternativeTo, and TEC. + +**Usage:** +``` +/ecosystem-investigate bookkeeping +``` + +**What it does:** +1. Loads category context and related tenders from the intelligence database +2. Uses the browser pool (browser-1 through browser-5) to scrape 5-10 competitors from multiple source types +3. Creates competitor profiles in `concurrentie-analyse/{category}/{competitor-slug}/` +4. Inserts competitors and feature data into the database with provenance tracking +5. Presents a comparison table and recommendation for Nextcloud ecosystem fit + +**Model:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most categories. **Opus** for high-stakes or complex categories where strategic depth matters. + +--- + +### `/ecosystem-propose-app ` + +**Phase:** Product Planning + +Generate a full app proposal for a software category gap, using tender requirements and competitor research as input. + +**Usage:** +``` +/ecosystem-propose-app bookkeeping +``` + +**What it does:** +1. Gathers all tenders, requirements, competitors, and integrations for the category +2. Generates a structured proposal following the template in `concurrentie-analyse/application-roadmap.md` +3. Appends the proposal to `application-roadmap.md` +4. Inserts the proposal into the `app_proposals` database table +5. Optionally bootstraps the app with `/app-create` + +**Model:** Checked at run time — stops if on Haiku. 
Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most proposals. **Opus** for high-stakes proposals where architectural fit and market analysis need extra depth. + +--- + +### `/intelligence-update [source]` + +**Phase:** Intelligence Maintenance + +Pull latest data from external sources into the intelligence database. Syncs sources that are past their scheduled interval. + +**Usage:** +``` +/intelligence-update # sync all sources that are due +/intelligence-update all # force sync every source +/intelligence-update wikidata-software # sync one specific source +``` + +**Sources and intervals:** + +| Source | Interval | +|--------|----------| +| `tenderned` | 24h | +| `wikidata-software` | 7 days | +| `wikipedia-comparisons` | 7 days | +| `awesome-selfhosted` | 7 days | +| `github-issues` | 7 days | +| `dpg-registry` | 7 days | +| `developers-italia` | 7 days | +| `gemma-release` | yearly | + +**What it does:** +1. Checks `source_syncs` table for overdue sources +2. Runs `concurrentie-analyse/scripts/sync/sync_{source}.py` for each +3. Updates sync status, records count, and error messages +4. Displays a summary table of all sources with their sync status + +**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. + +--- + +### Tender Intelligence Workflow + +``` +/tender-scan (fetch & classify new tenders) + │ + ▼ +/tender-status (review dashboard) + │ + ▼ +/tender-gap-report (identify gaps) + │ + ▼ +/ecosystem-investigate (research competitors for top gap) + │ + ▼ +/ecosystem-propose-app (generate app proposal) + │ + ▼ +/app-design (design the new app) +``` + +**Keep data fresh:** Run `/intelligence-update` weekly and `/tender-scan` daily to keep the database current. 
+ +--- + +## Command Flow Cheat Sheet + +``` +/opsx-explore (optional: investigate first) + │ + ▼ +/opsx-new (start a change) + │ + ▼ +/opsx-ff (generate all specs at once) + │ OR +/opsx-continue (generate specs one by one) + │ + ▼ + [Human review & edit specs] + │ + ▼ +/feature-counsel (optional: 8-persona feedback on specs) + │ + ▼ +/opsx-plan-to-issues (optional: tasks → JSON + GitHub Issues) + │ + ▼ +/opsx-apply (implement tasks) + │ + ▼ +/opsx-verify (verify implementation against specs) + │ + ▼ +/test-functional (confirm feature behaves as specced) +/test-counsel (user acceptance — all 8 personas) +/test-app (optional: full technical sweep) + │ + ▼ +/create-pr (create PR on GitHub) + │ + ▼ +/opsx-archive (complete & preserve) +``` + +See [testing.md](testing.md) for situational testing guidance and recommended testing order. + +For the app lifecycle flow (`/app-design` → `/app-create` → `/app-explore` → `/app-apply` → `/app-verify`), see [app-lifecycle.md](app-lifecycle.md). diff --git a/docs/claude/getting-started.md b/docs/claude/getting-started.md index 9960bfb..18bbcf0 100644 --- a/docs/claude/getting-started.md +++ b/docs/claude/getting-started.md @@ -1,225 +1,225 @@ -# Getting Started - -_This is the **setup guide** — see [Workflow Overview](./workflow.md) for the architecture reference, and [End-to-End Walkthrough](./walkthrough.md) for a complete concrete example._ - -This guide walks you through setting up the spec-driven development workflow and completing your first change. 
- -## Prerequisites - -- **Node.js 20+** (required by OpenSpec CLI) -- **Global Claude settings installed** — mandatory for all Conduction work; see [global-claude-settings.md](global-claude-settings.md) for the install commands (sets up read-only Bash policy, write-approval hooks, and session-level version checking) -- **GitHub CLI** (`gh`) authenticated, or the GitHub MCP server active -- Access to the `ConductionNL` GitHub organization -- The `apps-extra` workspace cloned with at least one project - -**Optional — Container authentication (needed for `/opsx-apply-loop` and `/opsx-pipeline`):** - -These commands run Claude CLI inside an isolated Docker container, which cannot use the interactive OAuth login your host session uses. You need one of these environment variables set in your shell: - -| Variable | Source | Cost | -|----------|--------|------| -| `CLAUDE_CODE_AUTH_TOKEN` (preferred) | Your existing Claude Max/Pro subscription | Free (included in subscription) | -| `ANTHROPIC_API_KEY` (fallback) | Anthropic API console | Prepaid credits (billed per token) | - -**To set up `CLAUDE_CODE_AUTH_TOKEN` (recommended):** - -```bash -# 1. Generate a long-lived token from your subscription -claude setup-token - -# 2. Copy the token it outputs, then add to your shell profile: -echo 'export CLAUDE_CODE_AUTH_TOKEN="sk-ant-oat01-..."' >> ~/.bashrc - -# 3. Reload your shell -source ~/.bashrc - -# 4. Verify -echo $CLAUDE_CODE_AUTH_TOKEN | head -c 20 -``` - -**To set up `ANTHROPIC_API_KEY` (alternative — costs money):** - -```bash -# 1. Go to https://console.anthropic.com → API Keys → Create Key -# 2. Ensure your account has credits (Billing → Add credits) -# 3. Add to your shell profile: -echo 'export ANTHROPIC_API_KEY="sk-ant-api03-..."' >> ~/.bashrc - -# 4. Reload your shell -source ~/.bashrc -``` - -> Neither variable is needed for interactive commands like `/opsx-apply` or `/opsx-verify` — only for the containerized automation commands. 
- -**Optional — VS Code extensions:** See the [main README](../README.md#4-install-vs-code-extensions) for the full list of required, recommended, and optional VS Code extensions. - -**Optional — Usage monitoring:** Install the usage tracker to watch your Claude token consumption in real time inside VS Code. Especially useful before running multi-agent commands (see [parallel-agents.md](parallel-agents.md)). - -```bash -bash .claude/usage-tracker/install.sh -``` - -See [`.claude/usage-tracker/README.md`](../usage-tracker/README.md) for setup details. - -## Step 1: Install OpenSpec - -```bash -npm install -g @fission-ai/openspec@latest -``` - -Verify installation: - -```bash -openspec --version -``` - -## Step 2: Understand the Workspace Structure - -The workspace has two levels of spec management: - -### Workspace level (shared) - -``` -apps-extra/ -├── project.md # Coding standards for ALL projects -├── openspec/ -│ ├── config.yaml # Shared context and rules -│ ├── schemas/conduction/ # Our custom workflow schema -│ ├── specs/ # Cross-project specs (NC conventions, APIs, etc.) -│ └── docs/ # You are here -``` - -These files define the patterns and conventions that apply to every project. - -### Project level (specific) - -``` -openregister/ -├── project.md # What this project does, its architecture, dependencies -├── openspec/ -│ ├── config.yaml # Project config (points to shared schema) -│ ├── specs/ # Domain-specific specs for this project -│ └── changes/ # Active work in progress -``` - -Each project has its own specs describing its unique domain behavior. - -## Step 3: Initialize a New Project (if needed) - -If your project doesn't have OpenSpec set up yet, see [App Lifecycle](./app-lifecycle.md) for the bootstrapping commands and onboarding checklist. - -If you're working on `openregister` or `opencatalogi`, they're already initialized. - -## Step 4: Your First Change - -Let's walk through creating your first spec-driven change. - -### 4a. 
Start a new change - -Navigate to your project and run: - -``` -/opsx-new my-first-feature -``` - -This creates `openspec/changes/my-first-feature/` with a `.openspec.yaml` metadata file. - -### 4b. Build the specs - -Generate all planning artifacts at once: - -``` -/opsx-ff -``` - -Claude will create: -1. **`proposal.md`** — Why this change exists and what it covers -2. **`specs/*.md`** — Detailed requirements with scenarios -3. **`design.md`** — Technical approach and architecture -4. **`tasks.md`** — Implementation checklist - -### 4c. Review the artifacts - -Read through each artifact. This is the most valuable step — catching issues in specs is much cheaper than catching them in code. - -Things to check: -- Does the proposal cover the right scope? -- Are the spec requirements using the right RFC 2119 keywords (MUST vs SHOULD)? -- Do the scenarios cover edge cases? -- Is the task breakdown granular enough? - -Edit the artifacts directly if needed — they're just markdown files. - -### 4d. Create GitHub Issues - -``` -/opsx-plan-to-issues -``` - -This converts your tasks into GitHub Issues: -- A **tracking issue** with a full checklist (your "epic") -- **Individual issues** per task with acceptance criteria and spec references -- A **`plan.json`** file linking everything together - -Open the tracking issue URL to see your kanban view. - -### 4e. Start implementing - -``` -/opsx-apply -``` - -This starts the implementation loop. Each iteration: -1. Picks the next pending task from `plan.json` -2. Reads ONLY the spec section that task references -3. Implements the task -4. Closes the GitHub issue -5. Moves to the next task - -The key benefit: each iteration works with minimal context, preventing AI "amnesia" on large changes. - -> **Note:** `/opsx-ralph-start` is a planned dedicated implementation loop with deeper minimal-context loading and tighter GitHub integration — not yet implemented. Use `/opsx-apply` for now. - -### 4f. 
Review your work - -After all tasks are done: - -``` -/opsx-verify -``` - -This checks every spec requirement against your implementation and reports: -- **CRITICAL** findings that must be fixed -- **WARNING** findings that should be addressed -- **SUGGESTION** findings that are nice-to-have - -> **Note:** `/opsx-ralph-review` is a planned dedicated review command — not yet implemented. Use `/opsx-verify` for now. - -### 4g. Archive the change - -Once review passes: - -``` -/opsx-archive -``` - -This merges your delta specs into the main specs and preserves the change for history. - -## Quick Reference - -| What you want to do | Command | -|---------------------|---------| -| Start a new feature | `/opsx-new ` | -| Generate all specs at once | `/opsx-ff` | -| Generate specs one at a time | `/opsx-continue` | -| Convert tasks to GitHub Issues | `/opsx-plan-to-issues` | -| Start implementing | `/opsx-apply` *(or `/opsx-ralph-start` once built)* | -| Review implementation | `/opsx-verify` *(or `/opsx-ralph-review` once built)* | -| Complete and archive | `/opsx-archive` | - -## Next Steps - -- Read the [Command Reference](./commands.md) for detailed options on each command -- Read [Writing Specs](./writing-specs.md) to write better specifications -- See the [Walkthrough](./walkthrough.md) for a full end-to-end example -- See [App Lifecycle](./app-lifecycle.md) to bootstrap or onboard a new app +# Getting Started + +_This is the **setup guide** — see [Workflow Overview](./workflow.md) for the architecture reference, and [End-to-End Walkthrough](./walkthrough.md) for a complete concrete example._ + +This guide walks you through setting up the spec-driven development workflow and completing your first change. 
+ +## Prerequisites + +- **Node.js 20+** (required by OpenSpec CLI) +- **Global Claude settings installed** — mandatory for all Conduction work; see [global-claude-settings.md](global-claude-settings.md) for the install commands (sets up read-only Bash policy, write-approval hooks, and session-level version checking) +- **GitHub CLI** (`gh`) authenticated, or the GitHub MCP server active +- Access to the `ConductionNL` GitHub organization +- The `apps-extra` workspace cloned with at least one project + +**Optional — Container authentication (needed for `/opsx-apply-loop` and `/opsx-pipeline`):** + +These commands run Claude CLI inside an isolated Docker container, which cannot use the interactive OAuth login your host session uses. You need one of these environment variables set in your shell: + +| Variable | Source | Cost | +|----------|--------|------| +| `CLAUDE_CODE_AUTH_TOKEN` (preferred) | Your existing Claude Max/Pro subscription | Free (included in subscription) | +| `ANTHROPIC_API_KEY` (fallback) | Anthropic API console | Prepaid credits (billed per token) | + +**To set up `CLAUDE_CODE_AUTH_TOKEN` (recommended):** + +```bash +# 1. Generate a long-lived token from your subscription +claude setup-token + +# 2. Copy the token it outputs, then add to your shell profile: +echo 'export CLAUDE_CODE_AUTH_TOKEN="sk-ant-oat01-..."' >> ~/.bashrc + +# 3. Reload your shell +source ~/.bashrc + +# 4. Verify +echo $CLAUDE_CODE_AUTH_TOKEN | head -c 20 +``` + +**To set up `ANTHROPIC_API_KEY` (alternative — costs money):** + +```bash +# 1. Go to https://console.anthropic.com → API Keys → Create Key +# 2. Ensure your account has credits (Billing → Add credits) +# 3. Add to your shell profile: +echo 'export ANTHROPIC_API_KEY="sk-ant-api03-..."' >> ~/.bashrc + +# 4. Reload your shell +source ~/.bashrc +``` + +> Neither variable is needed for interactive commands like `/opsx-apply` or `/opsx-verify` — only for the containerized automation commands. 
+ +**Optional — VS Code extensions:** See the [main README](../../README.md#4-install-vs-code-extensions) for the full list of required, recommended, and optional VS Code extensions. + +**Optional — Usage monitoring:** Install the usage tracker to watch your Claude token consumption in real time inside VS Code. Especially useful before running multi-agent commands (see [parallel-agents.md](parallel-agents.md)). + +```bash +bash .claude/usage-tracker/install.sh +``` + +See [`.claude/usage-tracker/README.md`](../../usage-tracker/README.md) for setup details. + +## Step 1: Install OpenSpec + +```bash +npm install -g @fission-ai/openspec@latest +``` + +Verify installation: + +```bash +openspec --version +``` + +## Step 2: Understand the Workspace Structure + +The workspace has two levels of spec management: + +### Workspace level (shared) + +``` +apps-extra/ +├── project.md # Coding standards for ALL projects +├── openspec/ +│ ├── config.yaml # Shared context and rules +│ ├── schemas/conduction/ # Our custom workflow schema +│ ├── specs/ # Cross-project specs (NC conventions, APIs, etc.) +│ └── docs/ # You are here +``` + +These files define the patterns and conventions that apply to every project. + +### Project level (specific) + +``` +openregister/ +├── project.md # What this project does, its architecture, dependencies +├── openspec/ +│ ├── config.yaml # Project config (points to shared schema) +│ ├── specs/ # Domain-specific specs for this project +│ └── changes/ # Active work in progress +``` + +Each project has its own specs describing its unique domain behavior. + +## Step 3: Initialize a New Project (if needed) + +If your project doesn't have OpenSpec set up yet, see [App Lifecycle](./app-lifecycle.md) for the bootstrapping commands and onboarding checklist. + +If you're working on `openregister` or `opencatalogi`, they're already initialized. + +## Step 4: Your First Change + +Let's walk through creating your first spec-driven change. + +### 4a. 
Start a new change + +Navigate to your project and run: + +``` +/opsx-new my-first-feature +``` + +This creates `openspec/changes/my-first-feature/` with a `.openspec.yaml` metadata file. + +### 4b. Build the specs + +Generate all planning artifacts at once: + +``` +/opsx-ff +``` + +Claude will create: +1. **`proposal.md`** — Why this change exists and what it covers +2. **`specs/*.md`** — Detailed requirements with scenarios +3. **`design.md`** — Technical approach and architecture +4. **`tasks.md`** — Implementation checklist + +### 4c. Review the artifacts + +Read through each artifact. This is the most valuable step — catching issues in specs is much cheaper than catching them in code. + +Things to check: +- Does the proposal cover the right scope? +- Are the spec requirements using the right RFC 2119 keywords (MUST vs SHOULD)? +- Do the scenarios cover edge cases? +- Is the task breakdown granular enough? + +Edit the artifacts directly if needed — they're just markdown files. + +### 4d. Create GitHub Issues + +``` +/opsx-plan-to-issues +``` + +This converts your tasks into GitHub Issues: +- A **tracking issue** with a full checklist (your "epic") +- **Individual issues** per task with acceptance criteria and spec references +- A **`plan.json`** file linking everything together + +Open the tracking issue URL to see your kanban view. + +### 4e. Start implementing + +``` +/opsx-apply +``` + +This starts the implementation loop. Each iteration: +1. Picks the next pending task from `plan.json` +2. Reads ONLY the spec section that task references +3. Implements the task +4. Closes the GitHub issue +5. Moves to the next task + +The key benefit: each iteration works with minimal context, preventing AI "amnesia" on large changes. + +> **Note:** `/opsx-ralph-start` is a planned dedicated implementation loop with deeper minimal-context loading and tighter GitHub integration — not yet implemented. Use `/opsx-apply` for now. + +### 4f. 
Review your work + +After all tasks are done: + +``` +/opsx-verify +``` + +This checks every spec requirement against your implementation and reports: +- **CRITICAL** findings that must be fixed +- **WARNING** findings that should be addressed +- **SUGGESTION** findings that are nice-to-have + +> **Note:** `/opsx-ralph-review` is a planned dedicated review command — not yet implemented. Use `/opsx-verify` for now. + +### 4g. Archive the change + +Once review passes: + +``` +/opsx-archive +``` + +This merges your delta specs into the main specs and preserves the change for history. + +## Quick Reference + +| What you want to do | Command | +|---------------------|---------| +| Start a new feature | `/opsx-new ` | +| Generate all specs at once | `/opsx-ff` | +| Generate specs one at a time | `/opsx-continue` | +| Convert tasks to GitHub Issues | `/opsx-plan-to-issues` | +| Start implementing | `/opsx-apply` *(or `/opsx-ralph-start` once built)* | +| Review implementation | `/opsx-verify` *(or `/opsx-ralph-review` once built)* | +| Complete and archive | `/opsx-archive` | + +## Next Steps + +- Read the [Command Reference](./commands.md) for detailed options on each command +- Read [Writing Specs](./writing-specs.md) to write better specifications +- See the [Walkthrough](./walkthrough.md) for a full end-to-end example +- See [App Lifecycle](./app-lifecycle.md) to bootstrap or onboard a new app diff --git a/docs/claude/global-claude-settings.md b/docs/claude/global-claude-settings.md index 955473c..8cf0683 100644 --- a/docs/claude/global-claude-settings.md +++ b/docs/claude/global-claude-settings.md @@ -1,201 +1,201 @@ -# Global Claude Code settings (`~/.claude`) - -These are **mandatory** settings for anyone working on Conduction projects with Claude Code. They enforce a read-first, write-with-approval policy at the user level, ensuring Claude cannot perform destructive operations without explicit confirmation. 
They also version-check themselves at the start of each session so you always know when an update is available. - -Project files under `.claude/` in this repo (for example `settings.json` with `enableAllProjectMcpServers` and MCP allowlists) **complement** this; they do not replace the global policy. - -## Versioned canonical files - -The canonical files live under **[`global-settings/`](../global-settings/)**. The version is tracked in [`global-settings/VERSION`](../global-settings/VERSION). - -| File | Install as | -|------|------------| -| [`global-settings/settings.json`](../global-settings/settings.json) | `~/.claude/settings.json` | -| [`global-settings/block-write-commands.sh`](../global-settings/block-write-commands.sh) | `~/.claude/hooks/block-write-commands.sh` | -| [`global-settings/check-settings-version.sh`](../global-settings/check-settings-version.sh) | `~/.claude/hooks/check-settings-version.sh` | - -## Install / update on a new machine - -Run the following from the root of the `apps-extra` repo: - -```bash -REPO_CLAUDE="$(pwd)/.claude" - -mkdir -p ~/.claude/hooks - -# Core settings and hooks -cp "$REPO_CLAUDE/global-settings/settings.json" ~/.claude/settings.json -cp "$REPO_CLAUDE/global-settings/block-write-commands.sh" ~/.claude/hooks/block-write-commands.sh -cp "$REPO_CLAUDE/global-settings/check-settings-version.sh" ~/.claude/hooks/check-settings-version.sh -chmod +x ~/.claude/hooks/block-write-commands.sh ~/.claude/hooks/check-settings-version.sh - -# Version tracking -cp "$REPO_CLAUDE/global-settings/VERSION" ~/.claude/settings-version -echo "$REPO_CLAUDE" > ~/.claude/settings-repo-path -``` - -Requirements: **`jq`** and **`md5sum`** on `PATH`. Restart Claude Code after installing. - -If `~` is not expanded in hook commands on your system, replace `~/.claude/hooks/…` with absolute paths in `~/.claude/settings.json`. 
- -## Session-start status panel - -At the start of every Claude session, a live status panel is printed to the terminal (stderr): - -``` -┌──────────────────────────────────────────────┐ -│ Global Claude Settings Status │ -└──────────────────────────────────────────────┘ - Installed : v1.0.0 ✓ - Local repo : master @ v1.0.0 - Online : v1.0.0 -``` - -Color coding: -- **Green** — version matches / up to date -- **Yellow** — local branch is ahead of installed (informational only, no action needed) -- **Red** — installed is behind online main (update required) - -If configuration issues are detected (missing `settings-repo-path`, missing `VERSION` file, unreachable remote), they are shown in red below the panel — never silently skipped. - -> **Note:** The terminal panel is only visible when using Claude Code in the terminal (CLI). In the VS Code extension, hook stderr is not shown as a visible banner — see the Claude chat message below instead. - -## Session-start message in Claude chat - -In addition to the terminal panel, the hook always injects a message into Claude's context at the start of every session. Claude will relay this at the top of its first response: - -**Settings up to date:** -> New session started — Global Claude Settings checked. ✅ Settings are up to date (v1.0.0). - -**Update required** (prominently displayed, cannot be missed): -> ⚠️ NEW SESSION — GLOBAL CLAUDE SETTINGS: UPDATE REQUIRED -> Installed: v0.1.0 ❌ | Latest: v1.0.0 ✅ -> Say "update my global settings to 1.0.0" to apply the update. - -**Configuration error** (prominently displayed): -> 🚨 NEW SESSION — GLOBAL CLAUDE SETTINGS: CONFIGURATION ERROR -> ❌ [description of the issue] - -## Keeping settings up to date - -When the online (origin/main) version is bumped, Claude displays a prominent warning at the start of its first response in the new session. 
- -To update, tell Claude: **"update my global settings to [version]"** and Claude will pull all files directly from `origin/main` using `git show` — not from your local branch. This ensures you always get the exact online version regardless of which branch your local repo is on. - -> **Note:** The version check fetches `VERSION` from `origin/main` via `git fetch`. It checks the local repo's configured remote, not a separate URL. If your remote is the upstream nextcloud/server and the global-settings aren't tracked there, the online check will warn about that too. - -### ⚠️ VERSION bump required on every change - -**Any commit that modifies a file in `global-settings/` MUST also increment `VERSION`.** Without a bump, users will not be warned to update and their installed settings will silently fall behind. - -Semver rules: -- `1.0.0 → 1.1.0` — new permissions, guards, or behavior added -- `1.0.0 → 2.0.0` — breaking change requiring manual migration - -Run `/verify-global-settings-version` before creating a PR to confirm the bump is correct. - -## File locations - -| Path | Role | -|------|------| -| `~/.claude/settings.json` | User permissions allowlist, `PreToolUse` + `UserPromptSubmit` hooks, optional `mcpServers` | -| `~/.claude/hooks/block-write-commands.sh` | Hook script invoked for every **Bash** tool use before it runs | -| `~/.claude/hooks/check-settings-version.sh` | Hook script that warns on version mismatch at session start | -| `~/.claude/settings-version` | Installed version (semver, matches repo `VERSION`) | -| `~/.claude/settings-repo-path` | Absolute path to `apps-extra/.claude/` — tells the version hook where to find the canonical `VERSION` file | - -## Shape of `~/.claude/settings.json` - -### 1. `permissions.allow` - -List **Bash** permission patterns you want granted **without** prompting. 
Keep this aligned with the hook: anything you allow here should still pass `block-write-commands.sh`, or the hook will deny the command even if it is allowlisted. - -Allowed categories (all read-only; write operations are gated by the hook): - -- **Inspection**: `ls`, `cat`, `head`, `tail`, `wc`, `stat`, `file`, `du`, `df`, `pwd`, `tree`, `find`, `realpath`, `basename`, `dirname` -- **Text processing**: `diff`, `grep`, `egrep`, `awk`, `tr`, `sort`, `jq`, `cut`, `uniq`, `column` -- **System info**: `which`, `whoami`, `uname`, `ps`, `free`, `lsof`, `ss`, `id`, `groups`, `uptime`, `hostname`, `env`, `date` -- **Git (read-only)**: `git log`, `git status`, `git diff`, `git show`, `git blame`, `git ls-files`, `git rev-parse`, `git describe`, `git shortlog`, `git cat-file`, `git branch`, `git remote`, `git stash list`, `git config --list` -- **`git -C`**: allow `Bash(git -C:*)` so agents can run git in arbitrary directories; the hook restricts **which** `git -C …` invocations are safe -- **Docker (read)**: `docker ps`, `docker images`, `docker logs`, `docker inspect`, `docker stats`, `docker info`, `docker network ls/inspect`, `docker volume ls/inspect`, `docker compose ps/config` -- **GitHub CLI (read)**: `gh pr list/view/checks/diff`, `gh issue list/view`, `gh repo view`, `gh run list/view`, `gh release list/view`, `gh workflow list` -- **Package managers (read)**: `composer show/validate/diagnose/check-platform-reqs`, `npm list/outdated`, `pnpm list/outdated`, `yarn list`, `pip list/show/freeze` -- **PHP**: `php -l/-m/-i/--version` -- **HTTP / API (read; hook narrows further)**: `curl`, `gh api` - -Do **not** put broad `Bash(*)` allow rules here. - -### 2. `hooks.PreToolUse` - -```json -"PreToolUse": [ - { - "matcher": "Bash", - "hooks": [{ "type": "command", "command": "bash ~/.claude/hooks/block-write-commands.sh" }] - } -] -``` - -### 3. 
`hooks.UserPromptSubmit` - -```json -"UserPromptSubmit": [ - { - "hooks": [{ "type": "command", "command": "bash ~/.claude/hooks/check-settings-version.sh" }] - } -] -``` - -### 4. `mcpServers` (optional) - -7 Playwright browser instances (`browser-1` through `browser-7`). `browser-6` runs headed (no `--headless`). Adjust the count to match your actual usage. - ---- - -## What `block-write-commands.sh` does - -- Reads **JSON from stdin** once into a variable, then extracts `cmd` and `transcript_path`. -- On deny, prints `permissionDecision: "deny"` JSON. On ask, prints `permissionDecision: "ask"` JSON. On allow, exits `0`. - -| Area | Allowed silently | Prompts for approval | Hard blocked | -|------|-----------------|---------------------|--------------| -| **curl** | GET without file output | Non-GET methods, data flags, `-o` / `--output` | — | -| **gh api** | GET | POST/PUT/PATCH/DELETE, `--input`, `--field` / `--raw-field` | — | -| **git push** | Last user message contains authorized phrase | — | Blocked otherwise | -| **git -C** | Read-only subcommands (`log`, `status`, `diff`, etc.) 
| Write subcommands, branch/remote writes, stash modifications, config writes | `push` (phrase-authorized) | -| **git branch** (bare) | Listing | `-d/-D/-m/-M/-c/-C`, `--delete`, `--move`, `--copy` | — | -| **git remote** (bare) | Listing, `show`, `get-url` | `add`, `remove`, `rename`, `set-url`, `prune`, `update` | — | -| **env** | `env` alone or `VAR=value` assignments | Using `env` to execute another command | — | -| **date** | Display time | — | `-s` / `--set` (system clock) | -| **cat** | Normal stdout | Shell redirection `>` / `>>` to a file | — | -| **find** | Normal path traversal | `-delete`, `-exec`, `-execdir` | — | -| **sort** | Normal sort to stdout | `-o` / `--output`, shell `>` / `>>` | — | -| **awk** | Normal processing | `print >` / `print >>` in script, shell `>` after script | — | -| **hostname** | Read current hostname (no args) | Setting a new hostname (bare name argument) | — | - -Authorized git push phrases (case-insensitive): `push for me`, `commit and push`, `please push`, `push my changes`. - -## What `check-settings-version.sh` does - -- Fires once per session (keyed to the transcript path via `/tmp/` flag file). -- Reads the installed version from `~/.claude/settings-version`. -- Reads the local branch version from `$REPO_DIR/global-settings/VERSION` (repo path from `~/.claude/settings-repo-path`). -- Fetches the online version from `origin/main` via `git fetch --depth=1` and reads the `VERSION` file from that ref. -- Compares all three versions using semver and prints a colored status panel to stderr (visible in the terminal/CLI). -- Always injects a session-start message into Claude's context via stdout — "up to date", "update required", or "configuration error" — which Claude relays at the top of its first response. -- Never silently skips: configuration issues (missing `settings-repo-path`, missing `VERSION` file, unreachable remote) are shown in the panel and forwarded to Claude. 
- -## Relationship to this repo's `.claude/settings.json` - -Under `apps-extra/.claude/`, project `settings.json` can enable project MCP servers and list allowed MCP tool names. That is separate from the **global** Bash policy above. For a consistent setup, use both: - -1. Global `~/.claude/settings.json` + hooks for Bash safety and version checking. -2. Project `.claude/settings.json` (and `settings.local.json` if used) for workspace-specific MCP and permissions. - -## Checklist for a new machine - -1. Run the install commands above (copies settings, hooks, version files). -2. Confirm `jq` and `md5sum` are on `PATH`. -3. Restart Claude Code so settings reload. -4. Test: a denied pattern (e.g. `curl -X POST`) should be blocked with a clear reason. A `find . -exec` should prompt for approval. -5. Verify the version hook fires: open a new session and confirm no warning (or update if one appears). +# Global Claude Code settings (`~/.claude`) + +These are **mandatory** settings for anyone working on Conduction projects with Claude Code. They enforce a read-first, write-with-approval policy at the user level, ensuring Claude cannot perform destructive operations without explicit confirmation. They also version-check themselves at the start of each session so you always know when an update is available. + +Project files under `.claude/` in this repo (for example `settings.json` with `enableAllProjectMcpServers` and MCP allowlists) **complement** this; they do not replace the global policy. + +## Versioned canonical files + +The canonical files live under **[`global-settings/`](../../global-settings/)**. The version is tracked in [`global-settings/VERSION`](../../global-settings/VERSION). 
+ +| File | Install as | +|------|------------| +| [`global-settings/settings.json`](../../global-settings/settings.json) | `~/.claude/settings.json` | +| [`global-settings/block-write-commands.sh`](../../global-settings/block-write-commands.sh) | `~/.claude/hooks/block-write-commands.sh` | +| [`global-settings/check-settings-version.sh`](../../global-settings/check-settings-version.sh) | `~/.claude/hooks/check-settings-version.sh` | + +## Install / update on a new machine + +Run the following from the root of the `apps-extra` repo: + +```bash +REPO_CLAUDE="$(pwd)/.claude" + +mkdir -p ~/.claude/hooks + +# Core settings and hooks +cp "$REPO_CLAUDE/global-settings/settings.json" ~/.claude/settings.json +cp "$REPO_CLAUDE/global-settings/block-write-commands.sh" ~/.claude/hooks/block-write-commands.sh +cp "$REPO_CLAUDE/global-settings/check-settings-version.sh" ~/.claude/hooks/check-settings-version.sh +chmod +x ~/.claude/hooks/block-write-commands.sh ~/.claude/hooks/check-settings-version.sh + +# Version tracking +cp "$REPO_CLAUDE/global-settings/VERSION" ~/.claude/settings-version +echo "$REPO_CLAUDE" > ~/.claude/settings-repo-path +``` + +Requirements: **`jq`** and **`md5sum`** on `PATH`. Restart Claude Code after installing. + +If `~` is not expanded in hook commands on your system, replace `~/.claude/hooks/…` with absolute paths in `~/.claude/settings.json`. 
+ +## Session-start status panel + +At the start of every Claude session, a live status panel is printed to the terminal (stderr): + +``` +┌──────────────────────────────────────────────┐ +│ Global Claude Settings Status │ +└──────────────────────────────────────────────┘ + Installed : v1.0.0 ✓ + Local repo : master @ v1.0.0 + Online : v1.0.0 +``` + +Color coding: +- **Green** — version matches / up to date +- **Yellow** — local branch is ahead of installed (informational only, no action needed) +- **Red** — installed is behind online main (update required) + +If configuration issues are detected (missing `settings-repo-path`, missing `VERSION` file, unreachable remote), they are shown in red below the panel — never silently skipped. + +> **Note:** The terminal panel is only visible when using Claude Code in the terminal (CLI). In the VS Code extension, hook stderr is not shown as a visible banner — see the Claude chat message below instead. + +## Session-start message in Claude chat + +In addition to the terminal panel, the hook always injects a message into Claude's context at the start of every session. Claude will relay this at the top of its first response: + +**Settings up to date:** +> New session started — Global Claude Settings checked. ✅ Settings are up to date (v1.0.0). + +**Update required** (prominently displayed, cannot be missed): +> ⚠️ NEW SESSION — GLOBAL CLAUDE SETTINGS: UPDATE REQUIRED +> Installed: v0.1.0 ❌ | Latest: v1.0.0 ✅ +> Say "update my global settings to 1.0.0" to apply the update. + +**Configuration error** (prominently displayed): +> 🚨 NEW SESSION — GLOBAL CLAUDE SETTINGS: CONFIGURATION ERROR +> ❌ [description of the issue] + +## Keeping settings up to date + +When the online (origin/main) version is bumped, Claude displays a prominent warning at the start of its first response in the new session. 
+ +To update, tell Claude: **"update my global settings to [version]"** and Claude will pull all files directly from `origin/main` using `git show` — not from your local branch. This ensures you always get the exact online version regardless of which branch your local repo is on. + +> **Note:** The version check fetches `VERSION` from `origin/main` via `git fetch`. It checks the local repo's configured remote, not a separate URL. If your remote is the upstream nextcloud/server and the global-settings aren't tracked there, the online check will warn about that too. + +### ⚠️ VERSION bump required on every change + +**Any commit that modifies a file in `global-settings/` MUST also increment `VERSION`.** Without a bump, users will not be warned to update and their installed settings will silently fall behind. + +Semver rules: +- `1.0.0 → 1.1.0` — new permissions, guards, or behavior added +- `1.0.0 → 2.0.0` — breaking change requiring manual migration + +Run `/verify-global-settings-version` before creating a PR to confirm the bump is correct. + +## File locations + +| Path | Role | +|------|------| +| `~/.claude/settings.json` | User permissions allowlist, `PreToolUse` + `UserPromptSubmit` hooks, optional `mcpServers` | +| `~/.claude/hooks/block-write-commands.sh` | Hook script invoked for every **Bash** tool use before it runs | +| `~/.claude/hooks/check-settings-version.sh` | Hook script that warns on version mismatch at session start | +| `~/.claude/settings-version` | Installed version (semver, matches repo `VERSION`) | +| `~/.claude/settings-repo-path` | Absolute path to `apps-extra/.claude/` — tells the version hook where to find the canonical `VERSION` file | + +## Shape of `~/.claude/settings.json` + +### 1. `permissions.allow` + +List **Bash** permission patterns you want granted **without** prompting. 
Keep this aligned with the hook: anything you allow here should still pass `block-write-commands.sh`, or the hook will deny the command even if it is allowlisted. + +Allowed categories (all read-only; write operations are gated by the hook): + +- **Inspection**: `ls`, `cat`, `head`, `tail`, `wc`, `stat`, `file`, `du`, `df`, `pwd`, `tree`, `find`, `realpath`, `basename`, `dirname` +- **Text processing**: `diff`, `grep`, `egrep`, `awk`, `tr`, `sort`, `jq`, `cut`, `uniq`, `column` +- **System info**: `which`, `whoami`, `uname`, `ps`, `free`, `lsof`, `ss`, `id`, `groups`, `uptime`, `hostname`, `env`, `date` +- **Git (read-only)**: `git log`, `git status`, `git diff`, `git show`, `git blame`, `git ls-files`, `git rev-parse`, `git describe`, `git shortlog`, `git cat-file`, `git branch`, `git remote`, `git stash list`, `git config --list` +- **`git -C`**: allow `Bash(git -C:*)` so agents can run git in arbitrary directories; the hook restricts **which** `git -C …` invocations are safe +- **Docker (read)**: `docker ps`, `docker images`, `docker logs`, `docker inspect`, `docker stats`, `docker info`, `docker network ls/inspect`, `docker volume ls/inspect`, `docker compose ps/config` +- **GitHub CLI (read)**: `gh pr list/view/checks/diff`, `gh issue list/view`, `gh repo view`, `gh run list/view`, `gh release list/view`, `gh workflow list` +- **Package managers (read)**: `composer show/validate/diagnose/check-platform-reqs`, `npm list/outdated`, `pnpm list/outdated`, `yarn list`, `pip list/show/freeze` +- **PHP**: `php -l/-m/-i/--version` +- **HTTP / API (read; hook narrows further)**: `curl`, `gh api` + +Do **not** put broad `Bash(*)` allow rules here. + +### 2. `hooks.PreToolUse` + +```json +"PreToolUse": [ + { + "matcher": "Bash", + "hooks": [{ "type": "command", "command": "bash ~/.claude/hooks/block-write-commands.sh" }] + } +] +``` + +### 3. 
`hooks.UserPromptSubmit` + +```json +"UserPromptSubmit": [ + { + "hooks": [{ "type": "command", "command": "bash ~/.claude/hooks/check-settings-version.sh" }] + } +] +``` + +### 4. `mcpServers` (optional) + +7 Playwright browser instances (`browser-1` through `browser-7`). `browser-6` runs headed (no `--headless`). Adjust the count to match your actual usage. + +--- + +## What `block-write-commands.sh` does + +- Reads **JSON from stdin** once into a variable, then extracts `cmd` and `transcript_path`. +- On deny, prints `permissionDecision: "deny"` JSON. On ask, prints `permissionDecision: "ask"` JSON. On allow, exits `0`. + +| Area | Allowed silently | Prompts for approval | Hard blocked | +|------|-----------------|---------------------|--------------| +| **curl** | GET without file output | Non-GET methods, data flags, `-o` / `--output` | — | +| **gh api** | GET | POST/PUT/PATCH/DELETE, `--input`, `--field` / `--raw-field` | — | +| **git push** | Last user message contains authorized phrase | — | Blocked otherwise | +| **git -C** | Read-only subcommands (`log`, `status`, `diff`, etc.) 
| Write subcommands, branch/remote writes, stash modifications, config writes | `push` (unless phrase-authorized) |
+| **git branch** (bare) | Listing | `-d/-D/-m/-M/-c/-C`, `--delete`, `--move`, `--copy` | — |
+| **git remote** (bare) | Listing, `show`, `get-url` | `add`, `remove`, `rename`, `set-url`, `prune`, `update` | — |
+| **env** | `env` alone or `VAR=value` assignments | Using `env` to execute another command | — |
+| **date** | Display time | — | `-s` / `--set` (system clock) |
+| **cat** | Normal stdout | Shell redirection `>` / `>>` to a file | — |
+| **find** | Normal path traversal | `-delete`, `-exec`, `-execdir` | — |
+| **sort** | Normal sort to stdout | `-o` / `--output`, shell `>` / `>>` | — |
+| **awk** | Normal processing | `print >` / `print >>` in script, shell `>` after script | — |
+| **hostname** | Read current hostname (no args) | Setting a new hostname (bare name argument) | — |
+
+Authorized git push phrases (case-insensitive): `push for me`, `commit and push`, `please push`, `push my changes`.
+
+## What `check-settings-version.sh` does
+
+- Fires once per session (keyed to the transcript path via `/tmp/` flag file).
+- Reads the installed version from `~/.claude/settings-version`.
+- Reads the local branch version from `$REPO_DIR/global-settings/VERSION` (repo path from `~/.claude/settings-repo-path`).
+- Fetches the online version from `origin/main` via `git fetch --depth=1` and reads the `VERSION` file from that ref.
+- Compares all three versions using semver and prints a colored status panel to stderr (visible in the terminal/CLI).
+- Always injects a session-start message into Claude's context via stdout — "up to date", "update required", or "configuration error" — which Claude relays at the top of its first response.
+- Never silently skips: configuration issues (missing `settings-repo-path`, missing `VERSION` file, unreachable remote) are shown in the panel and forwarded to Claude. 
+
+## Relationship to this repo's `.claude/settings.json`
+
+Under `apps-extra/.claude/`, project `settings.json` can enable project MCP servers and list allowed MCP tool names. That is separate from the **global** Bash policy above. For a consistent setup, use both:
+
+1. Global `~/.claude/settings.json` + hooks for Bash safety and version checking.
+2. Project `.claude/settings.json` (and `settings.local.json` if used) for workspace-specific MCP and permissions.
+
+## Checklist for a new machine
+
+1. Run the install commands above (copies settings, hooks, version files).
+2. Confirm `jq` and `md5sum` are on `PATH`.
+3. Restart Claude Code so settings reload.
+4. Test: a guarded pattern (e.g. `curl -X POST` or `find . -exec`) should prompt for approval, and an unauthorized `git push` should be blocked with a clear reason.
+5. Verify the version hook fires: open a new session and confirm no warning (or update if one appears).
diff --git a/docs/claude/parallel-agents.md b/docs/claude/parallel-agents.md
index e7331c4..84e487e 100644
--- a/docs/claude/parallel-agents.md
+++ b/docs/claude/parallel-agents.md
@@ -1,139 +1,139 @@
-# Parallel Agents & Subscription Cap
-
-Running commands that spawn multiple agents simultaneously (like `/test-counsel`, `/test-app`, `/feature-counsel`) consumes your Claude subscription usage much faster than normal conversations. This guide explains why and how to use these commands responsibly.
-
-## Why Parallel Agents Drain Your Cap Fast
-
-Every Claude Code API call sends the following with it:
-- **CLAUDE.md** — workspace instructions
-- **MEMORY.md** — persistent memory index
-- **CLAUDE.local.md** — local credentials/config
-- **The full conversation history** so far
-
-When you run a command that launches 8 agents in parallel, all 8 agents start simultaneously, and each one makes many tool calls internally (file reads, browser snapshots, API calls). That means those files above get sent dozens to hundreds of times within a few minutes. 
- -**Example: `/test-counsel` on a single project** -- 8 agents × ~30 tool calls each = ~240 API calls -- Each call carries CLAUDE.md + MEMORY.md + the agent's conversation history -- This can consume as much as a full day of normal usage in one run - -When you see: `You've hit your limit · resets 3pm (Europe/Amsterdam)` — that's your Claude subscription's rolling usage cap, not a rate limit. - -## Commands That Use Parallel Agents - -| Command | Agents | Cap Impact | -|---------|--------|------------| -| `/test-counsel` | 8 agents | Very high — use sparingly | -| `/feature-counsel` | 8 agents | Very high — use sparingly | -| `/test-app` (Full mode) | 6 agents | High — use sparingly | -| `/test-app` (Quick mode) | 1 agent | Low — fine to use regularly | -| `/test-scenario-run` (multiple) | up to 5 agents | Medium — depends on scenario count | -| `/test-scenario-run` (single) | 1 agent | Low — fine to use regularly | -| `/opsx-pipeline` | 1–5 agents per batch | Medium to Very high — depends on number of changes and model selected | -| `/opsx-apply` | 1 agent | Low — fine to use regularly | -| Single `/test-persona-*` | 1 agent | Low — fine to use regularly | - -## Guidelines for Careful Use - -**Open a fresh window before running:** -Start the command in a new Claude Code window (no prior conversation history). The full conversation history is sent with every API call — in a window with 30+ prior messages, that history alone multiplies the token cost significantly across all parallel agents. A fresh window has zero history overhead. - -**Before running a multi-agent command:** -- Check the clock — do you have enough session left, or will you need Claude for other work today? -- Prefer Quick mode over Full mode in `/test-app` unless you need the full perspective sweep -- Run individual persona testers (`/test-persona-henk`, etc.) 
instead of the full `/test-counsel` when you only need one perspective - -**Don't run these commands:** -- Multiple times in a row on the same day -- Right before needing Claude for urgent implementation work -- Just to "see what happens" — run them when you have a concrete need for the output - -**After hitting the cap:** -- The limit resets at a fixed time (shown in the message, e.g. `resets 3pm (Europe/Amsterdam)`) -- Wait for the reset before continuing — there's no workaround -- Use the waiting time to review output that was already generated - -## Files to Keep Lean - -These files are sent with **every single API call** in the workspace. In a parallel-agent run they are multiplied by the number of agents and the number of tool calls each agent makes. Keep them minimal. - -| File | Purpose | Target size | -|------|---------|-------------| -| `.claude/CLAUDE.md` | Workspace instructions for Claude | < 100 lines | -| `.claude/MEMORY.md` | Index of memory files | < 50 lines (index only, no content) | -| `.claude/CLAUDE.local.md` | Local credentials | < 30 lines | - -**Rules:** -- **CLAUDE.md**: Only include instructions Claude needs on every task. Move niche/infrequent knowledge to separate files in `.claude/docs/` that can be read on demand. -- **MEMORY.md**: This is an index only — one line per memory file with a brief description. Never write memory content directly into MEMORY.md. -- **CLAUDE.local.md**: Credentials only. Do not add project notes here. -- **Persona files** (`.claude/personas/*.md`): These are only loaded when a sub-agent explicitly reads them — they don't auto-load. Keep them focused, but they don't need to be ultra-short. - -## Two Kinds of Token Limits - -Claude has two separate token limits that are easy to confuse: - -### Context window (per-conversation limit) - -The maximum tokens a model can process in a **single conversation**. This is a fixed technical limit of the model itself. 
If a conversation exceeds it, older messages are compressed or dropped. You never "run out" of context window across conversations — each conversation starts fresh. - -| Model | Context window | Max output | -|-----------|---------------|-------------| -| Haiku 4.5 | 200k tokens | 64k tokens | -| Sonnet 4.6| 1M tokens | 64k tokens | -| Opus 4.6 | 1M tokens | 128k tokens | - -> **Source:** [Anthropic models overview](https://platform.claude.com/docs/en/about-claude/models/overview) - -### Subscription quota (account-level limit) - -The total tokens you can use across **all conversations combined** within a rolling time window. When you hit this, you see *"You've hit your limit - resets 3pm"*. Cheaper models allow more total tokens before hitting the cap: - -| Model | Session (~5h) | Weekly (~7d) | -|--------|--------------|-------------| -| Haiku | ~1.2M tokens | ~6M tokens | -| Sonnet | ~400K tokens | ~2M tokens | -| Opus | ~200K tokens | ~1M tokens | - -These are approximate estimates — Anthropic does not publish exact numbers. Calibrate your own values using [claude.ai/settings/usage](https://claude.ai/settings/usage) (see the [usage tracker setup guide](../usage-tracker/SETUP.md)). - -### Why this matters for parallel agents - -A 3-agent parallel run with Haiku might use ~90K tokens total. That fits easily in Haiku's 200k context window per agent, but it consumes ~7.5% of your ~1.2M session quota. The same run with Opus uses similar tokens but that's ~45% of your ~200K session quota. **The context window is rarely the bottleneck — the subscription quota is.** - -## Model Selection - -All parallel sub-agent skills ask which model to use at run time. **Haiku is the default and recommended choice** for parallel runs — it costs significantly less from your subscription quota than Sonnet or Opus. 
- -| Model | Context window | Cap cost | Best for | -|--------------------------|---------------|----------|-------------------------------------------------------------------| -| **Haiku 4.5 (default)** | 200k tokens | Lowest | Most parallel test runs — broad coverage, fast, quota-efficient | -| **Sonnet 4.6** | 1M tokens | Higher | Browser-heavy tasks with many snapshots, or nuanced analysis | -| **Opus 4.6** | 1M tokens | Highest | Final pre-release testing or critical targeted reviews | - -Skills that ask for a model choice when launching agents: -- `/test-counsel` — 8 agents (one per persona) -- `/feature-counsel` — 8 agents (one per persona) -- `/test-app` (Full mode) — 6 agents (one per perspective) -- `/opsx-pipeline` — 1–5 agents (model selectable per change or uniformly for all) -- `/test-scenario-run` — Haiku by default, Sonnet optional (asked per run) - -**Choosing Sonnet or Opus:** Both have a 1M context window vs Haiku's 200k. For browser-heavy tasks that process many page snapshots or read large files, Sonnet's larger context is an advantage. Reserve Opus for final pre-release sweeps or critical targeted reviews where maximum reasoning depth matters. - -The main conversation (where you type commands) always uses whichever model you have active. Only the sub-agents use the model you select when prompted. - -For guidance on which testing commands to use and when, see [testing.md](testing.md). - -## Monitor Your Live Usage - -The [usage tracker](../usage-tracker/README.md) lets you watch your token consumption in real time from a terminal panel in VS Code — useful for knowing how much cap you have left before starting a parallel-agent run. 
- -```bash -# One-line status check -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar - -# Live monitoring (30s refresh) -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor -``` - -The tracker reads Claude Code's session files (`~/.claude/projects/`) directly and is accurate for API token counts. The **limit thresholds** are approximate — verify your real cap at [claude.ai/settings/usage](https://claude.ai/settings/usage). Setup instructions: [`.claude/usage-tracker/SETUP.md`](../usage-tracker/SETUP.md). +# Parallel Agents & Subscription Cap + +Running commands that spawn multiple agents simultaneously (like `/test-counsel`, `/test-app`, `/feature-counsel`) consumes your Claude subscription usage much faster than normal conversations. This guide explains why and how to use these commands responsibly. + +## Why Parallel Agents Drain Your Cap Fast + +Every Claude Code API call sends the following with it: +- **CLAUDE.md** — workspace instructions +- **MEMORY.md** — persistent memory index +- **CLAUDE.local.md** — local credentials/config +- **The full conversation history** so far + +When you run a command that launches 8 agents in parallel, all 8 agents start simultaneously, and each one makes many tool calls internally (file reads, browser snapshots, API calls). That means those files above get sent dozens to hundreds of times within a few minutes. + +**Example: `/test-counsel` on a single project** +- 8 agents × ~30 tool calls each = ~240 API calls +- Each call carries CLAUDE.md + MEMORY.md + the agent's conversation history +- This can consume as much as a full day of normal usage in one run + +When you see: `You've hit your limit · resets 3pm (Europe/Amsterdam)` — that's your Claude subscription's rolling usage cap, not a rate limit. 
+ +## Commands That Use Parallel Agents + +| Command | Agents | Cap Impact | +|---------|--------|------------| +| `/test-counsel` | 8 agents | Very high — use sparingly | +| `/feature-counsel` | 8 agents | Very high — use sparingly | +| `/test-app` (Full mode) | 6 agents | High — use sparingly | +| `/test-app` (Quick mode) | 1 agent | Low — fine to use regularly | +| `/test-scenario-run` (multiple) | up to 5 agents | Medium — depends on scenario count | +| `/test-scenario-run` (single) | 1 agent | Low — fine to use regularly | +| `/opsx-pipeline` | 1–5 agents per batch | Medium to Very high — depends on number of changes and model selected | +| `/opsx-apply` | 1 agent | Low — fine to use regularly | +| Single `/test-persona-*` | 1 agent | Low — fine to use regularly | + +## Guidelines for Careful Use + +**Open a fresh window before running:** +Start the command in a new Claude Code window (no prior conversation history). The full conversation history is sent with every API call — in a window with 30+ prior messages, that history alone multiplies the token cost significantly across all parallel agents. A fresh window has zero history overhead. + +**Before running a multi-agent command:** +- Check the clock — do you have enough session left, or will you need Claude for other work today? +- Prefer Quick mode over Full mode in `/test-app` unless you need the full perspective sweep +- Run individual persona testers (`/test-persona-henk`, etc.) instead of the full `/test-counsel` when you only need one perspective + +**Don't run these commands:** +- Multiple times in a row on the same day +- Right before needing Claude for urgent implementation work +- Just to "see what happens" — run them when you have a concrete need for the output + +**After hitting the cap:** +- The limit resets at a fixed time (shown in the message, e.g. 
`resets 3pm (Europe/Amsterdam)`) +- Wait for the reset before continuing — there's no workaround +- Use the waiting time to review output that was already generated + +## Files to Keep Lean + +These files are sent with **every single API call** in the workspace. In a parallel-agent run they are multiplied by the number of agents and the number of tool calls each agent makes. Keep them minimal. + +| File | Purpose | Target size | +|------|---------|-------------| +| `.claude/CLAUDE.md` | Workspace instructions for Claude | < 100 lines | +| `.claude/MEMORY.md` | Index of memory files | < 50 lines (index only, no content) | +| `.claude/CLAUDE.local.md` | Local credentials | < 30 lines | + +**Rules:** +- **CLAUDE.md**: Only include instructions Claude needs on every task. Move niche/infrequent knowledge to separate files in `.claude/docs/` that can be read on demand. +- **MEMORY.md**: This is an index only — one line per memory file with a brief description. Never write memory content directly into MEMORY.md. +- **CLAUDE.local.md**: Credentials only. Do not add project notes here. +- **Persona files** (`.claude/personas/*.md`): These are only loaded when a sub-agent explicitly reads them — they don't auto-load. Keep them focused, but they don't need to be ultra-short. + +## Two Kinds of Token Limits + +Claude has two separate token limits that are easy to confuse: + +### Context window (per-conversation limit) + +The maximum tokens a model can process in a **single conversation**. This is a fixed technical limit of the model itself. If a conversation exceeds it, older messages are compressed or dropped. You never "run out" of context window across conversations — each conversation starts fresh. 
+ +| Model | Context window | Max output | +|-----------|---------------|-------------| +| Haiku 4.5 | 200k tokens | 64k tokens | +| Sonnet 4.6| 1M tokens | 64k tokens | +| Opus 4.6 | 1M tokens | 128k tokens | + +> **Source:** [Anthropic models overview](https://platform.claude.com/docs/en/about-claude/models/overview) + +### Subscription quota (account-level limit) + +The total tokens you can use across **all conversations combined** within a rolling time window. When you hit this, you see *"You've hit your limit - resets 3pm"*. Cheaper models allow more total tokens before hitting the cap: + +| Model | Session (~5h) | Weekly (~7d) | +|--------|--------------|-------------| +| Haiku | ~1.2M tokens | ~6M tokens | +| Sonnet | ~400K tokens | ~2M tokens | +| Opus | ~200K tokens | ~1M tokens | + +These are approximate estimates — Anthropic does not publish exact numbers. Calibrate your own values using [claude.ai/settings/usage](https://claude.ai/settings/usage) (see the [usage tracker setup guide](../../usage-tracker/SETUP.md)). + +### Why this matters for parallel agents + +A 3-agent parallel run with Haiku might use ~90K tokens total. That fits easily in Haiku's 200k context window per agent, but it consumes ~7.5% of your ~1.2M session quota. The same run with Opus uses similar tokens but that's ~45% of your ~200K session quota. **The context window is rarely the bottleneck — the subscription quota is.** + +## Model Selection + +All parallel sub-agent skills ask which model to use at run time. **Haiku is the default and recommended choice** for parallel runs — it costs significantly less from your subscription quota than Sonnet or Opus. 
+ +| Model | Context window | Cap cost | Best for | +|--------------------------|---------------|----------|-------------------------------------------------------------------| +| **Haiku 4.5 (default)** | 200k tokens | Lowest | Most parallel test runs — broad coverage, fast, quota-efficient | +| **Sonnet 4.6** | 1M tokens | Higher | Browser-heavy tasks with many snapshots, or nuanced analysis | +| **Opus 4.6** | 1M tokens | Highest | Final pre-release testing or critical targeted reviews | + +Skills that ask for a model choice when launching agents: +- `/test-counsel` — 8 agents (one per persona) +- `/feature-counsel` — 8 agents (one per persona) +- `/test-app` (Full mode) — 6 agents (one per perspective) +- `/opsx-pipeline` — 1–5 agents (model selectable per change or uniformly for all) +- `/test-scenario-run` — Haiku by default, Sonnet optional (asked per run) + +**Choosing Sonnet or Opus:** Both have a 1M context window vs Haiku's 200k. For browser-heavy tasks that process many page snapshots or read large files, Sonnet's larger context is an advantage. Reserve Opus for final pre-release sweeps or critical targeted reviews where maximum reasoning depth matters. + +The main conversation (where you type commands) always uses whichever model you have active. Only the sub-agents use the model you select when prompted. + +For guidance on which testing commands to use and when, see [testing.md](testing.md). + +## Monitor Your Live Usage + +The [usage tracker](../../usage-tracker/README.md) lets you watch your token consumption in real time from a terminal panel in VS Code — useful for knowing how much cap you have left before starting a parallel-agent run. 
+ +```bash +# One-line status check +python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar + +# Live monitoring (30s refresh) +python3 .claude/usage-tracker/claude-usage-tracker.py --monitor +``` + +The tracker reads Claude Code's session files (`~/.claude/projects/`) directly and is accurate for API token counts. The **limit thresholds** are approximate — verify your real cap at [claude.ai/settings/usage](https://claude.ai/settings/usage). Setup instructions: [`.claude/usage-tracker/SETUP.md`](../../usage-tracker/SETUP.md). diff --git a/docs/claude/testing.md b/docs/claude/testing.md index 80dbaa9..df755f0 100644 --- a/docs/claude/testing.md +++ b/docs/claude/testing.md @@ -123,7 +123,7 @@ This is the only testing-adjacent command that runs *before* implementation. It **Cap impact:** Very high — 8 parallel agents. Open a fresh Claude window before running. See [parallel-agents.md](parallel-agents.md). -**See:** [.claude/skills/test-counsel/SKILL.md](../skills/test-counsel/SKILL.md) +**See:** [.claude/skills/test-counsel/SKILL.md](https://github.com/ConductionNL/hydra/blob/main/.claude/skills/test-counsel/SKILL.md) --- @@ -139,7 +139,7 @@ This is the only testing-adjacent command that runs *before* implementation. It **Cap impact:** Low (Quick) to Very high (Full). See [parallel-agents.md](parallel-agents.md). 
-**See:** [.claude/skills/test-app/SKILL.md](../skills/test-app/SKILL.md) +**See:** [.claude/skills/test-app/SKILL.md](https://github.com/ConductionNL/hydra/blob/main/.claude/skills/test-app/SKILL.md) --- diff --git a/docs/claude/workflow.md b/docs/claude/workflow.md index 5c37f16..18550a6 100644 --- a/docs/claude/workflow.md +++ b/docs/claude/workflow.md @@ -1,265 +1,265 @@ -# Spec-Driven Development Workflow - -_This is the **architecture reference** — see [Getting Started](./getting-started.md) for setup and your first change, and [End-to-End Walkthrough](./walkthrough.md) for a complete concrete example._ - -## Overview - -This workspace uses a spec-driven development workflow that combines: -- **OpenSpec** — Structured specifications alongside code -- **GitHub Issues** — Visual progress tracking via kanban boards -- **Ralph Wiggum loops** — Focused, low-context AI coding iterations -- **Spec verification** — Automated review of code against specifications - -The key insight: **specs are written once, then broken into small JSON tasks** that each point back to a specific spec section. This means AI coding loops can work with minimal context (just the task + its spec ref) instead of loading entire spec documents. - -## Architecture - -All specs and changes live in their **primary app repository** (submodule). There is no root `openspec/` directory. Workflow docs and skills live in `.claude/` (`claude-code-config` repo). - -``` -apps-extra/ # Workspace root -├── project.md # Generic guidelines (all projects) -├── .claude/ # Claude Code config (company-wide repo) -│ ├── CLAUDE.md # Workflow instructions -│ ├── skills/ # OpenSpec skills (opsx-new, opsx-ff, etc.) 
-│ └── docs/ # This documentation -│ -├── openregister/ # FOUNDATION REPO -│ ├── project.md # Project description & context -│ └── openspec/ -│ ├── config.yaml # Project config -│ ├── specs/ # Domain + shared specs -│ │ ├── nextcloud-app/ # Shared: NC app conventions -│ │ ├── api-patterns/ # Shared: API conventions -│ │ ├── docker/ # Shared: Docker environment -│ │ ├── release-workflows/ # Shared: Release workflows -│ │ └── ... # Domain specs (registers, schemas, etc.) -│ └── changes/ # Active changes -│ └── add-feature-x/ -│ ├── proposal.md # Why & what -│ ├── discovery.md # Research output (optional — uncertain approach) -│ ├── contract.md # API contract (optional — cross-project APIs) -│ ├── specs/ # Delta specs (ADDED/MODIFIED/REMOVED) -│ ├── design.md # How (technical approach) -│ ├── migration.md # DB migration plan (optional — schema changes) -│ ├── test-plan.md # Test cases pre-defined from specs (optional) -│ ├── tasks.md # Implementation checklist -│ ├── plan.json # Task tracking JSON (generated by /opsx-plan-to-issues) -│ └── review.md # Verification report (generated) -│ -├── opencatalogi/ # Same openspec/ pattern per app -│ ├── project.md -│ └── openspec/ -``` - -**Primary spec ownership:** -- `openregister` (foundation) — `nextcloud-app/`, `api-patterns/`, `docker/`, `release-workflows/` -- `nldesign` — `nl-design/` -- `pipelinq` — `pipeline/`, `pipeline-views/` - -## The Full Flow - -### Phase 1: Spec Building - -Start by defining what you're building. This creates structured, reviewable specifications. - -``` -/opsx-new add-woo-search -``` - -This creates `openspec/changes/add-woo-search/` with metadata. Then either: - -**Fast-forward (all at once):** -``` -/opsx-ff -``` -Creates proposal → specs → design → tasks in dependency order. 
- -**Or incrementally:** -``` -/opsx-continue # Creates proposal -/opsx-continue # Creates specs -/opsx-continue # Creates design -/opsx-continue # Creates tasks -``` - -**Review the artifacts.** This is your chance to refine requirements before any code is written. The core artifacts form a dependency chain: - -``` -proposal → specs → design → tasks - (why) (what) (how) (steps) -``` - -Three optional artifacts can be inserted when needed: - -``` -proposal → discovery → specs (use when approach or NC API availability is uncertain) -proposal → contract → specs (use when change introduces API consumed by other projects) -design → migration → tasks (use when change introduces DB/schema changes) -specs → test-plan → tasks (use to pre-define test cases before implementation) -``` - -**test-plan and test scenarios:** A `test-plan.md` maps spec scenarios to named test cases (TC-1, TC-2, …) before any code is written — it answers "what does done look like?" After implementation, TCs that represent ongoing regression value should be promoted to reusable test scenarios via `/test-scenario-create`. Those `TS-NNN-slug.md` files persist after the change is archived and are automatically picked up by `/test-counsel`, `/test-app`, and `/test-persona-*`. - -### Phase 2: Plan to GitHub Issues - -Once specs are reviewed and approved, convert them to trackable work items: - -``` -/opsx-plan-to-issues -``` - -This command: -1. Parses `tasks.md` into structured JSON -2. Creates a **tracking issue** (epic) on GitHub with a full task checklist -3. Creates **individual issues** per task, each containing: - - Task description - - Acceptance criteria (from spec scenarios) - - Spec reference (link to the relevant spec section) - - Files likely affected - - Labels: `openspec`, `` -4. 
Saves `plan.json` with all GitHub issue numbers linked - -**Why GitHub Issues?** -- Visual kanban board (GitHub Projects) -- Progress visible to the whole team -- Each issue links back to specs for traceability -- Can be managed independently of Claude sessions - -### Phase 3: Implementation - -Start the focused implementation loop: - -``` -/opsx-apply -``` - -> **Note:** `/opsx-ralph-start` is a planned dedicated implementation loop with minimal-context loading and deeper GitHub Issues integration — not yet implemented. Use `/opsx-apply` for now; it already reads `plan.json` and supports GitHub Issues sync when a `plan.json` exists. - -**Automated alternative — `/opsx-apply-loop` (experimental):** - -Runs Phases 3 → 4 → 5 in one hands-off command inside an isolated Docker container: - -``` -/opsx-apply-loop procest add-sla-tracking -/opsx-apply-loop # asks which app + change -``` - -The loop runs `/opsx-apply` → `/opsx-verify` up to 5 times per app, optionally followed by targeted single-agent tests (max 3 test iterations), then archives when verify is clean and handles git commit and GitHub sync on the host. Use this when you want to walk away and let Claude work through the full cycle automatically. Requires a container authentication token — the Docker container cannot use interactive OAuth. Set `CLAUDE_CODE_AUTH_TOKEN` (preferred — free, uses your subscription) or `ANTHROPIC_API_KEY` (fallback — costs money) in your `~/.bashrc`. See [Getting Started — Container authentication](getting-started.md#prerequisites) for step-by-step setup. - -Each iteration of the loop: -1. **Reads plan.json** — finds the next pending task -2. **Reads ONLY the referenced spec section** — via `spec_ref` pointer -3. 
**Implements the task** — following acceptance criteria, including: - - **Backend logic** (service/controller) - - **UI** so users can actually use the feature (Vue component, page, dialog) - - **Tests**: unit tests (PHPUnit), API tests (Newman/Postman), browser tests (Playwright MCP) -4. **Runs tests** — unit tests, Newman tests, and browser verification MUST pass before marking complete -5. **Updates progress** — marks task done in plan.json and tasks.md -6. **Closes the GitHub issue** — with a summary comment -7. **Moves to the next task** — or stops if all done - -**Why this works:** -- Minimal context per iteration (just the task + its spec section) -- No "amnesia" — plan.json tracks state across sessions -- Visual progress — GitHub issues close as work completes -- Resumable — if interrupted, picks up where it left off -- Tests catch regressions immediately — before moving to the next task - -### Phase 4: Review - -After all tasks are complete, verify the implementation: - -``` -/opsx-verify -``` - -> **Note:** `/opsx-ralph-review` is a planned dedicated review command that will cross-reference shared specs and create GitHub Issues for findings — not yet implemented. Use `/opsx-verify` for now; it already supports GitHub Issues sync via `plan.json` when present. - -This command: -1. Reads ALL spec requirements (ADDED/MODIFIED/REMOVED) -2. Checks each against the actual implementation -3. Cross-references with shared specs (NC conventions, API patterns, etc.) -4. Categorizes findings: - - **CRITICAL** — Must fix (spec requirement not met) - - **WARNING** — Should fix (partial compliance) - - **SUGGESTION** — Nice to have -5. Generates `review.md` in the change directory -6. 
Creates a GitHub issue if CRITICAL/WARNING findings exist - -### Phase 5: Archive - -Once review passes: - -``` -/opsx-archive -``` - -This: -- Merges delta specs into the app's `openspec/specs/` directory -- Moves the change to `openspec/changes/archive/YYYY-MM-DD-/` -- Creates or updates `CHANGELOG.md` with the completed tasks as versioned entries -- Preserves full audit trail - -## The plan.json Format - -```json -{ - "change": "add-woo-search", - "project": "opencatalogi", - "repo": "ConductionNL/opencatalogi", - "created": "2026-02-14T12:00:00Z", - "tracking_issue": 42, - "tasks": [ - { - "id": 1, - "title": "Add search API endpoint", - "description": "Create /api/woo/search endpoint with query parameter support", - "github_issue": 43, - "status": "pending", - "spec_ref": "openspec/specs/search/spec.md#requirement-search-api", - "acceptance_criteria": [ - "GIVEN a search query WHEN GET /api/woo/search?q=test THEN returns matching publications", - "GIVEN no results WHEN searching THEN returns empty array with 200" - ], - "files_likely_affected": [ - "lib/Controller/SearchController.php", - "lib/Service/SearchService.php" - ], - "labels": ["openspec", "add-woo-search"] - } - ] -} -``` - -**Key design decisions:** -- `spec_ref` uses `file#anchor` format so the AI can read just that section -- `acceptance_criteria` are extracted from spec scenarios, ready for verification -- `files_likely_affected` scopes the search space for implementation -- `github_issue` enables automatic close on completion -- `status` tracks progress across sessions (`pending` → `in_progress` → `completed`) - -## Spec Writing Guide - -See [writing-specs.md](writing-specs.md) for the complete guide — RFC 2119 keywords, Gherkin scenario format, delta spec operations (ADDED/MODIFIED/REMOVED/RENAMED), and common mistakes to avoid. 
- -## Commands Reference - -| Command | Phase | Description | -|---------|-------|-------------| -| `/opsx-new ` | Spec | Start a new change | -| `/opsx-ff` | Spec | Fast-forward all artifacts | -| `/opsx-continue` | Spec | Create next artifact | -| `/opsx-plan-to-issues` | Plan | Tasks → JSON + GitHub Issues | -| `/opsx-apply` | Implement | Implement tasks from plan.json (use this; `/opsx-ralph-start` not yet built) | -| `/opsx-verify` | Review | Verify implementation against specs (use this; `/opsx-ralph-review` not yet built) | -| `/opsx-archive` | Archive | Complete and preserve change | - -## Tips - -- **Start small**: Try the flow on a small feature first to build muscle memory -- **Review specs before coding**: The spec review is the most valuable step — catch issues before writing code -- **Keep tasks small**: Each task should be completable in one Ralph Wiggum iteration (15-30 min of focused work) -- **Use shared specs**: Reference cross-project specs in your delta specs to avoid reinventing patterns -- **Trust the JSON**: The plan.json is your source of truth during implementation — it survives context window resets -- **GitHub is your dashboard**: Use GitHub Projects to visualize progress across multiple changes and projects +# Spec-Driven Development Workflow + +_This is the **architecture reference** — see [Getting Started](./getting-started.md) for setup and your first change, and [End-to-End Walkthrough](./walkthrough.md) for a complete concrete example._ + +## Overview + +This workspace uses a spec-driven development workflow that combines: +- **OpenSpec** — Structured specifications alongside code +- **GitHub Issues** — Visual progress tracking via kanban boards +- **Ralph Wiggum loops** — Focused, low-context AI coding iterations +- **Spec verification** — Automated review of code against specifications + +The key insight: **specs are written once, then broken into small JSON tasks** that each point back to a specific spec section. 
This means AI coding loops can work with minimal context (just the task + its spec ref) instead of loading entire spec documents. + +## Architecture + +All specs and changes live in their **primary app repository** (submodule). There is no root `openspec/` directory. Workflow docs and skills live in `.claude/` (`claude-code-config` repo). + +``` +apps-extra/ # Workspace root +├── project.md # Generic guidelines (all projects) +├── .claude/ # Claude Code config (company-wide repo) +│ ├── CLAUDE.md # Workflow instructions +│ ├── skills/ # OpenSpec skills (opsx-new, opsx-ff, etc.) +│ └── docs/ # This documentation +│ +├── openregister/ # FOUNDATION REPO +│ ├── project.md # Project description & context +│ └── openspec/ +│ ├── config.yaml # Project config +│ ├── specs/ # Domain + shared specs +│ │ ├── nextcloud-app/ # Shared: NC app conventions +│ │ ├── api-patterns/ # Shared: API conventions +│ │ ├── docker/ # Shared: Docker environment +│ │ ├── release-workflows/ # Shared: Release workflows +│ │ └── ... # Domain specs (registers, schemas, etc.) 
+│ └── changes/ # Active changes +│ └── add-feature-x/ +│ ├── proposal.md # Why & what +│ ├── discovery.md # Research output (optional — uncertain approach) +│ ├── contract.md # API contract (optional — cross-project APIs) +│ ├── specs/ # Delta specs (ADDED/MODIFIED/REMOVED) +│ ├── design.md # How (technical approach) +│ ├── migration.md # DB migration plan (optional — schema changes) +│ ├── test-plan.md # Test cases pre-defined from specs (optional) +│ ├── tasks.md # Implementation checklist +│ ├── plan.json # Task tracking JSON (generated by /opsx-plan-to-issues) +│ └── review.md # Verification report (generated) +│ +├── opencatalogi/ # Same openspec/ pattern per app +│ ├── project.md +│ └── openspec/ +``` + +**Primary spec ownership:** +- `openregister` (foundation) — `nextcloud-app/`, `api-patterns/`, `docker/`, `release-workflows/` +- `nldesign` — `nl-design/` +- `pipelinq` — `pipeline/`, `pipeline-views/` + +## The Full Flow + +### Phase 1: Spec Building + +Start by defining what you're building. This creates structured, reviewable specifications. + +``` +/opsx-new add-woo-search +``` + +This creates `openspec/changes/add-woo-search/` with metadata. Then either: + +**Fast-forward (all at once):** +``` +/opsx-ff +``` +Creates proposal → specs → design → tasks in dependency order. + +**Or incrementally:** +``` +/opsx-continue # Creates proposal +/opsx-continue # Creates specs +/opsx-continue # Creates design +/opsx-continue # Creates tasks +``` + +**Review the artifacts.** This is your chance to refine requirements before any code is written. 
The core artifacts form a dependency chain: + +``` +proposal → specs → design → tasks + (why) (what) (how) (steps) +``` + +Three optional artifacts can be inserted when needed: + +``` +proposal → discovery → specs (use when approach or NC API availability is uncertain) +proposal → contract → specs (use when change introduces API consumed by other projects) +design → migration → tasks (use when change introduces DB/schema changes) +specs → test-plan → tasks (use to pre-define test cases before implementation) +``` + +**test-plan and test scenarios:** A `test-plan.md` maps spec scenarios to named test cases (TC-1, TC-2, …) before any code is written — it answers "what does done look like?" After implementation, TCs that represent ongoing regression value should be promoted to reusable test scenarios via `/test-scenario-create`. Those `TS-NNN-slug.md` files persist after the change is archived and are automatically picked up by `/test-counsel`, `/test-app`, and `/test-persona-*`. + +### Phase 2: Plan to GitHub Issues + +Once specs are reviewed and approved, convert them to trackable work items: + +``` +/opsx-plan-to-issues +``` + +This command: +1. Parses `tasks.md` into structured JSON +2. Creates a **tracking issue** (epic) on GitHub with a full task checklist +3. Creates **individual issues** per task, each containing: + - Task description + - Acceptance criteria (from spec scenarios) + - Spec reference (link to the relevant spec section) + - Files likely affected + - Labels: `openspec`, `` +4. 
Saves `plan.json` with all GitHub issue numbers linked + +**Why GitHub Issues?** +- Visual kanban board (GitHub Projects) +- Progress visible to the whole team +- Each issue links back to specs for traceability +- Can be managed independently of Claude sessions + +### Phase 3: Implementation + +Start the focused implementation loop: + +``` +/opsx-apply +``` + +> **Note:** `/opsx-ralph-start` is a planned dedicated implementation loop with minimal-context loading and deeper GitHub Issues integration — not yet implemented. Use `/opsx-apply` for now; it already reads `plan.json` and supports GitHub Issues sync when a `plan.json` exists. + +**Automated alternative — `/opsx-apply-loop` (experimental):** + +Runs Phases 3 → 4 → 5 in one hands-off command inside an isolated Docker container: + +``` +/opsx-apply-loop procest add-sla-tracking +/opsx-apply-loop # asks which app + change +``` + +The loop runs `/opsx-apply` → `/opsx-verify` up to 5 times per app, optionally followed by targeted single-agent tests (max 3 test iterations), then archives when verify is clean and handles git commit and GitHub sync on the host. Use this when you want to walk away and let Claude work through the full cycle automatically. Requires a container authentication token — the Docker container cannot use interactive OAuth. Set `CLAUDE_CODE_AUTH_TOKEN` (preferred — free, uses your subscription) or `ANTHROPIC_API_KEY` (fallback — costs money) in your `~/.bashrc`. See [Getting Started — Container authentication](getting-started.md#prerequisites) for step-by-step setup. + +Each iteration of the loop: +1. **Reads plan.json** — finds the next pending task +2. **Reads ONLY the referenced spec section** — via `spec_ref` pointer +3. 
**Implements the task** — following acceptance criteria, including: + - **Backend logic** (service/controller) + - **UI** so users can actually use the feature (Vue component, page, dialog) + - **Tests**: unit tests (PHPUnit), API tests (Newman/Postman), browser tests (Playwright MCP) +4. **Runs tests** — unit tests, Newman tests, and browser verification MUST pass before marking complete +5. **Updates progress** — marks task done in plan.json and tasks.md +6. **Closes the GitHub issue** — with a summary comment +7. **Moves to the next task** — or stops if all done + +**Why this works:** +- Minimal context per iteration (just the task + its spec section) +- No "amnesia" — plan.json tracks state across sessions +- Visual progress — GitHub issues close as work completes +- Resumable — if interrupted, picks up where it left off +- Tests catch regressions immediately — before moving to the next task + +### Phase 4: Review + +After all tasks are complete, verify the implementation: + +``` +/opsx-verify +``` + +> **Note:** `/opsx-ralph-review` is a planned dedicated review command that will cross-reference shared specs and create GitHub Issues for findings — not yet implemented. Use `/opsx-verify` for now; it already supports GitHub Issues sync via `plan.json` when present. + +This command: +1. Reads ALL spec requirements (ADDED/MODIFIED/REMOVED) +2. Checks each against the actual implementation +3. Cross-references with shared specs (NC conventions, API patterns, etc.) +4. Categorizes findings: + - **CRITICAL** — Must fix (spec requirement not met) + - **WARNING** — Should fix (partial compliance) + - **SUGGESTION** — Nice to have +5. Generates `review.md` in the change directory +6. 
Creates a GitHub issue if CRITICAL/WARNING findings exist + +### Phase 5: Archive + +Once review passes: + +``` +/opsx-archive +``` + +This: +- Merges delta specs into the app's `openspec/specs/` directory +- Moves the change to `openspec/changes/archive/YYYY-MM-DD-/` +- Creates or updates `CHANGELOG.md` with the completed tasks as versioned entries +- Preserves full audit trail + +## The plan.json Format + +```json +{ + "change": "add-woo-search", + "project": "opencatalogi", + "repo": "ConductionNL/opencatalogi", + "created": "2026-02-14T12:00:00Z", + "tracking_issue": 42, + "tasks": [ + { + "id": 1, + "title": "Add search API endpoint", + "description": "Create /api/woo/search endpoint with query parameter support", + "github_issue": 43, + "status": "pending", + "spec_ref": "openspec/specs/search/spec.md#requirement-search-api", + "acceptance_criteria": [ + "GIVEN a search query WHEN GET /api/woo/search?q=test THEN returns matching publications", + "GIVEN no results WHEN searching THEN returns empty array with 200" + ], + "files_likely_affected": [ + "lib/Controller/SearchController.php", + "lib/Service/SearchService.php" + ], + "labels": ["openspec", "add-woo-search"] + } + ] +} +``` + +**Key design decisions:** +- `spec_ref` uses `file#anchor` format so the AI can read just that section +- `acceptance_criteria` are extracted from spec scenarios, ready for verification +- `files_likely_affected` scopes the search space for implementation +- `github_issue` enables automatic close on completion +- `status` tracks progress across sessions (`pending` → `in_progress` → `completed`) + +## Spec Writing Guide + +See [writing-specs.md](writing-specs.md) for the complete guide — RFC 2119 keywords, Gherkin scenario format, delta spec operations (ADDED/MODIFIED/REMOVED/RENAMED), and common mistakes to avoid. 
+ +## Commands Reference + +| Command | Phase | Description | +|---------|-------|-------------| +| `/opsx-new ` | Spec | Start a new change | +| `/opsx-ff` | Spec | Fast-forward all artifacts | +| `/opsx-continue` | Spec | Create next artifact | +| `/opsx-plan-to-issues` | Plan | Tasks → JSON + GitHub Issues | +| `/opsx-apply` | Implement | Implement tasks from plan.json (use this; `/opsx-ralph-start` not yet built) | +| `/opsx-verify` | Review | Verify implementation against specs (use this; `/opsx-ralph-review` not yet built) | +| `/opsx-archive` | Archive | Complete and preserve change | + +## Tips + +- **Start small**: Try the flow on a small feature first to build muscle memory +- **Review specs before coding**: The spec review is the most valuable step — catch issues before writing code +- **Keep tasks small**: Each task should be completable in one Ralph Wiggum iteration (15-30 min of focused work) +- **Use shared specs**: Reference cross-project specs in your delta specs to avoid reinventing patterns +- **Trust the JSON**: The plan.json is your source of truth during implementation — it survives context window resets +- **GitHub is your dashboard**: Use GitHub Projects to visualize progress across multiple changes and projects diff --git a/docs/claude/writing-docs.md b/docs/claude/writing-docs.md index ab4de3e..2146095 100644 --- a/docs/claude/writing-docs.md +++ b/docs/claude/writing-docs.md @@ -1,554 +1,554 @@ -# Writing Documentation - -How to write and maintain documentation in this project. These rules apply whenever Claude or a developer writes or updates any `.md` file — `docs/`, `.claude/docs/`, `openspec/specs/`, `openspec/ROADMAP.md`, `README.md`, or anywhere else. - -See [writing-specs.md](writing-specs.md) for the separate guide on writing OpenSpec requirements and scenarios. 
- ---- - -## Contents - -**Part 1 — Principles** -- [The Core Rule: Reference, Don't Duplicate](#the-core-rule-reference-dont-duplicate) -- [Sources of Truth](#sources-of-truth) -- [Audience Determines Location](#audience-determines-location) -- [Language](#language) -- [Where Does This Content Go?](#where-does-this-content-go) - -**Part 2 — Writing Mechanics** -- [Document Lifecycle Markers](#document-lifecycle-markers) -- [Link Structure](#link-structure) -- [Lists, Tables, and Structure](#lists-tables-and-structure) -- [Table of Contents](#table-of-contents) -- [Diagrams and Images](#diagrams-and-images) -- [Formatting Alignment](#formatting-alignment) - -**Part 3 — Maintenance** -- [Keeping Docs Current](#keeping-docs-current) -- [Staleness Signals](#staleness-signals) -- [Outdated and Legacy Documentation](#outdated-and-legacy-documentation) -- [Common Mistakes](#common-mistakes) -- [Writing Anti-Patterns](#writing-anti-patterns) - ---- - -## Part 1 — Principles - ---- - -## The Core Rule: Reference, Don't Duplicate - -**Every piece of information should live in exactly one place.** When another document needs to refer to it, link to the source of truth — never copy the content. - -``` -✓ Good — link to the source of truth -"See [openspec/specs/publications/spec.md](../../openspec/specs/publications/spec.md) for API endpoint requirements." - -✗ Bad — copies requirements into a guide -"The API MUST return HTTP 404 when the publication does not exist." - → this information already lives in the spec; now you have two places to keep in sync -``` - -When content is duplicated, it will eventually diverge. One copy gets updated; the other goes stale. The reader gets confused. The solution is to pick the source of truth and link from everywhere else. 
- ---- - -## Sources of Truth - -| Concern | Source of truth | -|-------------------------------------------------------------|--------------------------------------------------------------------------| -| **Vision & Direction** | | -| Project vision and phases | `openspec/ROADMAP.md` (if present) | -| Target audience and personas | `openspec/audience.md` (if present) | -| Architectural decisions (why) | `openspec/architecture/adr-{NNN}-*.md` | -| Architecture decisions index | `.claude/openspec/architecture/README.md` | -| Technical decisions and constraints | `openspec/architecture/` ADRs | -| **Standards & Patterns** | | -| NL Design System and UI standards | `openspec/specs/{domain}/spec.md` (app-specific) or company ADR-003 | -| API conventions and URL structure | `openspec/specs/{domain}/spec.md` (app-specific) or company ADR-002 | -| **Requirements** | | -| Feature requirements and scenarios | `openspec/specs/{domain}/spec.md` | -| **Guides & Documentation** | | -| User-facing how-to guides | `docs/` feature docs | -| App administrator procedures | `docs/admin-guide.md` (if present) | -| Developer setup and environment | `README.md` | -| Available `make` commands and scripts | workspace root `Makefile` | -| Developer workflow and commands | `.claude/docs/commands.md`, `.claude/docs/workflow.md` | -| Testing conventions and persona usage | `.claude/docs/testing.md` | -| Docker environment and setup | `.claude/docs/docker.md`, `.claude/docs/getting-started.md` | -| Frontend standards | `.claude/docs/frontend-standards.md` | -| Standards compliance references | `docs/features/README.md` (GEMMA, ZGW, Forum Standaardisatie) | -| **Testing** | | -| Persona testing behavior and scripts | `.claude/personas/` | -| Reusable test scenarios (Gherkin) | `test-scenarios/TS-*.md` | -| **Meta** | | -| Spec and doc writing conventions | `.claude/docs/writing-specs.md`, `.claude/docs/writing-docs.md` | -| OpenSpec schema and artifact templates | 
`.claude/openspec/schemas/conduction/schema.yaml`, `templates/` | -| Parallel agent conventions | `.claude/docs/parallel-agents.md` | -| Claude harness configuration (permissions, hooks, env vars) | `.claude/global-settings/settings.json` | -| Global Claude settings guide | `.claude/docs/global-claude-settings.md` | -| Claude usage tracking documentation | `.claude/usage-tracker/README.md` | - ---- - -## Audience Determines Location - -Each document has one target audience. Don't mix them. - -| Audience | Location | Style | -|---------------------------------------|----------------------------------------------------------|------------------------------------------------------------------------------| -| End users / citizens | `docs/` feature docs | Plain language, no jargon, task-oriented | -| App administrator | `docs/admin-guide.md` (if present) | Task-oriented, step-by-step | -| Developer (setup, environment) | `README.md` | Technical, precise | -| Claude / spec workflow | `.claude/docs/`, `.claude/skills/` | Instruction-style, precise — Claude reads this at runtime | -| Spec / requirements | `openspec/specs/` | RFC 2119, Gherkin — see [writing-specs.md](writing-specs.md) | -| Architectural decisions (why) | `openspec/architecture/` | ADR format — context, decision, consequences; written for future developers | -| Claude test agents (persona testers) | `.claude/personas/` | Persona cards — behavior, goals, device preference; loaded by test commands at runtime | -| Claude test agents (scenario execution) | `test-scenarios/` | Gherkin-style test scenarios; loaded by `/test-scenario-run` | - -**Developer/technical content does not belong in `docs/`.** If you find implementation details, class names, or spec requirements in a user-facing guide, replace them with plain-language descriptions or links to the spec. - ---- - -## Language - -**All documentation is written in English** — `docs/`, `.claude/docs/`, `openspec/`, `README.md`. 
- -**Filenames** also MUST be English — `user-guide.md`, not `handleiding.md`. - -**Language support for user-facing features:** Many apps in this ecosystem are Dutch-first for end users. When documenting such features, note the Dutch-first default and where English is also required. Per ADR-005, both Dutch and English MUST be supported for i18n-enabled features. - ---- - -## Where Does This Content Go? - -Use this when you're not sure which file to write new content into. These rules cover the most common cases without needing to cross-reference both tables above. - -1. **Is it _why_ a decision was made?** → `.claude/openspec/architecture/adr-{NNN}-*.md` -2. **Is it _what must be true_ (a requirement, acceptance criterion, or constraint)?** → `.claude/openspec/specs/{domain}/spec.md` (or per-project `openspec/specs/{domain}/spec.md`) -3. **Is it instructions for an _end user or citizen_ using an app?** → `docs/` feature docs for that app -4. **Is it instructions for an _app administrator_?** → `docs/admin-guide.md` (if present in that app) -5. **Is it _developer setup_ or environment instructions?** → `README.md` -6. **Is it instructions for _Claude_ at runtime (workflow, testing, commands, spec writing)?** → `.claude/docs/` -7. **Is it about _project direction, phase goals, or technical strategy_?** → `.claude/openspec/ROADMAP.md` (if present) -8. **Is it _standards compliance_ information (GEMMA, ZGW, Forum Standaardisatie)?** → `docs/features/README.md` -9. **Is it a reusable _test flow_ (Given/When/Then)?** → `test-scenarios/TS-*.md` - -If you're still unsure: write it once in the most specific location and link from everywhere else. When content could fit in two places, it almost always belongs in the more authoritative one (spec over guide, ADR over design doc) and should be referenced from the other. 
- ---- - -## Part 2 — Writing Mechanics - ---- - -## Document Lifecycle Markers - -### The `[Future]` Marker - -In `docs/` user-facing guides, functionality that is not yet implemented is marked with `[Future]`: - -```markdown -## Export to PDF [Future] - -Users will be able to export publications to PDF format. -``` - -**Adding the marker:** -- Only use `[Future]` in `docs/` files — not in specs or `.claude/docs/` -- Only mark features on the active roadmap. Don't document speculative or far-future items — if you don't know when they'll ship, don't document them yet -- Write the section body in future tense: "Users will be able to..." - -**Auditing for stale markers:** -- Run `/sync-docs app` to check automatically -- When archiving any change, check whether it implements something currently marked `[Future]` in any doc -- When reading a doc and encountering a `[Future]` section, verify against current specs before assuming it's still future - -**Removing the marker — not just deletion:** -When a feature ships, don't just strip the label — do a content review: -1. Switch future tense to present tense: "will be available" → "is available" -2. Verify the description still matches what was actually built — planned and implemented are not always identical -3. Update any example steps, URLs, or screenshots -4. Check whether the companion guide (feature doc ↔ admin-guide) also has a `[Future]` section for the same feature — update both together -5. Remove any "once implemented..." caveats that assumed the feature wasn't ready - -### The `[Legacy]` Marker - -See [Outdated and Legacy Documentation](#outdated-and-legacy-documentation) for when to use `[Legacy]` and how to handle deprecated content. 
- ---- - -## Link Structure - -- Use **relative paths** for internal links, not absolute paths - - Good: `[spec](../../openspec/specs/publications/spec.md)` - - Bad: `/home/user/apps-extra/opencatalogi/openspec/specs/publications/spec.md` -- **Verify linked files exist** before writing the link — a broken link is worse than no link -- For section links, use the GitHub anchor format: `#section-name-lowercase-hyphenated` - ---- - -## Lists, Tables, and Structure - -### When to use a list - -**Bulleted list** — unordered items with no inherent sequence: -- Three or more items that would be awkward as a run-on sentence -- Items where order doesn't matter - -**Numbered list** — always use when sequence matters: -- Step-by-step instructions -- Ordered procedures where skipping or reordering a step would cause problems - -Avoid lists for fewer than three items — prose is usually cleaner: "Feature A and Feature B are both required" is better than a two-item bullet list. - -### When to use a table - -Use a table when each item has **two or more parallel attributes**: -- Comparing options across a consistent set of criteria -- Mapping one thing to another (status → meaning, command → effect, field → description) -- Reference material readers will scan rather than read linearly - -Don't use a table for a simple list of items with a single attribute — that's a bulleted list. 
- -### Ordering rows in a table - -- **Lifecycle or workflow order** — if rows represent phases, steps, or statuses (preferred for commands, status transitions, phases) -- **Most-used first** — if the table is a lookup reference readers scan frequently -- **Alphabetical** — only when there is no logical order and readers are likely to search by name -- Avoid insertion order or random order - -### Ordering list items - -- Put the most important or most common item first -- Use consistent grammatical parallelism — all items should start with the same form (all imperatives, all noun phrases, all clauses) -- For instructional lists, use the order the reader should encounter the items - ---- - -## Table of Contents - -**Add a ToC when:** -- The document has 5 or more sections and is longer than ~50 lines -- The document serves as an overview or index (any `README.md`) -- The document is a guide that readers navigate non-linearly (feature docs, admin-guide.md, commands.md) - -**Don't add a ToC when:** -- The document is short (under ~50 lines) -- The document has a single coherent top-to-bottom flow -- The document is primarily a single table or reference list - -### Keeping the ToC up to date - -- Use GitHub anchor format: `#section-name-lowercase-with-hyphens` -- When you add, rename, or remove a section heading, update the ToC in the same edit — never leave them out of sync -- Before adding a ToC link, verify the anchor matches the exact heading text (GitHub derives anchors from heading text, with spaces replaced by `-` and special characters stripped) -- Run `/sync-docs` to surface stale ToC entries automatically - ---- - -## Diagrams and Images - -### Diagrams - -Prefer diagrams over prose when the relationship between things is genuinely hard to express linearly — state transitions, multi-party flows, decision branches. Do not add a diagram just to make a doc look more thorough; a clear table or numbered list is often better. 
- -**Use Mermaid** for all new diagrams. Mermaid renders natively in GitHub, lives as text in the file (so it can be diffed and updated), and requires no external assets. - -✓ **Good** — inline Mermaid, lives with the doc, diffs cleanly: - -```mermaid -stateDiagram-v2 - [*] --> Pending - Pending --> Processing : task picked up - Processing --> Completed -``` - -✗ **Bad** — exported PNG of a diagram created in a separate tool: - -``` -![Feature flow](images/feature-flow.png) -``` - -→ now two things to keep in sync; the image goes stale silently - -**Mermaid diagram types and when to use them:** - -| Type | Use for | -|--------------------|----------------------------------------------------------------------| -| `flowchart` | Process flows, decision trees, "what happens when" | -| `sequenceDiagram` | Multi-party interactions (user → Nextcloud app → external service) | -| `stateDiagram-v2` | State machines — task lifecycle, status transitions | -| `erDiagram` | Data model relationships between entities | -| `gitGraph` | Branch topology (use sparingly — only if it genuinely aids understanding) | - -**Where diagrams live:** - -- Inline in the document that uses them — never in separate files -- Never copy a diagram into two documents; put it in the most authoritative location and link from the other - -**When not to use a diagram:** - -- When a table, numbered list, or short prose communicates the same thing clearly -- When the diagram would describe something that changes frequently — prose is cheaper to update than a Mermaid block -- When the audience is an end user or admin — `docs/` guides should use plain language, not technical diagrams - ---- - -### Images and Screenshots - -Use screenshots to illustrate UI steps that are genuinely hard to describe in text — for example, navigating to a specific setting buried in the admin interface. Do not screenshot things that change frequently; an outdated screenshot misleads more than it helps. 
- -**Where images live:** - -| Purpose | Location | Committed? | -|----------------------------------------------------------|--------------------------------------------|------------| -| Documentation screenshots for `docs/` guides | `docs/images/` | Yes — commit alongside the doc | -| Documentation screenshots for `.claude/docs/` | `.claude/docs/images/` | Yes — commit alongside the doc | -| Automated test screenshots (browser tests) | `{app}/test-results/` | **No** — gitignored | - -The `docs/images/` and `.claude/docs/images/` directories do not exist yet — create them when you add the first image. - -**The gitignore boundary:** - -Test screenshots saved to `{app}/test-results/` are gitignored. They are ephemeral test artifacts — do not use them as documentation assets. If a screenshot captured during a test run is worth keeping in documentation, copy it to the appropriate committed location: - -```bash -cp {app}/test-results/screenshots/feature-flow.png docs/images/feature-flow.png -``` - -Then reference `docs/images/feature-flow.png` in the doc, not the original path. - -**Taking screenshots with the browser agent:** - -When a screenshot would genuinely improve a doc and the app is running in Docker, use the browser agent to capture the specific screen you need. Save directly to the target `docs/images/` path — not to `test-results/` — so it is committed immediately and stays out of the gitignore. 
- -Use descriptive filenames based on what is shown, not sequential numbers: - -``` -✓ docs/images/admin-user-management.png -✗ docs/images/screenshot-1.png -✗ docs/images/image.png -``` - -**Referencing images in markdown:** - -Always use relative paths and write meaningful alt text: - -```markdown -![Admin user management screen](images/admin-user-management.png) -``` - -- `docs/` docs: path is relative to the doc file, so `images/filename.png` resolves to `docs/images/filename.png` -- `.claude/docs/` docs: same pattern — `images/filename.png` resolves to `.claude/docs/images/filename.png` -- Never use absolute paths (see [Link Structure](#link-structure)) - -**Keeping screenshots current:** - -- Note in the doc if a screenshot reflects a specific app version or configuration state -- When running `/sync-docs`, flag image references where the UI may have changed since the screenshot was taken -- When a UI step changes, retake the screenshot and replace the file — do not leave a stale image with a note saying "this may look different" - ---- - -## Formatting Alignment - -Correct visual alignment in tables and diagrams makes documentation easier to read, edit, and maintain. Misaligned source is a sign of a partial edit — fix it when you touch the file. - -### Markdown tables - -GFM renders tables regardless of source padding, but readable source matters for editing. 
When writing or updating a table: - -- **Separator rows** (`|---|---|`) must span the full width of each column — a one-character `|-|` under a wide column signals the row was added without checking alignment -- **Cell padding** should be visually consistent across rows in the same column — if most cells in a column are padded to 20 characters, an outlier cell should match -- **Pipe characters** (`|`) must be present on both ends of every row - -✓ Well-aligned: - -```markdown -| Concern | Source of truth | -|-----------------|-----------------------------| -| Requirements | `openspec/specs/` | -| Developer setup | `README.md` | -``` - -✗ Misaligned separator — won't break rendering, but signals a partial edit: - -```markdown -| Concern | Source of truth | -|--|--| -| Requirements | `openspec/specs/` | -``` - -### ASCII box diagrams - -ASCII diagrams use box-drawing characters to show lifecycle or flow order. Misalignment here is immediately visible to anyone reading the raw file. - -- **Vertical bars** (`│`) must sit in the same column on every row in the same block -- **Label/description spacing** must be consistent — if one row uses 8 spaces between a command name and its description, all rows in that block must use the same spacing -- **Borders** must be complete — `┌` and `┐` at the top, `└` and `┘` at the bottom, `─` characters filling horizontal lines without gaps - -✓ Consistent spacing: - -``` -│ 1. /opsx-new Start a new change │ -│ 2. /opsx-ff Generate all specs at once │ -│ 3. /opsx-apply Implement the tasks │ -``` - -✗ Inconsistent spacing — second row label runs into its description: - -``` -│ 1. /opsx-new Start a new change │ -│ 2. /opsx-ff Generate all specs at once │ -│ 3. /opsx-apply Implement the tasks │ -``` - -**When editing any file that contains a table or diagram:** check the entire table/diagram for alignment before saving — not just the rows you changed. A partial fix that leaves other rows misaligned is worse than leaving everything as-is. 
- ---- - -## Part 3 — Maintenance - ---- - -## Keeping Docs Current - -A stale doc is worse than no doc — it misleads. After any change that affects documented behavior: - -1. Update the source-of-truth file first -2. If the change affects a `docs/` guide (user-facing), update that too -3. Check for cross-references in other docs and update them if needed -4. After archiving a change, verify that the affected specs and `docs/` guides still reflect the new state -5. Run `/sync-docs` periodically to catch drift across all docs - ---- - -## Staleness Signals - -When reading or reviewing documentation, certain patterns are signals to stop and verify before trusting. Some indicate the doc has drifted from reality; others indicate it was never finished. - -| Pattern found in a doc | What to check | -|-------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------| -| `[Future]` | Whether the feature has since been implemented | -| `[Legacy]` | Whether the content can now be fully removed | -| `TODO` / `TBD` in a shipped doc | Whether it needs resolving or a proper `[Future]` marker | -| Hardcoded version numbers in prose | The relevant version source for what's actually pinned | -| File path references (`openspec/specs/publications/spec.md`, etc.) 
| Whether the file still exists at that path | -| Environment variable names | `.env.example` or app config to confirm still a valid variable | -| Hardcoded port or URL (`localhost:8080`) | App config to confirm current port and URL | -| Links to other docs | Whether the linked file and section still exist | -| Phase references ("In Phase 1", "POC phase") | `openspec/ROADMAP.md` to see if the phase has advanced | -| App or tool names ("OpenCatalogi", "OpenConnector") | App install scripts or `apps-extra/` to confirm still active | -| Persona names | `.claude/personas/` to confirm the persona still exists | -| Command names (`/opsx-archive`, `make reset`) | `.claude/skills/` or `Makefile` to confirm still valid | -| Table of Contents entries | Whether each linked section still exists with the same heading | -| "See [document title]" cross-references | Whether the referenced doc still has the described content | -| Screenshot references (`![alt](images/...)`) | Whether the file exists AND whether the UI has changed since the screenshot was taken | -| `(not yet created)` or `(none created yet)` in a table | Whether the file now exists and the annotation should be removed | -| Mermaid diagrams (states, flows, sequences) | Whether the underlying process, states, or parties still match reality | -| Specific UI navigation paths ("Go to Settings > Users") | Whether the menu structure still exists with those exact labels | -| Step-by-step numbered instructions in guides | Whether the step count and order still match the current UI | -| Code block examples with commands or config snippets | Whether the syntax or API contract still holds | -| Standards references ("GEMMA", "ZGW", "Forum Standaardisatie") | `docs/features/README.md` to confirm still the governing standards | -| ADR references (`adr-003-...`) | Whether the file exists at that path in `openspec/architecture/` | -| References to `openspec/changes/` proposals | Whether the change was archived and links need 
updating or removal | - ---- - -## Outdated and Legacy Documentation - -Docs accumulate. Some sections go stale, some get superseded by automation, and occasionally a whole file outlives its purpose. Knowing when to update, move, mark, or delete is as important as knowing how to write. - -### When to update in place - -Update a section when the underlying facts changed but the content's purpose and location are still correct: - -- A setting was renamed, a URL changed, or a step was added -- A `[Future]` marker should be removed because the feature shipped -- A link points to a file that was moved or renamed - -### When to move content - -Move content when it is in the right state but the wrong place — usually because the audience changed or the project structure evolved: - -- Technical steps that ended up in a user-facing guide → move to `README.md` or a developer doc -- A section in a user guide that only makes sense to a developer → move to `.claude/docs/` or `README.md` -- A spec requirement copy-pasted into a guide → replace with a link, remove the copy - -When moving, always replace the old location with a short link to the new one. Never just delete without redirecting. - -### When to mark as legacy - -Use a `[Legacy]` marker when content describes something that still works but should no longer be used or recommended: - -- An old setup procedure replaced by an automated script -- A manual configuration step that is now automated -- An API pattern or plugin version that has been superseded - -```markdown -## Manual CORS Configuration [Legacy] - -> This approach was used before the shared API middleware. Use the `openconnector` service instead — it handles CORS automatically. 
-``` - -### When to remove a section - -Remove a section outright (not just mark it) when: - -- The feature it describes was removed from scope entirely -- The content is factually wrong and there is no "old way" worth preserving -- The section is pure duplication of a source of truth elsewhere — replace with a link - -### When to remove an entire file - -Remove a whole doc file when: - -- All its content is superseded by another file or by automation -- The audience or purpose it served no longer exists in the project -- It was a transitional document (e.g. a migration guide) that is no longer relevant - -Before deleting, grep for the filename across all `.md` files to find incoming links. Update or remove them first. - -### Handling large duplicates - -When two docs describe the same thing at length, don't merge them line by line. Instead: - -1. Pick the source of truth (see the table above) -2. Keep the full content in the source-of-truth file -3. In the other file, replace the duplicate block with a one-line link: `See [X](path/to/file.md).` -4. Before removing the copy, check whether it contains any updates the source of truth is missing — merge those in first - -Run `/sync-docs` to surface large duplicates automatically. 
- ---- - -## Common Mistakes - -| Mistake | Fix | -|----------------------------------------------------------------|-------------------------------------------------------------------| -| Copying a spec requirement into a user guide | Link to the spec instead | -| Writing technical setup steps in a user-facing guide | Move to `README.md` or a developer doc | -| Describing the same feature in both the spec and a design doc | Keep requirements in the spec; keep design decisions in `design.md` | -| Using absolute file paths in links | Use relative paths | -| Describing API internals in user docs | Keep API details in specs and API docs | -| Marking a feature `[Future]` after it ships | Remove the marker when the feature is live | - ---- - -## Writing Anti-Patterns - -The mistakes above are structural — wrong place, wrong audience, wrong format. These are writing-style patterns that make documentation go stale faster or harder to read. - -| Anti-pattern | Why it's a problem | Fix | -|---------------------------------------------------------------------|-----------------------------------------------------------------|------------------------------------------------------------------------------| -| Using "currently", "as of now", "recently", "at the time of writing" | Becomes misleading the moment circumstances change | Write as timeless fact: "The app uses X" not "Currently, the app uses X" | -| Hardcoding version numbers in prose | Versions change; prose doesn't update automatically | Link to the relevant version source instead | -| "It should be noted that…" / "Please be aware that…" | Adds noise without adding information | State the fact directly | -| Describing what a thing *is* instead of what the reader should *do* | User guides become encyclopedias instead of task guides | Lead with the action: "Click X to do Y", not "X is the button that does Y" | -| Naming the actor vaguely ("the user", "you should") | Unclear whether "you" means end user, admin, or 
developer | Name the actor explicitly: "The administrator clicks…", "The citizen sees…" | -| Writing "as we discussed" or "following the recent change" | Assumes shared context the reader doesn't have | Docs must be self-contained; link to the change or ADR instead | -| Using Dutch strings without labelling them | Readers who don't speak Dutch can't tell if it's a slug, a label, or a typo | Annotate: `` `zaaktype` (Dutch term for case type) `` | -| Adding "TODO" or "TBD" in shipped documentation | Signals the doc is incomplete; confuses readers | Use `[Future]` with a specific description, or don't document it yet | -| Writing "see below" or "as mentioned above" | Breaks when the doc is restructured | Use a named section link: `[see API Conventions](#api-conventions)` | -| Doc file proliferation — creating a new file for every concern | Increases maintenance surface; readers can't find the right doc | Before creating a new file, check if the content belongs as a section in an existing one. A standalone doc is justified when it has internal navigation needs, targets a distinct audience, or is frequently referenced from multiple places. Run `/sync-docs dev` → Part C to audit doc structure periodically. | +# Writing Documentation + +How to write and maintain documentation in this project. These rules apply whenever Claude or a developer writes or updates any `.md` file — `docs/`, `.claude/docs/`, `openspec/specs/`, `openspec/ROADMAP.md`, `README.md`, or anywhere else. + +See [writing-specs.md](writing-specs.md) for the separate guide on writing OpenSpec requirements and scenarios. 
+ +--- + +## Contents + +**Part 1 — Principles** +- [The Core Rule: Reference, Don't Duplicate](#the-core-rule-reference-dont-duplicate) +- [Sources of Truth](#sources-of-truth) +- [Audience Determines Location](#audience-determines-location) +- [Language](#language) +- [Where Does This Content Go?](#where-does-this-content-go) + +**Part 2 — Writing Mechanics** +- [Document Lifecycle Markers](#document-lifecycle-markers) +- [Link Structure](#link-structure) +- [Lists, Tables, and Structure](#lists-tables-and-structure) +- [Table of Contents](#table-of-contents) +- [Diagrams and Images](#diagrams-and-images) +- [Formatting Alignment](#formatting-alignment) + +**Part 3 — Maintenance** +- [Keeping Docs Current](#keeping-docs-current) +- [Staleness Signals](#staleness-signals) +- [Outdated and Legacy Documentation](#outdated-and-legacy-documentation) +- [Common Mistakes](#common-mistakes) +- [Writing Anti-Patterns](#writing-anti-patterns) + +--- + +## Part 1 — Principles + +--- + +## The Core Rule: Reference, Don't Duplicate + +**Every piece of information should live in exactly one place.** When another document needs to refer to it, link to the source of truth — never copy the content. + +``` +✓ Good — link to the source of truth +"See [openspec/specs/publications/spec.md](../../openspec/specs/publications/spec.md) for API endpoint requirements." + +✗ Bad — copies requirements into a guide +"The API MUST return HTTP 404 when the publication does not exist." + → this information already lives in the spec; now you have two places to keep in sync +``` + +When content is duplicated, it will eventually diverge. One copy gets updated; the other goes stale. The reader gets confused. The solution is to pick the source of truth and link from everywhere else. 
+ +--- + +## Sources of Truth + +| Concern | Source of truth | +|-------------------------------------------------------------|--------------------------------------------------------------------------| +| **Vision & Direction** | | +| Project vision and phases | `openspec/ROADMAP.md` (if present) | +| Target audience and personas | `openspec/audience.md` (if present) | +| Architectural decisions (why) | `openspec/architecture/adr-{NNN}-*.md` | +| Architecture decisions index | `.claude/openspec/architecture/README.md` | +| Technical decisions and constraints | `openspec/architecture/` ADRs | +| **Standards & Patterns** | | +| NL Design System and UI standards | `openspec/specs/{domain}/spec.md` (app-specific) or company ADR-003 | +| API conventions and URL structure | `openspec/specs/{domain}/spec.md` (app-specific) or company ADR-002 | +| **Requirements** | | +| Feature requirements and scenarios | `openspec/specs/{domain}/spec.md` | +| **Guides & Documentation** | | +| User-facing how-to guides | `docs/` feature docs | +| App administrator procedures | `docs/admin-guide.md` (if present) | +| Developer setup and environment | `README.md` | +| Available `make` commands and scripts | workspace root `Makefile` | +| Developer workflow and commands | `.claude/docs/commands.md`, `.claude/docs/workflow.md` | +| Testing conventions and persona usage | `.claude/docs/testing.md` | +| Docker environment and setup | `.claude/docs/docker.md`, `.claude/docs/getting-started.md` | +| Frontend standards | `.claude/docs/frontend-standards.md` | +| Standards compliance references | `docs/features/README.md` (GEMMA, ZGW, Forum Standaardisatie) | +| **Testing** | | +| Persona testing behavior and scripts | `.claude/personas/` | +| Reusable test scenarios (Gherkin) | `test-scenarios/TS-*.md` | +| **Meta** | | +| Spec and doc writing conventions | `.claude/docs/writing-specs.md`, `.claude/docs/writing-docs.md` | +| OpenSpec schema and artifact templates | 
`.claude/openspec/schemas/conduction/schema.yaml`, `templates/` | +| Parallel agent conventions | `.claude/docs/parallel-agents.md` | +| Claude harness configuration (permissions, hooks, env vars) | `.claude/global-settings/settings.json` | +| Global Claude settings guide | `.claude/docs/global-claude-settings.md` | +| Claude usage tracking documentation | `.claude/usage-tracker/README.md` | + +--- + +## Audience Determines Location + +Each document has one target audience. Don't mix them. + +| Audience | Location | Style | +|---------------------------------------|----------------------------------------------------------|------------------------------------------------------------------------------| +| End users / citizens | `docs/` feature docs | Plain language, no jargon, task-oriented | +| App administrator | `docs/admin-guide.md` (if present) | Task-oriented, step-by-step | +| Developer (setup, environment) | `README.md` | Technical, precise | +| Claude / spec workflow | `.claude/docs/`, `.claude/skills/` | Instruction-style, precise — Claude reads this at runtime | +| Spec / requirements | `openspec/specs/` | RFC 2119, Gherkin — see [writing-specs.md](writing-specs.md) | +| Architectural decisions (why) | `openspec/architecture/` | ADR format — context, decision, consequences; written for future developers | +| Claude test agents (persona testers) | `.claude/personas/` | Persona cards — behavior, goals, device preference; loaded by test commands at runtime | +| Claude test agents (scenario execution) | `test-scenarios/` | Gherkin-style test scenarios; loaded by `/test-scenario-run` | + +**Developer/technical content does not belong in `docs/`.** If you find implementation details, class names, or spec requirements in a user-facing guide, replace them with plain-language descriptions or links to the spec. + +--- + +## Language + +**All documentation is written in English** — `docs/`, `.claude/docs/`, `openspec/`, `README.md`. 
+ +**Filenames** also MUST be English — `user-guide.md`, not `handleiding.md`. + +**Language support for user-facing features:** Many apps in this ecosystem are Dutch-first for end users. When documenting such features, note the Dutch-first default and where English is also required. Per ADR-005, both Dutch and English MUST be supported for i18n-enabled features. + +--- + +## Where Does This Content Go? + +Use this when you're not sure which file to write new content into. These rules cover the most common cases without needing to cross-reference both tables above. + +1. **Is it _why_ a decision was made?** → `.claude/openspec/architecture/adr-{NNN}-*.md` +2. **Is it _what must be true_ (a requirement, acceptance criterion, or constraint)?** → `.claude/openspec/specs/{domain}/spec.md` (or per-project `openspec/specs/{domain}/spec.md`) +3. **Is it instructions for an _end user or citizen_ using an app?** → `docs/` feature docs for that app +4. **Is it instructions for an _app administrator_?** → `docs/admin-guide.md` (if present in that app) +5. **Is it _developer setup_ or environment instructions?** → `README.md` +6. **Is it instructions for _Claude_ at runtime (workflow, testing, commands, spec writing)?** → `.claude/docs/` +7. **Is it about _project direction, phase goals, or technical strategy_?** → `.claude/openspec/ROADMAP.md` (if present) +8. **Is it _standards compliance_ information (GEMMA, ZGW, Forum Standaardisatie)?** → `docs/features/README.md` +9. **Is it a reusable _test flow_ (Given/When/Then)?** → `test-scenarios/TS-*.md` + +If you're still unsure: write it once in the most specific location and link from everywhere else. When content could fit in two places, it almost always belongs in the more authoritative one (spec over guide, ADR over design doc) and should be referenced from the other. 
+ +--- + +## Part 2 — Writing Mechanics + +--- + +## Document Lifecycle Markers + +### The `[Future]` Marker + +In `docs/` user-facing guides, functionality that is not yet implemented is marked with `[Future]`: + +```markdown +## Export to PDF [Future] + +Users will be able to export publications to PDF format. +``` + +**Adding the marker:** +- Only use `[Future]` in `docs/` files — not in specs or `.claude/docs/` +- Only mark features on the active roadmap. Don't document speculative or far-future items — if you don't know when they'll ship, don't document them yet +- Write the section body in future tense: "Users will be able to..." + +**Auditing for stale markers:** +- Run `/sync-docs app` to check automatically +- When archiving any change, check whether it implements something currently marked `[Future]` in any doc +- When reading a doc and encountering a `[Future]` section, verify against current specs before assuming it's still future + +**Removing the marker — not just deletion:** +When a feature ships, don't just strip the label — do a content review: +1. Switch future tense to present tense: "will be available" → "is available" +2. Verify the description still matches what was actually built — planned and implemented are not always identical +3. Update any example steps, URLs, or screenshots +4. Check whether the companion guide (feature doc ↔ admin-guide) also has a `[Future]` section for the same feature — update both together +5. Remove any "once implemented..." caveats that assumed the feature wasn't ready + +### The `[Legacy]` Marker + +See [Outdated and Legacy Documentation](#outdated-and-legacy-documentation) for when to use `[Legacy]` and how to handle deprecated content. 
+ +--- + +## Link Structure + +- Use **relative paths** for internal links, not absolute paths + - Good: `[spec](../../openspec/specs/publications/spec.md)` + - Bad: `/home/user/apps-extra/opencatalogi/openspec/specs/publications/spec.md` +- **Verify linked files exist** before writing the link — a broken link is worse than no link +- For section links, use the GitHub anchor format: `#section-name-lowercase-hyphenated` + +--- + +## Lists, Tables, and Structure + +### When to use a list + +**Bulleted list** — unordered items with no inherent sequence: +- Three or more items that would be awkward as a run-on sentence +- Items where order doesn't matter + +**Numbered list** — always use when sequence matters: +- Step-by-step instructions +- Ordered procedures where skipping or reordering a step would cause problems + +Avoid lists for fewer than three items — prose is usually cleaner: "Feature A and Feature B are both required" is better than a two-item bullet list. + +### When to use a table + +Use a table when each item has **two or more parallel attributes**: +- Comparing options across a consistent set of criteria +- Mapping one thing to another (status → meaning, command → effect, field → description) +- Reference material readers will scan rather than read linearly + +Don't use a table for a simple list of items with a single attribute — that's a bulleted list. 
+
+### Ordering rows in a table
+
+- **Lifecycle or workflow order** — if rows represent phases, steps, or statuses (preferred for commands, status transitions, phases)
+- **Most-used first** — if the table is a lookup reference readers scan frequently
+- **Alphabetical** — only when there is no logical order and readers are likely to search by name
+- Avoid insertion order or random order
+
+### Ordering list items
+
+- Put the most important or most common item first
+- Use consistent grammatical parallelism — all items should start with the same form (all imperatives, all noun phrases, all clauses)
+- For instructional lists, use the order the reader should encounter the items
+
+---
+
+## Table of Contents
+
+**Add a ToC when:**
+- The document has 5 or more sections and is longer than ~50 lines
+- The document serves as an overview or index (any `README.md`)
+- The document is a guide that readers navigate non-linearly (feature docs, admin-guide.md, commands.md)
+
+**Don't add a ToC when:**
+- The document is short (under ~50 lines)
+- The document has a single coherent top-to-bottom flow
+- The document is primarily a single table or reference list
+
+### Keeping the ToC up to date
+
+- Use GitHub anchor format: `#section-name-lowercase-with-hyphens`
+- When you add, rename, or remove a section heading, update the ToC in the same edit — never leave them out of sync
+- Before adding a ToC link, verify the anchor matches the exact heading text (GitHub derives anchors from heading text: lowercased, with spaces replaced by `-` and special characters stripped)
+- Run `/sync-docs` to surface stale ToC entries automatically
+
+---
+
+## Diagrams and Images
+
+### Diagrams
+
+Prefer diagrams over prose when the relationship between things is genuinely hard to express linearly — state transitions, multi-party flows, decision branches. Do not add a diagram just to make a doc look more thorough; a clear table or numbered list is often better. 
+ +**Use Mermaid** for all new diagrams. Mermaid renders natively in GitHub, lives as text in the file (so it can be diffed and updated), and requires no external assets. + +✓ **Good** — inline Mermaid, lives with the doc, diffs cleanly: + +```mermaid +stateDiagram-v2 + [*] --> Pending + Pending --> Processing : task picked up + Processing --> Completed +``` + +✗ **Bad** — exported PNG of a diagram created in a separate tool: + +``` +![Feature flow](images/feature-flow.png) +``` + +→ now two things to keep in sync; the image goes stale silently + +**Mermaid diagram types and when to use them:** + +| Type | Use for | +|--------------------|----------------------------------------------------------------------| +| `flowchart` | Process flows, decision trees, "what happens when" | +| `sequenceDiagram` | Multi-party interactions (user → Nextcloud app → external service) | +| `stateDiagram-v2` | State machines — task lifecycle, status transitions | +| `erDiagram` | Data model relationships between entities | +| `gitGraph` | Branch topology (use sparingly — only if it genuinely aids understanding) | + +**Where diagrams live:** + +- Inline in the document that uses them — never in separate files +- Never copy a diagram into two documents; put it in the most authoritative location and link from the other + +**When not to use a diagram:** + +- When a table, numbered list, or short prose communicates the same thing clearly +- When the diagram would describe something that changes frequently — prose is cheaper to update than a Mermaid block +- When the audience is an end user or admin — `docs/` guides should use plain language, not technical diagrams + +--- + +### Images and Screenshots + +Use screenshots to illustrate UI steps that are genuinely hard to describe in text — for example, navigating to a specific setting buried in the admin interface. Do not screenshot things that change frequently; an outdated screenshot misleads more than it helps. 
+ +**Where images live:** + +| Purpose | Location | Committed? | +|----------------------------------------------------------|--------------------------------------------|------------| +| Documentation screenshots for `docs/` guides | `docs/images/` | Yes — commit alongside the doc | +| Documentation screenshots for `.claude/docs/` | `.claude/docs/images/` | Yes — commit alongside the doc | +| Automated test screenshots (browser tests) | `{app}/test-results/` | **No** — gitignored | + +The `docs/images/` and `.claude/docs/images/` directories do not exist yet — create them when you add the first image. + +**The gitignore boundary:** + +Test screenshots saved to `{app}/test-results/` are gitignored. They are ephemeral test artifacts — do not use them as documentation assets. If a screenshot captured during a test run is worth keeping in documentation, copy it to the appropriate committed location: + +```bash +cp {app}/test-results/screenshots/feature-flow.png docs/images/feature-flow.png +``` + +Then reference `docs/images/feature-flow.png` in the doc, not the original path. + +**Taking screenshots with the browser agent:** + +When a screenshot would genuinely improve a doc and the app is running in Docker, use the browser agent to capture the specific screen you need. Save directly to the target `docs/images/` path — not to `test-results/` — so it is committed immediately and stays out of the gitignore. 
+ +Use descriptive filenames based on what is shown, not sequential numbers: + +``` +✓ docs/images/admin-user-management.png +✗ docs/images/screenshot-1.png +✗ docs/images/image.png +``` + +**Referencing images in markdown:** + +Always use relative paths and write meaningful alt text: + +```markdown +![Admin user management screen](images/admin-user-management.png) +``` + +- `docs/` docs: path is relative to the doc file, so `images/filename.png` resolves to `docs/images/filename.png` +- `.claude/docs/` docs: same pattern — `images/filename.png` resolves to `.claude/docs/images/filename.png` +- Never use absolute paths (see [Link Structure](#link-structure)) + +**Keeping screenshots current:** + +- Note in the doc if a screenshot reflects a specific app version or configuration state +- When running `/sync-docs`, flag image references where the UI may have changed since the screenshot was taken +- When a UI step changes, retake the screenshot and replace the file — do not leave a stale image with a note saying "this may look different" + +--- + +## Formatting Alignment + +Correct visual alignment in tables and diagrams makes documentation easier to read, edit, and maintain. Misaligned source is a sign of a partial edit — fix it when you touch the file. + +### Markdown tables + +GFM renders tables regardless of source padding, but readable source matters for editing. 
When writing or updating a table: + +- **Separator rows** (`|---|---|`) must span the full width of each column — a one-character `|-|` under a wide column signals the row was added without checking alignment +- **Cell padding** should be visually consistent across rows in the same column — if most cells in a column are padded to 20 characters, an outlier cell should match +- **Pipe characters** (`|`) must be present on both ends of every row + +✓ Well-aligned: + +```markdown +| Concern | Source of truth | +|-----------------|-----------------------------| +| Requirements | `openspec/specs/` | +| Developer setup | `README.md` | +``` + +✗ Misaligned separator — won't break rendering, but signals a partial edit: + +```markdown +| Concern | Source of truth | +|--|--| +| Requirements | `openspec/specs/` | +``` + +### ASCII box diagrams + +ASCII diagrams use box-drawing characters to show lifecycle or flow order. Misalignment here is immediately visible to anyone reading the raw file. + +- **Vertical bars** (`│`) must sit in the same column on every row in the same block +- **Label/description spacing** must be consistent — if one row uses 8 spaces between a command name and its description, all rows in that block must use the same spacing +- **Borders** must be complete — `┌` and `┐` at the top, `└` and `┘` at the bottom, `─` characters filling horizontal lines without gaps + +✓ Consistent spacing: + +``` +│ 1. /opsx-new Start a new change │ +│ 2. /opsx-ff Generate all specs at once │ +│ 3. /opsx-apply Implement the tasks │ +``` + +✗ Inconsistent spacing — second row label runs into its description: + +``` +│ 1. /opsx-new Start a new change │ +│ 2. /opsx-ff Generate all specs at once │ +│ 3. /opsx-apply Implement the tasks │ +``` + +**When editing any file that contains a table or diagram:** check the entire table/diagram for alignment before saving — not just the rows you changed. A partial fix that leaves other rows misaligned is worse than leaving everything as-is. 
+ +--- + +## Part 3 — Maintenance + +--- + +## Keeping Docs Current + +A stale doc is worse than no doc — it misleads. After any change that affects documented behavior: + +1. Update the source-of-truth file first +2. If the change affects a `docs/` guide (user-facing), update that too +3. Check for cross-references in other docs and update them if needed +4. After archiving a change, verify that the affected specs and `docs/` guides still reflect the new state +5. Run `/sync-docs` periodically to catch drift across all docs + +--- + +## Staleness Signals + +When reading or reviewing documentation, certain patterns are signals to stop and verify before trusting. Some indicate the doc has drifted from reality; others indicate it was never finished. + +| Pattern found in a doc | What to check | +|-------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------| +| `[Future]` | Whether the feature has since been implemented | +| `[Legacy]` | Whether the content can now be fully removed | +| `TODO` / `TBD` in a shipped doc | Whether it needs resolving or a proper `[Future]` marker | +| Hardcoded version numbers in prose | The relevant version source for what's actually pinned | +| File path references (`openspec/specs/publications/spec.md`, etc.) 
| Whether the file still exists at that path | +| Environment variable names | `.env.example` or app config to confirm still a valid variable | +| Hardcoded port or URL (`localhost:8080`) | App config to confirm current port and URL | +| Links to other docs | Whether the linked file and section still exist | +| Phase references ("In Phase 1", "POC phase") | `openspec/ROADMAP.md` to see if the phase has advanced | +| App or tool names ("OpenCatalogi", "OpenConnector") | App install scripts or `apps-extra/` to confirm still active | +| Persona names | `.claude/personas/` to confirm the persona still exists | +| Command names (`/opsx-archive`, `make reset`) | `.claude/skills/` or `Makefile` to confirm still valid | +| Table of Contents entries | Whether each linked section still exists with the same heading | +| "See [document title]" cross-references | Whether the referenced doc still has the described content | +| Screenshot references (`![alt](images/...)`) | Whether the file exists AND whether the UI has changed since the screenshot was taken | +| `(not yet created)` or `(none created yet)` in a table | Whether the file now exists and the annotation should be removed | +| Mermaid diagrams (states, flows, sequences) | Whether the underlying process, states, or parties still match reality | +| Specific UI navigation paths ("Go to Settings > Users") | Whether the menu structure still exists with those exact labels | +| Step-by-step numbered instructions in guides | Whether the step count and order still match the current UI | +| Code block examples with commands or config snippets | Whether the syntax or API contract still holds | +| Standards references ("GEMMA", "ZGW", "Forum Standaardisatie") | `docs/features/README.md` to confirm still the governing standards | +| ADR references (`adr-003-...`) | Whether the file exists at that path in `openspec/architecture/` | +| References to `openspec/changes/` proposals | Whether the change was archived and links need 
updating or removal | + +--- + +## Outdated and Legacy Documentation + +Docs accumulate. Some sections go stale, some get superseded by automation, and occasionally a whole file outlives its purpose. Knowing when to update, move, mark, or delete is as important as knowing how to write. + +### When to update in place + +Update a section when the underlying facts changed but the content's purpose and location are still correct: + +- A setting was renamed, a URL changed, or a step was added +- A `[Future]` marker should be removed because the feature shipped +- A link points to a file that was moved or renamed + +### When to move content + +Move content when it is in the right state but the wrong place — usually because the audience changed or the project structure evolved: + +- Technical steps that ended up in a user-facing guide → move to `README.md` or a developer doc +- A section in a user guide that only makes sense to a developer → move to `.claude/docs/` or `README.md` +- A spec requirement copy-pasted into a guide → replace with a link, remove the copy + +When moving, always replace the old location with a short link to the new one. Never just delete without redirecting. + +### When to mark as legacy + +Use a `[Legacy]` marker when content describes something that still works but should no longer be used or recommended: + +- An old setup procedure replaced by an automated script +- A manual configuration step that is now automated +- An API pattern or plugin version that has been superseded + +```markdown +## Manual CORS Configuration [Legacy] + +> This approach was used before the shared API middleware. Use the `openconnector` service instead — it handles CORS automatically. 
+``` + +### When to remove a section + +Remove a section outright (not just mark it) when: + +- The feature it describes was removed from scope entirely +- The content is factually wrong and there is no "old way" worth preserving +- The section is pure duplication of a source of truth elsewhere — replace with a link + +### When to remove an entire file + +Remove a whole doc file when: + +- All its content is superseded by another file or by automation +- The audience or purpose it served no longer exists in the project +- It was a transitional document (e.g. a migration guide) that is no longer relevant + +Before deleting, grep for the filename across all `.md` files to find incoming links. Update or remove them first. + +### Handling large duplicates + +When two docs describe the same thing at length, don't merge them line by line. Instead: + +1. Pick the source of truth (see the table above) +2. Keep the full content in the source-of-truth file +3. In the other file, replace the duplicate block with a one-line link: `See [X](path/to/file.md).` +4. Before removing the copy, check whether it contains any updates the source of truth is missing — merge those in first + +Run `/sync-docs` to surface large duplicates automatically. 
+
+---
+
+## Common Mistakes
+
+| Mistake | Fix |
+|----------------------------------------------------------------|-------------------------------------------------------------------|
+| Copying a spec requirement into a user guide | Link to the spec instead |
+| Writing technical setup steps in a user-facing guide | Move to `README.md` or a developer doc |
+| Describing the same feature in both the spec and a design doc | Keep requirements in the spec; keep design decisions in `design.md` |
+| Using absolute file paths in links | Use relative paths |
+| Describing API internals in user docs | Keep API details in specs and API docs |
+| Leaving a feature marked `[Future]` after it ships | Remove the marker when the feature is live |
+
+---
+
+## Writing Anti-Patterns
+
+The mistakes above are structural — wrong place, wrong audience, wrong format. These are writing-style patterns that make documentation go stale faster or make it harder to read.
+
+| Anti-pattern | Why it's a problem | Fix |
+|---------------------------------------------------------------------|-----------------------------------------------------------------|------------------------------------------------------------------------------|
+| Using "currently", "as of now", "recently", "at the time of writing" | Becomes misleading the moment circumstances change | Write as timeless fact: "The app uses X" not "Currently, the app uses X" |
+| Hardcoding version numbers in prose | Versions change; prose doesn't update automatically | Link to the relevant version source instead |
+| "It should be noted that…" / "Please be aware that…" | Adds noise without adding information | State the fact directly |
+| Describing what a thing *is* instead of what the reader should *do* | User guides become encyclopedias instead of task guides | Lead with the action: "Click X to do Y", not "X is the button that does Y" |
+| Naming the actor vaguely ("the user", "you should") | Unclear whether "you" means end user, admin, or 
developer | Name the actor explicitly: "The administrator clicks…", "The citizen sees…" | +| Writing "as we discussed" or "following the recent change" | Assumes shared context the reader doesn't have | Docs must be self-contained; link to the change or ADR instead | +| Using Dutch strings without labelling them | Readers who don't speak Dutch can't tell if it's a slug, a label, or a typo | Annotate: `` `zaaktype` (Dutch term for case type) `` | +| Adding "TODO" or "TBD" in shipped documentation | Signals the doc is incomplete; confuses readers | Use `[Future]` with a specific description, or don't document it yet | +| Writing "see below" or "as mentioned above" | Breaks when the doc is restructured | Use a named section link: `[see API Conventions](#api-conventions)` | +| Doc file proliferation — creating a new file for every concern | Increases maintenance surface; readers can't find the right doc | Before creating a new file, check if the content belongs as a section in an existing one. A standalone doc is justified when it has internal navigation needs, targets a distinct audience, or is frequently referenced from multiple places. Run `/sync-docs dev` → Part C to audit doc structure periodically. 
| diff --git a/docs/claude/writing-skills.md b/docs/claude/writing-skills.md index 3babaad..2cbd6ef 100644 --- a/docs/claude/writing-skills.md +++ b/docs/claude/writing-skills.md @@ -80,10 +80,12 @@ The skill is built on recognized patterns, community best practices, or existing **Criteria (in addition to L2):** - Built on a **recognized pattern**: Anthropic official patterns, validated community skill, or your own proven pattern library -- Has `examples/` showing expected output format (few-shot guidance) -- Uses **common patterns** consistently (model guard, AskUserQuestion, destructive action confirmation — see [Common Patterns](#common-patterns) below) +- Has **at least one supporting subfolder**: `examples/` (output format demos), `references/` (standards docs), or `templates/` (fillable scaffolds) +- Uses **at least one common pattern** consistently (model guard, AskUserQuestion, destructive action confirmation, quality gates — see [Common Patterns](#common-patterns) below) - References **standards documents** where applicable (in `references/`) +> **What the script auto-detects for L3:** at least one common pattern keyword present in SKILL.md (model guard, AskUserQuestion, quality gates, or subfolder references) AND existence of at least one of `examples/`, `references/`, or `templates/`. These are structural proxies for the full criteria above. + **Sources for proven patterns:** - Anthropic's official `/skill-creator` bundled plugin ([GitHub](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md)) - Agent Skills Open Standard: [agentskills.io/specification](https://agentskills.io/specification) @@ -131,18 +133,24 @@ The skill has been systematically tested with evaluation scenarios. 
Its performa - Input prompt (what the user would say) - Expected output characteristics (what good output looks like) - Assertion criteria (how to grade pass/fail) +- **Description trigger testing**: 10+ `should_trigger` + 10+ `should_not_trigger` prompts in `trigger_tests` +- **Evals have been run**: `last_validated` is set to a date in `evals.json` - **Baseline measurement** exists: what does Claude produce on these scenarios WITHOUT the skill? -- **Description trigger testing**: 10 should-trigger + 10 should-NOT-trigger prompts, with measured precision - Skill has been through at least **one improve cycle** based on eval results -- Optional: `evals/` folder with `evals.json`, `timing.json`, `grading.json` +- `evals/` folder with `evals.json`; `timing.json` and `grading.json` produced after running evals + +> **What the script auto-detects for L5:** 3+ scenarios, 10+/10+ trigger tests, and `last_validated` non-null in evals.json. Baseline measurement and improve cycles are required for true L5 but not auto-checked by the script. **How to evaluate a skill:** -``` -# 1. Run Claude on tasks WITHOUT the skill — document failures -# 2. 
Create eval scenarios testing those gaps: -evals/evals.json: +**`evals/evals.json` format:** + +```json { + "skill": "create-pr", + "version": "1.0.0", + "created": "2025-01-15", + "last_validated": null, "scenarios": [ { "prompt": "Create a PR for the openregister feature branch", @@ -153,14 +161,70 @@ evals/evals.json: "includes ## Summary and ## Test plan sections" ] } - ] + ], + "trigger_tests": { + "should_trigger": [ + "Create a pull request for the feature branch", + "Open a PR from development to main", + "Make a PR for my changes", + "Submit this branch for review via PR", + "Create a GitHub pull request", + "PR this to development", + "Open pull request for openregister branch", + "Make a pull request for my new feature", + "Create PR targeting the main branch", + "Submit a pull request with these changes" + ], + "should_not_trigger": [ + "Can you review this code?", + "What is the difference between git merge and rebase?", + "How do I resolve a merge conflict?", + "Show me the git log", + "Commit my changes", + "Push to the remote branch", + "What branches are available?", + "Help me write a commit message", + "Show the diff for my changes", + "Explain what a pull request is" + ] + } } -# 3. Run skill on eval scenarios, grade each assertion -# 4. Compare with-skill vs baseline scores -# 5. Identify weak assertions, improve skill instructions, re-evaluate ``` -> **Reference:** Anthropic's official Skill Creator has 4 modes: **Create, Eval, Improve, Benchmark**. Under the hood it uses: Executor (runs skills), Grader (scores outputs), Comparator (blind A/B tests), Analyzer (suggests improvements). See [Anthropic Skill Creator](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md). +After running evals, update `last_validated` with the run date to unlock L5 green circle status. 
+
+**Using the Anthropic Skill Creator to run evals:**
+
+The [Anthropic Skill Creator](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md) automates running, grading, and improving evals as a Claude Code skill.
+
+**Step-by-step:**
+
+1. **Install** (one-time): add the Skill Creator to your `.claude/skills/` folder.
+
+2. **Invoke**: In a Claude Code session, ask Claude to evaluate the skill:
+   > "Run evals on the test-app skill" or "Use the skill creator to evaluate and improve my X skill"
+
+   Claude picks up the skill-creator and guides the process. The skill-creator's `evals/evals.json` format uses `evals[]` with `id`, `prompt`, `expected_output`, and `expectations` — note this differs from our custom tracking format (`scenarios`, `trigger_tests`, `last_validated`). The Skill Creator will create or adapt evals as needed.
+
+3. **What happens**: Two parallel subagents run each eval:
+   - **With-skill agent**: runs the scenario with the skill active
+   - **Baseline agent**: runs the same scenario without the skill
+   Results are saved to `{skill-name}-workspace/iteration-N/eval-N/` alongside the skills directory.
+
+4. **Review results**: The Skill Creator runs `eval-viewer/generate_review.py` and opens a browser tab with two tabs: **Outputs** (click through each eval, leave qualitative feedback) and **Benchmark** (pass rates, timing, tokens with-skill vs baseline).
+
+5. **Output files** written to `{skill-name}-workspace/iteration-N/eval-N/`:
+   - `grading.json` — assertion pass/fail with evidence per expectation
+   - `timing.json` — token count and duration
+   - `benchmark.json` — aggregate stats across all evals
+
+6. **Update `last_validated`** in our `evals.json` after a successful run:
+   ```json
+   "last_validated": "2026-04-10"
+   ```
+   This unlocks the L5 green circle in the skill overview dashboard (our tracking format, separate from the Skill Creator workspace).
+
+7. 
**Improve cycle**: The Skill Creator's analyzer flags non-discriminating assertions, flaky evals, and skill improvement suggestions. Update `SKILL.md` and re-run as iteration-2. --- @@ -525,7 +589,7 @@ Non-markdown static files that get copied as-is to the user's project or used by Evaluation scenarios and benchmark results for measured skills. **Use for:** -- `evals.json` — test scenarios with prompts, expected outputs, and assertion criteria +- `evals.json` — test scenarios, `trigger_tests` (should/should-not-trigger examples), and `last_validated` (date of last eval run; required for L5 green circle) - `timing.json` — token usage and duration per eval run - `grading.json` — assertion pass/fail results with evidence diff --git a/docs/claude/writing-specs.md b/docs/claude/writing-specs.md index ca5e81b..77fe5a3 100644 --- a/docs/claude/writing-specs.md +++ b/docs/claude/writing-specs.md @@ -1,394 +1,394 @@ -# Writing Specs - -How to write effective specifications that produce good code. Specs are the foundation of the entire workflow — bad specs lead to bad code, no matter how good the AI is. 
- -## Spec Structure - -Every spec file at `openspec/specs/{domain}/spec.md` follows this structure: - -```markdown -# Specification - -**Status**: idea | planned | in-progress | done -**Scope**: company-wide | {app-name} -**OpenSpec changes**: -- [change-name](../../changes/change-name/) -- [archived-change](../../changes/archive/YYYY-MM-DD-archived-change/) _(archived YYYY-MM-DD)_ - -## Purpose - - -## Requirements - -### REQ-{AREA}-{NNN}: - - -#### Scenario: -- GIVEN -- WHEN -- THEN -- AND - -## Non-Functional Requirements - -- **Performance:** -- **Accessibility:** -- **Internationalization:** Dutch and English MUST be supported (ADR-005) - -## Acceptance Criteria - -- [ ] - -## Notes - - -``` - -### Field reference - -| Field | Required | Notes | -|-------|----------|-------| -| `**Status**` | Yes | `idea` → `planned` → `in-progress` → `done` | -| `**Scope**` | Yes | `company-wide` (in `.claude/openspec/specs/`) or app name (in `{app}/openspec/specs/`) | -| `**OpenSpec changes**` | Yes | Vertical list, one entry per line, oldest first. `_(none yet)_` until first change created. Archived entries include `_(archived YYYY-MM-DD)_`. See [Grouping rule](#openspec-changes-list-format) below. 
| -| `## Non-Functional Requirements` | Yes | Always present, even if minimal | -| `## Acceptance Criteria` | Yes | Placeholder OK for `idea` status; fill in before moving to `planned` | -| `## Notes` | Yes | Always present | - -### Status lifecycle - -``` -idea ──► planned ──► in-progress ──► done - │ │ │ - │ ready for /opsx-ff │ new change created - │ ▼ -still fuzzy, fill in in-progress (again) -Acceptance Criteria first -``` - -| Status | Meaning | -|--------|---------| -| `idea` | Concept noted — Purpose defined, Requirements fuzzy | -| `planned` | Acceptance criteria fully defined — **ready for `/opsx-ff`** | -| `in-progress` | One or more OpenSpec changes have been created from this spec | -| `done` | All associated OpenSpec changes have been archived | - -**Re-opening a done spec:** If a new change is created that modifies a `done` spec, set the status back to `in-progress`. The `**OpenSpec changes**` list preserves the full history (archived entries stay visible). - -### OpenSpec changes list format - -List entries oldest-first (top = oldest, bottom = newest). One entry per line: - -``` -**OpenSpec changes**: -- [change-name](../../changes/change-name/) -- [archived-change](../../changes/archive/YYYY-MM-DD-name/) _(archived YYYY-MM-DD)_ -``` - -**When the list exceeds 15 entries**, group multiple changes per bullet by timeframe (same day → same month → same year). Oldest group first. **Never remove entries.** - -``` -**OpenSpec changes**: -- [change-a](link/), [change-b](link/) _(Jan 2026)_ -- [change-c](link/), [change-d](link/) _(Mar 2026)_ -- [newest-change](link/) _(Apr 2026)_ -``` - -Group at the coarsest level that keeps the list under 15 bullets while preserving order. Start by grouping same-day entries, then same-month, then same-year if still too long. 
- -## RFC 2119 Keywords - -Use these keywords deliberately to communicate the importance of each requirement: - -| Keyword | Meaning | Use when | -|---------|---------|----------| -| **MUST** / **SHALL** | Absolute requirement. Non-negotiable. | The feature won't work correctly without this | -| **MUST NOT** / **SHALL NOT** | Absolute prohibition | Doing this would break something or violate a constraint | -| **SHOULD** | Recommended, but exceptions may exist | Best practice that can be skipped with justification | -| **SHOULD NOT** | Discouraged, but exceptions may exist | Not ideal but acceptable in some cases | -| **MAY** | Optional | Nice to have, up to implementer | - -### Examples - -```markdown -# Good — clear intention -The API endpoint MUST return HTTP 404 when the resource does not exist. -The response SHOULD include a human-readable error message. -The response MAY include a machine-readable error code. - -# Bad — vague, no keywords -The API should handle errors properly. -``` - -**Rule of thumb:** Prefer MUST/SHALL for normative requirements — if behavior is genuinely required, say so. Use SHOULD when real exceptions are acceptable. Reserve MAY for truly optional behavior; if it can be expressed as MUST or SHOULD, prefer that instead. - -## Writing Scenarios - -Scenarios use the Gherkin format (GIVEN/WHEN/THEN) to describe specific behaviors. They serve as both documentation and acceptance criteria for implementation. 
- -### Good Scenarios - -```markdown -#### Scenario: Successful login with valid credentials -- GIVEN a user with email "test@example.com" and a valid password -- WHEN they submit the login form -- THEN the system MUST return a JWT token -- AND the user MUST be redirected to the dashboard -- AND the session MUST be stored in the database - -#### Scenario: Login fails with invalid password -- GIVEN a user with email "test@example.com" -- WHEN they submit the login form with an incorrect password -- THEN the system MUST return HTTP 401 -- AND the response body MUST contain `{"error": "Invalid credentials"}` -- AND the failed attempt MUST be logged -``` - -### Bad Scenarios - -```markdown -# Too vague -#### Scenario: Login works -- GIVEN a user -- WHEN they log in -- THEN it works - -# Too implementation-specific -#### Scenario: Login -- GIVEN a POST to /api/v1/auth/login with body {"email":"x","pass":"y"} -- WHEN AuthController::login() calls UserService::authenticate() -- THEN it calls $mapper->findByEmail() and JWTService::generate() -``` - -### Tips for Good Scenarios - -1. **Cover the happy path first**, then error cases, then edge cases -2. **Be specific about inputs and outputs** — what data, what status codes, what format -3. **Focus on behavior, not implementation** — describe what happens, not which classes/methods do it -4. **One scenario, one behavior** — don't combine multiple behaviors in one scenario -5. **Include negative scenarios** — what happens when things go wrong? - -## Delta Specs - -When making changes to existing functionality, use delta specs to show what's changing. - -### ADDED - -New requirements that didn't exist before: - -```markdown -## ADDED Requirements - -### Requirement: Full-Text Search -The system MUST support full-text search across publication titles and content bodies using PostgreSQL's tsvector. 
- -#### Scenario: Search returns matching publications -- GIVEN publications with titles "Climate Report 2024" and "Budget Overview" -- WHEN a user searches for "climate" -- THEN the results MUST include "Climate Report 2024" -- AND the results MUST NOT include "Budget Overview" -- AND results MUST be ordered by relevance score -``` - -### MODIFIED - -Changes to existing requirements. Always note what the previous behavior was: - -```markdown -## MODIFIED Requirements - -### Requirement: Session Duration -The system MUST expire user sessions after 15 minutes of inactivity. - -(Previously: sessions expired after 30 minutes of inactivity) - -#### Scenario: Session expires -- GIVEN a user who has been inactive for 16 minutes -- WHEN they make a request -- THEN the system MUST return HTTP 401 -- AND the session MUST be cleared from the database -``` - -### REMOVED - -Requirements being deprecated. Always explain why: - -```markdown -## REMOVED Requirements - -### Requirement: Remember Me Checkbox -(Deprecated: replaced by automatic session refresh on activity. Removing the checkbox simplifies the login form and improves security by eliminating long-lived sessions.) -``` - -### RENAMED - -Requirements whose name is changing but whose behavior is unchanged. Always use FROM:/TO: format so reviewers can track the rename: - -```markdown -## RENAMED Requirements - -### Requirement: Old Requirement Name -FROM: Old Requirement Name -TO: New Requirement Name - -``` - -## Referencing Shared Specs - -When your requirement relates to a cross-project convention, reference the shared spec: - -```markdown -### Requirement: Publication API Endpoint -The system MUST provide a REST endpoint at `/index.php/apps/opencatalogi/api/publications`. - -See shared spec: `api-patterns/spec.md#requirement-url-structure` for URL conventions. -See shared spec: `api-patterns/spec.md#requirement-cors-support` for CORS requirements. 
-``` - -Shared specs live in `.claude/openspec/specs/` (company-wide, maintained by Conduction). Check that directory for currently available shared specs — the list evolves as new cross-app specs are added. Company-wide architectural decisions (NL Design System, API conventions, security, i18n) are captured in ADRs under `.claude/openspec/architecture/`. - -## Organizing Specs - -### By domain capability - -``` -openspec/specs/ -├── auth/spec.md # Authentication & sessions -├── publications/spec.md # Publication CRUD -├── search/spec.md # Search functionality -├── export/spec.md # Data export features -└── notifications/spec.md # User notifications -``` - -### Tips - -- **One capability per spec file** — don't mix unrelated concerns -- **Name directories for the domain concept**, not the implementation (`search/`, not `search-controller/`) -- **Keep specs focused** — if a spec file grows past ~100 requirements, split it -- **Update specs when behavior changes** — specs must always reflect the current system behavior - -## Common Mistakes - -### 1. Writing specs after code - -Specs written after implementation just document what exists. They don't help you think through requirements or catch issues early. **Write specs first.** - -### 2. Being too vague - -```markdown -# Bad -The system should handle errors. - -# Good -The system MUST return HTTP 400 with a JSON body containing an `error` field -when the request body fails validation. -``` - -### 3. Being too implementation-specific - -```markdown -# Bad — tied to specific classes -The AuthController MUST call UserMapper::findByEmail(). - -# Good — describes behavior -The system MUST look up users by email address during authentication. -``` - -### 4. Missing error scenarios - -Always consider: what happens when the input is invalid? When the resource doesn't exist? When the user isn't authorized? When an external service is down? - -### 5. 
Using MUST for everything - -If everything is MUST, nothing is distinguishable. Reserve MUST for true requirements and use SHOULD/MAY for less critical behaviors. - -### 6. Writing untestable requirements - -```markdown -# Bad — how do you verify this? -The system MUST be fast. - -# Good — measurable -The search endpoint MUST respond within 500ms for queries returning fewer than 100 results. -``` - -## Task Breakdown - -When writing `tasks.md`, each task should: - -1. **Be completable in one focused iteration** (15-30 minutes) -2. **Have a clear `spec_ref`** pointing to the specific requirement -3. **List `files`** to scope the work -4. **Include `acceptance_criteria`** extracted from spec scenarios -5. **Be ordered by dependency** — foundations first, features second, polish third - -### Mandatory deliverables per feature - -Every feature implemented from a spec MUST include all three layers: - -1. **Backend logic** — service/controller code that implements the requirement -2. **UI** — a user-facing interface so the feature is actually usable (Vue component, page, dialog, form, etc.) -3. **Tests** — covering both backend and frontend: - - **Unit tests** (PHPUnit) for services, mappers, and business logic - - **Newman/Postman tests** for API endpoints (add to the app's Postman collection) - - **Browser tests** (Playwright MCP) for the UI — verify the feature works end-to-end through the browser - -After implementing each task, the agent MUST run the relevant tests to confirm everything works: -- `composer test` or `vendor/bin/phpunit` for unit tests -- `newman run` for API tests -- Browser MCP snapshot/interaction for UI verification - -A task is NOT complete until its tests pass. 
- -### Good task breakdown - -```markdown -### Task 1: Create SearchService with basic query method -- **spec_ref**: `openspec/specs/search/spec.md#requirement-full-text-search` -- **files**: `lib/Service/SearchService.php` -- **acceptance_criteria**: - - GIVEN a search query WHEN SearchService::search("test") is called THEN it returns matching objects -- [ ] Implement service logic -- [ ] Write unit test (`tests/Unit/Service/SearchServiceTest.php`) -- [ ] Run unit tests — confirm passing - -### Task 2: Create SearchController with GET endpoint -- **spec_ref**: `openspec/specs/search/spec.md#requirement-search-api-endpoint` -- **files**: `lib/Controller/SearchController.php`, `appinfo/routes.php` -- **acceptance_criteria**: - - GIVEN a GET request to /api/search?q=test THEN returns JSON array of results -- [ ] Implement controller and route -- [ ] Add Newman/Postman test to collection -- [ ] Run Newman tests — confirm passing - -### Task 3: Add search UI page -- **spec_ref**: `openspec/specs/search/spec.md#requirement-search-ui` -- **files**: `src/views/SearchView.vue`, `src/router/index.js` -- **acceptance_criteria**: - - GIVEN a user navigating to the search page WHEN they enter a query THEN results are displayed -- [ ] Implement Vue component -- [ ] Build frontend (`npm run build`) -- [ ] Browser test — navigate to page, enter query, verify results appear - -### Task 4: Add pagination to search results -- **spec_ref**: `openspec/specs/search/spec.md#requirement-search-pagination` -- **files**: `lib/Service/SearchService.php`, `lib/Controller/SearchController.php`, `src/views/SearchView.vue` -- **acceptance_criteria**: - - GIVEN 50 results WHEN requesting page=2&limit=10 THEN returns results 11-20 with total count -- [ ] Implement backend pagination -- [ ] Update UI with pagination controls -- [ ] Add Newman test for pagination params -- [ ] Run all tests — confirm passing -``` - -### Bad task breakdown - -```markdown -### Task 1: Implement search -- [ ] Do 
everything - -### Task 2: (also bad) Backend only, no UI or tests -- [ ] Add SearchService -- [ ] Add SearchController -# Missing: no UI (users can't use it), no tests (nothing verified) -``` +# Writing Specs + +How to write effective specifications that produce good code. Specs are the foundation of the entire workflow — bad specs lead to bad code, no matter how good the AI is. + +## Spec Structure + +Every spec file at `openspec/specs/{domain}/spec.md` follows this structure: + +```markdown +# Specification + +**Status**: idea | planned | in-progress | done +**Scope**: company-wide | {app-name} +**OpenSpec changes**: +- [change-name](../../changes/change-name/) +- [archived-change](../../changes/archive/YYYY-MM-DD-archived-change/) _(archived YYYY-MM-DD)_ + +## Purpose + + +## Requirements + +### REQ-{AREA}-{NNN}: + + +#### Scenario: +- GIVEN +- WHEN +- THEN +- AND + +## Non-Functional Requirements + +- **Performance:** +- **Accessibility:** +- **Internationalization:** Dutch and English MUST be supported (ADR-005) + +## Acceptance Criteria + +- [ ] + +## Notes + + +``` + +### Field reference + +| Field | Required | Notes | +|-------|----------|-------| +| `**Status**` | Yes | `idea` → `planned` → `in-progress` → `done` | +| `**Scope**` | Yes | `company-wide` (in `.claude/openspec/specs/`) or app name (in `{app}/openspec/specs/`) | +| `**OpenSpec changes**` | Yes | Vertical list, one entry per line, oldest first. `_(none yet)_` until first change created. Archived entries include `_(archived YYYY-MM-DD)_`. See [Grouping rule](#openspec-changes-list-format) below. 
| +| `## Non-Functional Requirements` | Yes | Always present, even if minimal | +| `## Acceptance Criteria` | Yes | Placeholder OK for `idea` status; fill in before moving to `planned` | +| `## Notes` | Yes | Always present | + +### Status lifecycle + +``` +idea ──► planned ──► in-progress ──► done + │ │ │ + │ ready for /opsx-ff │ new change created + │ ▼ +still fuzzy, fill in in-progress (again) +Acceptance Criteria first +``` + +| Status | Meaning | +|--------|---------| +| `idea` | Concept noted — Purpose defined, Requirements fuzzy | +| `planned` | Acceptance criteria fully defined — **ready for `/opsx-ff`** | +| `in-progress` | One or more OpenSpec changes have been created from this spec | +| `done` | All associated OpenSpec changes have been archived | + +**Re-opening a done spec:** If a new change is created that modifies a `done` spec, set the status back to `in-progress`. The `**OpenSpec changes**` list preserves the full history (archived entries stay visible). + +### OpenSpec changes list format + +List entries oldest-first (top = oldest, bottom = newest). One entry per line: + +``` +**OpenSpec changes**: +- [change-name](../../changes/change-name/) +- [archived-change](../../changes/archive/YYYY-MM-DD-name/) _(archived YYYY-MM-DD)_ +``` + +**When the list exceeds 15 entries**, group multiple changes per bullet by timeframe (same day → same month → same year). Oldest group first. **Never remove entries.** + +``` +**OpenSpec changes**: +- [change-a](link/), [change-b](link/) _(Jan 2026)_ +- [change-c](link/), [change-d](link/) _(Mar 2026)_ +- [newest-change](link/) _(Apr 2026)_ +``` + +Group at the coarsest level that keeps the list under 15 bullets while preserving order. Start by grouping same-day entries, then same-month, then same-year if still too long. 
+ +## RFC 2119 Keywords + +Use these keywords deliberately to communicate the importance of each requirement: + +| Keyword | Meaning | Use when | +|---------|---------|----------| +| **MUST** / **SHALL** | Absolute requirement. Non-negotiable. | The feature won't work correctly without this | +| **MUST NOT** / **SHALL NOT** | Absolute prohibition | Doing this would break something or violate a constraint | +| **SHOULD** | Recommended, but exceptions may exist | Best practice that can be skipped with justification | +| **SHOULD NOT** | Discouraged, but exceptions may exist | Not ideal but acceptable in some cases | +| **MAY** | Optional | Nice to have, up to implementer | + +### Examples + +```markdown +# Good — clear intention +The API endpoint MUST return HTTP 404 when the resource does not exist. +The response SHOULD include a human-readable error message. +The response MAY include a machine-readable error code. + +# Bad — vague, no keywords +The API should handle errors properly. +``` + +**Rule of thumb:** Prefer MUST/SHALL for normative requirements — if behavior is genuinely required, say so. Use SHOULD when real exceptions are acceptable. Reserve MAY for truly optional behavior; if it can be expressed as MUST or SHOULD, prefer that instead. + +## Writing Scenarios + +Scenarios use the Gherkin format (GIVEN/WHEN/THEN) to describe specific behaviors. They serve as both documentation and acceptance criteria for implementation. 
+ +### Good Scenarios + +```markdown +#### Scenario: Successful login with valid credentials +- GIVEN a user with email "test@example.com" and a valid password +- WHEN they submit the login form +- THEN the system MUST return a JWT token +- AND the user MUST be redirected to the dashboard +- AND the session MUST be stored in the database + +#### Scenario: Login fails with invalid password +- GIVEN a user with email "test@example.com" +- WHEN they submit the login form with an incorrect password +- THEN the system MUST return HTTP 401 +- AND the response body MUST contain `{"error": "Invalid credentials"}` +- AND the failed attempt MUST be logged +``` + +### Bad Scenarios + +```markdown +# Too vague +#### Scenario: Login works +- GIVEN a user +- WHEN they log in +- THEN it works + +# Too implementation-specific +#### Scenario: Login +- GIVEN a POST to /api/v1/auth/login with body {"email":"x","pass":"y"} +- WHEN AuthController::login() calls UserService::authenticate() +- THEN it calls $mapper->findByEmail() and JWTService::generate() +``` + +### Tips for Good Scenarios + +1. **Cover the happy path first**, then error cases, then edge cases +2. **Be specific about inputs and outputs** — what data, what status codes, what format +3. **Focus on behavior, not implementation** — describe what happens, not which classes/methods do it +4. **One scenario, one behavior** — don't combine multiple behaviors in one scenario +5. **Include negative scenarios** — what happens when things go wrong? + +## Delta Specs + +When making changes to existing functionality, use delta specs to show what's changing. + +### ADDED + +New requirements that didn't exist before: + +```markdown +## ADDED Requirements + +### Requirement: Full-Text Search +The system MUST support full-text search across publication titles and content bodies using PostgreSQL's tsvector. 
+ +#### Scenario: Search returns matching publications +- GIVEN publications with titles "Climate Report 2024" and "Budget Overview" +- WHEN a user searches for "climate" +- THEN the results MUST include "Climate Report 2024" +- AND the results MUST NOT include "Budget Overview" +- AND results MUST be ordered by relevance score +``` + +### MODIFIED + +Changes to existing requirements. Always note what the previous behavior was: + +```markdown +## MODIFIED Requirements + +### Requirement: Session Duration +The system MUST expire user sessions after 15 minutes of inactivity. + +(Previously: sessions expired after 30 minutes of inactivity) + +#### Scenario: Session expires +- GIVEN a user who has been inactive for 16 minutes +- WHEN they make a request +- THEN the system MUST return HTTP 401 +- AND the session MUST be cleared from the database +``` + +### REMOVED + +Requirements being deprecated. Always explain why: + +```markdown +## REMOVED Requirements + +### Requirement: Remember Me Checkbox +(Deprecated: replaced by automatic session refresh on activity. Removing the checkbox simplifies the login form and improves security by eliminating long-lived sessions.) +``` + +### RENAMED + +Requirements whose name is changing but whose behavior is unchanged. Always use FROM:/TO: format so reviewers can track the rename: + +```markdown +## RENAMED Requirements + +### Requirement: Old Requirement Name +FROM: Old Requirement Name +TO: New Requirement Name + +``` + +## Referencing Shared Specs + +When your requirement relates to a cross-project convention, reference the shared spec: + +```markdown +### Requirement: Publication API Endpoint +The system MUST provide a REST endpoint at `/index.php/apps/opencatalogi/api/publications`. + +See shared spec: `api-patterns/spec.md#requirement-url-structure` for URL conventions. +See shared spec: `api-patterns/spec.md#requirement-cors-support` for CORS requirements. 
+``` + +Shared specs live in `.claude/openspec/specs/` (company-wide, maintained by Conduction). Check that directory for currently available shared specs — the list evolves as new cross-app specs are added. Company-wide architectural decisions (NL Design System, API conventions, security, i18n) are captured in ADRs under `.claude/openspec/architecture/`. + +## Organizing Specs + +### By domain capability + +``` +openspec/specs/ +├── auth/spec.md # Authentication & sessions +├── publications/spec.md # Publication CRUD +├── search/spec.md # Search functionality +├── export/spec.md # Data export features +└── notifications/spec.md # User notifications +``` + +### Tips + +- **One capability per spec file** — don't mix unrelated concerns +- **Name directories for the domain concept**, not the implementation (`search/`, not `search-controller/`) +- **Keep specs focused** — if a spec file grows past ~100 requirements, split it +- **Update specs when behavior changes** — specs must always reflect the current system behavior + +## Common Mistakes + +### 1. Writing specs after code + +Specs written after implementation just document what exists. They don't help you think through requirements or catch issues early. **Write specs first.** + +### 2. Being too vague + +```markdown +# Bad +The system should handle errors. + +# Good +The system MUST return HTTP 400 with a JSON body containing an `error` field +when the request body fails validation. +``` + +### 3. Being too implementation-specific + +```markdown +# Bad — tied to specific classes +The AuthController MUST call UserMapper::findByEmail(). + +# Good — describes behavior +The system MUST look up users by email address during authentication. +``` + +### 4. Missing error scenarios + +Always consider: what happens when the input is invalid? When the resource doesn't exist? When the user isn't authorized? When an external service is down? + +### 5. 
Using MUST for everything + +If everything is MUST, nothing is distinguishable. Reserve MUST for true requirements and use SHOULD/MAY for less critical behaviors. + +### 6. Writing untestable requirements + +```markdown +# Bad — how do you verify this? +The system MUST be fast. + +# Good — measurable +The search endpoint MUST respond within 500ms for queries returning fewer than 100 results. +``` + +## Task Breakdown + +When writing `tasks.md`, each task should: + +1. **Be completable in one focused iteration** (15-30 minutes) +2. **Have a clear `spec_ref`** pointing to the specific requirement +3. **List `files`** to scope the work +4. **Include `acceptance_criteria`** extracted from spec scenarios +5. **Be ordered by dependency** — foundations first, features second, polish third + +### Mandatory deliverables per feature + +Every feature implemented from a spec MUST include all three layers: + +1. **Backend logic** — service/controller code that implements the requirement +2. **UI** — a user-facing interface so the feature is actually usable (Vue component, page, dialog, form, etc.) +3. **Tests** — covering both backend and frontend: + - **Unit tests** (PHPUnit) for services, mappers, and business logic + - **Newman/Postman tests** for API endpoints (add to the app's Postman collection) + - **Browser tests** (Playwright MCP) for the UI — verify the feature works end-to-end through the browser + +After implementing each task, the agent MUST run the relevant tests to confirm everything works: +- `composer test` or `vendor/bin/phpunit` for unit tests +- `newman run` for API tests +- Browser MCP snapshot/interaction for UI verification + +A task is NOT complete until its tests pass. 
+ +### Good task breakdown + +```markdown +### Task 1: Create SearchService with basic query method +- **spec_ref**: `openspec/specs/search/spec.md#requirement-full-text-search` +- **files**: `lib/Service/SearchService.php` +- **acceptance_criteria**: + - GIVEN a search query WHEN SearchService::search("test") is called THEN it returns matching objects +- [ ] Implement service logic +- [ ] Write unit test (`tests/Unit/Service/SearchServiceTest.php`) +- [ ] Run unit tests — confirm passing + +### Task 2: Create SearchController with GET endpoint +- **spec_ref**: `openspec/specs/search/spec.md#requirement-search-api-endpoint` +- **files**: `lib/Controller/SearchController.php`, `appinfo/routes.php` +- **acceptance_criteria**: + - GIVEN a GET request to /api/search?q=test THEN returns JSON array of results +- [ ] Implement controller and route +- [ ] Add Newman/Postman test to collection +- [ ] Run Newman tests — confirm passing + +### Task 3: Add search UI page +- **spec_ref**: `openspec/specs/search/spec.md#requirement-search-ui` +- **files**: `src/views/SearchView.vue`, `src/router/index.js` +- **acceptance_criteria**: + - GIVEN a user navigating to the search page WHEN they enter a query THEN results are displayed +- [ ] Implement Vue component +- [ ] Build frontend (`npm run build`) +- [ ] Browser test — navigate to page, enter query, verify results appear + +### Task 4: Add pagination to search results +- **spec_ref**: `openspec/specs/search/spec.md#requirement-search-pagination` +- **files**: `lib/Service/SearchService.php`, `lib/Controller/SearchController.php`, `src/views/SearchView.vue` +- **acceptance_criteria**: + - GIVEN 50 results WHEN requesting page=2&limit=10 THEN returns results 11-20 with total count +- [ ] Implement backend pagination +- [ ] Update UI with pagination controls +- [ ] Add Newman test for pagination params +- [ ] Run all tests — confirm passing +``` + +### Bad task breakdown + +```markdown +### Task 1: Implement search +- [ ] Do 
everything + +### Task 2: (also bad) Backend only, no UI or tests +- [ ] Add SearchService +- [ ] Add SearchController +# Missing: no UI (users can't use it), no tests (nothing verified) +``` diff --git a/global-settings/README.md b/global-settings/README.md index c194672..7d7df10 100644 --- a/global-settings/README.md +++ b/global-settings/README.md @@ -75,4 +75,4 @@ Use the `/verify-global-settings-version` command to check whether a version bum ## Full documentation -See [`docs/global-claude-settings.md`](../docs/global-claude-settings.md) for the complete reference including the permissions list, hook behavior table, and troubleshooting. +See [`docs/claude/global-claude-settings.md`](../docs/claude/global-claude-settings.md) for the complete reference including the permissions list, hook behavior table, and troubleshooting. diff --git a/usage-tracker/MODELS.md b/usage-tracker/MODELS.md index d079c51..0bb2ad6 100644 --- a/usage-tracker/MODELS.md +++ b/usage-tracker/MODELS.md @@ -8,13 +8,15 @@ Track **Haiku, Sonnet, and Opus** simultaneously with separate usage monitoring. ## Model Limits Overview -> These are **subscription quota** limits (how many tokens you can use across all conversations before hitting your cap), NOT model context windows. Context windows are a separate, per-conversation limit. See [Two Kinds of Token Limits](../../.claude/docs/parallel-agents.md#two-kinds-of-token-limits) for the full explanation. +> These are **subscription quota** limits (how many tokens you can use across all conversations before hitting your cap), NOT model context windows. Context windows are a separate, per-conversation limit. See [Two Kinds of Token Limits](../docs/claude/parallel-agents.md#two-kinds-of-token-limits) for the full explanation. 
-| Model | Session (~5h) | Weekly (~7d) | Best For | -|-------|--------------|-------------|----------| -| **Haiku** | ~1.2M tokens | ~6M tokens | ⚡ Quick tasks, high volume | -| **Sonnet** | ~400K tokens | ~2M tokens | 🎯 Balanced, most tasks | -| **Opus** | ~200K tokens | ~1M tokens | 🧠 Complex reasoning | +> **Model matching:** The tracker matches by substring — `haiku` matches `claude-haiku-4-5-20251001`, `sonnet` matches `claude-sonnet-4-6`, `opus` matches `claude-opus-4-6`. New model versions are picked up automatically as long as the family name is in the model ID. + +| Model | Model ID | Session (~5h) | Weekly (~7d) | Best For | +|-------|----------|--------------|-------------|----------| +| **Haiku** (`--model haiku`) | `claude-haiku-4-5` | ~1.2M tokens | ~6M tokens | ⚡ Quick tasks, high volume | +| **Sonnet** (`--model sonnet`) | `claude-sonnet-4-6` | ~400K tokens | ~2M tokens | 🎯 Balanced, most tasks | +| **Opus** (`--model opus`) | `claude-opus-4-6` | ~200K tokens | ~1M tokens | 🧠 Complex reasoning | **Important:** The session limit is **shared across all models** — it's one combined 5-hour rolling window, not separate per-model buckets. The per-model session values above are estimates; only Anthropic knows the exact combined pool size for your plan. @@ -114,10 +116,7 @@ alias claude-opus="python3 /path/to/project/.claude/usage-tracker/claude-usage-t ### Can I track across VS Code sessions? -Yes — session data per model is saved automatically (git-ignored): -- `.claude/usage-tracker/logs/session-sonnet.json` -- `.claude/usage-tracker/logs/session-haiku.json` -- `.claude/usage-tracker/logs/session-opus.json` +Yes — the tracker reads JSONL files that Claude Code writes persistently to `~/.claude/projects/`. As long as those files exist, any run of the tracker can reconstruct historical usage for any window. 
### Get total tokens across all models diff --git a/usage-tracker/QUICKSTART.md b/usage-tracker/QUICKSTART.md index 6ac70ed..317e076 100644 --- a/usage-tracker/QUICKSTART.md +++ b/usage-tracker/QUICKSTART.md @@ -153,7 +153,7 @@ Run the monitor automatically every time you open the workspace. ## Troubleshooting ### "Today: 0.0%" -Normal if no Claude Code API calls have been made today. Use any slash command (e.g. `/opsx-apply`, `/opsx-verify`) or run a task from the agent, then re-check. +Normal if no Claude Code API calls have been made today. Use any slash command or run a task from the agent, then re-check. ### No `~/.claude/projects/` directory Claude Code CLI is not installed or hasn't been run yet. Install it from [claude.ai/code](https://claude.ai/code). @@ -166,8 +166,8 @@ Claude Code CLI is not installed or hasn't been run yet. Install it from [claude | Metric | Claude Sonnet (approx.) | |--------|------------------------| -| Session limit | 400,000 tokens (resets every ~4-5 hrs) | -| Weekly limit | 2,000,000 tokens | +| Session limit | Varies by plan — calibrate via `limits.json` (see SETUP.md) | +| Weekly limit | Varies by plan — calibrate via `limits.json` | | Weekly reset | Varies by plan — check claude.ai/settings/usage | --- diff --git a/usage-tracker/README.md b/usage-tracker/README.md index 5ed5ea8..58fc94c 100644 --- a/usage-tracker/README.md +++ b/usage-tracker/README.md @@ -225,7 +225,7 @@ cp .claude/usage-tracker/limits.example.json .claude/usage-tracker/limits.json `limits.json` is git-ignored so it stays personal. The tracker shows `(cfg)` when configured and `(est)` when using defaults. -**Calibrating subscription quota limits:** divide the tracker's current token count by the percentage shown on [claude.ai/settings/usage](https://claude.ai/settings/usage). Example: tracker shows 286K tokens, page shows 24% → 286K ÷ 0.24 = ~1.2M quota limit. You can share a screenshot with Claude and it will calculate everything for you. 
(These are subscription quota limits, not model context windows — see [Two Kinds of Token Limits](../.claude/docs/parallel-agents.md#two-kinds-of-token-limits).) +**Calibrating subscription quota limits:** divide the tracker's current token count by the percentage shown on [claude.ai/settings/usage](https://claude.ai/settings/usage). Example: tracker shows 286K tokens, page shows 24% → 286K ÷ 0.24 = ~1.2M quota limit. You can share a screenshot with Claude and it will calculate everything for you. (These are subscription quota limits, not model context windows — see [Two Kinds of Token Limits](../docs/claude/parallel-agents.md#two-kinds-of-token-limits).) **Weekly reset times:** the "Sonnet only" and "All models" bars reset on different days — configure `weekly_reset_day` and `weekly_reset_hour_utc` in `limits.json` to get accurate weekly percentages. See [SETUP.md](SETUP.md) for the conversion guide. diff --git a/usage-tracker/SETUP.md b/usage-tracker/SETUP.md index 09993a9..40f9227 100644 --- a/usage-tracker/SETUP.md +++ b/usage-tracker/SETUP.md @@ -8,7 +8,7 @@ Monitor Claude token usage directly in VS Code with real-time status indicators The Claude Code extension writes full API responses (including token counts) to `~/.claude/projects/**/*.jsonl` as you work. The tracker reads those files directly — no verbose logging, no extra configuration. -**What gets tracked:** every Claude Code API call made through VS Code (slash commands like `/opsx-apply`, agent tasks, inline chat). The tracker shows the last ~5 hours (approximating Anthropic's 5h rolling session window) and this week's running total. +**What gets tracked:** every Claude Code API call made through VS Code (slash commands, agent tasks, inline chat). The tracker shows the last ~5 hours (approximating Anthropic's 5h rolling session window) and this week's running total. 
--- @@ -281,7 +281,7 @@ python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models ### Update Plan Limits -Default limits are approximate **subscription quota** estimates (not model context windows — see [Two Kinds of Token Limits](../../.claude/docs/parallel-agents.md#two-kinds-of-token-limits)). To set your real limits, edit `limits.json` (see Step 2.5). Default values: +Default limits are approximate **subscription quota** estimates (not model context windows — see [Two Kinds of Token Limits](../docs/claude/parallel-agents.md#two-kinds-of-token-limits)). To set your real limits, edit `limits.json` (see Step 2.5). Default values: | Model | Daily | Weekly | |-------|-------|--------| From 636dc21473e1cd3ba61a46e6c959368de234df67 Mon Sep 17 00:00:00 2001 From: WilcoLouwerse Date: Fri, 10 Apr 2026 17:15:35 +0200 Subject: [PATCH 2/5] docs: clarify L3/L5 detection criteria and update evals schema in writing-skills Co-Authored-By: Claude Sonnet 4.6 --- docs/claude/writing-skills.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/claude/writing-skills.md b/docs/claude/writing-skills.md index 2cbd6ef..f058185 100644 --- a/docs/claude/writing-skills.md +++ b/docs/claude/writing-skills.md @@ -84,7 +84,7 @@ The skill is built on recognized patterns, community best practices, or existing - Uses **at least one common pattern** consistently (model guard, AskUserQuestion, destructive action confirmation, quality gates — see [Common Patterns](#common-patterns) below) - References **standards documents** where applicable (in `references/`) -> **What the script auto-detects for L3:** at least one common pattern keyword present in SKILL.md (model guard, AskUserQuestion, quality gates, or subfolder references) AND existence of at least one of `examples/`, `references/`, or `templates/`. These are structural proxies for the full criteria above. 
+> **What the script auto-detects for L3:** at least one common pattern keyword present in SKILL.md from any of these categories: (1) model guard (`model:`, `On Haiku`, `active model`, …), (2) `AskUserQuestion`, (3) quality gates (`composer check`, `phpcs`, `phpstan`, `make check`, `ruff`, `psalm`), (4) subfolder references (`examples/`, `refs/`, `references/`, or `templates/` as text in SKILL.md), or (5) destructive/browser patterns (`confirm.*before`, `browser_snapshot`, `browser_navigate`, `## Hard Rule`, `## Verification`, `acceptance_criteria`) — AND existence of at least one of `examples/`, `references/`, or `templates/` on disk. These are structural proxies for the full criteria above. **Sources for proven patterns:** - Anthropic's official `/skill-creator` bundled plugin ([GitHub](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md)) @@ -129,17 +129,17 @@ The skill has been systematically tested with evaluation scenarios. Its performa **Why most skills plateau at L4:** A skill that "feels right" but has never been measured may have blind spots, false confidence, or suboptimal triggering. Measurement turns intuition into evidence. **Criteria (in addition to L4):** -- Has **3+ evaluation scenarios** with: +- Has **3+ evals** with: - Input prompt (what the user would say) - - Expected output characteristics (what good output looks like) - - Assertion criteria (how to grade pass/fail) + - Expected output characteristics (`expected_output`) + - Assertion criteria (`expectations` — how to grade pass/fail) - **Description trigger testing**: 10+ `should_trigger` + 10+ `should_not_trigger` prompts in `trigger_tests` - **Evals have been run**: `last_validated` is set to a date in `evals.json` -- **Baseline measurement** exists: what does Claude produce on these scenarios WITHOUT the skill? +- **Baseline measurement** exists: what does Claude produce on these evals WITHOUT the skill? 
- Skill has been through at least **one improve cycle** based on eval results - `evals/` folder with `evals.json`; `timing.json` and `grading.json` produced after running evals -> **What the script auto-detects for L5:** 3+ scenarios, 10+/10+ trigger tests, and `last_validated` non-null in evals.json. Baseline measurement and improve cycles are required for true L5 but not auto-checked by the script. +> **What the script auto-detects for L5:** 3+ evals (checks `evals` key, falls back to `scenarios`), 10+/10+ trigger tests, and `last_validated` non-null in evals.json. Baseline measurement and improve cycles are required for true L5 but not auto-checked by the script. **How to evaluate a skill:** @@ -147,15 +147,17 @@ The skill has been systematically tested with evaluation scenarios. Its performa ```json { - "skill": "create-pr", + "skill_name": "create-pr", "version": "1.0.0", "created": "2025-01-15", "last_validated": null, - "scenarios": [ + "evals": [ { + "id": 1, "prompt": "Create a PR for the openregister feature branch", - "expected": "PR targets development branch, has quality checks, proper format", - "assertions": [ + "expected_output": "PR targets development branch, has quality checks, proper format", + "files": [], + "expectations": [ "targets development branch (not main)", "runs composer check:strict", "includes ## Summary and ## Test plan sections" From d9198622639d326d2467c6502ed997c04f18a8ad Mon Sep 17 00:00:00 2001 From: WilcoLouwerse Date: Mon, 13 Apr 2026 17:42:22 +0200 Subject: [PATCH 3/5] feat: add settings-repo-ref support, remove CLAUDE.local.md, bump to v1.5.1 - global-settings: add optional settings-repo-ref file to track a non-main branch/tag/sha for version checks (GitHub API + git fetch paths), update block-write-commands.sh to protect the new file, and add deny rules for destructive commands (sudo, rm -rf root, gh pr merge, git reset --hard, etc.) 
- Remove CLAUDE.local.md flow from docs (README, parallel-agents) and delete the example template; gitignore .claude/settings.json + settings.local.json - writing-skills: document skill-creator vendoring + update script, the evals/workspace/iteration-N layout, and the baseline_score regression marker Co-Authored-By: Claude Sonnet 4.6 --- .claude/settings.json | 7 ---- .gitignore | 2 + docs/claude/README.md | 16 +------- docs/claude/parallel-agents.md | 3 -- docs/claude/writing-skills.md | 41 +++++++++++++++----- global-settings/CLAUDE.local.md.example | 29 -------------- global-settings/README.md | 4 ++ global-settings/VERSION | 2 +- global-settings/block-write-commands.sh | 6 +-- global-settings/check-settings-version.sh | 46 +++++++++++++++-------- global-settings/settings.json | 30 ++++++++++++++- 11 files changed, 102 insertions(+), 84 deletions(-) delete mode 100644 .claude/settings.json delete mode 100644 global-settings/CLAUDE.local.md.example diff --git a/.claude/settings.json b/.claude/settings.json deleted file mode 100644 index 2d603ef..0000000 --- a/.claude/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "permissions": { - "additionalDirectories": [ - "/home/wilco/wordpress-docker/.claude/docs" - ] - } -} diff --git a/.gitignore b/.gitignore index df30947..b6c90c4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .idea/ +.claude/settings.json +.claude/settings.local.json diff --git a/docs/claude/README.md b/docs/claude/README.md index 86c0e2c..5586aac 100644 --- a/docs/claude/README.md +++ b/docs/claude/README.md @@ -579,17 +579,6 @@ Optionally, bootstrap it upfront with common permissions to avoid approval promp Save this as `.claude/settings.local.json` in your project root. It is gitignored and will never be committed. -### CLAUDE.local.md - -Contains environment-specific credentials and API tokens (passwords, keys, endpoints). 
**Never commit this file.** - -Copy the example template and fill in your own values: - -```bash -cp .claude/CLAUDE.local.md.example .claude/CLAUDE.local.md -# Edit with your credentials -``` - --- ## Prerequisites (WSL) @@ -880,10 +869,9 @@ qwen ```markdown @CLAUDE.md -@CLAUDE.local.md ``` -This gives Qwen Code the same project context, coding standards, and credentials as Claude Code. However, Qwen Code does **not** support Claude's `/opsx-*` slash commands or skills — those are Claude Code-specific. For the full OpenSpec workflow, use Claude Code (with either API or local Qwen backend). +This gives Qwen Code the same project context and coding standards as Claude Code. However, Qwen Code does **not** support Claude's `/opsx-*` slash commands or skills — those are Claude Code-specific. For the full OpenSpec workflow, use Claude Code (with either API or local Qwen backend). #### Tips @@ -1005,8 +993,6 @@ npx -y @playwright/mcp@latest --isolated --port 9226 & │ └── .claude/ # Claude Code configuration (this repository) ├── CLAUDE.md # Workflow rules, project context, Docker env - ├── CLAUDE.local.md # [GITIGNORED] Your credentials — copy from .example, never commit - ├── CLAUDE.local.md.example # Template — copy to CLAUDE.local.md on setup ├── .mcp.json # Template — copy to project root on setup ├── settings.json # [COMMITTED] Shared project permissions (MCP approvals, enableAllProjectMcpServers) ├── settings.local.json # [GITIGNORED] Your personal tool permissions — auto-generated by Claude Code diff --git a/docs/claude/parallel-agents.md b/docs/claude/parallel-agents.md index 84e487e..63080d8 100644 --- a/docs/claude/parallel-agents.md +++ b/docs/claude/parallel-agents.md @@ -7,7 +7,6 @@ Running commands that spawn multiple agents simultaneously (like `/test-counsel` Every Claude Code API call sends the following with it: - **CLAUDE.md** — workspace instructions - **MEMORY.md** — persistent memory index -- **CLAUDE.local.md** — local credentials/config - **The 
full conversation history** so far When you run a command that launches 8 agents in parallel, all 8 agents start simultaneously, and each one makes many tool calls internally (file reads, browser snapshots, API calls). That means those files above get sent dozens to hundreds of times within a few minutes. @@ -61,12 +60,10 @@ These files are sent with **every single API call** in the workspace. In a paral |------|---------|-------------| | `.claude/CLAUDE.md` | Workspace instructions for Claude | < 100 lines | | `.claude/MEMORY.md` | Index of memory files | < 50 lines (index only, no content) | -| `.claude/CLAUDE.local.md` | Local credentials | < 30 lines | **Rules:** - **CLAUDE.md**: Only include instructions Claude needs on every task. Move niche/infrequent knowledge to separate files in `.claude/docs/` that can be read on demand. - **MEMORY.md**: This is an index only — one line per memory file with a brief description. Never write memory content directly into MEMORY.md. -- **CLAUDE.local.md**: Credentials only. Do not add project notes here. - **Persona files** (`.claude/personas/*.md`): These are only loaded when a sub-agent explicitly reads them — they don't auto-load. Keep them focused, but they don't need to be ultra-short. ## Two Kinds of Token Limits diff --git a/docs/claude/writing-skills.md b/docs/claude/writing-skills.md index f058185..fc11555 100644 --- a/docs/claude/writing-skills.md +++ b/docs/claude/writing-skills.md @@ -201,32 +201,53 @@ The [Anthropic Skill Creator](https://github.com/anthropics/skills/blob/main/ski **Step-by-step:** -1. **Install** (one-time): add the Skill Creator to your `.claude/skills/` folder. +1. **Install** (one-time): the Skill Creator lives at `.claude/skills/skill-creator/` in each repo (hydra and wordpress-docker). 
It's a vendored copy of [`anthropics/skills/skills/skill-creator/`](https://github.com/anthropics/skills/tree/main/skills/skill-creator) with one local modification: the eval workspace lives **inside** each skill rather than as a sibling folder. + + **Keeping it up to date:** Run `bash .claude/skills/update-skill-creator.sh` from the repo root. This script: + - Sparse-clones `anthropics/skills` to a tempdir + - Compares the upstream commit hash against `.claude/skills/skill-creator/.upstream-version` + - Backs up the current copy, rsyncs upstream files in, then re-applies `local-mods.patch` + - Updates `.upstream-version` to the new commit hash + + If `local-mods.patch` no longer applies cleanly (upstream rewrote the relevant section), the script aborts and points you at the backup + `.rej` files so you can hand-merge. We use this script-based approach because `anthropics/skills` keeps `skill-creator/` as a subdirectory, which makes pure `git subtree` impractical for tracking just that one folder. + + **Why we deviate from upstream:** Upstream Skill Creator writes eval results to `-workspace/` as a sibling to the skill folder. We patch this so results live at `/evals/workspace/iteration-N/`, keeping eval artifacts adjacent to the skill they belong to. The patch is recorded in `.claude/skills/skill-creator/local-mods.patch`. 2. **Invoke**: In a Claude Code session, ask Claude to evaluate the skill: > "Run evals on the test-app skill" or "Use the skill creator to evaluate and improve my X skill" - Claude picks up the skill-creator and guides the process. The skill-creator's `evals/evals.json` format uses `evals[]` with `id`, `prompt`, `expected_output`, and `expectations` — note this differs from our custom tracking format (`scenarios`, `trigger_tests`, `last_validated`). The Skill Creator will create or adapt evals as needed. + Claude picks up the skill-creator and guides the process. 
The skill-creator's `evals/evals.json` format uses `evals[]` with `id`, `prompt`, `expected_output`, and `expectations`. We adopted this format across all our skills — see "Standard evals.json schema" below. 3. **What happens**: Two parallel subagents run each eval: - **With-skill agent**: runs the scenario with the skill active - **Baseline agent**: runs the same scenario without the skill - Results are saved to `-workspace/iteration-N/eval-N/` alongside the skills directory. + Results are saved to `/evals/workspace/iteration-N/eval-/` (inside the skill folder, per our local convention). -4. **Review results**: The Skill Creator runs `eval-viewer/generate_review.py` and opens a browser tab with two tabs: **Outputs** (click through each eval, leave qualitative feedback) and **Benchmark** (pass rates, timing, tokens with-skill vs baseline). +4. **Review results**: The Skill Creator runs `eval-viewer/generate_review.py` and opens a browser tab (or generates a static HTML file with `--static`) with two tabs: **Outputs** (click through each eval, leave qualitative feedback) and **Benchmark** (pass rates, timing, tokens with-skill vs baseline). -5. **Output files** written to `-workspace/iteration-N/eval-N/`: +5. **Output files** written to `/evals/workspace/iteration-N/eval-/`: - `grading.json` — assertion pass/fail with evidence per expectation - `timing.json` — token count and duration - - `benchmark.json` — aggregate stats across all evals + - `benchmark.json` — aggregate stats across all evals (one level up, at `iteration-N/`) + - `eval-review-iteration-N.html` — static viewer (one level up, at `evals/workspace/`) -6. **Update `last_validated`** in our `evals.json` after a successful run: +6. 
**Update `last_validated` and `baseline_score`** in `evals.json` after a successful run: ```json - "last_validated": "2026-04-10" + "last_validated": "2026-04-13", + "baseline_score": 0.67 ``` - This unlocks the L5 green circle in the skill overview dashboard (our tracking format, separate from the Skill Creator workspace). + `last_validated` unlocks the L5 green circle in the skill overview dashboard. `baseline_score` records the with-skill pass rate at the time of validation — used as a regression guardrail when re-running evals (see "baseline_score" section below). + +7. **Improve cycle**: The Skill Creator's analyzer flags non-discriminating assertions, flaky evals, and skill improvement suggestions. Update `SKILL.md` and re-run as iteration-2 (and so on, in the same `evals/workspace/` folder). + +**`baseline_score` field — manual tracking for regression detection:** + +Even running evals manually (no CI), `baseline_score` is useful: it's the with-skill pass rate from the most recent successful eval run, recorded in `evals/evals.json` next to `last_validated`. When you re-run evals later, compare the new pass rate against `baseline_score`: + +- **New rate ≥ baseline_score** → skill is stable or improving. Update `baseline_score` to the new rate (and bump `last_validated`). +- **New rate < baseline_score** → regression. Investigate before updating either field. The skill change you just made may have broken something. -7. **Improve cycle**: The Skill Creator's analyzer flags non-discriminating assertions, flaky evals, and skill improvement suggestions. Update `SKILL.md` and re-run as iteration-2. +This gives you a paper trail of "this skill scored 0.67 on these expectations on this date" without needing CI infrastructure. The Skill Creator's own `benchmark.json` already produces the pass rate — you're just writing it back into `evals.json` as a durable marker. 
--- diff --git a/global-settings/CLAUDE.local.md.example b/global-settings/CLAUDE.local.md.example deleted file mode 100644 index dee4238..0000000 --- a/global-settings/CLAUDE.local.md.example +++ /dev/null @@ -1,29 +0,0 @@ -# Local Credentials (DO NOT COMMIT) - -Copy this file to `CLAUDE.local.md` and fill in your values: - -```bash -cp CLAUDE.local.md.example CLAUDE.local.md -``` - -## Local Backend Login - -- Email: admin -- Password: admin - -## Test Backend Login - -- Email: your-email@conduction.nl -- Password: (get from 1Password / team lead) - -## Production Backend Login - -- Email: your-email@conduction.nl -- Password: (get from 1Password / team lead) - -## Jira / Tempo API (optional, for WBSO tracking) - -- Jira URL: https://conduction.atlassian.net -- Jira Email: your-email@conduction.nl -- Jira API Token: (generate at https://id.atlassian.com/manage-profile/security/api-tokens) -- Tempo API Token: (generate in Tempo > Settings > API Integration) diff --git a/global-settings/README.md b/global-settings/README.md index 7d7df10..3c09506 100644 --- a/global-settings/README.md +++ b/global-settings/README.md @@ -32,6 +32,10 @@ echo "$REPO_ROOT" > ~/.claude/settings-repo-path # Online version checking via GitHub API (recommended — no local repo required): echo "ConductionNL/.github" > ~/.claude/settings-repo-url + +# Optional: track a branch other than main (tag or SHA also accepted). +# Defaults to "main" when this file is absent. +# echo "feature/claude-code-tooling" > ~/.claude/settings-repo-ref ``` Restart Claude Code after installing. Requires `jq`, `md5sum`, and `gh` (GitHub CLI) on `PATH`. 
diff --git a/global-settings/VERSION b/global-settings/VERSION index 88c5fb8..26ca594 100644 --- a/global-settings/VERSION +++ b/global-settings/VERSION @@ -1 +1 @@ -1.4.0 +1.5.1 diff --git a/global-settings/block-write-commands.sh b/global-settings/block-write-commands.sh index 162dd98..3973415 100644 --- a/global-settings/block-write-commands.sh +++ b/global-settings/block-write-commands.sh @@ -43,7 +43,7 @@ PUSH_DENY_MSG="Blocked: git push requires explicit authorization. Include one of # tee, eval, bash/sh -c, and inline scripting (python, perl, node). # Also hard-blocks chmod that makes protected files writable. _h=$(printf '%s' "$HOME" | sed 's/[.[\*^$()+?{}|]/\\&/g') -_prot="(~|\\\$HOME|${_h})/\.claude/(settings\.json|hooks/|settings-version|settings-repo-path|settings-repo-url)" +_prot="(~|\\\$HOME|${_h})/\.claude/(settings\.json|hooks/|settings-version|settings-repo-path|settings-repo-url|settings-repo-ref)" # chmod guard: deny write-enabling permissions on protected files if echo "$cmd" | grep -qE "^\s*chmod\b" && echo "$cmd" | grep -qE "${_prot}"; then @@ -66,7 +66,7 @@ if echo "$cmd" | grep -qE "^\s*(cp|mv)\b" && echo "$cmd" | grep -qE "[[:space:]] _is_config_write=true fi # 3. 
Variable assigned to a protected path and used as redirect target (same command) -if echo "$cmd" | grep -qE "[a-zA-Z_][a-zA-Z0-9_]*=[\"']?(~|\\\$HOME|${_h})/\.claude/(settings\.json|hooks|settings-version|settings-repo-path)" \ +if echo "$cmd" | grep -qE "[a-zA-Z_][a-zA-Z0-9_]*=[\"']?(~|\\\$HOME|${_h})/\.claude/(settings\.json|hooks|settings-version|settings-repo-path|settings-repo-url|settings-repo-ref)" \ && echo "$cmd" | grep -qE ">[[:space:]]*[\"']?\\\$[a-zA-Z_][a-zA-Z0-9_]*"; then _is_config_write=true fi @@ -74,7 +74,7 @@ fi if echo "$cmd" | grep -qE "\btee\b.*${_prot}"; then _is_config_write=true fi -if echo "$cmd" | grep -qE "[a-zA-Z_][a-zA-Z0-9_]*=[\"']?(~|\\\$HOME|${_h})/\.claude/(settings\.json|hooks|settings-version|settings-repo-path)" \ +if echo "$cmd" | grep -qE "[a-zA-Z_][a-zA-Z0-9_]*=[\"']?(~|\\\$HOME|${_h})/\.claude/(settings\.json|hooks|settings-version|settings-repo-path|settings-repo-url|settings-repo-ref)" \ && echo "$cmd" | grep -qE "\btee\b[^|]*\\\$[a-zA-Z_][a-zA-Z0-9_]*"; then _is_config_write=true fi diff --git a/global-settings/check-settings-version.sh b/global-settings/check-settings-version.sh index 254a5da..d00797f 100644 --- a/global-settings/check-settings-version.sh +++ b/global-settings/check-settings-version.sh @@ -9,6 +9,9 @@ # settings-repo-url — (optional) GitHub repo slug for online version check # (e.g. "ConductionNL/.github") # If present, checks VERSION via GitHub API first. +# settings-repo-ref — (optional) Git ref (branch/tag/sha) to track. +# Defaults to "main" when absent. Applies to both +# GitHub API and git-fetch lookup paths. # settings-repo-path — absolute path to the root of the canonical repo # (e.g. ~/path/to/.github) # Used as fallback when settings-repo-url is absent or fails. 
@@ -24,8 +27,16 @@ NC='\033[0m' REPO_URL_FILE="$HOME/.claude/settings-repo-url" REPO_PATH_FILE="$HOME/.claude/settings-repo-path" +REPO_REF_FILE="$HOME/.claude/settings-repo-ref" VERSION_FILE="$HOME/.claude/settings-version" +# ── Tracking ref (branch/tag/sha) — defaults to "main" when unset ──────────── +tracking_ref="main" +if [ -f "$REPO_REF_FILE" ]; then + _ref=$(cat "$REPO_REF_FILE" | tr -d '[:space:]') + [ -n "$_ref" ] && tracking_ref="$_ref" +fi + # ── Session-once guard ──────────────────────────────────────────────────────── input=$(cat) transcript_path=$(echo "$input" | jq -r '.transcript_path // ""' 2>/dev/null) @@ -103,7 +114,7 @@ fi if [ -n "$online_repo_slug" ]; then if command -v gh >/dev/null 2>&1; then - _api_path="repos/${online_repo_slug}/contents/global-settings/VERSION?ref=main" + _api_path="repos/${online_repo_slug}/contents/global-settings/VERSION?ref=${tracking_ref}" _gh_result=$(timeout 5 gh api "$_api_path" -H "Accept: application/vnd.github.raw+json" 2>/dev/null | tr -d '[:space:]') if [ -n "$_gh_result" ] && echo "$_gh_result" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then online_version="$_gh_result" @@ -121,15 +132,15 @@ fi if ! $online_fetch_ok && [ -n "$REPO_DIR" ] && [ -n "$git_root" ]; then rel_path=$(realpath --relative-to="$git_root" "$REPO_DIR/global-settings/VERSION" 2>/dev/null) - if timeout 5 git -C "$git_root" fetch origin main --quiet --depth=1 2>/dev/null; then - fetched=$(git -C "$git_root" show "origin/main:${rel_path}" 2>/dev/null | tr -d '[:space:]') + if timeout 5 git -C "$git_root" fetch origin "${tracking_ref}" --quiet --depth=1 2>/dev/null; then + fetched=$(git -C "$git_root" show "origin/${tracking_ref}:${rel_path}" 2>/dev/null | tr -d '[:space:]') if [ -n "$fetched" ]; then online_version="$fetched" online_fetch_ok=true online_source="git-fetch" else online_version="(not on remote)" - config_warnings+=("global-settings/VERSION not found on origin/main (path: ${rel_path}). 
The canonical global settings may not be committed to this remote — your settings may be outdated.") + config_warnings+=("global-settings/VERSION not found on origin/${tracking_ref} (path: ${rel_path}). The canonical global settings may not be committed to this remote — your settings may be outdated.") fi else online_version="(fetch failed)" @@ -175,10 +186,12 @@ fi # ── Online source label ────────────────────────────────────────────────────── online_label="" +ref_suffix="" +[ "$tracking_ref" != "main" ] && ref_suffix=" @${tracking_ref}" if [ "$online_source" = "github-api" ]; then - online_label=" ${DIM}(via GitHub API)${NC}" + online_label=" ${DIM}(via GitHub API${ref_suffix})${NC}" elif [ "$online_source" = "git-fetch" ]; then - online_label=" ${DIM}(via git fetch)${NC}" + online_label=" ${DIM}(via git fetch${ref_suffix})${NC}" fi # ── Status panel → stderr (displayed directly in the UI) ───────────────────── @@ -236,7 +249,7 @@ if $online_fetch_ok && semver_gt "$online_version" "$installed_version"; then echo "==========================================" echo "" echo " Installed : v${installed_version} ❌ (outdated)" - echo " Latest : v${online_version} ✅ (on origin/main)" + echo " Latest : v${online_version} ✅ (on origin/${tracking_ref})" echo "" echo " To apply the update:" echo " 1. First, in your own terminal (not through Claude), unlock the config files:" @@ -244,12 +257,13 @@ if $online_fetch_ok && semver_gt "$online_version" "$installed_version"; then echo " 2. 
Then say: \"update my global settings to ${online_version}\"" if [ "$online_source" = "github-api" ]; then - echo " When they do, run the following to pull all files directly from GitHub (${online_repo_slug}, main branch):" + echo " When they do, run the following to pull all files directly from GitHub (${online_repo_slug}, ref: ${tracking_ref}):" echo " repo_slug=\"${online_repo_slug}\"" + echo " ref=\"${tracking_ref}\"" echo " base_path=\"global-settings\"" echo " mkdir -p ~/.claude/hooks" echo " for f in settings.json block-write-commands.sh check-settings-version.sh VERSION README.md; do" - echo " content=\$(gh api \"repos/\${repo_slug}/contents/\${base_path}/\${f}?ref=main\" -H \"Accept: application/vnd.github.raw+json\")" + echo " content=\$(gh api \"repos/\${repo_slug}/contents/\${base_path}/\${f}?ref=\${ref}\" -H \"Accept: application/vnd.github.raw+json\")" echo " case \"\$f\" in" echo " *.sh) dest=\"\$HOME/.claude/hooks/\$f\" ;;" echo " VERSION) dest=\"\$HOME/.claude/settings-version\" ;;" @@ -259,14 +273,16 @@ if $online_fetch_ok && semver_gt "$online_version" "$installed_version"; then echo " printf '%s\n' \"\$content\" > \"\$dest\"" echo " if [[ \"\$f\" == *.sh ]]; then chmod 555 \"\$dest\"; elif [[ \"\$f\" == \"settings.json\" ]]; then chmod 644 \"\$dest\"; else chmod 444 \"\$dest\"; fi" echo " done" - echo " This pulls files directly from GitHub — no local repo clone needed." + echo " This pulls files directly from GitHub (ref: ${tracking_ref}) — no local repo clone needed." 
else - echo " When they do, run the following to pull all files directly from origin/main (not the local branch):" + echo " When they do, run the following to pull all files directly from origin/${tracking_ref} (not the local branch):" + echo " ref=\"${tracking_ref}\"" echo " git_root=\$(git -C \"${REPO_DIR}\" rev-parse --show-toplevel)" echo " rel_base=\$(realpath --relative-to=\"\$git_root\" \"${REPO_DIR}/global-settings\")" echo " mkdir -p ~/.claude/hooks" - echo " git -C \"\$git_root\" ls-tree --name-only \"origin/main:\${rel_base}/\" | while read f; do" - echo " src=\"origin/main:\${rel_base}/\${f}\"" + echo " git -C \"\$git_root\" fetch origin \"\$ref\" --depth=1" + echo " git -C \"\$git_root\" ls-tree --name-only \"origin/\${ref}:\${rel_base}/\" | while read f; do" + echo " src=\"origin/\${ref}:\${rel_base}/\${f}\"" echo " case \"\$f\" in" echo " *.sh) dest=\"\$HOME/.claude/hooks/\$f\" ;;" echo " VERSION) dest=\"\$HOME/.claude/settings-version\" ;;" @@ -275,7 +291,7 @@ if $online_fetch_ok && semver_gt "$online_version" "$installed_version"; then echo " git -C \"\$git_root\" show \"\$src\" > \"\$dest\"" echo " if [[ \"\$f\" == *.sh ]]; then chmod 555 \"\$dest\"; elif [[ \"\$f\" == \"settings.json\" ]]; then chmod 644 \"\$dest\"; else chmod 444 \"\$dest\"; fi" echo " done" - echo " Do NOT copy from the local repo directory — always use origin/main to get the correct version." + echo " Do NOT copy from the local repo directory — always use origin/${tracking_ref} to get the correct version." fi echo "" echo "INSTRUCTION FOR CLAUDE: At the very start of your response, mention that a new session has" @@ -294,7 +310,7 @@ elif $online_fetch_ok && semver_eq "$online_version" "$installed_version"; then # Online version unknown but no other warning — still note session start else - echo "NEW SESSION — Global Claude Settings checked. Version comparison unavailable (could not reach origin/main)." + echo "NEW SESSION — Global Claude Settings checked. 
Version comparison unavailable (could not reach origin/${tracking_ref})." echo "Briefly mention this at the start of your response, and suggest the user run /verify-global-settings-version to retry the check." echo "" fi diff --git a/global-settings/settings.json b/global-settings/settings.json index ef9d527..bd6f156 100644 --- a/global-settings/settings.json +++ b/global-settings/settings.json @@ -10,7 +10,35 @@ "Edit(~/.claude/settings-repo-path)", "Write(~/.claude/settings-repo-path)", "Edit(~/.claude/settings-repo-url)", - "Write(~/.claude/settings-repo-url)" + "Write(~/.claude/settings-repo-url)", + "Edit(~/.claude/settings-repo-ref)", + "Write(~/.claude/settings-repo-ref)", + + "Bash(sudo:*)", + "Bash(su:*)", + "Bash(shutdown:*)", + "Bash(reboot:*)", + "Bash(halt:*)", + "Bash(poweroff:*)", + "Bash(mkfs:*)", + "Bash(dd if=:*)", + + "Bash(gh pr merge:*)", + "Bash(gh repo delete:*)", + "Bash(gh release delete:*)", + + "Bash(git reset --hard:*)", + "Bash(git clean -f:*)", + "Bash(git clean -fd:*)", + "Bash(git clean -fdx:*)", + "Bash(git filter-branch:*)", + "Bash(git filter-repo:*)", + "Bash(git reflog expire:*)", + "Bash(git reflog delete:*)", + "Bash(git update-ref -d:*)", + "Bash(git config --global:*)", + "Bash(git checkout --:*)", + "Bash(git restore:*)" ], "allow": [ "Bash(ls:*)", From 6ae9e38b3cf9331e31336f45b55660972db9f7df Mon Sep 17 00:00:00 2001 From: WilcoLouwerse Date: Tue, 14 Apr 2026 17:26:40 +0200 Subject: [PATCH 4/5] docs: update Claude developer guides and add repo config - Rename top-level heading to "Claude Code Developer Guide" - Add new docs: local-llm.md, playwright-setup.md, examples/ - Remove obsolete exapp-sidecar-status.md - Update commands, workflow, getting-started, parallel-agents, writing-docs, writing-specs, walkthrough, app-lifecycle, and global-claude-settings docs - Add settings-repo-ref.example and settings-repo-url.example - Update usage-tracker scripts (claude-track.py, install.sh) and docs Co-Authored-By: Claude Opus 4.6 
(1M context) --- docs/claude/README.md | 807 +++++-------------- docs/claude/app-lifecycle.md | 2 +- docs/claude/commands.md | 134 +-- docs/claude/examples/.mcp.json.example | 32 + docs/claude/examples/CLAUDE.local.md.example | 29 + docs/claude/exapp-sidecar-status.md | 131 --- docs/claude/getting-started.md | 30 +- docs/claude/global-claude-settings.md | 178 ++-- docs/claude/local-llm.md | 284 +++++++ docs/claude/parallel-agents.md | 8 +- docs/claude/playwright-setup.md | 74 ++ docs/claude/walkthrough.md | 6 +- docs/claude/workflow.md | 25 +- docs/claude/writing-docs.md | 52 +- docs/claude/writing-specs.md | 4 +- global-settings/README.md | 8 +- global-settings/settings-repo-ref.example | 1 + global-settings/settings-repo-url.example | 1 + usage-tracker/MODELS.md | 32 +- usage-tracker/QUICKSTART.md | 42 +- usage-tracker/README.md | 46 +- usage-tracker/SETUP.md | 52 +- usage-tracker/claude-track.py | 4 +- usage-tracker/install.sh | 8 +- 24 files changed, 912 insertions(+), 1078 deletions(-) create mode 100644 docs/claude/examples/.mcp.json.example create mode 100644 docs/claude/examples/CLAUDE.local.md.example delete mode 100644 docs/claude/exapp-sidecar-status.md create mode 100644 docs/claude/local-llm.md create mode 100644 docs/claude/playwright-setup.md create mode 100644 global-settings/settings-repo-ref.example create mode 100644 global-settings/settings-repo-url.example diff --git a/docs/claude/README.md b/docs/claude/README.md index 5586aac..b2aaeaf 100644 --- a/docs/claude/README.md +++ b/docs/claude/README.md @@ -1,4 +1,4 @@ -# Spec-Driven Development Documentation +# Claude Code Developer Guide Documentation for Conduction's spec-driven development workflow, combining OpenSpec, GitHub Issues, and Claude Code. @@ -8,10 +8,10 @@ Documentation for Conduction's spec-driven development workflow, combining OpenS Step-by-step guide from installation to your first completed change. Start here if you're new to the workflow. 
### [Workflow Overview](./workflow.md) -Architecture overview of the full system: how specs, GitHub Issues, plan.json, and Ralph Wiggum loops fit together. Includes the plan.json format and flow diagrams. +Architecture overview of the full system: how specs, GitHub Issues, and plan.json fit together. Includes the plan.json format and flow diagrams. ### [Command Reference](./commands.md) -Detailed reference for every skill — OpenSpec built-ins (`/opsx-new`, `/opsx-ff`, etc.), custom Conduction skills (`/opsx-plan-to-issues`, `/opsx-apply-loop`, `/opsx-pipeline`), and planned additions (`/opsx-ralph-start`, `/opsx-ralph-review` — not yet implemented). Includes expected output and usage tips. +Detailed reference for every skill — OpenSpec built-ins (`/opsx-new`, `/opsx-ff`, etc.) and custom Conduction skills (`/opsx-plan-to-issues`, `/opsx-apply-loop`, `/opsx-pipeline`). Includes expected output and usage tips. ### [Writing Specs](./writing-specs.md) In-depth guide on writing effective specifications: RFC 2119 keywords, Gherkin scenarios, delta specs, shared spec references, task breakdown, and common mistakes to avoid. @@ -40,6 +40,15 @@ All testing commands and skills in one place — when to use each, typical workf ### [Parallel Agents & Subscription Cap](./parallel-agents.md) How parallel agent commands (like `/test-counsel`, `/test-app`, and `/feature-counsel`) consume your Claude subscription cap, guidelines for responsible use, and which files to keep lean to reduce token usage. +### [Frontend Standards](./frontend-standards.md) +Frontend development standards enforced across all Conduction apps: OpenRegister dependency checking, CSS scoping, admin detection patterns, and reference implementations. + +### [Local LLM Setup (Ollama + Qwen)](./local-llm.md) +How to run Claude Code with a local Qwen model via Ollama for privacy, cost reduction, and offline use. Includes the Double Dutch RAD workflow for pairing Claude (day shift) with Qwen (overnight batch jobs). 
+ +### [Playwright MCP Browser Setup](./playwright-setup.md) +Detailed setup guide for the 7 independent Playwright browser sessions used for parallel testing, including VS Code extension configuration, CLI alternatives, and usage rules. + ### [Usage Tracker](../../usage-tracker/README.md) Real-time Claude token usage monitoring in VS Code — color-coded status, threshold notifications, and multi-model support (Haiku, Sonnet, Opus). Reads Claude Code session files directly; no log configuration needed. Run `bash usage-tracker/install.sh` to get started. @@ -55,26 +64,16 @@ A complete worked example showing every phase of the flow on a realistic feature - [Stage 2: Specify](#stage-2-specify--writing-openspec-artifacts) - [Stage 3: Build](#stage-3-build--configuration-not-code) - [Stage 4: Validate](#stage-4-validate--quality-assurance--verification) -- [Double Dutch (RAD Workflow)](#double-dutch-rad-workflow) - [Workstation Setup (Windows)](#workstation-setup-windows) - [Prerequisites (WSL)](#prerequisites-wsl) - - [Node.js](#nodejs-via-nvm) - - [PHP & Composer](#php-81--composer) - - [GitHub CLI](#github-cli) - - [PHP Quality Tools](#php-quality-tools-phpcs-phpmd-psalm-phpstan) - - [Playwright Browsers](#playwright-browsers) - - [OpenSpec CLI](#openspec-cli) - - [Claude Code CLI](#claude-code-cli-optional-for-terminal-use) - - [Ollama + Qwen (local LLM)](#ollama--qwen-coder-optional-local-llm) - [Local Configuration](#local-configuration) - [Playwright MCP Browser Setup](#playwright-mcp-browser-setup) - [Directory Structure](#directory-structure) -- [Commands Reference](#commands-reference) -- [Skills Reference](#skills-reference) - [Personas](#personas) -- [Scripts](#scripts) -- [Usage Tracker](#usage-tracker) - [Architectural Design Rules (ADRs)](#architectural-design-rules-adrs) +- [Usage Tracker](#usage-tracker) +- [Related: Hydra CI/CD Pipeline](#related-hydra-cicd-pipeline) +- [Scripts](#scripts) - [Contributing](#contributing) - 
[Troubleshooting](#troubleshooting) @@ -267,18 +266,9 @@ Verify that the implementation matches the specs, passes quality standards, and #### Testing -| Type | Command | What it tests | -|------|---------|---------------| -| **Spec verification** | `/opsx-verify` | Implementation matches spec requirements (CRITICAL / WARNING / SUGGESTION) | -| **Functional** | `/test-functional` | Feature correctness via browser | -| **API** | `/test-api` | REST API + NLGov Design Rules compliance | -| **Accessibility** | `/test-accessibility` | WCAG 2.1 AA compliance | -| **Performance** | `/test-performance` | Load times, API response, network | -| **Security** | `/test-security` | OWASP Top 10, BIO2, multi-tenancy | -| **Regression** | `/test-regression` | Cross-app regression | -| **Persona testing** | `/test-persona-*` | 8 user-perspective tests (Henk, Fatima, Sem, etc.) | -| **Multi-perspective** | `/test-counsel` | All 8 personas test simultaneously | -| **Browser testing** | `/test-app ` | Automated browser testing (single or 6 parallel agents) | +For the full list of testing commands, browser pool rules, and recommended workflows, see [testing.md](./testing.md) and [commands.md](./commands.md). + +Key commands: `/opsx-verify` (spec verification), `/test-counsel` (8-persona test sweep), `/test-app` (automated browser testing), `/test-functional`, `/test-api`, `/test-accessibility`, `/test-performance`, `/test-security`, `/test-regression`, and `/test-persona-*` (per-persona testing). #### CI/CD @@ -307,79 +297,6 @@ composer phpcs && composer phpmd # Code quality gates --- -## Double Dutch (RAD Workflow) - -A two-shift Rapid Application Development cycle that pairs Claude (daytime, fast, cloud) with Qwen (overnight, slow, local/free). 
- -``` - 09:00 17:00 09:00 - | | | - ┌────────┴────────────────────────┴───────────────────────┴── - │ REVIEW ◄── DAY SHIFT (Claude) ──► HANDOFF NIGHT SHIFT (Qwen) - │ Qwen's Specs, architecture, Prepare PHPCS fixes, - │ output complex logic, task files boilerplate, - │ code review, PRs bulk refactors, - │ test generation - └──────────────────────────────────────────────────────────── -``` - -### Daily Cycle - -**Morning (09:00)** — Review Qwen's overnight output: code changes, test results, PHPCS fixes. Accept or reject changes, note issues for the day's work. - -**Day (09:00-17:00)** — Spec work with Claude: clarify requirements, write OpenSpec artifacts (`/opsx-ff`, `/opsx-new` → `/opsx-continue`), design architecture, solve hard problems, review PRs. Claude handles the thinking. - -**Evening (17:00)** — Hand off to Qwen: prepare self-contained task files (e.g., `qwen-phpcs-task.md`) with specific, mechanical work. Start Qwen batch and leave overnight. - -### Division of Labor - -| | Claude (Day) | Qwen (Night) | -|---|---|---| -| **Strengths** | Reasoning, architecture, specs, multi-file design | Mechanical fixes, repetitive changes, bulk ops | -| **Speed** | ~3s/response (cloud API) | ~2min/response (local 14b on 8GB VRAM) | -| **Cost** | API tokens (Max plan) | Free (local GPU) | -| **Best for** | Complex logic, code review, client deliverables | PHPCS fixes, boilerplate, test scaffolding | - -### Task File Format - -Qwen works best with narrow, explicit task files. Example: - -```markdown -# Task: Fix PHPCS Named Parameter Errors - -Working directory: `/path/to/app` - -## Files to fix -1. `lib/Controller/FooController.php` (3 errors) -2. `lib/Service/BarService.php` (1 error) - -## How to fix -Find function calls without named parameters. 
Look up the method signature -and add the parameter name: -- BEFORE: `$this->setName('value')` where signature is `setName(string $name)` -- AFTER: `$this->setName(name: 'value')` - -## Verification -Run: `./vendor/bin/phpcs --standard=phpcs.xml ` -Expected: 0 errors -``` - -### Running Qwen Overnight - -```bash -# Terminal 1 — start Qwen with Claude Code CLI -ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama \ - claude --model qwen3:14b - -# Then paste or reference the task file -``` - -> **Requires `qwen3:14b` or larger** for tool calling (file editing, shell commands). See [Ollama setup](#ollama--qwen-coder-optional-local-llm) for details. - -> **Known limitation:** Tool calling via CLI is unreliable with local models when system prompts are large. For now, Qwen works best on tasks where it can output code changes as text that you review and apply manually in the morning. - ---- - ## Workstation Setup (Windows) Our development environment runs on **Windows + WSL2 + Docker Desktop + VS Code**. Follow these steps on a fresh Windows machine. @@ -532,55 +449,6 @@ chmod +x ~/.claude/hooks/block-write-commands.sh --- -## Local Configuration - -Claude Code uses three settings files that work together. Understanding the difference prevents confusion: - -| File | Scope | Committed? | Purpose | -|------|-------|------------|---------| -| `~/.claude/settings.json` | Machine-wide, all projects | No — installed per developer | Global read-only policy and safety hooks. Installed from [`global-settings/`](../../global-settings/) in step 7 above. | -| `.claude/settings.json` | Project-wide, all developers | **Yes** | Shared team permissions — MCP server approvals, `enableAllProjectMcpServers`. Do not edit locally. | -| `.claude/settings.local.json` | Project, per developer | No — gitignored | Your personal tool approvals on top of the shared settings. Auto-generated by Claude Code. 
| - -### settings.local.json - -This file is **auto-generated** by Claude Code the first time you approve a tool permission in a session — no manual setup needed. It stores your personal allow/deny rules on top of the shared project settings. - -Optionally, bootstrap it upfront with common permissions to avoid approval prompts during normal development: - -```json -{ - "$schema": "https://json.schemastore.org/claude-code-settings.json", - "permissions": { - "allow": [ - "Bash(docker:*)", - "Bash(docker-compose:*)", - "Bash(composer:*)", - "Bash(git:*)", - "Bash(npm:*)", - "Bash(php:*)", - "Bash(curl:*)", - "Bash(bash:*)", - "Bash(ls:*)", - "Bash(mkdir:*)", - "Bash(cp:*)", - "Bash(mv:*)", - "Bash(rm:*)", - "WebFetch(domain:localhost)", - "WebFetch(domain:github.com)", - "WebFetch(domain:raw.githubusercontent.com)" - ], - "additionalDirectories": [ - "/tmp" - ] - } -} -``` - -Save this as `.claude/settings.local.json` in your project root. It is gitignored and will never be committed. - ---- - ## Prerequisites (WSL) Run these commands inside WSL (the VS Code terminal after connecting to WSL). 
@@ -659,226 +527,15 @@ npm install -g @fission-ai/openspec - [npm](https://www.npmjs.com/package/@fission-ai/openspec) — Package info - [Workflow docs](./workflow.md) — Our workspace-specific workflow -**Local docs:** - -| File | Content | -|------|---------| -| [getting-started.md](./getting-started.md) | First-time setup and orientation | -| [global-claude-settings.md](./global-claude-settings.md) | User-level Claude permissions, hooks, and safety settings | -| [workflow.md](./workflow.md) | Full spec-driven development workflow | -| [writing-specs.md](./writing-specs.md) | How to write good specs | -| [commands.md](./commands.md) | CLI command reference | -| [walkthrough.md](./walkthrough.md) | Step-by-step example of a full cycle | -| [testing.md](./testing.md) | All testing commands and skills — when to use each, recommended workflows | -| [app-lifecycle.md](./app-lifecycle.md) | Creating and managing Nextcloud apps — design, bootstrap, onboarding, config, and drift detection | -| [parallel-agents.md](./parallel-agents.md) | How parallel agents work, subscription cap implications, responsible use | -| [frontend-standards.md](./frontend-standards.md) | Frontend standards enforced across all Conduction apps | -| [docker.md](./docker.md) | Docker environment setup, profiles, and common operations | -| [exapp-sidecar-status.md](./exapp-sidecar-status.md) | Status report for ExApp sidecar wrapper projects | - ### Claude Code CLI (optional, for terminal use) ```bash npm install -g @anthropic-ai/claude-code ``` -### Ollama + Qwen Coder (optional, local LLM) - -Claude Code can run with a **local LLM** instead of the Anthropic API, using [Ollama](https://ollama.com/) and Alibaba's [Qwen3-Coder](https://ollama.com/library/qwen3-coder) model. Ollama v0.14.0+ includes built-in Anthropic Messages API compatibility, so Claude Code connects to it without any proxy or adapter. 
- -#### When to use local vs Claude API - -| Use case | Recommendation | -|----------|---------------| -| **Data sovereignty** — code or data must stay in the EU / on-premise | Local Qwen | -| **Security-sensitive work** — credentials, private APIs, client data | Local Qwen | -| **Offline / air-gapped environments** | Local Qwen | -| **Simple tasks** — formatting, renaming, small refactors, boilerplate | Local Qwen | -| **Cost reduction** — high-volume, repetitive prompts | Local Qwen | -| **Complex reasoning** — architecture, debugging, multi-file changes | Claude API | -| **Large context** — analyzing entire codebases or long specs | Claude API | -| **Quality-critical** — production code, specs, client deliverables | Claude API | - -> **Rule of thumb:** Use Qwen locally for work that is private, simple, or high-volume. Use Claude API when quality and reasoning depth matter most. You can switch between them freely — they use the same Claude Code interface, tools, and commands. - -#### Step 1: Install Ollama - -Install Ollama **natively on WSL** (not in Docker — native gives better GPU passthrough and performance): - -```bash -curl -fsSL https://ollama.com/install.sh | sh -``` - -Ollama runs as a background service automatically. Verify it's running: - -```bash -ollama --version # Should show 0.14.0+ -``` - -#### Step 2: Pull a Qwen model - -Choose the right model for your GPU VRAM: - -| Model | Download | Size | Min VRAM | Speed (RTX 3070) | Tool calling? | -|-------|----------|------|----------|-------------------|---------------| -| `qwen3:8b` | `ollama pull qwen3:8b` | 5.2 GB | 8 GB (fits 100%) | ~12s | **No** (chat only) | -| **`qwen3:14b`** | `ollama pull qwen3:14b` | **9.3 GB** | 12 GB | **~2min** (spills to CPU on 8GB) | **Yes** | -| `qwen3-coder` | `ollama pull qwen3-coder` | 18 GB | 24 GB | ~6min (mostly CPU on 8GB) | Yes | - -**Recommended: `qwen3:14b`** — the smallest model that supports **tool calling** (reading files, editing code, running commands). 
On 8GB VRAM it's slow (~2min/response) but works as a batch/overnight agent. On 12GB+ VRAM it runs at interactive speed (~15s). - -```bash -ollama pull qwen3:14b -``` - -> **Why not `qwen3:8b`?** It's faster but can only chat — it cannot use tools (file access, shell commands, code editing). The model is too small to reliably produce the structured function-call format that CLI agents require. It will show its thinking but won't execute anything. -> -> **Why not `qwen3-coder`?** It's the most capable (30B params) but requires 24GB+ VRAM. On an 8GB GPU it runs ~68% on CPU and takes ~6 minutes per response. Only use it with a workstation GPU (RTX 4090, A6000, etc). - -Check your available memory: - -```bash -free -h # Look at the "available" column -nvidia-smi # Check GPU VRAM -``` - -If you don't have enough system memory, increase the WSL allocation. On **Windows**, edit (or create) `%USERPROFILE%\.wslconfig`: - -```ini -[wsl2] -memory=24GB -``` - -Then restart WSL from PowerShell: - -```powershell -wsl --shutdown -``` - -Reopen your Ubuntu terminal — the new memory limit is now active. - -#### Step 3: Run Claude Code with Qwen - -Open a **new terminal** and run (replace model name with whichever you pulled): - -```bash -ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3:14b -``` - -This opens the full interactive Claude Code CLI — same interface, same tools, same commands — but powered by Qwen running locally on your machine. No data leaves your workstation. - -For a quick **one-shot prompt** (no interactive session): - -```bash -ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3:14b --print "explain this function" -``` - -#### Running local and API side by side - -The env vars are **scoped to that single terminal window only**. 
This means you can run both simultaneously: - -- **Terminal 1** — Qwen locally (free, private, slower) doing a long-running task like a bulk refactor or code review -- **Terminal 2 / VS Code** — Claude API (fast, powerful) for your main interactive development work - -This is the recommended workflow: **sidecar the free local model** for background tasks while you continue your normal work with Claude API at full speed. The local session won't affect your API session in any way — they're completely independent. - -``` -┌─────────────────────────┐ ┌─────────────────────────┐ -│ Terminal 1 (Qwen) │ │ VS Code / Terminal 2 │ -│ │ │ │ -│ Free, local, private │ │ Claude API (Opus) │ -│ Running: bulk refactor │ │ Fast interactive dev │ -│ Speed: ~15 tok/s │ │ Speed: ~50-80 tok/s │ -│ Cost: $0 │ │ Cost: normal API usage │ -│ │ │ │ -│ ► Background task │ │ ► Your main work │ -└─────────────────────────┘ └─────────────────────────┘ -``` - -To go back to Claude API in any terminal, simply open a new terminal as normal — no env vars to unset. - -#### Performance expectations - -Benchmarked on an RTX 3070 (8GB VRAM) with 24GB WSL memory: - -| Model | Simple task | Tool calling | Fits in 8GB VRAM | Usable interactively? | -|-------|-------------|-------------|------------------|----------------------| -| qwen3:8b | ~12 seconds | No | Yes (100% GPU) | Chat only | -| **qwen3:14b** | **~2 minutes** | **Yes** | No (spills to CPU) | **Batch/overnight** | -| qwen3-coder (30B) | ~6 minutes | Yes | No (68% CPU) | No | -| Claude API (Opus) | ~3 seconds | Yes | N/A (cloud) | Yes | - -**Be honest about the trade-off:** The recommended local model (`qwen3:14b`) is **~40x slower** than Claude API on 8GB VRAM hardware. It's not viable for interactive coding — but it **does support tool calling**, which makes it a real coding agent that can read files, edit code, and run commands. Use it for batch jobs you kick off and walk away from (e.g., overnight PHPCS fixes, bulk refactors, code reviews). 
- -**Where local shines:** -- **Nightly / batch jobs** — automated code reviews, linting suggestions, documentation generation, bulk refactors where you kick it off and walk away -- **Cost** — completely free, no API usage, no token limits, run it as much as you want -- **Privacy** — nothing leaves your machine, ideal for client code under NDA or government data -- **Simple interactive tasks** — quick renames, formatting, boilerplate generation where the speed difference barely matters - -#### Alternative: Qwen Code CLI (native Qwen experience) - -Qwen has its own dedicated CLI tool (v0.11+) with an interface similar to Claude Code, optimized for Qwen models: - -```bash -sudo npm install -g @qwen-code/qwen-code@latest -``` - -**Configure it to use your local Ollama** by editing `~/.qwen/settings.json`: - -```json -{ - "modelProviders": { - "openai": [ - { - "id": "qwen3:14b", - "name": "Qwen3 14B (Local Ollama)", - "envKey": "OLLAMA_API_KEY", - "baseUrl": "http://localhost:11434/v1" - } - ] - }, - "security": { - "auth": { - "selectedType": "openai" - } - }, - "env": { - "OLLAMA_API_KEY": "ollama" - }, - "model": { - "name": "qwen3:14b" - } -} -``` - -> Adjust the model `id` and `name` if you pulled a different model (e.g., `qwen3:8b` for chat-only, or `qwen3-coder` on 24GB+ VRAM). - -The key parts: `security.auth.selectedType: "openai"` bypasses the OAuth prompt, `modelProviders.openai` tells Qwen Code where your local Ollama lives, and `env.OLLAMA_API_KEY` provides the dummy API key that Ollama ignores but Qwen Code requires. - -**Launch it:** - -```bash -cd /path/to/your-project -qwen -``` - -**Tool calling requires `qwen3:14b` or larger.** The `qwen3:8b` model runs in chat-only mode — it can reason and answer questions but cannot use tools (no file access, no shell commands, no code editing). The `qwen3:14b` model supports structured tool calling and works as a full coding agent, though it's slow on 8GB VRAM (~2min/response). 
On 12GB+ VRAM it runs at interactive speed. - -**Sharing context with Claude Code:** Qwen Code reads `QWEN.md` instead of `CLAUDE.md`, but supports `@path/to/file.md` imports. You can create a `QWEN.md` in the workspace root that imports the Claude configuration: - -```markdown -@CLAUDE.md -``` - -This gives Qwen Code the same project context and coding standards as Claude Code. However, Qwen Code does **not** support Claude's `/opsx-*` slash commands or skills — those are Claude Code-specific. For the full OpenSpec workflow, use Claude Code (with either API or local Qwen backend). +### Ollama + Qwen (optional, local LLM) -#### Tips - -- **Don't close the terminal** where Ollama is running — if Ollama stops, your Claude Code session loses its backend -- **One model at a time** — Ollama loads/unloads models automatically, but running two large models simultaneously will OOM -- **VS Code extension** still uses Claude API — the env var trick only works for the CLI. This is fine: use VS Code for complex work (Claude API) and terminal for quick local tasks (Qwen) -- **All Claude Code features work** — tools, file editing, git, commands, skills, browser MCP — because the interface is the same, only the model backend changes +For running Claude Code with a local Qwen model (privacy, cost reduction, offline use), see **[local-llm.md](./local-llm.md)**. That guide covers Ollama installation, model selection, performance benchmarks, the Qwen Code CLI, and the Double Dutch RAD workflow for pairing Claude (day shift) with Qwen (overnight batch jobs). ### Summary Checklist @@ -893,263 +550,154 @@ openspec --version # 1.x npx playwright --version # 1.x ``` ---- +Your machine is ready. See [Getting Started](./getting-started.md) to complete your first spec-driven change. -## Playwright MCP Browser Setup - -The workspace uses **7 independent Playwright browser sessions** for parallel testing. 
+--- -### Browser Pool +## Local Configuration -| Server | Mode | Purpose | -|--------|------|---------| -| `browser-1` | Headless | Main agent (default) | -| `browser-2` | Headless | Sub-agent / parallel | -| `browser-3` | Headless | Sub-agent / parallel | -| `browser-4` | Headless | Sub-agent / parallel | -| `browser-5` | Headless | Sub-agent / parallel | -| `browser-6` | **Headed** | User observation (visible window) | -| `browser-7` | Headless | Sub-agent / parallel | +Claude Code uses three settings files that work together. Understanding the difference prevents confusion: -### VS Code Extension Setup +| File | Scope | Committed? | Purpose | +|------|-------|------------|---------| +| `~/.claude/settings.json` | Machine-wide, all projects | No — installed per developer | Global read-only policy and safety hooks. Installed from [`global-settings/`](../../global-settings/) in step 7 above. | +| `.claude/settings.json` | Project-wide, all developers | **Yes** | Shared team permissions — MCP server approvals, `enableAllProjectMcpServers`. Do not edit locally. | +| `.claude/settings.local.json` | Project, per developer | No — gitignored | Your personal tool approvals on top of the shared settings. Auto-generated by Claude Code. | -The VS Code extension loads MCP servers from `.mcp.json` in the **project root**. The file defines 7 browser instances. 
Browsers 1–5 and 7 are headless: +### settings.local.json -```json -{ - "mcpServers": { - "browser-1": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, - "browser-2": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, - "browser-3": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, - "browser-4": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, - "browser-5": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, - "browser-6": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--isolated"] }, - "browser-7": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] } - } -} -``` +This file is **auto-generated** by Claude Code the first time you approve a tool permission in a session — no manual setup needed. It stores your personal allow/deny rules on top of the shared project settings. 
-**Headed browser** (browser-6 — for watching tests live): +Optionally, bootstrap it upfront with common permissions to avoid approval prompts during normal development: ```json { - "browser-6": { - "command": "npx", - "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--isolated"] + "$schema": "https://json.schemastore.org/claude-code-settings.json", + "permissions": { + "allow": [ + "Bash(docker:*)", + "Bash(docker-compose:*)", + "Bash(composer:*)", + "Bash(git:*)", + "Bash(npm:*)", + "Bash(php:*)", + "Bash(curl:*)", + "Bash(bash:*)", + "Bash(ls:*)", + "Bash(mkdir:*)", + "Bash(cp:*)", + "Bash(mv:*)", + "Bash(rm:*)", + "WebFetch(domain:localhost)", + "WebFetch(domain:github.com)", + "WebFetch(domain:raw.githubusercontent.com)" + ], + "additionalDirectories": [ + "/tmp" + ] } } ``` -> `browser-6` omits `--headless` so the browser window is visible when you want to watch. - -The shared `settings.json` has two pre-approval entries: - -- **`"enableAllProjectMcpServers": true`** — auto-approves all servers from `.mcp.json` without prompting on each reload. -- **All `mcp__browser-*` tool calls** — pre-approved for all 7 browsers so that parallel sub-agents (used by `/test-app` Full mode and `/test-counsel`) can use their assigned browser without needing an interactive permission prompt. Without this, background agents are silently denied and no testing occurs. - -Then **reload the VS Code window**: `Ctrl+Shift+P` → type `reload window` → Enter. - -### Verification +Save this as `.claude/settings.local.json` in your project root. It is gitignored and will never be committed. -After reload, open the MCP servers panel to verify all 7 browsers show **Connected**. 
You can do this two ways: -- Type `/MCP servers` in the Claude Code chat input -- Or `Ctrl+Shift+P` → search **"MCP servers"** +### CLAUDE.local.md -![MCP servers panel showing all 7 browser instances connected](./img/mcp-servers-connected.png) +Contains environment-specific credentials and API tokens (passwords, keys, endpoints). **Never commit this file.** -If any server shows an error, check the output panel: `Ctrl+Shift+P` → **"Output: Focus on Output"** → select **"Claude VSCode"** from the dropdown. +Copy the [example template](./examples/CLAUDE.local.md.example) into your project and fill in your values: -### CLI Alternative (terminal only) +```bash +cp docs/claude/examples/CLAUDE.local.md.example .claude/CLAUDE.local.md +# Edit with your credentials +``` -For the Claude Code CLI (`claude` terminal command, not VS Code), you can start servers as HTTP endpoints on fixed ports and reference them via URL: +--- -```bash -# Start headless browsers -for port in 9221 9222 9223 9224 9225 9227; do - npx -y @playwright/mcp@latest --headless --isolated --port $port & -done +## Playwright MCP Browser Setup -# Start headed browser -npx -y @playwright/mcp@latest --isolated --port 9226 & -``` +The workspace uses 7 independent Playwright browser sessions for parallel testing. Copy the [example .mcp.json](./examples/.mcp.json.example) to your project root as `.mcp.json`, or see the [playwright-setup.md](./playwright-setup.md) guide for the full configuration, verification steps, CLI alternatives, and usage rules. -> This is **not needed for VS Code** — the extension manages server processes automatically via `.mcp.json`. Only use this approach if you're running `claude` from the terminal without VS Code. 
+**Quick summary:** -### Usage Rules +| Server | Mode | Purpose | +|--------|------|---------| +| `browser-1` | Headless | Main agent (default) | +| `browser-2`–`browser-5`, `browser-7` | Headless | Sub-agent / parallel | +| `browser-6` | **Headed** | User observation (visible window) | -1. **Default**: Use `browser-1` for normal work -2. **Parallel agents**: Assign sub-agents `browser-2` through `browser-5` and `browser-7` -3. **User watching**: Switch to `browser-6` when the user wants to observe -4. **Fallback**: If a browser errors, try the next numbered browser -5. **Keep `browser-6` reserved**: Only for explicit user observation +**Usage rules:** Use `browser-1` for normal work. Assign `browser-2`–`browser-5` and `browser-7` to parallel sub-agents. Keep `browser-6` reserved for user observation only. --- ## Directory Structure -``` -/ -├── .mcp.json # Playwright browser MCP servers +### This repository (`.github`) + +This repo contains **documentation**, **global settings**, and **project templates** — not skills, personas, or scripts. Those live in each project's own `.claude/` directory (see below). 
+ +``` +.github/ +├── docs/ +│ └── claude/ # Developer guides (this documentation) +│ ├── README.md # This file — overview and setup +│ ├── getting-started.md # First-change walkthrough +│ ├── workflow.md # Spec-driven architecture reference +│ ├── commands.md # Full command reference +│ ├── testing.md # Testing commands and skills +│ ├── writing-specs.md # How to write specs +│ ├── writing-skills.md # How to create skills +│ ├── writing-adrs.md # How to write ADRs +│ ├── writing-docs.md # Documentation standards +│ ├── app-lifecycle.md # Nextcloud app lifecycle +│ ├── frontend-standards.md # Frontend coding standards +│ ├── parallel-agents.md # Parallel agents and cap usage +│ ├── local-llm.md # Ollama + Qwen + Double Dutch +│ ├── playwright-setup.md # Playwright browser configuration +│ ├── walkthrough.md # End-to-end worked example +│ ├── docker.md # Docker environment +│ ├── global-claude-settings.md # Global settings reference +│ └── examples/ # Project-level template files +│ ├── CLAUDE.local.md.example # Template for project .claude/CLAUDE.local.md +│ └── .mcp.json.example # Template for project root .mcp.json (7 browsers) │ -└── .claude/ # Claude Code configuration (this repository) - ├── CLAUDE.md # Workflow rules, project context, Docker env - ├── .mcp.json # Template — copy to project root on setup - ├── settings.json # [COMMITTED] Shared project permissions (MCP approvals, enableAllProjectMcpServers) - ├── settings.local.json # [GITIGNORED] Your personal tool permissions — auto-generated by Claude Code - │ - ├── docs/ # This documentation - │ - ├── global-settings/ # Source files for ~/.claude/ — install once per machine (see step 7) - │ ├── settings.json # → installed as ~/.claude/settings.json (global read-only policy) - │ ├── block-write-commands.sh # → installed as ~/.claude/hooks/block-write-commands.sh - │ ├── check-settings-version.sh # → installed as ~/.claude/hooks/check-settings-version.sh - │ └── VERSION # → version tracking for update checks 
- │ - ├── skills/ # See docs/commands.md for full reference - │ ├── app-*/ # App lifecycle (create, design, apply, verify, explore) - │ ├── ecosystem-*/ # Ecosystem research (investigate, propose-app) - │ ├── opsx-*/ # OpenSpec workflow (new, ff, apply, verify, archive, …) - │ ├── swc-*/ # Softwarecatalogus (test, update) - │ ├── team-*/ # Scrum team agents (architect, backend, frontend, …) - │ ├── tender-*/ # Tender intelligence (scan, status, gap-report) - │ ├── test-*/ # Testing (counsel, app, personas, scenarios, …) - │ ├── clean-env/ # Reset Docker environment - │ ├── create-pr/ # Create a PR on GitHub - │ ├── feature-counsel/ # Multi-persona spec analysis - │ ├── intelligence-update/ # Sync external data sources - │ ├── sync-docs/ # Sync documentation to current state - │ └── verify-global-settings-version/ - │ - ├── personas/ # 8 Dutch government user personas - │ ├── henk-bakker.md - │ ├── fatima-el-amrani.md - │ ├── sem-de-jong.md - │ ├── noor-yilmaz.md - │ ├── annemarie-de-vries.md - │ ├── mark-visser.md - │ ├── priya-ganpat.md - │ └── janwillem-van-der-berg.md - │ - └── usage-tracker/ # Claude token usage monitoring +├── global-settings/ # Mandatory user-level settings for ~/.claude/ +│ ├── settings.json # → ~/.claude/settings.json (global read-only policy) +│ ├── block-write-commands.sh # → ~/.claude/hooks/block-write-commands.sh +│ ├── check-settings-version.sh # → ~/.claude/hooks/check-settings-version.sh +│ └── VERSION # Version tracking for update checks +│ +└── usage-tracker/ # Claude token usage monitoring tool ``` ---- +### Typical project workspace -## Commands Reference - -Commands are invoked as `/namespace-command [args]` in Claude Code. 
- -### OpenSpec Workflow (`/opsx-*`) - -**Core lifecycle:** - -| Command | Description | -|---------|-------------| -| `/opsx-new ` | Start a new change | -| `/opsx-ff` | Fast-forward: create all artifacts (proposal, specs, design, tasks) | -| `/opsx-continue` | Create the next artifact incrementally | -| `/opsx-apply` | Implement tasks from a change | -| `/opsx-verify` | Check implementation against specs (includes test coverage + documentation checks) | -| `/opsx-archive` | Archive a completed change, merge delta specs, update feature docs | -| `/opsx-bulk-archive` | Archive multiple changes at once | -| `/opsx-sync` | Sync delta specs to main specs | -| `/opsx-plan-to-issues` | Convert tasks.md to GitHub Issues with tracking epic | -| `/opsx-pipeline` | Process multiple changes in parallel — full lifecycle per change, up to 5 concurrent agents | - -**Discovery & design:** - -| Command | Description | -|---------|-------------| -| `/opsx-explore` | Read-only investigation mode | -| `/opsx-onboard` | Guided walkthrough for new team members | - -**App lifecycle:** - -| Command | Description | -|---------|-------------| -| `/app-design [app-name]` | Full upfront design — architecture research, competitor analysis, feature matrix, ASCII wireframes, OpenSpec setup. Run **before** `/app-create` for new apps. | -| `/app-create [app-id]` | Bootstrap a new app from template or onboard an existing repo — creates `openspec/`, scaffolds files, sets up GitHub repo | -| `/app-explore [app-id]` | Think through and evolve app configuration — updates `openspec/app-config.json`, feature specs, and ADRs | -| `/app-apply [app-id]` | Apply `openspec/app-config.json` changes to the actual app files (info.xml, CI workflows, PHP namespaces, etc.) 
| -| `/app-verify [app-id]` | Read-only audit — reports drift between `openspec/app-config.json` and the actual files (CRITICAL / WARNING / INFO) | - -**Team role agents:** - -| Command | Role | -|---------|------| -| `/team-architect` | Architecture review (API, data models, cross-app) | -| `/team-backend` | Backend implementation (PHP, entities, services) | -| `/team-frontend` | Frontend implementation (Vue, state, UX) | -| `/team-po` | Product Owner (business value, acceptance criteria) | -| `/team-qa` | QA Engineer (test coverage, edge cases) | -| `/team-reviewer` | Code review (standards, conventions) | -| `/team-sm` | Scrum Master (progress, blockers) | - -**Testing agents:** - -| Command | Focus | -|---------|-------| -| `/test-functional` | Feature correctness via browser | -| `/test-api` | REST API + NLGov Design Rules compliance | -| `/test-accessibility` | WCAG 2.1 AA compliance | -| `/test-performance` | Load times, API response, network | -| `/test-security` | OWASP Top 10, BIO2, multi-tenancy | -| `/test-regression` | Cross-app regression testing | -| `/test-persona-*` | 8 persona-specific testing agents | - -**Typical workflow:** +Each Conduction project (Nextcloud apps, WordPress sites, etc.) has its own `.claude/` directory with skills, personas, and configuration. The [Hydra](https://github.com/ConductionNL/hydra) repo also maintains its own set of skills and personas for CI/CD agents. 
``` -/opsx-new add-search-filters # Define the change -/opsx-ff # Generate all spec artifacts -/opsx-plan-to-issues # Create GitHub issues (optional) -/opsx-apply # Implement the tasks -/opsx-verify # Verify against specs -/opsx-archive # Archive when done +/ +├── .mcp.json # Playwright browser MCP servers (see docs/claude/examples/.mcp.json.example) +│ +└── .claude/ + ├── CLAUDE.md # Workflow rules, project context + ├── CLAUDE.local.md # [GITIGNORED] Your credentials + ├── CLAUDE.local.md.example # Template — copy from global-settings/ or customize per project + ├── settings.json # [COMMITTED] Shared team permissions + ├── settings.local.json # [GITIGNORED] Personal tool permissions (auto-generated) + │ + ├── skills/ # Project-specific skills (see commands.md) + ├── personas/ # User personas for testing + ├── scripts/ # Shared shell utilities + └── docs/ # Project-specific documentation ``` -### Softwarecatalogus (`/swc-*`) - -| Command | Description | -|---------|-------------| -| `/swc-test [mode]` | Run tests — `api`, `browser`, `all`, or `personas` | -| `/swc-update` | Sync GitHub issues and update test infrastructure | - ---- - -## Skills Reference - -Skills are invoked as `/skill-name [args]`. - -### General - -| Skill | Description | -|-------|-------------| -| `/clean-env` | Full Docker environment reset (stop, remove volumes, restart, install apps) | -| `/feature-counsel` | Analyze specs from 8 persona perspectives, suggest missing features | -| `/test-app [appname]` | Automated browser testing — single agent or 6 parallel perspectives | -| `/test-counsel` | Execute tests from 8 persona perspectives using browser + API | - -> These commands spawn multiple agents in parallel and consume your Claude usage cap faster than normal. See [parallel-agents.md](./parallel-agents.md) for cap impact, guidelines, and tips to reduce token usage. 
- -### Tender & Ecosystem Intelligence - -| Skill | Description | -|-------|-------------| -| `/tender-scan` | Scrape TenderNed for new tenders, import to SQLite, classify by software category using local Qwen | -| `/tender-status` | Dashboard: tenders by source, category, status, gaps, and top integration systems | -| `/tender-gap-report` | Generate gap analysis report — categories with tender demand but no Conduction product | -| `/ecosystem-investigate ` | Deep-dive competitor research for a software category (GitHub, G2, Capterra, AlternativeTo) | -| `/ecosystem-propose-app ` | Generate a structured app proposal from tender requirements and competitor research | -| `/intelligence-update [source]` | Sync external data sources into `intelligence.db` (due sources by default, or specify `all`/``) | - -> Requires `concurrentie-analyse/intelligence.db` to exist. `/tender-scan` requires a local Qwen model via Ollama (`http://localhost:11434`). - --- ## Personas -Eight Dutch government user personas in `personas/` represent the full spectrum of public sector users: +Each project defines its own user personas in `personas/`. Personas drive multi-perspective analysis via `/feature-counsel` and testing via `/test-counsel`. + +**Nextcloud workspace** — 8 Dutch government user personas representing public sector users: | Persona | Age | Role | Perspective | |---------|-----|------|-------------| @@ -1162,36 +710,7 @@ Eight Dutch government user personas in `personas/` represent the full spectrum | Priya Ganpat | 34 | ZZP developer | API quality, OpenAPI, DX | | Jan-Willem van der Berg | 55 | Small business owner | Plain language, findability | -Used by `/feature-counsel` and `/test-counsel` for multi-perspective analysis. - ---- - -## Scripts - -Shell scripts in `scripts/` are shared utilities used by skills and developers. 
- -| Script | Description | Usage | -|--------|-------------|-------| -| `clean-env.sh` | Full Docker environment reset — stops containers, removes volumes, restarts, installs core apps | `bash scripts/clean-env.sh` or `/clean-env` | - ---- - -## Usage Tracker - -Monitor your Claude token usage in real-time to avoid hitting subscription limits mid-session. The tracker reads Claude Code's JSONL session files directly — no extra configuration needed. - -```bash -# Install -bash usage-tracker/install.sh - -# Quick status (all models) -python3 usage-tracker/claude-usage-tracker.py --status-bar --all-models - -# Live monitoring (refreshes every 5 min) -python3 usage-tracker/claude-usage-tracker.py --monitor --all-models -``` - -See [usage-tracker/README.md](../../usage-tracker/README.md) for full documentation, VS Code task integration, and limit configuration. +Other projects define personas relevant to their domain (e.g., the [wordpress-docker](https://github.com/ConductionNL/wordpress-docker) project uses shopper and admin personas). The [Hydra](https://github.com/ConductionNL/hydra) CI/CD pipeline also maintains its own copy of personas for automated testing. --- @@ -1233,17 +752,70 @@ ADRs are referenced in each app's `openspec/config.yaml` under the `rules:` sect ### Adding a New ADR +See [writing-adrs.md](./writing-adrs.md) for the full guide on structure, format, and when to create one. + +Quick start: + 1. Create `openspec/architecture/adr-NNN-title.md` (company-wide) or `{app}/openspec/architecture/adr-NNN-title.md` (app-specific) following the template in `openspec/architecture/README.md` 2. Add reference rules to `config.yaml` for the relevant artifact types 3. Update this table --- +## Usage Tracker + +Monitor your Claude token usage in real-time to avoid hitting subscription limits mid-session. The tracker reads Claude Code's JSONL session files directly — no extra configuration needed. 
+ +```bash +# Install +bash usage-tracker/install.sh + +# Quick status (all models) +python3 usage-tracker/claude-usage-tracker.py --status-bar --all-models + +# Live monitoring (refreshes every 5 min) +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models +``` + +See [usage-tracker/README.md](../../usage-tracker/README.md) for full documentation, VS Code task integration, and limit configuration. + +--- + +## Related: Hydra CI/CD Pipeline + +[Hydra](https://github.com/ConductionNL/hydra) is Conduction's agentic CI/CD platform that runs the same spec-driven workflow autonomously in Docker containers. It transforms OpenSpec change proposals into validated, security-scanned code on feature branches — with final human approval before merging. + +Hydra maintains its own skills, personas, and OpenSpec workflows in its repository, running them through three specialized agent containers: + +| Agent | Role | Permissions | +|-------|------|-------------| +| **Al Gorithm** (Builder) | Reads OpenSpec change, implements code, opens draft PR | Full: Read, Write, Edit, Bash | +| **Juan Claude van Damme** (Reviewer) | Code review for correctness, style, architecture | Read-only | +| **Clyde Barcode** (Security) | SAST analysis, secret detection, security hardening | Read-only | + +The workflow and commands documented in this guide apply to both interactive development and Hydra's automated agents. See the [Hydra repository](https://github.com/ConductionNL/hydra) for container architecture, agent configuration, deployment models, and operational guides. + +--- + +## Scripts + +Each project may include shell scripts in its `.claude/scripts/` or `scripts/` directory, used by skills and developers. 
Common examples: + +| Script | Description | Usage | +|--------|-------------|-------| +| `clean-env.sh` | Full Docker environment reset — stops containers, removes volumes, restarts, installs core apps | `bash scripts/clean-env.sh` or `/clean-env` | + +--- + ## Contributing ### Adding a Skill -1. Create `skills//SKILL.md` +Skills are added to each project's `.claude/skills/` directory. See [writing-skills.md](./writing-skills.md) for the full guide on folder layout, SKILL.md format, naming conventions, maturity levels, and the extraction threshold rule. + +Quick start: + +1. Create `skills//SKILL.md` in your project's `.claude/` directory 2. Use frontmatter: ```yaml --- @@ -1255,8 +827,10 @@ ADRs are referenced in each app's `openspec/config.yaml` under the `rules:` sect ### Adding a Persona +Personas are added to each project's `personas/` directory. + 1. Create `personas/.md` -2. Follow the existing format (see `henk-bakker.md`) +2. Follow the existing format (see any existing persona file in the project) 3. Update skills that reference the persona list ### PR Process @@ -1307,17 +881,6 @@ Ensure `.claude/` is at the workspace root and Claude Code is started from that gh auth login ``` -### Ollama model won't load (out of memory) - -Increase WSL memory in `%USERPROFILE%\.wslconfig`: - -```ini -[wsl2] -memory=24GB -``` - -Then restart WSL from PowerShell: `wsl --shutdown` - ### Docker environment not starting ```bash diff --git a/docs/claude/app-lifecycle.md b/docs/claude/app-lifecycle.md index ff02991..b890bc5 100644 --- a/docs/claude/app-lifecycle.md +++ b/docs/claude/app-lifecycle.md @@ -379,7 +379,7 @@ After completing the steps above, confirm: App-specific ADRs live in `openspec/architecture/` and document why the app is built the way it is. They are created and explored during `/app-explore` sessions. -> **Company-wide ADRs** (ADR-001 through ADR-015) live in `apps-extra/.claude/openspec/architecture/` and apply to all Conduction apps. 
Only create an app-specific ADR when the decision is unique to that app. +> **Company-wide ADRs** (ADR-001 through ADR-015) live in `hydra/openspec/architecture/` and apply to all Conduction apps. Only create an app-specific ADR when the decision is unique to that app. Good candidates for app-specific ADRs: - Data storage approach (OpenRegister vs own tables) diff --git a/docs/claude/commands.md b/docs/claude/commands.md index 387549d..989eeea 100644 --- a/docs/claude/commands.md +++ b/docs/claude/commands.md @@ -131,8 +131,6 @@ OpenSpec's built-in implementation command. Reads `tasks.md` and works through t /opsx-apply ``` -**Note:** `/opsx-ralph-start` (not yet built) is planned as a dedicated implementation loop with minimal context loading and deeper GitHub Issues integration. For now, use this command — it already supports `plan.json` and GitHub Issues when a `plan.json` exists. - **Model:** Checked at run time — stops if on Haiku. **Sonnet** for most implementation work. **Opus** for architecturally complex changes. --- @@ -155,8 +153,6 @@ OpenSpec's built-in verification. Validates implementation against artifacts. - **Test coverage** — Every new PHP service/controller has a corresponding test file; every new Vue component has a test if the project uses Jest/Vitest - **Documentation** — New features and API endpoints are described in README.md or docs/ -**Note:** `/opsx-ralph-review` (not yet built) is planned as a dedicated review command that cross-references shared specs and creates GitHub Issues for findings. For now, use this command — it already supports GitHub Issues sync via `plan.json` when present. - **Model:** Checked at run time — stops if on Haiku. **Sonnet** for most verification work. **Opus** for complex or large changes. --- @@ -185,21 +181,21 @@ Usually done automatically during archive. **Phase:** Maintenance -Check and sync documentation to reflect the current project state. 
Two targets: **app docs** (`{app}/docs/`) for a specific Nextcloud app's users and admins, and **dev docs** (`.claude/docs/`) for Claude and developers. +Check and sync documentation to reflect the current project state. Two targets: **app docs** (`{app}/docs/`) for a specific Nextcloud app's users and admins, and **dev docs** (`.github/docs/claude/`) for Claude and developers. **Usage:** ``` /sync-docs # prompts for target /sync-docs app # prompts for which app, then syncs its docs/ /sync-docs app openregister # sync docs for a specific app -/sync-docs dev # sync developer/Claude docs (.claude/docs/) +/sync-docs dev # sync developer/Claude docs (.github/docs/claude/) ``` -Before syncing, runs 4 preliminary checks in parallel — config.yaml rules vs writing-docs.md/writing-specs.md, Sources of Truth accuracy, writing-specs.md vs schema template alignment (`.claude/openspec/schemas/conduction/`), and forked schema drift from the upstream `spec-driven` schema. Reports gaps and asks whether to fix before proceeding. +Before syncing, runs 4 preliminary checks in parallel — config.yaml rules vs writing-docs.md/writing-specs.md, Sources of Truth accuracy, writing-specs.md vs schema template alignment (`openspec/schemas/conduction/`), and forked schema drift from the upstream `spec-driven` schema. Reports gaps and asks whether to fix before proceeding. -**App docs mode** (`{app}/docs/`) — checks the app's `README.md` (root), `docs/features/`, `docs/ARCHITECTURE.md`, `docs/FEATURES.md`, `docs/GOVERNMENT-FEATURES.md`, and any other user-facing `.md` files against the app's current specs. Also loads all company-wide ADRs from `apps-extra/.claude/openspec/architecture/` and any app-level ADRs as auditing context (never as link targets in app docs). 
Flags outdated descriptions, missing features, stale `[Future]` markers (with full removal checklist), broken links, duplicated content, writing anti-patterns, ADR compliance gaps (screenshots, i18n, API conventions), and missing GEMMA/ZGW/Forum Standaardisatie standards references. Never inserts links into `.claude/` paths. Always shows a diff and asks for confirmation before writing. +**App docs mode** (`{app}/docs/`) — checks the app's `README.md` (root), `docs/features/`, `docs/ARCHITECTURE.md`, `docs/FEATURES.md`, `docs/GOVERNMENT-FEATURES.md`, and any other user-facing `.md` files against the app's current specs. Also loads all company-wide ADRs from `hydra/openspec/architecture/` and any app-level ADRs as auditing context (never as link targets in app docs). Flags outdated descriptions, missing features, stale `[Future]` markers (with full removal checklist), broken links, duplicated content, writing anti-patterns, ADR compliance gaps (screenshots, i18n, API conventions), and missing GEMMA/ZGW/Forum Standaardisatie standards references. Never inserts links into `.claude/` paths. Always shows a diff and asks for confirmation before writing. -**Dev docs mode** (`.claude/docs/`) — checks `commands.md`, `workflow.md`, `writing-specs.md`, `writing-docs.md`, `testing.md`, `getting-started.md`, `README.md`, plus the conduction schema (`.claude/openspec/schemas/conduction/schema.yaml`) and its `templates/spec.md` for alignment with `writing-specs.md`. Never changes intent without user confirmation. After syncing, runs a Phase 6 review of all commands and skills for stale references, outdated instructions, and redundant inline content — and asks whether to update them. 
+**Dev docs mode** (`.github/docs/claude/`) — checks `commands.md`, `workflow.md`, `writing-specs.md`, `writing-docs.md`, `testing.md`, `getting-started.md`, `README.md`, plus the conduction schema (`hydra/openspec/schemas/conduction/schema.yaml`) and its `templates/spec.md` for alignment with `writing-specs.md`. Never changes intent without user confirmation. After syncing, runs a Phase 6 review of all commands and skills for stale references, outdated instructions, and redundant inline content — and asks whether to update them.
 
 Both modes enforce the [Documentation Principles](writing-docs.md) — duplication and wrong-audience content are flagged as issues, with direct links to the relevant writing-docs.md sections.
 
@@ -573,7 +569,7 @@
 
 ## Custom Conduction Commands
 
-These commands are workspace-level and available from any project within `apps-extra/`. They extend OpenSpec with GitHub Issues integration and Ralph Wiggum loops.
+These commands are workspace-level and available from any project within `apps-extra/`. They extend OpenSpec with GitHub Issues integration.
 
 ---
 
@@ -593,7 +589,7 @@ Create a Pull Request from a branch in any repo. Handles the full flow interacti
 2. **Confirms the source branch** — shows the current branch, lets you override
 3. **Recommends a target branch** based on the branching strategy; checks GitHub for an existing open PR on the same branch pair — if found, offers to view or update it instead
 4. **Checks for uncommitted or unpushed changes** — if any are found, offers to commit, stash, or continue; offers to push unpushed commits before continuing
-5. **Verifies global settings version** *(claude-code-config repo only)* — delegates to `/verify-global-settings-version`; pauses and offers a fix if a VERSION bump is missing
+5. **Verifies global settings version** *(.github repo only)* — delegates to `/verify-global-settings-version`; pauses and offers a fix if a VERSION bump is missing
 6. **Discovers CI checks from `.github/workflows/`** — reads the repo's workflow files to determine exactly which checks CI will run, then mirrors them locally (never hardcodes a list)
 7. **Installs missing dependencies** (`vendor/`, `node_modules/`) if needed before running checks
 8. **Runs all discovered checks** — nothing skipped; slow checks (e.g. test suites) ask for confirmation first; shows a pass/fail table when done
@@ -621,7 +617,7 @@
 
 **Phase:** Git / Delivery
 
-Checks whether `global-settings/VERSION` has been correctly bumped after any changes to files in the `global-settings/` directory. Run this before creating a PR on the `ConductionNL/claude-code-config` repo.
+Checks whether `global-settings/VERSION` has been correctly bumped after any changes to files in the `global-settings/` directory. Run this before creating a PR on the `ConductionNL/.github` repo.
**Usage:** ``` @@ -640,7 +636,7 @@ Checks whether `global-settings/VERSION` has been correctly bumped after any cha **When to use:** - Standalone: any time you modify a file in `global-settings/` and want to confirm the bump is in place before committing -- Automatically: called by `/create-pr` when the selected repo is `ConductionNL/claude-code-config` — no need to run it separately in that flow +- Automatically: called by `/create-pr` when the selected repo is `ConductionNL/.github` — no need to run it separately in that flow **Semver rules for `global-settings/`:** - `1.0.0 → 1.1.0` — new permissions, guards, or behavior added @@ -688,7 +684,7 @@ Created tracking issue: https://github.com/ConductionNL/opencatalogi/issues/42 Created 5 task issues: #43, #44, #45, #46, #47 Saved plan.json at: openspec/changes/add-search/plan.json -Run /opsx-ralph-start to begin implementation. +Run /opsx-apply to begin implementation. ``` **The plan.json it creates:** @@ -721,111 +717,27 @@ Run /opsx-ralph-start to begin implementation. --- -### `/opsx-ralph-start` *(not yet implemented)* - -**Phase:** Implementation - -Starts a Ralph Wiggum implementation loop driven by `plan.json`. This is the core of our minimal-context coding approach. - -**Usage:** -``` -/opsx-ralph-start -``` - -**Prerequisites:** -- A `plan.json` in the active change (created by `/opsx-plan-to-issues`) - -**What it does per iteration:** - -1. **Reads plan.json** — finds the next task with `"status": "pending"` -2. **Sets status to `"in_progress"`** in plan.json -3. **Reads ONLY the referenced spec section** — uses `spec_ref` to load just the relevant requirement, NOT the entire spec file -4. **Implements the task** — following acceptance criteria and coding standards -5. **Verifies** — checks acceptance criteria are met -6. 
**Updates progress:** - - Sets task to `"completed"` in plan.json - - Checks off boxes in tasks.md - - Closes the GitHub issue with a summary comment - - Updates the tracking issue checklist -7. **Loops** — picks up the next pending task, or stops if all done - -**Why minimal context matters:** - -Each iteration loads only: -- `plan.json` (the task list — typically 1-2 KB) -- One spec section via `spec_ref` (the specific requirement — a few paragraphs) -- The affected files - -It does NOT load: -- proposal.md -- design.md -- Other spec files -- The full tasks.md - -This prevents context window bloat and keeps each iteration fast and focused. - -**Resuming after interruption:** - -If the loop is interrupted (context limit, error, etc.), simply run `/opsx-ralph-start` again. It reads `plan.json`, finds the first non-completed task, and continues from there. - ---- - -### `/opsx-ralph-review` *(not yet implemented)* +### `/skill-creator` -**Phase:** Review +**Phase:** Maintenance / Meta -Verifies the completed implementation against all spec requirements and shared conventions. Creates a structured review report. +Create new skills, modify and improve existing skills, and measure skill performance with evals. Use when you want to build a new skill from scratch, refine an existing skill's behavior, or benchmark a skill's accuracy with quantitative evaluation runs. **Usage:** ``` -/opsx-ralph-review +/skill-creator ``` -**Prerequisites:** -- All tasks in plan.json should be `"completed"` - **What it does:** - -1. **Loads full context** — proposal, all delta specs, tasks, plan.json -2. **Checks completeness:** - - All tasks completed? - - All GitHub issues closed? - - All task checkboxes checked? -3. **Checks spec compliance:** - - For each ADDED requirement: does the implementation exist? - - For each MODIFIED requirement: is the old behavior changed? - - For each REMOVED requirement: is the deprecated code gone? - - Do GIVEN/WHEN/THEN scenarios match the code behavior? 
-4. **Cross-references shared specs:** - - `nextcloud-app/spec.md` — correct app structure, DI, route ordering - - `api-patterns/spec.md` — URL patterns, CORS, error responses - - `nl-design/spec.md` — design tokens, accessibility - - `docker/spec.md` — environment compatibility -5. **Categorizes findings:** - - **CRITICAL** — Spec MUST/SHALL requirement not met - - **WARNING** — SHOULD requirement not met or partial compliance - - **SUGGESTION** — Improvement opportunity -6. **Generates `review.md`** in the change directory -7. **Creates GitHub Issue** if CRITICAL/WARNING findings exist - -**Output example:** -``` -Review: add-search -Tasks completed: 5/5 -GitHub issues closed: 5/5 -Spec compliance: PASS (with warnings) - -Findings: -- 0 CRITICAL -- 2 WARNING - - Missing CORS headers on /api/search (api-patterns spec) - - No pagination metadata in response (api-patterns spec) -- 1 SUGGESTION - - Consider adding rate limiting - -Review saved: openspec/changes/add-search/review.md -GitHub issue created: #48 [Review] add-search: 0 critical, 2 warnings -``` +1. Helps you decide what the skill should do and roughly how +2. Drafts the SKILL.md +3. Generates a small set of test prompts and runs them against `claude-with-access-to-the-skill` +4. Drafts quantitative evals (or uses existing ones) and reports the metrics +5. Iterates on the skill based on qualitative and quantitative feedback +6. Optionally expands the test set for larger-scale benchmarking +7. Can also optimize a skill's `description` field for better triggering accuracy + +**When to use:** When adding a new capability, when an existing skill is misfiring or producing inconsistent results, or when you want to verify a recent skill change hasn't regressed behavior. 
--- diff --git a/docs/claude/examples/.mcp.json.example b/docs/claude/examples/.mcp.json.example new file mode 100644 index 0000000..3ee2375 --- /dev/null +++ b/docs/claude/examples/.mcp.json.example @@ -0,0 +1,32 @@ +{ + "mcpServers": { + "browser-1": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] + }, + "browser-2": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] + }, + "browser-3": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] + }, + "browser-4": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] + }, + "browser-5": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] + }, + "browser-6": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--isolated"] + }, + "browser-7": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] + } + } +} diff --git a/docs/claude/examples/CLAUDE.local.md.example b/docs/claude/examples/CLAUDE.local.md.example new file mode 100644 index 0000000..c086bfd --- /dev/null +++ b/docs/claude/examples/CLAUDE.local.md.example @@ -0,0 +1,29 @@ +# Local Credentials (DO NOT COMMIT) + +Copy this file to `.claude/CLAUDE.local.md` in your project and fill in your values: + +```bash +cp CLAUDE.local.md.example CLAUDE.local.md +``` + +## Local Backend Login + +- Email: admin +- Password: admin + +## Test Backend Login + +- Email: your-email@conduction.nl +- Password: (get from 1Password / team lead) + +## Production Backend Login + +- Email: your-email@conduction.nl +- Password: (get from 1Password / team lead) + +## Jira / Tempo API (optional, for WBSO tracking) + +- Jira URL: https://conduction.atlassian.net 
+- Jira Email: your-email@conduction.nl +- Jira API Token: (generate at https://id.atlassian.com/manage-profile/security/api-tokens) +- Tempo API Token: (generate in Tempo > Settings > API Integration) diff --git a/docs/claude/exapp-sidecar-status.md b/docs/claude/exapp-sidecar-status.md deleted file mode 100644 index 3805ef9..0000000 --- a/docs/claude/exapp-sidecar-status.md +++ /dev/null @@ -1,131 +0,0 @@ -# ExApp Sidecar Wrappers — Status Report - -**Date:** 2026-03-05 -**Goal:** Get ExApp sidecar wrappers (OpenKlant, OpenZaak, Valtimo, OpenTalk, Keycloak) up and running, following the n8n pattern. - -## Summary - -All 5 ExApp Docker containers are **running and healthy**. They are **registered with AppAPI** in Nextcloud. However, AppAPI's heartbeat mechanism is not fully completing the initialization cycle for all apps yet. - -## What's Done - -### Repositories & Submodules -- **keycloak-nextcloud** — New repo created at ConductionNL/keycloak-nextcloud, added as submodule -- **open-webui-nextcloud** — Added as submodule (repo already existed) -- **openklant, openzaak, valtimo, opentalk** — Existing submodules, `ex_app/lib/main.py` rewritten to use `nc_py_api` pattern - -### Docker Images Built -| App | Image | Base | Status | -|-----|-------|------|--------| -| Keycloak | `ghcr.io/conductionnl/keycloak-nextcloud:latest` | UBI9-minimal + microdnf Python + Keycloak 26.5.4 | Built, running | -| OpenKlant | `ghcr.io/conductionnl/openklant-exapp:latest` | maykinmedia/open-klant:2.15.0 | Built, running | -| OpenZaak | `ghcr.io/conductionnl/openzaak-exapp:latest` | openzaak/open-zaak:1.27.0 | Built, running | -| Valtimo | `ghcr.io/conductionnl/valtimo-exapp:latest` | eclipse-temurin:17-jre-jammy + ritense/valtimo-backend:12.0.0 | Built, running | -| OpenTalk | `ghcr.io/conductionnl/opentalk-exapp:latest` | python:3.11-slim + opentalk controller v0.31.0-3 | Built, running | - -### Docker Compose (openregister/docker-compose.yml) -- All 5 ExApp services added under 
`commonground` + `exapps` profiles -- Shared infrastructure: `exapp-redis` (Redis 7), `exapp-livekit` (LiveKit WebRTC) -- PostgreSQL databases created: keycloak, openklant, openzaak, opentalk, valtimo -- AppAPI-generated secrets hardcoded in compose for each ExApp -- Healthchecks use Python urllib (no wget/curl dependency) - -### AppAPI Registration -- 5 manual-install daemons registered (one per ExApp) -- All 5 apps registered in `oc_ex_apps` table -- Keycloak: **enabled**, OpenKlant/OpenZaak/Valtimo/OpenTalk: **disabled** (pending init) - -### Port Assignments (assigned by AppAPI) -| App | Port | -|-----|------| -| n8n | 23000 | -| Keycloak | 23002 | -| OpenZaak | 23003 | -| Valtimo | 23004 | -| OpenKlant | 23005 | -| OpenTalk | 23005 | - -## Current Container Status - -All 8 ExApp containers running and healthy: -``` -openregister-exapp-keycloak healthy -openregister-exapp-openklant healthy -openregister-exapp-openzaak healthy -openregister-exapp-valtimo healthy -openregister-exapp-opentalk healthy -openregister-exapp-livekit healthy -openregister-exapp-redis healthy -openregister-exapp-n8n healthy -``` - -## What's Left / Known Issues - -### 1. AppAPI Heartbeat → Init Cycle Not Completing -AppAPI checks heartbeat on each ExApp's assigned port. Some ExApps (valtimo, opentalk) are getting heartbeat counts but others (keycloak, openklant, openzaak) are stuck at 0. This may be an AppAPI internal scheduling issue or related to the high failure count from before we fixed the heartbeat endpoints. - -**Possible fix:** Unregister and re-register the stuck ExApps, or investigate AppAPI's heartbeat scheduling. - -### 2. Internal Services Not Started -The heartbeat endpoints return `{"status":"waiting"}` (HTTP 200) because the wrapped services (Django, Spring Boot, Rust) only start when AppAPI calls `/init` or `/enabled`. This is the chicken-and-egg: heartbeat must succeed → AppAPI calls init → internal service starts → heartbeat returns "ok". 
- -The 200 status fix should resolve this — AppAPI should proceed to call `/init` once it sees successful heartbeats. - -### 3. ZaakAfhandelApp Admin Settings Bug -`/settings/admin/app_api` crashes with "Unknown named parameter $app" in `ZaakAfhandelAppAdmin.php:55`. This is unrelated to the ExApp work but blocks the AppAPI admin UI. - -### 4. PostGIS Not Available -OpenZaak may need PostGIS extension but the pgvector PostgreSQL image doesn't include it. OpenZaak may need a different approach (PostGIS Docker image or skip geo features). - -### 5. Commits Pending -Changes to entrypoint.sh, main.py (heartbeat fix), Dockerfiles, and docker-compose.yml are local only. Need to commit and push to feature branches for each submodule. - -## Files Changed - -### docker-compose (openregister/) -- `docker-compose.yml` — Added 5 ExApp services, volumes, healthchecks, secrets, port assignments - -### keycloak-nextcloud/ (new repo) -- Full ExApp structure: `ex_app/lib/main.py`, `Dockerfile`, `entrypoint.sh`, `appinfo/info.xml`, CI workflows - -### openklant/ -- `ex_app/lib/main.py` — Rewritten with nc_py_api, heartbeat returns 200 -- `Dockerfile` — Updated base image to 2.15.0 -- `entrypoint.sh` — Fixed to use `python3 ex_app/lib/main.py` - -### openzaak/ -- `ex_app/lib/main.py` — Rewritten with nc_py_api, heartbeat returns 200 -- `Dockerfile` — Updated base image to 1.27.0 -- `entrypoint.sh` — Fixed to use `python3 ex_app/lib/main.py` -- `appinfo/info.xml` — Fixed registry to ghcr.io - -### valtimo/ -- `ex_app/lib/main.py` — Rewritten with nc_py_api, heartbeat returns 200 -- `Dockerfile` — Rewritten with proper Python install -- `entrypoint.sh` — Fixed to use `python3 ex_app/lib/main.py` -- `appinfo/info.xml` — Fixed registry to ghcr.io - -### opentalk/ -- `ex_app/lib/main.py` — Rewritten with nc_py_api, heartbeat returns 200 -- `Dockerfile` — Added controller.toml config file -- `controller.toml` — New minimal config for OpenTalk controller -- `entrypoint.sh` — Fixed to use 
`python3 ex_app/lib/main.py` -- `appinfo/info.xml` — Fixed registry to ghcr.io - -## Key Learnings - -1. **Keycloak UBI9-micro has no package manager** — use UBI9-minimal with microdnf instead -2. **glibc compatibility matters** — Python from Debian/Ubuntu cannot run on UBI9 (glibc 2.38 vs 2.34) -3. **AppAPI assigns unique ports** per ExApp — containers must listen on the assigned port, not hardcoded 23000 -4. **APP_SECRET must match** between docker-compose env and AppAPI's database — get secrets from `oc_ex_apps` table -5. **Healthcheck must return 200** even when internal service isn't running, or AppAPI won't proceed to `/init` -6. **Docker compose `${VAR}` in healthcheck** resolves at compose level, not container level — use Python `os.environ` or `$$VAR` instead - -## Next Steps - -1. Investigate why some ExApps aren't getting heartbeat checks from AppAPI -2. Once heartbeats work, verify `/init` is called and internal services start -3. Enable all 4 disabled ExApps -4. Test through browser at http://localhost:8080 -5. Commit all changes to feature branches -6. Push Docker images to ghcr.io diff --git a/docs/claude/getting-started.md b/docs/claude/getting-started.md index 18bbcf0..c2d3657 100644 --- a/docs/claude/getting-started.md +++ b/docs/claude/getting-started.md @@ -56,10 +56,10 @@ source ~/.bashrc **Optional — Usage monitoring:** Install the usage tracker to watch your Claude token consumption in real time inside VS Code. Especially useful before running multi-agent commands (see [parallel-agents.md](parallel-agents.md)). ```bash -bash .claude/usage-tracker/install.sh +bash usage-tracker/install.sh ``` -See [`.claude/usage-tracker/README.md`](../../usage-tracker/README.md) for setup details. +See [`usage-tracker/README.md`](../../usage-tracker/README.md) for setup details. 
## Step 1: Install OpenSpec @@ -80,13 +80,17 @@ The workspace has two levels of spec management: ### Workspace level (shared) ``` -apps-extra/ -├── project.md # Coding standards for ALL projects -├── openspec/ -│ ├── config.yaml # Shared context and rules -│ ├── schemas/conduction/ # Our custom workflow schema -│ ├── specs/ # Cross-project specs (NC conventions, APIs, etc.) -│ └── docs/ # You are here +apps-extra/ # Workspace root +├── project.md # Coding standards for ALL projects +├── hydra/ # Automation, skills & shared config +│ ├── .claude/ +│ │ └── skills/ # OpenSpec skills (opsx-new, opsx-ff, etc.) +│ └── openspec/ +│ ├── architecture/ # Company-wide ADRs +│ └── schemas/conduction/ # Shared workflow schema +│ +~/.github/ # Developer documentation (standalone repo) +└── docs/claude/ # You are here ``` These files define the patterns and conventions that apply to every project. @@ -178,8 +182,6 @@ This starts the implementation loop. Each iteration: The key benefit: each iteration works with minimal context, preventing AI "amnesia" on large changes. -> **Note:** `/opsx-ralph-start` is a planned dedicated implementation loop with deeper minimal-context loading and tighter GitHub integration — not yet implemented. Use `/opsx-apply` for now. - ### 4f. Review your work After all tasks are done: @@ -193,8 +195,6 @@ This checks every spec requirement against your implementation and reports: - **WARNING** findings that should be addressed - **SUGGESTION** findings that are nice-to-have -> **Note:** `/opsx-ralph-review` is a planned dedicated review command — not yet implemented. Use `/opsx-verify` for now. - ### 4g. 
Archive the change Once review passes: @@ -213,8 +213,8 @@ This merges your delta specs into the main specs and preserves the change for hi | Generate all specs at once | `/opsx-ff` | | Generate specs one at a time | `/opsx-continue` | | Convert tasks to GitHub Issues | `/opsx-plan-to-issues` | -| Start implementing | `/opsx-apply` *(or `/opsx-ralph-start` once built)* | -| Review implementation | `/opsx-verify` *(or `/opsx-ralph-review` once built)* | +| Start implementing | `/opsx-apply` | +| Review implementation | `/opsx-verify` | | Complete and archive | `/opsx-archive` | ## Next Steps diff --git a/docs/claude/global-claude-settings.md b/docs/claude/global-claude-settings.md index 8cf0683..53953f4 100644 --- a/docs/claude/global-claude-settings.md +++ b/docs/claude/global-claude-settings.md @@ -2,7 +2,7 @@ These are **mandatory** settings for anyone working on Conduction projects with Claude Code. They enforce a read-first, write-with-approval policy at the user level, ensuring Claude cannot perform destructive operations without explicit confirmation. They also version-check themselves at the start of each session so you always know when an update is available. -Project files under `.claude/` in this repo (for example `settings.json` with `enableAllProjectMcpServers` and MCP allowlists) **complement** this; they do not replace the global policy. +Project files under `.claude/` in this repo (for example `settings.json` with MCP allowlists) **complement** this; they do not replace the global policy. ## Versioned canonical files @@ -16,27 +16,32 @@ The canonical files live under **[`global-settings/`](../../global-settings/)**. 
## Install / update on a new machine -Run the following from the root of the `apps-extra` repo: +Run the following from the root of the `.github` repo: ```bash -REPO_CLAUDE="$(pwd)/.claude" +REPO_ROOT="$(pwd)" mkdir -p ~/.claude/hooks # Core settings and hooks -cp "$REPO_CLAUDE/global-settings/settings.json" ~/.claude/settings.json -cp "$REPO_CLAUDE/global-settings/block-write-commands.sh" ~/.claude/hooks/block-write-commands.sh -cp "$REPO_CLAUDE/global-settings/check-settings-version.sh" ~/.claude/hooks/check-settings-version.sh +cp "$REPO_ROOT/global-settings/settings.json" ~/.claude/settings.json +cp "$REPO_ROOT/global-settings/block-write-commands.sh" ~/.claude/hooks/block-write-commands.sh +cp "$REPO_ROOT/global-settings/check-settings-version.sh" ~/.claude/hooks/check-settings-version.sh chmod +x ~/.claude/hooks/block-write-commands.sh ~/.claude/hooks/check-settings-version.sh # Version tracking -cp "$REPO_CLAUDE/global-settings/VERSION" ~/.claude/settings-version -echo "$REPO_CLAUDE" > ~/.claude/settings-repo-path -``` +cp "$REPO_ROOT/global-settings/VERSION" ~/.claude/settings-version +echo "$REPO_ROOT" > ~/.claude/settings-repo-path + +# Online version checking via GitHub API (recommended — no local repo required): +echo "ConductionNL/.github" > ~/.claude/settings-repo-url -Requirements: **`jq`** and **`md5sum`** on `PATH`. Restart Claude Code after installing. +# Optional: track a branch other than main (tag or SHA also accepted). +# Defaults to "main" when this file is absent. +# echo "feature/claude-code-tooling" > ~/.claude/settings-repo-ref +``` -If `~` is not expanded in hook commands on your system, replace `~/.claude/hooks/…` with absolute paths in `~/.claude/settings.json`. +Requirements: **`jq`** and **`md5sum`** on `PATH`. Online mode also requires **`gh`** CLI (authenticated via `gh auth login`). Restart Claude Code after installing. 
## Session-start status panel @@ -47,16 +52,22 @@ At the start of every Claude session, a live status panel is printed to the term │ Global Claude Settings Status │ └──────────────────────────────────────────────┘ Installed : v1.0.0 ✓ - Local repo : master @ v1.0.0 - Online : v1.0.0 + Local repo : main @ v1.0.0 + Online : v1.0.0 (via GitHub API) ``` Color coding: - **Green** — version matches / up to date -- **Yellow** — local branch is ahead of installed (informational only, no action needed) +- **Yellow** — local branch is ahead of installed (informational only) - **Red** — installed is behind online main (update required) -If configuration issues are detected (missing `settings-repo-path`, missing `VERSION` file, unreachable remote), they are shown in red below the panel — never silently skipped. +The "Online" line shows the fetch method used: +- **(via GitHub API)** — fetched directly from GitHub using `gh api` (primary method, uses `settings-repo-url`) +- **(via git fetch)** — fetched from `origin/main` of the local repo clone (fallback method, uses `settings-repo-path`) + +If no local repo is configured, "Local repo" shows "(not configured)" instead of branch info. + +If configuration issues are detected (missing config files, unreachable remote, `gh` not installed), they are shown in red below the panel — never silently skipped. > **Note:** The terminal panel is only visible when using Claude Code in the terminal (CLI). In the VS Code extension, hook stderr is not shown as a visible banner — see the Claude chat message below instead. @@ -65,24 +76,66 @@ If configuration issues are detected (missing `settings-repo-path`, missing `VER In addition to the terminal panel, the hook always injects a message into Claude's context at the start of every session. Claude will relay this at the top of its first response: **Settings up to date:** -> New session started — Global Claude Settings checked. ✅ Settings are up to date (v1.0.0). 
+> New session started — Global Claude Settings checked. Settings are up to date (v1.0.0). **Update required** (prominently displayed, cannot be missed): -> ⚠️ NEW SESSION — GLOBAL CLAUDE SETTINGS: UPDATE REQUIRED -> Installed: v0.1.0 ❌ | Latest: v1.0.0 ✅ +> NEW SESSION — GLOBAL CLAUDE SETTINGS: UPDATE REQUIRED +> Installed: v0.1.0 (outdated) | Latest: v1.0.0 (on origin/main) > Say "update my global settings to 1.0.0" to apply the update. **Configuration error** (prominently displayed): -> 🚨 NEW SESSION — GLOBAL CLAUDE SETTINGS: CONFIGURATION ERROR -> ❌ [description of the issue] +> NEW SESSION — GLOBAL CLAUDE SETTINGS: CONFIGURATION ERROR +> [description of the issue] + +## Online version checking + +The version check supports two methods for fetching the online version, tried in order: + +### 1. GitHub API (primary — recommended) + +If `~/.claude/settings-repo-url` contains a GitHub repo slug (e.g. `ConductionNL/.github`), the hook fetches `VERSION` via `gh api` from the configured ref (default: `main`). This method: + +- Does **not** require a local clone of the repo +- Uses the authenticated `gh` CLI (requires `gh auth login`) +- Is faster than `git fetch` (single HTTP request) +- Falls back gracefully if `gh` is not installed or the API call fails + +### 2. Git fetch (fallback) + +If the GitHub API method is not configured or fails, and `~/.claude/settings-repo-path` points to a valid local clone, the hook falls back to `git fetch origin --depth=1` followed by `git show origin/:...`. This is the original method. + +### Tracking a non-default branch + +By default, both methods track the `main` branch. To track a different branch, tag, or SHA, write it to `~/.claude/settings-repo-ref`: + +```bash +echo "feature/claude-code-tooling" > ~/.claude/settings-repo-ref +``` + +When absent, the ref defaults to `main`. + +### Configuration options + +| Config file | Required? 
| Purpose | +|-------------|-----------|---------| +| `~/.claude/settings-repo-url` | Optional (recommended) | GitHub repo slug for online API check | +| `~/.claude/settings-repo-path` | Optional (fallback) | Path to the root of the canonical repo for git-based check | +| `~/.claude/settings-repo-ref` | Optional | Branch/tag/SHA to track (defaults to `main`) | + +You can configure: +- **Both URL and path** (recommended): GitHub API is tried first, local git as fallback +- **Only `settings-repo-url`**: Works without any local clone; no fallback if GitHub is unreachable +- **Only `settings-repo-path`**: Original behavior; requires a local clone +- **Neither**: Version check cannot run; a configuration warning is shown ## Keeping settings up to date -When the online (origin/main) version is bumped, Claude displays a prominent warning at the start of its first response in the new session. +When the online version is bumped, Claude displays a prominent warning at the start of its first response in the new session. -To update, tell Claude: **"update my global settings to [version]"** and Claude will pull all files directly from `origin/main` using `git show` — not from your local branch. This ensures you always get the exact online version regardless of which branch your local repo is on. +To update, tell Claude: **"update my global settings to [version]"** and Claude will pull all files from the canonical source: -> **Note:** The version check fetches `VERSION` from `origin/main` via `git fetch`. It checks the local repo's configured remote, not a separate URL. If your remote is the upstream nextcloud/server and the global-settings aren't tracked there, the online check will warn about that too. +- **Online mode** (`settings-repo-url` configured): Files are fetched via `gh api` directly from GitHub — no local clone needed. +- **Local mode** (`settings-repo-path` configured): Files are pulled from `origin/main` via `git show` from the local clone. 
### ⚠️ VERSION bump required on every change @@ -100,32 +153,46 @@ Run `/verify-global-settings-version` before creating a PR to confirm the bump i |------|------| | `~/.claude/settings.json` | User permissions allowlist, `PreToolUse` + `UserPromptSubmit` hooks, optional `mcpServers` | | `~/.claude/hooks/block-write-commands.sh` | Hook script invoked for every **Bash** tool use before it runs | -| `~/.claude/hooks/check-settings-version.sh` | Hook script that warns on version mismatch at session start | +| `~/.claude/hooks/check-settings-version.sh` | Hook script that shows the status panel and warns on version mismatch | | `~/.claude/settings-version` | Installed version (semver, matches repo `VERSION`) | -| `~/.claude/settings-repo-path` | Absolute path to `apps-extra/.claude/` — tells the version hook where to find the canonical `VERSION` file | +| `~/.claude/settings-repo-url` | GitHub repo slug for online version checking (e.g. `ConductionNL/.github`) | +| `~/.claude/settings-repo-path` | Absolute path to the root of the canonical repo (fallback for git-based check) | +| `~/.claude/settings-repo-ref` | Branch/tag/SHA to track for version checks (defaults to `main`) | ## Shape of `~/.claude/settings.json` -### 1. `permissions.allow` +### 1. `permissions.deny` + +Hard-blocked patterns — Claude cannot perform these even with user approval: + +- **Config files**: `Edit`/`Write` of `~/.claude/settings.json`, `hooks/*`, `settings-version`, `settings-repo-path`, `settings-repo-url`, `settings-repo-ref` +- **System**: `sudo`, `su`, `shutdown`, `reboot`, `halt`, `poweroff`, `mkfs`, `dd if=` +- **GitHub destructive**: `gh pr merge`, `gh repo delete`, `gh release delete` +- **Git destructive**: `git reset --hard`, `git clean -f/-fd/-fdx`, `git filter-branch`, `git filter-repo`, `git reflog expire/delete`, `git update-ref -d`, `git config --global`, `git checkout --`, `git restore` + +### 2. 
`permissions.allow` -List **Bash** permission patterns you want granted **without** prompting. Keep this aligned with the hook: anything you allow here should still pass `block-write-commands.sh`, or the hook will deny the command even if it is allowlisted. +Bash permission patterns granted **without** prompting. Keep this aligned with the hook: anything allowed here should still pass `block-write-commands.sh`, or the hook will deny the command even if it is allowlisted. Allowed categories (all read-only; write operations are gated by the hook): - **Inspection**: `ls`, `cat`, `head`, `tail`, `wc`, `stat`, `file`, `du`, `df`, `pwd`, `tree`, `find`, `realpath`, `basename`, `dirname` - **Text processing**: `diff`, `grep`, `egrep`, `awk`, `tr`, `sort`, `jq`, `cut`, `uniq`, `column` - **System info**: `which`, `whoami`, `uname`, `ps`, `free`, `lsof`, `ss`, `id`, `groups`, `uptime`, `hostname`, `env`, `date` -- **Git (read-only)**: `git log`, `git status`, `git diff`, `git show`, `git blame`, `git ls-files`, `git rev-parse`, `git describe`, `git shortlog`, `git cat-file`, `git branch`, `git remote`, `git stash list`, `git config --list` -- **`git -C`**: allow `Bash(git -C:*)` so agents can run git in arbitrary directories; the hook restricts **which** `git -C …` invocations are safe -- **Docker (read)**: `docker ps`, `docker images`, `docker logs`, `docker inspect`, `docker stats`, `docker info`, `docker network ls/inspect`, `docker volume ls/inspect`, `docker compose ps/config` -- **GitHub CLI (read)**: `gh pr list/view/checks/diff`, `gh issue list/view`, `gh repo view`, `gh run list/view`, `gh release list/view`, `gh workflow list` -- **Package managers (read)**: `composer show/validate/diagnose/check-platform-reqs`, `npm list/outdated`, `pnpm list/outdated`, `yarn list`, `pip list/show/freeze` +- **Checksums / misc**: `sha256sum`, `md5sum`, `nproc`, `printenv` +- **Git (read-only)**: `git log`, `git status`, `git diff`, `git show`, `git blame`, `git ls-files`, `git 
ls-tree`, `git rev-parse`, `git describe`, `git shortlog`, `git cat-file`, `git branch`, `git remote`, `git fetch`, `git stash list`, `git stash show`, `git config --list`, `git config --get` +- **`git -C`**: `Bash(git -C:*)` so agents can run git in arbitrary directories; the hook restricts **which** `git -C …` invocations are safe +- **Navigation**: `cd` +- **Docker (read)**: `docker ps`, `docker images`, `docker image inspect`, `docker logs`, `docker inspect`, `docker stats`, `docker info`, `docker network ls/inspect`, `docker volume ls/inspect`, `docker --version`, `docker compose ps/config/logs/version` +- **GitHub CLI (read)**: `gh pr list/view/checks/diff`, `gh issue list/view`, `gh repo view`, `gh run list/view`, `gh release list/view`, `gh workflow list/view`, `gh auth status`, `gh api` +- **Package managers (read)**: `composer --version/show/validate/diagnose/audit/check-platform-reqs`, `node --version`, `npm --version/list/outdated/audit`, `pnpm list/outdated`, `yarn list`, `pip list/show/freeze` - **PHP**: `php -l/-m/-i/--version` -- **HTTP / API (read; hook narrows further)**: `curl`, `gh api` +- **HTTP / API (read; hook narrows further)**: `curl` +- **Logs**: `Read(**/.claude/logs/**)` Do **not** put broad `Bash(*)` allow rules here. -### 2. `hooks.PreToolUse` +### 3. `hooks.PreToolUse` ```json "PreToolUse": [ @@ -136,7 +203,7 @@ Do **not** put broad `Bash(*)` allow rules here. ] ``` -### 3. `hooks.UserPromptSubmit` +### 4. `hooks.UserPromptSubmit` ```json "UserPromptSubmit": [ @@ -146,7 +213,7 @@ Do **not** put broad `Bash(*)` allow rules here. ] ``` -### 4. `mcpServers` (optional) +### 5. `mcpServers` (optional) 7 Playwright browser instances (`browser-1` through `browser-7`). `browser-6` runs headed (no `--headless`). Adjust the count to match your actual usage. @@ -162,16 +229,18 @@ Do **not** put broad `Bash(*)` allow rules here. 
| **curl** | GET without file output | Non-GET methods, data flags, `-o` / `--output` | — | | **gh api** | GET | POST/PUT/PATCH/DELETE, `--input`, `--field` / `--raw-field` | — | | **git push** | Last user message contains authorized phrase | — | Blocked otherwise | -| **git -C** | Read-only subcommands (`log`, `status`, `diff`, etc.) | Write subcommands, branch/remote writes, stash modifications, config writes | `push` (phrase-authorized) | +| **git -C** | Read-only subcommands | Write subcommands, branch/remote writes | `push` (phrase-authorized) | | **git branch** (bare) | Listing | `-d/-D/-m/-M/-c/-C`, `--delete`, `--move`, `--copy` | — | | **git remote** (bare) | Listing, `show`, `get-url` | `add`, `remove`, `rename`, `set-url`, `prune`, `update` | — | -| **env** | `env` alone or `VAR=value` assignments | Using `env` to execute another command | — | +| **env** | `env` alone or `VAR=value` | Using `env` to execute another command | — | | **date** | Display time | — | `-s` / `--set` (system clock) | -| **cat** | Normal stdout | Shell redirection `>` / `>>` to a file | — | -| **find** | Normal path traversal | `-delete`, `-exec`, `-execdir` | — | -| **sort** | Normal sort to stdout | `-o` / `--output`, shell `>` / `>>` | — | -| **awk** | Normal processing | `print >` / `print >>` in script, shell `>` after script | — | -| **hostname** | Read current hostname (no args) | Setting a new hostname (bare name argument) | — | +| **cat** | Normal stdout | Shell redirection `>` / `>>` | — | +| **find** | Normal traversal | `-delete`, `-exec`, `-execdir` | — | +| **sort** | Normal sort | `-o` / `--output`, shell `>` / `>>` | — | +| **awk** | Normal processing | `print >` / `print >>`, shell `>` after script | — | +| **hostname** | Read hostname | Setting a new hostname (bare name argument) | — | +| **WSL boundary** | — | — | All paths/executables escaping the Linux filesystem | +| Config writes (`~/.claude/`) | `git show origin/main:` from canonical repo; `gh api` from 
canonical repo | — | All other methods | Authorized git push phrases (case-insensitive): `push for me`, `commit and push`, `please push`, `push my changes`. @@ -179,23 +248,26 @@ Authorized git push phrases (case-insensitive): `push for me`, `commit and push` - Fires once per session (keyed to the transcript path via `/tmp/` flag file). - Reads the installed version from `~/.claude/settings-version`. -- Reads the local branch version from `$REPO_DIR/global-settings/VERSION` (repo path from `~/.claude/settings-repo-path`). -- Fetches the online version from `origin/main` via `git fetch --depth=1` and reads the `VERSION` file from that ref. -- Compares all three versions using semver and prints a colored status panel to stderr (visible in the terminal/CLI). +- Reads the tracking ref from `~/.claude/settings-repo-ref` (defaults to `main` when absent). +- **Online check (primary):** If `~/.claude/settings-repo-url` is set, fetches `VERSION` via `gh api` from the GitHub repo's configured ref. +- **Git fetch (fallback):** If the GitHub API method is not configured or fails, and `~/.claude/settings-repo-path` points to a valid local clone, fetches via `git fetch origin --depth=1` and reads the `VERSION` file from that ref. +- Reads the local branch version from `$REPO_DIR/global-settings/VERSION` (if a local repo is configured). +- Compares all versions using semver and prints a colored status panel to stderr (visible in the terminal/CLI). - Always injects a session-start message into Claude's context via stdout — "up to date", "update required", or "configuration error" — which Claude relays at the top of its first response. -- Never silently skips: configuration issues (missing `settings-repo-path`, missing `VERSION` file, unreachable remote) are shown in the panel and forwarded to Claude. +- Never silently skips: configuration issues (missing config files, unreachable remote, `gh` not installed) are shown in the panel and forwarded to Claude. 
## Relationship to this repo's `.claude/settings.json` -Under `apps-extra/.claude/`, project `settings.json` can enable project MCP servers and list allowed MCP tool names. That is separate from the **global** Bash policy above. For a consistent setup, use both: +Project `settings.json` in `.claude/` enables MCP servers and project-specific permissions. That is separate from the global Bash policy above: 1. Global `~/.claude/settings.json` + hooks for Bash safety and version checking. -2. Project `.claude/settings.json` (and `settings.local.json` if used) for workspace-specific MCP and permissions. +2. Project `.claude/settings.json` (and `settings.local.json` if used) for workspace-specific MCP. ## Checklist for a new machine -1. Run the install commands above (copies settings, hooks, version files). +1. Run the install commands above. 2. Confirm `jq` and `md5sum` are on `PATH`. -3. Restart Claude Code so settings reload. -4. Test: a denied pattern (e.g. `curl -X POST`) should be blocked with a clear reason. A `find . -exec` should prompt for approval. -5. Verify the version hook fires: open a new session and confirm no warning (or update if one appears). +3. For online mode: confirm `gh` is installed and authenticated (`gh auth status`). +4. Restart Claude Code. +5. Test: `curl -X POST` should be blocked. `find . -exec` should prompt. +6. Verify the status panel appears at the start of the next session. diff --git a/docs/claude/local-llm.md b/docs/claude/local-llm.md new file mode 100644 index 0000000..8332b13 --- /dev/null +++ b/docs/claude/local-llm.md @@ -0,0 +1,284 @@ +# Local LLM Setup (Ollama + Qwen) + +Claude Code can run with a **local LLM** instead of the Anthropic API, using [Ollama](https://ollama.com/) and Alibaba's [Qwen3-Coder](https://ollama.com/library/qwen3-coder) model. Ollama v0.14.0+ includes built-in Anthropic Messages API compatibility, so Claude Code connects to it without any proxy or adapter. 
+ +## When to use local vs Claude API + +| Use case | Recommendation | +|----------|---------------| +| **Data sovereignty** — code or data must stay in the EU / on-premise | Local Qwen | +| **Security-sensitive work** — credentials, private APIs, client data | Local Qwen | +| **Offline / air-gapped environments** | Local Qwen | +| **Simple tasks** — formatting, renaming, small refactors, boilerplate | Local Qwen | +| **Cost reduction** — high-volume, repetitive prompts | Local Qwen | +| **Complex reasoning** — architecture, debugging, multi-file changes | Claude API | +| **Large context** — analyzing entire codebases or long specs | Claude API | +| **Quality-critical** — production code, specs, client deliverables | Claude API | + +> **Rule of thumb:** Use Qwen locally for work that is private, simple, or high-volume. Use Claude API when quality and reasoning depth matter most. You can switch between them freely — they use the same Claude Code interface, tools, and commands. + +## Step 1: Install Ollama + +Install Ollama **natively on WSL** (not in Docker — native gives better GPU passthrough and performance): + +```bash +curl -fsSL https://ollama.com/install.sh | sh +``` + +Ollama runs as a background service automatically. Verify it's running: + +```bash +ollama --version # Should show 0.14.0+ +``` + +## Step 2: Pull a Qwen model + +Choose the right model for your GPU VRAM: + +| Model | Download | Size | Min VRAM | Speed (RTX 3070) | Tool calling? | +|-------|----------|------|----------|-------------------|---------------| +| `qwen3:8b` | `ollama pull qwen3:8b` | 5.2 GB | 8 GB (fits 100%) | ~12s | **No** (chat only) | +| **`qwen3:14b`** | `ollama pull qwen3:14b` | **9.3 GB** | 12 GB | **~2min** (spills to CPU on 8GB) | **Yes** | +| `qwen3-coder` | `ollama pull qwen3-coder` | 18 GB | 24 GB | ~6min (mostly CPU on 8GB) | Yes | + +**Recommended: `qwen3:14b`** — the smallest model that supports **tool calling** (reading files, editing code, running commands). 
On 8GB VRAM it's slow (~2min/response) but works as a batch/overnight agent. On 12GB+ VRAM it runs at interactive speed (~15s). + +```bash +ollama pull qwen3:14b +``` + +> **Why not `qwen3:8b`?** It's faster but can only chat — it cannot use tools (file access, shell commands, code editing). The model is too small to reliably produce the structured function-call format that CLI agents require. It will show its thinking but won't execute anything. +> +> **Why not `qwen3-coder`?** It's the most capable (30B params) but requires 24GB+ VRAM. On an 8GB GPU it runs ~68% on CPU and takes ~6 minutes per response. Only use it with a workstation GPU (RTX 4090, A6000, etc). + +Check your available memory: + +```bash +free -h # Look at the "available" column +nvidia-smi # Check GPU VRAM +``` + +If you don't have enough system memory, increase the WSL allocation. On **Windows**, edit (or create) `%USERPROFILE%\.wslconfig`: + +```ini +[wsl2] +memory=24GB +``` + +Then restart WSL from PowerShell: + +```powershell +wsl --shutdown +``` + +Reopen your Ubuntu terminal — the new memory limit is now active. + +## Step 3: Run Claude Code with Qwen + +Open a **new terminal** and run (replace model name with whichever you pulled): + +```bash +ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3:14b +``` + +This opens the full interactive Claude Code CLI — same interface, same tools, same commands — but powered by Qwen running locally on your machine. No data leaves your workstation. + +For a quick **one-shot prompt** (no interactive session): + +```bash +ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3:14b --print "explain this function" +``` + +## Running local and API side by side + +The env vars are **scoped to that single terminal window only**. 
This means you can run both simultaneously: + +- **Terminal 1** — Qwen locally (free, private, slower) doing a long-running task like a bulk refactor or code review +- **Terminal 2 / VS Code** — Claude API (fast, powerful) for your main interactive development work + +This is the recommended workflow: **sidecar the free local model** for background tasks while you continue your normal work with Claude API at full speed. The local session won't affect your API session in any way — they're completely independent. + +``` +┌─────────────────────────┐ ┌─────────────────────────┐ +│ Terminal 1 (Qwen) │ │ VS Code / Terminal 2 │ +│ │ │ │ +│ Free, local, private │ │ Claude API (Opus) │ +│ Running: bulk refactor │ │ Fast interactive dev │ +│ Speed: ~15 tok/s │ │ Speed: ~50-80 tok/s │ +│ Cost: $0 │ │ Cost: normal API usage │ +│ │ │ │ +│ ► Background task │ │ ► Your main work │ +└─────────────────────────┘ └─────────────────────────┘ +``` + +To go back to Claude API in any terminal, simply open a new terminal as normal — no env vars to unset. + +## Performance expectations + +Benchmarked on an RTX 3070 (8GB VRAM) with 24GB WSL memory: + +| Model | Simple task | Tool calling | Fits in 8GB VRAM | Usable interactively? | +|-------|-------------|-------------|------------------|----------------------| +| qwen3:8b | ~12 seconds | No | Yes (100% GPU) | Chat only | +| **qwen3:14b** | **~2 minutes** | **Yes** | No (spills to CPU) | **Batch/overnight** | +| qwen3-coder (30B) | ~6 minutes | Yes | No (68% CPU) | No | +| Claude API (Opus) | ~3 seconds | Yes | N/A (cloud) | Yes | + +**Be honest about the trade-off:** The recommended local model (`qwen3:14b`) is **~40x slower** than Claude API on 8GB VRAM hardware. It's not viable for interactive coding — but it **does support tool calling**, which makes it a real coding agent that can read files, edit code, and run commands. Use it for batch jobs you kick off and walk away from (e.g., overnight PHPCS fixes, bulk refactors, code reviews). 
+ +**Where local shines:** +- **Nightly / batch jobs** — automated code reviews, linting suggestions, documentation generation, bulk refactors where you kick it off and walk away +- **Cost** — completely free, no API usage, no token limits, run it as much as you want +- **Privacy** — nothing leaves your machine, ideal for client code under NDA or government data +- **Simple interactive tasks** — quick renames, formatting, boilerplate generation where the speed difference barely matters + +## Alternative: Qwen Code CLI (native Qwen experience) + +Qwen has its own dedicated CLI tool (v0.11+) with an interface similar to Claude Code, optimized for Qwen models: + +```bash +sudo npm install -g @qwen-code/qwen-code@latest +``` + +**Configure it to use your local Ollama** by editing `~/.qwen/settings.json`: + +```json +{ + "modelProviders": { + "openai": [ + { + "id": "qwen3:14b", + "name": "Qwen3 14B (Local Ollama)", + "envKey": "OLLAMA_API_KEY", + "baseUrl": "http://localhost:11434/v1" + } + ] + }, + "security": { + "auth": { + "selectedType": "openai" + } + }, + "env": { + "OLLAMA_API_KEY": "ollama" + }, + "model": { + "name": "qwen3:14b" + } +} +``` + +> Adjust the model `id` and `name` if you pulled a different model (e.g., `qwen3:8b` for chat-only, or `qwen3-coder` on 24GB+ VRAM). + +The key parts: `security.auth.selectedType: "openai"` bypasses the OAuth prompt, `modelProviders.openai` tells Qwen Code where your local Ollama lives, and `env.OLLAMA_API_KEY` provides the dummy API key that Ollama ignores but Qwen Code requires. + +**Launch it:** + +```bash +cd /path/to/your-project +qwen +``` + +**Tool calling requires `qwen3:14b` or larger.** The `qwen3:8b` model runs in chat-only mode — it can reason and answer questions but cannot use tools (no file access, no shell commands, no code editing). The `qwen3:14b` model supports structured tool calling and works as a full coding agent, though it's slow on 8GB VRAM (~2min/response). 
On 12GB+ VRAM it runs at interactive speed. + +**Sharing context with Claude Code:** Qwen Code reads `QWEN.md` instead of `CLAUDE.md`, but supports `@path/to/file.md` imports. You can create a `QWEN.md` in the workspace root that imports the Claude configuration: + +```markdown +@CLAUDE.md +@CLAUDE.local.md +``` + +This gives Qwen Code the same project context, coding standards, and credentials as Claude Code. However, Qwen Code does **not** support Claude's `/opsx-*` slash commands or skills — those are Claude Code-specific. For the full OpenSpec workflow, use Claude Code (with either API or local Qwen backend). + +## Tips + +- **Don't close the terminal** where Ollama is running — if Ollama stops, your Claude Code session loses its backend +- **One model at a time** — Ollama loads/unloads models automatically, but running two large models simultaneously will OOM +- **VS Code extension** still uses Claude API — the env var trick only works for the CLI. This is fine: use VS Code for complex work (Claude API) and terminal for quick local tasks (Qwen) +- **All Claude Code features work** — tools, file editing, git, commands, skills, browser MCP — because the interface is the same, only the model backend changes + +## Troubleshooting + +### Ollama model won't load (out of memory) + +Increase WSL memory in `%USERPROFILE%\.wslconfig`: + +```ini +[wsl2] +memory=24GB +``` + +Then restart WSL from PowerShell: `wsl --shutdown` + +--- + +## Double Dutch (RAD Workflow) + +A two-shift Rapid Application Development cycle that pairs Claude (daytime, fast, cloud) with Qwen (overnight, slow, local/free). 
+ +``` + 09:00 17:00 09:00 + | | | + ┌────────┴────────────────────────┴───────────────────────┴── + │ REVIEW ◄── DAY SHIFT (Claude) ──► HANDOFF NIGHT SHIFT (Qwen) + │ Qwen's Specs, architecture, Prepare PHPCS fixes, + │ output complex logic, task files boilerplate, + │ code review, PRs bulk refactors, + │ test generation + └──────────────────────────────────────────────────────────── +``` + +### Daily Cycle + +**Morning (09:00)** — Review Qwen's overnight output: code changes, test results, PHPCS fixes. Accept or reject changes, note issues for the day's work. + +**Day (09:00-17:00)** — Spec work with Claude: clarify requirements, write OpenSpec artifacts (`/opsx-ff`, `/opsx-new` → `/opsx-continue`), design architecture, solve hard problems, review PRs. Claude handles the thinking. + +**Evening (17:00)** — Hand off to Qwen: prepare self-contained task files (e.g., `qwen-phpcs-task.md`) with specific, mechanical work. Start Qwen batch and leave overnight. + +### Division of Labor + +| | Claude (Day) | Qwen (Night) | +|---|---|---| +| **Strengths** | Reasoning, architecture, specs, multi-file design | Mechanical fixes, repetitive changes, bulk ops | +| **Speed** | ~3s/response (cloud API) | ~2min/response (local 14b on 8GB VRAM) | +| **Cost** | API tokens (Max plan) | Free (local GPU) | +| **Best for** | Complex logic, code review, client deliverables | PHPCS fixes, boilerplate, test scaffolding | + +### Task File Format + +Qwen works best with narrow, explicit task files. Example: + +```markdown +# Task: Fix PHPCS Named Parameter Errors + +Working directory: `/path/to/app` + +## Files to fix +1. `lib/Controller/FooController.php` (3 errors) +2. `lib/Service/BarService.php` (1 error) + +## How to fix +Find function calls without named parameters. 
Look up the method signature
+and add the parameter name:
+- BEFORE: `$this->setName('value')` where signature is `setName(string $name)`
+- AFTER: `$this->setName(name: 'value')`
+
+## Verification
+Run: `./vendor/bin/phpcs --standard=phpcs.xml <file>`
+Expected: 0 errors
+```
+
+### Running Qwen Overnight
+
+```bash
+# Terminal 1 — start Qwen with Claude Code CLI
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama \
+  claude --model qwen3:14b
+
+# Then paste or reference the task file
+```
+
+> **Requires `qwen3:14b` or larger** for tool calling (file editing, shell commands). See [Step 2](#step-2-pull-a-qwen-model) for details.
+
+> **Known limitation:** Tool calling via CLI is unreliable with local models when system prompts are large. For now, Qwen works best on tasks where it can output code changes as text that you review and apply manually in the morning.
diff --git a/docs/claude/parallel-agents.md b/docs/claude/parallel-agents.md
index 63080d8..22e1dcf 100644
--- a/docs/claude/parallel-agents.md
+++ b/docs/claude/parallel-agents.md
@@ -64,7 +64,7 @@ These files are sent with **every single API call** in the workspace. In a paral
 **Rules:**
 - **CLAUDE.md**: Only include instructions Claude needs on every task. Move niche/infrequent knowledge to separate files in `.claude/docs/` that can be read on demand.
 - **MEMORY.md**: This is an index only — one line per memory file with a brief description. Never write memory content directly into MEMORY.md.
-- **Persona files** (`.claude/personas/*.md`): These are only loaded when a sub-agent explicitly reads them — they don't auto-load. Keep them focused, but they don't need to be ultra-short.
+- **Persona files** (`personas/*.md`): These are only loaded when a sub-agent explicitly reads them — they don't auto-load. Keep them focused, but they don't need to be ultra-short. 
## Two Kinds of Token Limits @@ -127,10 +127,10 @@ The [usage tracker](../../usage-tracker/README.md) lets you watch your token con ```bash # One-line status check -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar +python3 usage-tracker/claude-usage-tracker.py --status-bar # Live monitoring (30s refresh) -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor +python3 usage-tracker/claude-usage-tracker.py --monitor ``` -The tracker reads Claude Code's session files (`~/.claude/projects/`) directly and is accurate for API token counts. The **limit thresholds** are approximate — verify your real cap at [claude.ai/settings/usage](https://claude.ai/settings/usage). Setup instructions: [`.claude/usage-tracker/SETUP.md`](../../usage-tracker/SETUP.md). +The tracker reads Claude Code's session files (`~/.claude/projects/`) directly and is accurate for API token counts. The **limit thresholds** are approximate — verify your real cap at [claude.ai/settings/usage](https://claude.ai/settings/usage). Setup instructions: [`usage-tracker/SETUP.md`](../../usage-tracker/SETUP.md). diff --git a/docs/claude/playwright-setup.md b/docs/claude/playwright-setup.md new file mode 100644 index 0000000..c271c08 --- /dev/null +++ b/docs/claude/playwright-setup.md @@ -0,0 +1,74 @@ +# Playwright MCP Browser Setup + +Each project workspace configures its own Playwright MCP browser sessions in a `.mcp.json` file at the project root. The Nextcloud workspace uses 7 browsers; other projects (e.g., [wordpress-docker](https://github.com/ConductionNL/wordpress-docker)) may use fewer. An [example .mcp.json](./examples/.mcp.json.example) with the 7-browser configuration is available as a starting point. 
+ +## Browser Pool (Nextcloud workspace) + +| Server | Mode | Purpose | +|--------|------|---------| +| `browser-1` | Headless | Main agent (default) | +| `browser-2` | Headless | Sub-agent / parallel | +| `browser-3` | Headless | Sub-agent / parallel | +| `browser-4` | Headless | Sub-agent / parallel | +| `browser-5` | Headless | Sub-agent / parallel | +| `browser-6` | **Headed** | User observation (visible window) | +| `browser-7` | Headless | Sub-agent / parallel | + +## VS Code Extension Setup + +The VS Code extension loads MCP servers from `.mcp.json` in the **project root** (this file lives in each project repo, not in the `.github` documentation repo). The file defines 7 browser instances. Browsers 1–5 and 7 are headless; browser-6 is headed (omits `--headless`) so the browser window is visible when you want to watch: + +```json +{ + "mcpServers": { + "browser-1": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, + "browser-2": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, + "browser-3": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, + "browser-4": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, + "browser-5": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] }, + "browser-6": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--isolated"] }, + "browser-7": { "command": "npx", "args": ["-y", "@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"] } + } +} +``` + +The project's shared `.claude/settings.json` has two pre-approval entries: + +- **`"enableAllProjectMcpServers": true`** — auto-approves all servers from `.mcp.json` without prompting on each reload. 
+- **All `mcp__browser-*` tool calls** — pre-approved for all 7 browsers so that parallel sub-agents (used by `/test-app` Full mode and `/test-counsel`) can use their assigned browser without needing an interactive permission prompt. Without this, background agents are silently denied and no testing occurs. + +Then **reload the VS Code window**: `Ctrl+Shift+P` → type `reload window` → Enter. + +## Verification + +After reload, open the MCP servers panel to verify all 7 browsers show **Connected**. You can do this two ways: +- Type `/MCP servers` in the Claude Code chat input +- Or `Ctrl+Shift+P` → search **"MCP servers"** + +![MCP servers panel showing all 7 browser instances connected](./img/mcp-servers-connected.png) + +If any server shows an error, check the output panel: `Ctrl+Shift+P` → **"Output: Focus on Output"** → select **"Claude VSCode"** from the dropdown. + +## CLI Alternative (terminal only) + +For the Claude Code CLI (`claude` terminal command, not VS Code), you can start servers as HTTP endpoints on fixed ports and reference them via URL: + +```bash +# Start headless browsers +for port in 9221 9222 9223 9224 9225 9227; do + npx -y @playwright/mcp@latest --headless --isolated --port $port & +done + +# Start headed browser +npx -y @playwright/mcp@latest --isolated --port 9226 & +``` + +> This is **not needed for VS Code** — the extension manages server processes automatically via `.mcp.json`. Only use this approach if you're running `claude` from the terminal without VS Code. + +## Usage Rules + +1. **Default**: Use `browser-1` for normal work +2. **Parallel agents**: Assign sub-agents `browser-2` through `browser-5` and `browser-7` +3. **User watching**: Switch to `browser-6` when the user wants to observe +4. **Fallback**: If a browser errors, try the next numbered browser +5. 
**Keep `browser-6` reserved**: Only for explicit user observation diff --git a/docs/claude/walkthrough.md b/docs/claude/walkthrough.md index 8c11c61..cdc271d 100644 --- a/docs/claude/walkthrough.md +++ b/docs/claude/walkthrough.md @@ -259,7 +259,7 @@ At this point, **you read through all four files** and make adjustments: - Is the task breakdown granular enough? - Are the spec keywords right (MUST vs SHOULD)? -Edit the files directly if needed. Once you're satisfied, move to phase 2. +Edit the files directly if needed. Once you're satisfied, move to Phase 2. ## Phase 2: Plan to GitHub Issues @@ -289,8 +289,6 @@ Now you can see progress on GitHub! Open the tracking issue to see the full chec /opsx-apply ``` -> **Note:** `/opsx-ralph-start` is not yet implemented. Use `/opsx-apply` — it reads `plan.json` and handles GitHub Issues sync. - ### Iteration 1: SearchService Claude reads plan.json, finds Task 1 (pending), reads the spec section, and implements: @@ -368,8 +366,6 @@ Claude reads plan.json, finds Task 1 (pending), reads the spec section, and impl /opsx-verify ``` -> **Note:** `/opsx-ralph-review` is not yet implemented. Use `/opsx-verify` — it checks every spec requirement against the implementation and supports GitHub Issues sync via `plan.json`. 
- Claude loads ALL the specs and checks every requirement: > **Review: add-publication-search** diff --git a/docs/claude/workflow.md b/docs/claude/workflow.md index 18550a6..2555fc4 100644 --- a/docs/claude/workflow.md +++ b/docs/claude/workflow.md @@ -7,22 +7,23 @@ _This is the **architecture reference** — see [Getting Started](./getting-star This workspace uses a spec-driven development workflow that combines: - **OpenSpec** — Structured specifications alongside code - **GitHub Issues** — Visual progress tracking via kanban boards -- **Ralph Wiggum loops** — Focused, low-context AI coding iterations - **Spec verification** — Automated review of code against specifications The key insight: **specs are written once, then broken into small JSON tasks** that each point back to a specific spec section. This means AI coding loops can work with minimal context (just the task + its spec ref) instead of loading entire spec documents. ## Architecture -All specs and changes live in their **primary app repository** (submodule). There is no root `openspec/` directory. Workflow docs and skills live in `.claude/` (`claude-code-config` repo). +All specs and changes live in their **primary app repository** (submodule). Skills and shared config (schemas, company-wide ADRs) live in the `hydra` repo inside apps-extra. Developer documentation lives in the `.github` repo (`~/.github`). ``` apps-extra/ # Workspace root ├── project.md # Generic guidelines (all projects) -├── .claude/ # Claude Code config (company-wide repo) -│ ├── CLAUDE.md # Workflow instructions -│ ├── skills/ # OpenSpec skills (opsx-new, opsx-ff, etc.) -│ └── docs/ # This documentation +├── hydra/ # Automation, skills & shared config +│ ├── .claude/ +│ │ └── skills/ # OpenSpec skills (opsx-new, opsx-ff, etc.) 
+│ └── openspec/ +│ ├── architecture/ # Company-wide ADRs (ADR-001 through ADR-015) +│ └── schemas/conduction/ # Shared workflow schema │ ├── openregister/ # FOUNDATION REPO │ ├── project.md # Project description & context @@ -134,14 +135,12 @@ Start the focused implementation loop: /opsx-apply ``` -> **Note:** `/opsx-ralph-start` is a planned dedicated implementation loop with minimal-context loading and deeper GitHub Issues integration — not yet implemented. Use `/opsx-apply` for now; it already reads `plan.json` and supports GitHub Issues sync when a `plan.json` exists. - **Automated alternative — `/opsx-apply-loop` (experimental):** Runs Phases 3 → 4 → 5 in one hands-off command inside an isolated Docker container: ``` -/opsx-apply-loop procest add-sla-tracking +/opsx-apply-loop project add-sla-tracking /opsx-apply-loop # asks which app + change ``` @@ -174,8 +173,6 @@ After all tasks are complete, verify the implementation: /opsx-verify ``` -> **Note:** `/opsx-ralph-review` is a planned dedicated review command that will cross-reference shared specs and create GitHub Issues for findings — not yet implemented. Use `/opsx-verify` for now; it already supports GitHub Issues sync via `plan.json` when present. - This command: 1. Reads ALL spec requirements (ADDED/MODIFIED/REMOVED) 2. 
Checks each against the actual implementation @@ -251,15 +248,15 @@ See [writing-specs.md](writing-specs.md) for the complete guide — RFC 2119 key | `/opsx-ff` | Spec | Fast-forward all artifacts | | `/opsx-continue` | Spec | Create next artifact | | `/opsx-plan-to-issues` | Plan | Tasks → JSON + GitHub Issues | -| `/opsx-apply` | Implement | Implement tasks from plan.json (use this; `/opsx-ralph-start` not yet built) | -| `/opsx-verify` | Review | Verify implementation against specs (use this; `/opsx-ralph-review` not yet built) | +| `/opsx-apply` | Implement | Implement tasks from plan.json | +| `/opsx-verify` | Review | Verify implementation against specs | | `/opsx-archive` | Archive | Complete and preserve change | ## Tips - **Start small**: Try the flow on a small feature first to build muscle memory - **Review specs before coding**: The spec review is the most valuable step — catch issues before writing code -- **Keep tasks small**: Each task should be completable in one Ralph Wiggum iteration (15-30 min of focused work) +- **Keep tasks small**: Each task should be completable in one focused iteration (15-30 min of work) - **Use shared specs**: Reference cross-project specs in your delta specs to avoid reinventing patterns - **Trust the JSON**: The plan.json is your source of truth during implementation — it survives context window resets - **GitHub is your dashboard**: Use GitHub Projects to visualize progress across multiple changes and projects diff --git a/docs/claude/writing-docs.md b/docs/claude/writing-docs.md index 2146095..5c9bf83 100644 --- a/docs/claude/writing-docs.md +++ b/docs/claude/writing-docs.md @@ -61,7 +61,7 @@ When content is duplicated, it will eventually diverge. 
One copy gets updated; t | Project vision and phases | `openspec/ROADMAP.md` (if present) | | Target audience and personas | `openspec/audience.md` (if present) | | Architectural decisions (why) | `openspec/architecture/adr-{NNN}-*.md` | -| Architecture decisions index | `.claude/openspec/architecture/README.md` | +| Architecture decisions index | `hydra/openspec/architecture/README.md` | | Technical decisions and constraints | `openspec/architecture/` ADRs | | **Standards & Patterns** | | | NL Design System and UI standards | `openspec/specs/{domain}/spec.md` (app-specific) or company ADR-003 | @@ -73,21 +73,21 @@ When content is duplicated, it will eventually diverge. One copy gets updated; t | App administrator procedures | `docs/admin-guide.md` (if present) | | Developer setup and environment | `README.md` | | Available `make` commands and scripts | workspace root `Makefile` | -| Developer workflow and commands | `.claude/docs/commands.md`, `.claude/docs/workflow.md` | -| Testing conventions and persona usage | `.claude/docs/testing.md` | -| Docker environment and setup | `.claude/docs/docker.md`, `.claude/docs/getting-started.md` | -| Frontend standards | `.claude/docs/frontend-standards.md` | +| Developer workflow and commands | `.github/docs/claude/commands.md`, `.github/docs/claude/workflow.md` | +| Testing conventions and persona usage | `.github/docs/claude/testing.md` | +| Docker environment and setup | `.github/docs/claude/docker.md`, `.github/docs/claude/getting-started.md`| +| Frontend standards | `.github/docs/claude/frontend-standards.md` | | Standards compliance references | `docs/features/README.md` (GEMMA, ZGW, Forum Standaardisatie) | | **Testing** | | -| Persona testing behavior and scripts | `.claude/personas/` | +| Persona testing behavior and scripts | `hydra/personas/` | | Reusable test scenarios (Gherkin) | `test-scenarios/TS-*.md` | | **Meta** | | -| Spec and doc writing conventions | `.claude/docs/writing-specs.md`, 
`.claude/docs/writing-docs.md` | -| OpenSpec schema and artifact templates | `.claude/openspec/schemas/conduction/schema.yaml`, `templates/` | -| Parallel agent conventions | `.claude/docs/parallel-agents.md` | -| Claude harness configuration (permissions, hooks, env vars) | `.claude/global-settings/settings.json` | -| Global Claude settings guide | `.claude/docs/global-claude-settings.md` | -| Claude usage tracking documentation | `.claude/usage-tracker/README.md` | +| Spec and doc writing conventions | `.github/docs/claude/writing-specs.md`, `.github/docs/claude/writing-docs.md` | +| OpenSpec schema and artifact templates | `openspec/schemas/conduction/schema.yaml`, `templates/` | +| Parallel agent conventions | `.github/docs/claude/parallel-agents.md` | +| Claude harness configuration (permissions, hooks, env vars) | `.github/global-settings/settings.json` | +| Global Claude settings guide | `.github/docs/claude/global-claude-settings.md` | +| Claude usage tracking documentation | `.github/usage-tracker/README.md` | --- @@ -100,10 +100,10 @@ Each document has one target audience. Don't mix them. 
| End users / citizens | `docs/` feature docs | Plain language, no jargon, task-oriented | | App administrator | `docs/admin-guide.md` (if present) | Task-oriented, step-by-step | | Developer (setup, environment) | `README.md` | Technical, precise | -| Claude / spec workflow | `.claude/docs/`, `.claude/skills/` | Instruction-style, precise — Claude reads this at runtime | +| Claude / spec workflow | `.github/docs/`, `hydra/.claude/skills/` | Instruction-style, precise — Claude reads this at runtime | | Spec / requirements | `openspec/specs/` | RFC 2119, Gherkin — see [writing-specs.md](writing-specs.md) | | Architectural decisions (why) | `openspec/architecture/` | ADR format — context, decision, consequences; written for future developers | -| Claude test agents (persona testers) | `.claude/personas/` | Persona cards — behavior, goals, device preference; loaded by test commands at runtime | +| Claude test agents (persona testers) | `hydra/personas/` | Persona cards — behavior, goals, device preference; loaded by test commands at runtime | | Claude test agents (scenario execution) | `test-scenarios/` | Gherkin-style test scenarios; loaded by `/test-scenario-run` | **Developer/technical content does not belong in `docs/`.** If you find implementation details, class names, or spec requirements in a user-facing guide, replace them with plain-language descriptions or links to the spec. @@ -112,7 +112,7 @@ Each document has one target audience. Don't mix them. ## Language -**All documentation is written in English** — `docs/`, `.claude/docs/`, `openspec/`, `README.md`. +**All documentation is written in English** — `docs/`, `.github/docs/`, `openspec/`, `README.md`. **Filenames** also MUST be English — `user-guide.md`, not `handleiding.md`. @@ -124,13 +124,13 @@ Each document has one target audience. Don't mix them. Use this when you're not sure which file to write new content into. 
These rules cover the most common cases without needing to cross-reference both tables above. -1. **Is it _why_ a decision was made?** → `.claude/openspec/architecture/adr-{NNN}-*.md` -2. **Is it _what must be true_ (a requirement, acceptance criterion, or constraint)?** → `.claude/openspec/specs/{domain}/spec.md` (or per-project `openspec/specs/{domain}/spec.md`) +1. **Is it _why_ a decision was made?** → `hydra/openspec/architecture/adr-{NNN}-*.md` (company-wide) or `{app}/openspec/architecture/` (app-specific) +2. **Is it _what must be true_ (a requirement, acceptance criterion, or constraint)?** → `hydra/openspec/specs/{domain}/spec.md` (company-wide) or `{app}/openspec/specs/{domain}/spec.md` (app-specific) 3. **Is it instructions for an _end user or citizen_ using an app?** → `docs/` feature docs for that app 4. **Is it instructions for an _app administrator_?** → `docs/admin-guide.md` (if present in that app) 5. **Is it _developer setup_ or environment instructions?** → `README.md` -6. **Is it instructions for _Claude_ at runtime (workflow, testing, commands, spec writing)?** → `.claude/docs/` -7. **Is it about _project direction, phase goals, or technical strategy_?** → `.claude/openspec/ROADMAP.md` (if present) +6. **Is it instructions for _Claude_ at runtime (workflow, testing, commands, spec writing)?** → `.github/docs/` +7. **Is it about _project direction, phase goals, or technical strategy_?** → `hydra/openspec/ROADMAP.md` (if present) 8. **Is it _standards compliance_ information (GEMMA, ZGW, Forum Standaardisatie)?** → `docs/features/README.md` 9. **Is it a reusable _test flow_ (Given/When/Then)?** → `test-scenarios/TS-*.md` @@ -155,7 +155,7 @@ Users will be able to export publications to PDF format. ``` **Adding the marker:** -- Only use `[Future]` in `docs/` files — not in specs or `.claude/docs/` +- Only use `[Future]` in `docs/` files — not in specs or `.github/docs/` - Only mark features on the active roadmap. 
Don't document speculative or far-future items — if you don't know when they'll ship, don't document them yet - Write the section body in future tense: "Users will be able to..." @@ -304,10 +304,10 @@ Use screenshots to illustrate UI steps that are genuinely hard to describe in te | Purpose | Location | Committed? | |----------------------------------------------------------|--------------------------------------------|------------| | Documentation screenshots for `docs/` guides | `docs/images/` | Yes — commit alongside the doc | -| Documentation screenshots for `.claude/docs/` | `.claude/docs/images/` | Yes — commit alongside the doc | +| Documentation screenshots for `.github/docs/` | `.github/docs/images/` | Yes — commit alongside the doc | | Automated test screenshots (browser tests) | `{app}/test-results/` | **No** — gitignored | -The `docs/images/` and `.claude/docs/images/` directories do not exist yet — create them when you add the first image. +The `docs/images/` and `.github/docs/images/` directories do not exist yet — create them when you add the first image. 
**The gitignore boundary:** @@ -340,7 +340,7 @@ Always use relative paths and write meaningful alt text: ``` - `docs/` docs: path is relative to the doc file, so `images/filename.png` resolves to `docs/images/filename.png` -- `.claude/docs/` docs: same pattern — `images/filename.png` resolves to `.claude/docs/images/filename.png` +- `.github/docs/` docs: same pattern — `images/filename.png` resolves to `.github/docs/images/filename.png` - Never use absolute paths (see [Link Structure](#link-structure)) **Keeping screenshots current:** @@ -440,8 +440,8 @@ When reading or reviewing documentation, certain patterns are signals to stop an | Links to other docs | Whether the linked file and section still exist | | Phase references ("In Phase 1", "POC phase") | `openspec/ROADMAP.md` to see if the phase has advanced | | App or tool names ("OpenCatalogi", "OpenConnector") | App install scripts or `apps-extra/` to confirm still active | -| Persona names | `.claude/personas/` to confirm the persona still exists | -| Command names (`/opsx-archive`, `make reset`) | `.claude/skills/` or `Makefile` to confirm still valid | +| Persona names | `hydra/personas/` to confirm the persona still exists | +| Command names (`/opsx-archive`, `make reset`) | `hydra/.claude/skills/` or `Makefile` to confirm still valid | | Table of Contents entries | Whether each linked section still exists with the same heading | | "See [document title]" cross-references | Whether the referenced doc still has the described content | | Screenshot references (`![alt](images/...)`) | Whether the file exists AND whether the UI has changed since the screenshot was taken | @@ -473,7 +473,7 @@ Update a section when the underlying facts changed but the content's purpose and Move content when it is in the right state but the wrong place — usually because the audience changed or the project structure evolved: - Technical steps that ended up in a user-facing guide → move to `README.md` or a developer doc -- A section 
in a user guide that only makes sense to a developer → move to `.claude/docs/` or `README.md` +- A section in a user guide that only makes sense to a developer → move to `.github/docs/` or `README.md` - A spec requirement copy-pasted into a guide → replace with a link, remove the copy When moving, always replace the old location with a short link to the new one. Never just delete without redirecting. diff --git a/docs/claude/writing-specs.md b/docs/claude/writing-specs.md index 77fe5a3..268f11a 100644 --- a/docs/claude/writing-specs.md +++ b/docs/claude/writing-specs.md @@ -49,7 +49,7 @@ Every spec file at `openspec/specs/{domain}/spec.md` follows this structure: | Field | Required | Notes | |-------|----------|-------| | `**Status**` | Yes | `idea` → `planned` → `in-progress` → `done` | -| `**Scope**` | Yes | `company-wide` (in `.claude/openspec/specs/`) or app name (in `{app}/openspec/specs/`) | +| `**Scope**` | Yes | `company-wide` (in `hydra/openspec/specs/`) or app name (in `{app}/openspec/specs/`) | | `**OpenSpec changes**` | Yes | Vertical list, one entry per line, oldest first. `_(none yet)_` until first change created. Archived entries include `_(archived YYYY-MM-DD)_`. See [Grouping rule](#openspec-changes-list-format) below. | | `## Non-Functional Requirements` | Yes | Always present, even if minimal | | `## Acceptance Criteria` | Yes | Placeholder OK for `idea` status; fill in before moving to `planned` | @@ -245,7 +245,7 @@ See shared spec: `api-patterns/spec.md#requirement-url-structure` for URL conven See shared spec: `api-patterns/spec.md#requirement-cors-support` for CORS requirements. ``` -Shared specs live in `.claude/openspec/specs/` (company-wide, maintained by Conduction). Check that directory for currently available shared specs — the list evolves as new cross-app specs are added. Company-wide architectural decisions (NL Design System, API conventions, security, i18n) are captured in ADRs under `.claude/openspec/architecture/`. 
+Shared specs live in `hydra/openspec/specs/` (company-wide, maintained by Conduction). Check that directory for currently available shared specs — the list evolves as new cross-app specs are added. Company-wide architectural decisions (NL Design System, API conventions, security, i18n) are captured in ADRs under `hydra/openspec/architecture/`. ## Organizing Specs diff --git a/global-settings/README.md b/global-settings/README.md index 3c09506..3a0c8e9 100644 --- a/global-settings/README.md +++ b/global-settings/README.md @@ -12,6 +12,8 @@ Current version: see [`VERSION`](VERSION) | `block-write-commands.sh` | `~/.claude/hooks/block-write-commands.sh` | Guards write operations, prompts for approval | | `check-settings-version.sh` | `~/.claude/hooks/check-settings-version.sh` | Warns at session start if settings are outdated | | `VERSION` | `~/.claude/settings-version` | Installed version tracker (semver) | +| `settings-repo-url.example` | `~/.claude/settings-repo-url` | GitHub repo slug for online version checking | +| `settings-repo-ref.example` | `~/.claude/settings-repo-ref` | Branch/tag/SHA to track (defaults to `main` when absent) | ## Install @@ -31,11 +33,13 @@ cp "$REPO_ROOT/global-settings/VERSION" ~/.claude/settings-version echo "$REPO_ROOT" > ~/.claude/settings-repo-path # Online version checking via GitHub API (recommended — no local repo required): -echo "ConductionNL/.github" > ~/.claude/settings-repo-url +cp "$REPO_ROOT/global-settings/settings-repo-url.example" ~/.claude/settings-repo-url # Optional: track a branch other than main (tag or SHA also accepted). # Defaults to "main" when this file is absent. -# echo "feature/claude-code-tooling" > ~/.claude/settings-repo-ref +# To track a specific branch, copy and edit: +# cp "$REPO_ROOT/global-settings/settings-repo-ref.example" ~/.claude/settings-repo-ref +# echo "feature/your-branch" > ~/.claude/settings-repo-ref ``` Restart Claude Code after installing. 
Requires `jq`, `md5sum`, and `gh` (GitHub CLI) on `PATH`. diff --git a/global-settings/settings-repo-ref.example b/global-settings/settings-repo-ref.example new file mode 100644 index 0000000..ba2906d --- /dev/null +++ b/global-settings/settings-repo-ref.example @@ -0,0 +1 @@ +main diff --git a/global-settings/settings-repo-url.example b/global-settings/settings-repo-url.example new file mode 100644 index 0000000..0f01d48 --- /dev/null +++ b/global-settings/settings-repo-url.example @@ -0,0 +1 @@ +ConductionNL/.github diff --git a/usage-tracker/MODELS.md b/usage-tracker/MODELS.md index 0bb2ad6..89fa8a8 100644 --- a/usage-tracker/MODELS.md +++ b/usage-tracker/MODELS.md @@ -28,13 +28,13 @@ Track **Haiku, Sonnet, and Opus** simultaneously with separate usage monitoring. ```bash # Track Sonnet (default) -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar +python3 usage-tracker/claude-usage-tracker.py --status-bar # Track Haiku (1.2M session / 6M weekly) -python3 .claude/usage-tracker/claude-usage-tracker.py --model haiku --status-bar +python3 usage-tracker/claude-usage-tracker.py --model haiku --status-bar # Track Opus (200K session / 1M weekly) -python3 .claude/usage-tracker/claude-usage-tracker.py --model opus --status-bar +python3 usage-tracker/claude-usage-tracker.py --model opus --status-bar ``` --- @@ -45,13 +45,13 @@ Open 3 VS Code terminal tabs and run one per tab: ```bash # Tab 1: Sonnet -python3 .claude/usage-tracker/claude-usage-tracker.py --model sonnet --monitor --interval 300 +python3 usage-tracker/claude-usage-tracker.py --model sonnet --monitor --interval 300 # Tab 2: Haiku -python3 .claude/usage-tracker/claude-usage-tracker.py --model haiku --monitor --interval 300 +python3 usage-tracker/claude-usage-tracker.py --model haiku --monitor --interval 300 # Tab 3: Opus -python3 .claude/usage-tracker/claude-usage-tracker.py --model opus --monitor --interval 300 +python3 usage-tracker/claude-usage-tracker.py --model opus --monitor --interval 
300 ``` --- @@ -59,9 +59,9 @@ python3 .claude/usage-tracker/claude-usage-tracker.py --model opus --monitor --i ## Full Reports for Each Model ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py # Sonnet -python3 .claude/usage-tracker/claude-usage-tracker.py --model haiku # Haiku -python3 .claude/usage-tracker/claude-usage-tracker.py --model opus # Opus +python3 usage-tracker/claude-usage-tracker.py # Sonnet +python3 usage-tracker/claude-usage-tracker.py --model haiku # Haiku +python3 usage-tracker/claude-usage-tracker.py --model opus # Opus ``` --- @@ -73,7 +73,7 @@ Check all models before starting work: ```bash for model in haiku sonnet opus; do echo "$model:" - python3 .claude/usage-tracker/claude-usage-tracker.py --model $model --status-bar + python3 usage-tracker/claude-usage-tracker.py --model $model --status-bar done ``` @@ -81,7 +81,7 @@ End-of-session summary across all models: ```bash for model in haiku sonnet opus; do - echo "=== $model ===" && python3 .claude/usage-tracker/claude-usage-tracker.py --model $model + echo "=== $model ===" && python3 usage-tracker/claude-usage-tracker.py --model $model done ``` @@ -105,9 +105,9 @@ Both yellow = both at the same limit percentage, even though token counts differ Add to `~/.bashrc` or `~/.zshrc`: ```bash -alias claude-haiku="python3 /path/to/project/.claude/usage-tracker/claude-usage-tracker.py --model haiku --status-bar" -alias claude-sonnet="python3 /path/to/project/.claude/usage-tracker/claude-usage-tracker.py --status-bar" -alias claude-opus="python3 /path/to/project/.claude/usage-tracker/claude-usage-tracker.py --model opus --status-bar" +alias claude-haiku="python3 /path/to/project/usage-tracker/claude-usage-tracker.py --model haiku --status-bar" +alias claude-sonnet="python3 /path/to/project/usage-tracker/claude-usage-tracker.py --status-bar" +alias claude-opus="python3 /path/to/project/usage-tracker/claude-usage-tracker.py --model opus --status-bar" ``` --- @@ -123,11 +123,11 @@ Yes — the 
tracker reads JSONL files that Claude Code writes persistently to `~ Use the built-in combined view: ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar --all-models +python3 usage-tracker/claude-usage-tracker.py --status-bar --all-models ``` Or for a full report across all models at once: ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --all-models +python3 usage-tracker/claude-usage-tracker.py --all-models ``` diff --git a/usage-tracker/QUICKSTART.md b/usage-tracker/QUICKSTART.md index 317e076..d7a69b4 100644 --- a/usage-tracker/QUICKSTART.md +++ b/usage-tracker/QUICKSTART.md @@ -6,10 +6,10 @@ Run from the project root: ```bash # 1. Run installer -bash .claude/usage-tracker/install.sh +bash usage-tracker/install.sh # 2. Test it -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar +python3 usage-tracker/claude-usage-tracker.py --status-bar ``` That's it. No verbose logging to enable — the tracker reads Claude Code's session files in `~/.claude/projects/` directly. 
@@ -22,50 +22,50 @@ All commands run from the project root: ### Status (one-line) ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar # Sonnet only -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar --all-models # All three models -python3 .claude/usage-tracker/claude-usage-tracker.py --model haiku --status-bar # Haiku only +python3 usage-tracker/claude-usage-tracker.py --status-bar # Sonnet only +python3 usage-tracker/claude-usage-tracker.py --status-bar --all-models # All three models +python3 usage-tracker/claude-usage-tracker.py --model haiku --status-bar # Haiku only ``` ### Full Report ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py # Sonnet -python3 .claude/usage-tracker/claude-usage-tracker.py --all-models # All models +python3 usage-tracker/claude-usage-tracker.py # Sonnet +python3 usage-tracker/claude-usage-tracker.py --all-models # All models ``` ### Continuous Monitoring ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor # Sonnet, 5 min -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models # All models -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --interval 300 # 5 min refresh (default) -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --interval 30 # 30s refresh -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models --interval 300 -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models --active-only # Hide idle models +python3 usage-tracker/claude-usage-tracker.py --monitor # Sonnet, 5 min +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models # All models +python3 usage-tracker/claude-usage-tracker.py --monitor --interval 300 # 5 min refresh (default) +python3 usage-tracker/claude-usage-tracker.py --monitor --interval 30 # 30s refresh +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models --interval 300 +python3 
usage-tracker/claude-usage-tracker.py --monitor --all-models --active-only # Hide idle models ``` ### Check / Update Limits ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --limits # show current limits +python3 usage-tracker/claude-usage-tracker.py --limits # show current limits # Edit limits.json to set your real plan limits (copy from limits.example.json first) ``` ### Calibrate Session Reset Time ```bash # When you start a fresh session: -python3 .claude/usage-tracker/claude-usage-tracker.py --mark-session-start +python3 usage-tracker/claude-usage-tracker.py --mark-session-start # When claude.ai/settings/usage shows a known "Resets in X" time, store it: -python3 .claude/usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" +python3 usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" # Accepts: "4h 50m", "4:50", or plain minutes ("290") # Header shows (calibrated) vs (approx) — running monitor picks up changes within 1 second ``` ### Via Makefile ```bash -make -C .claude/usage-tracker report # View report -make -C .claude/usage-tracker status # Check status -make -C .claude/usage-tracker monitor # Monitor (60s) -make -C .claude/usage-tracker monitor-fast # Monitor (10s) +make -C usage-tracker report # View report +make -C usage-tracker status # Check status +make -C usage-tracker monitor # Monitor (60s) +make -C usage-tracker monitor-fast # Monitor (10s) ``` --- @@ -89,7 +89,7 @@ Run the monitor automatically every time you open the workspace. 
"type": "shell", "command": "python3", "args": [ - "${workspaceFolder}/.claude/usage-tracker/claude-usage-tracker.py", + "${workspaceFolder}/usage-tracker/claude-usage-tracker.py", "--monitor", "--all-models", "--interval", diff --git a/usage-tracker/README.md b/usage-tracker/README.md index 58fc94c..2507392 100644 --- a/usage-tracker/README.md +++ b/usage-tracker/README.md @@ -29,7 +29,7 @@ The Claude Code extension writes full API responses (including token counts) to ## What's Included ``` -.claude/usage-tracker/ +usage-tracker/ ├── claude-usage-tracker.py ← Main tracker script ├── claude-track.py ← CLI launcher wrapper ├── install.sh ← Installation script @@ -52,20 +52,20 @@ The Claude Code extension writes full API responses (including token counts) to ### 1. Install ```bash -bash .claude/usage-tracker/install.sh +bash usage-tracker/install.sh ``` ### 2. Test ```bash # Quick status -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar +python3 usage-tracker/claude-usage-tracker.py --status-bar # Full report -python3 .claude/usage-tracker/claude-usage-tracker.py +python3 usage-tracker/claude-usage-tracker.py # Live monitoring -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor +python3 usage-tracker/claude-usage-tracker.py --monitor ``` --- @@ -78,46 +78,46 @@ All commands run from the project root: ```bash # Show one-time report (Sonnet) -python3 .claude/usage-tracker/claude-usage-tracker.py +python3 usage-tracker/claude-usage-tracker.py # Show all three models at once -python3 .claude/usage-tracker/claude-usage-tracker.py --all-models +python3 usage-tracker/claude-usage-tracker.py --all-models # Compact status bar (single model) -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar +python3 usage-tracker/claude-usage-tracker.py --status-bar # Compact status bar (all models) -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar --all-models +python3 usage-tracker/claude-usage-tracker.py --status-bar 
--all-models # Continuous monitoring — one model -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor +python3 usage-tracker/claude-usage-tracker.py --monitor # Continuous monitoring — all models, 30s refresh -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models --interval 300 +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models --interval 30 # Show configured limits -python3 .claude/usage-tracker/claude-usage-tracker.py --limits +python3 usage-tracker/claude-usage-tracker.py --limits # Hide models with no usage -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models --active-only +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models --active-only # Track a specific model -python3 .claude/usage-tracker/claude-usage-tracker.py --model haiku --status-bar +python3 usage-tracker/claude-usage-tracker.py --model haiku --status-bar # Calibrate session at start of a new session -python3 .claude/usage-tracker/claude-usage-tracker.py --mark-session-start +python3 usage-tracker/claude-usage-tracker.py --mark-session-start # Calibrate session reset time (when claude.ai/settings/usage shows a known remaining time) -python3 .claude/usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" +python3 usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" ``` ### Via Make Commands ```bash -make -C .claude/usage-tracker report # Full report -make -C .claude/usage-tracker status # Status bar -make -C .claude/usage-tracker monitor # Monitor (60s) -make -C .claude/usage-tracker monitor-fast # Monitor (10s) -make -C .claude/usage-tracker test # Test setup +make -C usage-tracker report # Full report +make -C usage-tracker status # Status bar +make -C usage-tracker monitor # Monitor (60s) +make -C usage-tracker monitor-fast # Monitor (10s) +make -C usage-tracker test # Test setup ``` --- @@ -126,7 +126,7 @@ make -C .claude/usage-tracker test # Test setup ### Option A: Terminal
Panel (Easiest) 1. Open Terminal: `` Ctrl + ` `` -2. Run: `python3 .claude/usage-tracker/claude-usage-tracker.py --monitor` +2. Run: `python3 usage-tracker/claude-usage-tracker.py --monitor` 3. Keep panel open for live updates ### Option B: VS Code Task (Recommended) @@ -219,7 +219,7 @@ Color scale: 🔵 0% · 🟢 >0–50% · 🟡 50–75% · 🟠 75–90% · 🔴 ### Update Your Plan Limits ```bash -cp .claude/usage-tracker/limits.example.json .claude/usage-tracker/limits.json +cp usage-tracker/limits.example.json usage-tracker/limits.json # Edit limits.json with your values — see SETUP.md for full instructions ``` diff --git a/usage-tracker/SETUP.md b/usage-tracker/SETUP.md index 40f9227..74c6507 100644 --- a/usage-tracker/SETUP.md +++ b/usage-tracker/SETUP.md @@ -27,11 +27,11 @@ ls ~/.claude/projects/ # should list project directories after first use ## Step 1: Install Tracker Script ```bash -bash .claude/usage-tracker/install.sh +bash usage-tracker/install.sh ``` This will: -- ✅ Create log storage directory (`.claude/usage-tracker/logs/`, git-ignored) +- ✅ Create log storage directory (`usage-tracker/logs/`, git-ignored) - ✅ Make scripts executable - ✅ Create a symlink at `~/.local/bin/claude-usage-tracker` - ✅ Self-test the configuration @@ -42,13 +42,13 @@ This will: ```bash # Quick one-line status (Sonnet) -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar +python3 usage-tracker/claude-usage-tracker.py --status-bar # All three models at once -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar --all-models +python3 usage-tracker/claude-usage-tracker.py --status-bar --all-models # Full report -python3 .claude/usage-tracker/claude-usage-tracker.py +python3 usage-tracker/claude-usage-tracker.py ``` If you see `Today: 0.0%`, that's normal until you make Claude Code API calls today. The weekly total should show data if you've used Claude Code this week. 
@@ -61,7 +61,7 @@ Output shows `(cfg)` if your limits are configured, or `(est)` if using built-in ```bash # Copy the example file -cp .claude/usage-tracker/limits.example.json .claude/usage-tracker/limits.json +cp usage-tracker/limits.example.json usage-tracker/limits.json # Open and edit the values to match your actual plan ``` @@ -113,7 +113,7 @@ Omit `weekly_reset_day` / `weekly_reset_hour_utc` to fall back to Monday 00:00 U To verify your configuration: ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --limits +python3 usage-tracker/claude-usage-tracker.py --limits ``` ### Session reset calibration @@ -122,12 +122,12 @@ The session window is a rolling 5-hour window — there's no fixed start time in **At the start of a new session:** ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --mark-session-start +python3 usage-tracker/claude-usage-tracker.py --mark-session-start ``` **When [claude.ai/settings/usage](https://claude.ai/settings/usage) shows a known remaining time:** ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" +python3 usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" # Also accepts: "4:50" or plain minutes ("290") ``` @@ -162,7 +162,7 @@ This creates an always-on monitor that displays in a dedicated Terminal panel. "type": "shell", "command": "python3", "args": [ - "${workspaceFolder}/.claude/usage-tracker/claude-usage-tracker.py", + "${workspaceFolder}/usage-tracker/claude-usage-tracker.py", "--monitor", "--all-models", "--interval", @@ -213,15 +213,15 @@ Press `Ctrl+Shift+U` to start the monitor at any time. 
### One-time Report ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py # Sonnet only -python3 .claude/usage-tracker/claude-usage-tracker.py --all-models # All three models +python3 usage-tracker/claude-usage-tracker.py # Sonnet only +python3 usage-tracker/claude-usage-tracker.py --all-models # All three models ``` ### Status Bar (Compact, One Line per Model) ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar # Sonnet -python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar --all-models # All models +python3 usage-tracker/claude-usage-tracker.py --status-bar # Sonnet +python3 usage-tracker/claude-usage-tracker.py --status-bar --all-models # All models ``` Output (all models): @@ -240,23 +240,23 @@ First circle = Session · Second circle = Weekly · `(cfg)` = limits.json loaded ### Continuous Monitoring ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor # Sonnet, 5 min -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models # All models -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models --interval 300 -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models --active-only # Hide idle models +python3 usage-tracker/claude-usage-tracker.py --monitor # Sonnet, 5 min +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models # All models +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models --interval 300 +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models --active-only # Hide idle models ``` ### Check Configured Limits ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --limits +python3 usage-tracker/claude-usage-tracker.py --limits ``` ### Calibrate Session Reset ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py --mark-session-start # new session -python3 .claude/usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" # known time remaining +python3 
usage-tracker/claude-usage-tracker.py --mark-session-start # new session +python3 usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50m" # known time remaining ``` --- @@ -267,16 +267,16 @@ python3 .claude/usage-tracker/claude-usage-tracker.py --set-session-reset "4h 50 ```bash # Sonnet (default) -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor +python3 usage-tracker/claude-usage-tracker.py --monitor # Haiku -python3 .claude/usage-tracker/claude-usage-tracker.py --model haiku --monitor +python3 usage-tracker/claude-usage-tracker.py --model haiku --monitor # Opus -python3 .claude/usage-tracker/claude-usage-tracker.py --model opus --monitor +python3 usage-tracker/claude-usage-tracker.py --model opus --monitor # All three simultaneously -python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models +python3 usage-tracker/claude-usage-tracker.py --monitor --all-models ``` ### Update Plan Limits @@ -294,7 +294,7 @@ Default limits are approximate **subscription quota** estimates (not model conte If your Claude Code data is in a non-default location: ```bash -python3 .claude/usage-tracker/claude-usage-tracker.py \ +python3 usage-tracker/claude-usage-tracker.py \ --projects-dir /path/to/.claude/projects ``` diff --git a/usage-tracker/claude-track.py b/usage-tracker/claude-track.py index 3a26379..de9074b 100644 --- a/usage-tracker/claude-track.py +++ b/usage-tracker/claude-track.py @@ -14,7 +14,7 @@ def load_tracker(): tracker_path = Path(__file__).parent / "claude-usage-tracker.py" if not tracker_path.exists(): print(f"❌ Tracker script not found at {tracker_path}") - print("Run: bash .claude/usage-tracker/install.sh") + print("Run: bash usage-tracker/install.sh") sys.exit(1) spec = importlib.util.spec_from_file_location("claude_usage_tracker", tracker_path) mod = importlib.util.module_from_spec(spec) @@ -90,7 +90,7 @@ def main(): if not limits_path.exists(): print(" ⚠️ Copy limits.example.json → limits.json and edit values") - 
print("\n📖 Documentation : .claude/usage-tracker/SETUP.md") + print("\n📖 Documentation : usage-tracker/SETUP.md") print("🆘 Help : claude-track -h") else: diff --git a/usage-tracker/install.sh b/usage-tracker/install.sh index 2618f19..95bce32 100644 --- a/usage-tracker/install.sh +++ b/usage-tracker/install.sh @@ -1,6 +1,6 @@ #!/bin/bash # Claude Usage Tracker - Quick Install Script -# Run from project root: bash .claude/usage-tracker/install.sh +# Run from project root: bash usage-tracker/install.sh set -e @@ -57,13 +57,13 @@ echo "" echo "📚 NEXT STEPS:" echo "" echo "1️⃣ Test the tracker:" -echo " python3 .claude/usage-tracker/claude-usage-tracker.py --status-bar --all-models" +echo " python3 usage-tracker/claude-usage-tracker.py --status-bar --all-models" echo "" echo "2️⃣ Full report:" -echo " python3 .claude/usage-tracker/claude-usage-tracker.py --all-models" +echo " python3 usage-tracker/claude-usage-tracker.py --all-models" echo "" echo "3️⃣ Run continuous monitoring:" -echo " python3 .claude/usage-tracker/claude-usage-tracker.py --monitor --all-models" +echo " python3 usage-tracker/claude-usage-tracker.py --monitor --all-models" echo "" echo "4️⃣ Set up VS Code task (recommended):" echo " • Tasks: Open User Tasks (Ctrl/Cmd + Shift + P)" From d2923659193ed1551308a34762b82223cdddd447 Mon Sep 17 00:00:00 2001 From: WilcoLouwerse Date: Thu, 16 Apr 2026 18:22:10 +0200 Subject: [PATCH 5/5] docs: reorganize command reference and add skill documentation Split monolithic commands.md into domain-specific references (openspec, tender) and add new skill guides for checklists, evals, and patterns. Update testing, workflow, and writing-skills docs. Bump global settings scripts. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/claude/commands-openspec.md | 360 +++++++ docs/claude/commands-tender.md | 170 ++++ docs/claude/commands.md | 1068 ++------------------- docs/claude/skill-checklist.md | 47 + docs/claude/skill-evals.md | 141 +++ docs/claude/skill-patterns.md | 198 ++++ docs/claude/testing.md | 97 +- docs/claude/workflow.md | 24 + docs/claude/writing-skills.md | 413 +------- global-settings/block-write-commands.sh | 72 +- global-settings/check-settings-version.sh | 71 +- 11 files changed, 1246 insertions(+), 1415 deletions(-) create mode 100644 docs/claude/commands-openspec.md create mode 100644 docs/claude/commands-tender.md create mode 100644 docs/claude/skill-checklist.md create mode 100644 docs/claude/skill-evals.md create mode 100644 docs/claude/skill-patterns.md diff --git a/docs/claude/commands-openspec.md b/docs/claude/commands-openspec.md new file mode 100644 index 0000000..5216bca --- /dev/null +++ b/docs/claude/commands-openspec.md @@ -0,0 +1,360 @@ +# OpenSpec Command Reference + +These commands are installed per-project when you run `openspec init`. They're available inside each project directory. + +--- + +### `/opsx-new <change-name>` + +**Phase:** Spec Building + +Start a new change. Creates the change directory with metadata. + +**Usage:** +``` +/opsx-new add-publication-search +``` + +**What it creates:** +``` +openspec/changes/add-publication-search/ +└── .openspec.yaml # Change metadata (schema, created date) +``` + +**Tips:** +- Use descriptive kebab-case names: `add-dark-mode`, `fix-cors-headers`, `refactor-object-service` +- The name becomes a GitHub Issue label, so keep it readable + +--- + +### `/opsx-ff` + +**Phase:** Spec Building + +Fast-forward: generates ALL artifacts in dependency order (proposal → specs → design → tasks) in one go.
+ +**Usage:** +``` +/opsx-ff +``` + +**What it creates:** +``` +openspec/changes/add-publication-search/ +├── .openspec.yaml +├── proposal.md # Why & what +├── specs/ # Delta specs (ADDED/MODIFIED/REMOVED) +│ └── search/ +│ └── spec.md +├── design.md # How (technical approach) +└── tasks.md # Implementation checklist +``` + +**When to use:** When you have a clear idea of what you want to build and want to generate everything quickly for review. + +**When NOT to use:** When you want to iterate on each artifact step by step, getting feedback between each. Use `/opsx-continue` instead. + +**Model:** Asked at run time — the command asks which model to use and spawns a subagent with that model for artifact generation. Artifact quality (specs, design, tasks) directly determines implementation quality downstream. **Sonnet** for most changes. **Opus** for complex or architectural changes where deeper reasoning improves the design. + +--- + +### `/opsx-continue` + +**Phase:** Spec Building + +Creates the next artifact in the dependency chain. Run repeatedly to build specs incrementally. + +**Usage:** +``` +/opsx-continue # Creates proposal.md (first time) +/opsx-continue # Creates specs/ (second time) +/opsx-continue # Creates design.md (third time) +/opsx-continue # Creates tasks.md (fourth time) +``` + +**Dependency chain:** +``` +proposal (root) + ├── discovery (optional — requires: proposal) + ├── contract (optional — requires: proposal) + ├── specs (requires: proposal) + ├── design (requires: proposal) + ├── migration (optional — requires: design) + ├── test-plan (optional — requires: specs) + └── tasks (requires: specs + design) +``` + +**When to use:** When you want to review and refine each artifact before proceeding to the next. + +--- + +### `/opsx-explore` + +**Phase:** Pre-spec + +Think through ideas and investigate the codebase before starting a formal change. No artifacts are created. 
+ +**Usage:** +``` +/opsx-explore +``` + +**When to use:** When you're not sure what approach to take yet and want to investigate first. + +**Comparison with `/app-explore`:** + +| | `/opsx-explore` | `/app-explore` | +|---|---|---| +| **Scope** | Any topic — a change, a bug, an idea | A specific Nextcloud app's configuration | +| **Output** | None — thinking only | Writes to `openspec/app-config.json` | +| **When to use** | Before starting a change (`/opsx-new`) when requirements are unclear | When designing or refining an app's goals, architecture, and features | +| **Phase** | Pre-spec | Design / Configuration | + +Use `/opsx-explore` to think through *what to build*. Use `/app-explore` to document *how an app is designed and configured*. + +**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most exploration sessions. **Opus** recommended — complex analysis, architecture decisions, and strategic thinking benefit from stronger reasoning. + +--- + +### `/opsx-apply` + +**Phase:** Implementation + +OpenSpec's built-in implementation command. Reads `tasks.md` and works through tasks. + +**Usage:** +``` +/opsx-apply +``` + +**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most implementation work. **Opus** for architecturally complex changes. + +--- + +### `/opsx-verify` + +**Phase:** Review + +OpenSpec's built-in verification. Validates implementation against artifacts. 
+ +**Usage:** +``` +/opsx-verify +``` + +**Checks:** +- **Completeness** — All tasks done, all requirements implemented +- **Correctness** — Implementation matches spec intent +- **Coherence** — Design decisions reflected in code +- **Test coverage** — Every new PHP service/controller has a corresponding test file; every new Vue component has a test if the project uses Jest/Vitest +- **Documentation** — New features and API endpoints are described in README.md or docs/ + +**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most verification work. **Opus** for complex or large changes. + +--- + +### `/opsx-sync` + +**Phase:** Archive + +Merges delta specs from the change into the main `openspec/specs/` directory. + +**Usage:** +``` +/opsx-sync +``` + +**What it does:** +- **ADDED** requirements → appended to main spec +- **MODIFIED** requirements → replace existing in main spec +- **REMOVED** requirements → deleted from main spec + +Usually done automatically during archive. + +--- + +### `/sync-docs` + +**Phase:** Maintenance + +Check and sync documentation to reflect the current project state. Two targets: **app docs** (`{app}/docs/`) for a specific Nextcloud app's users and admins, and **dev docs** (`.github/docs/claude/`) for Claude and developers. + +**Usage:** +``` +/sync-docs # prompts for target +/sync-docs app # prompts for which app, then syncs its docs/ +/sync-docs app openregister # sync docs for a specific app +/sync-docs dev # sync developer/Claude docs (.github/docs/claude/) +``` + +Before syncing, runs 4 preliminary checks in parallel — config.yaml rules vs writing-docs.md/writing-specs.md, Sources of Truth accuracy, writing-specs.md vs schema template alignment (`openspec/schemas/conduction/`), and forked schema drift from the upstream `spec-driven` schema. Reports gaps and asks whether to fix before proceeding. 
+ +**App docs mode** (`{app}/docs/`) — checks the app's `README.md` (root), `docs/features/`, `docs/ARCHITECTURE.md`, `docs/FEATURES.md`, `docs/GOVERNMENT-FEATURES.md`, and any other user-facing `.md` files against the app's current specs. Also loads all company-wide ADRs from `hydra/openspec/architecture/` and any app-level ADRs as auditing context (never as link targets in app docs). Flags outdated descriptions, missing features, stale `[Future]` markers (with full removal checklist), broken links, duplicated content, writing anti-patterns, ADR compliance gaps (screenshots, i18n, API conventions), and missing GEMMA/ZGW/Forum Standaardisatie standards references. Never inserts links into `.claude/` paths. Always shows a diff and asks for confirmation before writing. + +**Dev docs mode** (`.github/docs/`) — checks `commands.md`, `workflow.md`, `writing-specs.md`, `writing-docs.md`, `testing.md`, `getting-started.md`, `README.md`, plus the conduction schema (`hydra/openspec/schemas/conduction/schema.yaml`) and its `templates/spec.md` for alignment with `writing-specs.md`. Never changes intent without user confirmation. After syncing, runs a Phase 6 review of all commands and skills for stale references, outdated instructions, and redundant inline content — and asks whether to update them. + +Both modes enforce the [Documentation Principles](writing-docs.md) — duplication and wrong-audience content are flagged as issues, with direct links to the relevant writing-docs.md sections. + +**When to use:** After a significant batch of changes — new commands, archived features, updated specs, or structural changes to the project. + +--- + +### `/opsx-archive` + +**Phase:** Archive + +Complete a change and preserve it for the historical record. + +**Usage:** +``` +/opsx-archive +``` + +**What it does:** +1. Checks artifact and task completion +2. Syncs delta specs into main specs (if not already done) +3. Moves the change to `openspec/changes/archive/YYYY-MM-DD-/` +4. 
All artifacts are preserved for audit trail +5. Updates or creates `docs/features/.md` — creates it if no matching feature doc exists +6. Updates the feature overview table in `docs/features/README.md` (creates the file if it doesn't exist) +7. Creates or updates `CHANGELOG.md` — completed tasks become versioned entries (version from `app-config.json`); uses [Keep a Changelog](https://keepachangelog.com/) format + +--- + +### `/opsx-bulk-archive` + +**Phase:** Archive + +Archive multiple completed changes at once. + +**Usage:** +``` +/opsx-bulk-archive +``` + +**When to use:** When you have several changes that are all complete and want to clean up. + +--- + +### `/opsx-apply-loop` + +**Phase:** Full Lifecycle (experimental) + +Automated apply→verify loop for a single change in a specific app. Runs the implementation loop inside an isolated Docker container, optionally runs targeted tests on the host, then archives and syncs to GitHub. + +**Usage:** +``` +/opsx-apply-loop # asks which app + change to run +/opsx-apply-loop procest add-sla-tracking # run a specific app/change +/opsx-apply-loop openregister seed-data # run in a different app +``` + +**What it does:** +1. Selects app and change (scans across all apps, or uses provided arguments) +2. Checks for a GitHub tracking issue (runs `/opsx-plan-to-issues` first if missing) +3. Creates a `feature//` branch in the app's git repo +4. Checks the Nextcloud environment is running +5. Reads `test-plan.md` (if present) and classifies which test commands to include in the loop +6. Asks whether to include a test cycle (tests run **outside the container** against the live Nextcloud app) +7. Builds and starts an isolated Docker container — mounts the app directory + shared `.claude/` skills (read-only); no git, no GitHub +8. 
Inside the container: runs `/opsx-apply` → `/opsx-verify` in a loop (max 5 iterations) + - CRITICAL issues retrigger the loop; WARNING issues also retrigger but never block archive + - At max iterations with only warnings remaining, archive still proceeds + - Seed data (ADR-016) is created/updated during apply as required +9. Captures container logs to `.claude/logs/`, then removes container +10. **If test cycle enabled:** runs targeted single-agent test commands on the host (max 3 test iterations); failures loop back into apply→verify +11. **If test cycle enabled and deferred tests exist:** asks about multi-agent/broad tests from the test-plan that were excluded from the loop; runs them once if confirmed, with one final apply→verify if they fail +12. Runs `/opsx-archive` on the host (after tests pass or tests skipped) +13. Commits all changes in the app repo with a generated commit message +14. Syncs GitHub: updates issue checkboxes, posts a completion comment, prompts to close +15. Asks about test scenario conversion (deferred from archive) +16. Shows a final report with iterations used, tasks completed, and what's next + +**When to use:** When you want hands-off implementation of a single change in one app. Prefer `/opsx-pipeline` for running multiple changes across apps in parallel. + +**Container design:** The container mounts the app directory at `/workspace` and the shared `.claude/` at `/workspace/.claude` (read-only). This gives the container's Claude session access to all shared skills without requiring git or GitHub. Each app is isolated — the container only touches one app directory. + +**Container limitations:** GitHub operations, `docker compose exec`, browser tests, and git commands are not available inside the container — all handled on the host after the container exits. Tests always run on the host against the live Nextcloud environment. 
+ +**Cap impact:** High — runs apply + verify sequentially (up to 5 iterations), optionally followed by targeted tests (up to 3 test iterations). Each iteration is a full implementation + verification pass. + +**Model:** Sonnet recommended for most changes; Opus for complex architectural work. Asked at run time. + +**Requires:** +- Docker running +- `gh` CLI authenticated on the host +- Nextcloud containers up (auto-started if not running — uses `docker compose -f` pointed at the docker-dev root's `.github/docker-compose.yml`) +- **Container authentication** — the Docker container cannot use interactive OAuth, so it needs an explicit token. One of these environment variables must be set in your shell (see [Getting Started — Container authentication](getting-started.md#prerequisites) for full setup instructions): + 1. `CLAUDE_CODE_AUTH_TOKEN` (preferred) — uses your existing Claude Max/Pro subscription at no extra cost. Generate with `claude setup-token`, then `export CLAUDE_CODE_AUTH_TOKEN="..."` in `~/.bashrc`. + 2. `ANTHROPIC_API_KEY` (fallback) — uses prepaid API credits from console.anthropic.com (costs money). `export ANTHROPIC_API_KEY="sk-ant-api03-..."` in `~/.bashrc`. + +--- + +### `/opsx-pipeline` + +**Phase:** Full Lifecycle (experimental) + +Process one or more OpenSpec changes through the full lifecycle in parallel — each change gets its own subagent, git worktree, feature branch, and PR. + +**Usage:** +``` +/opsx-pipeline all # process all open proposals across all repos +/opsx-pipeline procest # all open proposals in one app +/opsx-pipeline sla-tracking routing # specific changes by name +``` + +**What it does:** +1. Discovers open proposals (changes with `proposal.md` but not yet archived) +2. Presents an execution plan and asks for confirmation +3. Creates a git worktree and feature branch per change +4. Launches up to 5 parallel subagents — each runs ff → apply → verify → archive +5. 
Monitors progress and queues remaining changes as slots free up +6. Creates a PR per completed change to `development` +7. Reports full results including tasks completed, quality checks, and PR links + +**Subagent lifecycle per change:** +``` +ff (artifacts) → plan-to-issues → apply (implement + tests + docs) → verify → archive → push + PR +``` + +**When to use:** When you have multiple open proposals ready to implement and want to run them hands-off. + +**Cap impact:** High — up to 5 agents running full implementations in parallel. Each agent may run for 10-30 minutes depending on change complexity. + +**Model:** Asked at run time with three options: one model for all sub-agents, choose per change, or auto-select by reading each proposal. **Haiku** for simple changes (config, text, minor fixes). **Sonnet** for standard feature work. **Opus** for complex architectural changes. The model applies per implementation sub-agent — choose based on change complexity and available quota. + +**Requires:** `gh` CLI authenticated; quality checks must pass per app (`composer check:strict` / `npm run lint`) + +--- + +### `/opsx-onboard` + +**Phase:** Setup + +Get an overview of the current project's OpenSpec setup and active changes. + +**Usage:** +``` +/opsx-onboard +``` + +--- + +## OpenSpec CLI Commands + +These are terminal commands (not Claude slash commands) for managing specs directly. 
+ +| Command | Description | +|---------|-------------| +| `openspec init --tools claude` | Initialize OpenSpec in a project | +| `openspec list --changes` | List all active changes | +| `openspec list --specs` | List all specs | +| `openspec show ` | View details of a change or spec | +| `openspec status --change ` | Show artifact completion status | +| `openspec validate --all` | Validate all specs and changes | +| `openspec validate --strict` | Strict validation (errors on warnings) | +| `openspec update` | Regenerate AI tool config after CLI upgrade | +| `openspec schema which` | Show which schema is being used | +| `openspec config list` | Show all configuration | + +Add `--json` to any command for machine-readable output. diff --git a/docs/claude/commands-tender.md b/docs/claude/commands-tender.md new file mode 100644 index 0000000..d1f655c --- /dev/null +++ b/docs/claude/commands-tender.md @@ -0,0 +1,170 @@ +# Tender & Ecosystem Intelligence Commands + +These commands support the competitive analysis and ecosystem gap-finding workflow. They operate on the `concurrentie-analyse/intelligence.db` SQLite database and require the database to exist before running. + +--- + +### `/tender-scan` + +**Phase:** Intelligence Gathering + +Scrape TenderNed for new tenders, import them into SQLite, and classify unclassified tenders by software category using a local Qwen model. + +**Usage:** +``` +/tender-scan +``` + +**What it does:** +1. Runs `concurrentie-analyse/tenders/scrape_tenderned.py` to fetch fresh data +2. Imports new tenders into the intelligence database +3. Classifies unclassified tenders using Qwen via `localhost:11434` +4. Reports new tenders found, classified, and any new gaps detected + +**Requires:** Local Qwen model running on Ollama (`http://localhost:11434`) + +--- + +### `/tender-status` + +**Phase:** Intelligence Monitoring + +Show a dashboard of the tender intelligence database — totals by source, category, status, gaps, and recent activity. 
+ +**Usage:** +``` +/tender-status +``` + +**What it does:** +- Queries `concurrentie-analyse/intelligence.db` for live stats +- Shows tenders by source, status, and category (top 15) +- Highlights categories with Conduction coverage vs gaps +- Shows top integration systems and ecosystem gaps + +**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. + +--- + +### `/tender-gap-report` + +**Phase:** Gap Analysis + +Generate a gap analysis report — software categories that appear in government tenders but have no Conduction product. + +**Usage:** +``` +/tender-gap-report +``` + +**What it does:** +1. Queries the database for categories with tenders but no `conduction_product` +2. Generates a markdown report at `concurrentie-analyse/reports/gap-report-{date}.md` +3. Includes top 5 gaps with tender details, organisations, and key requirements +4. Cross-references with `application-roadmap.md` to flag already-tracked gaps +5. Recommends which gaps to investigate first + +--- + +### `/ecosystem-investigate ` + +**Phase:** Competitive Research + +Deep-dive research into a software category — find and analyze open-source competitors using GitHub, G2, Capterra, AlternativeTo, and TEC. + +**Usage:** +``` +/ecosystem-investigate bookkeeping +``` + +**What it does:** +1. Loads category context and related tenders from the intelligence database +2. Uses the browser pool (browser-1 through browser-5) to scrape 5-10 competitors from multiple source types +3. Creates competitor profiles in `concurrentie-analyse/{category}/{competitor-slug}/` +4. Inserts competitors and feature data into the database with provenance tracking +5. Presents a comparison table and recommendation for Nextcloud ecosystem fit + +**Model:** Checked at run time — stops if on Haiku. 
Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most categories. **Opus** for high-stakes or complex categories where strategic depth matters. + +--- + +### `/ecosystem-propose-app ` + +**Phase:** Product Planning + +Generate a full app proposal for a software category gap, using tender requirements and competitor research as input. + +**Usage:** +``` +/ecosystem-propose-app bookkeeping +``` + +**What it does:** +1. Gathers all tenders, requirements, competitors, and integrations for the category +2. Generates a structured proposal following the template in `concurrentie-analyse/application-roadmap.md` +3. Appends the proposal to `application-roadmap.md` +4. Inserts the proposal into the `app_proposals` database table +5. Optionally bootstraps the app with `/app-create` + +**Model:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most proposals. **Opus** for high-stakes proposals where architectural fit and market analysis need extra depth. + +--- + +### `/intelligence-update [source]` + +**Phase:** Intelligence Maintenance + +Pull latest data from external sources into the intelligence database. Syncs sources that are past their scheduled interval. + +**Usage:** +``` +/intelligence-update # sync all sources that are due +/intelligence-update all # force sync every source +/intelligence-update wikidata-software # sync one specific source +``` + +**Sources and intervals:** + +| Source | Interval | +|--------|----------| +| `tenderned` | 24h | +| `wikidata-software` | 7 days | +| `wikipedia-comparisons` | 7 days | +| `awesome-selfhosted` | 7 days | +| `github-issues` | 7 days | +| `dpg-registry` | 7 days | +| `developers-italia` | 7 days | +| `gemma-release` | yearly | + +**What it does:** +1. Checks `source_syncs` table for overdue sources +2. 
Runs `concurrentie-analyse/scripts/sync/sync_{source}.py` for each +3. Updates sync status, records count, and error messages +4. Displays a summary table of all sources with their sync status + +**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. + +--- + +## Tender Intelligence Workflow + +``` +/tender-scan (fetch & classify new tenders) + │ + ▼ +/tender-status (review dashboard) + │ + ▼ +/tender-gap-report (identify gaps) + │ + ▼ +/ecosystem-investigate (research competitors for top gap) + │ + ▼ +/ecosystem-propose-app (generate app proposal) + │ + ▼ +/app-design (design the new app) +``` + +**Keep data fresh:** Run `/intelligence-update` weekly and `/tender-scan` daily to keep the database current. diff --git a/docs/claude/commands.md b/docs/claude/commands.md index 989eeea..4aa6a6e 100644 --- a/docs/claude/commands.md +++ b/docs/claude/commands.md @@ -1,515 +1,77 @@ # Command Reference -Complete reference for all commands available in the spec-driven development workflow. +Complete reference for all commands available in the spec-driven development workflow. Commands are organized by domain — click through to the detailed reference for each area. -## OpenSpec Built-in Commands +## OpenSpec Commands -These commands are installed per-project when you run `openspec init`. They're available inside each project directory. +Full spec-driven workflow: create changes, generate artifacts, implement, verify, and archive. ---- - -### `/opsx-new ` - -**Phase:** Spec Building - -Start a new change. Creates the change directory with metadata. 
- -**Usage:** -``` -/opsx-new add-publication-search -``` - -**What it creates:** -``` -openspec/changes/add-publication-search/ -└── .openspec.yaml # Change metadata (schema, created date) -``` - -**Tips:** -- Use descriptive kebab-case names: `add-dark-mode`, `fix-cors-headers`, `refactor-object-service` -- The name becomes a GitHub Issue label, so keep it readable - ---- - -### `/opsx-ff` - -**Phase:** Spec Building - -Fast-forward: generates ALL artifacts in dependency order (proposal → specs → design → tasks) in one go. - -**Usage:** -``` -/opsx-ff -``` - -**What it creates:** -``` -openspec/changes/add-publication-search/ -├── .openspec.yaml -├── proposal.md # Why & what -├── specs/ # Delta specs (ADDED/MODIFIED/REMOVED) -│ └── search/ -│ └── spec.md -├── design.md # How (technical approach) -└── tasks.md # Implementation checklist -``` - -**When to use:** When you have a clear idea of what you want to build and want to generate everything quickly for review. - -**When NOT to use:** When you want to iterate on each artifact step by step, getting feedback between each. Use `/opsx-continue` instead. - -**Model:** Asked at run time — the command asks which model to use and spawns a subagent with that model for artifact generation. Artifact quality (specs, design, tasks) directly determines implementation quality downstream. **Sonnet** for most changes. **Opus** for complex or architectural changes where deeper reasoning improves the design. - ---- - -### `/opsx-continue` - -**Phase:** Spec Building - -Creates the next artifact in the dependency chain. Run repeatedly to build specs incrementally. 
- -**Usage:** -``` -/opsx-continue # Creates proposal.md (first time) -/opsx-continue # Creates specs/ (second time) -/opsx-continue # Creates design.md (third time) -/opsx-continue # Creates tasks.md (fourth time) -``` - -**Dependency chain:** -``` -proposal (root) - ├── discovery (optional — requires: proposal) - ├── contract (optional — requires: proposal) - ├── specs (requires: proposal) - ├── design (requires: proposal) - ├── migration (optional — requires: design) - ├── test-plan (optional — requires: specs) - └── tasks (requires: specs + design) -``` - -**When to use:** When you want to review and refine each artifact before proceeding to the next. - ---- - -### `/opsx-explore` - -**Phase:** Pre-spec - -Think through ideas and investigate the codebase before starting a formal change. No artifacts are created. - -**Usage:** -``` -/opsx-explore -``` - -**When to use:** When you're not sure what approach to take yet and want to investigate first. - -**Comparison with `/app-explore`:** - -| | `/opsx-explore` | `/app-explore` | -|---|---|---| -| **Scope** | Any topic — a change, a bug, an idea | A specific Nextcloud app's configuration | -| **Output** | None — thinking only | Writes to `openspec/app-config.json` | -| **When to use** | Before starting a change (`/opsx-new`) when requirements are unclear | When designing or refining an app's goals, architecture, and features | -| **Phase** | Pre-spec | Design / Configuration | - -Use `/opsx-explore` to think through *what to build*. Use `/app-explore` to document *how an app is designed and configured*. - -**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most exploration sessions. ✅ **Opus** recommended — complex analysis, architecture decisions, and strategic thinking benefit from stronger reasoning. - ---- - -### `/opsx-apply` - -**Phase:** Implementation - -OpenSpec's built-in implementation command. 
Reads `tasks.md` and works through tasks. - -**Usage:** -``` -/opsx-apply -``` - -**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most implementation work. **Opus** for architecturally complex changes. - ---- - -### `/opsx-verify` - -**Phase:** Review - -OpenSpec's built-in verification. Validates implementation against artifacts. - -**Usage:** -``` -/opsx-verify -``` - -**Checks:** -- **Completeness** — All tasks done, all requirements implemented -- **Correctness** — Implementation matches spec intent -- **Coherence** — Design decisions reflected in code -- **Test coverage** — Every new PHP service/controller has a corresponding test file; every new Vue component has a test if the project uses Jest/Vitest -- **Documentation** — New features and API endpoints are described in README.md or docs/ - -**Model:** Checked at run time — stops if on Haiku. **Sonnet** for most verification work. **Opus** for complex or large changes. - ---- - -### `/opsx-sync` - -**Phase:** Archive - -Merges delta specs from the change into the main `openspec/specs/` directory. - -**Usage:** -``` -/opsx-sync -``` - -**What it does:** -- **ADDED** requirements → appended to main spec -- **MODIFIED** requirements → replace existing in main spec -- **REMOVED** requirements → deleted from main spec - -Usually done automatically during archive. - ---- - -### `/sync-docs` - -**Phase:** Maintenance - -Check and sync documentation to reflect the current project state. Two targets: **app docs** (`{app}/docs/`) for a specific Nextcloud app's users and admins, and **dev docs** (`.github/docs/claude/`) for Claude and developers. 
- -**Usage:** -``` -/sync-docs # prompts for target -/sync-docs app # prompts for which app, then syncs its docs/ -/sync-docs app openregister # sync docs for a specific app -/sync-docs dev # sync developer/Claude docs (.github/docs/claude/) -``` - -Before syncing, runs 4 preliminary checks in parallel — config.yaml rules vs writing-docs.md/writing-specs.md, Sources of Truth accuracy, writing-specs.md vs schema template alignment (`openspec/schemas/conduction/`), and forked schema drift from the upstream `spec-driven` schema. Reports gaps and asks whether to fix before proceeding. - -**App docs mode** (`{app}/docs/`) — checks the app's `README.md` (root), `docs/features/`, `docs/ARCHITECTURE.md`, `docs/FEATURES.md`, `docs/GOVERNMENT-FEATURES.md`, and any other user-facing `.md` files against the app's current specs. Also loads all company-wide ADRs from `hydra/openspec/architecture/` and any app-level ADRs as auditing context (never as link targets in app docs). Flags outdated descriptions, missing features, stale `[Future]` markers (with full removal checklist), broken links, duplicated content, writing anti-patterns, ADR compliance gaps (screenshots, i18n, API conventions), and missing GEMMA/ZGW/Forum Standaardisatie standards references. Never inserts links into `.claude/` paths. Always shows a diff and asks for confirmation before writing. - -**Dev docs mode** (`.github/docs/`) — checks `commands.md`, `workflow.md`, `writing-specs.md`, `writing-docs.md`, `testing.md`, `getting-started.md`, `README.md`, plus the conduction schema (`hydra/openspec/schemas/conduction/schema.yaml`) and its `templates/spec.md` for alignment with `writing-specs.md`. Never changes intent without user confirmation. After syncing, runs a Phase 6 review of all commands and skills for stale references, outdated instructions, and redundant inline content — and asks whether to update them. 
- -Both modes enforce the [Documentation Principles](writing-docs.md) — duplication and wrong-audience content are flagged as issues, with direct links to the relevant writing-docs.md sections. - -**When to use:** After a significant batch of changes — new commands, archived features, updated specs, or structural changes to the project. - ---- - -### `/opsx-archive` - -**Phase:** Archive - -Complete a change and preserve it for the historical record. - -**Usage:** -``` -/opsx-archive -``` - -**What it does:** -1. Checks artifact and task completion -2. Syncs delta specs into main specs (if not already done) -3. Moves the change to `openspec/changes/archive/YYYY-MM-DD-/` -4. All artifacts are preserved for audit trail -5. Updates or creates `docs/features/.md` — creates it if no matching feature doc exists -6. Updates the feature overview table in `docs/features/README.md` (creates the file if it doesn't exist) -7. Creates or updates `CHANGELOG.md` — completed tasks become versioned entries (version from `app-config.json`); uses [Keep a Changelog](https://keepachangelog.com/) format - ---- - -### `/opsx-bulk-archive` - -**Phase:** Archive - -Archive multiple completed changes at once. - -**Usage:** -``` -/opsx-bulk-archive -``` - -**When to use:** When you have several changes that are all complete and want to clean up. - ---- - -### `/opsx-apply-loop` - -**Phase:** Full Lifecycle (experimental) - -Automated apply→verify loop for a single change in a specific app. Runs the implementation loop inside an isolated Docker container, optionally runs targeted tests on the host, then archives and syncs to GitHub. +For the complete reference, see [commands-openspec.md](commands-openspec.md). -**Usage:** -``` -/opsx-apply-loop # asks which app + change to run -/opsx-apply-loop procest add-sla-tracking # run a specific app/change -/opsx-apply-loop openregister seed-data # run in a different app -``` - -**What it does:** -1. 
Selects app and change (scans across all apps, or uses provided arguments) -2. Checks for a GitHub tracking issue (runs `/opsx-plan-to-issues` first if missing) -3. Creates a `feature//` branch in the app's git repo -4. Checks the Nextcloud environment is running -5. Reads `test-plan.md` (if present) and classifies which test commands to include in the loop -6. Asks whether to include a test cycle (tests run **outside the container** against the live Nextcloud app) -7. Builds and starts an isolated Docker container — mounts the app directory + shared `.claude/` skills (read-only); no git, no GitHub -8. Inside the container: runs `/opsx-apply` → `/opsx-verify` in a loop (max 5 iterations) - - CRITICAL issues retrigger the loop; WARNING issues also retrigger but never block archive - - At max iterations with only warnings remaining, archive still proceeds - - Seed data (ADR-016) is created/updated during apply as required -9. Captures container logs to `.claude/logs/`, then removes container -10. **If test cycle enabled:** runs targeted single-agent test commands on the host (max 3 test iterations); failures loop back into apply→verify -11. **If test cycle enabled and deferred tests exist:** asks about multi-agent/broad tests from the test-plan that were excluded from the loop; runs them once if confirmed, with one final apply→verify if they fail -12. Runs `/opsx-archive` on the host (after tests pass or tests skipped) -13. Commits all changes in the app repo with a generated commit message -14. Syncs GitHub: updates issue checkboxes, posts a completion comment, prompts to close -15. Asks about test scenario conversion (deferred from archive) -16. Shows a final report with iterations used, tasks completed, and what's next - -**When to use:** When you want hands-off implementation of a single change in one app. Prefer `/opsx-pipeline` for running multiple changes across apps in parallel. 
- -**Container design:** The container mounts the app directory at `/workspace` and the shared `.claude/` at `/workspace/.claude` (read-only). This gives the container's Claude session access to all shared skills without requiring git or GitHub. Each app is isolated — the container only touches one app directory. - -**Container limitations:** GitHub operations, `docker compose exec`, browser tests, and git commands are not available inside the container — all handled on the host after the container exits. Tests always run on the host against the live Nextcloud environment. - -**Cap impact:** High — runs apply + verify sequentially (up to 5 iterations), optionally followed by targeted tests (up to 3 test iterations). Each iteration is a full implementation + verification pass. - -**Model:** Sonnet recommended for most changes; Opus for complex architectural work. Asked at run time. - -**Requires:** -- Docker running -- `gh` CLI authenticated on the host -- Nextcloud containers up (auto-started if not running — uses `docker compose -f` pointed at the docker-dev root's `.github/docker-compose.yml`) -- **Container authentication** — the Docker container cannot use interactive OAuth, so it needs an explicit token. One of these environment variables must be set in your shell (see [Getting Started — Container authentication](getting-started.md#prerequisites) for full setup instructions): - 1. `CLAUDE_CODE_AUTH_TOKEN` (preferred) — uses your existing Claude Max/Pro subscription at no extra cost. Generate with `claude setup-token`, then `export CLAUDE_CODE_AUTH_TOKEN="..."` in `~/.bashrc`. - 2. `ANTHROPIC_API_KEY` (fallback) — uses prepaid API credits from console.anthropic.com (costs money). `export ANTHROPIC_API_KEY="sk-ant-api03-..."` in `~/.bashrc`. 
- ---- - -### `/opsx-pipeline` - -**Phase:** Full Lifecycle (experimental) - -Process one or more OpenSpec changes through the full lifecycle in parallel — each change gets its own subagent, git worktree, feature branch, and PR. - -**Usage:** -``` -/opsx-pipeline all # process all open proposals across all repos -/opsx-pipeline procest # all open proposals in one app -/opsx-pipeline sla-tracking routing # specific changes by name -``` - -**What it does:** -1. Discovers open proposals (changes with `proposal.md` but not yet archived) -2. Presents an execution plan and asks for confirmation -3. Creates a git worktree and feature branch per change -4. Launches up to 5 parallel subagents — each runs ff → apply → verify → archive -5. Monitors progress and queues remaining changes as slots free up -6. Creates a PR per completed change to `development` -7. Reports full results including tasks completed, quality checks, and PR links - -**Subagent lifecycle per change:** -``` -ff (artifacts) → plan-to-issues → apply (implement + tests + docs) → verify → archive → push + PR -``` - -**When to use:** When you have multiple open proposals ready to implement and want to run them hands-off. - -**Cap impact:** High — up to 5 agents running full implementations in parallel. Each agent may run for 10-30 minutes depending on change complexity. - -**Model:** Asked at run time with three options: one model for all sub-agents, choose per change, or auto-select by reading each proposal. **Haiku** for simple changes (config, text, minor fixes). **Sonnet** for standard feature work. **Opus** for complex architectural changes. The model applies per implementation sub-agent — choose based on change complexity and available quota. 
- -**Requires:** `gh` CLI authenticated; quality checks must pass per app (`composer check:strict` / `npm run lint`) - ---- +| Command | Phase | Description | +|---------|-------|-------------| +| `/opsx-new ` | Spec | Start a new change | +| `/opsx-ff` | Spec | Fast-forward all artifacts (proposal → specs → design → tasks) | +| `/opsx-continue` | Spec | Create next artifact in dependency chain | +| `/opsx-explore` | Pre-spec | Investigate before starting a formal change | +| `/opsx-apply` | Implement | Implement tasks from plan.json | +| `/opsx-verify` | Review | Verify implementation against specs | +| `/opsx-sync` | Archive | Merge delta specs into main specs | +| `/sync-docs` | Maintenance | Check and sync documentation to current project state | +| `/opsx-archive` | Archive | Complete and preserve change | +| `/opsx-bulk-archive` | Archive | Archive multiple completed changes at once | +| `/opsx-apply-loop` | Full Lifecycle | Automated apply→verify loop in Docker container | +| `/opsx-pipeline` | Full Lifecycle | Parallel multi-change lifecycle (up to 5 agents) | +| `/opsx-onboard` | Setup | Overview of current OpenSpec setup | -### `/opsx-onboard` - -**Phase:** Setup - -Get an overview of the current project's OpenSpec setup and active changes. - -**Usage:** -``` -/opsx-onboard -``` +**OpenSpec CLI** (terminal commands, not slash commands): `openspec init`, `openspec list`, `openspec validate`, etc. — see [commands-openspec.md](commands-openspec.md#openspec-cli-commands). --- ## App Management Commands -Commands for creating, configuring, and maintaining Nextcloud apps. These work together in a lifecycle: `/app-design` → `/app-create` → `/app-explore` → `/app-apply` → `/app-verify`. - -For a full guide on the lifecycle, when to use each command, and how they relate to the OpenSpec workflow, see [App Lifecycle](app-lifecycle.md). 
- ---- - -### `/app-design` - -**Phase:** Setup / Design - -Full upfront design for a new Nextcloud app — architecture research, competitor analysis, feature matrix, ASCII wireframes, and OpenSpec setup. Run this **before** `/app-create` for brand-new apps. - -**Usage:** -``` -/app-design -/app-design my-new-app -``` - -**What it does:** -1. Researches the problem domain and existing solutions -2. Produces architecture decisions, feature matrix, and ASCII wireframes -3. Sets up the `openspec/` structure with initial design docs - -**Output:** Design documentation ready to hand off to `/app-create`. - -**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for general design sessions. ✅ **Opus** recommended — competitive research, architecture decisions, and full design document creation benefit from stronger reasoning. - ---- - -### `/app-create` - -**Phase:** Setup - -Bootstrap a new Nextcloud app from the ConductionNL template, or onboard an existing repo. Always creates an `openspec/` configuration folder that tracks all app decisions. - -**Usage:** -``` -/app-create -/app-create my-new-app -``` - -**What it does:** -1. Asks whether a local folder already exists — if yes, uses it as the base; if no, clones the template -2. Collects basic identity: app ID, name, goal, one-line summary, Nextcloud category -3. Asks about dependencies (OpenRegister, additional CI apps) -4. Creates `openspec/app-config.json` and `openspec/README.md` -5. Replaces all template placeholders (`AppTemplate`, `app-template`, etc.) across all files -6. Creates the GitHub repository and branches (`main`, `development`, `beta`) -7. Optionally sets branch protection and team access -8. Optionally installs dependencies and enables the app in the local Nextcloud environment - -**Output:** Fully scaffolded app directory with correct identity, CI/CD workflows, and GitHub repo. 
- -**Requires:** `gh` CLI authenticated (`gh auth login`) - ---- - -### `/app-explore` - -**Phase:** Design / Configuration - -Enter exploration mode for a Nextcloud app. Think through its goals, architecture, features, and Architectural Decision Records (ADRs). Updates `openspec/` files to capture decisions. - -**Usage:** -``` -/app-explore -/app-explore openregister -``` - -**What it does:** -- Loads `openspec/app-config.json` for full context -- Acts as a **thinking partner** — draws diagrams, asks questions, challenges assumptions -- Captures decisions into `openspec/app-config.json` -- Never writes application code — only `openspec/` files - -**Feature lifecycle:** -``` -idea → planned → in-progress → done -``` -When a feature moves to `planned` (has user stories + acceptance criteria), suggests `/opsx-ff {feature-name}` to create an OpenSpec change from it. - -**Important:** Run this before implementing anything. Features defined here become the inputs for `/opsx-ff`. - -**Model:** Checked at run time — stops on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for general app exploration. ✅ **Opus** recommended — feature strategy, ADRs, and competitive analysis benefit from stronger reasoning. - ---- - -### `/app-apply` - -**Phase:** Configuration - -Applies `openspec/app-config.json` decisions back into the actual app files. The counterpart to `/app-explore`. - -**Usage:** -``` -/app-apply -/app-apply openregister -``` - -**What it does:** -1. Loads `openspec/app-config.json` -2. Compares current file values against config — builds a list of pending changes -3. Shows a clear diff summary of what would change -4. Asks for confirmation before applying any changes -5. Updates only the tracked values in each file (IDs, names, namespaces, CI parameters) — never touches feature code -6. 
Optionally runs `composer check:strict` to verify PHP changes are clean - -**In scope:** `appinfo/info.xml`, CI/CD workflow parameters, PHP namespaces and app ID constants, `composer.json`/`package.json` names, `webpack.config.js` app ID, `src/App.vue` OpenRegister gate, `README.md` header. - -**Out of scope:** Feature code, business logic, Vue components, PHP controllers. Use `/opsx-ff {feature-name}` for those. - ---- - -### `/app-verify` - -**Phase:** Review / Audit - -Read-only audit. Checks every tracked app file against `openspec/app-config.json` and reports drift — without making any changes. - -**Usage:** -``` -/app-verify -/app-verify openregister -``` +Commands for creating, configuring, and maintaining Nextcloud apps: `/app-design` → `/app-create` → `/app-explore` → `/app-apply` → `/app-verify`. -**What it does:** -- Loads `openspec/app-config.json` and reads every tracked file -- Reports each check as **CRITICAL** (will break CI or runtime), **WARNING** (wrong metadata), or **INFO** (cosmetic drift) -- Shows exact current value vs expected value for every failing check -- Recommends `/app-apply` if issues are found +For the full lifecycle guide, see [app-lifecycle.md](app-lifecycle.md). -**When to use:** After `/app-apply` to confirm changes landed, or at any time to check for drift. +| Command | Phase | Description | +|---------|-------|-------------| +| `/app-design` | Design | Full upfront design — architecture, competitors, wireframes | +| `/app-create` | Setup | Bootstrap or onboard a Nextcloud app | +| `/app-explore` | Design | Think through goals, features, and ADRs | +| `/app-apply` | Configuration | Apply `app-config.json` to tracked files | +| `/app-verify` | Audit | Read-only check for config drift | +| `/clean-env` | Reset | Fully reset Docker development environment | --- -### `/clean-env` - -**Phase:** Setup / Reset - -Fully resets the OpenRegister Docker development environment. - -**Usage:** -``` -/clean-env -``` - -**What it does:** -1. 
Stops all Docker containers from the OpenRegister docker-compose -2. Removes all containers and volumes (full data reset) -3. Starts containers fresh -4. Waits for Nextcloud to become ready -5. Installs core apps: openregister, opencatalogi, softwarecatalog, nldesign, mydash +## Testing Commands -**Important:** Destructive — removes all database data and volumes. Only use when a full reset is intended. +All testing commands — persona-based sweeps, perspective-based sweeps, single-agent tests, and test scenario management. -After completion, verify at `http://localhost:8080` (admin/admin). +For the complete reference, workflows, and when-to-use guidance, see [testing.md](testing.md). -**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. +| Command | Type | Description | +|---------|------|-------------| +| `/test-counsel` | Skill (8 agents) | Persona-based testing — all 8 personas in parallel | +| `/test-app` | Skill (1 or 6) | Perspective-based sweep (functional, UX, a11y, perf, security, API) | +| `/feature-counsel` | Skill (8 agents) | Pre-build spec analysis from 8 persona perspectives | +| `/test-functional` | Command (1 agent) | Feature correctness via GIVEN/WHEN/THEN | +| `/test-api` | Command (1 agent) | REST API endpoint testing | +| `/test-accessibility` | Command (1 agent) | WCAG 2.1 AA compliance | +| `/test-performance` | Command (1 agent) | Load times and API response | +| `/test-security` | Command (1 agent) | OWASP Top 10, roles, authorization | +| `/test-regression` | Command (1 agent) | Cross-feature regression | +| `/test-persona-*` | Command (1 agent) | Single-persona deep dive | +| `/test-scenario-create` | Command | Create a reusable test scenario | +| `/test-scenario-run` | Command | Execute test scenarios against live env | +| 
`/test-scenario-edit` | Command | Edit an existing test scenario | --- ## Team Role Commands -Specialist agents representing different roles on the development team. Useful for getting a focused perspective on a change — architecture review, QA, product sign-off, etc. +Specialist agents for focused perspectives on a change. For full details, see [workflow.md](workflow.md#team-role-commands). | Command | Role | Focus | |---------|------|-------| @@ -521,14 +83,6 @@ Specialist agents representing different roles on the development team. Useful f | `/team-reviewer` | Code Reviewer | Standards, conventions, security, code quality | | `/team-sm` | Scrum Master | Progress tracking, blockers, sprint health | -**Usage:** -``` -/team-architect # review the API design for the active change -/team-qa # get QA perspective on test coverage -``` - -**Model for `/team-architect`:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Opus** recommended — best multi-framework reasoning across NLGov, BIO2/NIS2, WCAG, Haven, AVG/GDPR. **Sonnet** not recommended — may miss nuances in complex compliance scenarios. - --- ## Softwarecatalogus Commands (`/swc:*`) @@ -629,10 +183,10 @@ Checks whether `global-settings/VERSION` has been correctly bumped after any cha 2. Diffs `global-settings/` between the current branch and `origin/main` 3. Compares the branch `VERSION` against the `origin/main` `VERSION` 4. 
Reports one of four outcomes: - - ✅ No changes to `global-settings/` — no bump needed - - ✅ Changes found and `VERSION` correctly bumped - - ❌ Changes found but `VERSION` not bumped — suggests the next semver and the command to apply it - - ⚠️ `VERSION` bumped but no other files changed — flags as unusual + - No changes to `global-settings/` — no bump needed + - Changes found and `VERSION` correctly bumped + - Changes found but `VERSION` not bumped — suggests the next semver and the command to apply it + - `VERSION` bumped but no other files changed — flags as unusual **When to use:** - Standalone: any time you modify a file in `global-settings/` and want to confirm the bump is in place before committing @@ -741,512 +295,22 @@ Create new skills, modify and improve existing skills, and measure skill perform --- -## OpenSpec CLI Commands - -These are terminal commands (not Claude slash commands) for managing specs directly. - -| Command | Description | -|---------|-------------| -| `openspec init --tools claude` | Initialize OpenSpec in a project | -| `openspec list --changes` | List all active changes | -| `openspec list --specs` | List all specs | -| `openspec show ` | View details of a change or spec | -| `openspec status --change ` | Show artifact completion status | -| `openspec validate --all` | Validate all specs and changes | -| `openspec validate --strict` | Strict validation (errors on warnings) | -| `openspec update` | Regenerate AI tool config after CLI upgrade | -| `openspec schema which` | Show which schema is being used | -| `openspec config list` | Show all configuration | - -Add `--json` to any command for machine-readable output. - ---- - -## Testing Commands +## Tender & Ecosystem Intelligence Commands -For detailed guidance on when to use each command, typical testing workflows, and situational advice, see [testing.md](testing.md). +Competitive analysis and ecosystem gap-finding workflow. 
For the complete reference, see [commands-tender.md](commands-tender.md). -> **Note on agentic browser testing:** `/test-app`, `/test-counsel`, and `/feature-counsel` use Playwright MCP browsers to explore live applications. Results may include false positives (elements not found due to timing) or false negatives (bugs missed due to exploration order). Always verify critical findings manually. +| Command | Phase | Description | +|---------|-------|-------------| +| `/tender-scan` | Intelligence | Scrape TenderNed, import, classify by category | +| `/tender-status` | Monitoring | Dashboard of tender intelligence database | +| `/tender-gap-report` | Gap Analysis | Categories with tenders but no Conduction product | +| `/ecosystem-investigate` | Research | Deep-dive into a software category's competitors | +| `/ecosystem-propose-app` | Planning | Generate full app proposal for a gap | +| `/intelligence-update` | Maintenance | Pull latest data from external sources | --- -### `/test-app` - -**Phase:** Testing - -Run automated browser tests for any Nextcloud app in this workspace. Explores every page, button, and form guided by the app's documentation and specs. - -**Usage:** -``` -/test-app -/test-app procest -``` - -**Modes:** -- **Quick (1 agent)** — One agent walks through the entire app. Fast, good for smoke testing. Low cap impact. -- **Full (6 agents)** — Six parallel agents each with a different perspective: Functional, UX, Performance, Accessibility, Security, API. More thorough. High cap impact. - -**What it does:** -1. Selects the app (from argument or prompt) -2. Chooses Quick or Full mode -3. Checks `{APP}/test-scenarios/` for active scenarios — asks whether to include them -4. Reads `{APP}/docs/features/` to understand what to test -5. Asks which model to use for agents (Haiku default, Sonnet, or Opus) -6. Launches agents, each reading docs, logging in, and testing from their perspective -7. 
Agents execute any included test scenario steps before free exploration -8. Writes per-perspective results to `{APP}/test-results/` and a summary to `{APP}/test-results/README.md` - -**Model:** Asked at run time (applies to all sub-agents). **Haiku** (default) — fastest, lowest quota cost. **Sonnet** — more nuanced analysis, larger context window. **Opus** — deepest coverage; significant quota cost in Full mode. See [parallel-agents.md](parallel-agents.md) for context window sizes, subscription quota limits, and how they differ. - -**Cap impact:** See [parallel-agents.md](parallel-agents.md). - ---- - -### `/test-counsel` - -**Phase:** Testing - -Test a Nextcloud app from 8 persona perspectives simultaneously: Henk, Fatima, Sem, Noor, Annemarie, Mark, Priya, Jan-Willem. - -**Usage:** -``` -/test-counsel -``` - -**What it does:** -- Launches 8 parallel browser agents — one per persona (model is user-selected at run time; Haiku is the default) -- Each agent reads its persona card and relevant test scenarios before testing -- Tests from the perspective of that persona's role, technical level, and priorities -- Produces a combined report with findings per persona -- Writes results to `{APP}/test-results/` - -**Model:** Asked at run time (applies to all 8 agents). **Haiku** (default) — fastest, lowest quota cost. **Sonnet** — more nuanced persona findings, larger context window. **Opus** — deepest analysis; significant quota cost with 8 agents. See [parallel-agents.md](parallel-agents.md) for context window sizes, subscription quota limits, and how they differ. - -**Cap impact:** Very high — 8 parallel agents. Open a fresh Claude window before running. See [parallel-agents.md](parallel-agents.md). - ---- - -### `/feature-counsel` - -**Phase:** Discovery / Ideation - -Analyse a Nextcloud app's OpenSpec from 8 persona perspectives and suggest additional features or improvements. 
- -**Usage:** -``` -/feature-counsel -``` - -**What it does:** -- Reads the app's OpenSpec, specs, and existing features -- Each of the 8 personas considers what's missing from their perspective -- Produces a consolidated list of suggested features and improvements -- Does not test the live app — reads specs and docs only - -**Model:** Asked at run time (applies to all 8 agents). No browser required — agents read specs and docs only. **Sonnet** (default) — recommended; no context window concern without browser snapshots, and better reasoning produces more useful suggestions. **Haiku** — faster, lower quota, good for a quick broad pass. **Opus** — deepest reasoning for complex architectural gaps; use with full mode (8 agents) sparingly. - -**Cap impact:** Very high — 8 parallel agents. See [parallel-agents.md](parallel-agents.md). - ---- - -### Commands (Single-Agent) - ---- - -### `/test-functional` - -**Phase:** Testing - -Feature correctness via browser — executes GIVEN/WHEN/THEN scenarios from specs against the live app. - -**Usage:** -``` -/test-functional -``` - ---- - -### `/test-api` - -**Phase:** Testing - -REST API endpoint testing. Checks endpoints, authentication, pagination, and error responses. - -**Usage:** -``` -/test-api -``` - ---- - -### `/test-accessibility` - -**Phase:** Testing - -WCAG 2.1 AA compliance using axe-core, plus manual keyboard and focus checks. - -**Usage:** -``` -/test-accessibility -``` - ---- - -### `/test-performance` - -**Phase:** Testing - -Load times, API response times, and network request analysis via browser timing APIs. - -**Usage:** -``` -/test-performance -``` - ---- - -### `/test-security` - -**Phase:** Testing - -OWASP Top 10, Nextcloud roles, authorization, XSS, CSRF, sensitive data exposure. - -**Usage:** -``` -/test-security -``` - ---- - -### `/test-regression` - -**Phase:** Testing - -Cross-feature regression — verifies changes don't break unrelated flows. 
- -**Usage:** -``` -/test-regression -``` - ---- - -### `/test-persona-*` - -**Phase:** Testing - -Single-persona deep dive. Use when you want one persona's full assessment without launching all eight: - -| Command | Persona | Role | -|---------|---------|------| -| `/test-persona-henk` | **Henk Bakker** | Elderly citizen — low digital literacy | -| `/test-persona-fatima` | **Fatima El-Amrani** | Low-literate migrant citizen | -| `/test-persona-sem` | **Sem de Jong** | Young digital native | -| `/test-persona-noor` | **Noor Yilmaz** | Municipal CISO / functional admin | -| `/test-persona-annemarie` | **Annemarie de Vries** | VNG standards architect | -| `/test-persona-mark` | **Mark Visser** | MKB software vendor | -| `/test-persona-priya` | **Priya Ganpat** | ZZP developer / integrator | -| `/test-persona-janwillem` | **Jan-Willem van der Berg** | Small business owner | - -**Usage:** -``` -/test-persona-henk -/test-persona-priya -``` - -**Use when:** You know which persona is most affected by a change, or when you've run `/test-counsel` and want a deeper single-perspective follow-up. One agent instead of eight — lower cap cost. - -**Cap impact:** Low — single agent. See [parallel-agents.md](parallel-agents.md). - ---- - -## Test Scenario Commands - -Test scenarios are reusable, Gherkin-style descriptions of user journeys that can be executed by any test command. They live in `{APP}/test-scenarios/TS-NNN-slug.md` and are automatically discovered by `/test-app`, `/test-counsel`, and `/test-persona-*` when they run. - -> **Test scenario vs test case**: A scenario is a high-level, user-centered description of *what* to verify and *for whom* — one concrete flow, written in Given-When-Then. It is broader than a click-by-click test case but more specific than a spec requirement. - ---- - -### `/test-scenario-create` - -**Phase:** Testing - -Guided wizard for creating a well-structured test scenario for a Nextcloud app. 
- -**Usage:** -``` -/test-scenario-create -/test-scenario-create openregister -``` - -**What it does:** -1. Determines the next ID (`TS-NNN`) by scanning existing scenarios -2. Asks for title, goal, category (functional/api/security/accessibility/performance/ux/integration), and priority -3. Shows relevant personas and asks which this scenario targets -4. Suggests which test commands should automatically include it -5. Auto-suggests tags based on category and title -6. Guides through Gherkin steps (Given/When/Then), test data, and acceptance criteria -7. Generates persona-specific notes for each linked persona -8. Saves to `{APP}/test-scenarios/TS-NNN-slug.md` - -**Scenario categories and suggested personas:** - -| Category | Suggested personas | -|---|---| -| functional | Mark Visser, Sem de Jong | -| api | Priya Ganpat, Annemarie de Vries | -| security | Noor Yilmaz | -| accessibility | Henk Bakker, Fatima El-Amrani | -| ux | Henk Bakker, Jan-Willem, Mark Visser | -| performance | Sem de Jong, Priya Ganpat | -| integration | Priya Ganpat, Annemarie de Vries | - ---- - -### `/test-scenario-run` - -**Phase:** Testing - -Execute one or more test scenarios against the live Nextcloud environment using a browser agent. - -**Usage:** -``` -/test-scenario-run # list and choose -/test-scenario-run TS-001 # run specific scenario -/test-scenario-run openregister TS-001 # run from specific app -/test-scenario-run --tag smoke # run all smoke-tagged scenarios -/test-scenario-run --all openregister # run all scenarios for an app -/test-scenario-run --persona priya-ganpat # run all Priya's scenarios -``` - -**What it does:** -1. Discovers scenario files in `{APP}/test-scenarios/` -2. Filters by tag, persona, or ID as specified -3. Asks which environment to test against (local or custom URL) -4. Asks whether to use Haiku (default, cost-efficient) or Sonnet (for complex flows) -5. Launches a browser agent per scenario (parallelised up to 5 for multiple) -6. 
Agent verifies preconditions, follows Given-When-Then steps, checks each acceptance criterion -7. Writes results to `{APP}/test-results/scenarios/` -8. Synthesises a summary report for multiple runs - -**Model:** Asked at run time. **Haiku** (default) — fast, cost-efficient. **Sonnet** — for complex multi-step flows or ambiguous UI states where Haiku may misread the interface. Cap cost scales with the number of scenarios run in parallel. - -**Cap impact:** Low for single scenario; medium for multiple. See [parallel-agents.md](parallel-agents.md). - -**Result statuses**: ✅ PASS / ❌ FAIL / ⚠️ PARTIAL / ⛔ BLOCKED - ---- - -### `/test-scenario-edit` - -**Phase:** Testing - -Edit an existing test scenario — update any field (metadata or content) interactively. - -**Usage:** -``` -/test-scenario-edit # list all scenarios, pick one -/test-scenario-edit TS-001 # open specific scenario -/test-scenario-edit openregister TS-001 # open from specific app -``` - -**What it does:** -1. Locates the scenario file -2. Shows a summary of current values (status, priority, category, personas, tags, spec refs) -3. Asks what scope to edit: metadata only / content only / both / status only / tags only -4. Walks through each field in scope, showing the current value and asking for the new one -5. Supports `+tag` / `-tag` syntax for incremental tag changes, same for personas -6. Regenerates persona notes if the personas list changed -7. Optionally renames the file if the title changed -8. Writes the updated file and shows a diff-style summary - ---- - -### How existing test commands use scenarios - -| Command | Behaviour when scenarios exist | -|---|---| -| `/test-app` | Asks to include active scenarios before launching agents. Agents execute scenario steps before free exploration. | -| `/test-counsel` | Asks to include scenarios, grouped by persona. Each persona agent receives only the scenarios tagged with their slug. 
| -| `/test-persona-*` | Scans for scenarios matching that persona's slug. Asks to run them before free exploration in Step 2. | - ---- - -## Tender & Ecosystem Intelligence Commands - -These commands support the competitive analysis and ecosystem gap-finding workflow. They operate on the `concurrentie-analyse/intelligence.db` SQLite database and require the database to exist before running. - ---- - -### `/tender-scan` - -**Phase:** Intelligence Gathering - -Scrape TenderNed for new tenders, import them into SQLite, and classify unclassified tenders by software category using a local Qwen model. - -**Usage:** -``` -/tender-scan -``` - -**What it does:** -1. Runs `concurrentie-analyse/tenders/scrape_tenderned.py` to fetch fresh data -2. Imports new tenders into the intelligence database -3. Classifies unclassified tenders using Qwen via `localhost:11434` -4. Reports new tenders found, classified, and any new gaps detected - -**Requires:** Local Qwen model running on Ollama (`http://localhost:11434`) - ---- - -### `/tender-status` - -**Phase:** Intelligence Monitoring - -Show a dashboard of the tender intelligence database — totals by source, category, status, gaps, and recent activity. - -**Usage:** -``` -/tender-status -``` - -**What it does:** -- Queries `concurrentie-analyse/intelligence.db` for live stats -- Shows tenders by source, status, and category (top 15) -- Highlights categories with Conduction coverage vs gaps -- Shows top integration systems and ecosystem gaps - -**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. - ---- - -### `/tender-gap-report` - -**Phase:** Gap Analysis - -Generate a gap analysis report — software categories that appear in government tenders but have no Conduction product. 
- -**Usage:** -``` -/tender-gap-report -``` - -**What it does:** -1. Queries the database for categories with tenders but no `conduction_product` -2. Generates a markdown report at `concurrentie-analyse/reports/gap-report-{date}.md` -3. Includes top 5 gaps with tender details, organisations, and key requirements -4. Cross-references with `application-roadmap.md` to flag already-tracked gaps -5. Recommends which gaps to investigate first - ---- - -### `/ecosystem-investigate ` - -**Phase:** Competitive Research - -Deep-dive research into a software category — find and analyze open-source competitors using GitHub, G2, Capterra, AlternativeTo, and TEC. - -**Usage:** -``` -/ecosystem-investigate bookkeeping -``` - -**What it does:** -1. Loads category context and related tenders from the intelligence database -2. Uses the browser pool (browser-1 through browser-5) to scrape 5-10 competitors from multiple source types -3. Creates competitor profiles in `concurrentie-analyse/{category}/{competitor-slug}/` -4. Inserts competitors and feature data into the database with provenance tracking -5. Presents a comparison table and recommendation for Nextcloud ecosystem fit - -**Model:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most categories. **Opus** for high-stakes or complex categories where strategic depth matters. - ---- - -### `/ecosystem-propose-app ` - -**Phase:** Product Planning - -Generate a full app proposal for a software category gap, using tender requirements and competitor research as input. - -**Usage:** -``` -/ecosystem-propose-app bookkeeping -``` - -**What it does:** -1. Gathers all tenders, requirements, competitors, and integrations for the category -2. Generates a structured proposal following the template in `concurrentie-analyse/application-roadmap.md` -3. Appends the proposal to `application-roadmap.md` -4. 
Inserts the proposal into the `app_proposals` database table -5. Optionally bootstraps the app with `/app-create` - -**Model:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Sonnet** for most proposals. **Opus** for high-stakes proposals where architectural fit and market analysis need extra depth. - ---- - -### `/intelligence-update [source]` - -**Phase:** Intelligence Maintenance - -Pull latest data from external sources into the intelligence database. Syncs sources that are past their scheduled interval. - -**Usage:** -``` -/intelligence-update # sync all sources that are due -/intelligence-update all # force sync every source -/intelligence-update wikidata-software # sync one specific source -``` - -**Sources and intervals:** - -| Source | Interval | -|--------|----------| -| `tenderned` | 24h | -| `wikidata-software` | 7 days | -| `wikipedia-comparisons` | 7 days | -| `awesome-selfhosted` | 7 days | -| `github-issues` | 7 days | -| `dpg-registry` | 7 days | -| `developers-italia` | 7 days | -| `gemma-release` | yearly | - -**What it does:** -1. Checks `source_syncs` table for overdue sources -2. Runs `concurrentie-analyse/scripts/sync/sync_{source}.py` for each -3. Updates sync status, records count, and error messages -4. Displays a summary table of all sources with their sync status - -**Model:** Checked at run time when invoked standalone — stops if on Opus (no reasoning needed, wastes quota), warns if on Sonnet and offers to switch. **Haiku** is the right fit for this task. Model check is skipped when this skill is called from within another skill. 
- ---- - -### Tender Intelligence Workflow - -``` -/tender-scan (fetch & classify new tenders) - │ - ▼ -/tender-status (review dashboard) - │ - ▼ -/tender-gap-report (identify gaps) - │ - ▼ -/ecosystem-investigate (research competitors for top gap) - │ - ▼ -/ecosystem-propose-app (generate app proposal) - │ - ▼ -/app-design (design the new app) -``` - -**Keep data fresh:** Run `/intelligence-update` weekly and `/tender-scan` daily to keep the database current. - ---- - -## Command Flow Cheat Sheet +## Command Flow Cheat Sheet ``` /opsx-explore (optional: investigate first) @@ -1285,7 +349,3 @@ Pull latest data from external sources into the intelligence database. Syncs sou ▼ /opsx-archive (complete & preserve) ``` - -See [testing.md](testing.md) for situational testing guidance and recommended testing order. - -For the app lifecycle flow (`/app-design` → `/app-create` → `/app-explore` → `/app-apply` → `/app-verify`), see [app-lifecycle.md](app-lifecycle.md). diff --git a/docs/claude/skill-checklist.md b/docs/claude/skill-checklist.md new file mode 100644 index 0000000..3c9e241 --- /dev/null +++ b/docs/claude/skill-checklist.md @@ -0,0 +1,47 @@ +# Skill Checklist + +Quick validation checklist before adding or reviewing a Claude Code skill. Organized by [maturity level](writing-skills.md#skill-maturity-levels). 
+ +--- + +## L1-L2 (minimum for any skill) + +- [ ] Folder name matches `name` in frontmatter and the slash command +- [ ] `description` is action-oriented, third-person, under 250 characters, with specific trigger terms +- [ ] Steps are numbered and self-contained +- [ ] Guardrails define what the skill must NOT do +- [ ] Destructive actions have explicit confirmation prompts +- [ ] `SKILL.md` is under 500 lines — large blocks extracted to subfolders +- [ ] All subfolder links use relative paths +- [ ] `SKILL.md` reads coherently top to bottom without opening subfolder files + +## L3 (recommended for all skills) + +- [ ] Uses common patterns consistently (model guard, AskUserQuestion, quality checks) +- [ ] Has `examples/` if the skill produces structured output +- [ ] References standards documents where applicable + +## L4 (recommended for business-critical skills) + +- [ ] Contains business-specific `references/` (standards, architecture, ADRs) +- [ ] Uses project-specific templates and terminology +- [ ] Output matches team expectations without manual correction + +## L5+ (recommended for frequently-used skills) + +- [ ] Has 3+ eval scenarios in `evals/` +- [ ] Baseline measurement documented (output without skill) +- [ ] Description trigger-tested (10 should + 10 should-not queries) +- [ ] PHP/Python quality checks triggered after code changes (if applicable) + +## L6+ (recommended for skills that run often and evolve) + +- [ ] Has `learnings.md` with dated, atomic entries +- [ ] SKILL.md includes "capture learnings" step +- [ ] Consolidation process defined (trigger at ~80-100 entries) + +## L7 (for workflow and orchestration skills) + +- [ ] Part of a defined workflow chain with explicit handoff +- [ ] Spawns subagents or is spawned by orchestrator +- [ ] Tested end-to-end as part of the full chain diff --git a/docs/claude/skill-evals.md b/docs/claude/skill-evals.md new file mode 100644 index 0000000..32ae2bc --- /dev/null +++ b/docs/claude/skill-evals.md 
@@ -0,0 +1,141 @@ +# Skill Evaluation & Measurement Guide + +How to evaluate, measure, and improve Claude Code skills with data. This is the detailed reference for [Level 5: Measurement](writing-skills.md#level-5-measurement--evaluated-and-optimized-with-data) in the skill maturity framework. + +--- + +## `evals/evals.json` Format + +```json +{ + "skill_name": "create-pr", + "version": "1.0.0", + "created": "2025-01-15", + "last_validated": null, + "evals": [ + { + "id": 1, + "prompt": "Create a PR for the openregister feature branch", + "expected_output": "PR targets development branch, has quality checks, proper format", + "files": [], + "expectations": [ + "targets development branch (not main)", + "runs composer check:strict", + "includes ## Summary and ## Test plan sections" + ] + } + ], + "trigger_tests": { + "should_trigger": [ + "Create a pull request for the feature branch", + "Open a PR from development to main", + "Make a PR for my changes", + "Submit this branch for review via PR", + "Create a GitHub pull request", + "PR this to development", + "Open pull request for openregister branch", + "Make a pull request for my new feature", + "Create PR targeting the main branch", + "Submit a pull request with these changes" + ], + "should_not_trigger": [ + "Can you review this code?", + "What is the difference between git merge and rebase?", + "How do I resolve a merge conflict?", + "Show me the git log", + "Commit my changes", + "Push to the remote branch", + "What branches are available?", + "Help me write a commit message", + "Show the diff for my changes", + "Explain what a pull request is" + ] + } +} +``` + +After running evals, update `last_validated` with the run date to unlock L5 green circle status. + +--- + +## Using the Anthropic Skill Creator + +The [Anthropic Skill Creator](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md) automates running, grading, and improving evals as a Claude Code skill. 
+ +### Installation + +The Skill Creator lives at `.claude/skills/skill-creator/` in each repo (hydra and wordpress-docker). It's a vendored copy of [`anthropics/skills/skills/skill-creator/`](https://github.com/anthropics/skills/tree/main/skills/skill-creator) with one local modification: the eval workspace lives **inside** each skill rather than as a sibling folder. + +### Keeping it up to date + +Run `bash .claude/skills/update-skill-creator.sh` from the repo root. This script: +- Sparse-clones `anthropics/skills` to a tempdir +- Compares the upstream commit hash against `.claude/skills/skill-creator/.upstream-version` +- Backs up the current copy, rsyncs upstream files in, then re-applies `local-mods.patch` +- Updates `.upstream-version` to the new commit hash + +If `local-mods.patch` no longer applies cleanly (upstream rewrote the relevant section), the script aborts and points you at the backup + `.rej` files so you can hand-merge. We use this script-based approach because `anthropics/skills` keeps `skill-creator/` as a subdirectory, which makes pure `git subtree` impractical for tracking just that one folder. + +**Why we deviate from upstream:** Upstream Skill Creator writes eval results to `-workspace/` as a sibling to the skill folder. We patch this so results live at `/evals/workspace/iteration-N/`, keeping eval artifacts adjacent to the skill they belong to. The patch is recorded in `.claude/skills/skill-creator/local-mods.patch`. + +### Running evals step-by-step + +1. **Invoke**: In a Claude Code session, ask Claude to evaluate the skill: + > "Run evals on the test-app skill" or "Use the skill creator to evaluate and improve my X skill" + + Claude picks up the skill-creator and guides the process. The skill-creator's `evals/evals.json` format uses `evals[]` with `id`, `prompt`, `expected_output`, and `expectations`. We adopted this format across all our skills. + +2. 
**What happens**: Two parallel subagents run each eval: + - **With-skill agent**: runs the scenario with the skill active + - **Baseline agent**: runs the same scenario without the skill + Results are saved to `/evals/workspace/iteration-N/eval-/` (inside the skill folder, per our local convention). + +3. **Review results**: The Skill Creator runs `eval-viewer/generate_review.py` and opens a browser tab (or generates a static HTML file with `--static`) with two tabs: **Outputs** (click through each eval, leave qualitative feedback) and **Benchmark** (pass rates, timing, tokens with-skill vs baseline). + +4. **Output files** written to `/evals/workspace/iteration-N/eval-/`: + - `grading.json` — assertion pass/fail with evidence per expectation + - `timing.json` — token count and duration + - `benchmark.json` — aggregate stats across all evals (one level up, at `iteration-N/`) + - `eval-review-iteration-N.html` — static viewer (one level up, at `evals/workspace/`) + +5. **Update `last_validated` and `baseline_score`** in `evals.json` after a successful run: + ```json + "last_validated": "2026-04-13", + "baseline_score": 0.67 + ``` + `last_validated` unlocks the L5 green circle in the skill overview dashboard. `baseline_score` records the with-skill pass rate at the time of validation — used as a regression guardrail when re-running evals (see below). + +6. **Improve cycle**: The Skill Creator's analyzer flags non-discriminating assertions, flaky evals, and skill improvement suggestions. Update `SKILL.md` and re-run as iteration-2 (and so on, in the same `evals/workspace/` folder). + +--- + +## `baseline_score` — Regression Detection + +Even running evals manually (no CI), `baseline_score` is useful: it's the with-skill pass rate from the most recent successful eval run, recorded in `evals/evals.json` next to `last_validated`. 
When you re-run evals later, compare the new pass rate against `baseline_score`: + +- **New rate >= baseline_score** — skill is stable or improving. Update `baseline_score` to the new rate (and bump `last_validated`). +- **New rate < baseline_score** — regression. Investigate before updating either field. The skill change you just made may have broken something. + +This gives you a paper trail of "this skill scored 0.67 on these expectations on this date" without needing CI infrastructure. The Skill Creator's own `benchmark.json` already produces the pass rate — you're just writing it back into `evals.json` as a durable marker. + +--- + +## Eval Workspace Layout + +``` +/ + evals/ + evals.json # eval definitions + trigger tests + workspace/ + iteration-1/ + eval-basic-usage/ + grading.json + timing.json + eval-edge-case/ + grading.json + timing.json + benchmark.json # aggregate for this iteration + iteration-2/ # after improvements + ... + eval-review-iteration-1.html # static viewer +``` diff --git a/docs/claude/skill-patterns.md b/docs/claude/skill-patterns.md new file mode 100644 index 0000000..482c4b6 --- /dev/null +++ b/docs/claude/skill-patterns.md @@ -0,0 +1,198 @@ +# Skill Patterns & Subfolder Guide + +Proven, reusable building blocks for Claude Code skills. These are **L3 patterns** — apply them consistently across skills. For the full maturity framework, see [writing-skills.md](writing-skills.md). + +--- + +## Description Writing Guide + +The description is the single most important line in your skill. It determines auto-triggering reliability (L2). 
+ +``` +# Structure: [Action verb] [what] — [key detail or when to use] + +# Good examples: +description: Create a Pull Request from the current branch — runs local checks, picks target branch, and opens the PR on GitHub +description: Run automated browser tests for a Nextcloud app — single agent or multi-perspective parallel testing +description: Archive a completed change in the experimental workflow + +# Bad examples: +description: Helps with PRs # too vague, no trigger terms +description: This skill creates pull requests for you # first person, wastes chars +description: A useful tool for managing code reviews # passive, no specificity +``` + +--- + +## Subfolder Guide + +### `templates/` + +Files with `{PLACEHOLDER}` variables that Claude fills in at runtime. Claude reads the file, substitutes values, and either writes the result to the user's project or injects it into a sub-agent. + +**Use for:** +- JSON/YAML/Markdown documents Claude writes to disk (e.g. `app-config.json`, `CHANGELOG.md`) +- Sub-agent prompts with variable substitution +- PR/issue body formats Claude fills in before creating +- Spec or design document scaffolds + +**Do not use for:** documents Claude reads without modifying, or fixed reference material. + +In `SKILL.md`, reference with: +``` +Write the file using the template in [templates/architecture-template.md](templates/architecture-template.md). +``` + +### `references/` + +Static content Claude reads to inform decisions — no placeholder variables, never written to the user's project. This is a key subfolder for **L4 personalization** — business-specific standards, architecture docs, and domain knowledge live here. + +**Use for:** +- Standards documents (Dutch government guidelines, GEMMA/ZGW compliance, coding standards) +- Architecture guidelines and conventions +- Spec excerpts or capability descriptions +- Multi-page guides Claude consults during a step + +**Do not use for:** templates, examples, or binary files. 
+ +In `SKILL.md`, reference with: +``` +Follow the standards in [references/dutch-gov-backend-standards.md](references/dutch-gov-backend-standards.md). +``` + +### `examples/` + +Worked demonstrations showing what expected output looks like. Used for few-shot guidance — Claude sees a concrete example and produces output in the same pattern. Critical for **L3** (proven patterns). + +**Use for:** +- Output format blocks (e.g. "Archive Complete", "Implementation Paused") +- Worked conflict resolution or decision examples +- Sample report sections showing expected structure + +**Do not use for:** templates with placeholders (those go in `templates/`), or static reference docs. + +In `SKILL.md`, reference with: +``` +For the expected output format, see [examples/output-templates.md](examples/output-templates.md). +``` + +### `assets/` + +Non-markdown static files that get copied as-is to the user's project or used by the skill tooling. + +**Use for:** +- SVG illustrations or placeholder images +- JavaScript/TypeScript config stubs (`webpack.config.js`, `docusaurus.config.js`) +- YAML/JSON configuration stubs copied verbatim + +**Do not use for:** markdown files (even if they're config-like — those go in `templates/`). + +### `evals/` (L5+) + +Evaluation scenarios and benchmark results for measured skills. + +**Use for:** +- `evals.json` — test scenarios, `trigger_tests` (should/should-not-trigger examples), and `last_validated` (date of last eval run; required for L5 green circle) +- `timing.json` — token usage and duration per eval run +- `grading.json` — assertion pass/fail results with evidence + +See [skill-evals.md](skill-evals.md) for format details. + +--- + +## Common Patterns + +### Model guard (for heavy reasoning skills) + +Place at the top of `SKILL.md` when the skill needs Sonnet or Opus: + +```markdown +**Check the active model** from your system context. + +- **On Haiku**: stop immediately — this skill requires Sonnet or Opus. 
+ Switch with `/model sonnet` and re-run. +- **On Sonnet or Opus**: proceed normally. +``` + +### User input + +Always use the **AskUserQuestion** tool — never assume or auto-select: + +```markdown +Use **AskUserQuestion** to ask: + +> "Which change should I archive?" + +Options: +- **change-a** — description +- **change-b** — description +``` + +For multi-select prompts: +```markdown +Use **AskUserQuestion** with `multiSelect: true` to let the user choose. +``` + +### Destructive action confirmation + +Before any irreversible action, show what will happen and ask for explicit confirmation: + +```markdown +Show a preview of the changes, then use **AskUserQuestion**: + +> "Create these issues in `owner/repo`?" + +Options: +- **Yes, proceed** — continue +- **Cancel** — end without changes +``` + +### Referencing subfolders + +Use relative markdown links — Claude resolves them relative to the skill folder: + +```markdown +Use the template in [templates/architecture-template.md](templates/architecture-template.md). +Follow the standards in [references/dutch-gov-backend-standards.md](references/dutch-gov-backend-standards.md). +For output examples, see [examples/output-templates.md](examples/output-templates.md). +``` + +### PHP quality checks (for skills that trigger implementation) + +After code changes, run the quality suite and block on failures: + +```markdown +Run `composer check:strict` from the app directory. +If it fails, fix all issues before continuing — do not skip. 
+``` + +### Capture learnings step (L6+) + +Add as the final step in skills that should self-improve: + +```markdown +**Capture learnings** + +After execution, review what happened and append new observations to +[learnings.md](learnings.md) under the appropriate section: + +- **Patterns That Work** — approaches that produced good results +- **Mistakes to Avoid** — errors encountered and how they were resolved +- **Domain Knowledge** — facts discovered during this run +- **Open Questions** — unresolved items for future investigation + +Each entry must include today's date. One insight per bullet. Skip if nothing new was learned. +``` + +### Next steps handoff (L7) + +For skills that are part of a workflow chain: + +```markdown +**Next steps** + +Suggest the logical next action: +- If tasks remain → "Run `/opsx-apply` to implement" +- If implementation done → "Run `/opsx-verify` to validate" +- If verified → "Run `/opsx-archive` to complete" +``` diff --git a/docs/claude/testing.md b/docs/claude/testing.md index df755f0..44a51d7 100644 --- a/docs/claude/testing.md +++ b/docs/claude/testing.md @@ -237,7 +237,102 @@ Test scenarios (`{APP}/test-scenarios/TS-NNN-slug.md`) are reusable, Gherkin-sty | `/test-persona-*` | Scans for scenarios matching that persona's slug. Asks to run them before free exploration. | | `/test-scenario-run` | Runs scenarios directly (by ID, tag, persona, or all) | -See [commands.md](commands.md#test-scenario-commands) for the full reference on `/test-scenario-create`, `/test-scenario-run`, and `/test-scenario-edit`. +### `/test-scenario-create` + +Guided wizard for creating a well-structured test scenario for a Nextcloud app. + +**Usage:** +``` +/test-scenario-create +/test-scenario-create openregister +``` + +**What it does:** +1. Determines the next ID (`TS-NNN`) by scanning existing scenarios +2. Asks for title, goal, category (functional/api/security/accessibility/performance/ux/integration), and priority +3. 
Shows relevant personas and asks which this scenario targets +4. Suggests which test commands should automatically include it +5. Auto-suggests tags based on category and title +6. Guides through Gherkin steps (Given/When/Then), test data, and acceptance criteria +7. Generates persona-specific notes for each linked persona +8. Saves to `{APP}/test-scenarios/TS-NNN-slug.md` + +**Scenario categories and suggested personas:** + +| Category | Suggested personas | +|---|---| +| functional | Mark Visser, Sem de Jong | +| api | Priya Ganpat, Annemarie de Vries | +| security | Noor Yilmaz | +| accessibility | Henk Bakker, Fatima El-Amrani | +| ux | Henk Bakker, Jan-Willem, Mark Visser | +| performance | Sem de Jong, Priya Ganpat | +| integration | Priya Ganpat, Annemarie de Vries | + +--- + +### `/test-scenario-run` + +Execute one or more test scenarios against the live Nextcloud environment using a browser agent. + +**Usage:** +``` +/test-scenario-run # list and choose +/test-scenario-run TS-001 # run specific scenario +/test-scenario-run openregister TS-001 # run from specific app +/test-scenario-run --tag smoke # run all smoke-tagged scenarios +/test-scenario-run --all openregister # run all scenarios for an app +/test-scenario-run --persona priya-ganpat # run all Priya's scenarios +``` + +**What it does:** +1. Discovers scenario files in `{APP}/test-scenarios/` +2. Filters by tag, persona, or ID as specified +3. Asks which environment to test against (local or custom URL) +4. Asks whether to use Haiku (default, cost-efficient) or Sonnet (for complex flows) +5. Launches a browser agent per scenario (parallelised up to 5 for multiple) +6. Agent verifies preconditions, follows Given-When-Then steps, checks each acceptance criterion +7. Writes results to `{APP}/test-results/scenarios/` +8. Synthesises a summary report for multiple runs + +**Model:** Asked at run time. **Haiku** (default) — fast, cost-efficient. 
**Sonnet** — for complex multi-step flows or ambiguous UI states where Haiku may misread the interface. Cap cost scales with the number of scenarios run in parallel.
+
+**Cap impact:** Low for single scenario; medium for multiple. See [parallel-agents.md](parallel-agents.md).
+
+**Result statuses:** PASS / FAIL / PARTIAL / BLOCKED
+
+---
+
+### `/test-scenario-edit`
+
+Edit an existing test scenario — update any field (metadata or content) interactively.
+
+**Usage:**
+```
+/test-scenario-edit # list all scenarios, pick one
+/test-scenario-edit TS-001 # open specific scenario
+/test-scenario-edit openregister TS-001 # open from specific app
+```
+
+**What it does:**
+1. Locates the scenario file
+2. Shows a summary of current values (status, priority, category, personas, tags, spec refs)
+3. Asks what scope to edit: metadata only / content only / both / status only / tags only
+4. Walks through each field in scope, showing the current value and asking for the new one
+5. Supports `+tag` / `-tag` syntax for incremental tag changes (the same syntax works for personas)
+6. Regenerates persona notes if the personas list changed
+7. Optionally renames the file if the title changed
+8. Writes the updated file and shows a diff-style summary
+
+---
+
+### How existing test commands use scenarios
+
+| Command | Behaviour when scenarios exist |
+|---|---|
+| `/test-app` | Asks to include active scenarios before launching agents. Agents execute scenario steps before free exploration. |
+| `/test-counsel` | Asks to include scenarios, grouped by persona. Each persona agent receives only the scenarios tagged with their slug. |
+| `/test-persona-*` | Scans for scenarios matching that persona's slug. Asks to run them before free exploration in Step 2. 
| --- diff --git a/docs/claude/workflow.md b/docs/claude/workflow.md index 2555fc4..e6c4dd9 100644 --- a/docs/claude/workflow.md +++ b/docs/claude/workflow.md @@ -252,6 +252,30 @@ See [writing-specs.md](writing-specs.md) for the complete guide — RFC 2119 key | `/opsx-verify` | Review | Verify implementation against specs | | `/opsx-archive` | Archive | Complete and preserve change | +## Team Role Commands + +Specialist agents representing different roles on the development team. Useful for getting a focused perspective on a change — architecture review, QA, product sign-off, etc. + +| Command | Role | Focus | +|---------|------|-------| +| `/team-architect` | Architect | API design, data models, cross-app dependencies | +| `/team-backend` | Backend Developer | PHP implementation, entities, services, tests | +| `/team-frontend` | Frontend Developer | Vue components, state management, UX | +| `/team-po` | Product Owner | Business value, acceptance criteria, priority | +| `/team-qa` | QA Engineer | Test coverage, edge cases, regression risk | +| `/team-reviewer` | Code Reviewer | Standards, conventions, security, code quality | +| `/team-sm` | Scrum Master | Progress tracking, blockers, sprint health | + +**Usage:** +``` +/team-architect # review the API design for the active change +/team-qa # get QA perspective on test coverage +``` + +**Model for `/team-architect`:** Checked at run time — stops if on Haiku. Asks which model to use and explains how to switch if the choice differs from the active model. **Opus** recommended — best multi-framework reasoning across NLGov, BIO2/NIS2, WCAG, Haven, AVG/GDPR. **Sonnet** not recommended — may miss nuances in complex compliance scenarios. 
+ +--- + ## Tips - **Start small**: Try the flow on a small feature first to build muscle memory diff --git a/docs/claude/writing-skills.md b/docs/claude/writing-skills.md index fc11555..cdd4bb1 100644 --- a/docs/claude/writing-skills.md +++ b/docs/claude/writing-skills.md @@ -5,6 +5,11 @@ How to create, structure, and improve Claude Code skills in this repository. Skills live in `.claude/skills//` and are invoked with `/`. Each skill is a folder containing a `SKILL.md` entry point and optional subfolders for supporting files. +**Related docs:** +- [Skill Patterns & Subfolder Guide](skill-patterns.md) — common patterns, subfolder conventions, description writing +- [Skill Evaluation Guide](skill-evals.md) — evals.json schema, Skill Creator setup, baseline_score +- [Skill Checklist](skill-checklist.md) — quick validation checklist per maturity level + --- ## Skill Maturity Levels @@ -15,7 +20,7 @@ Skills evolve through 7 maturity levels. Each level builds on the previous — a ### Levels Are Cumulative but Not Always Sequential -Levels 1–7 build on each other in terms of criteria. However, in practice **skills can exhibit higher-level patterns while skipping intermediate levels**. For example, a skill that orchestrates 8 parallel agents (L7 structure) but has never been formally evaluated (L5) or given a learnings pipeline (L6) is **"structurally L7 but maturity L4."** +Levels 1-7 build on each other in terms of criteria. However, in practice **skills can exhibit higher-level patterns while skipping intermediate levels**. For example, a skill that orchestrates 8 parallel agents (L7 structure) but has never been formally evaluated (L5) or given a learnings pipeline (L6) is **"structurally L7 but maturity L4."** When assessing skills, note both the **structural level** (highest level pattern present) and the **maturity level** (highest level where ALL criteria through that level are met). 
The goal is to close gaps — add measurement and self-improvement to skills that already have orchestration. @@ -81,10 +86,10 @@ The skill is built on recognized patterns, community best practices, or existing **Criteria (in addition to L2):** - Built on a **recognized pattern**: Anthropic official patterns, validated community skill, or your own proven pattern library - Has **at least one supporting subfolder**: `examples/` (output format demos), `references/` (standards docs), or `templates/` (fillable scaffolds) -- Uses **at least one common pattern** consistently (model guard, AskUserQuestion, destructive action confirmation, quality gates — see [Common Patterns](#common-patterns) below) +- Uses **at least one common pattern** consistently (model guard, AskUserQuestion, destructive action confirmation, quality gates — see [skill-patterns.md](skill-patterns.md#common-patterns)) - References **standards documents** where applicable (in `references/`) -> **What the script auto-detects for L3:** at least one common pattern keyword present in SKILL.md from any of these categories: (1) model guard (`model:`, `On Haiku`, `active model`, …), (2) `AskUserQuestion`, (3) quality gates (`composer check`, `phpcs`, `phpstan`, `make check`, `ruff`, `psalm`), (4) subfolder references (`examples/`, `refs/`, `references/`, or `templates/` as text in SKILL.md), or (5) destructive/browser patterns (`confirm.*before`, `browser_snapshot`, `browser_navigate`, `## Hard Rule`, `## Verification`, `acceptance_criteria`) — AND existence of at least one of `examples/`, `references/`, or `templates/` on disk. These are structural proxies for the full criteria above. 
+> **What the script auto-detects for L3:** at least one common pattern keyword present in SKILL.md from any of these categories: (1) model guard (`model:`, `On Haiku`, `active model`, ...), (2) `AskUserQuestion`, (3) quality gates (`composer check`, `phpcs`, `phpstan`, `make check`, `ruff`, `psalm`), (4) subfolder references (`examples/`, `refs/`, `references/`, or `templates/` as text in SKILL.md), or (5) destructive/browser patterns (`confirm.*before`, `browser_snapshot`, `browser_navigate`, `## Hard Rule`, `## Verification`, `acceptance_criteria`) — AND existence of at least one of `examples/`, `references/`, or `templates/` on disk. These are structural proxies for the full criteria above. **Sources for proven patterns:** - Anthropic's official `/skill-creator` bundled plugin ([GitHub](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md)) @@ -129,125 +134,16 @@ The skill has been systematically tested with evaluation scenarios. Its performa **Why most skills plateau at L4:** A skill that "feels right" but has never been measured may have blind spots, false confidence, or suboptimal triggering. Measurement turns intuition into evidence. **Criteria (in addition to L4):** -- Has **3+ evals** with: - - Input prompt (what the user would say) - - Expected output characteristics (`expected_output`) - - Assertion criteria (`expectations` — how to grade pass/fail) -- **Description trigger testing**: 10+ `should_trigger` + 10+ `should_not_trigger` prompts in `trigger_tests` -- **Evals have been run**: `last_validated` is set to a date in `evals.json` -- **Baseline measurement** exists: what does Claude produce on these evals WITHOUT the skill? 
+- Has **3+ evals** with input prompts, expected output characteristics, and assertion criteria +- **Description trigger testing**: 10+ `should_trigger` + 10+ `should_not_trigger` prompts +- **Evals have been run**: `last_validated` is set in `evals.json` +- **Baseline measurement** exists: what does Claude produce without the skill? - Skill has been through at least **one improve cycle** based on eval results - `evals/` folder with `evals.json`; `timing.json` and `grading.json` produced after running evals > **What the script auto-detects for L5:** 3+ evals (checks `evals` key, falls back to `scenarios`), 10+/10+ trigger tests, and `last_validated` non-null in evals.json. Baseline measurement and improve cycles are required for true L5 but not auto-checked by the script. -**How to evaluate a skill:** - -**`evals/evals.json` format:** - -```json -{ - "skill_name": "create-pr", - "version": "1.0.0", - "created": "2025-01-15", - "last_validated": null, - "evals": [ - { - "id": 1, - "prompt": "Create a PR for the openregister feature branch", - "expected_output": "PR targets development branch, has quality checks, proper format", - "files": [], - "expectations": [ - "targets development branch (not main)", - "runs composer check:strict", - "includes ## Summary and ## Test plan sections" - ] - } - ], - "trigger_tests": { - "should_trigger": [ - "Create a pull request for the feature branch", - "Open a PR from development to main", - "Make a PR for my changes", - "Submit this branch for review via PR", - "Create a GitHub pull request", - "PR this to development", - "Open pull request for openregister branch", - "Make a pull request for my new feature", - "Create PR targeting the main branch", - "Submit a pull request with these changes" - ], - "should_not_trigger": [ - "Can you review this code?", - "What is the difference between git merge and rebase?", - "How do I resolve a merge conflict?", - "Show me the git log", - "Commit my changes", - "Push to the remote 
branch", - "What branches are available?", - "Help me write a commit message", - "Show the diff for my changes", - "Explain what a pull request is" - ] - } -} -``` - -After running evals, update `last_validated` with the run date to unlock L5 green circle status. - -**Using the Anthropic Skill Creator to run evals:** - -The [Anthropic Skill Creator](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md) automates running, grading, and improving evals as a Claude Code skill. - -**Step-by-step:** - -1. **Install** (one-time): the Skill Creator lives at `.claude/skills/skill-creator/` in each repo (hydra and wordpress-docker). It's a vendored copy of [`anthropics/skills/skills/skill-creator/`](https://github.com/anthropics/skills/tree/main/skills/skill-creator) with one local modification: the eval workspace lives **inside** each skill rather than as a sibling folder. - - **Keeping it up to date:** Run `bash .claude/skills/update-skill-creator.sh` from the repo root. This script: - - Sparse-clones `anthropics/skills` to a tempdir - - Compares the upstream commit hash against `.claude/skills/skill-creator/.upstream-version` - - Backs up the current copy, rsyncs upstream files in, then re-applies `local-mods.patch` - - Updates `.upstream-version` to the new commit hash - - If `local-mods.patch` no longer applies cleanly (upstream rewrote the relevant section), the script aborts and points you at the backup + `.rej` files so you can hand-merge. We use this script-based approach because `anthropics/skills` keeps `skill-creator/` as a subdirectory, which makes pure `git subtree` impractical for tracking just that one folder. - - **Why we deviate from upstream:** Upstream Skill Creator writes eval results to `-workspace/` as a sibling to the skill folder. We patch this so results live at `/evals/workspace/iteration-N/`, keeping eval artifacts adjacent to the skill they belong to. 
The patch is recorded in `.claude/skills/skill-creator/local-mods.patch`. - -2. **Invoke**: In a Claude Code session, ask Claude to evaluate the skill: - > "Run evals on the test-app skill" or "Use the skill creator to evaluate and improve my X skill" - - Claude picks up the skill-creator and guides the process. The skill-creator's `evals/evals.json` format uses `evals[]` with `id`, `prompt`, `expected_output`, and `expectations`. We adopted this format across all our skills — see "Standard evals.json schema" below. - -3. **What happens**: Two parallel subagents run each eval: - - **With-skill agent**: runs the scenario with the skill active - - **Baseline agent**: runs the same scenario without the skill - Results are saved to `/evals/workspace/iteration-N/eval-/` (inside the skill folder, per our local convention). - -4. **Review results**: The Skill Creator runs `eval-viewer/generate_review.py` and opens a browser tab (or generates a static HTML file with `--static`) with two tabs: **Outputs** (click through each eval, leave qualitative feedback) and **Benchmark** (pass rates, timing, tokens with-skill vs baseline). - -5. **Output files** written to `/evals/workspace/iteration-N/eval-/`: - - `grading.json` — assertion pass/fail with evidence per expectation - - `timing.json` — token count and duration - - `benchmark.json` — aggregate stats across all evals (one level up, at `iteration-N/`) - - `eval-review-iteration-N.html` — static viewer (one level up, at `evals/workspace/`) - -6. **Update `last_validated` and `baseline_score`** in `evals.json` after a successful run: - ```json - "last_validated": "2026-04-13", - "baseline_score": 0.67 - ``` - `last_validated` unlocks the L5 green circle in the skill overview dashboard. `baseline_score` records the with-skill pass rate at the time of validation — used as a regression guardrail when re-running evals (see "baseline_score" section below). - -7. 
**Improve cycle**: The Skill Creator's analyzer flags non-discriminating assertions, flaky evals, and skill improvement suggestions. Update `SKILL.md` and re-run as iteration-2 (and so on, in the same `evals/workspace/` folder). - -**`baseline_score` field — manual tracking for regression detection:** - -Even running evals manually (no CI), `baseline_score` is useful: it's the with-skill pass rate from the most recent successful eval run, recorded in `evals/evals.json` next to `last_validated`. When you re-run evals later, compare the new pass rate against `baseline_score`: - -- **New rate ≥ baseline_score** → skill is stable or improving. Update `baseline_score` to the new rate (and bump `last_validated`). -- **New rate < baseline_score** → regression. Investigate before updating either field. The skill change you just made may have broken something. - -This gives you a paper trail of "this skill scored 0.67 on these expectations on this date" without needing CI infrastructure. The Skill Creator's own `benchmark.json` already produces the pass rate — you're just writing it back into `evals.json` as a durable marker. +For the full evals.json schema, Skill Creator setup guide, and baseline_score usage, see [skill-evals.md](skill-evals.md). 
--- @@ -257,8 +153,8 @@ The skill captures learnings during execution and periodically consolidates them **The Learnings-to-Rules Pipeline:** ``` -Execution → Capture observations → learnings.md → Consolidation → Updated SKILL.md rules - ↑ | +Execution -> Capture observations -> learnings.md -> Consolidation -> Updated SKILL.md rules + ^ | └──────────────────────────────┘ ``` @@ -267,14 +163,14 @@ Execution → Capture observations → learnings.md → Consolidation → Update - SKILL.md includes a **"capture learnings" step** near the end of execution - Each learning entry is **dated and atomic** (one insight per bullet) - Learnings have **5 sections**: Patterns That Work, Mistakes to Avoid, Domain Knowledge, Open Questions, Consolidated Principles -- **Consolidation triggers** at ~80–100 entries: remove outdated, merge duplicates, extract cross-entry patterns, promote validated principles to SKILL.md guardrails/rules +- **Consolidation triggers** at ~80-100 entries: remove outdated, merge duplicates, extract cross-entry patterns, promote validated principles to SKILL.md guardrails/rules **Improvement: Learning Candidates Ledger** Rather than writing every observation directly to `learnings.md`, use a two-stage buffer to prevent garbage learnings from polluting context: ``` -learning-candidates.md → (promotion criteria met?) → learnings.md → SKILL.md rules +learning-candidates.md -> (promotion criteria met?) 
-> learnings.md -> SKILL.md rules ↓ (no) discarded after 30 days ``` @@ -293,7 +189,7 @@ Promotion criteria: observation confirmed across 3+ executions, or resolves a me ## Mistakes to Avoid - 2026-03-18: Do NOT create PR with uncommitted changes — causes confusion about what's included -- 2026-03-22: Lock file conflicts (composer.lock) → run `composer update` locally, don't accept either side +- 2026-03-22: Lock file conflicts (composer.lock) -> run `composer update` locally, don't accept either side ## Domain Knowledge - 2026-03-19: Conduction repos use `development` as primary integration branch, not `main` @@ -316,7 +212,7 @@ The skill orchestrates multiple agents or is part of a coordinated workflow wher - **Orchestrates sub-agents** (spawns parallel workers) or is **orchestrated by a parent skill** - Part of a **defined workflow chain** with explicit handoff points: ``` - opsx-new → opsx-ff → opsx-plan-to-issues → opsx-apply → opsx-verify → opsx-archive + opsx-new -> opsx-ff -> opsx-plan-to-issues -> opsx-apply -> opsx-verify -> opsx-archive ``` - **Hands off context** to the next skill (shows "Next steps: run `/opsx-verify`") - Uses **isolated execution contexts** when needed (git worktrees, Docker containers) @@ -329,19 +225,19 @@ The skill orchestrates multiple agents or is part of a coordinated workflow wher |---------|---------|-------------| | **Pipeline** | `opsx-pipeline` | Full lifecycle for 1+ changes in parallel via subagents | | **Fan-out/Fan-in** | `test-counsel`, `feature-counsel` | Spawn N agents in parallel, synthesize results | -| **Sequential Chain** | `opsx-new` → ... → `opsx-archive` | Each skill hands off to the next | -| **Autonomous Loop** | `opsx-apply-loop` | Runs apply→verify cycle with retry logic, auto-archives | +| **Sequential Chain** | `opsx-new` -> ... 
-> `opsx-archive` | Each skill hands off to the next | +| **Autonomous Loop** | `opsx-apply-loop` | Runs apply->verify cycle with retry logic, auto-archives | | **Multi-perspective** | `test-app` | Spawns 6 specialized test agents simultaneously | > **Reference:** Claude Code Agent Teams (experimental) and the Agent SDK support these patterns natively. See [Claude Code Agent Teams docs](https://code.claude.com/docs/en/agent-teams). -**Important — structural vs mature L7:** A skill can exhibit L7 orchestration patterns (spawning subagents, workflow chains) while lacking L5 measurement and L6 self-improvement. Such a skill is **"structurally L7, maturity L4"** — it has the architecture of a workforce but the self-awareness of a static tool. The goal is to close the L5–L6 gap so the orchestration is not just complex but also measurably effective and continuously improving. +**Important — structural vs mature L7:** A skill can exhibit L7 orchestration patterns (spawning subagents, workflow chains) while lacking L5 measurement and L6 self-improvement. Such a skill is **"structurally L7, maturity L4"** — it has the architecture of a workforce but the self-awareness of a static tool. The goal is to close the L5-L6 gap so the orchestration is not just complex but also measurably effective and continuously improving. --- ### Maturity Assessment Quick Reference -| Check | Yes → at least | No → stuck at | +| Check | Yes -> at least | No -> stuck at | |-------|:---:|:---:| | Has SKILL.md with frontmatter, steps, guardrails? | L1 | Below L1 | | Description optimized for triggering, progressive disclosure used, <500 lines? | L2 | L1 | @@ -376,11 +272,11 @@ Skills degrade over time. Schedule periodic reviews: ### Common Upgrade Paths -**L4 → L5 (most common need):** Create 3 eval scenarios from real usage. Run the skill, grade output, identify one weakness, improve, re-eval. +**L4 -> L5 (most common need):** Create 3 eval scenarios from real usage. 
Run the skill, grade output, identify one weakness, improve, re-eval. -**L5 → L6:** Add `learnings.md` and a "capture learnings" step to SKILL.md. After 5–10 executions, review learnings and promote validated patterns to standing rules. +**L5 -> L6:** Add `learnings.md` and a "capture learnings" step to SKILL.md. After 5-10 executions, review learnings and promote validated patterns to standing rules. -**L4 → L7 (standalone → workflow):** Identify which workflow chain the skill belongs to. Add "Next steps" guidance. Add context handoff. Test the full chain end-to-end. +**L4 -> L7 (standalone -> workflow):** Identify which workflow chain the skill belongs to. Add "Next steps" guidance. Add context handoff. Test the full chain end-to-end. **Fixing "structurally L7 but maturity L4":** Add L5 evals and L6 learnings to the orchestrator skill first — its improvements cascade to all sub-agents. @@ -391,14 +287,14 @@ Skills degrade over time. Schedule periodic reviews: ``` .claude/skills/ / - SKILL.md ← required: the skill logic (L1+) - templates/ ← files Claude fills in and writes to disk (L2+) - references/ ← standards and guides Claude reads for context (L3+) - examples/ ← worked output demonstrations and few-shot patterns (L3+) - assets/ ← non-markdown static files (SVG, JS, YAML, JSON) - evals/ ← evaluation scenarios and results (L5+) - learnings.md ← accumulated execution insights (L6+) - learning-candidates.md ← unverified observations awaiting promotion (L6+) + SKILL.md <- required: the skill logic (L1+) + templates/ <- files Claude fills in and writes to disk (L2+) + references/ <- standards and guides Claude reads for context (L3+) + examples/ <- worked output demonstrations and few-shot patterns (L3+) + assets/ <- non-markdown static files (SVG, JS, YAML, JSON) + evals/ <- evaluation scenarios and results (L5+) + learnings.md <- accumulated execution insights (L6+) + learning-candidates.md <- unverified observations awaiting promotion (L6+) ``` Not every skill 
needs all subfolders. Create them only when content qualifies. A skill with no supporting files is just a `SKILL.md` — no subfolders needed. @@ -467,7 +363,7 @@ Brief explanation of what the skill does. ### Frontmatter Rules -- `name` must match the folder name exactly (e.g. folder `test-counsel` → `name: test-counsel`) +- `name` must match the folder name exactly (e.g. folder `test-counsel` -> `name: test-counsel`) - `description` is what users see in the skill picker AND what Claude uses to decide whether to load the skill — make it action-oriented, specific, and written in **third person** - Front-load the key use case in the first 250 characters - Include specific trigger terms (verbs and nouns a user would naturally say) @@ -503,7 +399,7 @@ Skills can inject dynamic content at invocation time: | Syntax | Purpose | Example | |--------|---------|---------| -| `$ARGUMENTS` | Full argument string passed after `/skill-name` | `/app-create my-app` → `$ARGUMENTS` = `"my-app"` | +| `$ARGUMENTS` | Full argument string passed after `/skill-name` | `/app-create my-app` -> `$ARGUMENTS` = `"my-app"` | | `$ARGUMENTS[0]` | Individual positional argument | First argument after the skill name | | `${CLAUDE_SKILL_DIR}` | Absolute path to the skill's own folder | Useful for referencing bundled scripts or assets | | `` !`command` `` | Shell command output injected before skill loads | `` !`git branch --show-current` `` injects the current branch name | @@ -520,104 +416,10 @@ Match the skill's specificity to its task fragility: | Feature implementation, refactoring | **Medium** | Provide steps with decision points, let Claude adapt | | Database migrations, production deploys, CI config | **Low** | Prescribe exact commands, explicit confirmation gates | -A skill for `/app-explore` (thinking mode) should have high degrees of freedom — it's about creativity and investigation. 
A skill for `/app-apply` (config → code sync) should have low freedom — it must apply changes predictably and safely. +A skill for `/app-explore` (thinking mode) should have high degrees of freedom — it's about creativity and investigation. A skill for `/app-apply` (config -> code sync) should have low freedom — it must apply changes predictably and safely. > **Source:** Anthropic's [Skill Authoring Best Practices](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/best-practices) — "Set appropriate degrees of freedom: match specificity to task fragility." -### Description Writing Guide - -The description is the single most important line in your skill. It determines auto-triggering reliability (L2). - -``` -# Structure: [Action verb] [what] — [key detail or when to use] - -# Good examples: -description: Create a Pull Request from the current branch — runs local checks, picks target branch, and opens the PR on GitHub -description: Run automated browser tests for a Nextcloud app — single agent or multi-perspective parallel testing -description: Archive a completed change in the experimental workflow - -# Bad examples: -description: Helps with PRs # too vague, no trigger terms -description: This skill creates pull requests for you # first person, wastes chars -description: A useful tool for managing code reviews # passive, no specificity -``` - ---- - -## Subfolder Guide - -### `templates/` - -Files with `{PLACEHOLDER}` variables that Claude fills in at runtime. Claude reads the file, substitutes values, and either writes the result to the user's project or injects it into a sub-agent. - -**Use for:** -- JSON/YAML/Markdown documents Claude writes to disk (e.g. `app-config.json`, `CHANGELOG.md`) -- Sub-agent prompts with variable substitution -- PR/issue body formats Claude fills in before creating -- Spec or design document scaffolds - -**Do not use for:** documents Claude reads without modifying, or fixed reference material. 
- -In `SKILL.md`, reference with: -``` -Write the file using the template in [templates/architecture-template.md](templates/architecture-template.md). -``` - -### `references/` - -Static content Claude reads to inform decisions — no placeholder variables, never written to the user's project. This is a key subfolder for **L4 personalization** — business-specific standards, architecture docs, and domain knowledge live here. - -**Use for:** -- Standards documents (Dutch government guidelines, GEMMA/ZGW compliance, coding standards) -- Architecture guidelines and conventions -- Spec excerpts or capability descriptions -- Multi-page guides Claude consults during a step - -**Do not use for:** templates, examples, or binary files. - -In `SKILL.md`, reference with: -``` -Follow the standards in [references/dutch-gov-backend-standards.md](references/dutch-gov-backend-standards.md). -``` - -### `examples/` - -Worked demonstrations showing what expected output looks like. Used for few-shot guidance — Claude sees a concrete example and produces output in the same pattern. Critical for **L3** (proven patterns). - -**Use for:** -- Output format blocks (e.g. "Archive Complete", "Implementation Paused") -- Worked conflict resolution or decision examples -- Sample report sections showing expected structure - -**Do not use for:** templates with placeholders (those go in `templates/`), or static reference docs. - -In `SKILL.md`, reference with: -``` -For the expected output format, see [examples/output-templates.md](examples/output-templates.md). -``` - -### `assets/` - -Non-markdown static files that get copied as-is to the user's project or used by the skill tooling. - -**Use for:** -- SVG illustrations or placeholder images -- JavaScript/TypeScript config stubs (`webpack.config.js`, `docusaurus.config.js`) -- YAML/JSON configuration stubs copied verbatim - -**Do not use for:** markdown files (even if they're config-like — those go in `templates/`). 
- -### `evals/` (L5+) - -Evaluation scenarios and benchmark results for measured skills. - -**Use for:** -- `evals.json` — test scenarios, `trigger_tests` (should/should-not-trigger examples), and `last_validated` (date of last eval run; required for L5 green circle) -- `timing.json` — token usage and duration per eval run -- `grading.json` — assertion pass/fail results with evidence - -See [Level 5: Measurement](#level-5-measurement--evaluated-and-optimized-with-data) for format details. - --- ## Naming Conventions @@ -638,107 +440,6 @@ The folder name, `name` frontmatter field, and the slash command all must match --- -## Common Patterns - -These are **L3 patterns** — proven, reusable building blocks that should be applied consistently across skills. - -### Model guard (for heavy reasoning skills) - -Place at the top of `SKILL.md` when the skill needs Sonnet or Opus: - -```markdown -**Check the active model** from your system context. - -- **On Haiku**: stop immediately — this skill requires Sonnet or Opus. - Switch with `/model sonnet` and re-run. -- **On Sonnet or Opus**: proceed normally. -``` - -### User input - -Always use the **AskUserQuestion** tool — never assume or auto-select: - -```markdown -Use **AskUserQuestion** to ask: - -> "Which change should I archive?" - -Options: -- **change-a** — description -- **change-b** — description -``` - -For multi-select prompts: -```markdown -Use **AskUserQuestion** with `multiSelect: true` to let the user choose. -``` - -### Destructive action confirmation - -Before any irreversible action, show what will happen and ask for explicit confirmation: - -```markdown -Show a preview of the changes, then use **AskUserQuestion**: - -> "Create these issues in `owner/repo`?" 
- -Options: -- **Yes, proceed** — continue -- **Cancel** — end without changes -``` - -### Referencing subfolders - -Use relative markdown links — Claude resolves them relative to the skill folder: - -```markdown -Use the template in [templates/architecture-template.md](templates/architecture-template.md). -Follow the standards in [references/dutch-gov-backend-standards.md](references/dutch-gov-backend-standards.md). -For output examples, see [examples/output-templates.md](examples/output-templates.md). -``` - -### PHP quality checks (for skills that trigger implementation) - -After code changes, run the quality suite and block on failures: - -```markdown -Run `composer check:strict` from the app directory. -If it fails, fix all issues before continuing — do not skip. -``` - -### Capture learnings step (L6+) - -Add as the final step in skills that should self-improve: - -```markdown -**Capture learnings** - -After execution, review what happened and append new observations to -[learnings.md](learnings.md) under the appropriate section: - -- **Patterns That Work** — approaches that produced good results -- **Mistakes to Avoid** — errors encountered and how they were resolved -- **Domain Knowledge** — facts discovered during this run -- **Open Questions** — unresolved items for future investigation - -Each entry must include today's date. One insight per bullet. Skip if nothing new was learned. -``` - -### Next steps handoff (L7) - -For skills that are part of a workflow chain: - -```markdown -**Next steps** - -Suggest the logical next action: -- If tasks remain → "Run `/opsx-apply` to implement" -- If implementation done → "Run `/opsx-verify` to validate" -- If verified → "Run `/opsx-archive` to complete" -``` - ---- - ## What NOT to Put in SKILL.md Extract to subfolders when a block qualifies (10%+, standalone). Leave in `SKILL.md`: @@ -747,43 +448,3 @@ Extract to subfolders when a block qualifies (10%+, standalone). 
Leave in `SKILL - Conditional logic that references the step context - Procedural steps that only make sense in sequence - Guardrails and constraints (they're part of the skill's contract) - ---- - -## Checklist Before Adding a Skill - -### L1–L2 (minimum for any skill) -- [ ] Folder name matches `name` in frontmatter and the slash command -- [ ] `description` is action-oriented, third-person, under 250 characters, with specific trigger terms -- [ ] Steps are numbered and self-contained -- [ ] Guardrails define what the skill must NOT do -- [ ] Destructive actions have explicit confirmation prompts -- [ ] `SKILL.md` is under 500 lines — large blocks extracted to subfolders -- [ ] All subfolder links use relative paths -- [ ] `SKILL.md` reads coherently top to bottom without opening subfolder files - -### L3 (recommended for all skills) -- [ ] Uses common patterns consistently (model guard, AskUserQuestion, quality checks) -- [ ] Has `examples/` if the skill produces structured output -- [ ] References standards documents where applicable - -### L4 (recommended for business-critical skills) -- [ ] Contains business-specific `references/` (standards, architecture, ADRs) -- [ ] Uses project-specific templates and terminology -- [ ] Output matches team expectations without manual correction - -### L5+ (recommended for frequently-used skills) -- [ ] Has 3+ eval scenarios in `evals/` -- [ ] Baseline measurement documented (output without skill) -- [ ] Description trigger-tested (10 should + 10 should-not queries) -- [ ] PHP/Python quality checks triggered after code changes (if applicable) - -### L6+ (recommended for skills that run often and evolve) -- [ ] Has `learnings.md` with dated, atomic entries -- [ ] SKILL.md includes "capture learnings" step -- [ ] Consolidation process defined (trigger at ~80–100 entries) - -### L7 (for workflow and orchestration skills) -- [ ] Part of a defined workflow chain with explicit handoff -- [ ] Spawns subagents or is spawned by 
orchestrator -- [ ] Tested end-to-end as part of the full chain diff --git a/global-settings/block-write-commands.sh b/global-settings/block-write-commands.sh index 3973415..f78117f 100644 --- a/global-settings/block-write-commands.sh +++ b/global-settings/block-write-commands.sh @@ -12,12 +12,12 @@ cmd=$(echo "$input" | jq -r '.tool_input.command // ""') transcript_path=$(echo "$input" | jq -r '.transcript_path // ""') deny() { - printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"deny","permissionDecisionReason":"%s"}}\n' "$1" + printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"deny","permissionDecisionReason":"%s"}}\n' "$*" exit 0 } ask() { - printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"ask","permissionDecisionReason":"%s"}}\n' "$1" + printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"ask","permissionDecisionReason":"%s"}}\n' "$*" exit 0 } @@ -46,8 +46,8 @@ _h=$(printf '%s' "$HOME" | sed 's/[.[\*^$()+?{}|]/\\&/g') _prot="(~|\\\$HOME|${_h})/\.claude/(settings\.json|hooks/|settings-version|settings-repo-path|settings-repo-url|settings-repo-ref)" # chmod guard: deny write-enabling permissions on protected files -if echo "$cmd" | grep -qE "^\s*chmod\b" && echo "$cmd" | grep -qE "${_prot}"; then - if echo "$cmd" | grep -qE "^\s*chmod\s+(444|555|-w|\+x)(\s|$)"; then +if echo "$cmd" | grep -qE "(^|[;&|]\s*)chmod\b" && echo "$cmd" | grep -qE "${_prot}"; then + if echo "$cmd" | grep -qE "(^|[;&|]\s*)chmod\s+(444|555|-w|\+x)(\s|$)"; then : # read-only or execute-only — allowed else deny "BLOCKED: Claude cannot make ~/.claude/ config files writable. Run chmod manually in your own terminal if an update requires it." 
@@ -90,7 +90,7 @@ fi if $_is_config_write; then if echo "$cmd" | grep -qE "\bgit\b.*\bshow\b.*\borigin/main:"; then # Method 1: git show origin/main — verify canonical repo - _repo_path=$(cat "$HOME/.claude/settings-repo-path" 2>/dev/null | tr -d '[:space:]') + _repo_path=$(tr -d '[:space:]' < "$HOME/.claude/settings-repo-path" 2>/dev/null) _git_root="" [ -n "$_repo_path" ] && [ -d "$_repo_path" ] && \ _git_root=$(git -C "$_repo_path" rev-parse --show-toplevel 2>/dev/null) @@ -113,7 +113,7 @@ if $_is_config_write; then fi # ── curl ────────────────────────────────────────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*curl\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)curl\b'; then # Unambiguous write flags — check the full command string so that piped curl # invocations are also caught (e.g. curl url | curl -X POST url2). if echo "$cmd" | grep -qiE '(^|\s)(-[sviIkLSfnN]*X\s*(POST|PUT|DELETE|PATCH)|--request\s+(POST|PUT|DELETE|PATCH))' \ @@ -157,7 +157,7 @@ if echo "$cmd" | grep -qE '\bdocker\b'; then fi # ── gh api ──────────────────────────────────────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*gh\s+api\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)gh\s+api\b'; then if echo "$cmd" | grep -qiE '(^|\s)(--method\s+(POST|PUT|DELETE|PATCH)|-X\s*(POST|PUT|DELETE|PATCH))' \ || echo "$cmd" | grep -qiE '(^|\s)--input(=|\s)' \ || echo "$cmd" | grep -qiE '(^|\s)(--field|--raw-field)(=|\s)' \ @@ -181,7 +181,7 @@ if echo "$cmd" | grep -qE '\bgh\s+pr\s+(merge|close|edit|review|comment|create|r fi # ── git -C (allowlist — known read-only subcommands pass silently; write ops prompt) ── -if echo "$cmd" | grep -qE '^\s*git\b' && echo "$cmd" | grep -qE '\s-C\s'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)git\b' && echo "$cmd" | grep -qE '\s-C\s'; then # Extract subcommand: strip 'git', all '-C ' pairs, then leading flags subcmd=$(echo "$cmd" \ | sed 's/^\s*git\s*//' \ @@ -249,7 +249,7 @@ if echo "$cmd" | grep -qE '\bgit\s+push\b'; then fi # 
── git branch (prompt for write flags, bare — without -C) ─────────────────── -if echo "$cmd" | grep -qE '^\s*git\s+branch\b' && ! echo "$cmd" | grep -qE '\s-C\s'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)git\s+branch\b' && ! echo "$cmd" | grep -qE '\s-C\s'; then if echo "$cmd" | grep -qE '\bbranch\b.*-[a-zA-Z]*[dDmMcC]' \ || echo "$cmd" | grep -qiE '(^|\s)(--delete|--move|--copy|--force-create)(\s|=|$)'; then ask "git branch with write flags (-d/-D/-m/-M/-c/-C) modifies branches — approve to proceed." @@ -257,14 +257,14 @@ if echo "$cmd" | grep -qE '^\s*git\s+branch\b' && ! echo "$cmd" | grep -qE '\s-C fi # ── git remote (prompt for write subcommands, bare — without -C) ───────────── -if echo "$cmd" | grep -qE '^\s*git\s+remote\b' && ! echo "$cmd" | grep -qE '\s-C\s'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)git\s+remote\b' && ! echo "$cmd" | grep -qE '\s-C\s'; then if echo "$cmd" | grep -qE '\bremote\s+(add|remove|rename|set-url|set-head|prune|update)\b'; then ask "git remote add/remove/rename/set-url/prune/update modifies remotes — approve to proceed." fi fi # ── env (prompt when used to execute a command) ─────────────────────────────── -if echo "$cmd" | grep -qE '^\s*env\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)env\b'; then remainder=$(echo "$cmd" | sed 's/^\s*env\s*//') if [ -n "$remainder" ] && echo "$remainder" | tr ' \t' '\n' | grep -qE '^([a-z][a-zA-Z0-9_.-]*|[./][^[:space:]]*)$'; then ask "env used to execute a command — approve to proceed." @@ -272,21 +272,21 @@ if echo "$cmd" | grep -qE '^\s*env\b'; then fi # ── date (HARD BLOCK: modifying system time — no legitimate use case) ───────── -if echo "$cmd" | grep -qE '^\s*date\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)date\b'; then if echo "$cmd" | grep -qE '(^|\s)(-s\s|--set[[:space:]=])'; then deny "Blocked: date -s / --set modifies the system clock. This is never allowed." 
fi fi # ── cat (prompt for output redirection) ────────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*cat\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)cat\b'; then if echo "$cmd" | grep -qE '(^|[[:space:]])>{1,2}[[:space:]]*[^[:space:]]'; then ask "cat with output redirection (> or >>) would write to a file — approve to proceed." fi fi # ── find (prompt for destructive operations) ────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*find\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)find\b'; then if echo "$cmd" | grep -qE '(^|\s)-delete\b' \ || echo "$cmd" | grep -qE '(^|\s)-exec(dir)?\b'; then ask "find with -delete or -exec/-execdir can modify the filesystem — approve to proceed." @@ -294,7 +294,7 @@ if echo "$cmd" | grep -qE '^\s*find\b'; then fi # ── sort (prompt for file output flags) ─────────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*sort\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)sort\b'; then if echo "$cmd" | grep -qE '(^|\s)(-o[[:space:]]|--output[[:space:]=])' \ || echo "$cmd" | grep -qE '(^|[[:space:]])>{1,2}[[:space:]]*[^[:space:]]'; then ask "sort with -o/--output or output redirection writes to a file — approve to proceed." @@ -302,7 +302,7 @@ if echo "$cmd" | grep -qE '^\s*sort\b'; then fi # ── awk (prompt for file output operations) ─────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*awk\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)awk\b'; then if echo "$cmd" | grep -qE 'print[[:space:]]*>{1,2}' \ || echo "$cmd" | grep -qE "['\"][[:space:]]*>{1,2}[[:space:]]*[^[:space:]]"; then ask "awk with output redirection or print > file may write files — approve to proceed." 
@@ -323,7 +323,7 @@ if echo "$cmd" | grep -qE '\btee\b'; then fi # ── hostname (prompt when setting system hostname) ──────────────────────────── -if echo "$cmd" | grep -qE '^\s*hostname\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)hostname\b'; then remainder=$(echo "$cmd" | sed 's/^\s*hostname\s*//' | tr ' \t' '\n' | grep -vE '^-' | tr -d '[:space:]') if [ -n "$remainder" ]; then ask "hostname with a name argument sets the system hostname — approve to proceed." @@ -331,7 +331,7 @@ if echo "$cmd" | grep -qE '^\s*hostname\b'; then fi # ── rm (prompt for all deletions) ──────────────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*rm\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)rm\b'; then if echo "$cmd" | grep -qE '(^|\s)-[a-zA-Z]*[rRfFdi]'; then ask "rm with recursive/force/directory flags detected — approve to proceed." else @@ -340,12 +340,12 @@ if echo "$cmd" | grep -qE '^\s*rm\b'; then fi # ── rmdir (prompt — removes directories) ───────────────────────────────────── -if echo "$cmd" | grep -qE '^\s*rmdir\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)rmdir\b'; then ask "rmdir will remove directories — approve to proceed." fi # ── npm audit (prompt for fix — modifies package.json and lock file) ───────── -if echo "$cmd" | grep -qE '^\s*npm\s+audit\b'; then +if echo "$cmd" | grep -qE '(^|[;&|]\s*)npm\s+audit\b'; then if echo "$cmd" | grep -qE '\baudit\b.*\bfix\b'; then ask "npm audit fix modifies package.json and lock file — approve to proceed." fi @@ -361,6 +361,38 @@ if echo "$cmd" | grep -qE '>{1,2}[[:space:]]*[^[:space:]&>/]' \ ask "Output redirection to a file detected — approve to proceed." fi +# ── ln (symlink/hardlink guard — blocks links targeting protected paths) ────── +if echo "$cmd" | grep -qE '(^|[;&|]\s*)ln\b'; then + if echo "$cmd" | grep -qE "${_prot}"; then + deny "BLOCKED: Creating symlinks or hardlinks to ~/.claude/ config files is not allowed." + else + ask "ln creates a link — approve to proceed." 
+ fi +fi + +# ── sed -i (in-place file editing) ─────────────────────────────────────────── +if echo "$cmd" | grep -qE '(^|[;&|]\s*)sed\b' && echo "$cmd" | grep -qE "(^|\s)-[a-zA-Z]*i"; then + ask "sed -i edits files in place — approve to proceed." +fi + +# ── chown (change file ownership) ──────────────────────────────────────────── +if echo "$cmd" | grep -qE '(^|[;&|]\s*)chown\b'; then + ask "chown changes file ownership — approve to proceed." +fi + +# ── install (copy files with permissions) ──────────────────────────────────── +if echo "$cmd" | grep -qE '(^|[;&|]\s*)install\b'; then + ask "install copies files and sets permissions — approve to proceed." +fi + +# ── Pipe-to-shell and obfuscation guard ────────────────────────────────────── +# Catches common obfuscation techniques that bypass command-specific guards. +if echo "$cmd" | grep -qE '\|\s*(bash|sh|zsh|dash)\b' \ +|| echo "$cmd" | grep -qE '\bbase64\s+(-d|--decode)\b' \ +|| echo "$cmd" | grep -qE '(^|[;&|]\s*)eval\b'; then + ask "Potential command obfuscation detected (pipe to shell, base64 decode, or eval) — approve to proceed." +fi + WSL_DENY_MSG="BLOCKED: This command would leave the WSL Ubuntu workspace. Claude must never navigate to, execute from, or access the Windows filesystem or Windows processes. All work must stay within the WSL Linux environment." 
# ── WSL boundary guard ──────────────────────────────────────────────────────── diff --git a/global-settings/check-settings-version.sh b/global-settings/check-settings-version.sh index d00797f..8f0daad 100644 --- a/global-settings/check-settings-version.sh +++ b/global-settings/check-settings-version.sh @@ -30,11 +30,34 @@ REPO_PATH_FILE="$HOME/.claude/settings-repo-path" REPO_REF_FILE="$HOME/.claude/settings-repo-ref" VERSION_FILE="$HOME/.claude/settings-version" +# ── Input validation ───────────────────────────────────────────────────────── +# All config values read from files are validated before use — prevents prompt +# injection via crafted config files and API endpoint abuse via repo slug. +validate_ref() { [[ "$1" =~ ^[a-zA-Z0-9._/-]+$ ]]; } +validate_repo_slug() { [[ "$1" =~ ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ ]]; } +validate_semver() { [[ "$1" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; } + +# ── timeout wrapper (falls back to direct execution if timeout is missing) ─── +run_with_timeout() { + local secs="$1"; shift + if command -v timeout >/dev/null 2>&1; then + timeout "$secs" "$@" + else + "$@" + fi +} + # ── Tracking ref (branch/tag/sha) — defaults to "main" when unset ──────────── tracking_ref="main" if [ -f "$REPO_REF_FILE" ]; then - _ref=$(cat "$REPO_REF_FILE" | tr -d '[:space:]') - [ -n "$_ref" ] && tracking_ref="$_ref" + _ref=$(tr -d '[:space:]' < "$REPO_REF_FILE") + if [ -n "$_ref" ]; then + if validate_ref "$_ref"; then + tracking_ref="$_ref" + else + echo "WARNING: ~/.claude/settings-repo-ref contains invalid characters — ignoring, using 'main'." 
>&2 + fi + fi fi # ── Session-once guard ──────────────────────────────────────────────────────── @@ -44,15 +67,18 @@ if [ -z "$transcript_path" ]; then exit 0 fi session_key=$(echo "$transcript_path" | md5sum | cut -c1-12) -flag_file="/tmp/claude-version-warned-${session_key}" +_flag_dir="${XDG_RUNTIME_DIR:-$HOME/.claude}" +flag_file="${_flag_dir}/claude-version-warned-${session_key}" [ -f "$flag_file" ] && exit 0 -touch "$flag_file" +touch "$flag_file" && chmod 600 "$flag_file" 2>/dev/null # ── Semver helpers ──────────────────────────────────────────────────────────── semver_gt() { [ "$1" = "$2" ] && return 1 - local IFS=. - local i ver1=($1) ver2=($2) + local IFS=. i + local -a ver1 ver2 + read -ra ver1 <<< "$1" + read -ra ver2 <<< "$2" for ((i = 0; i < ${#ver1[@]}; i++)); do local a=${ver1[i]:-0} b=${ver2[i]:-0} if ((10#$a > 10#$b)); then return 0; fi @@ -62,19 +88,29 @@ semver_gt() { } semver_eq() { [ "$1" = "$2" ]; } +# ── Config warnings array (populated throughout, displayed at the end) ──────── +config_warnings=() + # ── Installed version ───────────────────────────────────────────────────────── installed_version="(not set)" installed_ok=false if [ -f "$VERSION_FILE" ]; then - installed_version=$(cat "$VERSION_FILE" | tr -d '[:space:]') - [ -n "$installed_version" ] && installed_ok=true + _iv=$(tr -d '[:space:]' < "$VERSION_FILE") + if [ -n "$_iv" ]; then + if validate_semver "$_iv"; then + installed_version="$_iv" + installed_ok=true + else + installed_version="(invalid: $_iv)" + config_warnings+=("~/.claude/settings-version contains invalid value '$_iv' — expected semver (e.g. 1.2.3).") + fi + fi fi # ── Repo dir resolution ─────────────────────────────────────────────────────── -config_warnings=() REPO_DIR="" if [ -f "$REPO_PATH_FILE" ]; then - REPO_DIR=$(cat "$REPO_PATH_FILE" | tr -d '[:space:]') + REPO_DIR=$(tr -d '[:space:]' < "$REPO_PATH_FILE") if [ ! 
-d "$REPO_DIR" ]; then config_warnings+=("Repo directory '${REPO_DIR}' from ~/.claude/settings-repo-path does not exist.") REPO_DIR="" @@ -95,7 +131,7 @@ if [ -n "$REPO_DIR" ]; then REPO_VERSION_FILE="$REPO_DIR/global-settings/VERSION" if [ -f "$REPO_VERSION_FILE" ]; then - local_version=$(cat "$REPO_VERSION_FILE" | tr -d '[:space:]') + local_version=$(tr -d '[:space:]' < "$REPO_VERSION_FILE") else local_version="(missing)" config_warnings+=("global-settings/VERSION not found at '${REPO_DIR}/global-settings/VERSION'.") @@ -109,13 +145,20 @@ online_source="" online_repo_slug="" if [ -f "$REPO_URL_FILE" ]; then - online_repo_slug=$(cat "$REPO_URL_FILE" | tr -d '[:space:]') + _slug=$(tr -d '[:space:]' < "$REPO_URL_FILE") + if [ -n "$_slug" ]; then + if validate_repo_slug "$_slug"; then + online_repo_slug="$_slug" + else + config_warnings+=("~/.claude/settings-repo-url contains invalid value '$_slug' — expected owner/repo format.") + fi + fi fi if [ -n "$online_repo_slug" ]; then if command -v gh >/dev/null 2>&1; then _api_path="repos/${online_repo_slug}/contents/global-settings/VERSION?ref=${tracking_ref}" - _gh_result=$(timeout 5 gh api "$_api_path" -H "Accept: application/vnd.github.raw+json" 2>/dev/null | tr -d '[:space:]') + _gh_result=$(run_with_timeout 5 gh api "$_api_path" -H "Accept: application/vnd.github.raw+json" 2>/dev/null | tr -d '[:space:]') if [ -n "$_gh_result" ] && echo "$_gh_result" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then online_version="$_gh_result" online_fetch_ok=true @@ -132,7 +175,7 @@ fi if ! 
$online_fetch_ok && [ -n "$REPO_DIR" ] && [ -n "$git_root" ]; then rel_path=$(realpath --relative-to="$git_root" "$REPO_DIR/global-settings/VERSION" 2>/dev/null) - if timeout 5 git -C "$git_root" fetch origin "${tracking_ref}" --quiet --depth=1 2>/dev/null; then + if run_with_timeout 5 git -C "$git_root" fetch origin "${tracking_ref}" --quiet --depth=1 2>/dev/null; then fetched=$(git -C "$git_root" show "origin/${tracking_ref}:${rel_path}" 2>/dev/null | tr -d '[:space:]') if [ -n "$fetched" ]; then online_version="$fetched"