From de8781482f38b8b49e0cd632cf49f93a8e3ddbac Mon Sep 17 00:00:00 2001 From: Aadarsh Agarwal Date: Mon, 15 Jun 2026 00:23:15 -0400 Subject: [PATCH 1/6] core: shared diagnostic/dependency primitives; fail loud for unattended agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add config/bash/fns/core — _anu_require / _anu_die / _anu_warn / _anu_note — the one place anu's diagnostic vocabulary lives, so a full-auto cxc run hits a clear "missing required tool: X" line instead of a symptom three calls deep. Wire it into the genuinely unguarded paths: - swarm() guards jq+tmux for every real subcommand (help still works bare) - nc/ncn require ssh before opening an ssh/cluster/apple channel - swarm send/capture/inspect list the available agents on a bad id, so a mistyped agent self-corrects instead of dead-ending - swarm send no longer claims "sent" when live pane injection fails — it says the message is queued to the mailbox and the pane may be gone Tests: new tests/bash/core_test.sh (18 assertions); helpers.bash loads core so functions-under-test resolve their primitives. smoke/swarm/ncn stay green. Co-Authored-By: Claude Opus 4.8 (1M context) --- config/bash/fns/core | 48 +++++++++++++++++++++++++++++++++++++++++ config/bash/fns/ncn | 2 +- config/bash/fns/swarm | 33 ++++++++++++++++++++++------ tests/bash/core_test.sh | 45 ++++++++++++++++++++++++++++++++++++++ tests/lib/helpers.bash | 6 ++++++ 5 files changed, 127 insertions(+), 7 deletions(-) create mode 100644 config/bash/fns/core create mode 100644 tests/bash/core_test.sh diff --git a/config/bash/fns/core b/config/bash/fns/core new file mode 100644 index 0000000..0abb862 --- /dev/null +++ b/config/bash/fns/core @@ -0,0 +1,48 @@ +# ============================================================================== +# core — shared primitives for anu shell functions. 
+# ============================================================================== +# Sourced into every interactive shell (via the config/bash/fns/* glob, before +# the commands that call it run) and by the bash test harness. This is the one +# place anu's diagnostic vocabulary lives, so an unattended agent fails loud +# with a clear line instead of a cryptic error three calls deep — the single +# most common way a full-auto cxc run silently dead-ends. +# +# _anu_warn "msg" # yellow "anu: msg" → stderr +# _anu_note "msg" # dim "anu: msg" → stderr +# _anu_die "msg" # red "anu: msg" → stderr, returns 1 +# _anu_require jq tmux # 0 if all present; else names the missing tool(s) + +# Write a prefixed diagnostic to stderr. Colorized only when stderr is a tty, +# so logs and pipes stay clean. +_anu_msg() { + local level="$1"; shift + local color='' reset='' + if [[ -t 2 ]]; then + reset=$'\033[0m' + case "$level" in + err) color=$'\033[31m' ;; + warn) color=$'\033[33m' ;; + *) color=$'\033[2m' ;; + esac + fi + printf '%sanu:%s %s\n' "$color" "$reset" "$*" >&2 +} + +_anu_warn() { _anu_msg warn "$@"; } +_anu_note() { _anu_msg note "$@"; } + +# Print a diagnostic and return 1. Callers chain `|| return`. +_anu_die() { _anu_msg err "$@"; return 1; } + +# _anu_require TOOL [TOOL...] — ensure each named command is on PATH. If +# any are missing, print one line listing every missing tool and return +# 1. The choke point for "a dependency isn't installed": swarm/ncn route their +# hard requirements through here so the failure names the tool, not a symptom. 
+_anu_require() { + local tool missing=() + for tool in "$@"; do + command -v "$tool" &>/dev/null || missing+=("$tool") + done + (( ${#missing[@]} )) || return 0 + _anu_die "missing required tool(s): ${missing[*]}" +} diff --git a/config/bash/fns/ncn b/config/bash/fns/ncn index c7c1420..0233ba4 100644 --- a/config/bash/fns/ncn +++ b/config/bash/fns/ncn @@ -89,7 +89,7 @@ _nc_open() { local rcmd case "$profile" in box) rcmd="box bash" ;; - cluster|apple|ssh) rcmd="ssh ${sshopts:+$sshopts }${target}" ;; + cluster|apple|ssh) _anu_require ssh || return 1; rcmd="ssh ${sshopts:+$sshopts }${target}" ;; *) echo "nc/ncn: unknown profile '$profile' (cluster|apple|ssh|box)"; return 1 ;; esac diff --git a/config/bash/fns/swarm b/config/bash/fns/swarm index 40af0b3..05bc7ab 100644 --- a/config/bash/fns/swarm +++ b/config/bash/fns/swarm @@ -93,6 +93,16 @@ _swarm_agent_ids() { done } +# A one-line "available: a b c" hint for not-found diagnostics (empty if none). +# Turns a mistyped agent id into a self-correcting message for an unattended +# conductor instead of a dead end. +_swarm_agents_hint() { + local ids + ids=$(_swarm_agent_ids "$@" 2>/dev/null | tr '\n' ' ') + ids="${ids% }" + [[ -n "$ids" ]] && printf 'available: %s' "$ids" +} + # Generate a short swarm ID _swarm_gen_id() { date +%s | shasum | head -c 8 @@ -635,7 +645,7 @@ _swarm_send() { swarm_dir=$(_swarm_dir) || { echo "swarm send: no active swarm in this window."; return 1; } # Validate target exists - [[ -f "$swarm_dir/agents/$target.json" ]] || { echo "swarm send: agent '$target' not found."; return 1; } + [[ -f "$swarm_dir/agents/$target.json" ]] || { _anu_die "swarm send: no agent '$target' in this swarm. $(_swarm_agents_hint "$swarm_dir")"; return 1; } local sender sender=$(_swarm_whoami) @@ -655,10 +665,16 @@ TIME: $(date -u +%Y-%m-%dT%H:%M:%SZ) $message EOF - # Inject into pane (immediacy) - _swarm_send_keys "$target" "$message" + # Inject into pane (immediacy). 
The mailbox write above is the durable + # channel; report honestly if the live injection couldn't land. + local injected=0 + _swarm_send_keys "$target" "$message" && injected=1 command -v _agentlog_append &>/dev/null && _agentlog_append "$(_swarm_current_id 2>/dev/null)" "$target" "send" "[$sender→$target] $message" - echo "swarm: sent to $target" + if (( injected )); then + echo "swarm: sent to $target" + else + _anu_warn "queued to $target's mailbox; live pane injection failed (agent pane gone? check: swarm status)" + fi } # Broadcast message to all agents in the current swarm @@ -732,7 +748,7 @@ _swarm_capture() { local swarm_dir swarm_dir=$(_swarm_dir) || { echo "swarm capture: no active swarm."; return 1; } - [[ -f "$swarm_dir/agents/$agent.json" ]] || { echo "swarm capture: agent '$agent' not found."; return 1; } + [[ -f "$swarm_dir/agents/$agent.json" ]] || { _anu_die "swarm capture: no agent '$agent' in this swarm. $(_swarm_agents_hint "$swarm_dir")"; return 1; } _swarm_capture_pane "$agent" > "$swarm_dir/results/$agent.md" echo "swarm: captured $agent → results/$agent.md" @@ -746,7 +762,7 @@ _swarm_inspect() { local swarm_dir swarm_dir=$(_swarm_dir) || { echo "swarm inspect: no active swarm."; return 1; } - [[ -f "$swarm_dir/agents/$agent.json" ]] || { echo "swarm inspect: agent '$agent' not found."; return 1; } + [[ -f "$swarm_dir/agents/$agent.json" ]] || { _anu_die "swarm inspect: no agent '$agent' in this swarm. $(_swarm_agents_hint "$swarm_dir")"; return 1; } _swarm_colors local role device cmd @@ -2882,6 +2898,11 @@ swarm() { local subcmd="${1:-help}" shift 2>/dev/null + # Fail loud if the hard runtime deps are missing (help still works without). 
+ if [[ "$subcmd" != help && "$subcmd" != --help && "$subcmd" != -h ]]; then + _anu_require jq tmux || return 1 + fi + case "$subcmd" in start) _swarm_start "$@" ;; star) _swarm_star "$@" ;; diff --git a/tests/bash/core_test.sh b/tests/bash/core_test.sh new file mode 100644 index 0000000..4f3c48a --- /dev/null +++ b/tests/bash/core_test.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# core — the shared diagnostic/dependency primitives every other fn leans on. +# These are load-bearing for unattended agents: a missing tool or a dead pane +# must surface as one clear line, not a symptom three calls deep. +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$here/../lib/harness.sh" +source "$here/../lib/helpers.bash" # sources config/bash/fns/core + +t_suite "core" + +t_section "_anu_require names missing tools, passes present ones" +_anu_require bash 2>/dev/null; assert_ok $? "single present tool → 0" +_anu_require bash sh 2>/dev/null; assert_ok $? "multiple present → 0" +_anu_require 2>/dev/null; assert_ok $? "no args → 0" +_anu_require __anu_no_such__ 2>/dev/null; assert_fail $? "missing tool → nonzero" +err=$(_anu_require __anu_no_such__ 2>&1) +assert_contains "$err" "__anu_no_such__" "names the missing tool" +assert_contains "$err" "anu:" "uses the anu: prefix" +err=$(_anu_require __nope_a__ __nope_b__ 2>&1) +assert_contains "$err" "__nope_a__" "lists every missing tool (1/2)" +assert_contains "$err" "__nope_b__" "lists every missing tool (2/2)" + +t_section "diagnostics go to stderr and _anu_die returns 1" +out=$(_anu_die "boom" 2>/dev/null); assert_eq "" "$out" "_anu_die silent on stdout" +err=$(_anu_die "boom" 2>&1 >/dev/null); assert_contains "$err" "boom" "_anu_die → stderr" +_anu_die "x" 2>/dev/null; assert_fail $? "_anu_die returns 1" +err=$(_anu_warn "heads up" 2>&1 >/dev/null); assert_contains "$err" "heads up" "_anu_warn → stderr" + +t_section "swarm wires its guards through core" +# Static wiring checks — no live tmux needed. 
Sourcing swarm alone must still +# succeed (the smoke test proves it) and the guards must be present in the body. +source "$(anu_fns swarm)" 2>/dev/null +assert_contains "$(declare -f swarm)" "_anu_require" "swarm() guards jq/tmux" +assert_contains "$(declare -f _swarm_send)" "injection failed" "swarm send reports a failed injection" +assert_contains "$(declare -f _swarm_agents_hint)" "available:" "agents-hint helper exists" + +t_section "_swarm_agents_hint turns a bad id into a self-correcting list" +sd=$(mktmp); mkdir -p "$sd/agents" +: > "$sd/agents/agent-1.json"; : > "$sd/agents/agent-2.json" +hint=$(_swarm_agents_hint "$sd") +assert_contains "$hint" "agent-1" "hint lists agent-1" +assert_contains "$hint" "agent-2" "hint lists agent-2" +assert_eq "" "$(_swarm_agents_hint "$(mktmp)" 2>/dev/null)" "no agents → empty hint" + +t_done diff --git a/tests/lib/helpers.bash b/tests/lib/helpers.bash index 2d85a63..322978b 100644 --- a/tests/lib/helpers.bash +++ b/tests/lib/helpers.bash @@ -11,6 +11,12 @@ export ANU_ROOT # Many fns resolve sibling files via $ANU_PATH; point it at the repo under test. export ANU_PATH="$ANU_ROOT" +# Shared primitives (_anu_require/_anu_die/_anu_warn/...) that swarm, ncn, and +# the rest call at runtime. The real shell loads this first via the fns/* glob; +# tests load it explicitly so functions-under-test resolve their helpers. +# shellcheck source=/dev/null +source "$ANU_ROOT/config/bash/fns/core" + # Path to a function file under config/bash/fns (or bin). 
anu_fns() { printf '%s/config/bash/fns/%s' "$ANU_ROOT" "$1"; } anu_bin() { printf '%s/config/bash/bin/%s' "$ANU_ROOT" "$1"; } From 0b2cac0a6fe3acde8582555ca3ff56ce54afa9d0 Mon Sep 17 00:00:00 2001 From: Aadarsh Agarwal Date: Mon, 15 Jun 2026 00:26:39 -0400 Subject: [PATCH 2/6] modeling: a plugin that does the quantitative science, not just describes it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit anu could write and animate research (science-writing, tikz, manim, marimo) but nothing actually did it. modeling is the quantitative do-stage twin of /investigate: take data (a file, a simulation, or a digitized figure), frame candidate models, fit with parameter covariance, select on evidence (AIC/BIC + cross-validation, not in-sample fit), and discover the functional form when it's unknown (symbolic regression) — composing the Axiomatic MCP tools (AxModelFitter / AxEquationExplorer / AxPlotToData / AxArgmin), ncn for heavy compute, and trail/present to record and show. This is the /modeling plugin the build notes had deferred. Wired into the marketplace; passes the plugin contract tests. Command /modeling, skill modeling. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- plugins/.claude-plugin/marketplace.json | 5 + plugins/modeling/.claude-plugin/plugin.json | 11 ++ plugins/modeling/commands/modeling.md | 22 ++++ plugins/modeling/skills/modeling/SKILL.md | 112 ++++++++++++++++++++ 4 files changed, 150 insertions(+) create mode 100644 plugins/modeling/.claude-plugin/plugin.json create mode 100644 plugins/modeling/commands/modeling.md create mode 100644 plugins/modeling/skills/modeling/SKILL.md diff --git a/plugins/.claude-plugin/marketplace.json b/plugins/.claude-plugin/marketplace.json index 431d91c..16c8080 100644 --- a/plugins/.claude-plugin/marketplace.json +++ b/plugins/.claude-plugin/marketplace.json @@ -73,6 +73,11 @@ "name": "investigate", "source": "./investigate", "description": "Run the research loop, don't just record it: the doing-stage twin of /study (understand) and /present (show). Turn a question (or a /study gap) into falsifiable hypotheses, test each in its own git worktree agent (one per hypothesis — cxc contained by default, cxx host when they share a toolchain — the anu swarm), adversarially verify the outcomes against their real evidence, and render one fixed-template HTML investigation in the atlas (~/.anu/atlas/investigations/): the verdict, what was learned, every hypothesis with its prediction/evidence/verdict, the roads not taken, the open frontier awaiting a human, and a link to the decision trail. Launch and watch a whole run in one shot — `investigate ` opens a window with the swarm plus a live `investigate watch` cockpit (the dig/delve twin), or draw the same data as a browser matrix with `trail swarm watch`. Execution is the swarm; the decision record is `trail` (git trailers, no LLM). Composes trail + swarm + box; command /investigate, launcher `investigate`, worker skill running-experiments." 
+ }, + { + "name": "modeling", + "source": "./modeling", + "description": "Do the quantitative science, don't just describe it: turn data (a file, a simulation, or a digitized figure) into a fitted, model-selected, uncertainty-quantified result. Frame candidate models, fit them with parameter covariance, select on evidence (AIC/BIC + cross-validation rather than in-sample fit), and discover the functional form when it's unknown (symbolic regression). Composes the Axiomatic model-fitting/equation-discovery tools (AxModelFitter / AxEquationExplorer / AxPlotToData / AxArgmin), ncn for heavy compute, and trail/present to record and show. The quantitative twin of /investigate; command /modeling, skill modeling." } ] } diff --git a/plugins/modeling/.claude-plugin/plugin.json b/plugins/modeling/.claude-plugin/plugin.json new file mode 100644 index 0000000..ce3e49a --- /dev/null +++ b/plugins/modeling/.claude-plugin/plugin.json @@ -0,0 +1,11 @@ +{ + "name": "modeling", + "version": "0.1.0", + "description": "Do the quantitative science, don't just describe it: turn data (a file, a simulation, or a digitized figure) into a fitted, model-selected, uncertainty-quantified result. Frame candidate models, fit them with parameter covariance, select on evidence (AIC/BIC + cross-validation rather than in-sample fit), and discover the functional form when it's unknown (symbolic regression). Composes the Axiomatic model-fitting/equation-discovery tools, ncn for heavy compute, and trail/present to record and show. 
Use when the user wants to fit a model, find a scaling law or equation, do model selection, quantify parameter uncertainty, digitize a plot into data, or test whether a proposed law holds.", + "author": { + "name": "Aadarsh Agarwal", + "url": "https://github.com/aadarwal" + }, + "license": "MIT", + "keywords": ["anu", "modeling", "model-fitting", "curve-fitting", "model-selection", "symbolic-regression", "equation-discovery", "uncertainty", "axiomatic", "ncn", "research", "physics"] +} diff --git a/plugins/modeling/commands/modeling.md b/plugins/modeling/commands/modeling.md new file mode 100644 index 0000000..b999646 --- /dev/null +++ b/plugins/modeling/commands/modeling.md @@ -0,0 +1,22 @@ +--- +description: Do the quantitative science — turn data (a file, a simulation, or a digitized figure) into a fitted, model-selected, uncertainty-quantified law. Frame candidate models, fit with covariance, select on AIC/BIC + cross-validation, and discover the form when it's unknown. +--- + +Model the data / question in `$ARGUMENTS`. + +Follow the **modeling** skill (`~/.local/share/anu/plugins/modeling/skills/modeling/SKILL.md`) end to end: + +1. **Get the data honestly** — a file, a simulation you run, or digitize a figure + with `AxPlotToData`. Label digitized data as digitized; never invent points. +2. **Frame 2–5 candidate models**, each with a reason — not a single curve. +3. **Fit each with parameter covariance** — value ± uncertainty for every parameter. +4. **Select on evidence** — `compare_models` + AIC/BIC + `cross_validate_model` + + residual diagnostics, not in-sample fit. Prefer the simplest model that survives. +5. **If no candidate is defensible, discover the form** with `AxEquationExplorer` + (symbolic regression), then make it earn its keep against simpler models. +6. **Deliver the verdict** — chosen model, parameters ± uncertainty, the + alternatives ruled out and why, a data+fit figure, and (inside `/investigate`) + the outcome recorded via `trail`. 
+ +Heavy fits/sweeps run on the right compute via `ncn` (GPU mesh box or the cluster). +Don't fabricate data or fits; always an error bar; always more than one model. diff --git a/plugins/modeling/skills/modeling/SKILL.md b/plugins/modeling/skills/modeling/SKILL.md new file mode 100644 index 0000000..3495ca8 --- /dev/null +++ b/plugins/modeling/skills/modeling/SKILL.md @@ -0,0 +1,112 @@ +--- +name: modeling +description: Do the quantitative science, don't just describe it — turn data (a file, a simulation, or a figure you digitize) into a fitted, model-selected, uncertainty-quantified result. Frame candidate models, fit them with parameter covariance, select by evidence (AIC/BIC + cross-validation, not in-sample fit), and discover the functional form when it's unknown (symbolic regression). Composes the Axiomatic model-fitting/equation-discovery tools, ncn for heavy compute, and trail/present to record and show. Use when the user wants to fit a model, find a scaling law or equation, do model selection, quantify uncertainty on parameters, digitize a plot into data, or test whether a proposed law holds. +--- + +# modeling — data → a law that holds, with its error bars + +This is the quantitative **do** stage made literal: anu's other research plugins +*write* and *animate* science (`science-writing`, `tikz`, `manim`); this one +**does** it. You take data and produce a *result that holds* — a model chosen on +evidence, parameters with uncertainty, and the alternatives you ruled out. + +You are a careful experimentalist, not a curve-fitter. Anyone can fit one curve +and declare victory. Your job is to fit several, quantify how well each is +actually supported, and report the one that survives — including the honest case +where nothing does. 
+ +This plugin **composes** rather than reinvents: +- **The Axiomatic model-fitting tools** — `AxModelFitter` (fit, compare, + information criteria, cross-validation, parameter covariance, R²), + `AxModelFitterV2` (generate + execute fitting code), `AxEquationExplorer` + (discover a functional form; check a proposed equation), `AxArgmin` + (optimization), `AxPlotToData` (digitize a figure into numbers). These are MCP + tools — load them with ToolSearch (`axiomatic model fitter`, `equation + explorer`, `plot to data`) when you need them. +- **`ncn`** — run a heavy fit / large sweep / symbolic-regression search on the + right compute (GPU mesh box or the Slurm cluster), not the laptop. +- **`trail`** + **`present`** — record the chosen model and the roads not taken + as the decision record, and show the fitted law as a figure. + +## The seam with the arc + +A quantitative **`/investigate` hypothesis** ("insertion loss falls +exponentially with taper length") or a **`/study` gap** that is really a +measurement is a modeling task. Frame it here, fit it, and the result — +*the law plus its uncertainty plus the rejected forms* — is the evidence that +`/investigate` records and `/present` shows. Modeling is where a hypothesis +becomes a number with an error bar. + +## Moves + +### 1. Get the data — honestly +Never invent data points. Acquire them one of three ways: +- **A file / array** the user already has. +- **A simulation** you write and run (small ones locally; heavy ones via `ncn`). +- **A figure** from a paper — digitize it with `AxPlotToData` + (`extract_numerical_series`; `split_multi_plot` first if several series share + axes). State that the data is digitized and roughly how accurate that is. + +If the data is too sparse or noisy to support any conclusion, say so and stop. +A null result is a result; a fabricated one is misconduct. + +### 2. 
Frame candidate models — not one +Like `/investigate` frames hypotheses, list **2–5 functional forms**, each with +a *reason* it's plausible (a mechanism, a limiting behaviour, a known scaling). +Fitting a single model can only confirm what you assumed. Competing models are +what make the answer falsifiable. + +### 3. Fit, and quantify uncertainty +Fit each candidate (`AxModelFitter.fit_model`, or `AxModelFitterV2` to +generate/execute custom code for awkward models). For every fit get the +**parameter covariance** (`compute_parameter_covariance`) — *a parameter without +an error bar is not a result.* Report each parameter as value ± uncertainty. + +### 4. Select by evidence — adversarially +The best in-sample fit is **not** the best model; more parameters always fit +better. Choose on out-of-sample evidence: +- `compare_models` + `calculate_information_criteria` (AIC/BIC) to penalize + complexity, +- `cross_validate_model` to check it predicts data it didn't see, +- residual diagnostics — structure left in the residuals means the model is + wrong however high its R². + +Prefer the simplest model that survives. When two are statistically tied, the +honest report is "the data don't distinguish them," not a coin flip. + +### 5. Discover the form when it's unknown +If you have no defensible candidate, find one: `AxEquationExplorer.find_functional_form` +(symbolic regression) proposes a form *from the data*; then treat it as a +candidate and run it back through steps 3–4 — discovered forms still must earn +their keep against simpler ones. To test a law someone proposed, use +`check_equation`. + +### 6. 
Verdict + artifact +Produce, not a wall of chat, a result: +- the **chosen model** and its parameters ± uncertainty, +- the **alternatives ruled out** and *why* (the roads not taken), +- residual / goodness diagnostics and the data provenance (measured / simulated / + digitized), +- a **figure** of data + fit + uncertainty band (hand to `/present`), +- and, when running inside `/investigate`, the outcome recorded via `trail` + (the fit is the evidence behind a `Hypothesis:`/`Outcome:` pair). + +## Compute — where `ncn` earns its keep +- **Light** (a handful of parameters, modest data) → fit locally / in a `box`. +- **Heavy** (large datasets, global optimization with `AxArgmin`, a wide + symbolic-regression search, k-fold cross-validation over big models) → `ncn` + to a GPU mesh box or `ncn --cluster` (Slurm + Apptainer); pull the fit and + the figure back. Sweeps are embarrassingly parallel — fan them with `mesh + spawn` when there are many. + +## Rules +- **Never fabricate data or a fit.** Digitized data is labelled as digitized. +- **Always an error bar.** A parameter or a prediction without uncertainty is + incomplete, not done. +- **Always more than one model.** Report what you rejected and why; a single fit + is an assumption wearing the costume of a result. +- **Evidence over R².** Selection is AIC/BIC + cross-validation; in-sample fit + alone never decides. +- **The law is the deliverable** — the chosen model, its parameters ± + uncertainty, the alternatives ruled out, and one figure. Cheap fits before + expensive searches. From 53aca71b56a6d71dd2a706ca002dbeac2f7bb89d Mon Sep 17 00:00:00 2001 From: Aadarsh Agarwal Date: Mon, 15 Jun 2026 00:38:26 -0400 Subject: [PATCH 3/6] atlas: the corpus index that makes anu's research compound (the agent arXiv) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every /study, /investigate, /map and trail render lands alone under ~/.anu/atlas. 
atlas indexes them into one fixed B&W page and collects their gaps, open questions and frontiers into a single queue — so finished work exposes what it left open and the next run pulls its question from there. It turns the find->understand->do->show arc from a line into a loop. - build.py scans ~/.anu/atlas + ~/.anu/trail into atlas.json (no LLM; the corpus IS the on-disk JSON). Only real edges are recorded (study->present, investigate->trail); the cross-arc gap->hypothesis link is left to the shared frontier rather than fabricated. - render.py injects into the fixed template (same 4 funnel), the anu map and trail, with 23 frontier items aggregated. tests/bash/atlas_test.sh (24 assertions); smoke + contracts stay green. Co-Authored-By: Claude Opus 4.8 (1M context) --- config/bash/fns/atlas | 49 ++++++++ plugins/.claude-plugin/marketplace.json | 5 + plugins/atlas/.claude-plugin/plugin.json | 11 ++ plugins/atlas/commands/atlas.md | 28 +++++ plugins/atlas/skills/atlas/SKILL.md | 60 ++++++++++ plugins/atlas/skills/atlas/build.py | 146 +++++++++++++++++++++++ plugins/atlas/skills/atlas/render.py | 25 ++++ plugins/atlas/skills/atlas/template.html | 103 ++++++++++++++++ tests/bash/atlas_test.sh | 71 +++++++++++ tests/bash/smoke_test.sh | 2 +- 10 files changed, 499 insertions(+), 1 deletion(-) create mode 100644 config/bash/fns/atlas create mode 100644 plugins/atlas/.claude-plugin/plugin.json create mode 100644 plugins/atlas/commands/atlas.md create mode 100644 plugins/atlas/skills/atlas/SKILL.md create mode 100644 plugins/atlas/skills/atlas/build.py create mode 100644 plugins/atlas/skills/atlas/render.py create mode 100644 plugins/atlas/skills/atlas/template.html create mode 100644 tests/bash/atlas_test.sh diff --git a/config/bash/fns/atlas b/config/bash/fns/atlas new file mode 100644 index 0000000..7bfef6d --- /dev/null +++ b/config/bash/fns/atlas @@ -0,0 +1,49 @@ +# ============================================================================== +# atlas — one word: 
render the corpus index across all anu research artifacts. +# +# Every /study, /investigate, /map and trail render lands under ~/.anu/atlas +# (and ~/.anu/trail). Each is a one-off today. `atlas` indexes them into one +# fixed B&W page and collects their gaps, open questions and frontiers into a +# single queue — so finished work exposes what it left open and the next +# investigation pulls its question from there. The corpus compounds; the agent +# arXiv. No LLM — the index IS the on-disk JSON (build.py + render.py). +# +# atlas build + render + open the corpus index +# atlas open re-open the last index without rebuilding +# atlas ls list the corpus records in the terminal +# +# The skill `atlas` (plugins/atlas) teaches agents to consult the frontier +# before starting work and to leave their own gaps behind when they finish. +# ============================================================================== + +_atlas_render() { # args: atlas_root trail_root skill_dir + python3 "$3/build.py" "$1" "$2" "$1/atlas.json" >/dev/null && + python3 "$3/render.py" "$1/atlas.json" "$1/index.html" >/dev/null +} + +atlas() { + local atlas_root="$HOME/.anu/atlas" trail_root="$HOME/.anu/trail" skill + skill="${ANU_PATH:-$HOME/.local/share/anu}/plugins/atlas/skills/atlas" + + case "${1:-}" in + open) + if [[ -f "$atlas_root/index.html" ]]; then + open "$atlas_root/index.html" 2>/dev/null || xdg-open "$atlas_root/index.html" 2>/dev/null + else + echo "no atlas yet — run: atlas" + fi + return ;; + ls) + _anu_require python3 jq || return 1 + [[ -f "$atlas_root/atlas.json" ]] || _atlas_render "$atlas_root" "$trail_root" "$skill" >/dev/null + jq -r '.records[] | " [\(.kind)]\t\(.title)"' "$atlas_root/atlas.json" 2>/dev/null \ + || echo "no atlas yet — run: atlas" + return ;; + esac + + _anu_require python3 || return 1 + mkdir -p "$atlas_root" + _atlas_render "$atlas_root" "$trail_root" "$skill" || { _anu_die "atlas: build failed"; return 1; } + echo "atlas → $atlas_root/index.html" + open "$atlas_root/index.html" 
2>/dev/null || xdg-open "$atlas_root/index.html" 2>/dev/null || true +} diff --git a/plugins/.claude-plugin/marketplace.json b/plugins/.claude-plugin/marketplace.json index 16c8080..f036af1 100644 --- a/plugins/.claude-plugin/marketplace.json +++ b/plugins/.claude-plugin/marketplace.json @@ -78,6 +78,11 @@ "name": "modeling", "source": "./modeling", "description": "Do the quantitative science, don't just describe it: turn data (a file, a simulation, or a digitized figure) into a fitted, model-selected, uncertainty-quantified result. Frame candidate models, fit them with parameter covariance, select on evidence (AIC/BIC + cross-validation rather than in-sample fit), and discover the functional form when it's unknown (symbolic regression). Composes the Axiomatic model-fitting/equation-discovery tools (AxModelFitter / AxEquationExplorer / AxPlotToData / AxArgmin), ncn for heavy compute, and trail/present to record and show. The quantitative twin of /investigate; command /modeling, skill modeling." + }, + { + "name": "atlas", + "source": "./atlas", + "description": "The corpus memory that makes anu's research compound: index every /study, /investigate, /map and trail render under ~/.anu/atlas into one fixed B&W page, and collect their gaps, open questions and frontiers into a single queue so finished work exposes what it left open and the next run pulls its question from there. A pure render (build.py + render.py, no LLM); the skill teaches agents to consult the frontier before starting and to deposit their own gaps when they finish. The agent arXiv. Command /atlas, shell command atlas, skill atlas." } ] } diff --git a/plugins/atlas/.claude-plugin/plugin.json b/plugins/atlas/.claude-plugin/plugin.json new file mode 100644 index 0000000..9cc5eb5 --- /dev/null +++ b/plugins/atlas/.claude-plugin/plugin.json @@ -0,0 +1,11 @@ +{ + "name": "atlas", + "version": "0.1.0", + "description": "The corpus memory that makes anu's research compound. 
Every /study, /investigate, /map and trail render is a one-off artifact under ~/.anu/atlas; atlas indexes them into one fixed B&W page and collects their gaps, open questions and frontiers into a single queue, so finished work exposes what it left open and the next run pulls its question from there. A pure render (build.py + render.py, no LLM); the skill teaches agents to consult the frontier before starting and to leave their own gaps behind when they finish. The agent arXiv. Command /atlas, shell command atlas, skill atlas. Use when the user wants to see everything studied/investigated so far, avoid redoing work, or pick the next question from the open frontier.", + "author": { + "name": "Aadarsh Agarwal", + "url": "https://github.com/aadarwal" + }, + "license": "MIT", + "keywords": ["anu", "atlas", "corpus", "agent-arxiv", "knowledge", "frontier", "research", "study", "investigate", "compounding", "memory"] +} diff --git a/plugins/atlas/commands/atlas.md b/plugins/atlas/commands/atlas.md new file mode 100644 index 0000000..7e8cfa7 --- /dev/null +++ b/plugins/atlas/commands/atlas.md @@ -0,0 +1,28 @@ +--- +description: Render the corpus index — every /study, /investigate, /map and trail in one B&W page, with their gaps, open questions and frontiers collected into one queue. The agent arXiv that makes the research arc a loop. +--- + +Render and open the corpus index for everything anu has produced. + +The fast path is the shell command — run it directly: + +```bash +atlas # build + render + open the corpus index +atlas open # re-open the last index without rebuilding +atlas ls # list the corpus records in the terminal +``` + +Then follow the **atlas** skill +(`~/.local/share/anu/plugins/atlas/skills/atlas/SKILL.md`) — the discipline of +*using* the corpus so it compounds: + +1. 
**Before starting work, consult the frontier.** Check whether the paper was + already `/study`'d or the question already sits in the frontier as a gap or an + investigation's next-step — pull it from there instead of starting cold. +2. **When you finish, leave your edges behind.** End a `/study` with real `gap` + and `open_questions`; end an `/investigate` with a real `frontier`. Those are + the inputs to the next run. + +The index records only real edges (study→present, investigate→trail) and never +infers a gap→hypothesis link — if you acted on a frontier item, cite its source +id in your new artifact so the edge becomes real. diff --git a/plugins/atlas/skills/atlas/SKILL.md b/plugins/atlas/skills/atlas/SKILL.md new file mode 100644 index 0000000..c3df549 --- /dev/null +++ b/plugins/atlas/skills/atlas/SKILL.md @@ -0,0 +1,60 @@ +--- +name: atlas +description: The corpus memory that makes anu's research compound. Every /study, /investigate, /map and trail render is a one-off artifact under ~/.anu/atlas; `atlas` indexes them into one fixed B&W page and collects their gaps, open questions and frontiers into a single queue, so finished work exposes what it left open and the next run pulls its question from there. The agent arXiv — a research group's accumulated, navigable memory. Use when the user wants to see everything studied/investigated so far, find what's already been done before starting, or pick the next question from the open frontier. +--- + +# atlas — the corpus, and why it compounds + +A research group's value is not any single result; it is the **accumulated body +of work** where findings cite and build on findings. anu produces durable +artifacts — a `/study` dossier, an `/investigate` verdict, a `/map`, a `trail` — +but each lands alone in `~/.anu/atlas`. 
The atlas is the layer that turns that +pile into a **corpus**: one index over everything, and one **frontier** — every +gap, open question and next-step the finished work left behind, in a single +queue the next run draws from. + +`atlas` is a **pure render** (`build.py` + `render.py`, no LLM): the corpus *is* +the on-disk JSON. Run it; this skill is the discipline of *using* it. + +``` +atlas build + render + open the corpus index +atlas open re-open the last index without rebuilding +atlas ls list the corpus records in the terminal +``` + +## The discipline — close the loop + +The arc is `find → understand → do → show`. The atlas is what makes it a *loop* +instead of a line. Two habits make the corpus compound: + +**Before you start — consult the frontier.** Don't begin a study or an +investigation cold. Run `atlas` (or read `~/.anu/atlas/atlas.json`) first: +- Has this paper already been `/study`'d? Build on the dossier, don't redo it. +- Is the question you're about to ask already sitting in the **frontier** as a + `/study` gap or another investigation's open frontier? Pull it from there — a + gap that became your hypothesis is exactly how one result seeds the next. +- Is there a related investigation whose verdict changes your framing? Cite it. + +**When you finish — leave your edges behind.** The frontier is only as good as +what each run deposits into it. A `/study` must end with real `gap` and +`open_questions`; an `/investigate` must end with a real `frontier` (the roads +not taken, the next test). Those fields are not decoration — they are the +*inputs to the next run*. Write them honestly; vague frontiers starve the loop. + +## What it links — and what it refuses to + +The index records **only real edges**: a `/study` to its `/present` demo (same +paper folder), an `/investigate` to its `trail`. It does **not** infer a +"this gap became that hypothesis" link — that would be a fabricated claim about +intent. 
Instead both ends surface in the shared frontier, where the connection +is visible but honest. If you *do* act on a specific frontier item, say so in +your new artifact (cite the source id); that makes the edge real, and the next +`atlas` build can show it. + +## Rules +- **Don't fabricate the corpus.** The index reflects what's on disk; if little + has been done, the honest atlas is short. +- **The frontier is the deliverable of the loop**, not a footnote. Finished work + that records no open questions has broken the chain. +- **Consult before you create; deposit when you're done.** That is the whole + point — knowledge that compounds instead of restarting. diff --git a/plugins/atlas/skills/atlas/build.py b/plugins/atlas/skills/atlas/build.py new file mode 100644 index 0000000..c60a3cf --- /dev/null +++ b/plugins/atlas/skills/atlas/build.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +"""Build the atlas corpus index: scan ~/.anu/atlas and ~/.anu/trail, emit atlas.json. + +Usage: build.py + +No LLM, no hidden state — the corpus IS the on-disk artifacts. Every /study, +/investigate, /map and trail render becomes one record; their gaps, open +questions and frontiers are collected into one frontier queue, so finished work +exposes what it left open and the next investigation can pull from it. That is +how the atlas compounds: a research group's accumulated, navigable memory. + +Only real edges are recorded (study->present, investigation->trail). The cross- +arc "a gap became a hypothesis" link is not inferred — it would be a fabricated +claim; instead both ends surface in the shared frontier, honestly. 
+""" +import json +import pathlib +import sys + + +def load(p): + try: + return json.loads(pathlib.Path(p).read_text()) + except Exception: + return None + + +def text_list(v): + """Normalize a field that may be a list of strings/objects into [str].""" + out = [] + if isinstance(v, list): + for x in v: + if isinstance(x, str): + out.append(x.strip()) + elif isinstance(x, dict): + t = x.get("text") or x.get("title") or x.get("question") or x.get("q") + if t: + out.append(str(t).strip()) + elif isinstance(v, str): + out.append(v.strip()) + return [s for s in out if s] + + +def repo_name(d, fallback): + """map.json / trail.json carry `repo` as an object {name, path, ...}.""" + r = d.get("repo") + if isinstance(r, dict): + return r.get("name") or fallback + if isinstance(r, str) and r: + return r + return fallback + + +def main(): + if len(sys.argv) != 4: + sys.exit("usage: build.py ") + atlas = pathlib.Path(sys.argv[1]) + trail = pathlib.Path(sys.argv[2]) + out = pathlib.Path(sys.argv[3]) + + records, frontier, edges = [], [], [] + + def add_frontier(kind, rid, title, items): + for t in items: + frontier.append({"kind": kind, "id": rid, "title": title, "text": t}) + + # --- studies (papers) --- + for sj in sorted(atlas.glob("papers/*/study.json")): + d = load(sj) + if not d: + continue + rid = sj.parent.name + paper = d.get("paper") if isinstance(d.get("paper"), dict) else {} + title = paper.get("title") or d.get("title") or rid + rec = {"kind": "study", "id": rid, "title": title, + "summary": d.get("summary", ""), "href": f"papers/{rid}/index.html", + "badges": [], "stats": {}} + if isinstance(d.get("neighbors"), list): + rec["stats"]["neighbors"] = len(d["neighbors"]) + gaps, oqs = text_list(d.get("gap")), text_list(d.get("open_questions")) + if gaps or oqs: + rec["stats"]["open"] = len(gaps) + len(oqs) + if (sj.parent / "present" / "index.html").exists(): + rec["badges"].append("present") + edges.append({"from": rec["href"], "to": 
f"papers/{rid}/present/index.html", + "kind": "study→present"}) + records.append(rec) + add_frontier("study", rid, title, gaps + oqs) + + # --- investigations --- + for ij in sorted(atlas.glob("investigations/*/investigation.json")): + d = load(ij) + if not d: + continue + rid = ij.parent.name + meta = d.get("investigation") if isinstance(d.get("investigation"), dict) else {} + title = meta.get("question") or meta.get("title") or d.get("title") or rid + funnel = d.get("funnel") if isinstance(d.get("funnel"), dict) else {} + rec = {"kind": "investigation", "id": rid, "title": title, + "summary": d.get("verdict", ""), "href": f"investigations/{rid}/index.html", + "badges": [], "stats": dict(funnel)} + if (ij.parent / "trail.html").exists(): + rec["badges"].append("trail") + edges.append({"from": rec["href"], "to": f"investigations/{rid}/trail.html", + "kind": "investigation→trail"}) + records.append(rec) + add_frontier("investigation", rid, title, text_list(d.get("frontier"))) + + # --- maps (repo dossiers): atlas//map.json, excluding the special dirs --- + for mj in sorted(atlas.glob("*/map.json")): + if mj.parent.name in ("papers", "investigations"): + continue + d = load(mj) + if not d: + continue + rid = mj.parent.name + rec = {"kind": "map", "id": rid, "title": repo_name(d, rid), + "summary": d.get("summary", ""), "href": f"{rid}/index.html", + "badges": [], "stats": {"components": len(d.get("components") or [])}} + records.append(rec) + add_frontier("map", rid, rid, text_list(d.get("open_questions"))) + + # --- trails (the sibling root ~/.anu/trail/) --- + for tj in sorted(trail.glob("*/trail.json")): + d = load(tj) + if not d: + continue + rid = tj.parent.name + nodes = d.get("nodes") if isinstance(d.get("nodes"), list) else [] + rmeta = d.get("repo") if isinstance(d.get("repo"), dict) else {} + records.append({"kind": "trail", "id": rid, "title": repo_name(d, rid), + "summary": d.get("summary") or rmeta.get("goal") or "", + "href": 
f"../trail/{rid}/index.html", + "badges": [], "stats": {"nodes": len(nodes)}}) + + counts = {} + for r in records: + counts[r["kind"]] = counts.get(r["kind"], 0) + 1 + + out.write_text(json.dumps( + {"counts": counts, "records": records, "frontier": frontier, "edges": edges}, + indent=2)) + print(out) + + +main() diff --git a/plugins/atlas/skills/atlas/render.py b/plugins/atlas/skills/atlas/render.py new file mode 100644 index 0000000..bc5f19b --- /dev/null +++ b/plugins/atlas/skills/atlas/render.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +"""Render atlas.json into index.html using the fixed template. + +Usage: render.py + +The template never varies — all per-corpus content is injected as one JSON blob +the page renders client-side. Same contract as map/study/trail. +""" +import json +import pathlib +import sys + + +def main(): + if len(sys.argv) != 3: + sys.exit("usage: render.py ") + src, out = pathlib.Path(sys.argv[1]), pathlib.Path(sys.argv[2]) + template = (pathlib.Path(__file__).resolve().parent / "template.html").read_text() + # block early; escape it inside strings. + data = json.dumps(json.loads(src.read_text())).replace(" + + + + +atlas + + + +

atlas

+

+ +

frontier

+

every open gap, question and next-step across the corpus — the queue the next investigation pulls from

+
    + +
    + + + + diff --git a/tests/bash/atlas_test.sh b/tests/bash/atlas_test.sh new file mode 100644 index 0000000..b132085 --- /dev/null +++ b/tests/bash/atlas_test.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# atlas — the corpus index. build.py scans the atlas + trail JSON into one +# record set and one frontier queue; render.py injects it into the fixed +# template. The frontier (gaps + open questions + investigation frontiers) is +# the compounding mechanism, so it gets the most coverage. +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$here/../lib/harness.sh" +source "$here/../lib/helpers.bash" + +t_suite "atlas" + +SKILL="$ANU_ROOT/plugins/atlas/skills/atlas" +atlas=$(mktmp); trail=$(mktmp) +mkdir -p "$atlas/papers/W123/present" "$atlas/investigations/inv-1" "$atlas/demo-repo" "$trail/demo-repo" + +cat > "$atlas/papers/W123/study.json" <<'JSON' +{"paper":{"title":"A test paper"},"summary":"sum","gap":["the open gap"],"open_questions":["an open q"],"neighbors":[1,2,3]} +JSON +: > "$atlas/papers/W123/present/index.html" +cat > "$atlas/investigations/inv-1/investigation.json" <<'JSON' +{"investigation":{"question":"does X beat Y?"},"verdict":"X wins","funnel":{"weighed":5,"hypotheses":3,"confirmed":2},"frontier":["try Z next"]} +JSON +: > "$atlas/investigations/inv-1/trail.html" +# repo is an object {name, path, ...} in real map.json / trail.json, not a string. +cat > "$atlas/demo-repo/map.json" <<'JSON' +{"repo":{"name":"demo-repo","path":"/x","branch":"master"},"summary":"a repo","components":[1,2],"open_questions":["map question"]} +JSON +cat > "$trail/demo-repo/trail.json" <<'JSON' +{"repo":{"name":"demo-repo","goal":"the goal"},"nodes":[1,2,3,4]} +JSON + +t_section "build.py assembles one record per artifact kind" +python3 "$SKILL/build.py" "$atlas" "$trail" "$atlas/atlas.json" >/dev/null +assert_ok $? 
"build.py runs" +assert_file "$atlas/atlas.json" "atlas.json written" +assert_jq "$atlas/atlas.json" '.counts.study' "1" "one study" +assert_jq "$atlas/atlas.json" '.counts.investigation' "1" "one investigation" +assert_jq "$atlas/atlas.json" '.counts.map' "1" "one repo map" +assert_jq "$atlas/atlas.json" '.counts.trail' "1" "one trail" +assert_jq "$atlas/atlas.json" '[.records[]|select(.kind=="study")][0].href' "papers/W123/index.html" "study links to its dossier" +assert_jq "$atlas/atlas.json" '[.records[]|select(.kind=="investigation")][0].summary' "X wins" "investigation summary is its verdict" +assert_jq "$atlas/atlas.json" '[.records[]|select(.kind=="investigation")][0].title' "does X beat Y?" "investigation title is its question" +# repo is an object in real artifacts — title must coerce to the name, not stringify the object. +assert_jq "$atlas/atlas.json" '[.records[]|select(.kind=="map")][0].title' "demo-repo" "map title coerces repo.name" +assert_jq "$atlas/atlas.json" '[.records[]|select(.kind=="trail")][0].title' "demo-repo" "trail title coerces repo.name" +assert_jq "$atlas/atlas.json" '[.records[]|select(.kind=="trail")][0].summary' "the goal" "trail summary falls back to repo.goal" + +t_section "frontier collects gaps, open questions and frontiers across kinds" +assert_jq "$atlas/atlas.json" '.frontier | length' "4" "gap + open_q + inv frontier + map question = 4" +assert_jq "$atlas/atlas.json" 'any(.frontier[]; .text=="try Z next")' "true" "investigation frontier included" +assert_jq "$atlas/atlas.json" 'any(.frontier[]; .text=="the open gap")' "true" "study gap included" +assert_jq "$atlas/atlas.json" 'any(.frontier[]; .text=="map question")' "true" "map open-question included" + +t_section "only real edges are recorded (no inferred gap->hypothesis)" +assert_jq "$atlas/atlas.json" 'any(.edges[]; .kind=="study→present")' "true" "study→present edge" +assert_jq "$atlas/atlas.json" 'any(.edges[]; .kind=="investigation→trail")' "true" 
"investigation→trail edge" +assert_jq "$atlas/atlas.json" '.edges | length' "2" "exactly the two real edges" + +t_section "render.py injects the corpus into the fixed template" +python3 "$SKILL/render.py" "$atlas/atlas.json" "$atlas/index.html" >/dev/null +assert_ok $? "render.py runs" +assert_file "$atlas/index.html" "index.html written" +html=$(cat "$atlas/index.html") +assert_contains "$html" "does X beat Y?" "a record title is embedded" +assert_not_contains "$html" "__ATLAS__" "placeholder fully replaced" + +t_section "atlas fn wires through core (deps guarded, no silent fail)" +source "$(anu_fns atlas)" 2>/dev/null +assert_contains "$(declare -f atlas)" "_anu_require" "atlas() guards python3" + +t_done diff --git a/tests/bash/smoke_test.sh b/tests/bash/smoke_test.sh index 12b9b64..6905d33 100644 --- a/tests/bash/smoke_test.sh +++ b/tests/bash/smoke_test.sh @@ -29,7 +29,7 @@ declare -A entry=( [review]=review [swarmext]=swarmx [configmap]=cfgmap [dashboard]=pd [agentlog]=agentlog [map]=map [dynlayout]=taa [agentlaunch]=al [meshsync]=meshsync [tile]=tile [init_project]=anu_init [relink]=relink - [delve]=delve + [delve]=delve [atlas]=atlas ) for file in "${!entry[@]}"; do fn="${entry[$file]}" From bfdd49915dfdc6cd53743344ef37ecab01a9aafa Mon Sep 17 00:00:00 2001 From: Aadarsh Agarwal Date: Mon, 15 Jun 2026 00:49:21 -0400 Subject: [PATCH 4/6] =?UTF-8?q?tend:=20the=20continuity=20layer=20?= =?UTF-8?q?=E2=80=94=20keep=20work=20healthy=20over=20time,=20self-heal,?= =?UTF-8?q?=20loop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The research arc was a line of commands a human re-invokes. tend makes it a loop: register checks that should keep passing, run them on a cadence, record the drift, and — when armed — spawn a contained agent (cxc) to self-heal the moment one breaks. This is the most-named, least-built gap from the build notes ("tend — zero infrastructure exists"). 
- config/bash/fns/tend: add / run / watch (live loop) / heal / log / rm / dash / cron, over one JSON per watch under ~/.local/share/anu/tend. Guarded through core; healers run contained (cxc), never with host creds. - config/bash/bin/tend: standalone wrapper so the cron heartbeat works without an interactive shell (functions aren't on PATH for cron). - plugins/tend: build.py + render.py + a fixed B&W health dashboard (the same template grammar as map/atlas/trail — autonomy you can see), plus a skill that teaches the judgment: --auto only for safe/reversible fixes, surface anything with judgment or irreversibility (a refuted claim is a result, not a bug). A watch can re-open the research frontier — tend closing the loop with atlas/trail. tests/bash/tend_test.sh (25 assertions); smoke (+tend) and contracts stay green. Co-Authored-By: Claude Opus 4.8 (1M context) --- config/bash/bin/tend | 8 + config/bash/fns/tend | 252 ++++++++++++++++++++++++ plugins/.claude-plugin/marketplace.json | 5 + plugins/tend/.claude-plugin/plugin.json | 11 ++ plugins/tend/commands/tend.md | 32 +++ plugins/tend/skills/tend/SKILL.md | 71 +++++++ plugins/tend/skills/tend/build.py | 52 +++++ plugins/tend/skills/tend/render.py | 24 +++ plugins/tend/skills/tend/template.html | 77 ++++++++ tests/bash/smoke_test.sh | 2 +- tests/bash/tend_test.sh | 69 +++++++ 11 files changed, 602 insertions(+), 1 deletion(-) create mode 100755 config/bash/bin/tend create mode 100644 config/bash/fns/tend create mode 100644 plugins/tend/.claude-plugin/plugin.json create mode 100644 plugins/tend/commands/tend.md create mode 100644 plugins/tend/skills/tend/SKILL.md create mode 100644 plugins/tend/skills/tend/build.py create mode 100644 plugins/tend/skills/tend/render.py create mode 100644 plugins/tend/skills/tend/template.html create mode 100644 tests/bash/tend_test.sh diff --git a/config/bash/bin/tend b/config/bash/bin/tend new file mode 100755 index 0000000..d5701ed --- /dev/null +++ b/config/bash/bin/tend @@ -0,0 
+1,8 @@ +#!/usr/bin/env bash +# Standalone wrapper so `tend` is callable as a command, not just a shell +# function — needed by the cron/launchd heartbeat (`tend cron on`), which runs +# in a shell that has not sourced the anu fns. Mirrors bin/box, bin/delve, bin/swarm. +ANU="${ANU_PATH:-$HOME/.local/share/anu}" +source "$ANU/config/bash/fns/core" +source "$ANU/config/bash/fns/tend" +tend "$@" diff --git a/config/bash/fns/tend b/config/bash/fns/tend new file mode 100644 index 0000000..40b83c8 --- /dev/null +++ b/config/bash/fns/tend @@ -0,0 +1,252 @@ +# ============================================================================== +# tend — keep work healthy over time. anu's continuity / autonomy layer. +# +# The research arc (find -> understand -> do -> show) is otherwise a line of +# commands you re-invoke. tend makes it a LOOP that keeps running: register +# checks that should keep passing, run them on a cadence, record the drift, and +# — when armed — spawn a CONTAINED agent to self-heal the moment one breaks. +# +# tend status: every watch, its last result, its drift +# tend add <name> [opts] -- <check> register a check (runs in the current repo) +# --every <dur> cadence: 30m, 1h, 2h, 1d, or seconds (default 1h) +# --heal "<cmd>" shell command to run on failure (or cxc for an agent) +# --auto self-heal automatically on failure (default: surface it) +# tend run [name] run all due watches now (or just one) +# tend watch live cockpit: re-run due watches on a loop +# tend heal <name> spawn the healer for a failing watch now +# tend log <name> the watch's history (drift over time) +# tend dash render + open the B&W health dashboard +# tend rm <name> remove a watch +# tend cron on|off install/remove the headless heartbeat (cron) +# +# State: ~/.local/share/anu/tend/<name>.json. The skill `tend` (plugins/tend) +# teaches what is worth tending and when to heal vs surface to a human. 
# ==============================================================================

# --- paths & clock ------------------------------------------------------------
# Directory holding one JSON state file per watch (honours XDG_DATA_HOME).
_tend_dir()   { printf '%s/anu/tend' "${XDG_DATA_HOME:-$HOME/.local/share}"; }
# State file of the watch named $1.
_tend_file()  { printf '%s/%s.json' "$(_tend_dir)" "$1"; }
# Directory of the dashboard scripts (build.py / render.py).
_tend_skill() { printf '%s/plugins/tend/skills/tend' "${ANU_PATH:-$HOME/.local/share/anu}"; }
# Current time as an ISO-8601 UTC timestamp.
_tend_now()   { date -u +%Y-%m-%dT%H:%M:%SZ; }

# Parse a duration (30m/2h/1d/90s/3600) into seconds on stdout.
# Arguments: $1 - duration; a bare integer means seconds (default 3600).
# Anything that is not <digits>[s|m|h|d] falls back to 3600. The numeric part
# is validated BEFORE it reaches $(( )), so malformed input ("1.5h", "h") can
# no longer raise an arithmetic error or be evaluated as an expression, and
# 10# forces base 10 so "08m" is 8 minutes rather than an invalid octal.
_tend_secs() {
  local d="${1:-3600}" n mult=1
  case "$d" in
    *s) n="${d%s}" ;;
    *m) n="${d%m}"; mult=60 ;;
    *h) n="${d%h}"; mult=3600 ;;
    *d) n="${d%d}"; mult=86400 ;;
    *)  n="$d" ;;
  esac
  if [[ -z "$n" || "$n" == *[!0-9]* ]]; then
    echo 3600
    return
  fi
  echo "$(( 10#$n * mult ))"
}

# Human-format a duration in seconds using the largest whole unit (d/h/m/s).
_tend_human() {
  local s="${1:-0}"
  if   (( s >= 86400 )); then echo "$(( s / 86400 ))d"
  elif (( s >= 3600 ));  then echo "$(( s / 3600 ))h"
  elif (( s >= 60 ));    then echo "$(( s / 60 ))m"
  else                        echo "${s}s"
  fi
}

# Seconds since an ISO-8601 UTC timestamp. Tries GNU `date -d` first, then the
# BSD `date -j -f` form. An empty, "null" or unparseable timestamp yields
# 999999999, which callers treat as "never run".
_tend_age() {
  local ts="${1:-}" past now
  if [[ -z "$ts" || "$ts" == "null" ]]; then
    echo 999999999
    return
  fi
  past="$(date -u -d "$ts" +%s 2>/dev/null \
    || date -u -j -f '%Y-%m-%dT%H:%M:%SZ' "$ts" +%s 2>/dev/null)"
  if [[ -z "$past" ]]; then
    echo 999999999
    return
  fi
  now="$(date -u +%s)"
  echo "$(( now - past ))"
}

# Print the usage text: the comment block at the top of this file.
# Arguments: $1 - file to read (optional; defaults to this source file).
# Printing starts at line 2 and stops at the closing "# =====" rule, so the
# output tracks the header's real length. The previous fixed range (2,33)
# overran the 24-line header and printed function definitions as help text.
_tend_help() {
  local src="${1:-${BASH_SOURCE[0]}}"
  awk 'NR == 1 { next }
       /^# ==========/ { exit }
       { sub(/^# ?/, ""); print }' "$src"
}

# --- register ----------------------------------------------------------------
# Register (or overwrite) a watch.
# Arguments: <name> [--every D] [--heal CMD|cxc] [--auto] -- <check...>
# Writes the watch's state file via jq and prints a confirmation line.
# Returns:   non-zero on a usage error or if the state file cannot be written.
_tend_add() {
  local name="${1:-}"
  shift 2>/dev/null
  if [[ -z "$name" || "$name" == --* ]]; then
    _anu_die "usage: tend add <name> [--every D] [--heal CMD|cxc] [--auto] -- <check>"
    return 1
  fi
  # The name becomes a file name under the state dir. A '/' would escape that
  # directory, and 'tend' would collide with tend.json, which the dashboard
  # writes and which run/status deliberately skip.
  if [[ "$name" == */* || "$name" == tend ]]; then
    _anu_die "tend add: invalid name '$name' (no '/', and 'tend' is reserved)"
    return 1
  fi

  local every="1h" heal="" auto="false"
  local -a cmd=()
  while (( $# > 0 )); do
    case "$1" in
      --every|--heal)
        # Without this guard `shift 2` fails when the value is missing, $1
        # never changes, and the loop spins forever.
        if (( $# < 2 )); then
          _anu_die "tend add: $1 needs a value"
          return 1
        fi
        if [[ "$1" == --every ]]; then every="$2"; else heal="$2"; fi
        shift 2
        ;;
      --auto) auto="true"; shift ;;
      --)     shift; cmd=("$@"); break ;;
      *)
        _anu_die "tend add: unexpected '$1' — put the command after --, e.g. tend add $name -- make test"
        return 1
        ;;
    esac
  done
  if (( ${#cmd[@]} == 0 )); then
    _anu_die "tend add: no command — tend add $name -- <check>"
    return 1
  fi

  local root repo secs f
  root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
  repo="$(basename "$root")"
  secs="$(_tend_secs "$every")"
  mkdir -p "$(_tend_dir)"
  f="$(_tend_file "$name")"
  # The check is stored as one string (words joined by spaces) and later run
  # through `bash -c`, so it is re-parsed by the shell at run time.
  jq -n --arg name "$name" --arg repo "$repo" --arg root "$root" \
    --arg cmd "${cmd[*]}" --argjson every "$secs" --arg heal "$heal" \
    --argjson auto "$auto" --arg created "$(_tend_now)" '
    {name:$name, repo:$repo, root:$root, cmd:$cmd, every:$every, heal:$heal,
     auto:$auto, created:$created, last_run:null, last_status:"unknown",
     last_ms:0, last_output:"", history:[]}' > "$f" \
    && echo "tend: watching '$name' — $repo · ${cmd[*]} · every $(_tend_human "$secs")${heal:+ · heal=$heal}$([[ $auto == true ]] && echo ' · auto')"
}

# --- run ---------------------------------------------------------------------
# Run the watch named $1 once, record the result in its state file, and on
# failure either auto-heal (when armed) or point at `tend heal`.
# Timing is derived from `date +%s`, so last_ms has one-second resolution.
_tend_run_one() {
  local f
  f="$(_tend_file "$1")"
  [[ -f "$f" ]] || { _anu_die "tend: no watch '$1' (see: tend)"; return 1; }

  local root cmd auto heal out rc start end ms status now trimmed tmp
  root="$(jq -r '.root' "$f")"
  cmd="$(jq -r '.cmd' "$f")"
  auto="$(jq -r '.auto' "$f")"
  heal="$(jq -r '.heal' "$f")"

  start="$(date +%s)"
  out="$(cd "$root" 2>/dev/null && bash -c "$cmd" 2>&1)"; rc=$?
  end="$(date +%s)"
  ms=$(( (end - start) * 1000 ))
  if (( rc == 0 )); then status=pass; else status=fail; fi
  now="$(_tend_now)"
  trimmed="$(printf '%s\n' "$out" | tail -40)"   # keep only the last 40 lines

  # Write beside the state file so the final mv is a same-directory rename,
  # and remove the temp file if jq fails instead of leaking it.
  if tmp="$(mktemp "${f}.XXXXXX")"; then
    if jq --arg t "$now" --arg s "$status" --argjson ms "$ms" --arg out "$trimmed" '
        .last_run=$t | .last_status=$s | .last_ms=$ms | .last_output=$out |
        .history = ((.history // []) + [{t:$t, status:$s, ms:$ms}] | .[-50:])' "$f" > "$tmp"
    then
      mv -- "$tmp" "$f"
    else
      rm -f -- "$tmp"
      _anu_warn "tend: could not record the result for '$1'"
    fi
  else
    _anu_warn "tend: could not create a temp file to record '$1'"
  fi

  if [[ "$status" == pass ]]; then
    echo "tend ✓ $1 (${ms}ms)"
  else
    _anu_warn "tend ✗ $1 failed"
    if [[ "$auto" == "true" && -n "$heal" && "$heal" != "null" ]]; then
      echo "tend: auto-healing '$1'…"; _tend_heal "$1"
    elif [[ -n "$heal" && "$heal" != "null" ]]; then
      echo "tend: run tend heal $1 to fix"
    fi
  fi
}

# Run one named watch ($1), or every watch whose cadence has elapsed.
_tend_run() {
  if [[ -n "${1:-}" ]]; then
    _tend_run_one "$1"
    return
  fi
  local d f name last every age any=0
  d="$(_tend_dir)"
  for f in "$d"/*.json; do
    [[ -f "$f" ]] || continue                      # unmatched glob stays literal
    [[ "${f##*/}" == tend.json ]] && continue      # dashboard data, not a watch
    any=1
    name="$(jq -r '.name' "$f")"
    last="$(jq -r '.last_run' "$f")"
    every="$(jq -r '.every' "$f")"
    # A hand-edited or damaged state file must not break the arithmetic below.
    [[ "$every" =~ ^[0-9]+$ ]] || every=3600
    age="$(_tend_age "$last")"
    if (( age >= every )); then
      _tend_run_one "$name"
    fi
  done
  (( any )) || echo "tend: no watches — add one: tend add <name> -- <check>"
}

# --- status ------------------------------------------------------------------
# One line per watch: name, repo, last result, age of the last run, command.
_tend_status() {
  local d
  d="$(_tend_dir)"
  if ! compgen -G "$d/*.json" >/dev/null 2>&1; then
    echo "tend: no watches yet — tend add <name> -- <check>"
    return
  fi
  local f name repo status last age agestr icon cmd
  for f in "$d"/*.json; do
    [[ -f "$f" ]] || continue
    [[ "${f##*/}" == tend.json ]] && continue
    name="$(jq -r '.name' "$f")"
    repo="$(jq -r '.repo' "$f")"
    status="$(jq -r '.last_status' "$f")"
    last="$(jq -r '.last_run' "$f")"
    cmd="$(jq -r '.cmd' "$f")"
    case "$status" in
      pass) icon="✓ ok " ;;
      fail) icon="✗ FAIL" ;;
      *)    icon="· new " ;;
    esac
    age="$(_tend_age "$last")"
    agestr="$(_tend_human "$age")"
    (( age >= 999999999 )) && agestr="never"
    printf ' %-18s %-12s %-7s %-7s %s\n' "$name" "$repo" "$icon" "$agestr" "$cmd"
  done
}

# --- heal --------------------------------------------------------------------
# Compose the instruction handed to a healer agent for watch $1: the repo, the
# failing check and its recorded output. Returns 1 if the watch does not exist.
_tend_heal_brief() {
  local f
  f="$(_tend_file "$1")"
  [[ -f "$f" ]] || return 1
  local cmd root out
  cmd="$(jq -r '.cmd' "$f")"
  root="$(jq -r '.root' "$f")"
  out="$(jq -r '.last_output // ""' "$f")"
  printf 'A tended check is failing and needs a fix. Repo: %s. The check is: %s . Recent output:\n%s\nFind the cause and fix it so the check passes again; make a focused commit and do not touch unrelated code. When done, %s must pass.' "$root" "$cmd" "$out" "$cmd"
}

# <root> <brief> — spawn a contained healer.
# Inside tmux: open a split in <root> and type a `cxc '<brief>'` command there.
# Otherwise, if `box` is installed: run the healer headless in a box.
# With neither available: warn, print the brief, and return 1.
_tend_heal_run() {
  local root="$1" brief="$2"
  # Single quotes are dropped because the brief is wrapped in '...' when it is
  # typed into the pane; an embedded quote would end that string early.
  brief="${brief//\'/}"
  if [[ -n "${TMUX:-}" ]] && command -v tmux &>/dev/null; then
    local pane
    pane="$(tmux split-window -h -P -F '#{pane_id}' -c "$root" 2>/dev/null)"
    if [[ -n "$pane" ]]; then
      tmux send-keys -t "$pane" "cxc '${brief}'" Enter
      echo "tend: healer spawned in pane $pane (contained cxc)"
      return 0
    fi
  fi
  if command -v box &>/dev/null; then
    echo "tend: healing headless in a contained box…"
    ( cd "$root" 2>/dev/null && box claude --dangerously-skip-permissions -p "${brief}" )
    return $?
  fi
  _anu_warn "tend: no runtime to heal (need tmux+box, or box). Brief follows:"
  printf '%s\n' "$brief"
  return 1
}

# Heal watch $1: run its configured heal command in the watch's repo, or, when
# the heal is empty / "cxc" / null, spawn an agent with the generated brief.
_tend_heal() {
  local f
  f="$(_tend_file "$1")"
  [[ -f "$f" ]] || { _anu_die "tend: no watch '$1'"; return 1; }
  local root heal
  root="$(jq -r '.root' "$f")"
  heal="$(jq -r '.heal' "$f")"
  if [[ -n "$heal" && "$heal" != "cxc" && "$heal" != "null" ]]; then
    echo "tend: running heal command for '$1'…"
    ( cd "$root" 2>/dev/null && bash -c "$heal" )
    return $?
  fi
  _tend_heal_run "$root" "$(_tend_heal_brief "$1")"
}

# --- misc --------------------------------------------------------------------
# Print the recorded history (timestamp, status, duration) of watch $1.
_tend_log() {
  local f
  f="$(_tend_file "$1")"
  [[ -f "$f" ]] || { _anu_die "tend: no watch '$1'"; return 1; }
  jq -r '.history[] | " \(.t) \(.status) \(.ms)ms"' "$f"
}

# Delete the state file of watch $1.
_tend_rm() {
  local f
  f="$(_tend_file "$1")"
  [[ -f "$f" ]] || { _anu_die "tend: no watch '$1'"; return 1; }
  rm -f -- "$f" && echo "tend: removed '$1'"
}

# Foreground loop: run whatever is due, sleep 30s, repeat until interrupted.
_tend_watch_loop() {
  echo "tend watch — running due checks every 30s (Ctrl-C to stop)"
  while true; do
    _tend_run
    sleep 30
  done
}

# Build tend.json from the state dir, render index.html, and try to open it.
_tend_dash() {
  _anu_require python3 || return 1
  local d skill out
  d="$(_tend_dir)"
  skill="$(_tend_skill)"
  out="$d/index.html"
  mkdir -p "$d"
  if ! { python3 "$skill/build.py" "$d" "$d/tend.json" >/dev/null \
      && python3 "$skill/render.py" "$d/tend.json" "$out" >/dev/null; }; then
    _anu_die "tend: dash build failed"
    return 1
  fi
  echo "tend dash → $out"
  # Best effort: a machine without a GUI opener still gets the path above.
  open "$out" 2>/dev/null || xdg-open "$out" 2>/dev/null || true
}

# Install (on) or remove (off) the cron entry that runs `tend run` every 30
# minutes. The entry is tagged "# anu-tend" so it can be found and replaced
# without touching the rest of the user's crontab.
_tend_cron() {
  _anu_require crontab || return 1
  local bin line
  bin="${ANU_PATH:-$HOME/.local/share/anu}/config/bash/bin/tend"
  line="*/30 * * * * $bin run >/dev/null 2>&1 # anu-tend"
  case "${1:-}" in
    on)
      ( crontab -l 2>/dev/null | grep -v '# anu-tend'; echo "$line" ) | crontab - \
        && echo "tend: heartbeat installed (cron, every 30m) → $bin run"
      ;;
    off)
      ( crontab -l 2>/dev/null | grep -v '# anu-tend' ) | crontab - \
        && echo "tend: heartbeat removed"
      ;;
    *)
      echo "usage: tend cron on|off"
      ;;
  esac
}

# Entry point: dispatch `tend <subcommand> [args...]`; no subcommand = status.
# Subcommands that read or write watch state require jq up front, so a missing
# tool is reported by name rather than as a failure inside a helper.
tend() {
  local sub="${1:-status}"
  shift 2>/dev/null
  case "$sub" in
    add)            _anu_require jq || return 1; _tend_add "$@" ;;
    run)            _anu_require jq || return 1; _tend_run "$@" ;;
    watch|loop)     _anu_require jq || return 1; _tend_watch_loop ;;
    heal)           _anu_require jq || return 1; _tend_heal "$@" ;;
    log)            _anu_require jq || return 1; _tend_log "$@" ;;
    rm|remove)      _tend_rm "$@" ;;
    dash)           _tend_dash ;;
    cron)           _tend_cron "$@" ;;
    status|ls|"")   _anu_require jq || return 1; _tend_status ;;
    help|-h|--help) _tend_help ;;
    *)              _anu_die "tend: unknown command '$sub' (try: tend help)"; return 1 ;;
  esac
}

# NOTE(review): in the collapsed patch text the next hunk shared a physical line
# with the end of tend(). It is kept verbatim below so no patch content is lost.
# diff --git a/plugins/.claude-plugin/marketplace.json b/plugins/.claude-plugin/marketplace.json index f036af1..3d3b399 100644 --- a/plugins/.claude-plugin/marketplace.json +++ b/plugins/.claude-plugin/marketplace.json @@ -83,6 +83,11 @@ "name": "atlas", "source": "./atlas", "description": "The corpus memory that makes anu's research compound: index every /study, /investigate, /map and trail render under ~/.anu/atlas into one fixed B&W page, and collect their gaps, open questions and frontiers into a single queue so finished work exposes what it left open and the next run pulls its question from there. A pure render (build.py + render.py, no LLM); the skill teaches agents to consult the frontier before starting and to deposit their own gaps when they finish. The agent arXiv. Command /atlas, shell command atlas, skill atlas." + }, + { + "name": "tend", + "source": "./tend", + "description": "Keep work healthy over time — anu's continuity layer. Register checks that should keep passing (tests, a built artifact, an invariant, a benchmark, a deploy, or a research frontier going stale), run them on a cadence, record the drift, and — when armed — spawn a contained agent (cxc) to self-heal the moment one breaks. Turns the find->understand->do->show arc from commands a human re-invokes into a loop that keeps running, with a B&W health dashboard and a headless cron heartbeat. Shell command tend (add/run/watch/heal/dash/cron); the skill teaches what is worth tending and when to heal vs surface to a human."
} ] } diff --git a/plugins/tend/.claude-plugin/plugin.json b/plugins/tend/.claude-plugin/plugin.json new file mode 100644 index 0000000..2fcf3e1 --- /dev/null +++ b/plugins/tend/.claude-plugin/plugin.json @@ -0,0 +1,11 @@ +{ + "name": "tend", + "version": "0.1.0", + "description": "Keep work healthy over time — anu's continuity layer. Register checks that should keep passing (tests, a built artifact, an invariant, a benchmark, a deploy, or a research frontier going stale), run them on a cadence, record the drift, and — when armed — spawn a contained agent (cxc) to self-heal the moment one breaks. Turns the find→understand→do→show arc from commands a human re-invokes into a loop that keeps running, with a B&W health dashboard (autonomy you can see) and a headless cron heartbeat. The shell command `tend` does the work; this skill teaches what is worth tending and when to heal vs surface to a human. Use when the user wants something watched/maintained over time, a self-healing check, a periodic verification, scheduled re-runs, or to keep a long-running result from rotting.", + "author": { + "name": "Aadarsh Agarwal", + "url": "https://github.com/aadarwal" + }, + "license": "MIT", + "keywords": ["anu", "tend", "continuity", "autonomy", "self-healing", "cron", "verification", "watch", "monitoring", "research", "frontier"] +} diff --git a/plugins/tend/commands/tend.md b/plugins/tend/commands/tend.md new file mode 100644 index 0000000..f8a7834 --- /dev/null +++ b/plugins/tend/commands/tend.md @@ -0,0 +1,32 @@ +--- +description: Keep work healthy over time — register checks that should keep passing, run them on a cadence, record drift, and (armed) spawn a contained agent to self-heal when one breaks. The continuity layer that turns the research arc into a loop. +--- + +Set up or review tended checks for the work in `$ARGUMENTS` (default: this repo). 
+ +The fast path is the shell command — run it directly: + +```bash +tend # status: every watch, its last result, its drift +tend add <name> --every 6h -- tests/run.sh # register a check +tend add <name> --every 1d --heal cxc --auto -- <cmd> # self-healing (armed) +tend run # run all due watches now +tend watch # live cockpit (re-run due watches on a loop) +tend dash # B&W health dashboard +tend cron on # headless heartbeat (cron runs `tend run`) +``` + +Then follow the **tend** skill +(`~/.local/share/anu/plugins/tend/skills/tend/SKILL.md`) for the judgment: + +1. **Pick what's worth tending** — a suite/build that must stay green, an + invariant or benchmark a result depends on, a served demo, or a stale research + frontier the arc should re-open. +2. **Heal vs surface** — arm `--auto` only for safe, reversible, well-scoped fixes + (flaky test, formatting, re-render) that land as a reviewable commit in a + contained box. Surface, never auto-heal, anything with judgment or + irreversibility — a refuted claim is a result to look at, not a bug to patch. +3. **Keep checks deterministic** — a watch that flaps teaches nothing. + +Healers run contained (`cxc`); the conductor stays on the host. Drift over time is +the signal — the recorded history matters more than any single run. diff --git a/plugins/tend/skills/tend/SKILL.md b/plugins/tend/skills/tend/SKILL.md new file mode 100644 index 0000000..dc61445 --- /dev/null +++ b/plugins/tend/skills/tend/SKILL.md @@ -0,0 +1,71 @@ +--- +name: tend +description: Keep work healthy over time — anu's continuity layer. Register checks that should keep passing (tests, a built artifact, an invariant, a benchmark, a deploy, or a research frontier going stale), run them on a cadence, record the drift, and — when armed — spawn a contained agent to self-heal the moment one breaks. Turns the find→understand→do→show arc from commands a human re-invokes into a loop that keeps running.
Use when the user wants something watched/maintained over time, a self-healing check, a periodic verification, scheduled re-runs, or to keep a long-running result from rotting. +--- + +# tend — the loop that keeps running + +anu's research arc is otherwise a *line*: a human runs `/study`, then +`/investigate`, then `/present`, each by hand. `tend` is the layer that makes it +a **loop** — work that stays healthy and re-opens itself without a human in the +hot path. It is the answer to "who runs the check next week, and who fixes it +when it breaks?" + +A **watch** is the unit: *a check that should keep passing* + *what to do when it +doesn't*. Run it on a cadence, record every result (so drift is visible), and — +if armed — heal it in a **contained box** the moment it fails. + +``` +tend status — every watch, its last result, its drift +tend add <name> --every 1h --heal ""|cxc [--auto] -- <cmd> +tend run [name] run all due watches now (or one) +tend watch live cockpit: re-run due watches on a loop +tend heal <name> spawn the healer for a failing watch now +tend dash the B&W health dashboard (autonomy you can see) +tend cron on|off the headless heartbeat (cron runs `tend run`) +``` + +## What is worth tending +- **Tests / builds** — a green suite that must stay green; a build artifact that + must keep building. `tend add tests --every 6h -- tests/run.sh`. +- **Invariants & claims** — a property a result depends on (a benchmark number, a + fixture, an external API contract). When it drifts, you want to know *that day*. +- **Served things** — a `/present` demo or an `ncn` endpoint that should stay up. +- **The research frontier** — the deepest use: an investigation's `frontier` or a + `/study` gap that is sitting *stale*. A watch can re-open the arc — surface the + oldest open frontier item, or (armed) spawn an `/investigate` on it — so the + corpus keeps moving even when no one is driving. This is `tend` closing the + loop with `atlas` and `trail`. + +## Heal vs.
surface — the judgment +Healing is powerful and contained (`cxc` = a Claude agent in a disposable box), +but autonomy needs a leash: +- **`--auto` only for safe, reversible, well-scoped fixes** — a flaky test, a + formatting drift, a regenerated lockfile, a re-render. The blast radius is one + worktree and the change is reviewable as a commit. +- **Surface, don't auto-heal, anything with judgment or irreversibility** — a + failing scientific claim, a schema change, anything that touches data or deploys + or money. `tend` should ping a human, not "fix" the science. The session logs + call the human-in-the-loop the real bottleneck; respect it. A refuted claim is a + *result to look at*, not a bug to patch away. +- The conductor stays on the host; **healers run contained** (`cxc`), never with + host credentials. + +## How it works +- State is one JSON per watch under `~/.local/share/anu/tend/`. The history is + capped, so drift stays legible. No database; no LLM in the scheduler. +- `tend watch` is the attended heartbeat (a live pane, the trail/investigate-watch + twin); `tend cron on` is the unattended one (cron calls the `bin/tend` wrapper + so it works without an interactive shell). +- `tend dash` renders the same fixed B&W template grammar as map/atlas/trail — + every watch with a pass/fail sparkline, failing ones flagged. Autonomy you can + *see*. + +## Rules +- **Contained by default.** Healers are `cxc`; never auto-heal with host creds. +- **Don't auto-heal the irreversible or the judgment-heavy.** Surface it. +- **A check must be deterministic** — a watch that flaps teaches nothing. Make the + command return clean pass/fail. +- **Drift is the signal.** The value is the recorded history, not any single run — + a check that silently started failing three days ago is exactly what tend exists + to catch. 
diff --git a/plugins/tend/skills/tend/build.py b/plugins/tend/skills/tend/build.py new file mode 100644 index 0000000..325d736 --- /dev/null +++ b/plugins/tend/skills/tend/build.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +"""Build the tend health index: scan the watch JSONs, emit tend.json. + +Usage: build.py + +No LLM — the health view IS the recorded watch state. Each watch contributes its +last status and a sparkline of its recent run history, so drift over time is +visible at a glance. +""" +import json +import pathlib +import sys + + +def load(p): + try: + return json.loads(pathlib.Path(p).read_text()) + except Exception: + return None + + +def main(): + if len(sys.argv) != 3: + sys.exit("usage: build.py ") + d, out = pathlib.Path(sys.argv[1]), pathlib.Path(sys.argv[2]) + watches = [] + for wf in sorted(d.glob("*.json")): + if wf.name == "tend.json": + continue + w = load(wf) + if not w or "name" not in w: + continue + hist = w.get("history") or [] + watches.append({ + "name": w.get("name"), "repo": w.get("repo"), "cmd": w.get("cmd"), + "status": w.get("last_status", "unknown"), "last_run": w.get("last_run"), + "every": w.get("every"), "heal": w.get("heal", ""), + "auto": bool(w.get("auto", False)), + "runs": len(hist), + "passes": sum(1 for h in hist if h.get("status") == "pass"), + "spark": [h.get("status") for h in hist][-40:], + }) + counts = { + "watches": len(watches), + "passing": sum(1 for w in watches if w["status"] == "pass"), + "failing": sum(1 for w in watches if w["status"] == "fail"), + } + out.write_text(json.dumps({"counts": counts, "watches": watches}, indent=2)) + print(out) + + +main() diff --git a/plugins/tend/skills/tend/render.py b/plugins/tend/skills/tend/render.py new file mode 100644 index 0000000..250b26c --- /dev/null +++ b/plugins/tend/skills/tend/render.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +"""Render tend.json into index.html using the fixed template. 
+ +Usage: render.py + +Same contract as map/atlas/trail: the template never varies, all per-run content +is one injected JSON blob the page renders client-side. +""" +import json +import pathlib +import sys + + +def main(): + if len(sys.argv) != 3: + sys.exit("usage: render.py ") + src, out = pathlib.Path(sys.argv[1]), pathlib.Path(sys.argv[2]) + template = (pathlib.Path(__file__).resolve().parent / "template.html").read_text() + data = json.dumps(json.loads(src.read_text())).replace(" + + + + +tend + + + +

    tend

    +

    +
    + + + + diff --git a/tests/bash/smoke_test.sh b/tests/bash/smoke_test.sh index 6905d33..c6983d4 100644 --- a/tests/bash/smoke_test.sh +++ b/tests/bash/smoke_test.sh @@ -29,7 +29,7 @@ declare -A entry=( [review]=review [swarmext]=swarmx [configmap]=cfgmap [dashboard]=pd [agentlog]=agentlog [map]=map [dynlayout]=taa [agentlaunch]=al [meshsync]=meshsync [tile]=tile [init_project]=anu_init [relink]=relink - [delve]=delve [atlas]=atlas + [delve]=delve [atlas]=atlas [tend]=tend ) for file in "${!entry[@]}"; do fn="${entry[$file]}" diff --git a/tests/bash/tend_test.sh b/tests/bash/tend_test.sh new file mode 100644 index 0000000..782eafa --- /dev/null +++ b/tests/bash/tend_test.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# tend — the continuity layer: register checks, run them, record drift, render +# the health dashboard. The scheduler is pure bash + jq (no LLM); the heal path +# spawns a contained agent and is covered at the brief-construction level. +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$here/../lib/harness.sh" +source "$here/../lib/helpers.bash" # sources config/bash/fns/core +export XDG_DATA_HOME="$(mktmp)" # isolate ~/.local/share/anu/tend +source "$(anu_fns tend)" + +t_suite "tend" + +t_section "duration parsing" +assert_eq "1800" "$(_tend_secs 30m)" "30m → 1800s" +assert_eq "3600" "$(_tend_secs 1h)" "1h → 3600s" +assert_eq "86400" "$(_tend_secs 1d)" "1d → 86400s" +assert_eq "90" "$(_tend_secs 90s)" "90s → 90s" +assert_eq "3600" "$(_tend_secs junk)" "garbage → 1h default" +assert_eq "2h" "$(_tend_human 7200)" "7200s → 2h" + +t_section "add registers a watch JSON" +tend add ci --every 30m -- true >/dev/null +f="$(_tend_file ci)" +assert_file "$f" "watch json written" +assert_jq "$f" '.name' "ci" "name" +assert_jq "$f" '.cmd' "true" "cmd" +assert_jq "$f" '.every' "1800" "cadence in seconds" +assert_jq "$f" '.last_status' "unknown" "starts unknown" + +t_section "run records pass/fail and grows history" +tend run ci >/dev/null +assert_jq "$f" 
'.last_status' "pass" "true → pass" +assert_jq "$f" '.history|length' "1" "history has one entry" +tend add broken -- false >/dev/null +tend run broken >/dev/null +bf="$(_tend_file broken)" +assert_jq "$bf" '.last_status' "fail" "false → fail" +assert_jq "$bf" '.last_output|type' "string" "output captured" + +t_section "status lists every watch" +out="$(tend status 2>&1)" +assert_contains "$out" "ci" "status shows ci" +assert_contains "$out" "broken" "status shows broken" + +t_section "heal brief carries the failing command and success condition" +tend add needsheal --heal cxc -- false >/dev/null +tend run needsheal >/dev/null +brief="$(_tend_heal_brief needsheal)" +assert_contains "$brief" "false" "brief names the failing check" +assert_contains "$brief" "must pass" "brief states the success condition" + +t_section "rm removes a watch" +tend rm ci >/dev/null +assert_eq "0" "$([[ -f "$f" ]] && echo 1 || echo 0)" "watch file gone" + +t_section "dash builds and renders the health view" +SKILL="$ANU_ROOT/plugins/tend/skills/tend" +python3 "$SKILL/build.py" "$(_tend_dir)" "$(_tend_dir)/tend.json" >/dev/null +assert_file "$(_tend_dir)/tend.json" "tend.json built" +assert_jq "$(_tend_dir)/tend.json" '.counts.failing >= 1' "true" "failing count reflects broken watch" +python3 "$SKILL/render.py" "$(_tend_dir)/tend.json" "$(_tend_dir)/index.html" >/dev/null +html="$(cat "$(_tend_dir)/index.html")" +assert_contains "$html" "broken" "watch embedded in dash" +assert_not_contains "$html" "__TEND__" "placeholder replaced" + +t_section "tend wires through core" +assert_contains "$(declare -f tend)" "_anu_require" "tend() guards jq" + +t_done From 384212e6494ae63980015c9dc5fbf7d7a9bd6331 Mon Sep 17 00:00:00 2001 From: Aadarsh Agarwal Date: Mon, 15 Jun 2026 01:02:24 -0400 Subject: [PATCH 5/6] docs: integrate tend/atlas/modeling into the README; de-slop new prose - README research arc: add the modeling (quantitative do), atlas (compound) and tend (continuity) rows; show atlas+tend 
closing the arc into a loop; note the atlas index and tend state under state/data. - Match the house style across all new files: drop em/en dashes (colon / period / parens per context; numeric ranges use an ASCII hyphen, 2-5, never an en dash) and "not X, but Y" phrasing, as the deslop-plugins branch is doing elsewhere. Zero U+2014/U+2013 remain. No behavior change; full suite green (the links failure is pre-existing and unrelated: the stale config/claude/skills symlink source). Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 9 +++-- config/bash/bin/tend | 2 +- config/bash/fns/atlas | 10 +++--- config/bash/fns/core | 6 ++-- config/bash/fns/tend | 18 +++++----- plugins/atlas/commands/atlas.md | 10 +++--- plugins/atlas/skills/atlas/SKILL.md | 26 +++++++------- plugins/atlas/skills/atlas/build.py | 4 +-- plugins/atlas/skills/atlas/render.py | 2 +- plugins/atlas/skills/atlas/template.html | 10 +++--- plugins/modeling/commands/modeling.md | 12 +++---- plugins/modeling/skills/modeling/SKILL.md | 42 +++++++++++------------ plugins/tend/.claude-plugin/plugin.json | 2 +- plugins/tend/commands/tend.md | 14 ++++---- plugins/tend/skills/tend/SKILL.md | 38 ++++++++++---------- plugins/tend/skills/tend/build.py | 2 +- plugins/tend/skills/tend/template.html | 4 +-- 17 files changed, 108 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index 5a1937a..b9e1ed2 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,8 @@ The harness ships a marketplace of [plugins](plugins/) (Claude Code / Pi skills, └── a Gap becomes ──┘ a hypothesis trail ── records goal · hypothesis · alternatives · outcome (git trailers) + atlas ── indexes every artefact; its frontier seeds the next run (the loop) + tend ── runs the checks on a cadence and self-heals (keeps it running) ``` | Stage | Command | What it does | @@ -73,6 +75,9 @@ The harness ships a marketplace of [plugins](plugins/) (Claude Code / Pi skills, | **do** | `/investigate` | *Run* the question, don't read about it.
**Frame** it into 2-5 falsifiable hypotheses, **fan out** one contained agent per hypothesis (the swarm), **judge** the outcomes adversarially against their evidence, **record** everything to the trail. | | **show** | `/present` | Turn a result into a served, visual presentation: pick the medium per finding (Manim animation · marimo app · static figure · served notebook), render on the right compute, reachable over Tailscale. | | **trail** | `trail` | The decision graph: hypotheses, choices, roads not taken, and outcomes recorded as git-commit trailers. Reconstructs from `git log` alone (no LLM), and renders as a B&W graph + tempo timeline with open hypotheses flagged as awaiting a verdict. | +| **do (quantitative)** | `/modeling` | *Do* the quantitative science: turn data (a file, a simulation, or a digitized figure) into a fitted, model-selected, uncertainty-quantified law: candidate models, parameter covariance, AIC/BIC + cross-validation, symbolic regression. The quantitative twin of `/investigate`; composes the Axiomatic tools. | +| **compound** | `atlas` | The corpus that makes the arc *compound*: indexes every study, investigation, map and trail into one B&W page and collects their gaps and frontiers into one queue the next run pulls from. The agent arXiv. | +| **continuity** | `tend` | The layer that turns the arc from a line into a *loop*: register checks that should keep passing, run them on a cadence, record drift, and (armed) spawn a contained agent to self-heal, so work stays healthy and the frontier re-opens without a human in the hot path. | ```bash /study arxiv.org/abs/2601.06712 # understand: a paper → dossier in the atlas @@ -82,7 +87,7 @@ trail # see the decision graph reconstruct fr /present # show: render the surviving result, serve it ``` -`/delve` conducts understand→show in one shot; `/map` and `/explore` render a whole repo into a one-page dossier. 
The full set lives in **[`plugins/`](plugins/)**: `science-writing` (verify-citations, peer-review, rebuttal, arxiv-prep), `tikz`, `manim`, `marimo`, and `writing-styles`. +`/delve` conducts understand→show in one shot; `/map` and `/explore` render a whole repo into a one-page dossier. `atlas` indexes the whole corpus so the frontier of one run seeds the next, and `tend` keeps the checks healthy over time; together they close the arc into a loop. The full set lives in **[`plugins/`](plugins/)**: `science-writing` (verify-citations, peer-review, rebuttal, arxiv-prep), `tikz`, `manim`, `marimo`, `writing-styles`, and `modeling` (quantitative fits). --- @@ -345,7 +350,7 @@ When `anu init` finds an existing config it offers three strategies: **merge** (
    State & data -anu exposes the repo at `~/.local/share/anu/` and stores runtime state there: `swarms/` (metadata, mailboxes), `reviews/` (cached summaries per SHA), `mesh/` (device cache), `box/` (contained-agent state; `box/claude` holds credentials, gitignored). The **atlas** at `~/.anu/atlas/` holds dossiers and investigations; the decision **trail** at `~/.anu/trail/`. The installer link manifest lives at `~/.local/state/anu/manifest` so `anu unlink` restores configs cleanly. All runtime state is gitignored; never commit it. +anu exposes the repo at `~/.local/share/anu/` and stores runtime state there: `swarms/` (metadata, mailboxes), `reviews/` (cached summaries per SHA), `mesh/` (device cache), `box/` (contained-agent state; `box/claude` holds credentials, gitignored). The **atlas** at `~/.anu/atlas/` holds dossiers and investigations, with `atlas` rendering one index over all of it at `~/.anu/atlas/index.html`; the decision **trail** at `~/.anu/trail/`; and `tend` keeps watch state under `~/.local/share/anu/tend/`. The installer link manifest lives at `~/.local/state/anu/manifest` so `anu unlink` restores configs cleanly. All runtime state is gitignored; never commit it.
    diff --git a/config/bash/bin/tend b/config/bash/bin/tend index d5701ed..01dd5ed 100755 --- a/config/bash/bin/tend +++ b/config/bash/bin/tend @@ -1,6 +1,6 @@ #!/usr/bin/env bash # Standalone wrapper so `tend` is callable as a command, not just a shell -# function — needed by the cron/launchd heartbeat (`tend cron on`), which runs +# function, needed by the cron/launchd heartbeat (`tend cron on`), which runs # in a shell that has not sourced the anu fns. Mirrors bin/box, bin/delve, bin/swarm. ANU="${ANU_PATH:-$HOME/.local/share/anu}" source "$ANU/config/bash/fns/core" diff --git a/config/bash/fns/atlas b/config/bash/fns/atlas index 7bfef6d..5d5f87a 100644 --- a/config/bash/fns/atlas +++ b/config/bash/fns/atlas @@ -1,12 +1,12 @@ # ============================================================================== -# atlas — one word: render the corpus index across all anu research artifacts. +# atlas: one word, render the corpus index across all anu research artifacts. # # Every /study, /investigate, /map and trail render lands under ~/.anu/atlas # (and ~/.anu/trail). Each is a one-off today. `atlas` indexes them into one # fixed B&W page and collects their gaps, open questions and frontiers into a -# single queue — so finished work exposes what it left open and the next +# single queue, so finished work exposes what it left open and the next # investigation pulls its question from there. The corpus compounds; the agent -# arXiv. No LLM — the index IS the on-disk JSON (build.py + render.py). +# arXiv. No LLM: the index IS the on-disk JSON (build.py + render.py). # # atlas build + render + open the corpus index # atlas open re-open the last index without rebuilding @@ -30,14 +30,14 @@ atlas() { if [[ -f "$atlas_root/index.html" ]]; then open "$atlas_root/index.html" 2>/dev/null || xdg-open "$atlas_root/index.html" 2>/dev/null else - echo "no atlas yet — run: atlas" + echo "no atlas yet. 
Run: atlas" fi return ;; ls) _anu_require python3 jq || return 1 [[ -f "$atlas_root/atlas.json" ]] || _atlas_render "$atlas_root" "$trail_root" "$skill" >/dev/null jq -r '.records[] | " [\(.kind)]\t\(.title)"' "$atlas_root/atlas.json" 2>/dev/null \ - || echo "no atlas yet — run: atlas" + || echo "no atlas yet. Run: atlas" return ;; esac diff --git a/config/bash/fns/core b/config/bash/fns/core index 0abb862..ba24051 100644 --- a/config/bash/fns/core +++ b/config/bash/fns/core @@ -1,10 +1,10 @@ # ============================================================================== -# core — shared primitives for anu shell functions. +# core: shared primitives for anu shell functions. # ============================================================================== # Sourced into every interactive shell (via the config/bash/fns/* glob, before # the commands that call it run) and by the bash test harness. This is the one # place anu's diagnostic vocabulary lives, so an unattended agent fails loud -# with a clear line instead of a cryptic error three calls deep — the single +# with a clear line instead of a cryptic error three calls deep: the single # most common way a full-auto cxc run silently dead-ends. # # _anu_warn "msg" # yellow "anu: msg" → stderr @@ -34,7 +34,7 @@ _anu_note() { _anu_msg note "$@"; } # Print a diagnostic and return 1. Callers chain `|| return`. _anu_die() { _anu_msg err "$@"; return 1; } -# _anu_require TOOL [TOOL...] — ensure each named command is on PATH. On the +# _anu_require TOOL [TOOL...]: ensure each named command is on PATH. On the # first run with any miss, print one line listing every missing tool and return # 1. The choke point for "a dependency isn't installed": swarm/ncn route their # hard requirements through here so the failure names the tool, not a symptom. 
diff --git a/config/bash/fns/tend b/config/bash/fns/tend index 40b83c8..b7f67b0 100644 --- a/config/bash/fns/tend +++ b/config/bash/fns/tend @@ -1,10 +1,10 @@ # ============================================================================== -# tend — keep work healthy over time. anu's continuity / autonomy layer. +# tend: keep work healthy over time. anu's continuity / autonomy layer. # # The research arc (find -> understand -> do -> show) is otherwise a line of # commands you re-invoke. tend makes it a LOOP that keeps running: register # checks that should keep passing, run them on a cadence, record the drift, and -# — when armed — spawn a CONTAINED agent to self-heal the moment one breaks. +# when armed, spawn a CONTAINED agent to self-heal the moment one breaks. # # tend status: every watch, its last result, its drift # tend add [opts] -- register a check (runs in the current repo) @@ -76,10 +76,10 @@ _tend_add() { --heal) heal="$2"; shift 2 ;; --auto) auto="true"; shift ;; --) shift; cmd=("$@"); break ;; - *) _anu_die "tend add: unexpected '$1' — put the command after --, e.g. tend add $name -- make test"; return 1 ;; + *) _anu_die "tend add: unexpected '$1'. Put the command after --, e.g. tend add $name -- make test"; return 1 ;; esac done - [[ ${#cmd[@]} -eq 0 ]] && { _anu_die "tend add: no command — tend add $name -- "; return 1; } + [[ ${#cmd[@]} -eq 0 ]] && { _anu_die "tend add: no command. 
Usage: tend add $name -- "; return 1; } local root repo secs f root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" repo="$(basename "$root")" @@ -92,7 +92,7 @@ _tend_add() { {name:$name, repo:$repo, root:$root, cmd:$cmd, every:$every, heal:$heal, auto:$auto, created:$created, last_run:null, last_status:"unknown", last_ms:0, last_output:"", history:[]}' > "$f" \ - && echo "tend: watching '$name' — $repo · ${cmd[*]} · every $(_tend_human "$secs")${heal:+ · heal=$heal}$([[ $auto == true ]] && echo ' · auto')" + && echo "tend: watching '$name': $repo · ${cmd[*]} · every $(_tend_human "$secs")${heal:+ · heal=$heal}$([[ $auto == true ]] && echo ' · auto')" } # --- run --------------------------------------------------------------------- @@ -136,13 +136,13 @@ _tend_run() { age="$(_tend_age "$last")" (( age >= every )) && _tend_run_one "$name" done - (( any )) || echo "tend: no watches — add one: tend add -- " + (( any )) || echo "tend: no watches yet. Add one: tend add -- " } # --- status ------------------------------------------------------------------ _tend_status() { local d; d="$(_tend_dir)" - compgen -G "$d/*.json" >/dev/null 2>&1 || { echo "tend: no watches yet — tend add -- "; return; } + compgen -G "$d/*.json" >/dev/null 2>&1 || { echo "tend: no watches yet. Add one: tend add -- "; return; } local f name repo status last every age agestr icon cmd for f in "$d"/*.json; do [[ -f "$f" ]] || continue @@ -163,7 +163,7 @@ _tend_heal_brief() { printf 'A tended check is failing and needs a fix. Repo: %s. The check is: %s . Recent output:\n%s\nFind the cause and fix it so the check passes again; make a focused commit and do not touch unrelated code. When done, %s must pass.' 
"$root" "$cmd" "$out" "$cmd" } -_tend_heal_run() { # — spawn a contained healer +_tend_heal_run() { # : spawn a contained healer local root="$1" brief="$2"; brief="${brief//\'/}" if [[ -n "$TMUX" ]] && command -v tmux &>/dev/null; then local pane @@ -205,7 +205,7 @@ _tend_rm() { } _tend_watch_loop() { - echo "tend watch — running due checks every 30s (Ctrl-C to stop)" + echo "tend watch: running due checks every 30s (Ctrl-C to stop)" while true; do _tend_run; sleep 30; done } diff --git a/plugins/atlas/commands/atlas.md b/plugins/atlas/commands/atlas.md index 7e8cfa7..13abf90 100644 --- a/plugins/atlas/commands/atlas.md +++ b/plugins/atlas/commands/atlas.md @@ -1,10 +1,10 @@ --- -description: Render the corpus index — every /study, /investigate, /map and trail in one B&W page, with their gaps, open questions and frontiers collected into one queue. The agent arXiv that makes the research arc a loop. +description: Render the corpus index, every /study, /investigate, /map and trail in one B&W page, with their gaps, open questions and frontiers collected into one queue. The agent arXiv that makes the research arc a loop. --- Render and open the corpus index for everything anu has produced. -The fast path is the shell command — run it directly: +The fast path is the shell command. Run it directly: ```bash atlas # build + render + open the corpus index @@ -13,16 +13,16 @@ atlas ls # list the corpus records in the terminal ``` Then follow the **atlas** skill -(`~/.local/share/anu/plugins/atlas/skills/atlas/SKILL.md`) — the discipline of +(`~/.local/share/anu/plugins/atlas/skills/atlas/SKILL.md`) for the discipline of *using* the corpus so it compounds: 1. **Before starting work, consult the frontier.** Check whether the paper was already `/study`'d or the question already sits in the frontier as a gap or an - investigation's next-step — pull it from there instead of starting cold. + investigation's next-step; pull it from there instead of starting cold. 2. 
**When you finish, leave your edges behind.** End a `/study` with real `gap` and `open_questions`; end an `/investigate` with a real `frontier`. Those are the inputs to the next run. The index records only real edges (study→present, investigate→trail) and never -infers a gap→hypothesis link — if you acted on a frontier item, cite its source +infers a gap→hypothesis link. If you acted on a frontier item, cite its source id in your new artifact so the edge becomes real. diff --git a/plugins/atlas/skills/atlas/SKILL.md b/plugins/atlas/skills/atlas/SKILL.md index c3df549..45c9c3a 100644 --- a/plugins/atlas/skills/atlas/SKILL.md +++ b/plugins/atlas/skills/atlas/SKILL.md @@ -1,15 +1,15 @@ --- name: atlas -description: The corpus memory that makes anu's research compound. Every /study, /investigate, /map and trail render is a one-off artifact under ~/.anu/atlas; `atlas` indexes them into one fixed B&W page and collects their gaps, open questions and frontiers into a single queue, so finished work exposes what it left open and the next run pulls its question from there. The agent arXiv — a research group's accumulated, navigable memory. Use when the user wants to see everything studied/investigated so far, find what's already been done before starting, or pick the next question from the open frontier. +description: The corpus memory that makes anu's research compound. Every /study, /investigate, /map and trail render is a one-off artifact under ~/.anu/atlas; `atlas` indexes them into one fixed B&W page and collects their gaps, open questions and frontiers into a single queue, so finished work exposes what it left open and the next run pulls its question from there. The agent arXiv: a research group's accumulated, navigable memory. Use when the user wants to see everything studied/investigated so far, find what's already been done before starting, or pick the next question from the open frontier. 
--- -# atlas — the corpus, and why it compounds +# atlas: the corpus, and why it compounds A research group's value is not any single result; it is the **accumulated body of work** where findings cite and build on findings. anu produces durable -artifacts — a `/study` dossier, an `/investigate` verdict, a `/map`, a `trail` — +artifacts (a `/study` dossier, an `/investigate` verdict, a `/map`, a `trail`), but each lands alone in `~/.anu/atlas`. The atlas is the layer that turns that -pile into a **corpus**: one index over everything, and one **frontier** — every +pile into a **corpus**: one index over everything, and one **frontier**: every gap, open question and next-step the finished work left behind, in a single queue the next run draws from. @@ -22,30 +22,30 @@ atlas open re-open the last index without rebuilding atlas ls list the corpus records in the terminal ``` -## The discipline — close the loop +## The discipline: close the loop The arc is `find → understand → do → show`. The atlas is what makes it a *loop* instead of a line. Two habits make the corpus compound: -**Before you start — consult the frontier.** Don't begin a study or an +**Before you start, consult the frontier.** Don't begin a study or an investigation cold. Run `atlas` (or read `~/.anu/atlas/atlas.json`) first: - Has this paper already been `/study`'d? Build on the dossier, don't redo it. - Is the question you're about to ask already sitting in the **frontier** as a - `/study` gap or another investigation's open frontier? Pull it from there — a + `/study` gap or another investigation's open frontier? Pull it from there: a gap that became your hypothesis is exactly how one result seeds the next. - Is there a related investigation whose verdict changes your framing? Cite it. -**When you finish — leave your edges behind.** The frontier is only as good as +**When you finish, leave your edges behind.** The frontier is only as good as what each run deposits into it. 
A `/study` must end with real `gap` and `open_questions`; an `/investigate` must end with a real `frontier` (the roads -not taken, the next test). Those fields are not decoration — they are the -*inputs to the next run*. Write them honestly; vague frontiers starve the loop. +not taken, the next test). Those fields are the *inputs to the next run*, not +decoration. Write them honestly; vague frontiers starve the loop. -## What it links — and what it refuses to +## What it links, and what it refuses to The index records **only real edges**: a `/study` to its `/present` demo (same paper folder), an `/investigate` to its `trail`. It does **not** infer a -"this gap became that hypothesis" link — that would be a fabricated claim about +"this gap became that hypothesis" link; that would be a fabricated claim about intent. Instead both ends surface in the shared frontier, where the connection is visible but honest. If you *do* act on a specific frontier item, say so in your new artifact (cite the source id); that makes the edge real, and the next @@ -57,4 +57,4 @@ your new artifact (cite the source id); that makes the edge real, and the next - **The frontier is the deliverable of the loop**, not a footnote. Finished work that records no open questions has broken the chain. - **Consult before you create; deposit when you're done.** That is the whole - point — knowledge that compounds instead of restarting. + point: knowledge that compounds instead of restarting. diff --git a/plugins/atlas/skills/atlas/build.py b/plugins/atlas/skills/atlas/build.py index c60a3cf..ecd363e 100644 --- a/plugins/atlas/skills/atlas/build.py +++ b/plugins/atlas/skills/atlas/build.py @@ -3,14 +3,14 @@ Usage: build.py -No LLM, no hidden state — the corpus IS the on-disk artifacts. Every /study, +No LLM, no hidden state: the corpus IS the on-disk artifacts. 
Every /study, /investigate, /map and trail render becomes one record; their gaps, open questions and frontiers are collected into one frontier queue, so finished work exposes what it left open and the next investigation can pull from it. That is how the atlas compounds: a research group's accumulated, navigable memory. Only real edges are recorded (study->present, investigation->trail). The cross- -arc "a gap became a hypothesis" link is not inferred — it would be a fabricated +arc "a gap became a hypothesis" link is not inferred. It would be a fabricated claim; instead both ends surface in the shared frontier, honestly. """ import json diff --git a/plugins/atlas/skills/atlas/render.py b/plugins/atlas/skills/atlas/render.py index bc5f19b..37f4acf 100644 --- a/plugins/atlas/skills/atlas/render.py +++ b/plugins/atlas/skills/atlas/render.py @@ -3,7 +3,7 @@ Usage: render.py -The template never varies — all per-corpus content is injected as one JSON blob +The template never varies: all per-corpus content is injected as one JSON blob the page renders client-side. Same contract as map/study/trail. """ import json diff --git a/plugins/atlas/skills/atlas/template.html b/plugins/atlas/skills/atlas/template.html index 4af5b83..8af7beb 100644 --- a/plugins/atlas/skills/atlas/template.html +++ b/plugins/atlas/skills/atlas/template.html @@ -21,7 +21,7 @@ } .lead { color:var(--mute); margin:-0.4rem 0 0.6rem; } - /* frontier — the corpus queue */ + /* frontier: the corpus queue */ ol.oq { counter-reset:q; list-style:none; } ol.oq li { counter-increment:q; padding-left:2.6em; text-indent:-2.6em; max-width:84ch; margin-bottom:0.45rem; } ol.oq li::before { content:counter(q,decimal-leading-zero) " "; color:var(--mute); } @@ -45,7 +45,7 @@

    atlas

    frontier

    -

    every open gap, question and next-step across the corpus — the queue the next investigation pulls from

    +

    every open gap, question and next-step across the corpus: the queue the next investigation pulls from

      @@ -59,7 +59,7 @@

      frontier

      const counts = D.counts || {}; document.getElementById("sub").textContent = ORDER.filter(([k]) => counts[k]).map(([k, l]) => counts[k] + " " + l).join(" · ") - || "empty — run /study, /investigate, /map, or trail to fill the corpus"; + || "empty: run /study, /investigate, /map, or trail to fill the corpus"; const recordHref = (kind, id) => { const r = (D.records || []).find(r => r.kind === kind && r.id === id); @@ -70,12 +70,12 @@

      frontier

      (D.frontier || []).forEach(f => { const li = el("li"); li.appendChild(document.createTextNode(f.text + " ")); - const a = el("a", "src", "— " + f.kind + " · " + clip(f.title || f.id, 40)); + const a = el("a", "src", f.kind + " · " + clip(f.title || f.id, 40)); a.href = recordHref(f.kind, f.id); li.appendChild(a); fr.appendChild(li); }); -if (!(D.frontier || []).length) fr.appendChild(el("li", "empty", "nothing open yet — finished work records its gaps and frontiers here")); +if (!(D.frontier || []).length) fr.appendChild(el("li", "empty", "nothing open yet: finished work records its gaps and frontiers here")); const groups = document.getElementById("groups"); ORDER.forEach(([kind, label]) => { diff --git a/plugins/modeling/commands/modeling.md b/plugins/modeling/commands/modeling.md index b999646..742daa3 100644 --- a/plugins/modeling/commands/modeling.md +++ b/plugins/modeling/commands/modeling.md @@ -1,20 +1,20 @@ --- -description: Do the quantitative science — turn data (a file, a simulation, or a digitized figure) into a fitted, model-selected, uncertainty-quantified law. Frame candidate models, fit with covariance, select on AIC/BIC + cross-validation, and discover the form when it's unknown. +description: Do the quantitative science: turn data (a file, a simulation, or a digitized figure) into a fitted, model-selected, uncertainty-quantified law. Frame candidate models, fit with covariance, select on AIC/BIC + cross-validation, and discover the form when it's unknown. --- Model the data / question in `$ARGUMENTS`. Follow the **modeling** skill (`~/.local/share/anu/plugins/modeling/skills/modeling/SKILL.md`) end to end: -1. **Get the data honestly** — a file, a simulation you run, or digitize a figure +1. **Get the data honestly:** a file, a simulation you run, or digitize a figure with `AxPlotToData`. Label digitized data as digitized; never invent points. -2. **Frame 2–5 candidate models**, each with a reason — not a single curve. -3. 
**Fit each with parameter covariance** — value ± uncertainty for every parameter. -4. **Select on evidence** — `compare_models` + AIC/BIC + `cross_validate_model` + +2. **Frame 2-5 candidate models**, each with a reason, never a single curve. +3. **Fit each with parameter covariance:** value ± uncertainty for every parameter. +4. **Select on evidence:** `compare_models` + AIC/BIC + `cross_validate_model` + residual diagnostics, not in-sample fit. Prefer the simplest model that survives. 5. **If no candidate is defensible, discover the form** with `AxEquationExplorer` (symbolic regression), then make it earn its keep against simpler models. -6. **Deliver the verdict** — chosen model, parameters ± uncertainty, the +6. **Deliver the verdict:** chosen model, parameters ± uncertainty, the alternatives ruled out and why, a data+fit figure, and (inside `/investigate`) the outcome recorded via `trail`. diff --git a/plugins/modeling/skills/modeling/SKILL.md b/plugins/modeling/skills/modeling/SKILL.md index 3495ca8..2f5a4a4 100644 --- a/plugins/modeling/skills/modeling/SKILL.md +++ b/plugins/modeling/skills/modeling/SKILL.md @@ -1,57 +1,57 @@ --- name: modeling -description: Do the quantitative science, don't just describe it — turn data (a file, a simulation, or a figure you digitize) into a fitted, model-selected, uncertainty-quantified result. Frame candidate models, fit them with parameter covariance, select by evidence (AIC/BIC + cross-validation, not in-sample fit), and discover the functional form when it's unknown (symbolic regression). Composes the Axiomatic model-fitting/equation-discovery tools, ncn for heavy compute, and trail/present to record and show. Use when the user wants to fit a model, find a scaling law or equation, do model selection, quantify uncertainty on parameters, digitize a plot into data, or test whether a proposed law holds. 
+description: Do the quantitative science, don't just describe it: turn data (a file, a simulation, or a figure you digitize) into a fitted, model-selected, uncertainty-quantified result. Frame candidate models, fit them with parameter covariance, select by evidence (AIC/BIC + cross-validation, not in-sample fit), and discover the functional form when it's unknown (symbolic regression). Composes the Axiomatic model-fitting/equation-discovery tools, ncn for heavy compute, and trail/present to record and show. Use when the user wants to fit a model, find a scaling law or equation, do model selection, quantify uncertainty on parameters, digitize a plot into data, or test whether a proposed law holds. --- -# modeling — data → a law that holds, with its error bars +# modeling: data → a law that holds, with its error bars This is the quantitative **do** stage made literal: anu's other research plugins *write* and *animate* science (`science-writing`, `tikz`, `manim`); this one -**does** it. You take data and produce a *result that holds* — a model chosen on +**does** it. You take data and produce a *result that holds*: a model chosen on evidence, parameters with uncertainty, and the alternatives you ruled out. You are a careful experimentalist, not a curve-fitter. Anyone can fit one curve and declare victory. Your job is to fit several, quantify how well each is -actually supported, and report the one that survives — including the honest case +actually supported, and report the one that survives, including the honest case where nothing does. 
This plugin **composes** rather than reinvents: -- **The Axiomatic model-fitting tools** — `AxModelFitter` (fit, compare, +- **The Axiomatic model-fitting tools:** `AxModelFitter` (fit, compare, information criteria, cross-validation, parameter covariance, R²), `AxModelFitterV2` (generate + execute fitting code), `AxEquationExplorer` (discover a functional form; check a proposed equation), `AxArgmin` (optimization), `AxPlotToData` (digitize a figure into numbers). These are MCP - tools — load them with ToolSearch (`axiomatic model fitter`, `equation + tools; load them with ToolSearch (`axiomatic model fitter`, `equation explorer`, `plot to data`) when you need them. -- **`ncn`** — run a heavy fit / large sweep / symbolic-regression search on the +- **`ncn`:** run a heavy fit / large sweep / symbolic-regression search on the right compute (GPU mesh box or the Slurm cluster), not the laptop. -- **`trail`** + **`present`** — record the chosen model and the roads not taken +- **`trail`** + **`present`:** record the chosen model and the roads not taken as the decision record, and show the fitted law as a figure. ## The seam with the arc A quantitative **`/investigate` hypothesis** ("insertion loss falls exponentially with taper length") or a **`/study` gap** that is really a -measurement is a modeling task. Frame it here, fit it, and the result — -*the law plus its uncertainty plus the rejected forms* — is the evidence that +measurement is a modeling task. Frame it here, fit it, and the result, +*the law plus its uncertainty plus the rejected forms*, is the evidence that `/investigate` records and `/present` shows. Modeling is where a hypothesis becomes a number with an error bar. ## Moves -### 1. Get the data — honestly +### 1. Get the data, honestly Never invent data points. Acquire them one of three ways: - **A file / array** the user already has. - **A simulation** you write and run (small ones locally; heavy ones via `ncn`). 
-- **A figure** from a paper — digitize it with `AxPlotToData` +- **A figure** from a paper: digitize it with `AxPlotToData` (`extract_numerical_series`; `split_multi_plot` first if several series share axes). State that the data is digitized and roughly how accurate that is. If the data is too sparse or noisy to support any conclusion, say so and stop. A null result is a result; a fabricated one is misconduct. -### 2. Frame candidate models — not one -Like `/investigate` frames hypotheses, list **2–5 functional forms**, each with +### 2. Frame candidate models, not one +Like `/investigate` frames hypotheses, list **2-5 functional forms**, each with a *reason* it's plausible (a mechanism, a limiting behaviour, a known scaling). Fitting a single model can only confirm what you assumed. Competing models are what make the answer falsifiable. @@ -59,16 +59,16 @@ what make the answer falsifiable. ### 3. Fit, and quantify uncertainty Fit each candidate (`AxModelFitter.fit_model`, or `AxModelFitterV2` to generate/execute custom code for awkward models). For every fit get the -**parameter covariance** (`compute_parameter_covariance`) — *a parameter without +**parameter covariance** (`compute_parameter_covariance`): *a parameter without an error bar is not a result.* Report each parameter as value ± uncertainty. -### 4. Select by evidence — adversarially +### 4. Select by evidence, adversarially The best in-sample fit is **not** the best model; more parameters always fit better. Choose on out-of-sample evidence: - `compare_models` + `calculate_information_criteria` (AIC/BIC) to penalize complexity, - `cross_validate_model` to check it predicts data it didn't see, -- residual diagnostics — structure left in the residuals means the model is +- residual diagnostics: structure left in the residuals means the model is wrong however high its R². Prefer the simplest model that survives. 
When two are statistically tied, the @@ -77,7 +77,7 @@ honest report is "the data don't distinguish them," not a coin flip. ### 5. Discover the form when it's unknown If you have no defensible candidate, find one: `AxEquationExplorer.find_functional_form` (symbolic regression) proposes a form *from the data*; then treat it as a -candidate and run it back through steps 3–4 — discovered forms still must earn +candidate and run it back through steps 3-4; discovered forms still must earn their keep against simpler ones. To test a law someone proposed, use `check_equation`. @@ -91,12 +91,12 @@ Produce, not a wall of chat, a result: - and, when running inside `/investigate`, the outcome recorded via `trail` (the fit is the evidence behind a `Hypothesis:`/`Outcome:` pair). -## Compute — where `ncn` earns its keep +## Compute: where `ncn` earns its keep - **Light** (a handful of parameters, modest data) → fit locally / in a `box`. - **Heavy** (large datasets, global optimization with `AxArgmin`, a wide symbolic-regression search, k-fold cross-validation over big models) → `ncn` to a GPU mesh box or `ncn --cluster` (Slurm + Apptainer); pull the fit and - the figure back. Sweeps are embarrassingly parallel — fan them with `mesh + the figure back. Sweeps are embarrassingly parallel; fan them with `mesh spawn` when there are many. ## Rules @@ -107,6 +107,6 @@ Produce, not a wall of chat, a result: is an assumption wearing the costume of a result. - **Evidence over R².** Selection is AIC/BIC + cross-validation; in-sample fit alone never decides. -- **The law is the deliverable** — the chosen model, its parameters ± +- **The law is the deliverable:** the chosen model, its parameters ± uncertainty, the alternatives ruled out, and one figure. Cheap fits before expensive searches. 
diff --git a/plugins/tend/.claude-plugin/plugin.json b/plugins/tend/.claude-plugin/plugin.json index 2fcf3e1..bb2f09d 100644 --- a/plugins/tend/.claude-plugin/plugin.json +++ b/plugins/tend/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "tend", "version": "0.1.0", - "description": "Keep work healthy over time — anu's continuity layer. Register checks that should keep passing (tests, a built artifact, an invariant, a benchmark, a deploy, or a research frontier going stale), run them on a cadence, record the drift, and — when armed — spawn a contained agent (cxc) to self-heal the moment one breaks. Turns the find→understand→do→show arc from commands a human re-invokes into a loop that keeps running, with a B&W health dashboard (autonomy you can see) and a headless cron heartbeat. The shell command `tend` does the work; this skill teaches what is worth tending and when to heal vs surface to a human. Use when the user wants something watched/maintained over time, a self-healing check, a periodic verification, scheduled re-runs, or to keep a long-running result from rotting.", + "description": "Keep work healthy over time: anu's continuity layer. Register checks that should keep passing (tests, a built artifact, an invariant, a benchmark, a deploy, or a research frontier going stale), run them on a cadence, record the drift, and, when armed, spawn a contained agent (cxc) to self-heal the moment one breaks. Turns the find→understand→do→show arc from commands a human re-invokes into a loop that keeps running, with a B&W health dashboard (autonomy you can see) and a headless cron heartbeat. The shell command `tend` does the work; this skill teaches what is worth tending and when to heal vs surface to a human. 
Use when the user wants something watched/maintained over time, a self-healing check, a periodic verification, scheduled re-runs, or to keep a long-running result from rotting.", "author": { "name": "Aadarsh Agarwal", "url": "https://github.com/aadarwal" diff --git a/plugins/tend/commands/tend.md b/plugins/tend/commands/tend.md index f8a7834..7ed3250 100644 --- a/plugins/tend/commands/tend.md +++ b/plugins/tend/commands/tend.md @@ -1,10 +1,10 @@ --- -description: Keep work healthy over time — register checks that should keep passing, run them on a cadence, record drift, and (armed) spawn a contained agent to self-heal when one breaks. The continuity layer that turns the research arc into a loop. +description: Keep work healthy over time: register checks that should keep passing, run them on a cadence, record drift, and (armed) spawn a contained agent to self-heal when one breaks. The continuity layer that turns the research arc into a loop. --- Set up or review tended checks for the work in `$ARGUMENTS` (default: this repo). -The fast path is the shell command — run it directly: +The fast path is the shell command. Run it directly: ```bash tend # status: every watch, its last result, its drift @@ -19,14 +19,14 @@ tend cron on # headless heartbeat (cron runs `tend run`) Then follow the **tend** skill (`~/.local/share/anu/plugins/tend/skills/tend/SKILL.md`) for the judgment: -1. **Pick what's worth tending** — a suite/build that must stay green, an +1. **Pick what's worth tending:** a suite/build that must stay green, an invariant or benchmark a result depends on, a served demo, or a stale research frontier the arc should re-open. -2. **Heal vs surface** — arm `--auto` only for safe, reversible, well-scoped fixes +2. **Heal vs surface.** Arm `--auto` only for safe, reversible, well-scoped fixes (flaky test, formatting, re-render) that land as a reviewable commit in a contained box. 
Surface, never auto-heal, anything with judgment or - irreversibility — a refuted claim is a result to look at, not a bug to patch. -3. **Keep checks deterministic** — a watch that flaps teaches nothing. + irreversibility; a refuted claim is a result to look at, not a bug to patch. +3. **Keep checks deterministic:** a watch that flaps teaches nothing. Healers run contained (`cxc`); the conductor stays on the host. Drift over time is -the signal — the recorded history matters more than any single run. +the signal; the recorded history matters more than any single run. diff --git a/plugins/tend/skills/tend/SKILL.md b/plugins/tend/skills/tend/SKILL.md index dc61445..601bdc8 100644 --- a/plugins/tend/skills/tend/SKILL.md +++ b/plugins/tend/skills/tend/SKILL.md @@ -1,22 +1,22 @@ --- name: tend -description: Keep work healthy over time — anu's continuity layer. Register checks that should keep passing (tests, a built artifact, an invariant, a benchmark, a deploy, or a research frontier going stale), run them on a cadence, record the drift, and — when armed — spawn a contained agent to self-heal the moment one breaks. Turns the find→understand→do→show arc from commands a human re-invokes into a loop that keeps running. Use when the user wants something watched/maintained over time, a self-healing check, a periodic verification, scheduled re-runs, or to keep a long-running result from rotting. +description: Keep work healthy over time: anu's continuity layer. Register checks that should keep passing (tests, a built artifact, an invariant, a benchmark, a deploy, or a research frontier going stale), run them on a cadence, record the drift, and, when armed, spawn a contained agent to self-heal the moment one breaks. Turns the find→understand→do→show arc from commands a human re-invokes into a loop that keeps running. 
Use when the user wants something watched/maintained over time, a self-healing check, a periodic verification, scheduled re-runs, or to keep a long-running result from rotting. --- -# tend — the loop that keeps running +# tend: the loop that keeps running anu's research arc is otherwise a *line*: a human runs `/study`, then `/investigate`, then `/present`, each by hand. `tend` is the layer that makes it -a **loop** — work that stays healthy and re-opens itself without a human in the +a **loop**: work that stays healthy and re-opens itself without a human in the hot path. It is the answer to "who runs the check next week, and who fixes it when it breaks?" A **watch** is the unit: *a check that should keep passing* + *what to do when it -doesn't*. Run it on a cadence, record every result (so drift is visible), and — -if armed — heal it in a **contained box** the moment it fails. +doesn't*. Run it on a cadence, record every result (so drift is visible), and, +if armed, heal it in a **contained box** the moment it fails. ``` -tend status — every watch, its last result, its drift +tend status: every watch, its last result, its drift tend add --every 1h --heal ""|cxc [--auto] -- tend run [name] run all due watches now (or one) tend watch live cockpit: re-run due watches on a loop @@ -26,24 +26,24 @@ tend cron on|off the headless heartbeat (cron runs `tend run`) ``` ## What is worth tending -- **Tests / builds** — a green suite that must stay green; a build artifact that +- **Tests / builds:** a green suite that must stay green; a build artifact that must keep building. `tend add tests --every 6h -- tests/run.sh`. -- **Invariants & claims** — a property a result depends on (a benchmark number, a +- **Invariants & claims:** a property a result depends on (a benchmark number, a fixture, an external API contract). When it drifts, you want to know *that day*. -- **Served things** — a `/present` demo or an `ncn` endpoint that should stay up. 
-- **The research frontier** — the deepest use: an investigation's `frontier` or a - `/study` gap that is sitting *stale*. A watch can re-open the arc — surface the - oldest open frontier item, or (armed) spawn an `/investigate` on it — so the +- **Served things:** a `/present` demo or an `ncn` endpoint that should stay up. +- **The research frontier:** the deepest use is an investigation's `frontier` or a + `/study` gap that is sitting *stale*. A watch can re-open the arc by surfacing the + oldest open frontier item, or (armed) spawning an `/investigate` on it, so the corpus keeps moving even when no one is driving. This is `tend` closing the loop with `atlas` and `trail`. -## Heal vs. surface — the judgment +## Heal vs. surface: the judgment Healing is powerful and contained (`cxc` = a Claude agent in a disposable box), but autonomy needs a leash: -- **`--auto` only for safe, reversible, well-scoped fixes** — a flaky test, a +- **`--auto` only for safe, reversible, well-scoped fixes:** a flaky test, a formatting drift, a regenerated lockfile, a re-render. The blast radius is one worktree and the change is reviewable as a commit. -- **Surface, don't auto-heal, anything with judgment or irreversibility** — a +- **Surface, don't auto-heal, anything with judgment or irreversibility:** a failing scientific claim, a schema change, anything that touches data or deploys or money. `tend` should ping a human, not "fix" the science. The session logs call the human-in-the-loop the real bottleneck; respect it. A refuted claim is a @@ -57,15 +57,15 @@ but autonomy needs a leash: - `tend watch` is the attended heartbeat (a live pane, the trail/investigate-watch twin); `tend cron on` is the unattended one (cron calls the `bin/tend` wrapper so it works without an interactive shell). 
-- `tend dash` renders the same fixed B&W template grammar as map/atlas/trail — +- `tend dash` renders the same fixed B&W template grammar as map/atlas/trail: every watch with a pass/fail sparkline, failing ones flagged. Autonomy you can *see*. ## Rules - **Contained by default.** Healers are `cxc`; never auto-heal with host creds. - **Don't auto-heal the irreversible or the judgment-heavy.** Surface it. -- **A check must be deterministic** — a watch that flaps teaches nothing. Make the +- **A check must be deterministic:** a watch that flaps teaches nothing. Make the command return clean pass/fail. -- **Drift is the signal.** The value is the recorded history, not any single run — - a check that silently started failing three days ago is exactly what tend exists +- **Drift is the signal.** The value is the recorded history, not any single run. + A check that silently started failing three days ago is exactly what tend exists to catch. diff --git a/plugins/tend/skills/tend/build.py b/plugins/tend/skills/tend/build.py index 325d736..0491c2f 100644 --- a/plugins/tend/skills/tend/build.py +++ b/plugins/tend/skills/tend/build.py @@ -3,7 +3,7 @@ Usage: build.py -No LLM — the health view IS the recorded watch state. Each watch contributes its +No LLM: the health view IS the recorded watch state. Each watch contributes its last status and a sparkline of its recent run history, so drift over time is visible at a glance. """ diff --git a/plugins/tend/skills/tend/template.html b/plugins/tend/skills/tend/template.html index 979aa9d..647834c 100644 --- a/plugins/tend/skills/tend/template.html +++ b/plugins/tend/skills/tend/template.html @@ -46,11 +46,11 @@

      tend

      const c = D.counts || {}; document.getElementById("sub").textContent = (c.watches || 0) + " watches · " + (c.passing || 0) + " passing · " + (c.failing || 0) + " failing" - + (c.failing ? " — needs attention" : ""); + + (c.failing ? " (needs attention)" : ""); const box = document.getElementById("watches"); const ws = D.watches || []; -if (!ws.length) box.appendChild(el("p", "empty", "no watches yet — tend add -- ")); +if (!ws.length) box.appendChild(el("p", "empty", "no watches yet: tend add -- ")); ws.forEach(w => { const card = el("div", "w" + (w.status === "fail" ? " fail" : "")); const top = el("div", "top"); From 2406b45403f509f26f7d024d1a4322bf69eb9a48 Mon Sep 17 00:00:00 2001 From: Aadarsh Agarwal Date: Mon, 15 Jun 2026 01:21:46 -0400 Subject: [PATCH 6/6] review fixes: address 14 adversarially-verified findings An adversarial review (5 dimensions, every finding independently verified; 11 false alarms correctly dismissed) surfaced real defects, fixed here. HIGH - bin/swarm sourced fns/swarm but not fns/core, so the new _anu_require guard died with "_anu_require: command not found" on every non-interactive call, exactly the agent path the wrapper exists for. Source core first (like bin/tend). MED - tend add looped forever when --every/--heal was the final arg (shift 2 with one positional never terminates); guard the value before consuming it. - _tend_secs crashed on fractional/garbage durations (1.5h aborted with an arithmetic error, leaving an empty value that made jq truncate the watch file to 0 bytes and poison the store); validate the integer stem, default to 1h. - atlas/tend build.py crashed on any JSON parsing to a non-dict; load() now enforces dict-ness and tend filters non-dict history elements. - atlas ls rendered before mkdir on first use; build.py also mkdirs its out dir. - auto-heal had no debounce: a still-failing watch re-spawned a fresh contained healer every cadence tick; record healing_since and skip re-heal within a cooldown. 
- watch command was space-flattened (lost quoting); store via printf %q so it round-trips through bash -c. - bin/tend exports a PATH so box/jq resolve under cron (headless heal). - tend help sed range overshot the header and dumped source; bounded to 2,23. Tests: +2 core (bin-wrapper-sources-core regression), +7 tend (malformed durations, value-less flag, quoting round-trip, literal auto-heal + debounce marker). Full suite green; the links failure is pre-existing and unrelated. Co-Authored-By: Claude Opus 4.8 (1M context) --- config/bash/bin/swarm | 7 ++++- config/bash/bin/tend | 3 ++ config/bash/fns/atlas | 1 + config/bash/fns/tend | 48 +++++++++++++++++++---------- plugins/atlas/skills/atlas/build.py | 4 ++- plugins/tend/skills/tend/build.py | 6 ++-- tests/bash/core_test.sh | 9 ++++++ tests/bash/tend_test.sh | 16 ++++++++++ 8 files changed, 74 insertions(+), 20 deletions(-) diff --git a/config/bash/bin/swarm b/config/bash/bin/swarm index 82b85dc..ae76e0a 100755 --- a/config/bash/bin/swarm +++ b/config/bash/bin/swarm @@ -1,5 +1,10 @@ #!/usr/bin/env bash # Standalone wrapper so `swarm` is callable as a command (not just a function) # This lets AI agents inside Claude Code run `swarm send`, `swarm collect`, etc. -source "${ANU_PATH:-$HOME/.local/share/anu}/config/bash/fns/swarm" +ANU="${ANU_PATH:-$HOME/.local/share/anu}" +# swarm() guards its deps through core (_anu_require); source core first, exactly +# as bin/tend does, or every non-help subcommand dies here with +# "_anu_require: command not found" on this non-interactive path. +source "$ANU/config/bash/fns/core" +source "$ANU/config/bash/fns/swarm" swarm "$@" diff --git a/config/bash/bin/tend b/config/bash/bin/tend index 01dd5ed..59623e9 100755 --- a/config/bash/bin/tend +++ b/config/bash/bin/tend @@ -3,6 +3,9 @@ # function, needed by the cron/launchd heartbeat (`tend cron on`), which runs # in a shell that has not sourced the anu fns. Mirrors bin/box, bin/delve, bin/swarm. 
ANU="${ANU_PATH:-$HOME/.local/share/anu}" +# cron/launchd runs with a minimal PATH; make anu's tools resolvable (box/cxc for +# the heal, jq, homebrew bins) so the headless heartbeat and its healer can run. +export PATH="$ANU/config/bash/bin:$HOME/.local/bin:/opt/homebrew/bin:/usr/local/bin:$PATH" source "$ANU/config/bash/fns/core" source "$ANU/config/bash/fns/tend" tend "$@" diff --git a/config/bash/fns/atlas b/config/bash/fns/atlas index 5d5f87a..343387f 100644 --- a/config/bash/fns/atlas +++ b/config/bash/fns/atlas @@ -35,6 +35,7 @@ atlas() { return ;; ls) _anu_require python3 jq || return 1 + mkdir -p "$atlas_root" [[ -f "$atlas_root/atlas.json" ]] || _atlas_render "$atlas_root" "$trail_root" "$skill" >/dev/null jq -r '.records[] | " [\(.kind)]\t\(.title)"' "$atlas_root/atlas.json" 2>/dev/null \ || echo "no atlas yet. Run: atlas" diff --git a/config/bash/fns/tend b/config/bash/fns/tend index b7f67b0..5859060 100644 --- a/config/bash/fns/tend +++ b/config/bash/fns/tend @@ -30,15 +30,16 @@ _tend_now() { date -u +%Y-%m-%dT%H:%M:%SZ; } # Parse a duration (30m/2h/1d/90s/3600) into seconds. _tend_secs() { - local d="${1:-3600}" + local d="${1:-3600}" n mult case "$d" in - *s) echo "$(( ${d%s} ))" ;; - *m) echo "$(( ${d%m} * 60 ))" ;; - *h) echo "$(( ${d%h} * 3600 ))" ;; - *d) echo "$(( ${d%d} * 86400 ))" ;; - ''|*[!0-9]*) echo 3600 ;; - *) echo "$d" ;; + *s) n="${d%s}"; mult=1 ;; + *m) n="${d%m}"; mult=60 ;; + *h) n="${d%h}"; mult=3600 ;; + *d) n="${d%d}"; mult=86400 ;; + *) n="$d"; mult=1 ;; esac + [[ "$n" =~ ^[0-9]+$ ]] || { echo 3600; return; } # fractional / garbage -> 1h + echo "$(( n * mult ))" } # Human-format a duration in seconds. 
@@ -61,7 +62,7 @@ _tend_age() { } _tend_help() { - sed -n '2,33p' "${BASH_SOURCE[0]}" | sed 's/^# \{0,1\}//' + sed -n '2,23p' "${BASH_SOURCE[0]}" | sed 's/^# \{0,1\}//' } # --- register ---------------------------------------------------------------- @@ -72,26 +73,28 @@ _tend_add() { local -a cmd=() while [[ $# -gt 0 ]]; do case "$1" in - --every) every="$2"; shift 2 ;; - --heal) heal="$2"; shift 2 ;; + --every) [[ $# -ge 2 ]] || { _anu_die "tend add: --every needs a value"; return 1; }; every="$2"; shift 2 ;; + --heal) [[ $# -ge 2 ]] || { _anu_die "tend add: --heal needs a value"; return 1; }; heal="$2"; shift 2 ;; --auto) auto="true"; shift ;; --) shift; cmd=("$@"); break ;; *) _anu_die "tend add: unexpected '$1'. Put the command after --, e.g. tend add $name -- make test"; return 1 ;; esac done [[ ${#cmd[@]} -eq 0 ]] && { _anu_die "tend add: no command. Usage: tend add $name -- "; return 1; } - local root repo secs f + local root repo secs f cmdq root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" repo="$(basename "$root")" secs="$(_tend_secs "$every")" + # store the command so it round-trips through `bash -c` with quoting intact + printf -v cmdq '%q ' "${cmd[@]}"; cmdq="${cmdq% }" mkdir -p "$(_tend_dir)" f="$(_tend_file "$name")" jq -n --arg name "$name" --arg repo "$repo" --arg root "$root" \ - --arg cmd "${cmd[*]}" --argjson every "$secs" --arg heal "$heal" \ + --arg cmd "$cmdq" --argjson every "$secs" --arg heal "$heal" \ --argjson auto "$auto" --arg created "$(_tend_now)" ' {name:$name, repo:$repo, root:$root, cmd:$cmd, every:$every, heal:$heal, auto:$auto, created:$created, last_run:null, last_status:"unknown", - last_ms:0, last_output:"", history:[]}' > "$f" \ + last_ms:0, last_output:"", healing_since:null, history:[]}' > "$f" \ && echo "tend: watching '$name': $repo · ${cmd[*]} · every $(_tend_human "$secs")${heal:+ · heal=$heal}$([[ $auto == true ]] && echo ' · auto')" } @@ -99,9 +102,9 @@ _tend_add() { _tend_run_one() { local f; f="$(_tend_file 
"$1")" [[ -f "$f" ]] || { _anu_die "tend: no watch '$1' (see: tend)"; return 1; } - local root cmd auto heal out rc start end ms status now trimmed tmp + local root cmd auto heal every out rc start end ms status now trimmed tmp root="$(jq -r '.root' "$f")"; cmd="$(jq -r '.cmd' "$f")" - auto="$(jq -r '.auto' "$f")"; heal="$(jq -r '.heal' "$f")" + auto="$(jq -r '.auto' "$f")"; heal="$(jq -r '.heal' "$f")"; every="$(jq -r '.every' "$f")" start="$(date +%s)" out="$(cd "$root" 2>/dev/null && bash -c "$cmd" 2>&1)"; rc=$? end="$(date +%s)"; ms=$(( (end - start) * 1000 )) @@ -113,11 +116,24 @@ _tend_run_one() { .last_run=$t | .last_status=$s | .last_ms=$ms | .last_output=$out | .history = ((.history // []) + [{t:$t, status:$s, ms:$ms}] | .[-50:])' "$f" > "$tmp" && mv "$tmp" "$f" if [[ "$status" == pass ]]; then + # clear any in-flight heal marker once the check is green again + tmp="$(mktemp)"; jq '.healing_since=null' "$f" > "$tmp" && mv "$tmp" "$f" echo "tend ✓ $1 (${ms}ms)" else _anu_warn "tend ✗ $1 failed" if [[ "$auto" == "true" && -n "$heal" && "$heal" != "null" ]]; then - echo "tend: auto-healing '$1'…"; _tend_heal "$1" + # debounce: don't stack healers while one is still in flight (a fix can + # take longer than the cadence). Re-heal only after a cooldown lapses. + local hsince hage cooldown + hsince="$(jq -r '.healing_since // ""' "$f")" + hage="$(_tend_age "$hsince")" + cooldown=$(( every > 1800 ? 
every : 1800 )) + if [[ -n "$hsince" && "$hsince" != "null" ]] && (( hage < cooldown )); then + echo "tend: heal already in flight for '$1' ($(_tend_human "$hage") ago); not re-spawning" + else + tmp="$(mktemp)"; jq --arg t "$now" '.healing_since=$t' "$f" > "$tmp" && mv "$tmp" "$f" + echo "tend: auto-healing '$1'…"; _tend_heal "$1" + fi elif [[ -n "$heal" && "$heal" != "null" ]]; then echo "tend: run tend heal $1 to fix" fi diff --git a/plugins/atlas/skills/atlas/build.py b/plugins/atlas/skills/atlas/build.py index ecd363e..3ea3eae 100644 --- a/plugins/atlas/skills/atlas/build.py +++ b/plugins/atlas/skills/atlas/build.py @@ -20,7 +20,8 @@ def load(p): try: - return json.loads(pathlib.Path(p).read_text()) + x = json.loads(pathlib.Path(p).read_text()) + return x if isinstance(x, dict) else None except Exception: return None @@ -137,6 +138,7 @@ def add_frontier(kind, rid, title, items): for r in records: counts[r["kind"]] = counts.get(r["kind"], 0) + 1 + out.parent.mkdir(parents=True, exist_ok=True) out.write_text(json.dumps( {"counts": counts, "records": records, "frontier": frontier, "edges": edges}, indent=2)) diff --git a/plugins/tend/skills/tend/build.py b/plugins/tend/skills/tend/build.py index 0491c2f..c8958e6 100644 --- a/plugins/tend/skills/tend/build.py +++ b/plugins/tend/skills/tend/build.py @@ -14,7 +14,8 @@ def load(p): try: - return json.loads(pathlib.Path(p).read_text()) + x = json.loads(pathlib.Path(p).read_text()) + return x if isinstance(x, dict) else None except Exception: return None @@ -30,7 +31,7 @@ def main(): w = load(wf) if not w or "name" not in w: continue - hist = w.get("history") or [] + hist = [h for h in (w.get("history") or []) if isinstance(h, dict)] watches.append({ "name": w.get("name"), "repo": w.get("repo"), "cmd": w.get("cmd"), "status": w.get("last_status", "unknown"), "last_run": w.get("last_run"), @@ -45,6 +46,7 @@ def main(): "passing": sum(1 for w in watches if w["status"] == "pass"), "failing": sum(1 for w in watches if 
w["status"] == "fail"), } + out.parent.mkdir(parents=True, exist_ok=True) out.write_text(json.dumps({"counts": counts, "watches": watches}, indent=2)) print(out) diff --git a/tests/bash/core_test.sh b/tests/bash/core_test.sh index 4f3c48a..744a6a8 100644 --- a/tests/bash/core_test.sh +++ b/tests/bash/core_test.sh @@ -42,4 +42,13 @@ assert_contains "$hint" "agent-1" "hint lists agent-1" assert_contains "$hint" "agent-2" "hint lists agent-2" assert_eq "" "$(_swarm_agents_hint "$(mktmp)" 2>/dev/null)" "no agents → empty hint" +t_section "bin wrappers source core so the new guards resolve (the agent path)" +# Regression: bin/swarm sourced fns/swarm but not fns/core, so swarm()'s +# _anu_require guard died with "_anu_require: command not found" on every +# non-interactive call. Both wrappers must source core before their fn. +for w in swarm tend; do + out=$(ANU_PATH="$ANU_ROOT" XDG_DATA_HOME="$(mktmp)" bash "$(anu_bin "$w")" status 2>&1) + assert_not_contains "$out" "command not found" "bin/$w runs without an undefined-function error" +done + t_done diff --git a/tests/bash/tend_test.sh b/tests/bash/tend_test.sh index 782eafa..b67fbde 100644 --- a/tests/bash/tend_test.sh +++ b/tests/bash/tend_test.sh @@ -16,6 +16,8 @@ assert_eq "3600" "$(_tend_secs 1h)" "1h → 3600s" assert_eq "86400" "$(_tend_secs 1d)" "1d → 86400s" assert_eq "90" "$(_tend_secs 90s)" "90s → 90s" assert_eq "3600" "$(_tend_secs junk)" "garbage → 1h default" +assert_eq "3600" "$(_tend_secs 1.5h)" "fractional → 1h default (no arithmetic crash)" +assert_eq "3600" "$(_tend_secs 2hh)" "malformed suffix → 1h default" assert_eq "2h" "$(_tend_human 7200)" "7200s → 2h" t_section "add registers a watch JSON" @@ -63,6 +65,20 @@ html="$(cat "$(_tend_dir)/index.html")" assert_contains "$html" "broken" "watch embedded in dash" assert_not_contains "$html" "__TEND__" "placeholder replaced" +t_section "add rejects a value-less flag instead of looping forever" +( tend add hangy --every >/dev/null 2>&1 ); assert_fail $? 
"trailing --every fails fast (no infinite loop)" +( tend add hangy --heal >/dev/null 2>&1 ); assert_fail $? "trailing --heal fails fast" + +t_section "watch command round-trips through bash -c with quoting intact" +tend add spacey -- printf '[%s]' 'a b' >/dev/null +assert_eq "[a b]" "$(bash -c "$(jq -r '.cmd' "$(_tend_file spacey)")" 2>&1)" "a space-containing arg survives (not flattened)" + +t_section "armed --auto runs the literal heal command and marks the heal in flight" +tend add healme --heal "touch $XDG_DATA_HOME/HEALED" --auto -- false >/dev/null +tend run healme >/dev/null 2>&1 +assert_file "$XDG_DATA_HOME/HEALED" "auto --heal '' actually executed on failure" +assert_ne "null" "$(jq -r '.healing_since' "$(_tend_file healme)")" "healing_since set (debounce marker)" + t_section "tend wires through core" assert_contains "$(declare -f tend)" "_anu_require" "tend() guards jq"