From 0b489135a437c01b7fb8fc97d5169719a62086ba Mon Sep 17 00:00:00 2001 From: Rekseto Date: Tue, 16 Jun 2026 23:32:44 +0100 Subject: [PATCH 01/57] add netsim scenario for lab --- netsim/README.md | 90 ++++++++++++++++++++++++++ netsim/lab.story | 7 ++ netsim/link.sh | 24 +++++++ netsim/tasks/install-astrald/README.md | 63 ++++++++++++++++++ netsim/tasks/install-astrald/run.sh | 88 +++++++++++++++++++++++++ netsim/tasks/install-astrald/verify.sh | 31 +++++++++ 6 files changed, 303 insertions(+) create mode 100644 netsim/README.md create mode 100644 netsim/lab.story create mode 100755 netsim/link.sh create mode 100644 netsim/tasks/install-astrald/README.md create mode 100755 netsim/tasks/install-astrald/run.sh create mode 100755 netsim/tasks/install-astrald/verify.sh diff --git a/netsim/README.md b/netsim/README.md new file mode 100644 index 00000000..9745ebf9 --- /dev/null +++ b/netsim/README.md @@ -0,0 +1,90 @@ +# netsim scenarios for astrald + +Test scaffolding that drives `netsim` to build and run `astrald` on a simulated +LAN. It contains no astrald Go source and modifies none. + +`netsim` boots Ubuntu 26.04 cloud-image VMs on `10.77.0.0/24` with per-VM NAT. A +*task* is a host-side script that configures the VMs. A *story* runs a list of +tasks in one simulation and saves a named *stage*. `lab.story` builds the +`astrald-lab` stage: two nodes running astrald, with a Qwen Code operator on +`node1`. + +## Layout + +``` +netsim/ + tasks/ + install-astrald/ # custom task (see tasks/install-astrald/README.md) + run.sh # installs on target VMs (all running VMs by default) + verify.sh # independent re-check: service active + node answers + README.md # task reference: usage, execution model, build facts + lab.story # full lab in one simulation -> stage astrald-lab + link.sh # register tasks with netsim (idempotent; re-run anytime) + README.md +``` + +## Registering tasks + +`netsim` discovers tasks only under `~/.local/share/netsim/tasks/`. 
`link.sh` +symlinks every task under `tasks/` — each folder containing a `run.sh` — there. +It is idempotent; re-run it after adding a task. The symlinks leave netsim's +shipped builtins intact. + +```sh +./netsim/link.sh +netsim tasks # confirm: install-astrald is listed as a user task +``` + +## Lab + +`lab.story` builds the full lab in one simulation: two nodes running astrald and +a Qwen Code operator on `node1`. + +``` +# lab.story — the astrald lab, built in one netsim simulation. +# Result: a single stage with two nodes running astrald and a Qwen Code +# operator installed on node1. +add-vm --hostname node1 +add-vm --hostname node2 +install-astrald +install-qwen-code --vm node1 --create-user +``` + +A story is a plain-text file with one `task [args...]` per line, shell-style +quoting, and `#` for full-line or trailing comments. `netsim story` boots one +simulation, runs the listed tasks in order in the same VMs, and saves a single +stage at the end. It stops at the first failing task. Order is significant: + +* `add-vm --hostname node1` and `add-vm --hostname node2` use the `add-vm` + builtin; they create the two plain Ubuntu VMs on the LAN. +* `install-astrald` is the [custom task](tasks/install-astrald/README.md); with no + `--vm` it installs and runs astrald on every running VM, so on both nodes. It + runs `run.sh` then `verify.sh` and fails the story unless every node's service + is active and answers `astral-query localnode:.spec`. +* `install-qwen-code --vm node1 --create-user` uses the `install-qwen-code` + builtin; it installs the Qwen Code CLI on `node1` and points it at the + inference endpoint. The builtin installs for user `tester`, which does not + exist on a fresh cloud image, so `--create-user` is required. `node2` stays a + plain astrald peer. + +Both VMs must exist and run before `install-astrald`, and astrald must be present +before the Qwen Code operator is layered on `node1`. 
+ +Register the custom task once (see [Registering tasks](#registering-tasks)), +then build the lab: + +```sh +./netsim/link.sh +netsim story --stage null --save astrald-lab netsim/lab.story +``` + +The result is the stage `astrald-lab`: `node1` and `node2` running astrald, Qwen +Code on `node1`. Re-enter it with `netsim shell --stage astrald-lab`. + +## Scope + +v1 installs and runs astrald on each node as two independent nodes. Linking the +nodes and verifying a live session is a later phase. + +Fresh nodes broadcast on UDP 8822 through the `ether` and `nearby` modules and +discover each other on a shared L2 LAN. v1 asserts nothing about discovery. diff --git a/netsim/lab.story b/netsim/lab.story new file mode 100644 index 00000000..c5691fbb --- /dev/null +++ b/netsim/lab.story @@ -0,0 +1,7 @@ +# lab.story — the astrald lab, built in one netsim simulation. +# Result: a single stage with two nodes running astrald and a Qwen Code +# operator installed on node1. +add-vm --hostname node1 +add-vm --hostname node2 +install-astrald +install-qwen-code --vm node1 --create-user diff --git a/netsim/link.sh b/netsim/link.sh new file mode 100755 index 00000000..3828e10a --- /dev/null +++ b/netsim/link.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# link.sh — register every task under tasks/ as a netsim user task. +# netsim only discovers tasks in ~/.local/share/netsim/tasks/, so symlink each +# task dir (each folder under tasks/ with a run.sh) there. Idempotent; re-run anytime. 
+set -eu + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +repo=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +dest="${NETSIM_HOME:-$HOME/.local/share/netsim}/tasks" +mkdir -p "$dest" + +found=0 +# a "task" = any folder under tasks/ that contains a run.sh +for rs in "$repo"/tasks/*/run.sh; do + [ -f "$rs" ] || continue + d=$(dirname "$rs") + ln -sfn "$d" "$dest/$(basename "$d")" + echo "linked $(basename "$d")" + found=$((found + 1)) +done + +[ "$found" -gt 0 ] || { echo "no tasks (folders with run.sh) found in $repo/tasks" >&2; exit 1; } +echo "done: $found task(s) registered — run 'netsim tasks' to confirm" diff --git a/netsim/tasks/install-astrald/README.md b/netsim/tasks/install-astrald/README.md new file mode 100644 index 00000000..49b8a36e --- /dev/null +++ b/netsim/tasks/install-astrald/README.md @@ -0,0 +1,63 @@ +# install-astrald + +A netsim task that builds `astrald` from source and runs it as a systemd service +on target VMs. `run.sh` installs; `verify.sh` re-checks independently. + +``` +install-astrald [--vm ]... [--ref ] +``` + +* No `--vm`: every running VM in the simulation, derived from + `netsim vm ls --json`. +* `--vm ` (repeatable): restrict to the named hosts. +* `--ref `: build a branch or tag via a shallow `--branch` clone instead + of the default branch. + +Each target receives, in one ssh call: `git` and `curl` ensured, Go from the +official tarball, `astrald` and `astral-query` built to `/usr/local/bin`, a +systemd unit installed and started with `systemctl enable --now`, and a +self-check. + +Use the task in a story (see the [netsim README](../../README.md#lab)), or run it +standalone against an existing stage with +`netsim task --stage --save install-astrald`. + +## Execution model + +`run.sh` and `verify.sh` run on the host, with the simulation root as the working +directory. They reach each guest with `netsim ssh -- ` and land as +`root`. 
+ +Everything after `--` is one argv element; ssh joins argv with spaces and the +guest shell re-parses it. The whole remote program is sent as a single string: +parameters as an assignment prefix, the body in a single-quoted heredoc +(`<<'EOS'`) so host-side `$...` reach the guest unexpanded. + +```sh +netsim ssh "$vm" -- "repo='$REPO' ref='$REF' go_ver='$GO_VERSION'; $REMOTE_BODY" +``` + +## Build and run facts + +* Go is installed from the official tarball. astrald's `go.mod` requires + `go >= 1.25.0`; the apt package is older. The download is arch-aware + (`x86_64`→`amd64`, `aarch64`→`arm64`). +* The clone is `git clone --depth 1` over HTTPS, never `--recursive`. The only + submodule (`.ai/system`) is an SSH-only docs repo and is not needed for the + build. +* The build sets `CGO_ENABLED=0`. astrald uses pure-Go SQLite and needs no C + toolchain. +* Build targets carry the `./` prefix: `go build -o /usr/local/bin/astrald + ./cmd/astrald`, and the same for `./cmd/astral-query`. `go build cmd/astrald` + fails; `go build .` at the repo root builds a do-nothing stub. +* The service runs `astrald -root /var/lib/astrald` with `Environment=HOME=/root`. + Default config and data paths derive from `$HOME`, which systemd does not set. +* First start auto-generates the node identity, a `secp256k1` key at + `/var/lib/astrald/config/node_key`, with no prompt and no TTY. +* The liveness probe is `astral-query localnode:.spec`. The op is built-in and + always available; it streams the node's operation spec over the local apphost + API (`tcp:127.0.0.1:8625`, anonymous access by default). Exit code 0 means + healthy. +* apt calls pass `-o DPkg::Lock::Timeout=120`; `cloud-init` can hold the dpkg + lock on a fresh boot. Readiness is never gated on `ping`; guest ICMP is + disabled. 
diff --git a/netsim/tasks/install-astrald/run.sh b/netsim/tasks/install-astrald/run.sh new file mode 100755 index 00000000..f89df448 --- /dev/null +++ b/netsim/tasks/install-astrald/run.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# install-astrald: build astrald from source, run it as a systemd service on VMs. +# install-astrald [--vm ]... [--ref ] +# No --vm -> every running VM in the simulation. +# +# Runs ON THE HOST (cwd = simulation root). Reaches each VM with a single +# `netsim ssh -- ` call: the whole remote script travels as ONE +# argument (assignment prefix + single-quoted heredoc body, so host-side $... are +# left for the guest to expand). ssh lands as root on the guest. +set -eu +REPO="https://github.com/cryptopunkscc/astrald" +GO_VERSION="1.25.1" # must be >= 1.25.0 (astrald go.mod); pin to current 1.25.x +REF="" + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + --ref) [ $# -ge 2 ] || { echo "need ref after --ref" >&2; exit 64; }; REF=$2; shift 2 ;; + *) echo "usage: install-astrald [--vm ]... [--ref ]" >&2; exit 64 ;; + esac +done +if [ -z "$VMS" ]; then + VMS=$(netsim vm ls --json | python3 -c \ + 'import json,sys; print(" ".join(v["hostname"] for v in json.load(sys.stdin) if v["state"]=="running"))') +fi +[ -n "$VMS" ] || { echo "no running VMs" >&2; exit 1; } + +REMOTE_BODY=$(cat <<'EOS' +set -eu +export DEBIAN_FRONTEND=noninteractive + +# deps: git + curl (Go comes from the official tarball, not apt -> need >= 1.25) +need=""; command -v git >/dev/null 2>&1 || need="$need git" + command -v curl >/dev/null 2>&1 || need="$need curl" +if [ -n "$need" ]; then + apt-get -qq -o DPkg::Lock::Timeout=120 update + apt-get -qq -y -o DPkg::Lock::Timeout=120 install $need ca-certificates >/dev/null +fi +if ! 
/usr/local/go/bin/go version 2>/dev/null | grep -q "go$go_ver "; then + case "$(uname -m)" in + x86_64) ga=amd64 ;; aarch64) ga=arm64 ;; + *) echo "unsupported arch $(uname -m)" >&2; exit 1 ;; + esac + t=$(mktemp); curl -fsSL -o "$t" "https://go.dev/dl/go${go_ver}.linux-${ga}.tar.gz" + rm -rf /usr/local/go; tar -C /usr/local -xzf "$t"; rm -f "$t" +fi +export PATH=/usr/local/go/bin:$PATH CGO_ENABLED=0 + +# build (plain clone, NO --recursive; subpackages need the ./ prefix) +src=/opt/astrald-src +[ -d "$src/.git" ] || git clone --depth 1 ${ref:+--branch "$ref"} "$repo" "$src" +cd "$src" +go build -o /usr/local/bin/astrald ./cmd/astrald +go build -o /usr/local/bin/astral-query ./cmd/astral-query + +# run as a service: explicit -root and HOME (default paths break without HOME) +install -d -m 700 /var/lib/astrald +cat > /etc/systemd/system/astrald.service </dev/null 2>&1 +echo "astrald healthy on $(hostname)" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "installing astrald on $vm ..." + netsim ssh "$vm" -- "repo='$REPO' ref='$REF' go_ver='$GO_VERSION'; $REMOTE_BODY" +done diff --git a/netsim/tasks/install-astrald/verify.sh b/netsim/tasks/install-astrald/verify.sh new file mode 100755 index 00000000..e2bf5769 --- /dev/null +++ b/netsim/tasks/install-astrald/verify.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# verify install-astrald (same args as run.sh): on every target VM the astrald +# service must be active AND the node must answer its local API. This is an +# INDEPENDENT re-check -- it re-derives the VM list and re-probes the node; it +# does not trust run.sh's output. 
+set -eu +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) VMS="${VMS:+$VMS }$2"; shift 2 ;; + --ref) shift 2 ;; + *) shift ;; + esac +done +if [ -z "$VMS" ]; then + VMS=$(netsim vm ls --json | python3 -c \ + 'import json,sys; print(" ".join(v["hostname"] for v in json.load(sys.stdin) if v["state"]=="running"))') +fi +[ -n "$VMS" ] || { echo "no running VMs to verify" >&2; exit 1; } + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + # single-quoted: $(hostname) must expand on the guest, not the host + # shellcheck disable=SC2016 + netsim ssh "$vm" -- 'systemctl is-active --quiet astrald \ + && timeout 5 astral-query localnode:.spec -out json >/dev/null 2>&1 \ + && echo "$(hostname): astrald healthy"' \ + || { echo "astrald NOT healthy on $vm" >&2; exit 1; } +done +echo "verified astrald on: $VMS" From 5533a1a7cba64ede5fb27896b1164320b4da0ac7 Mon Sep 17 00:00:00 2001 From: Rekseto Date: Tue, 16 Jun 2026 23:59:37 +0100 Subject: [PATCH 02/57] netsim: adjust install-astrald script; docs: add running guide; --- docs/running-as-a-service.md | 81 ++++++++++++++++++++++++++ netsim/README.md | 7 ++- netsim/tasks/install-astrald/README.md | 16 +++-- netsim/tasks/install-astrald/run.sh | 28 +++++---- netsim/tasks/install-astrald/verify.sh | 29 ++++++--- 5 files changed, 135 insertions(+), 26 deletions(-) create mode 100644 docs/running-as-a-service.md diff --git a/docs/running-as-a-service.md b/docs/running-as-a-service.md new file mode 100644 index 00000000..644d89f2 --- /dev/null +++ b/docs/running-as-a-service.md @@ -0,0 +1,81 @@ +# Running astrald as a service + +`astrald` is a long-running daemon. Run it under systemd on Linux. + +## Build + +```shell +CGO_ENABLED=0 go build -o /usr/local/bin/astrald ./cmd/astrald +CGO_ENABLED=0 go build -o /usr/local/bin/astral-query ./cmd/astral-query +``` + +Go >= 1.25.0 is required. astrald uses pure-Go SQLite, so `CGO_ENABLED=0` builds a +static binary. 
The `./` prefix is required; `go build .` at the repo root builds +an empty stub. + +## Root directory + +astrald stores config, identity, and data under a root directory derived from +`$HOME`. A systemd service has no `$HOME`. Pass `-root ` to set the root +explicitly, or set `Environment=HOME=`. The first start generates the node +identity — a `secp256k1` key at `/config/node_key` — with no interaction. + +## Unit + +`/etc/systemd/system/astrald.service`: + +```ini +[Unit] +Description=astral daemon + +[Service] +ExecStart=/usr/local/bin/astrald -root /var/lib/astrald +Environment=HOME=/root +Restart=on-failure +KillSignal=SIGINT + +[Install] +WantedBy=multi-user.target +``` + +`Type=simple` is the systemd default and is omitted. astrald traps `SIGINT`, not +`SIGTERM`; `KillSignal=SIGINT` makes `systemctl stop` shut it down gracefully. + +```shell +systemctl enable --now astrald +``` + +## Health check + +```shell +astral-query localnode:.spec +``` + +The local API listens on `tcp:127.0.0.1:8625` with anonymous access. `.spec` is a +built-in, always-available op. Exit code 0 means the node is up. + +## Ports + +Default transports bind all interfaces. + +| Port | Proto | Purpose | +|---|---|---| +| 1791 | TCP | node links | +| 1792 | UDP | KCP transport | +| 1791 | UDP | UTP transport | +| 8822 | UDP | `ether` LAN discovery | +| 8625 | TCP 127.0.0.1 | local apphost API | +| 8624 | TCP 0.0.0.0 | apphost HTTP API | + +## Imaging and snapshots + +Stop astrald before capturing a VM image or live snapshot; leave the unit enabled. + +```shell +systemctl enable astrald +systemctl stop astrald +``` + +A running daemon dirties memory continuously and can stall a live RAM snapshot. +The enabled unit autostarts astrald on boot. The identity at +`/config/node_key` persists across the capture. diff --git a/netsim/README.md b/netsim/README.md index 9745ebf9..e028ed7e 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -58,9 +58,10 @@ stage at the end. 
It stops at the first failing task. Order is significant: * `add-vm --hostname node1` and `add-vm --hostname node2` use the `add-vm` builtin; they create the two plain Ubuntu VMs on the LAN. * `install-astrald` is the [custom task](tasks/install-astrald/README.md); with no - `--vm` it installs and runs astrald on every running VM, so on both nodes. It - runs `run.sh` then `verify.sh` and fails the story unless every node's service - is active and answers `astral-query localnode:.spec`. + `--vm` it installs astrald on every running VM, so on both nodes. It runs + `run.sh` then `verify.sh` and fails the story unless astrald builds, starts, and + answers `astral-query localnode:.spec` on every node. The service is left + enabled but stopped, so it autostarts when the stage boots. * `install-qwen-code --vm node1 --create-user` uses the `install-qwen-code` builtin; it installs the Qwen Code CLI on `node1` and points it at the inference endpoint. The builtin installs for user `tester`, which does not diff --git a/netsim/tasks/install-astrald/README.md b/netsim/tasks/install-astrald/README.md index 49b8a36e..d10609b4 100644 --- a/netsim/tasks/install-astrald/README.md +++ b/netsim/tasks/install-astrald/README.md @@ -1,7 +1,11 @@ # install-astrald -A netsim task that builds `astrald` from source and runs it as a systemd service -on target VMs. `run.sh` installs; `verify.sh` re-checks independently. +A netsim task that builds `astrald` from source and installs it as a systemd +service on target VMs. `run.sh` builds, installs, and enables the unit; +`verify.sh` independently confirms the node answers. The service is left enabled +but stopped, so the netsim stage snapshots cleanly and astrald autostarts when +the stage boots. See [Running astrald as a service](../../../docs/running-as-a-service.md) +for the unit file and operational details. ``` install-astrald [--vm ]... [--ref ] @@ -14,9 +18,9 @@ install-astrald [--vm ]... [--ref ] of the default branch. 
Each target receives, in one ssh call: `git` and `curl` ensured, Go from the -official tarball, `astrald` and `astral-query` built to `/usr/local/bin`, a -systemd unit installed and started with `systemctl enable --now`, and a -self-check. +official tarball, `astrald` and `astral-query` built to `/usr/local/bin`, and a +systemd unit installed and enabled. astrald is started briefly to confirm it +answers `astral-query localnode:.spec`, then stopped for snapshotting. Use the task in a story (see the [netsim README](../../README.md#lab)), or run it standalone against an existing stage with @@ -52,6 +56,8 @@ netsim ssh "$vm" -- "repo='$REPO' ref='$REF' go_ver='$GO_VERSION'; $REMOTE_BODY" fails; `go build .` at the repo root builds a do-nothing stub. * The service runs `astrald -root /var/lib/astrald` with `Environment=HOME=/root`. Default config and data paths derive from `$HOME`, which systemd does not set. + The unit sets `KillSignal=SIGINT` so `systemctl stop` shuts astrald down + gracefully (astrald traps SIGINT, not SIGTERM). * First start auto-generates the node identity, a `secp256k1` key at `/var/lib/astrald/config/node_key`, with no prompt and no TTY. * The liveness probe is `astral-query localnode:.spec`. The op is built-in and diff --git a/netsim/tasks/install-astrald/run.sh b/netsim/tasks/install-astrald/run.sh index f89df448..d279db9a 100755 --- a/netsim/tasks/install-astrald/run.sh +++ b/netsim/tasks/install-astrald/run.sh @@ -1,5 +1,5 @@ #!/bin/sh -# install-astrald: build astrald from source, run it as a systemd service on VMs. +# install-astrald: build astrald from source, install it as a systemd service on VMs. # install-astrald [--vm ]... [--ref ] # No --vm -> every running VM in the simulation. 
# @@ -59,24 +59,32 @@ install -d -m 700 /var/lib/astrald cat > /etc/systemd/system/astrald.service </dev/null 2>&1 -echo "astrald healthy on $(hostname)" +# confirm it built AND runs: wait for the apphost listener, then probe the API +ok= +for _ in 1 2 3 4 5 6 7 8 9 10; do + if systemctl is-active --quiet astrald && timeout 5 astral-query localnode:.spec -out json >/dev/null 2>&1; then + ok=1; break + fi + sleep 1 +done +[ -n "$ok" ] || { echo "astrald did not come up on $(hostname)" >&2; exit 1; } + +# stop it so netsim snapshots an idle guest; the unit stays enabled and +# autostarts when the stage boots. a running daemon keeps dirtying RAM and can +# stall the live snapshot (the qmp timeout). +systemctl stop astrald +echo "astrald installed and verified; enabled, stopped for snapshot on $(hostname)" EOS ) diff --git a/netsim/tasks/install-astrald/verify.sh b/netsim/tasks/install-astrald/verify.sh index e2bf5769..64176898 100755 --- a/netsim/tasks/install-astrald/verify.sh +++ b/netsim/tasks/install-astrald/verify.sh @@ -1,8 +1,8 @@ #!/bin/sh # verify install-astrald (same args as run.sh): on every target VM the astrald -# service must be active AND the node must answer its local API. This is an -# INDEPENDENT re-check -- it re-derives the VM list and re-probes the node; it -# does not trust run.sh's output. +# unit must be enabled and, once started, answer its local API. INDEPENDENT +# re-check -- it re-derives the VM list, starts the (snapshot-idle) service, +# probes it, and stops it again; it does not trust run.sh's output. 
set -eu VMS="" while [ $# -gt 0 ]; do @@ -18,14 +18,27 @@ if [ -z "$VMS" ]; then fi [ -n "$VMS" ] || { echo "no running VMs to verify" >&2; exit 1; } +REMOTE_CHECK=$(cat <<'EOS' +set -eu +systemctl is-enabled --quiet astrald +systemctl start astrald +ok= +for _ in 1 2 3 4 5 6 7 8 9 10; do + if systemctl is-active --quiet astrald && timeout 5 astral-query localnode:.spec -out json >/dev/null 2>&1; then + ok=1; break + fi + sleep 1 +done +systemctl stop astrald +[ -n "$ok" ] || { echo "astrald did not answer on $(hostname)" >&2; exit 1; } +echo "$(hostname): astrald healthy" +EOS +) + # $VMS is a space-separated list -> intentional word-splitting # shellcheck disable=SC2086 for vm in $VMS; do - # single-quoted: $(hostname) must expand on the guest, not the host - # shellcheck disable=SC2016 - netsim ssh "$vm" -- 'systemctl is-active --quiet astrald \ - && timeout 5 astral-query localnode:.spec -out json >/dev/null 2>&1 \ - && echo "$(hostname): astrald healthy"' \ + netsim ssh "$vm" -- "$REMOTE_CHECK" \ || { echo "astrald NOT healthy on $vm" >&2; exit 1; } done echo "verified astrald on: $VMS" From c6f29cf8abf348c9149d701af1c9cbb37023ff49 Mon Sep 17 00:00:00 2001 From: Rekseto Date: Wed, 17 Jun 2026 10:57:56 +0100 Subject: [PATCH 03/57] docs: expand running-as-a-service.md doc; --- docs/running-as-a-service.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/running-as-a-service.md b/docs/running-as-a-service.md index 644d89f2..ce3ca284 100644 --- a/docs/running-as-a-service.md +++ b/docs/running-as-a-service.md @@ -45,6 +45,13 @@ WantedBy=multi-user.target systemctl enable --now astrald ``` +This unit runs astrald as root — the simplest setup. To run it as your own user +instead, install it as a user service: place the unit at +`~/.config/systemd/user/astrald.service`, drop `Environment=HOME=` and the `-root` +flag (config and data then default to `~/.config/astrald` and +`~/.local/share/astrald`), and run `systemctl --user enable --now astrald`. 
+`loginctl enable-linger $USER` keeps it running without an active login session. + ## Health check ```shell From 87deced6945816b88766cdc287ffe4b257df8f15 Mon Sep 17 00:00:00 2001 From: Rekseto Date: Wed, 17 Jun 2026 13:49:06 +0100 Subject: [PATCH 04/57] netsim: keep astrald instance running --- netsim/README.md | 3 ++- netsim/tasks/install-astrald/README.md | 8 ++++---- netsim/tasks/install-astrald/run.sh | 10 +++++----- netsim/tasks/install-astrald/verify.sh | 7 +++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/netsim/README.md b/netsim/README.md index e028ed7e..9e4b90b1 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -61,7 +61,8 @@ stage at the end. It stops at the first failing task. Order is significant: `--vm` it installs astrald on every running VM, so on both nodes. It runs `run.sh` then `verify.sh` and fails the story unless astrald builds, starts, and answers `astral-query localnode:.spec` on every node. The service is left - enabled but stopped, so it autostarts when the stage boots. + enabled and running, so the stage snapshots a live node that resumes + already-running on restore. * `install-qwen-code --vm node1 --create-user` uses the `install-qwen-code` builtin; it installs the Qwen Code CLI on `node1` and points it at the inference endpoint. The builtin installs for user `tester`, which does not diff --git a/netsim/tasks/install-astrald/README.md b/netsim/tasks/install-astrald/README.md index d10609b4..f883f155 100644 --- a/netsim/tasks/install-astrald/README.md +++ b/netsim/tasks/install-astrald/README.md @@ -3,8 +3,8 @@ A netsim task that builds `astrald` from source and installs it as a systemd service on target VMs. `run.sh` builds, installs, and enables the unit; `verify.sh` independently confirms the node answers. The service is left enabled -but stopped, so the netsim stage snapshots cleanly and astrald autostarts when -the stage boots. 
See [Running astrald as a service](../../../docs/running-as-a-service.md) +and running, so the netsim stage snapshots a live node that resumes already-running +on restore. See [Running astrald as a service](../../../docs/running-as-a-service.md) for the unit file and operational details. ``` @@ -19,8 +19,8 @@ install-astrald [--vm ]... [--ref ] Each target receives, in one ssh call: `git` and `curl` ensured, Go from the official tarball, `astrald` and `astral-query` built to `/usr/local/bin`, and a -systemd unit installed and enabled. astrald is started briefly to confirm it -answers `astral-query localnode:.spec`, then stopped for snapshotting. +systemd unit installed and enabled. astrald is started and confirmed to answer +`astral-query localnode:.spec`, then left running for snapshotting. Use the task in a story (see the [netsim README](../../README.md#lab)), or run it standalone against an existing stage with diff --git a/netsim/tasks/install-astrald/run.sh b/netsim/tasks/install-astrald/run.sh index d279db9a..78a17f4e 100755 --- a/netsim/tasks/install-astrald/run.sh +++ b/netsim/tasks/install-astrald/run.sh @@ -80,11 +80,11 @@ for _ in 1 2 3 4 5 6 7 8 9 10; do done [ -n "$ok" ] || { echo "astrald did not come up on $(hostname)" >&2; exit 1; } -# stop it so netsim snapshots an idle guest; the unit stays enabled and -# autostarts when the stage boots. a running daemon keeps dirtying RAM and can -# stall the live snapshot (the qmp timeout). -systemctl stop astrald -echo "astrald installed and verified; enabled, stopped for snapshot on $(hostname)" +# leave astrald running: netsim snapshots live RAM, so the node resumes +# already-running when the stage is restored (a stopped service would not +# restart, as resume is not a boot). astrald's footprint is tiny (~17 MB peak), +# so it does not stall the live snapshot against a sane qmp timeout. 
+echo "astrald installed, verified, and left running on $(hostname)" EOS ) diff --git a/netsim/tasks/install-astrald/verify.sh b/netsim/tasks/install-astrald/verify.sh index 64176898..951d3618 100755 --- a/netsim/tasks/install-astrald/verify.sh +++ b/netsim/tasks/install-astrald/verify.sh @@ -1,8 +1,8 @@ #!/bin/sh # verify install-astrald (same args as run.sh): on every target VM the astrald -# unit must be enabled and, once started, answer its local API. INDEPENDENT -# re-check -- it re-derives the VM list, starts the (snapshot-idle) service, -# probes it, and stops it again; it does not trust run.sh's output. +# unit must be enabled and answer its local API. INDEPENDENT re-check -- it +# re-derives the VM list, ensures the service is running, probes it, and leaves +# it running for the snapshot; it does not trust run.sh's output. set -eu VMS="" while [ $# -gt 0 ]; do @@ -29,7 +29,6 @@ for _ in 1 2 3 4 5 6 7 8 9 10; do fi sleep 1 done -systemctl stop astrald [ -n "$ok" ] || { echo "astrald did not answer on $(hostname)" >&2; exit 1; } echo "$(hostname): astrald healthy" EOS From 6a2ba50bdca1c0f10d317d5001c8f5f6eae425be Mon Sep 17 00:00:00 2001 From: intern0 Date: Wed, 17 Jun 2026 18:12:06 +0200 Subject: [PATCH 05/57] netsim: add configure-astral-agent task; wire into lab.story Install the astral-agent skill into the Qwen Code operator. The netsim host owns a deploy key (SATFORGE_SKILLS_DEPLOY_KEY); run.sh injects it into the VM, which clones the private satforgedev/skills repo, builds the satforge-skills linker (Go already present from install-astrald), and runs `link astral-agent --target qwen` -> ~/.qwen/skills/astral-agent. Folded into lab.story after install-qwen-code; documented in the task README (one-time deploy-key setup) and netsim/README. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- netsim/README.md | 29 ++++--- netsim/lab.story | 3 +- netsim/tasks/configure-astral-agent/README.md | 81 +++++++++++++++++++ netsim/tasks/configure-astral-agent/run.sh | 79 ++++++++++++++++++ netsim/tasks/configure-astral-agent/verify.sh | 34 ++++++++ 5 files changed, 214 insertions(+), 12 deletions(-) create mode 100644 netsim/tasks/configure-astral-agent/README.md create mode 100755 netsim/tasks/configure-astral-agent/run.sh create mode 100755 netsim/tasks/configure-astral-agent/verify.sh diff --git a/netsim/README.md b/netsim/README.md index 9e4b90b1..dee21f62 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -14,10 +14,9 @@ tasks in one simulation and saves a named *stage*. `lab.story` builds the ``` netsim/ tasks/ - install-astrald/ # custom task (see tasks/install-astrald/README.md) - run.sh # installs on target VMs (all running VMs by default) - verify.sh # independent re-check: service active + node answers - README.md # task reference: usage, execution model, build facts + install-astrald/ # build + run astrald as a service (tasks/install-astrald/README.md) + configure-astral-agent/ # install the astral-agent skill into the qwen operator + run.sh / verify.sh / README.md # each task: installs on target VMs + independent re-check lab.story # full lab in one simulation -> stage astrald-lab link.sh # register tasks with netsim (idempotent; re-run anytime) README.md @@ -38,16 +37,17 @@ netsim tasks # confirm: install-astrald is listed as a user task ## Lab `lab.story` builds the full lab in one simulation: two nodes running astrald and -a Qwen Code operator on `node1`. +a Qwen Code operator on `node1`, equipped with the `astral-agent` skill. ``` # lab.story — the astrald lab, built in one netsim simulation. # Result: a single stage with two nodes running astrald and a Qwen Code -# operator installed on node1. +# operator on node1, equipped with the astral-agent skill. 
add-vm --hostname node1 add-vm --hostname node2 install-astrald install-qwen-code --vm node1 --create-user +configure-astral-agent --vm node1 ``` A story is a plain-text file with one `task [args...]` per line, shell-style @@ -68,20 +68,27 @@ stage at the end. It stops at the first failing task. Order is significant: inference endpoint. The builtin installs for user `tester`, which does not exist on a fresh cloud image, so `--create-user` is required. `node2` stays a plain astrald peer. +* `configure-astral-agent --vm node1` is a [custom task](tasks/configure-astral-agent/README.md); + it installs the `astral-agent` skill into the Qwen Code operator so it can drive + astrald from the skill's knowledge. The host must have `SATFORGE_SKILLS_DEPLOY_KEY` + set (a deploy key for the private skills repo) — see its README. -Both VMs must exist and run before `install-astrald`, and astrald must be present -before the Qwen Code operator is layered on `node1`. +Both VMs must exist and run before `install-astrald`, astrald must be present +before the Qwen Code operator is layered on `node1`, and the operator must exist +before its skill is configured. -Register the custom task once (see [Registering tasks](#registering-tasks)), +Register the custom tasks once (see [Registering tasks](#registering-tasks)), then build the lab: ```sh ./netsim/link.sh +export SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy # see tasks/configure-astral-agent netsim story --stage null --save astrald-lab netsim/lab.story ``` -The result is the stage `astrald-lab`: `node1` and `node2` running astrald, Qwen -Code on `node1`. Re-enter it with `netsim shell --stage astrald-lab`. +The result is the stage `astrald-lab`: `node1` and `node2` running astrald, with a +Qwen Code operator on `node1` equipped with the `astral-agent` skill. Re-enter it +with `netsim shell --stage astrald-lab`. 
## Scope diff --git a/netsim/lab.story b/netsim/lab.story index c5691fbb..44c7de66 100644 --- a/netsim/lab.story +++ b/netsim/lab.story @@ -1,7 +1,8 @@ # lab.story — the astrald lab, built in one netsim simulation. # Result: a single stage with two nodes running astrald and a Qwen Code -# operator installed on node1. +# operator on node1, equipped with the astral-agent skill. add-vm --hostname node1 add-vm --hostname node2 install-astrald install-qwen-code --vm node1 --create-user +configure-astral-agent --vm node1 diff --git a/netsim/tasks/configure-astral-agent/README.md b/netsim/tasks/configure-astral-agent/README.md new file mode 100644 index 00000000..1c65b3fa --- /dev/null +++ b/netsim/tasks/configure-astral-agent/README.md @@ -0,0 +1,81 @@ +# configure-astral-agent + +A netsim task that installs the `astral-agent` skill into the Qwen Code operator +on a VM, so the operator can drive astrald from the skill's knowledge (the +astral-docs corpus + playbooks) instead of having every procedure spelled out in +each task prompt. + +``` +configure-astral-agent [--vm <host>] [--user <name>] # default: node1, tester +``` + +After it runs, `~/.qwen/skills/astral-agent` exists (SKILL.md with +frontmatter, `references/`, and the `astral-docs` mount). Run standalone against +the lab stage with: + +```sh +SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy \ + netsim task --stage astrald-lab --save astrald-operator configure-astral-agent +``` + +## Setup (one-time, on the netsim host) + +The host running the sims must own a deploy key for the private repo: + +```sh +# 1. generate a keypair (keep the private half on the host) +ssh-keygen -t ed25519 -f ~/.ssh/satforge_skills_deploy -N '' -C netsim-skills-deploy + +# 2. register the PUBLIC half on GitHub: +# satforgedev/skills -> Settings -> Deploy keys -> Add -> paste +# ~/.ssh/satforge_skills_deploy.pub (read-only is enough) + +# 3.
point the env at the PRIVATE key (export it, or prefix each netsim run) +export SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy +``` + +`SATFORGE_SKILLS_DEPLOY_KEY` is a **path to the private deploy-key file**. netsim +runs this task as a subprocess and passes the env through, so exporting it once +covers every `netsim story` / `netsim task` invocation. + +## How it works — deploy key, clone in the VM + +`satforgedev/skills` is **private**, so the **host** owns the deploy key and the +VM never carries GitHub credentials of its own. `run.sh` (host) reads the private +key from `$SATFORGE_SKILLS_DEPLOY_KEY` and base64-ships it into the VM over one +`netsim ssh` argv. The guest then, as the operator: + +1. installs the key at `~/.ssh/skills_deploy` and clones + `git@github.com:satforgedev/skills` via `GIT_SSH_COMMAND` (parent repo over + SSH/deploy-key; the `astral-docs` submodule is public HTTPS — no key needed); +2. builds the `satforge-skills` linker (Go is already on the node from + `install-astrald`); +3. `satforge-skills link astral-agent --target qwen` → installs into + `~/.qwen/skills/astral-agent` (Qwen Code reads `SKILL.md`, frontmatter intact, + from there). The clone stays in `~/satforge-skills`, so the install's symlinks + resolve and the operator can re-link/pull other skills later. + +Idempotent: re-running `git pull`s the default branch, `unlink`s, then `link`s again. + +## Environment + +| Var | Default | Meaning | +|---|---|---| +| `SATFORGE_SKILLS_DEPLOY_KEY` | *(required)* | host path to the private deploy key for the repo | +| `SATFORGE_SKILLS_REPO` | `git@github.com:satforgedev/skills` | repo SSH URL (clones the default branch) | + +## Security note + +For now the deploy key is **left in the VM** (and therefore in the saved +snapshot) — simplest, and lets the operator re-pull skills. 
This is a private key +inside a shareable stage; we may switch to wiping it before the snapshot (inject +→ clone/build/link → remove key) if that exposure matters. See the `NOTE` in +`run.sh`. + +If outbound SSH:22 is ever blocked in the sim, point `SATFORGE_SKILLS_REPO` at +`ssh://git@ssh.github.com:443/satforgedev/skills`. + +## Scope + +Installs exactly one skill (`astral-agent`). `node2` is untouched — only the +operator node needs it. diff --git a/netsim/tasks/configure-astral-agent/run.sh b/netsim/tasks/configure-astral-agent/run.sh new file mode 100755 index 00000000..55ab77c4 --- /dev/null +++ b/netsim/tasks/configure-astral-agent/run.sh @@ -0,0 +1,79 @@ +#!/bin/sh +# configure-astral-agent: install the astral-agent skill into the Qwen Code +# operator by having the VM clone the (private) satforgedev/skills repo with an +# injected deploy key and run the linker itself. +# configure-astral-agent [--vm ] [--user ] +# Default: --vm node1 --user tester (the operator created by install-qwen-code). +# +# The HOST owns the deploy key; the VM never needs GitHub credentials of its own. +# run.sh reads the private key path from $SATFORGE_SKILLS_DEPLOY_KEY, base64-ships +# it in over a single `netsim ssh` argv, and the guest then: +# 1. installs the key for the operator and clones +# git@github.com:satforgedev/skills (parent over SSH via the deploy key; +# the astral-docs submodule is public HTTPS, so it needs no key), +# 2. builds the satforge-skills linker (Go is already on the node from +# install-astrald), +# 3. runs `link astral-agent --target qwen` -> ~/.qwen/skills/astral-agent. +# +# NOTE: for now the deploy key is LEFT in the VM (simpler; lets the operator +# re-clone/pull skills later), which means it also lives in the saved snapshot. +# We may switch to wiping the key before the snapshot if that exposure matters. 
+set -eu + +VM=node1 +USER_NAME=tester +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --user) [ $# -ge 2 ] || { echo "need name after --user" >&2; exit 64; }; USER_NAME=$2; shift 2 ;; + *) echo "usage: configure-astral-agent [--vm ] [--user ]" >&2; exit 64 ;; + esac +done + +REPO=${SATFORGE_SKILLS_REPO:-git@github.com:satforgedev/skills} +KEY=${SATFORGE_SKILLS_DEPLOY_KEY:-} +[ -n "$KEY" ] || { echo "set SATFORGE_SKILLS_DEPLOY_KEY to the deploy key path for $REPO" >&2; exit 1; } +[ -r "$KEY" ] || { echo "deploy key not readable: $KEY" >&2; exit 1; } +key_b64=$(base64 -w0 "$KEY") + +REMOTE_BODY=$(cat <<'EOS' +set -eu +home=$(getent passwd "$u" | cut -d: -f6) +[ -n "$home" ] || { echo "user '$u' not found on $(hostname)" >&2; exit 1; } +command -v git >/dev/null 2>&1 || { echo "git missing on $(hostname)" >&2; exit 1; } + +install -d -m 700 -o "$u" -g "$u" "$home/.ssh" "$home/.netsim" +printf '%s' "$key_b64" | base64 -d > "$home/.ssh/skills_deploy" +chmod 600 "$home/.ssh/skills_deploy" +chown "$u:$u" "$home/.ssh/skills_deploy" + +# Guest-side provisioning, run as the operator. Quoted heredoc: fully literal; +# the repo URL arrives as the sole positional arg. github's host key is auto-accepted on +# first connect. If outbound SSH:22 is ever blocked, switch the URL to +# ssh://git@ssh.github.com:443/satforgedev/skills. +cat > "$home/.netsim/setup-skill.sh" <<'SCRIPT' +#!/bin/sh +set -eu +export PATH=/usr/local/go/bin:$PATH +export GIT_SSH_COMMAND="ssh -i $HOME/.ssh/skills_deploy -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new" +repo=$1 +src=$HOME/satforge-skills +[ -d "$src/.git" ] || git clone --recurse-submodules "$repo" "$src" +cd "$src" +git pull --ff-only --quiet 2>/dev/null || true +git submodule update --init --recursive --quiet +go build -C bin/satforge-skills -o satforge-skills .
+bin="$src/bin/satforge-skills/satforge-skills" +"$bin" unlink astral-agent --target qwen >/dev/null 2>&1 || true # idempotent re-run +"$bin" link astral-agent --target qwen +SCRIPT +chown "$u:$u" "$home/.netsim/setup-skill.sh" + +su - "$u" -c "sh '$home/.netsim/setup-skill.sh' '$repo'" +echo "configure-astral-agent: $(hostname) cloned skills + linked astral-agent (deploy key left in place)" +EOS +) + +echo "configure-astral-agent: injecting deploy key + linking on $VM (user $USER_NAME) ..." +netsim ssh "$VM" -- "u='$USER_NAME' key_b64='$key_b64' repo='$REPO'; $REMOTE_BODY" +echo "configure-astral-agent: done on $VM" diff --git a/netsim/tasks/configure-astral-agent/verify.sh b/netsim/tasks/configure-astral-agent/verify.sh new file mode 100755 index 00000000..c41ad1cb --- /dev/null +++ b/netsim/tasks/configure-astral-agent/verify.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# verify configure-astral-agent (same args as run.sh): the astral-agent skill is +# installed for the operator where Qwen Code reads it +# (~/.qwen/skills/astral-agent), with SKILL.md frontmatter intact, the +# references/ dir, and the astral-docs mount present, owned by the operator. 
+set -eu + +VM=node1 +USER_NAME=tester +while [ $# -gt 0 ]; do + case "$1" in + --vm) VM=$2; shift 2 ;; + --user) USER_NAME=$2; shift 2 ;; + *) shift ;; + esac +done + +REMOTE_CHECK=$(cat <<'EOS' +set -eu +home=$(getent passwd "$u" | cut -d: -f6) +d="$home/.qwen/skills/astral-agent" +[ -f "$d/SKILL.md" ] || { echo "missing $d/SKILL.md on $(hostname)" >&2; exit 1; } +head -n1 "$d/SKILL.md" | grep -qx -- '---' || { echo "SKILL.md frontmatter missing on $(hostname)" >&2; exit 1; } +[ -d "$d/references" ] || { echo "missing references/ on $(hostname)" >&2; exit 1; } +[ -f "$d/astral-docs/README.md" ] || { echo "astral-docs mount missing on $(hostname)" >&2; exit 1; } +owner=$(stat -c '%U' "$d") +[ "$owner" = "$u" ] || { echo "astral-agent owned by '$owner', expected '$u' on $(hostname)" >&2; exit 1; } +echo "$(hostname): astral-agent present for $u ($(find "$d" -type f | wc -l) files), frontmatter intact" +EOS +) + +netsim ssh "$VM" -- "u='$USER_NAME'; $REMOTE_CHECK" \ + || { echo "configure-astral-agent verify FAILED on $VM" >&2; exit 1; } +echo "verified astral-agent skill on: $VM" From b98a48d20c2c2c136c08f0d78a3d049ca4a68ded Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 18 Jun 2026 11:59:30 +0200 Subject: [PATCH 06/57] netsim: fix configure-astral-agent verify file count for symlinks The astral-agent skill is installed as a tree of symlinks, so the verify file count used `find`, which does not traverse symlinked directories and undercounted. Use `find -L` to follow symlinks (and silence transient errors) so the count reflects the materialized tree. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- netsim/tasks/configure-astral-agent/verify.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netsim/tasks/configure-astral-agent/verify.sh b/netsim/tasks/configure-astral-agent/verify.sh index c41ad1cb..64994880 100755 --- a/netsim/tasks/configure-astral-agent/verify.sh +++ b/netsim/tasks/configure-astral-agent/verify.sh @@ -25,7 +25,7 @@ head -n1 "$d/SKILL.md" | grep -qx -- '---' || { echo "SKILL.md frontmatter missi [ -f "$d/astral-docs/README.md" ] || { echo "astral-docs mount missing on $(hostname)" >&2; exit 1; } owner=$(stat -c '%U' "$d") [ "$owner" = "$u" ] || { echo "astral-agent owned by '$owner', expected '$u' on $(hostname)" >&2; exit 1; } -echo "$(hostname): astral-agent present for $u ($(find "$d" -type f | wc -l) files), frontmatter intact" +echo "$(hostname): astral-agent present for $u ($(find -L "$d" -type f 2>/dev/null | wc -l) files via symlinks), frontmatter intact" EOS ) From 161b3b10bf76dcc2ea37da3a7c4f4cff58d12a22 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 18 Jun 2026 11:59:30 +0200 Subject: [PATCH 07/57] netsim: add bootstrap-user task Thin-prompt, skill-driven swarm task: a two-sentence prompt tells the in-VM Qwen operator to make node1 a User-controlled node by following its astral-agent skill's node-setup playbook (software-User path), without restating the procedure. run.sh base64-ships the prompt over one `netsim ssh` argv and runs `qwen -y` as tester; verify.sh independently reads the persisted User token and asserts apphost.whoami = User id and user.info returns the active contract. Standalone (not in lab.story): `netsim task --stage astrald-lab --save astrald-user bootstrap-user`. Validated end-to-end on a live astrald-lab. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- netsim/tasks/bootstrap-user/README.md | 83 +++++++++++++++++++++++++++ netsim/tasks/bootstrap-user/prompt.md | 6 ++ netsim/tasks/bootstrap-user/run.sh | 58 +++++++++++++++++++ netsim/tasks/bootstrap-user/verify.sh | 41 +++++++++++++ 4 files changed, 188 insertions(+) create mode 100644 netsim/tasks/bootstrap-user/README.md create mode 100644 netsim/tasks/bootstrap-user/prompt.md create mode 100755 netsim/tasks/bootstrap-user/run.sh create mode 100755 netsim/tasks/bootstrap-user/verify.sh diff --git a/netsim/tasks/bootstrap-user/README.md b/netsim/tasks/bootstrap-user/README.md new file mode 100644 index 00000000..b4c60afb --- /dev/null +++ b/netsim/tasks/bootstrap-user/README.md @@ -0,0 +1,83 @@ +# bootstrap-user + +A netsim task that turns the operator node into a **User-controlled node**, +driven by the Qwen Code agent running inside the VM. It is the first half of the +swarm phase: it establishes identity only; it does not link or claim anything +(that is [`link-swarm`](../link-swarm/README.md)). + +``` +bootstrap-user [--vm ] # default: node1 (the VM carrying Qwen) +``` + +After it runs, the node holds an active `mod.user.swarm_access_action` contract +(issuer = a fresh software User, subject = this node), and a User-bound apphost +token is persisted so later tasks can act as the User. It produces a new stage +on top of the lab base — run it standalone against `astrald-lab` with: + +```sh +netsim task --stage astrald-lab --save astrald-user bootstrap-user +``` + +(`bootstrap-user` is deliberately *not* part of `lab.story`: `astrald-lab` stays +the reusable base, and each swarm step is an incremental stage layered on it.) 
+ +## Execution model + +`run.sh` runs on the host (cwd = simulation root) and does almost nothing +itself: it base64-ships [`prompt.md`](prompt.md) to the guest in a single +`netsim ssh <vm> -- <program>` call and invokes `qwen` as user `tester`, +non-interactively (one-shot positional prompt + `-y`), against that prompt. The +astral work — minting a software User, signing and installing the node contract, +persisting a token — is carried out by the agent, not by the script. + +This is the design principle, taken one step further than the other tasks: +**tiny script, thin prompt, intelligence in the skill.** The prompt does not +spell out the contract procedure — it states the situation and the goal in plain +sentences and tells the agent to follow its **astral-agent** skill, whose +`node-setup` playbook (software-User path) is exactly this flow. The prompt +carries only what the skill cannot know: the machine-specific files `verify.sh` +will look for, idempotency, and the success criterion. If the skill is present +and sufficient, that is all the agent needs; exercising that is part of the +test. + +The agent writes its artifacts under `~tester/.netsim/`: + +| File | Purpose | +|---|---| +| `user.id` | the User's hex public key (the User identity) | +| `user.token` | a User-bound apphost access token (also exported in `~/.bashrc`) | +| `bootstrap-user.log` | the agent's run log | + +`verify.sh` is an **independent** re-check: it reads `user.id` + `user.token`, +acts as the User, and asserts `apphost.whoami` reports the User and `user.info` +returns the active contract (which the op rejects with code `2` when absent). + +## The contract flow (driven by the skill, not the prompt) + +For reference only — this is what the astral-agent `node-setup` playbook +(software User) walks the agent through; the prompt does **not** restate it: + +1. Mint a `secp256k1` User key via the `bip137sig` ops + (`new_entropy → mnemonic → seed → derive_key`). +2.
Store the private key via `objects.store` so `crypto` indexes it as a signer. +3. Derive the User identity (`crypto.public_key`). +4. (optional) `dir.set_alias` for a readable name. +5. `user.new_node_contract -user ` (subject defaults to this node). +6. `auth.sign_contract` — co-signs as issuer + subject (both keys are local). +7. Install at tree path `/mod/user/config/active_contract` via `tree.set`. +8. `apphost.create_token` for the User; persist + export it. +9. Confirm with `apphost.whoami` + `user.info`. + +## Not yet run end-to-end + +The harness mechanics mirror the validated `configure-astral-agent` task, and +the `qwen -y ""` invocation matches what was confirmed against the live +lab. Still unverified until run on a fresh `astrald-lab` stage: + +* whether the thin prompt reliably triggers the astral-agent skill and the agent + follows the `node-setup` playbook to a passing `verify.sh`; +* that `objects.store` of a `crypto.private_key` and `tree.set` of the active + contract succeed under the node's local access without a pre-minted token; +* that the agent keeps `ASTRALD_APPHOST_TOKEN` exported within its own session + for the User-scoped steps (the skill's "Acting as the User from the CLI" + section covers this). diff --git a/netsim/tasks/bootstrap-user/prompt.md b/netsim/tasks/bootstrap-user/prompt.md new file mode 100644 index 00000000..83c12a76 --- /dev/null +++ b/netsim/tasks/bootstrap-user/prompt.md @@ -0,0 +1,6 @@ +On this machine there is an `astrald` node running. It has its own node identity +but no User. Make it a User-controlled node under a fresh software User, following +your **astral-agent** skill's node-setup playbook. + +Then write the User's id to `~/.netsim/user.id` and a User-bound apphost token to +`~/.netsim/user.token`. The skill won't mention this — it's how the run is checked. 
diff --git a/netsim/tasks/bootstrap-user/run.sh b/netsim/tasks/bootstrap-user/run.sh new file mode 100755 index 00000000..6d5faa39 --- /dev/null +++ b/netsim/tasks/bootstrap-user/run.sh @@ -0,0 +1,58 @@ +#!/bin/sh +# bootstrap-user: turn the operator node into a User-controlled node, driven by +# the Qwen Code agent running INSIDE the VM. +# bootstrap-user [--vm ] (default: node1 — the VM carrying Qwen) +# +# Runs ON THE HOST (cwd = simulation root). This script is deliberately tiny: it +# ships prompt.md to the agent on the guest and lets the agent do the astral +# work via astral-query against the local node API. The intelligence lives in +# the prompt and — by design — in the agent's astral-agent skill, not here. The +# whole remote program travels as ONE argv to `netsim ssh` (no reliance on stdin +# forwarding); the prompt rides along base64-encoded so a multi-line file never +# fights shell quoting. +set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: bootstrap-user [--vm ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/bootstrap-user.prompt" +chown -R tester:tester "$d" + +# Run the agent as `tester` (qwen is installed for that user), non-interactively. +# Invocation matches what was validated against the live lab: one-shot positional +# prompt + `-y` (auto-approve). The prompt is passed positionally via command +# substitution; the substituted text is used literally (not re-scanned), so the +# backticks and $-signs inside it are safe. 
+su - tester -c 'qwen -y "$(cat /home/tester/.netsim/bootstrap-user.prompt)"' \ + > "$d/bootstrap-user.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/bootstrap-user.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.sh does the authoritative, independent check. +[ -s "$d/user.id" ] || { echo "agent recorded no User id on $(hostname)" >&2; exit 1; } +echo "bootstrap-user: agent finished on $(hostname); User id $(cat "$d/user.id")" +EOS +) + +echo "bootstrap-user: driving Qwen operator on $VM ..." +# assignment prefix carries the prompt to the guest; body re-parses it +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "bootstrap-user: done on $VM" diff --git a/netsim/tasks/bootstrap-user/verify.sh b/netsim/tasks/bootstrap-user/verify.sh new file mode 100755 index 00000000..b9a4a734 --- /dev/null +++ b/netsim/tasks/bootstrap-user/verify.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# verify bootstrap-user (same args as run.sh): the target node must be a +# User-controlled node. INDEPENDENT re-check -- it does not trust run.sh's +# output: it reads the persisted User credentials, acts AS the User, and asserts +# the node answers as a user node. user.info itself rejects (code 2) when there +# is no active contract, so a successful call IS the proof. 
+set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) VM=$2; shift 2 ;; + *) shift ;; + esac +done + +REMOTE_CHECK=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +[ -s "$d/user.id" ] || { echo "no recorded User id on $(hostname)" >&2; exit 1; } +[ -s "$d/user.token" ] || { echo "no recorded User token on $(hostname)" >&2; exit 1; } +uid=$(cat "$d/user.id") +ASTRALD_APPHOST_TOKEN=$(cat "$d/user.token"); export ASTRALD_APPHOST_TOKEN + +# acting as the User: whoami must report the User identity +who=$(astral-query apphost.whoami -out json) \ + || { echo "apphost.whoami failed on $(hostname)" >&2; exit 1; } +echo "$who" | grep -q "$uid" \ + || { echo "whoami != User id on $(hostname): $who" >&2; exit 1; } + +# active contract present (user.info rejects with code 2 if none) +astral-query user.info -out json \ + || { echo "user.info failed on $(hostname) -- no active contract?" >&2; exit 1; } + +echo "$(hostname): user node OK (User $uid)" +EOS +) + +netsim ssh "$VM" -- "$REMOTE_CHECK" \ + || { echo "bootstrap-user verify FAILED on $VM" >&2; exit 1; } +echo "verified user node on: $VM" From d58eae7f6a4d37a69d43ff52ccaf49b9406085ae Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 18 Jun 2026 11:59:30 +0200 Subject: [PATCH 08/57] netsim: add link-swarm task Thin-prompt, skill-driven swarm task chained onto bootstrap-user: a two-sentence prompt drives the Qwen operator to claim node2 into the User's swarm via its astral-agent skill's node-claiming playbook (`user.claim`, with nearby handling reachability). verify.sh is an independent both-ends check -- both nodes hold a contract from the same User, node1 lists node2 as a Linked sibling, and a mutual link exists -- parsing the astral-query JSON object-stream line-by-line. Standalone: `netsim task --stage astrald-user --save astrald-swarm link-swarm`. Validated end-to-end (two nodes in one User Swarm). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- netsim/tasks/link-swarm/README.md | 73 +++++++++++++++++++++ netsim/tasks/link-swarm/prompt.md | 3 + netsim/tasks/link-swarm/run.sh | 65 +++++++++++++++++++ netsim/tasks/link-swarm/verify.sh | 104 ++++++++++++++++++++++++++++++ 4 files changed, 245 insertions(+) create mode 100644 netsim/tasks/link-swarm/README.md create mode 100644 netsim/tasks/link-swarm/prompt.md create mode 100755 netsim/tasks/link-swarm/run.sh create mode 100755 netsim/tasks/link-swarm/verify.sh diff --git a/netsim/tasks/link-swarm/README.md b/netsim/tasks/link-swarm/README.md new file mode 100644 index 00000000..ea019868 --- /dev/null +++ b/netsim/tasks/link-swarm/README.md @@ -0,0 +1,73 @@ +# link-swarm + +A netsim task that claims the second node into the User's swarm, driven by the +Qwen Code agent on node1. It is the second half of the swarm phase: node1 is +already a User node (from [`bootstrap-user`](../bootstrap-user/README.md)); this +task brings node2 under the same User so the two share one swarm. + +``` +link-swarm [--vm ] # default: node1 (the VM carrying Qwen) +``` + +It produces a new stage on top of the bootstrapped lab — its default starting +point is `astrald-user` (the `astrald-lab` stage after `bootstrap-user`): + +```sh +netsim task --stage astrald-user --save astrald-swarm link-swarm +``` + +(Like `bootstrap-user`, it is **not** part of `lab.story`; each swarm step is an +incremental stage layered on the reusable base.) + +## Execution model + +Identical mechanic to `bootstrap-user`: **tiny script, thin prompt, intelligence +in the skill.** `run.sh` base64-ships [`prompt.md`](prompt.md) to node1 in a +single `netsim ssh` call and runs `qwen -y` as `tester`. The prompt is two +sentences — the operator is told it controls a User node, that another astrald +node is on the local network, and to claim it following its **astral-agent** +skill's `node-claiming` playbook. 
The claim flow (`user.claim -target <node-id>`, +reachability via the `nearby` module on the shared LAN) lives entirely in the +skill; the prompt restates none of it. + +Reachability is already de-risked (see the task doc's discovery log): `nearby` +discovery works across netsim's per-VM NAT, so the agent needs no manual +`nodes.add_endpoint`. + +## What `verify.sh` checks (independent, both ends) + +It pulls raw JSON from both nodes and asserts **three facts on the host** that +together prove the swarm from both ends: + +1. **Both nodes hold an active contract issued by the same User** — `user.info` + on node1 *and* node2 each shows `Issuer == <User id>` with `Subject ==` + that node. node2 independently confirming the same User is the key both-ends + proof that the claim took. +2. **node1, as the User, lists node2 as a `Linked` sibling** (`user.swarm_status`). +3. **A mutual authenticated link exists** — node2's `nodes.links` shows a link + whose `RemoteIdentity` is node1. + +node1 acts as the User via its persisted token; node2 answers under its node +identity (it holds the contract after the claim, so no token is needed there). + +`astral-query … -out json` emits a JSON **stream** (one object per line + an +`{"Type":"eos"}` terminator), so output is parsed line-by-line, not as one +document. Parsing/assertions run with host `python3`. + +## Why not a "routed query" proof? + +The first cut planned to prove routing with `astral-query <node-id>:.spec`. That is +**not valid**: node introspection ops (`.spec`, `.id`, `.ping`) are served +locally and do **not** route to a sibling addressed by node-id — they fail even +on a fully formed swarm (verified live: every `<node-id>:<op>` returned a routing +failure while the swarm was demonstrably linked). The earlier discovery-log +hypothesis that "swarm membership unlocks `<node-id>:.spec`" is therefore +**disproven**. The contract + link + sibling triple above is the correct, +reproducible both-ends proof.
+ +## Validated end-to-end + +Run `astrald-user → astrald-swarm` (2026-06-17): the thin prompt drove the +operator to `user.claim` node2 into the User's swarm; both nodes ended under one +User (`02ad7ef7…`) with a mutual link, and the rewritten `verify.sh` passes. +Stage `astrald-swarm` saved. diff --git a/netsim/tasks/link-swarm/prompt.md b/netsim/tasks/link-swarm/prompt.md new file mode 100644 index 00000000..faeb893d --- /dev/null +++ b/netsim/tasks/link-swarm/prompt.md @@ -0,0 +1,3 @@ +On this machine an `astrald` node is running, and you control it as its User. +Another astrald node is on the local network, not yet in your swarm. Bring it +into your swarm, following your **astral-agent** skill's node-claiming playbook. diff --git a/netsim/tasks/link-swarm/run.sh b/netsim/tasks/link-swarm/run.sh new file mode 100755 index 00000000..297f945f --- /dev/null +++ b/netsim/tasks/link-swarm/run.sh @@ -0,0 +1,65 @@ +#!/bin/sh +# link-swarm: claim the second node into the User's swarm, driven by the Qwen +# Code agent running INSIDE node1 (which is already a User node from +# bootstrap-user — default starting stage: astrald-user). +# link-swarm [--vm ] (default: node1 — the VM carrying Qwen) +# +# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user: +# tiny script, thin prompt, intelligence in the agent's astral-agent skill. The +# whole remote program travels as ONE argv to `netsim ssh`; the prompt rides +# along base64-encoded so a multi-line file never fights shell quoting. 
+set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: link-swarm [--vm ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/link-swarm.prompt" +chown -R tester:tester "$d" + +# Run the agent as `tester` (qwen is installed for that user), non-interactively. +# Invocation matches what was validated for bootstrap-user: one-shot positional +# prompt + `-y` (auto-approve). +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/link-swarm.prompt)"' \ + > "$d/link-swarm.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/link-swarm.log" >&2 + exit 1 + } + +# Soft smoke-check only (verify.sh is the authoritative, independent check). +# node1 already holds a User token from bootstrap-user, so we can peek at the +# swarm here; don't fail the run on a shape mismatch — leave the verdict to +# verify.sh. CONFIRM the user.swarm_status JSON field for a linked sibling. +tok="$d/user.token" +if [ -s "$tok" ]; then + if ASTRALD_APPHOST_TOKEN=$(cat "$tok") astral-query user.swarm_status -out json 2>/dev/null \ + | grep -q '"Linked":true'; then + echo "link-swarm: $(hostname) reports a linked sibling" + else + echo "link-swarm: WARNING $(hostname) shows no linked sibling yet (verify.sh decides)" >&2 + fi +fi +echo "link-swarm: agent finished on $(hostname)" +EOS +) + +echo "link-swarm: driving Qwen operator on $VM ..." 
+# assignment prefix carries the prompt to the guest; body re-parses it +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "link-swarm: done on $VM" diff --git a/netsim/tasks/link-swarm/verify.sh b/netsim/tasks/link-swarm/verify.sh new file mode 100755 index 00000000..ccffe6ed --- /dev/null +++ b/netsim/tasks/link-swarm/verify.sh @@ -0,0 +1,104 @@ +#!/bin/sh +# verify link-swarm: node1 and node2 must be linked into one User swarm. +# INDEPENDENT both-ends check -- it does not trust run.sh. It pulls raw JSON from +# both nodes and asserts THREE facts on the host; together they prove the swarm +# from both ends: +# 1. both nodes hold an active contract issued by the SAME User +# (user.info: Issuer == the bootstrap User on each; Subject == that node); +# 2. node1, acting as the User, lists node2 as a Linked sibling +# (user.swarm_status); +# 3. a mutual authenticated link exists (node2 nodes.links -> node1). +# +# NOTE on "routed query": an earlier plan probed `:.spec` as the proof. +# That is NOT valid -- node introspection ops (.spec/.id/.ping) are served +# locally and do not route to a sibling by node-id, so they fail even on a fully +# formed swarm. The contract + link + sibling triple above is the real proof. +# +# astral-query ... -out json emits a JSON *stream* (one object per line, then an +# {"Type":"eos"} terminator), so everything is parsed line-by-line, not as one +# document. +set -eu + +VM1=node1 +VM2=node2 +while [ $# -gt 0 ]; do + case "$1" in + --node1) VM1=$2; shift 2 ;; + --node2) VM2=$2; shift 2 ;; + *) shift ;; + esac +done + +# Pull the User id and the four JSON blobs. Single-quoted remote args so the +# command substitutions run on the guest. node1 acts as the User (token from +# bootstrap-user); node2 answers under its node identity (it holds the contract). 
+U=$(netsim ssh "$VM1" -- 'cat /home/tester/.netsim/user.id') +n1_info=$(netsim ssh "$VM1" -- 'export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token); astral-query user.info -out json') +n1_swarm=$(netsim ssh "$VM1" -- 'export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token); astral-query user.swarm_status -out json') +n2_info=$(netsim ssh "$VM2" -- 'astral-query user.info -out json') +n2_links=$(netsim ssh "$VM2" -- 'astral-query nodes.links -out json') + +UU=$(printf '%s' "$U" | tr -d '[:space:]') +export UU N1_INFO="$n1_info" N1_SWARM="$n1_swarm" N2_INFO="$n2_info" N2_LINKS="$n2_links" + +python3 - <<'PY' +import os, sys, json + +def objs(s): + out = [] + for ln in s.splitlines(): + ln = ln.strip() + if not ln: + continue + try: + out.append(json.loads(ln)) + except json.JSONDecodeError: + pass + return out + +def contract(info): + for o in objs(info): + ob = o.get("Object") + if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): + c = ob["Contract"].get("Contract", {}) + return c.get("Issuer"), c.get("Subject") + return None, None + +U = os.environ["UU"] +i1, s1 = contract(os.environ["N1_INFO"]) +i2, s2 = contract(os.environ["N2_INFO"]) + +sib = None +for o in objs(os.environ["N1_SWARM"]): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("Linked"): + sib = ob.get("Identity") + break + +linkback = False +for o in objs(os.environ["N2_LINKS"]): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("RemoteIdentity") == s1: + linkback = True + break + +errs = [] +if not U: errs.append("no User id recorded on node1 (~/.netsim/user.id)") +if i1 != U: errs.append(f"node1 contract issuer {i1} != User {U}") +if i2 != U: errs.append(f"node2 contract issuer {i2} != User {U} (node2 not claimed under this User)") +if not s1: errs.append("node1 has no active contract subject") +if not s2: errs.append("node2 has no active contract subject") +if s2 and sib != s2: errs.append(f"node1's linked sibling {sib} != node2 {s2}") +if 
not linkback: errs.append(f"node2 has no active link back to node1 ({s1})") + +if errs: + sys.stderr.write("link-swarm verify FAILED:\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + sys.exit(1) + +print(f"swarm OK: User {U[:8]}.. ; node1 {s1[:8]}.. <-link-> node2 {s2[:8]}.. ; " + f"both under one User; node1 lists node2 as Linked sibling") +PY + +echo "verified swarm link on: $VM1 and $VM2" From ae0cb7a61affff4171fe6e1c8687015524750696 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 18 Jun 2026 11:59:30 +0200 Subject: [PATCH 09/57] netsim: add share-object task First scenario past swarm formation: store an astral object on node1 and prove sibling node2 can obtain it by Object ID across the swarm. The thin prompt drives the Qwen operator (acting as its User) to objects.store a text payload and record the id; the cross-swarm fetch lives in verify.sh, not the prompt, which from node2 tries a ladder -- explicit-target :objects.load, transparent objects.load, then objects.find -- and asserts the bytes match, distinguishing a routing failure from an auth rejection. Standalone: `netsim task --stage astrald-swarm --save astrald-shared share-object`. Drafted; not yet run end-to-end (the cross-swarm read hop is inferred from the docs). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- netsim/tasks/share-object/README.md | 143 ++++++++++++++++++ netsim/tasks/share-object/prompt.md | 10 ++ netsim/tasks/share-object/run.sh | 63 ++++++++ netsim/tasks/share-object/verify.sh | 224 ++++++++++++++++++++++++++++ 4 files changed, 440 insertions(+) create mode 100644 netsim/tasks/share-object/README.md create mode 100644 netsim/tasks/share-object/prompt.md create mode 100755 netsim/tasks/share-object/run.sh create mode 100755 netsim/tasks/share-object/verify.sh diff --git a/netsim/tasks/share-object/README.md b/netsim/tasks/share-object/README.md new file mode 100644 index 00000000..f3e23dbb --- /dev/null +++ b/netsim/tasks/share-object/README.md @@ -0,0 +1,143 @@ +# share-object + +A netsim task that stores an astral **object** on node1 and proves a swarm +sibling (node2) can obtain it **across the swarm** by its Object ID. It is the +first scenario *past* swarm formation: where `bootstrap-user` + `link-swarm` +build the swarm, `share-object` makes the swarm carry data — astral's core act, +"Identities exchanging Objects." + +``` +share-object [--vm ] # default: node1 (the VM carrying Qwen) +``` + +It produces a new stage on top of the formed swarm — its default starting point +is `astrald-swarm` (two nodes in one User Swarm): + +```sh +netsim task --stage astrald-swarm --save astrald-shared share-object +``` + +(Like the other swarm tasks, it is **not** part of `lab.story`; each step is an +incremental stage layered on the reusable base.) + +## Execution model + +Same mechanic as `bootstrap-user` / `link-swarm`: **tiny script, thin prompt, +intelligence in the skill.** `run.sh` base64-ships [`prompt.md`](prompt.md) to +node1 in a single `netsim ssh` call and runs `qwen -y` as `tester`. 
+ +The split is deliberate and is the heart of this task's design: + +* **The agent does only the STORE half.** The prompt tells the operator — acting + as its User — to store a short, distinctive text payload as an astral object + via the objects protocol (its **astral-agent** skill navigates + `protocols/README → objects/README → objects.store`), and to record the + returned Object ID. The store path is reliably reachable from a thin prompt. +* **The cross-swarm FETCH lives in `verify.sh`, not the prompt.** A thin prompt + names no target, and the skill's rule is *"default Query target is the local + Node; set an explicit target only when the user names one"* — so an + agent-driven fetch would be non-deterministic. Putting the fetch in `verify.sh` + lets it address node2 deterministically and keeps the cross-swarm assertion + independent of agent behaviour. + +The agent writes its artifacts under `~tester/.netsim/`: + +| File | Purpose | +|---|---| +| `object.id` | the `data1…` Object ID returned by `objects.store` (the handle `verify.sh` fetches by) | +| `object.payload` | the exact bytes the agent stored (`verify.sh` compares node2's fetched bytes against this) | +| `share-object.log` | the agent's run log | + +The store itself is, per the docs: +`echo '{"Type":"string8","Object":""}' | astral-query objects.store -in json -out json` +→ `{"Type":"object_id.sha256","Object":"data1…"}`, run under the User token +(`ASTRALD_APPHOST_TOKEN` from `~/.netsim/user.token`) so the object lands in +node1's write-default repo as the User. + +## What `verify.sh` checks (independent, both ends) + +It reads the id + payload the agent persisted on node1, resolves node1's node +identity host-side (the `Subject` of node1's active contract, cross-checked +against node2's `nodes.links` `RemoteIdentity`), and then tries to pull that +exact id **from node2's vantage**, asserting the bytes match. 
node1 acts as the +User via its token; node2 answers under its node identity (anonymous apphost +caller, no token — exactly like `link-swarm`'s node2-side checks). + +**The cross-swarm hop is inferred from the docs, not demonstrated by them.** The +astral-docs describe a `network` zone and a finder/provider layer, but contain no +worked example of one swarm member reading another's object by id. So — exactly +as `link-swarm` discovered that `<node-id>:.spec` does **not** route — +`verify.sh` probes a **ladder** on node2 and reports which hop routes: + +1. **explicit target** `astral-query <node1-id>:objects.load -id <id> -out json` + — query-target routing over the swarm link. **Primary**, because it does *not* + depend on node2's network zone: an anonymous apphost caller has `ZoneNetwork` + **stripped**, so it can't resolve a remote provider by zone, but it *can* + address node1 directly and let node1 serve the read. +2. **transparent** `astral-query objects.load -id <id> -out json` — relies on the + read context's zone defaulting to all zones (incl. the network zone). Likely + **blocked** for the anonymous host-side caller; kept as a bonus probe. +3. **provider find** `astral-query objects.find -id <id> -out json` — returns + provider **identities**, not bytes. If *only* this works, discovery crosses + the swarm but the byte read does not — a partial finding, not a pass. + +Before the read it runs a locality pre-check (`objects.contains -repo local` on +node2) so a pass reflects a genuine remote pull, not a coincidental local copy +(advisory — `objects.contains` is probabilistic, so it warns, never hard-fails). +It also separates an **authorization** rejection (`mod.objects.read_object_action` +denies the read) from a **routing** failure — different findings, never +conflated. + +**PASS** iff node2 obtained the exact stored bytes for the agent-reported id +across the swarm (hop 1 or 2). 
`astral-query … -out json` emits a JSON **stream** +(one object/line + an `{"Type":"eos"}` terminator), parsed line-by-line with host +`python3`. + +## Memory repository — a separate task, by decision + +The docs expose a `memory` repository group (`objects.new_mem`, the `mem0`/ +`memory` repos) — an in-memory, **non-default** write target. `share-object` +deliberately does **not** use it: it must test the *default* cross-swarm path +(node1's standard write-default repo, node2 pulls), and routing an object through +a memory repo would muddy whether a *default-repo* object crosses the swarm. +Ephemeral / `objects.new_mem` behaviour deserves its own focused task layered on +`astrald-shared` later (captured in Triage). + +## Skill gap this scenario exercises + +The skill has playbooks only for `swarm-management` (`node-setup`, +`node-claiming`) — there is **no objects storage/transfer playbook**. The store +half is reachable from the protocol docs alone (a real test of "are the docs +sufficient without a playbook?"); the transfer half having no playbook is itself +a finding. If the thin store prompt proves shaky live, the remedy is a small +`objects` playbook in the skill, not a fatter prompt. + +## Not yet run end-to-end + +Syntax-clean and registered via `link.sh` (`netsim tasks` lists `share-object`). +The harness mechanics mirror the validated `bootstrap-user` / `link-swarm` +tasks, and the `objects.store` form is taken verbatim from `objects.store.md`. +Open **CONFIRM** items, all to be pinned on the first live +`astrald-swarm → astrald-shared` run (the cross-swarm read is the inferred part): + +* Whether `astral-query <node1-id>:objects.load` actually **routes** the read to + node1 across the swarm and returns the bytes — or hits the same wall as + `<node-id>:.spec`. This is *the* unknown the run resolves. +* Whether an **anonymous** host-side caller on node2 is **authorized** to read + node1's object (`mod.objects.read_object_action` default policy in a one-User + swarm). 
If reads are denied, the likely fix is to mint a node2-side User token + (node2 holds the contract) and read as the User, or to move the fetch into the + operator on node1 — noted as the fallback lever, not baked into v1. +* Whether the transparent (no-target) read is genuinely blocked by the stripped + network zone, confirming the explicit-target form as the only working hop. +* The exact `objects.load` / `objects.find` / `objects.contains` stream shapes and + any `error_message` framing on this build (the parser is defensive but + unverified against live output). +* Whether the thin store prompt reliably drives the operator to `objects.store` + under the User token (vs an anonymous/local context) and to write both artifact + files. + +If the read does not route but `objects.find` does, that is a real discovery to +record (provider discovery crosses the swarm; byte read does not) — `verify.sh` +already detects and reports exactly that, the same way `link-swarm` reported the +`.spec` non-routing. diff --git a/netsim/tasks/share-object/prompt.md b/netsim/tasks/share-object/prompt.md new file mode 100644 index 00000000..2ecddb42 --- /dev/null +++ b/netsim/tasks/share-object/prompt.md @@ -0,0 +1,10 @@ +On this machine an `astrald` node is running and you control it as its User (a +User-bound apphost token is at `~/.netsim/user.token`). Acting as that User, +store a short, distinctive text payload as an astral object via the objects +protocol, following your **astral-agent** skill, and note the Object ID it +returns. + +Then write that Object ID to `~/.netsim/object.id` and the exact payload you +stored to `~/.netsim/object.payload`. The skill won't mention these files — they +are how the run is checked. Success means an Object ID is returned and both files +are written. 
diff --git a/netsim/tasks/share-object/run.sh b/netsim/tasks/share-object/run.sh new file mode 100755 index 00000000..fffbde1d --- /dev/null +++ b/netsim/tasks/share-object/run.sh @@ -0,0 +1,63 @@ +#!/bin/sh +# share-object: have the operator node store an astral object, driven by the Qwen +# Code agent running INSIDE node1 (already a User node in one swarm with node2 — +# default starting stage: astrald-swarm). +# share-object [--vm ] (default: node1 — the VM carrying Qwen) +# +# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user / +# link-swarm: tiny script, thin prompt, intelligence in the agent's astral-agent +# skill. The agent does ONLY the store half (store a payload, surface its Object +# ID); the cross-swarm fetch from node2 is left entirely to verify.sh, which can +# address the sibling deterministically. The whole remote program travels as ONE +# argv to `netsim ssh`; the prompt rides along base64-encoded so a multi-line +# file never fights shell quoting. +set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: share-object [--vm ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/share-object.prompt" +chown -R tester:tester "$d" + +# Run the agent as `tester` (qwen is installed for that user), non-interactively. +# Invocation matches what was validated for bootstrap-user / link-swarm: one-shot +# positional prompt + `-y` (auto-approve). 
+su - tester -c 'qwen -y "$(cat /home/tester/.netsim/share-object.prompt)"' \ + > "$d/share-object.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/share-object.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.sh does the authoritative, independent cross-swarm +# check. The agent must have recorded an Object ID and the payload it stored. +[ -s "$d/object.id" ] || { echo "agent recorded no Object ID on $(hostname) (~/.netsim/object.id)" >&2; exit 1; } +[ -s "$d/object.payload" ] || { echo "agent recorded no payload on $(hostname) (~/.netsim/object.payload)" >&2; exit 1; } +case "$(cat "$d/object.id")" in + data1*) : ;; + *) echo "WARNING $(hostname): object.id does not look like a data1… Object ID (verify.sh decides)" >&2 ;; +esac +echo "share-object: agent finished on $(hostname); stored object $(cat "$d/object.id")" +EOS +) + +echo "share-object: driving Qwen operator on $VM ..." +# assignment prefix carries the prompt to the guest; body re-parses it +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "share-object: done on $VM" diff --git a/netsim/tasks/share-object/verify.sh b/netsim/tasks/share-object/verify.sh new file mode 100755 index 00000000..1a7da88f --- /dev/null +++ b/netsim/tasks/share-object/verify.sh @@ -0,0 +1,224 @@ +#!/bin/sh +# verify share-object: an astral object stored on node1 must be obtainable by its +# sibling node2 ACROSS THE SWARM. INDEPENDENT host-side check -- it does not trust +# run.sh. It reads the id + payload the agent persisted on node1, then tries to +# pull that exact id FROM node2's vantage and asserts the bytes match. +# +# THE CROSS-SWARM HOP IS INFERRED, NOT DOCUMENTED (see README). 
The astral-docs +# describe a network zone + a finder/provider layer but no worked example of one +# swarm member reading another's object by id, so verify probes a LADDER and +# reports which hop routes -- exactly as link-swarm discovered that <node-id>:.spec +# does NOT route. Order (strongest -> weakest), all run on node2: +# 1. EXPLICIT TARGET astral-query <node1-id>:objects.load -id <id> -out json +# Query-target routing over the swarm link. Primary path: it does NOT rely +# on node2's network zone (an anonymous apphost caller has ZoneNetwork +# stripped), it addresses node1 directly; node1 serves the read locally. +# 2. TRANSPARENT astral-query objects.load -id <id> -out json +# Relies on the read context's zone defaulting to all zones (incl. the +# network zone) so node2 resolves node1 as provider. Likely BLOCKED for an +# anonymous host-side caller (network zone stripped) -- kept as a bonus probe. +# 3. PROVIDER FIND astral-query objects.find -id <id> -out json +# Returns provider IDENTITIES, not bytes. If only this works, discovery +# crosses the swarm but the byte read does not -- a partial finding, not a pass. +# +# PASS iff node2 obtained the EXACT stored bytes for the agent-reported id across +# the swarm (hop 1 or 2). A pre-check asserts node2 does not already hold the +# object locally, so a pass reflects a genuine remote pull. +# +# astral-query ... -out json emits a JSON *stream* (one object per line, then an +# {"Type":"eos"} terminator), so everything is parsed line-by-line, not as one +# document. +set -eu + +VM1=node1 +VM2=node2 +while [ $# -gt 0 ]; do + case "$1" in + --node1) VM1=$2; shift 2 ;; + --node2) VM2=$2; shift 2 ;; + *) shift ;; + esac +done + +# --- node1 side: the id + payload the agent persisted, and node1's node id ------ +# node1 acts as the User (token from bootstrap-user) so user.info returns the +# active contract whose Subject IS node1's node identity (the provider to target). 
+ID=$(netsim ssh "$VM1" -- 'cat /home/tester/.netsim/object.id') +PAY=$(netsim ssh "$VM1" -- 'cat /home/tester/.netsim/object.payload') +n1_info=$(netsim ssh "$VM1" -- 'export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token); astral-query user.info -out json') + +IDC=$(printf '%s' "$ID" | tr -d '[:space:]') + +# --- node2 side: cross-check node1's id, locality pre-check, then the fetch ladder +# node2 answers under its node identity (no token => anonymous apphost caller). +n2_links=$(netsim ssh "$VM2" -- 'astral-query nodes.links -out json' 2>/dev/null || true) +n2_contains=$(netsim ssh "$VM2" -- "astral-query objects.contains -repo local -id '$IDC' -out json" 2>/dev/null || true) +n2_find=$(netsim ssh "$VM2" -- "astral-query objects.find -id '$IDC' -out json" 2>/dev/null || true) +n2_transparent=$(netsim ssh "$VM2" -- "astral-query objects.load -id '$IDC' -out json" 2>/dev/null || true) + +# explicit-target read needs node1's node id; resolve it host-side first. +N1=$(printf '%s' "$n1_info" | python3 -c ' +import sys, json +for ln in sys.stdin: + ln = ln.strip() + if not ln: + continue + try: + o = json.loads(ln) + except json.JSONDecodeError: + continue + ob = o.get("Object") + if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): + c = ob["Contract"].get("Contract", {}) + if c.get("Subject"): + print(c["Subject"]); break +' 2>/dev/null || true) +# fall back to the RemoteIdentity of node2->node1 link if user.info parse failed +if [ -z "$N1" ]; then + N1=$(printf '%s' "$n2_links" | python3 -c ' +import sys, json +for ln in sys.stdin: + ln = ln.strip() + if not ln: + continue + try: + o = json.loads(ln) + except json.JSONDecodeError: + continue + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("RemoteIdentity"): + print(ob["RemoteIdentity"]); break +' 2>/dev/null || true) +fi + +n2_explicit="" +if [ -n "$N1" ]; then + n2_explicit=$(netsim ssh "$VM2" -- "astral-query '$N1':objects.load -id '$IDC' -out json" 2>/dev/null || true) +fi 
+ +export IDC PAY N1 N1_INFO="$n1_info" N2_LINKS="$n2_links" N2_CONTAINS="$n2_contains" \ + N2_FIND="$n2_find" N2_TRANSPARENT="$n2_transparent" N2_EXPLICIT="$n2_explicit" + +python3 - <<'PY' +import os, sys, json + +def objs(s): + """Parse a JSON object-stream (one object per line + an eos terminator).""" + out = [] + for ln in (s or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + out.append(json.loads(ln)) + except json.JSONDecodeError: + pass + return out + +def loaded_payload(stream): + """From an objects.load -out json stream, return the decoded payload string + (the stored string8's Object), or None. Skips eos / error_message frames.""" + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + ob = o.get("Object") + if isinstance(ob, str): + return ob + return None + +def errors(stream): + return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] + +def contains_local(stream): + """objects.contains -out json -> a bool frame. 
Returns True/False/None.""" + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + if isinstance(o.get("Object"), bool): + return o["Object"] + return None + +def find_identities(stream): + ids = [] + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + ob = o.get("Object") + if isinstance(ob, str): + ids.append(ob) + return ids + +ID = os.environ["IDC"] +N1 = os.environ.get("N1", "") +# compare tolerant of a single trailing newline on either side +PAY = os.environ["PAY"].rstrip("\n") + +already_local = contains_local(os.environ["N2_CONTAINS"]) +explicit = loaded_payload(os.environ["N2_EXPLICIT"]) +transparent = loaded_payload(os.environ["N2_TRANSPARENT"]) +providers = find_identities(os.environ["N2_FIND"]) + +explicit_ok = explicit is not None and explicit.rstrip("\n") == PAY +transparent_ok = transparent is not None and transparent.rstrip("\n") == PAY +find_ok = (N1 in providers) if N1 else bool(providers) + +errs = [] +notes = [] + +if not ID: + errs.append("no Object ID recorded on node1 (~/.netsim/object.id)") +if not PAY: + errs.append("no payload recorded on node1 (~/.netsim/object.payload)") +if not N1: + notes.append("could not resolve node1's node identity host-side (explicit-target read skipped)") + +# locality pre-check is advisory (objects.contains is probabilistic): a 'true' +# here means the pass might not reflect a genuine remote pull -> warn, don't fail. +if already_local is True: + notes.append("objects.contains reports node2 may ALREADY hold this object locally; " + "a byte-match below might not be a genuine cross-swarm pull") +elif already_local is None: + notes.append("objects.contains gave no usable answer on node2 (locality pre-check inconclusive)") + +# surface auth-vs-route signal: an error_message naming auth/permission is a +# DIFFERENT failure than no route / no provider -- don't conflate them. 
+for label, env in (("explicit-target", "N2_EXPLICIT"), + ("transparent", "N2_TRANSPARENT"), + ("objects.find", "N2_FIND")): + for e in errors(os.environ.get(env, "")): + notes.append(f"{label} returned error_message: {e}") + +crossed = explicit_ok or transparent_ok + +if crossed: + path = "explicit-target (:objects.load)" if explicit_ok else "transparent (objects.load, network zone)" + print(f"share-object OK: node2 pulled object {ID[:12]}.. from node1 across the swarm " + f"via {path}; bytes match ({len(PAY)} B). " + f"providers seen by objects.find: {len(providers)}.") + for n in notes: + sys.stderr.write(f" note: {n}\n") + sys.exit(0) + +# Did not cross. Build a precise diagnostic (the link-swarm-style finding). +sys.stderr.write("share-object verify FAILED: object did NOT cross the swarm to node2.\n") +if find_ok: + sys.stderr.write(" FINDING: provider discovery DOES cross the swarm " + "(objects.find on node2 returned node1) but the byte READ did not route " + "(explicit-target and transparent objects.load both failed to return the payload). " + "This is the share-object analogue of link-swarm's ':.spec does not route' " + "discovery -- record which hop routes in the task log.\n") +else: + sys.stderr.write(" no cross-swarm object access at all: neither a read nor objects.find " + "resolved node1's object from node2.\n") +for e in errs: + sys.stderr.write(f" - {e}\n") +for n in notes: + sys.stderr.write(f" note: {n}\n") +sys.stderr.write(f" (id={ID} node1={N1[:12] + '..' 
if N1 else '?'} " + f"explicit={'hit' if explicit is not None else 'miss'} " + f"transparent={'hit' if transparent is not None else 'miss'} " + f"find_providers={len(providers)})\n") +sys.exit(1) +PY + +echo "verified share-object across: $VM1 and $VM2" From ca3184f388e252a7ecdf3f9f25f2216396cebfad Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 18 Jun 2026 16:18:57 +0200 Subject: [PATCH 10/57] netsim: widen install-astrald health probe + diagnose on failure The post-install probe only waited ~10s for astrald to come up, but a fresh astrald's first start (node-key generation + SQLite init), right after a CPU-heavy go build still loads the VM, can take longer -- it flaked on an otherwise-clean lab build ("astrald did not come up"). Wait up to ~90s, and on failure dump `systemctl status` + `journalctl -u astrald` so the message is a real diagnosis instead of opaque. Validated: the lab build passed with the wider window on both nodes. Co-Authored-By: Claude Opus 4.8 (1M context) --- netsim/tasks/install-astrald/run.sh | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/netsim/tasks/install-astrald/run.sh b/netsim/tasks/install-astrald/run.sh index 78a17f4e..760be837 100755 --- a/netsim/tasks/install-astrald/run.sh +++ b/netsim/tasks/install-astrald/run.sh @@ -70,15 +70,28 @@ UNIT systemctl daemon-reload systemctl enable --now astrald -# confirm it built AND runs: wait for the apphost listener, then probe the API +# confirm it built AND runs: wait (up to ~90s) for the apphost listener, then +# probe the API. First start is much slower than a snapshot resume -- astrald +# generates its node key and inits SQLite, often right after a CPU-heavy go +# build still loads the VM -- so the window is deliberately generous (the old +# ~10s loop flaked here). On failure, dump the service state + journal so "did +# not come up" is a real diagnosis instead of an opaque message. 
ok= -for _ in 1 2 3 4 5 6 7 8 9 10; do +n=0 +while [ "$n" -lt 90 ]; do if systemctl is-active --quiet astrald && timeout 5 astral-query localnode:.spec -out json >/dev/null 2>&1; then ok=1; break fi - sleep 1 + n=$((n + 1)); sleep 1 done -[ -n "$ok" ] || { echo "astrald did not come up on $(hostname)" >&2; exit 1; } +if [ -z "$ok" ]; then + echo "astrald did not come up on $(hostname) after ${n}s" >&2 + echo "--- systemctl status astrald ---" >&2 + systemctl status astrald --no-pager >&2 2>&1 || true + echo "--- journalctl -u astrald (tail 40) ---" >&2 + journalctl -u astrald --no-pager 2>&1 | tail -40 >&2 || true + exit 1 +fi # leave astrald running: netsim snapshots live RAM, so the node resumes # already-running when the stage is restored (a stopped service would not From bf31595a12aebd0a54408631e3e4ac589c33b06d Mon Sep 17 00:00:00 2001 From: intern0 Date: Fri, 19 Jun 2026 03:29:41 +0200 Subject: [PATCH 11/57] netsim: extract verify.sh logic into verify.py (link-swarm, share-object) The link-swarm and share-object verifiers were shell scripts that gathered astral-query JSON and parsed it with embedded python heredocs -- awkward, and the parsing couldn't be unit-tested without booting a VM. Move all logic into a real verify.py per task (calls `netsim ssh ... astral-query` via subprocess, parses the JSON streams, asserts); verify.sh becomes a thin shim: exec python3 "$NETSIM_TASK_DIR/verify.py" "$@" netsim sets $NETSIM_TASK_DIR to the task dir and only auto-runs run.sh/verify.sh, so verify.py sits alongside and is found cleanly. Behavior-preserving: parsers golden-file tested against captured JSON, and the full pipeline re-run fresh on NFS -- link-swarm verify PASSES, share-object verify reproduces the cross-swarm-fetch diagnostic, both via the new shim. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- netsim/tasks/link-swarm/verify.py | 126 +++++++++++++++ netsim/tasks/link-swarm/verify.sh | 109 +------------ netsim/tasks/share-object/verify.py | 216 ++++++++++++++++++++++++++ netsim/tasks/share-object/verify.sh | 229 +--------------------------- 4 files changed, 354 insertions(+), 326 deletions(-) create mode 100755 netsim/tasks/link-swarm/verify.py create mode 100755 netsim/tasks/share-object/verify.py diff --git a/netsim/tasks/link-swarm/verify.py b/netsim/tasks/link-swarm/verify.py new file mode 100755 index 00000000..55ee36b9 --- /dev/null +++ b/netsim/tasks/link-swarm/verify.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +"""verify link-swarm: node1 and node2 must be linked into one User swarm. + +INDEPENDENT both-ends check -- it does not trust run.sh. It pulls raw JSON from +both nodes and asserts THREE facts on the host; together they prove the swarm from +both ends: + 1. both nodes hold an active contract issued by the SAME User + (user.info: Issuer == the bootstrap User on each; Subject == that node); + 2. node1, acting as the User, lists node2 as a Linked sibling (user.swarm_status); + 3. a mutual authenticated link exists (node2 nodes.links -> node1). + +Runs on the host (invoked by the verify.sh shim); reaches the VMs with `netsim ssh`. + +NOTE on "routed query": an earlier plan probed `:.spec` as the proof. That is +NOT valid -- node introspection ops (.spec/.id/.ping) are served locally and do not +route to a sibling by node-id, so they fail even on a fully formed swarm. The +contract + link + sibling triple above is the real proof. + +astral-query ... -out json emits a JSON *stream* (one object per line, then an +{"Type":"eos"} terminator), so everything is parsed line-by-line, not as one doc. 
+""" +import argparse +import json +import subprocess +import sys + +TOKEN = "export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token);" + + +def ssh(vm, remote): + """Run `netsim ssh -- ` on the host; return stdout.""" + p = subprocess.run(["netsim", "ssh", vm, "--", remote], + capture_output=True, text=True) + return p.stdout + + +def objs(stream): + out = [] + for ln in (stream or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + out.append(json.loads(ln)) + except json.JSONDecodeError: + pass + return out + + +def contract(info): + """(Issuer, Subject) of the active contract from a user.info stream.""" + for o in objs(info): + ob = o.get("Object") + if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): + c = ob["Contract"].get("Contract", {}) + return c.get("Issuer"), c.get("Subject") + return None, None + + +def linked_sibling(swarm): + """Identity of the first Linked sibling in a user.swarm_status stream.""" + for o in objs(swarm): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("Linked"): + return ob.get("Identity") + return None + + +def has_link_to(links, identity): + """True if a nodes.links stream contains an active link to `identity`.""" + for o in objs(links): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("RemoteIdentity") == identity: + return True + return False + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--node1", default="node1") + ap.add_argument("--node2", default="node2") + args, _ = ap.parse_known_args() + vm1, vm2 = args.node1, args.node2 + + # node1 acts as the User (token from bootstrap-user); node2 answers under its + # node identity (it holds the contract after the claim). 
+ U = "".join(ssh(vm1, "cat /home/tester/.netsim/user.id").split()) + n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") + n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") + n2_info = ssh(vm2, "astral-query user.info -out json") + n2_links = ssh(vm2, "astral-query nodes.links -out json") + + i1, s1 = contract(n1_info) + i2, s2 = contract(n2_info) + sib = linked_sibling(n1_swarm) + linkback = has_link_to(n2_links, s1) + + errs = [] + if not U: + errs.append("no User id recorded on node1 (~/.netsim/user.id)") + if i1 != U: + errs.append(f"node1 contract issuer {i1} != User {U}") + if i2 != U: + errs.append(f"node2 contract issuer {i2} != User {U} (node2 not claimed under this User)") + if not s1: + errs.append("node1 has no active contract subject") + if not s2: + errs.append("node2 has no active contract subject") + if s2 and sib != s2: + errs.append(f"node1's linked sibling {sib} != node2 {s2}") + if not linkback: + errs.append(f"node2 has no active link back to node1 ({s1})") + + if errs: + sys.stderr.write("link-swarm verify FAILED:\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + return 1 + + print(f"swarm OK: User {U[:8]}.. ; node1 {s1[:8]}.. <-link-> node2 {s2[:8]}.. ; " + f"both under one User; node1 lists node2 as Linked sibling") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/link-swarm/verify.sh b/netsim/tasks/link-swarm/verify.sh index ccffe6ed..fac4b4ff 100755 --- a/netsim/tasks/link-swarm/verify.sh +++ b/netsim/tasks/link-swarm/verify.sh @@ -1,104 +1,7 @@ #!/bin/sh -# verify link-swarm: node1 and node2 must be linked into one User swarm. -# INDEPENDENT both-ends check -- it does not trust run.sh. It pulls raw JSON from -# both nodes and asserts THREE facts on the host; together they prove the swarm -# from both ends: -# 1. both nodes hold an active contract issued by the SAME User -# (user.info: Issuer == the bootstrap User on each; Subject == that node); -# 2. 
node1, acting as the User, lists node2 as a Linked sibling -# (user.swarm_status); -# 3. a mutual authenticated link exists (node2 nodes.links -> node1). -# -# NOTE on "routed query": an earlier plan probed `:.spec` as the proof. -# That is NOT valid -- node introspection ops (.spec/.id/.ping) are served -# locally and do not route to a sibling by node-id, so they fail even on a fully -# formed swarm. The contract + link + sibling triple above is the real proof. -# -# astral-query ... -out json emits a JSON *stream* (one object per line, then an -# {"Type":"eos"} terminator), so everything is parsed line-by-line, not as one -# document. -set -eu - -VM1=node1 -VM2=node2 -while [ $# -gt 0 ]; do - case "$1" in - --node1) VM1=$2; shift 2 ;; - --node2) VM2=$2; shift 2 ;; - *) shift ;; - esac -done - -# Pull the User id and the four JSON blobs. Single-quoted remote args so the -# command substitutions run on the guest. node1 acts as the User (token from -# bootstrap-user); node2 answers under its node identity (it holds the contract). 
-U=$(netsim ssh "$VM1" -- 'cat /home/tester/.netsim/user.id') -n1_info=$(netsim ssh "$VM1" -- 'export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token); astral-query user.info -out json') -n1_swarm=$(netsim ssh "$VM1" -- 'export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token); astral-query user.swarm_status -out json') -n2_info=$(netsim ssh "$VM2" -- 'astral-query user.info -out json') -n2_links=$(netsim ssh "$VM2" -- 'astral-query nodes.links -out json') - -UU=$(printf '%s' "$U" | tr -d '[:space:]') -export UU N1_INFO="$n1_info" N1_SWARM="$n1_swarm" N2_INFO="$n2_info" N2_LINKS="$n2_links" - -python3 - <<'PY' -import os, sys, json - -def objs(s): - out = [] - for ln in s.splitlines(): - ln = ln.strip() - if not ln: - continue - try: - out.append(json.loads(ln)) - except json.JSONDecodeError: - pass - return out - -def contract(info): - for o in objs(info): - ob = o.get("Object") - if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): - c = ob["Contract"].get("Contract", {}) - return c.get("Issuer"), c.get("Subject") - return None, None - -U = os.environ["UU"] -i1, s1 = contract(os.environ["N1_INFO"]) -i2, s2 = contract(os.environ["N2_INFO"]) - -sib = None -for o in objs(os.environ["N1_SWARM"]): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("Linked"): - sib = ob.get("Identity") - break - -linkback = False -for o in objs(os.environ["N2_LINKS"]): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("RemoteIdentity") == s1: - linkback = True - break - -errs = [] -if not U: errs.append("no User id recorded on node1 (~/.netsim/user.id)") -if i1 != U: errs.append(f"node1 contract issuer {i1} != User {U}") -if i2 != U: errs.append(f"node2 contract issuer {i2} != User {U} (node2 not claimed under this User)") -if not s1: errs.append("node1 has no active contract subject") -if not s2: errs.append("node2 has no active contract subject") -if s2 and sib != s2: errs.append(f"node1's linked sibling {sib} != node2 {s2}") -if 
not linkback: errs.append(f"node2 has no active link back to node1 ({s1})") - -if errs: - sys.stderr.write("link-swarm verify FAILED:\n") - for e in errs: - sys.stderr.write(f" - {e}\n") - sys.exit(1) - -print(f"swarm OK: User {U[:8]}.. ; node1 {s1[:8]}.. <-link-> node2 {s2[:8]}.. ; " - f"both under one User; node1 lists node2 as Linked sibling") -PY - -echo "verified swarm link on: $VM1 and $VM2" +# Thin shim — all verification logic lives in verify.py. Calling astral-query and +# walking its JSON streams is far cleaner in python than bash, so verify.sh just +# hands off. netsim sets $NETSIM_TASK_DIR to this task's directory and only +# auto-runs run.sh/verify.sh, so verify.py sits next to us and is invoked here +# (the dirname fallback covers running this script directly). +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/share-object/verify.py b/netsim/tasks/share-object/verify.py new file mode 100755 index 00000000..976ec906 --- /dev/null +++ b/netsim/tasks/share-object/verify.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +"""verify share-object: an astral object stored on node1 must be obtainable by +its sibling node2 ACROSS THE SWARM. + +INDEPENDENT host-side check -- it does not trust run.sh. It reads the id + payload +the agent persisted on node1, then tries to pull that exact id FROM node2's vantage +and asserts the bytes match. Runs on the host (invoked by the verify.sh shim, which +netsim runs with cwd=sim root and $NETSIM_TASK_DIR set); it reaches the VMs with +`netsim ssh`. + +THE CROSS-SWARM HOP IS INFERRED, NOT DEMONSTRATED (see README). The astral-docs +describe a network zone + a finder/provider layer but no worked example of one +swarm member reading another's object by id, so we probe a LADDER and report which +hop routes -- exactly as link-swarm discovered that :.spec does NOT route. +Order (strongest -> weakest), all run on node2: + 1. 
EXPLICIT TARGET astral-query :objects.load -id -out json + Query-target routing over the swarm link. Primary: does NOT rely on node2's + network zone (an anonymous apphost caller has ZoneNetwork stripped) -- it + addresses node1 directly; node1 serves the read locally. + 2. TRANSPARENT astral-query objects.load -id -out json + Relies on the read context's zone defaulting to all zones (incl. network). + Likely BLOCKED for an anonymous host-side caller -- kept as a bonus probe. + 3. PROVIDER FIND astral-query objects.find -id -out json + Returns provider IDENTITIES, not bytes. If only this works, discovery + crosses but the byte read does not -- a partial finding, not a pass. + +PASS iff node2 obtained the EXACT stored bytes for the agent-reported id across the +swarm (hop 1 or 2). A pre-check asserts node2 doesn't already hold the object +locally, so a pass reflects a genuine remote pull. + +astral-query ... -out json emits a JSON *stream* (one object per line, then an +{"Type":"eos"} terminator), so everything is parsed line-by-line, not as one doc. +""" +import argparse +import json +import subprocess +import sys + +TOKEN = "export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token);" + + +def ssh(vm, remote): + """Run `netsim ssh -- ` on the host; return stdout (best-effort). + + astral-query writes error_message frames to stdout (which we parse) and other + failures (route_not_found, etc.) to stderr (which we drop) -- mirroring the + old shell `2>/dev/null`. 
+ """ + p = subprocess.run(["netsim", "ssh", vm, "--", remote], + capture_output=True, text=True) + return p.stdout + + +# ---- JSON object-stream parsing (one object/line + an eos terminator) ---------- + +def objs(stream): + out = [] + for ln in (stream or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + out.append(json.loads(ln)) + except json.JSONDecodeError: + pass + return out + + +def loaded_payload(stream): + """From an objects.load stream, the decoded payload string (the stored + string8's Object), or None. Skips eos / error_message frames.""" + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + ob = o.get("Object") + if isinstance(ob, str): + return ob + return None + + +def errors(stream): + return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] + + +def contains_local(stream): + """objects.contains stream -> a bool frame. Returns True/False/None.""" + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + if isinstance(o.get("Object"), bool): + return o["Object"] + return None + + +def find_identities(stream): + ids = [] + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + ob = o.get("Object") + if isinstance(ob, str): + ids.append(ob) + return ids + + +def contract_subject(stream): + """node1's node identity = Subject of its active contract (from user.info).""" + for o in objs(stream): + ob = o.get("Object") + if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): + c = ob["Contract"].get("Contract", {}) + if c.get("Subject"): + return c["Subject"] + return None + + +def remote_identity(stream): + """Fallback: RemoteIdentity from node2's nodes.links (the link back to node1).""" + for o in objs(stream): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("RemoteIdentity"): + return ob["RemoteIdentity"] + return None + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--node1", 
default="node1") + ap.add_argument("--node2", default="node2") + args, _ = ap.parse_known_args() + vm1, vm2 = args.node1, args.node2 + + # node1: the id + payload the agent persisted, and node1's node identity. + # node1 acts as the User (token) so user.info returns the active contract whose + # Subject IS node1's node identity (the provider to target). ID strips all + # whitespace (matches the old `tr -d '[:space:]'`); PAY tolerates a trailing nl. + ID = "".join(ssh(vm1, "cat /home/tester/.netsim/object.id").split()) + PAY = ssh(vm1, "cat /home/tester/.netsim/object.payload").rstrip("\n") + n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") + + # node2 answers under its node identity (no token => anonymous apphost caller). + n2_links = ssh(vm2, "astral-query nodes.links -out json") + N1 = contract_subject(n1_info) or remote_identity(n2_links) or "" + + n2_contains = ssh(vm2, f"astral-query objects.contains -repo local -id '{ID}' -out json") + n2_find = ssh(vm2, f"astral-query objects.find -id '{ID}' -out json") + n2_transparent = ssh(vm2, f"astral-query objects.load -id '{ID}' -out json") + n2_explicit = ssh(vm2, f"astral-query '{N1}':objects.load -id '{ID}' -out json") if N1 else "" + + already_local = contains_local(n2_contains) + explicit = loaded_payload(n2_explicit) + transparent = loaded_payload(n2_transparent) + providers = find_identities(n2_find) + + explicit_ok = explicit is not None and explicit.rstrip("\n") == PAY + transparent_ok = transparent is not None and transparent.rstrip("\n") == PAY + find_ok = (N1 in providers) if N1 else bool(providers) + + errs, notes = [], [] + if not ID: + errs.append("no Object ID recorded on node1 (~/.netsim/object.id)") + if not PAY: + errs.append("no payload recorded on node1 (~/.netsim/object.payload)") + if not N1: + notes.append("could not resolve node1's node identity host-side (explicit-target read skipped)") + + # locality pre-check is advisory (objects.contains is probabilistic). 
+ if already_local is True: + notes.append("objects.contains reports node2 may ALREADY hold this object locally; " + "a byte-match below might not be a genuine cross-swarm pull") + elif already_local is None: + notes.append("objects.contains gave no usable answer on node2 (locality pre-check inconclusive)") + + # auth-vs-route signal: an error_message naming auth/permission is a DIFFERENT + # failure than no route / no provider -- don't conflate them. + for label, stream in (("explicit-target", n2_explicit), + ("transparent", n2_transparent), + ("objects.find", n2_find)): + for e in errors(stream): + notes.append(f"{label} returned error_message: {e}") + + if explicit_ok or transparent_ok: + path = ("explicit-target (:objects.load)" if explicit_ok + else "transparent (objects.load, network zone)") + print(f"share-object OK: node2 pulled object {ID[:12]}.. from node1 across the " + f"swarm via {path}; bytes match ({len(PAY)} B). " + f"providers seen by objects.find: {len(providers)}.") + for n in notes: + sys.stderr.write(f" note: {n}\n") + return 0 + + # Did not cross. Build a precise diagnostic (the link-swarm-style finding). + sys.stderr.write("share-object verify FAILED: object did NOT cross the swarm to node2.\n") + if find_ok: + sys.stderr.write(" FINDING: provider discovery DOES cross the swarm " + "(objects.find on node2 returned node1) but the byte READ did not route " + "(explicit-target and transparent objects.load both failed to return the payload). " + "This is the share-object analogue of link-swarm's ':.spec does not route' " + "discovery -- record which hop routes in the task log.\n") + else: + sys.stderr.write(" no cross-swarm object access at all: neither a read nor objects.find " + "resolved node1's object from node2.\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + for n in notes: + sys.stderr.write(f" note: {n}\n") + n1disp = (N1[:12] + "..") if N1 else "?" 
+ sys.stderr.write(f" (id={ID} node1={n1disp} " + f"explicit={'hit' if explicit is not None else 'miss'} " + f"transparent={'hit' if transparent is not None else 'miss'} " + f"find_providers={len(providers)})\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/share-object/verify.sh b/netsim/tasks/share-object/verify.sh index 1a7da88f..fac4b4ff 100755 --- a/netsim/tasks/share-object/verify.sh +++ b/netsim/tasks/share-object/verify.sh @@ -1,224 +1,7 @@ #!/bin/sh -# verify share-object: an astral object stored on node1 must be obtainable by its -# sibling node2 ACROSS THE SWARM. INDEPENDENT host-side check -- it does not trust -# run.sh. It reads the id + payload the agent persisted on node1, then tries to -# pull that exact id FROM node2's vantage and asserts the bytes match. -# -# THE CROSS-SWARM HOP IS INFERRED, NOT DOCUMENTED (see README). The astral-docs -# describe a network zone + a finder/provider layer but no worked example of one -# swarm member reading another's object by id, so verify probes a LADDER and -# reports which hop routes -- exactly as link-swarm discovered that :.spec -# does NOT route. Order (strongest -> weakest), all run on node2: -# 1. EXPLICIT TARGET astral-query :objects.load -id -out json -# Query-target routing over the swarm link. Primary path: it does NOT rely -# on node2's network zone (an anonymous apphost caller has ZoneNetwork -# stripped), it addresses node1 directly; node1 serves the read locally. -# 2. TRANSPARENT astral-query objects.load -id -out json -# Relies on the read context's zone defaulting to all zones (incl. the -# network zone) so node2 resolves node1 as provider. Likely BLOCKED for an -# anonymous host-side caller (network zone stripped) -- kept as a bonus probe. -# 3. PROVIDER FIND astral-query objects.find -id -out json -# Returns provider IDENTITIES, not bytes. If only this works, discovery -# crosses the swarm but the byte read does not -- a partial finding, not a pass. 
-# -# PASS iff node2 obtained the EXACT stored bytes for the agent-reported id across -# the swarm (hop 1 or 2). A pre-check asserts node2 does not already hold the -# object locally, so a pass reflects a genuine remote pull. -# -# astral-query ... -out json emits a JSON *stream* (one object per line, then an -# {"Type":"eos"} terminator), so everything is parsed line-by-line, not as one -# document. -set -eu - -VM1=node1 -VM2=node2 -while [ $# -gt 0 ]; do - case "$1" in - --node1) VM1=$2; shift 2 ;; - --node2) VM2=$2; shift 2 ;; - *) shift ;; - esac -done - -# --- node1 side: the id + payload the agent persisted, and node1's node id ------ -# node1 acts as the User (token from bootstrap-user) so user.info returns the -# active contract whose Subject IS node1's node identity (the provider to target). -ID=$(netsim ssh "$VM1" -- 'cat /home/tester/.netsim/object.id') -PAY=$(netsim ssh "$VM1" -- 'cat /home/tester/.netsim/object.payload') -n1_info=$(netsim ssh "$VM1" -- 'export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token); astral-query user.info -out json') - -IDC=$(printf '%s' "$ID" | tr -d '[:space:]') - -# --- node2 side: cross-check node1's id, locality pre-check, then the fetch ladder -# node2 answers under its node identity (no token => anonymous apphost caller). -n2_links=$(netsim ssh "$VM2" -- 'astral-query nodes.links -out json' 2>/dev/null || true) -n2_contains=$(netsim ssh "$VM2" -- "astral-query objects.contains -repo local -id '$IDC' -out json" 2>/dev/null || true) -n2_find=$(netsim ssh "$VM2" -- "astral-query objects.find -id '$IDC' -out json" 2>/dev/null || true) -n2_transparent=$(netsim ssh "$VM2" -- "astral-query objects.load -id '$IDC' -out json" 2>/dev/null || true) - -# explicit-target read needs node1's node id; resolve it host-side first. 
-N1=$(printf '%s' "$n1_info" | python3 -c ' -import sys, json -for ln in sys.stdin: - ln = ln.strip() - if not ln: - continue - try: - o = json.loads(ln) - except json.JSONDecodeError: - continue - ob = o.get("Object") - if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): - c = ob["Contract"].get("Contract", {}) - if c.get("Subject"): - print(c["Subject"]); break -' 2>/dev/null || true) -# fall back to the RemoteIdentity of node2->node1 link if user.info parse failed -if [ -z "$N1" ]; then - N1=$(printf '%s' "$n2_links" | python3 -c ' -import sys, json -for ln in sys.stdin: - ln = ln.strip() - if not ln: - continue - try: - o = json.loads(ln) - except json.JSONDecodeError: - continue - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("RemoteIdentity"): - print(ob["RemoteIdentity"]); break -' 2>/dev/null || true) -fi - -n2_explicit="" -if [ -n "$N1" ]; then - n2_explicit=$(netsim ssh "$VM2" -- "astral-query '$N1':objects.load -id '$IDC' -out json" 2>/dev/null || true) -fi - -export IDC PAY N1 N1_INFO="$n1_info" N2_LINKS="$n2_links" N2_CONTAINS="$n2_contains" \ - N2_FIND="$n2_find" N2_TRANSPARENT="$n2_transparent" N2_EXPLICIT="$n2_explicit" - -python3 - <<'PY' -import os, sys, json - -def objs(s): - """Parse a JSON object-stream (one object per line + an eos terminator).""" - out = [] - for ln in (s or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - out.append(json.loads(ln)) - except json.JSONDecodeError: - pass - return out - -def loaded_payload(stream): - """From an objects.load -out json stream, return the decoded payload string - (the stored string8's Object), or None. 
Skips eos / error_message frames.""" - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - ob = o.get("Object") - if isinstance(ob, str): - return ob - return None - -def errors(stream): - return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] - -def contains_local(stream): - """objects.contains -out json -> a bool frame. Returns True/False/None.""" - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - if isinstance(o.get("Object"), bool): - return o["Object"] - return None - -def find_identities(stream): - ids = [] - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - ob = o.get("Object") - if isinstance(ob, str): - ids.append(ob) - return ids - -ID = os.environ["IDC"] -N1 = os.environ.get("N1", "") -# compare tolerant of a single trailing newline on either side -PAY = os.environ["PAY"].rstrip("\n") - -already_local = contains_local(os.environ["N2_CONTAINS"]) -explicit = loaded_payload(os.environ["N2_EXPLICIT"]) -transparent = loaded_payload(os.environ["N2_TRANSPARENT"]) -providers = find_identities(os.environ["N2_FIND"]) - -explicit_ok = explicit is not None and explicit.rstrip("\n") == PAY -transparent_ok = transparent is not None and transparent.rstrip("\n") == PAY -find_ok = (N1 in providers) if N1 else bool(providers) - -errs = [] -notes = [] - -if not ID: - errs.append("no Object ID recorded on node1 (~/.netsim/object.id)") -if not PAY: - errs.append("no payload recorded on node1 (~/.netsim/object.payload)") -if not N1: - notes.append("could not resolve node1's node identity host-side (explicit-target read skipped)") - -# locality pre-check is advisory (objects.contains is probabilistic): a 'true' -# here means the pass might not reflect a genuine remote pull -> warn, don't fail. 
-if already_local is True: - notes.append("objects.contains reports node2 may ALREADY hold this object locally; " - "a byte-match below might not be a genuine cross-swarm pull") -elif already_local is None: - notes.append("objects.contains gave no usable answer on node2 (locality pre-check inconclusive)") - -# surface auth-vs-route signal: an error_message naming auth/permission is a -# DIFFERENT failure than no route / no provider -- don't conflate them. -for label, env in (("explicit-target", "N2_EXPLICIT"), - ("transparent", "N2_TRANSPARENT"), - ("objects.find", "N2_FIND")): - for e in errors(os.environ.get(env, "")): - notes.append(f"{label} returned error_message: {e}") - -crossed = explicit_ok or transparent_ok - -if crossed: - path = "explicit-target (:objects.load)" if explicit_ok else "transparent (objects.load, network zone)" - print(f"share-object OK: node2 pulled object {ID[:12]}.. from node1 across the swarm " - f"via {path}; bytes match ({len(PAY)} B). " - f"providers seen by objects.find: {len(providers)}.") - for n in notes: - sys.stderr.write(f" note: {n}\n") - sys.exit(0) - -# Did not cross. Build a precise diagnostic (the link-swarm-style finding). -sys.stderr.write("share-object verify FAILED: object did NOT cross the swarm to node2.\n") -if find_ok: - sys.stderr.write(" FINDING: provider discovery DOES cross the swarm " - "(objects.find on node2 returned node1) but the byte READ did not route " - "(explicit-target and transparent objects.load both failed to return the payload). " - "This is the share-object analogue of link-swarm's ':.spec does not route' " - "discovery -- record which hop routes in the task log.\n") -else: - sys.stderr.write(" no cross-swarm object access at all: neither a read nor objects.find " - "resolved node1's object from node2.\n") -for e in errs: - sys.stderr.write(f" - {e}\n") -for n in notes: - sys.stderr.write(f" note: {n}\n") -sys.stderr.write(f" (id={ID} node1={N1[:12] + '..' 
if N1 else '?'} " - f"explicit={'hit' if explicit is not None else 'miss'} " - f"transparent={'hit' if transparent is not None else 'miss'} " - f"find_providers={len(providers)})\n") -sys.exit(1) -PY - -echo "verified share-object across: $VM1 and $VM2" +# Thin shim — all verification logic lives in verify.py. Calling astral-query and +# walking its JSON streams is far cleaner in python than bash, so verify.sh just +# hands off. netsim sets $NETSIM_TASK_DIR to this task's directory and only +# auto-runs run.sh/verify.sh, so verify.py sits next to us and is invoked here +# (the dirname fallback covers running this script directly). +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" From 7cac1ee8b1f8c316125553451f6ab0a569cee91a Mon Sep 17 00:00:00 2001 From: intern0 Date: Sun, 21 Jun 2026 23:50:01 +0200 Subject: [PATCH 12/57] netsim: align scenarios with swarm-membership vocabulary Rename swarm wording to match astrald master (PR #350) and the updated astral-agent skill: user.claim -> user.adopt, the node-claiming playbook -> node-adoption, and mod.user.swarm_access_action -> mod.user.swarm_membership_action. Docs/comments/prompt wording only; no verifier logic changes. --- netsim/tasks/bootstrap-user/README.md | 4 ++-- netsim/tasks/link-swarm/README.md | 12 ++++++------ netsim/tasks/link-swarm/prompt.md | 2 +- netsim/tasks/link-swarm/run.sh | 2 +- netsim/tasks/link-swarm/verify.py | 4 ++-- netsim/tasks/share-object/README.md | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/netsim/tasks/bootstrap-user/README.md b/netsim/tasks/bootstrap-user/README.md index b4c60afb..44e58831 100644 --- a/netsim/tasks/bootstrap-user/README.md +++ b/netsim/tasks/bootstrap-user/README.md @@ -2,14 +2,14 @@ A netsim task that turns the operator node into a **User-controlled node**, driven by the Qwen Code agent running inside the VM. 
It is the first half of the -swarm phase: it establishes identity only; it does not link or claim anything +swarm phase: it establishes identity only; it does not link or adopt anything (that is [`link-swarm`](../link-swarm/README.md)). ``` bootstrap-user [--vm ] # default: node1 (the VM carrying Qwen) ``` -After it runs, the node holds an active `mod.user.swarm_access_action` contract +After it runs, the node holds an active `mod.user.swarm_membership_action` contract (issuer = a fresh software User, subject = this node), and a User-bound apphost token is persisted so later tasks can act as the User. It produces a new stage on top of the lab base — run it standalone against `astrald-lab` with: diff --git a/netsim/tasks/link-swarm/README.md b/netsim/tasks/link-swarm/README.md index ea019868..f1b44cb2 100644 --- a/netsim/tasks/link-swarm/README.md +++ b/netsim/tasks/link-swarm/README.md @@ -1,6 +1,6 @@ # link-swarm -A netsim task that claims the second node into the User's swarm, driven by the +A netsim task that adopts the second node into the User's swarm, driven by the Qwen Code agent on node1. It is the second half of the swarm phase: node1 is already a User node (from [`bootstrap-user`](../bootstrap-user/README.md)); this task brings node2 under the same User so the two share one swarm. @@ -25,8 +25,8 @@ Identical mechanic to `bootstrap-user`: **tiny script, thin prompt, intelligence in the skill.** `run.sh` base64-ships [`prompt.md`](prompt.md) to node1 in a single `netsim ssh` call and runs `qwen -y` as `tester`. The prompt is two sentences — the operator is told it controls a User node, that another astrald -node is on the local network, and to claim it following its **astral-agent** -skill's `node-claiming` playbook. The claim flow (`user.claim -target `, +node is on the local network, and to adopt it following its **astral-agent** +skill's `node-adoption` playbook. 
The adopt flow (`user.adopt -target `, reachability via the `nearby` module on the shared LAN) lives entirely in the skill; the prompt restates none of it. @@ -42,13 +42,13 @@ together prove the swarm from both ends: 1. **Both nodes hold an active contract issued by the same User** — `user.info` on node1 *and* node2 each shows `Issuer == ` with `Subject ==` that node. node2 independently confirming the same User is the key both-ends - proof that the claim took. + proof that the adoption took. 2. **node1, as the User, lists node2 as a `Linked` sibling** (`user.swarm_status`). 3. **A mutual authenticated link exists** — node2's `nodes.links` shows a link whose `RemoteIdentity` is node1. node1 acts as the User via its persisted token; node2 answers under its node -identity (it holds the contract after the claim, so no token is needed there). +identity (it holds the contract after the adoption, so no token is needed there). `astral-query … -out json` emits a JSON **stream** (one object per line + an `{"Type":"eos"}` terminator), so output is parsed line-by-line, not as one @@ -68,6 +68,6 @@ reproducible both-ends proof. ## Validated end-to-end Run `astrald-user → astrald-swarm` (2026-06-17): the thin prompt drove the -operator to `user.claim` node2 into the User's swarm; both nodes ended under one +operator to `user.adopt` node2 into the User's swarm; both nodes ended under one User (`02ad7ef7…`) with a mutual link, and the rewritten `verify.sh` passes. Stage `astrald-swarm` saved. diff --git a/netsim/tasks/link-swarm/prompt.md b/netsim/tasks/link-swarm/prompt.md index faeb893d..337ff9e2 100644 --- a/netsim/tasks/link-swarm/prompt.md +++ b/netsim/tasks/link-swarm/prompt.md @@ -1,3 +1,3 @@ On this machine an `astrald` node is running, and you control it as its User. Another astrald node is on the local network, not yet in your swarm. Bring it -into your swarm, following your **astral-agent** skill's node-claiming playbook. 
+into your swarm, following your **astral-agent** skill's node-adoption playbook. diff --git a/netsim/tasks/link-swarm/run.sh b/netsim/tasks/link-swarm/run.sh index 297f945f..baabc3fe 100755 --- a/netsim/tasks/link-swarm/run.sh +++ b/netsim/tasks/link-swarm/run.sh @@ -1,5 +1,5 @@ #!/bin/sh -# link-swarm: claim the second node into the User's swarm, driven by the Qwen +# link-swarm: adopt the second node into the User's swarm, driven by the Qwen # Code agent running INSIDE node1 (which is already a User node from # bootstrap-user — default starting stage: astrald-user). # link-swarm [--vm ] (default: node1 — the VM carrying Qwen) diff --git a/netsim/tasks/link-swarm/verify.py b/netsim/tasks/link-swarm/verify.py index 55ee36b9..f3fa87d3 100755 --- a/netsim/tasks/link-swarm/verify.py +++ b/netsim/tasks/link-swarm/verify.py @@ -83,7 +83,7 @@ def main(): vm1, vm2 = args.node1, args.node2 # node1 acts as the User (token from bootstrap-user); node2 answers under its - # node identity (it holds the contract after the claim). + # node identity (it holds the contract after the adoption). 
U = "".join(ssh(vm1, "cat /home/tester/.netsim/user.id").split()) n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") @@ -101,7 +101,7 @@ def main(): if i1 != U: errs.append(f"node1 contract issuer {i1} != User {U}") if i2 != U: - errs.append(f"node2 contract issuer {i2} != User {U} (node2 not claimed under this User)") + errs.append(f"node2 contract issuer {i2} != User {U} (node2 not adopted under this User)") if not s1: errs.append("node1 has no active contract subject") if not s2: diff --git a/netsim/tasks/share-object/README.md b/netsim/tasks/share-object/README.md index f3e23dbb..59b5889e 100644 --- a/netsim/tasks/share-object/README.md +++ b/netsim/tasks/share-object/README.md @@ -106,7 +106,7 @@ Ephemeral / `objects.new_mem` behaviour deserves its own focused task layered on ## Skill gap this scenario exercises The skill has playbooks only for `swarm-management` (`node-setup`, -`node-claiming`) — there is **no objects storage/transfer playbook**. The store +`node-adoption`) — there is **no objects storage/transfer playbook**. The store half is reachable from the protocol docs alone (a real test of "are the docs sufficient without a playbook?"); the transfer half having no playbook is itself a finding. If the thin store prompt proves shaky live, the remedy is a small From afb18807c2cbc128de0e18fb603c8b7d8b51854f Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 11:49:48 +0200 Subject: [PATCH 13/57] =?UTF-8?q?netsim:=20link-swarm=20=E2=80=94=20assert?= =?UTF-8?q?=20symmetric=20swarm=20roster=20(guards=20#348)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a fourth verifier check: node2 must list node1 as a Linked sibling (user.swarm_status, which derives from node2's own active contract, so no token). 
This is a direct regression guard for astrald #348 (roster sync to a newly adopted node) and the precondition share-object's write direction relies on. Offline golden test: post-#348 passes, pre-#348 (roster={node2}) correctly fails. --- netsim/tasks/link-swarm/README.md | 12 +++++++++--- netsim/tasks/link-swarm/verify.py | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/netsim/tasks/link-swarm/README.md b/netsim/tasks/link-swarm/README.md index f1b44cb2..7569b6e4 100644 --- a/netsim/tasks/link-swarm/README.md +++ b/netsim/tasks/link-swarm/README.md @@ -36,15 +36,21 @@ discovery works across netsim's per-VM NAT, so the agent needs no manual ## What `verify.sh` checks (independent, both ends) -It pulls raw JSON from both nodes and asserts **three facts on the host** that -together prove the swarm from both ends: +It pulls raw JSON from both nodes and asserts **four facts on the host** that +together prove the swarm from both ends, with a **symmetric roster**: 1. **Both nodes hold an active contract issued by the same User** — `user.info` on node1 *and* node2 each shows `Issuer == ` with `Subject ==` that node. node2 independently confirming the same User is the key both-ends proof that the adoption took. 2. **node1, as the User, lists node2 as a `Linked` sibling** (`user.swarm_status`). -3. **A mutual authenticated link exists** — node2's `nodes.links` shows a link +3. **node2 lists node1 as a `Linked` sibling too** (`user.swarm_status`) — the + symmetric roster delivered by astrald **#348**. `swarm_status` derives from + node2's own active contract (not the caller), so this needs no User token. This + guards the membership-race regression: pre-#348, node2 held only `User→node2` + and its roster was `{node2}`, so it never recognized node1 — exactly the gap + that blocked storing objects on a sibling (see `share-object`). +4. 
**A mutual authenticated link exists** — node2's `nodes.links` shows a link whose `RemoteIdentity` is node1. node1 acts as the User via its persisted token; node2 answers under its node diff --git a/netsim/tasks/link-swarm/verify.py b/netsim/tasks/link-swarm/verify.py index f3fa87d3..f3bab909 100755 --- a/netsim/tasks/link-swarm/verify.py +++ b/netsim/tasks/link-swarm/verify.py @@ -2,12 +2,16 @@ """verify link-swarm: node1 and node2 must be linked into one User swarm. INDEPENDENT both-ends check -- it does not trust run.sh. It pulls raw JSON from -both nodes and asserts THREE facts on the host; together they prove the swarm from -both ends: +both nodes and asserts FOUR facts on the host; together they prove the swarm from +both ends, with a SYMMETRIC roster: 1. both nodes hold an active contract issued by the SAME User (user.info: Issuer == the bootstrap User on each; Subject == that node); 2. node1, acting as the User, lists node2 as a Linked sibling (user.swarm_status); - 3. a mutual authenticated link exists (node2 nodes.links -> node1). + 3. node2 lists node1 as a Linked sibling too (user.swarm_status) -- the symmetric + roster delivered by astrald #348. node2's swarm view derives from its own + active contract, not the caller, so this needs no User token; it guards the + membership-race regression (pre-#348 node2's roster was {node2} only). + 4. a mutual authenticated link exists (node2 nodes.links -> node1). Runs on the host (invoked by the verify.sh shim); reaches the VMs with `netsim ssh`. @@ -89,10 +93,14 @@ def main(): n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") n2_info = ssh(vm2, "astral-query user.info -out json") n2_links = ssh(vm2, "astral-query nodes.links -out json") + # node2's own swarm view: swarm_status derives from node2's active contract, + # not the caller, so no token is needed; post-#348 it must list node1 too. 
+ n2_swarm = ssh(vm2, "astral-query user.swarm_status -out json") i1, s1 = contract(n1_info) i2, s2 = contract(n2_info) sib = linked_sibling(n1_swarm) + n2_sib = linked_sibling(n2_swarm) linkback = has_link_to(n2_links, s1) errs = [] @@ -108,6 +116,9 @@ def main(): errs.append("node2 has no active contract subject") if s2 and sib != s2: errs.append(f"node1's linked sibling {sib} != node2 {s2}") + if s1 and n2_sib != s1: + errs.append(f"node2's linked sibling {n2_sib} != node1 {s1} " + "(node2 does not list node1 -- swarm roster not symmetric; #348 regression?)") if not linkback: errs.append(f"node2 has no active link back to node1 ({s1})") @@ -118,7 +129,7 @@ def main(): return 1 print(f"swarm OK: User {U[:8]}.. ; node1 {s1[:8]}.. <-link-> node2 {s2[:8]}.. ; " - f"both under one User; node1 lists node2 as Linked sibling") + f"both under one User; each lists the other as a Linked sibling (symmetric roster)") return 0 From 52c71c3da8b95fa77e3c53c8f3b6beeb409f7da6 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 11:49:48 +0200 Subject: [PATCH 14/57] =?UTF-8?q?netsim:=20share-object=20=E2=80=94=20stor?= =?UTF-8?q?e=20an=20object=20on=20a=20swarm=20sibling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pivot from the (blocked) cross-swarm read to the now-unblocked write direction: the agent stores an object ON node2 (:objects.store) and reads it back; verify.py independently proves node2 physically holds it via repo-pinned, ungated objects.load/contains -repo local. Unblocked by #348 (node2 now recognizes node1 -> AuthorizeRelayFor permits the relayed store, which reaches the ungated op_store). Caveat documented: op-level write is unauthenticated (CreateObjectAction still unwired). 
--- netsim/tasks/share-object/README.md | 188 +++++++++++++--------------- netsim/tasks/share-object/prompt.md | 20 +-- netsim/tasks/share-object/run.sh | 35 ++++-- netsim/tasks/share-object/verify.py | 187 ++++++++++++--------------- 4 files changed, 202 insertions(+), 228 deletions(-) diff --git a/netsim/tasks/share-object/README.md b/netsim/tasks/share-object/README.md index 59b5889e..7c993ddc 100644 --- a/netsim/tasks/share-object/README.md +++ b/netsim/tasks/share-object/README.md @@ -1,10 +1,11 @@ # share-object -A netsim task that stores an astral **object** on node1 and proves a swarm -sibling (node2) can obtain it **across the swarm** by its Object ID. It is the -first scenario *past* swarm formation: where `bootstrap-user` + `link-swarm` -build the swarm, `share-object` makes the swarm carry data — astral's core act, -"Identities exchanging Objects." +A netsim task that has node1 (acting as its User) **store an astral object ON its +swarm sibling** (node2) and read it back, then independently proves node2 +physically holds it. It is the first scenario *past* swarm formation: where +`bootstrap-user` + `link-swarm` build the swarm, `share-object` makes the swarm +carry data the hard way — one member **writing** an object onto another — astral's +core act, "Identities exchanging Objects." ``` share-object [--vm ] # default: node1 (the VM carrying Qwen) @@ -26,118 +27,99 @@ Same mechanic as `bootstrap-user` / `link-swarm`: **tiny script, thin prompt, intelligence in the skill.** `run.sh` base64-ships [`prompt.md`](prompt.md) to node1 in a single `netsim ssh` call and runs `qwen -y` as `tester`. 
-The split is deliberate and is the heart of this task's design: - -* **The agent does only the STORE half.** The prompt tells the operator — acting +* **The agent stores ON the other node.** The prompt tells the operator — acting as its User — to store a short, distinctive text payload as an astral object - via the objects protocol (its **astral-agent** skill navigates - `protocols/README → objects/README → objects.store`), and to record the - returned Object ID. The store path is reliably reachable from a thin prompt. -* **The cross-swarm FETCH lives in `verify.sh`, not the prompt.** A thin prompt - names no target, and the skill's rule is *"default Query target is the local - Node; set an explicit target only when the user names one"* — so an - agent-driven fetch would be non-deterministic. Putting the fetch in `verify.sh` - lets it address node2 deterministically and keeps the cross-swarm assertion - independent of agent behaviour. + **on the sibling**, addressing it explicitly as the query target + (`:objects.store`), then to load it back **from that node** and confirm + the bytes round-trip. This exercises the comprehension axis (find the sibling, + set an explicit query target, store + load) and the newly-unblocked write + capability in one go. It records the id, the stored payload, the read-back, and + the node id it targeted. +* **The independent proof lives in `verify.py`.** A host-side check confirms node2 + physically holds the object, deterministically, without trusting the agent. 
The agent writes its artifacts under `~tester/.netsim/`: | File | Purpose | |---|---| -| `object.id` | the `data1…` Object ID returned by `objects.store` (the handle `verify.sh` fetches by) | -| `object.payload` | the exact bytes the agent stored (`verify.sh` compares node2's fetched bytes against this) | +| `object.id` | the `data1…` Object ID returned by `objects.store` | +| `object.payload` | the exact bytes the agent stored (the node2-local read is compared against this) | +| `object.readback` | the bytes the agent read back from node2 (advisory cross-check) | +| `object.target` | the node id the agent stored on (cross-checked against node2's real identity) | | `share-object.log` | the agent's run log | -The store itself is, per the docs: -`echo '{"Type":"string8","Object":""}' | astral-query objects.store -in json -out json` -→ `{"Type":"object_id.sha256","Object":"data1…"}`, run under the User token -(`ASTRALD_APPHOST_TOKEN` from `~/.netsim/user.token`) so the object lands in -node1's write-default repo as the User. - -## What `verify.sh` checks (independent, both ends) - -It reads the id + payload the agent persisted on node1, resolves node1's node -identity host-side (the `Subject` of node1's active contract, cross-checked -against node2's `nodes.links` `RemoteIdentity`), and then tries to pull that -exact id **from node2's vantage**, asserting the bytes match. node1 acts as the -User via its token; node2 answers under its node identity (anonymous apphost -caller, no token — exactly like `link-swarm`'s node2-side checks). - -**The cross-swarm hop is inferred from the docs, not demonstrated by them.** The -astral-docs describe a `network` zone and a finder/provider layer, but contain no -worked example of one swarm member reading another's object by id. So — exactly -as `link-swarm` discovered that `:.spec` does **not** route — -`verify.sh` probes a **ladder** on node2 and reports which hop routes: - -1. 
**explicit target** `astral-query :objects.load -id -out json` - — query-target routing over the swarm link. **Primary**, because it does *not* - depend on node2's network zone: an anonymous apphost caller has `ZoneNetwork` - **stripped**, so it can't resolve a remote provider by zone, but it *can* - address node1 directly and let node1 serve the read. -2. **transparent** `astral-query objects.load -id -out json` — relies on the - read context's zone defaulting to all zones (incl. the network zone). Likely - **blocked** for the anonymous host-side caller; kept as a bonus probe. -3. **provider find** `astral-query objects.find -id -out json` — returns - provider **identities**, not bytes. If *only* this works, discovery crosses - the swarm but the byte read does not — a partial finding, not a pass. - -Before the read it runs a locality pre-check (`objects.contains -repo local` on -node2) so a pass reflects a genuine remote pull, not a coincidental local copy -(advisory — `objects.contains` is probabilistic, so it warns, never hard-fails). -It also separates an **authorization** rejection (`mod.objects.read_object_action` -denies the read) from a **routing** failure — different findings, never -conflated. - -**PASS** iff node2 obtained the exact stored bytes for the agent-reported id -across the swarm (hop 1 or 2). `astral-query … -out json` emits a JSON **stream** -(one object/line + an `{"Type":"eos"}` terminator), parsed line-by-line with host -`python3`. +The store is, per the docs, the same `string8` form as a local store but with an +explicit target: +`echo '{"Type":"string8","Object":""}' | astral-query :objects.store -in json -out json` +run under the User token (`ASTRALD_APPHOST_TOKEN` from `~/.netsim/user.token`); the +read-back is `astral-query :objects.load -id `. + +## What `verify.py` checks (independent, decisive) + +`verify.py` does not trust `run.sh` or the agent's read-back. 
It reads the id + +payload the agent persisted on node1 and proves **node2 physically holds the +object in its local repo**: + +* `objects.store` writes to `WriteDefault()` — the **`local`** repo — so the object + lands in node2's `local` repo. `verify.py` reads it straight back from there, on + node2: + * `astral-query objects.load -id <id> -repo local` → bytes must equal the stored + payload — **the decisive check**; + * `astral-query objects.contains -repo local -id <id>` → corroborating bool. +* Both ops are **ungated and repo-pinned**, so a successful repo-local load on node2 + is conclusive: the bytes came from node2's own storage, not a network re-fetch + from node1. node2 answers under its node identity (anonymous host-side caller, no + token — repo-local load/contains need no authorization). + +It also resolves node2's real identity host-side (the `Subject` of node2's active +contract, with node1's `nodes.links` `RemoteIdentity` as a fallback) to cross-check +the node the agent claims it targeted, and notes (advisory) whether node1 also +holds a copy. **PASS** iff node2's `local` repo returns the exact stored bytes. +`astral-query … -out json` emits a JSON **stream** (one object/line + an +`{"Type":"eos"}` terminator), parsed line-by-line with host `python3`. + +## Why storing on a sibling works now + +Earlier runs hit `query rejected (1)` on `<node2-id>:objects.store` — node2 refused to +relay node1's User-authenticated query because its swarm roster was **asymmetric**: +after `link-swarm`, node2 held only `User→node2` and never learned `User→node1`, so +node1 was absent from node2's `LocalSwarm()` and `AuthorizeRelayFor` denied the +relay before the query reached any objects op. + +astrald **#348** ("Sync full swarm roster to a newly invited node", on `master`) +fixes this: both invite paths now schedule `SyncNodesAction` against the joined +node right after indexing, so node2 converges to the full, symmetric roster +(including `User→node1`). 
node2 now recognizes node1 as a sibling → +`AuthorizeRelayFor` allows the relay → the query reaches `op_store`, which has **no +auth gate**, and the write lands. + +> **Caveat (recorded, not a blocker):** `objects.CreateObjectAction` is still an +> unwired stub — `op_store` performs **no** authorization. So a cross-swarm store +> works but is *unauthenticated at the op level* (any caller that can route + relay +> can write). Hardening that (wire `CreateObjectAction` + an `AuthorizeCreateObject` +> grant) is a separate, design-gated task ("Wire up the object-creation +> authorizer"). This scenario tests the *functional* write path, not the +> authorization model. +> +> **Pending the first live `astrald-swarm → astrald-shared` run:** this is verified +> at the code level on `master`; confirm end-to-end live, and confirm the thin +> prompt reliably drives the agent to resolve the sibling and set an explicit query +> target. ## Memory repository — a separate task, by decision The docs expose a `memory` repository group (`objects.new_mem`, the `mem0`/ `memory` repos) — an in-memory, **non-default** write target. `share-object` -deliberately does **not** use it: it must test the *default* cross-swarm path -(node1's standard write-default repo, node2 pulls), and routing an object through -a memory repo would muddy whether a *default-repo* object crosses the swarm. -Ephemeral / `objects.new_mem` behaviour deserves its own focused task layered on -`astrald-shared` later (captured in Triage). +deliberately does **not** use it: it must test the *default* write path (the +sibling's standard write-default `local` repo). Ephemeral / `objects.new_mem` +behaviour deserves its own focused task layered on `astrald-shared` later (captured +in Triage). ## Skill gap this scenario exercises The skill has playbooks only for `swarm-management` (`node-setup`, -`node-adoption`) — there is **no objects storage/transfer playbook**. 
The store -half is reachable from the protocol docs alone (a real test of "are the docs -sufficient without a playbook?"); the transfer half having no playbook is itself -a finding. If the thin store prompt proves shaky live, the remedy is a small -`objects` playbook in the skill, not a fatter prompt. - -## Not yet run end-to-end - -Syntax-clean and registered via `link.sh` (`netsim tasks` lists `share-object`). -The harness mechanics mirror the validated `bootstrap-user` / `link-swarm` -tasks, and the `objects.store` form is taken verbatim from `objects.store.md`. -Open **CONFIRM** items, all to be pinned on the first live -`astrald-swarm → astrald-shared` run (the cross-swarm read is the inferred part): - -* Whether `astral-query :objects.load` actually **routes** the read to - node1 across the swarm and returns the bytes — or hits the same wall as - `:.spec`. This is *the* unknown the run resolves. -* Whether an **anonymous** host-side caller on node2 is **authorized** to read - node1's object (`mod.objects.read_object_action` default policy in a one-User - swarm). If reads are denied, the likely fix is to mint a node2-side User token - (node2 holds the contract) and read as the User, or to move the fetch into the - operator on node1 — noted as the fallback lever, not baked into v1. -* Whether the transparent (no-target) read is genuinely blocked by the stripped - network zone, confirming the explicit-target form as the only working hop. -* The exact `objects.load` / `objects.find` / `objects.contains` stream shapes and - any `error_message` framing on this build (the parser is defensive but - unverified against live output). -* Whether the thin store prompt reliably drives the operator to `objects.store` - under the User token (vs an anonymous/local context) and to write both artifact - files. 
- -If the read does not route but `objects.find` does, that is a real discovery to -record (provider discovery crosses the swarm; byte read does not) — `verify.sh` -already detects and reports exactly that, the same way `link-swarm` reported the -`.spec` non-routing. +`node-adoption`) — there is **no objects storage/transfer playbook**. Storing on a +sibling (resolve the sibling, set an explicit query target, `objects.store` + +`objects.load`) must be reached from the protocol docs alone — a real test of "are +the docs sufficient without a playbook?". If the thin prompt proves shaky live, the +remedy is a small `objects` playbook in the skill, not a fatter prompt. diff --git a/netsim/tasks/share-object/prompt.md b/netsim/tasks/share-object/prompt.md index 2ecddb42..d7817499 100644 --- a/netsim/tasks/share-object/prompt.md +++ b/netsim/tasks/share-object/prompt.md @@ -1,10 +1,14 @@ On this machine an `astrald` node is running and you control it as its User (a -User-bound apphost token is at `~/.netsim/user.token`). Acting as that User, -store a short, distinctive text payload as an astral object via the objects -protocol, following your **astral-agent** skill, and note the Object ID it -returns. +User-bound apphost token is at `~/.netsim/user.token`). Your swarm has one other +node — a sibling. Acting as that User, store a short, distinctive text payload as +an astral object **on that other node** — address it explicitly as the query +target — via the objects protocol, following your **astral-agent** skill, and note +the Object ID it returns. Then read the object back **from that other node** by its +Object ID and confirm the bytes match what you stored. -Then write that Object ID to `~/.netsim/object.id` and the exact payload you -stored to `~/.netsim/object.payload`. The skill won't mention these files — they -are how the run is checked. Success means an Object ID is returned and both files -are written. 
+Then write the Object ID to `~/.netsim/object.id`, the exact payload you stored to +`~/.netsim/object.payload`, the bytes you read back to `~/.netsim/object.readback`, +and the node id you stored it on to `~/.netsim/object.target`. The skill won't +mention these files — they are how the run is checked. Success means the object is +stored on the other node, read back with matching bytes, and all four files are +written. diff --git a/netsim/tasks/share-object/run.sh b/netsim/tasks/share-object/run.sh index fffbde1d..7e03e0c7 100755 --- a/netsim/tasks/share-object/run.sh +++ b/netsim/tasks/share-object/run.sh @@ -1,15 +1,17 @@ #!/bin/sh -# share-object: have the operator node store an astral object, driven by the Qwen -# Code agent running INSIDE node1 (already a User node in one swarm with node2 — -# default starting stage: astrald-swarm). +# share-object: have node1 store an astral object ON its swarm sibling (node2) and +# read it back, driven by the Qwen Code agent running INSIDE node1 (already a User +# node in one swarm with node2 — default starting stage: astrald-swarm). # share-object [--vm ] (default: node1 — the VM carrying Qwen) # # Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user / # link-swarm: tiny script, thin prompt, intelligence in the agent's astral-agent -# skill. The agent does ONLY the store half (store a payload, surface its Object -# ID); the cross-swarm fetch from node2 is left entirely to verify.sh, which can -# address the sibling deterministically. The whole remote program travels as ONE -# argv to `netsim ssh`; the prompt rides along base64-encoded so a multi-line +# skill. The agent stores a payload ON THE OTHER node — addressing the sibling +# explicitly as the query target (:objects.store) — then loads it back from +# that node and confirms the bytes round-trip. 
verify.py then INDEPENDENTLY +# confirms node2 physically holds the object in its LOCAL repo with matching bytes +# (objects.contains/load -repo local on node2). The whole remote program travels as +# ONE argv to `netsim ssh`; the prompt rides along base64-encoded so a multi-line # file never fights shell quoting. set -eu @@ -44,15 +46,22 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/share-object.prompt)"' \ exit 1 } -# Cheap smoke-check; verify.sh does the authoritative, independent cross-swarm -# check. The agent must have recorded an Object ID and the payload it stored. -[ -s "$d/object.id" ] || { echo "agent recorded no Object ID on $(hostname) (~/.netsim/object.id)" >&2; exit 1; } -[ -s "$d/object.payload" ] || { echo "agent recorded no payload on $(hostname) (~/.netsim/object.payload)" >&2; exit 1; } +# Cheap smoke-check; verify.py does the authoritative, independent check (node2 +# physically holds the object in its local repo). The agent must have recorded an +# Object ID, the payload it stored, the bytes it read back, and the node it stored +# it on. 
+[ -s "$d/object.id" ] || { echo "agent recorded no Object ID on $(hostname) (~/.netsim/object.id)" >&2; exit 1; } +[ -s "$d/object.payload" ] || { echo "agent recorded no payload on $(hostname) (~/.netsim/object.payload)" >&2; exit 1; } +[ -s "$d/object.readback" ] || { echo "agent recorded no read-back on $(hostname) (~/.netsim/object.readback)" >&2; exit 1; } +[ -s "$d/object.target" ] || { echo "agent recorded no target node on $(hostname) (~/.netsim/object.target)" >&2; exit 1; } case "$(cat "$d/object.id")" in data1*) : ;; - *) echo "WARNING $(hostname): object.id does not look like a data1… Object ID (verify.sh decides)" >&2 ;; + *) echo "WARNING $(hostname): object.id does not look like a data1… Object ID (verify.py decides)" >&2 ;; esac -echo "share-object: agent finished on $(hostname); stored object $(cat "$d/object.id")" +# Advisory: the agent's own round-trip should already match (verify.py re-checks). +[ "$(cat "$d/object.payload")" = "$(cat "$d/object.readback")" ] \ + || echo "WARNING $(hostname): agent read-back != stored payload (verify.py decides)" >&2 +echo "share-object: agent finished on $(hostname); stored object $(cat "$d/object.id") on $(cat "$d/object.target")" EOS ) diff --git a/netsim/tasks/share-object/verify.py b/netsim/tasks/share-object/verify.py index 976ec906..67f064b5 100755 --- a/netsim/tasks/share-object/verify.py +++ b/netsim/tasks/share-object/verify.py @@ -1,32 +1,27 @@ #!/usr/bin/env python3 -"""verify share-object: an astral object stored on node1 must be obtainable by -its sibling node2 ACROSS THE SWARM. - -INDEPENDENT host-side check -- it does not trust run.sh. It reads the id + payload -the agent persisted on node1, then tries to pull that exact id FROM node2's vantage -and asserts the bytes match. Runs on the host (invoked by the verify.sh shim, which -netsim runs with cwd=sim root and $NETSIM_TASK_DIR set); it reaches the VMs with -`netsim ssh`. - -THE CROSS-SWARM HOP IS INFERRED, NOT DEMONSTRATED (see README). 
The astral-docs -describe a network zone + a finder/provider layer but no worked example of one -swarm member reading another's object by id, so we probe a LADDER and report which -hop routes -- exactly as link-swarm discovered that :.spec does NOT route. -Order (strongest -> weakest), all run on node2: - 1. EXPLICIT TARGET astral-query :objects.load -id -out json - Query-target routing over the swarm link. Primary: does NOT rely on node2's - network zone (an anonymous apphost caller has ZoneNetwork stripped) -- it - addresses node1 directly; node1 serves the read locally. - 2. TRANSPARENT astral-query objects.load -id -out json - Relies on the read context's zone defaulting to all zones (incl. network). - Likely BLOCKED for an anonymous host-side caller -- kept as a bonus probe. - 3. PROVIDER FIND astral-query objects.find -id -out json - Returns provider IDENTITIES, not bytes. If only this works, discovery - crosses but the byte read does not -- a partial finding, not a pass. - -PASS iff node2 obtained the EXACT stored bytes for the agent-reported id across the -swarm (hop 1 or 2). A pre-check asserts node2 doesn't already hold the object -locally, so a pass reflects a genuine remote pull. +"""verify share-object: storing an object ON a swarm sibling. + +The agent (on node1, acting as its User) stored an astral object ON the sibling +node2 by addressing it explicitly (`:objects.store`) and read it back from +node2. This check is INDEPENDENT -- it does not trust run.sh or the agent's own +read-back. It proves node2 PHYSICALLY HOLDS the object in its local repo: + + - `objects.store` writes to WriteDefault() == the "local" repo, so the object + lands in node2's "local" repo. 
We read it straight back from there on node2: + objects.load -id <id> -repo local -> bytes must equal the stored payload + objects.contains -repo local -id <id> -> corroborating bool + Both ops are UNGATED and repo-pinned, so a successful repo-local load on node2 + is decisive: the bytes came from node2's own storage, not re-fetched over the + network. node2 answers under its node identity (anonymous host-side caller, no + token) -- repo-local load/contains need no authorization. + +PASS iff node2's "local" repo returns the exact stored bytes for the agent-reported +Object ID. We also cross-check that the node the agent targeted matches node2's real +identity, and note (advisory) whether node1 also holds a copy. + +This is the write direction, unblocked by the swarm roster sync (astrald #348): +node2 now holds `User->node1`, so node2's AuthorizeRelayFor recognizes node1 and +the relayed `<node2-id>:objects.store` reaches op_store (which has no auth gate). astral-query ... -out json emits a JSON *stream* (one object per line, then an {"Type":"eos"} terminator), so everything is parsed line-by-line, not as one doc. @@ -43,8 +38,7 @@ def ssh(vm, remote): """Run `netsim ssh <vm> -- <remote>` on the host; return stdout (best-effort). astral-query writes error_message frames to stdout (which we parse) and other - failures (route_not_found, etc.) to stderr (which we drop) -- mirroring the - old shell `2>/dev/null`. + failures (route_not_found, etc.) to stderr (which we drop). 
""" p = subprocess.run(["netsim", "ssh", vm, "--", remote], capture_output=True, text=True) @@ -92,19 +86,8 @@ def contains_local(stream): return None -def find_identities(stream): - ids = [] - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - ob = o.get("Object") - if isinstance(ob, str): - ids.append(ob) - return ids - - def contract_subject(stream): - """node1's node identity = Subject of its active contract (from user.info).""" + """node identity = Subject of the active contract (from user.info).""" for o in objs(stream): ob = o.get("Object") if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): @@ -115,7 +98,7 @@ def contract_subject(stream): def remote_identity(stream): - """Fallback: RemoteIdentity from node2's nodes.links (the link back to node1).""" + """Fallback: RemoteIdentity from a nodes.links stream (the link to the peer).""" for o in objs(stream): ob = o.get("Object") if isinstance(ob, dict) and ob.get("RemoteIdentity"): @@ -130,85 +113,81 @@ def main(): args, _ = ap.parse_known_args() vm1, vm2 = args.node1, args.node2 - # node1: the id + payload the agent persisted, and node1's node identity. - # node1 acts as the User (token) so user.info returns the active contract whose - # Subject IS node1's node identity (the provider to target). ID strips all - # whitespace (matches the old `tr -d '[:space:]'`); PAY tolerates a trailing nl. + # What the agent persisted on node1. ID strips all whitespace; the text fields + # tolerate a trailing newline. ID = "".join(ssh(vm1, "cat /home/tester/.netsim/object.id").split()) PAY = ssh(vm1, "cat /home/tester/.netsim/object.payload").rstrip("\n") - n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") - - # node2 answers under its node identity (no token => anonymous apphost caller). 
- n2_links = ssh(vm2, "astral-query nodes.links -out json") - N1 = contract_subject(n1_info) or remote_identity(n2_links) or "" - + READBACK = ssh(vm1, "cat /home/tester/.netsim/object.readback").rstrip("\n") + TARGET = "".join(ssh(vm1, "cat /home/tester/.netsim/object.target").split()) + + # node2's real identity, resolved host-side: Subject of node2's active contract + # (node2 answers user.info under its node identity), with node1's link-back as a + # fallback. Used only to cross-check the node the agent claims it targeted. + n2_info = ssh(vm2, "astral-query user.info -out json") + n1_links = ssh(vm1, "astral-query nodes.links -out json") + N2 = contract_subject(n2_info) or remote_identity(n1_links) or "" + + # DECISIVE: read the object straight out of node2's "local" repo (where + # objects.store writes). Repo-pinned + ungated, so a hit proves node2 itself + # holds the bytes -- not a network re-fetch from node1. + n2_load = ssh(vm2, f"astral-query objects.load -id '{ID}' -repo local -out json") n2_contains = ssh(vm2, f"astral-query objects.contains -repo local -id '{ID}' -out json") - n2_find = ssh(vm2, f"astral-query objects.find -id '{ID}' -out json") - n2_transparent = ssh(vm2, f"astral-query objects.load -id '{ID}' -out json") - n2_explicit = ssh(vm2, f"astral-query '{N1}':objects.load -id '{ID}' -out json") if N1 else "" + got = loaded_payload(n2_load) + held = contains_local(n2_contains) + bytes_ok = got is not None and got.rstrip("\n") == PAY - already_local = contains_local(n2_contains) - explicit = loaded_payload(n2_explicit) - transparent = loaded_payload(n2_transparent) - providers = find_identities(n2_find) - - explicit_ok = explicit is not None and explicit.rstrip("\n") == PAY - transparent_ok = transparent is not None and transparent.rstrip("\n") == PAY - find_ok = (N1 in providers) if N1 else bool(providers) + # Advisory: did the object ALSO land in node1's local repo? 
(Not required -- the + # agent targeted node2 explicitly; a copy on node1 is fine, just noted.) + n1_contains = ssh(vm1, f"astral-query objects.contains -repo local -id '{ID}' -out json") + on_node1 = contains_local(n1_contains) errs, notes = [], [] if not ID: errs.append("no Object ID recorded on node1 (~/.netsim/object.id)") if not PAY: errs.append("no payload recorded on node1 (~/.netsim/object.payload)") - if not N1: - notes.append("could not resolve node1's node identity host-side (explicit-target read skipped)") - - # locality pre-check is advisory (objects.contains is probabilistic). - if already_local is True: - notes.append("objects.contains reports node2 may ALREADY hold this object locally; " - "a byte-match below might not be a genuine cross-swarm pull") - elif already_local is None: - notes.append("objects.contains gave no usable answer on node2 (locality pre-check inconclusive)") - - # auth-vs-route signal: an error_message naming auth/permission is a DIFFERENT - # failure than no route / no provider -- don't conflate them. - for label, stream in (("explicit-target", n2_explicit), - ("transparent", n2_transparent), - ("objects.find", n2_find)): - for e in errors(stream): - notes.append(f"{label} returned error_message: {e}") - - if explicit_ok or transparent_ok: - path = ("explicit-target (:objects.load)" if explicit_ok - else "transparent (objects.load, network zone)") - print(f"share-object OK: node2 pulled object {ID[:12]}.. from node1 across the " - f"swarm via {path}; bytes match ({len(PAY)} B). " - f"providers seen by objects.find: {len(providers)}.") + if READBACK and READBACK != PAY: + notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") + if TARGET and N2 and TARGET != N2: + notes.append(f"agent stored on {TARGET[:12]}.. but node2's identity is {N2[:12]}.. 
" + "(agent may have targeted the wrong node)") + elif not TARGET: + notes.append("agent recorded no target node (~/.netsim/object.target)") + if on_node1 is True: + notes.append("object is ALSO present in node1's local repo (a local copy alongside the " + "remote store -- not required, just noted)") + + if not errs and bytes_ok: + tgt = (N2[:12] + "..") if N2 else (TARGET[:12] + ".." if TARGET else "node2") + print(f"share-object OK: node1 stored object {ID[:12]}.. ON sibling {tgt} and node2's " + f"local repo returns the exact bytes ({len(PAY)} B). " + f"contains(local)={held}.") for n in notes: sys.stderr.write(f" note: {n}\n") return 0 - # Did not cross. Build a precise diagnostic (the link-swarm-style finding). - sys.stderr.write("share-object verify FAILED: object did NOT cross the swarm to node2.\n") - if find_ok: - sys.stderr.write(" FINDING: provider discovery DOES cross the swarm " - "(objects.find on node2 returned node1) but the byte READ did not route " - "(explicit-target and transparent objects.load both failed to return the payload). " - "This is the share-object analogue of link-swarm's ':.spec does not route' " - "discovery -- record which hop routes in the task log.\n") - else: - sys.stderr.write(" no cross-swarm object access at all: neither a read nor objects.find " - "resolved node1's object from node2.\n") + # Failure -- pinpoint what broke. + sys.stderr.write("share-object verify FAILED: node2 does NOT hold the object in its local repo.\n") for e in errs: sys.stderr.write(f" - {e}\n") + if held is False: + sys.stderr.write(" node2 objects.contains -repo local = false: the store never landed on node2 " + "(relay rejected, or the agent stored locally on node1 instead of targeting node2). 
" + "Check node2's journal for an objects.store and node1's for a 'query rejected'.\n") + elif got is None: + sys.stderr.write(" node2 objects.load -repo local returned no payload (see error frames below).\n") + elif not bytes_ok: + sys.stderr.write(f" node2 returned bytes that do not match: got {got!r} != stored {PAY!r}.\n") + # surface error frames (auth vs not-found vs repo-missing) without conflating. + for label, stream in (("node2 load", n2_load), ("node2 contains", n2_contains)): + for e in errors(stream): + sys.stderr.write(f" {label} error_message: {e}\n") for n in notes: sys.stderr.write(f" note: {n}\n") - n1disp = (N1[:12] + "..") if N1 else "?" - sys.stderr.write(f" (id={ID} node1={n1disp} " - f"explicit={'hit' if explicit is not None else 'miss'} " - f"transparent={'hit' if transparent is not None else 'miss'} " - f"find_providers={len(providers)})\n") + n2disp = (N2[:12] + "..") if N2 else "?" + sys.stderr.write(f" (id={ID} node2={n2disp} target={(TARGET[:12]+'..') if TARGET else '?'} " + f"contains={held} load={'hit' if got is not None else 'miss'} " + f"on_node1={on_node1})\n") return 1 From 238dfdaffa34a1475964510e9aca838ca55baace Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 11:49:48 +0200 Subject: [PATCH 15/57] netsim: add per-flow story files; refresh README + running-as-a-service Move lab.story into netsim/stories/ and add one story per tested flow (bootstrap-user, link-swarm, share-object), each a thin task list with a start/save stage header so a story doubles as a pass/fail integration test. Refresh netsim/README.md (full task list, swarm pipeline via stories) and reconcile running-as-a-service.md snapshot guidance (disk image: stop; live RAM snapshot: leave running). 
--- docs/running-as-a-service.md | 13 +++++---- netsim/README.md | 44 ++++++++++++++++++++++------- netsim/stories/bootstrap-user.story | 4 +++ netsim/{ => stories}/lab.story | 1 + netsim/stories/link-swarm.story | 4 +++ netsim/stories/share-object.story | 4 +++ 6 files changed, 55 insertions(+), 15 deletions(-) create mode 100644 netsim/stories/bootstrap-user.story rename netsim/{ => stories}/lab.story (90%) create mode 100644 netsim/stories/link-swarm.story create mode 100644 netsim/stories/share-object.story diff --git a/docs/running-as-a-service.md b/docs/running-as-a-service.md index ce3ca284..da9c591f 100644 --- a/docs/running-as-a-service.md +++ b/docs/running-as-a-service.md @@ -76,13 +76,16 @@ Default transports bind all interfaces. ## Imaging and snapshots -Stop astrald before capturing a VM image or live snapshot; leave the unit enabled. +Which step you take depends on the capture type: + +- **Disk image (cold):** stop astrald first for a clean on-disk state; keep the + unit enabled so it autostarts on boot. +- **Live RAM snapshot (e.g. netsim):** leave astrald running so it resumes + already-running on restore. ```shell systemctl enable astrald -systemctl stop astrald +systemctl stop astrald # disk image only — skip for a live RAM snapshot ``` -A running daemon dirties memory continuously and can stall a live RAM snapshot. -The enabled unit autostarts astrald on boot. The identity at -`/config/node_key` persists across the capture. +The identity at `/config/node_key` persists across either capture. diff --git a/netsim/README.md b/netsim/README.md index dee21f62..eefd03a6 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -13,11 +13,17 @@ tasks in one simulation and saves a named *stage*. 
`lab.story` builds the ``` netsim/ - tasks/ - install-astrald/ # build + run astrald as a service (tasks/install-astrald/README.md) + tasks/ # each task: run.sh (+ verify.sh / verify.py) + README.md + install-astrald/ # build + run astrald as a service on each node configure-astral-agent/ # install the astral-agent skill into the qwen operator - run.sh / verify.sh / README.md # each task: installs on target VMs + independent re-check - lab.story # full lab in one simulation -> stage astrald-lab + bootstrap-user/ # make node1 a User node -> stage astrald-user + link-swarm/ # adopt node2 into node1's swarm -> stage astrald-swarm + share-object/ # store an object on the sibling -> stage astrald-shared + stories/ # one story per tested flow (start/save stage in each header) + lab.story # null -> astrald-lab + bootstrap-user.story # astrald-lab -> astrald-user + link-swarm.story # astrald-user -> astrald-swarm + share-object.story # astrald-swarm -> astrald-shared link.sh # register tasks with netsim (idempotent; re-run anytime) README.md ``` @@ -83,17 +89,35 @@ then build the lab: ```sh ./netsim/link.sh export SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy # see tasks/configure-astral-agent -netsim story --stage null --save astrald-lab netsim/lab.story +netsim story --stage null --save astrald-lab netsim/stories/lab.story ``` The result is the stage `astrald-lab`: `node1` and `node2` running astrald, with a Qwen Code operator on `node1` equipped with the `astral-agent` skill. Re-enter it with `netsim shell --stage astrald-lab`. -## Scope +## Swarm pipeline + +Each post-lab flow is its own story under `stories/`, layered on the previous +stage (its `start`/`save` stages are in the story header). 
Intermediate stages +stay reusable, so you can replay one flow without rebuilding the chain: + +``` +astrald-lab ─[bootstrap-user]→ astrald-user ─[link-swarm]→ astrald-swarm ─[share-object]→ astrald-shared +``` -v1 installs and runs astrald on each node as two independent nodes. Linking the -nodes and verifying a live session is a later phase. +```sh +netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user.story +netsim story --stage astrald-user --save astrald-swarm netsim/stories/link-swarm.story +netsim story --stage astrald-swarm --save astrald-shared netsim/stories/share-object.story +``` + +Each story drives the Qwen operator through its `astral-agent` skill, then runs an +independent `verify.sh`/`verify.py` check — so a story is a pass/fail integration +test for that flow. + +## Scope -Fresh nodes broadcast on UDP 8822 through the `ether` and `nearby` modules and -discover each other on a shared L2 LAN. v1 asserts nothing about discovery. +The lab stands up two astrald nodes, links them into one User Swarm, and stores +an object on the sibling across it. Nodes discover each other on the shared L2 +LAN via UDP 8822 (`ether`/`nearby`). diff --git a/netsim/stories/bootstrap-user.story b/netsim/stories/bootstrap-user.story new file mode 100644 index 00000000..4917388f --- /dev/null +++ b/netsim/stories/bootstrap-user.story @@ -0,0 +1,4 @@ +# bootstrap-user.story — node1 becomes a User-controlled node. +# start: astrald-lab save: astrald-user +# netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user.story +bootstrap-user diff --git a/netsim/lab.story b/netsim/stories/lab.story similarity index 90% rename from netsim/lab.story rename to netsim/stories/lab.story index 44c7de66..b5401d50 100644 --- a/netsim/lab.story +++ b/netsim/stories/lab.story @@ -1,4 +1,5 @@ # lab.story — the astrald lab, built in one netsim simulation. 
+# start: null save: astrald-lab # Result: a single stage with two nodes running astrald and a Qwen Code # operator on node1, equipped with the astral-agent skill. add-vm --hostname node1 diff --git a/netsim/stories/link-swarm.story b/netsim/stories/link-swarm.story new file mode 100644 index 00000000..4572ceae --- /dev/null +++ b/netsim/stories/link-swarm.story @@ -0,0 +1,4 @@ +# link-swarm.story — adopt node2 into node1's User swarm (symmetric roster). +# start: astrald-user save: astrald-swarm +# netsim story --stage astrald-user --save astrald-swarm netsim/stories/link-swarm.story +link-swarm diff --git a/netsim/stories/share-object.story b/netsim/stories/share-object.story new file mode 100644 index 00000000..9ac66f71 --- /dev/null +++ b/netsim/stories/share-object.story @@ -0,0 +1,4 @@ +# share-object.story — store an astral object on the swarm sibling (node2). +# start: astrald-swarm save: astrald-shared +# netsim story --stage astrald-swarm --save astrald-shared netsim/stories/share-object.story +share-object From e45dcf29b0edc3b7caf699f32897dbe25fdef23d Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 11:55:52 +0200 Subject: [PATCH 16/57] netsim: trim verbose top-of-file docstrings in verify.py scripts The link-swarm and share-object verifiers carried long module docstrings restating rationale already in their README.md. Cut to a one/two-line summary; no logic change. --- netsim/tasks/link-swarm/verify.py | 26 +++--------------------- netsim/tasks/share-object/verify.py | 31 +++++------------------------ 2 files changed, 8 insertions(+), 49 deletions(-) diff --git a/netsim/tasks/link-swarm/verify.py b/netsim/tasks/link-swarm/verify.py index f3bab909..24eed5ac 100755 --- a/netsim/tasks/link-swarm/verify.py +++ b/netsim/tasks/link-swarm/verify.py @@ -1,27 +1,7 @@ #!/usr/bin/env python3 -"""verify link-swarm: node1 and node2 must be linked into one User swarm. - -INDEPENDENT both-ends check -- it does not trust run.sh. 
It pulls raw JSON from -both nodes and asserts FOUR facts on the host; together they prove the swarm from -both ends, with a SYMMETRIC roster: - 1. both nodes hold an active contract issued by the SAME User - (user.info: Issuer == the bootstrap User on each; Subject == that node); - 2. node1, acting as the User, lists node2 as a Linked sibling (user.swarm_status); - 3. node2 lists node1 as a Linked sibling too (user.swarm_status) -- the symmetric - roster delivered by astrald #348. node2's swarm view derives from its own - active contract, not the caller, so this needs no User token; it guards the - membership-race regression (pre-#348 node2's roster was {node2} only). - 4. a mutual authenticated link exists (node2 nodes.links -> node1). - -Runs on the host (invoked by the verify.sh shim); reaches the VMs with `netsim ssh`. - -NOTE on "routed query": an earlier plan probed `:.spec` as the proof. That is -NOT valid -- node introspection ops (.spec/.id/.ping) are served locally and do not -route to a sibling by node-id, so they fail even on a fully formed swarm. The -contract + link + sibling triple above is the real proof. - -astral-query ... -out json emits a JSON *stream* (one object per line, then an -{"Type":"eos"} terminator), so everything is parsed line-by-line, not as one doc. +"""verify link-swarm: node1 and node2 linked into one User swarm, symmetric roster. + +Independent both-ends check (does not trust run.sh); reaches the VMs via netsim ssh. """ import argparse import json diff --git a/netsim/tasks/share-object/verify.py b/netsim/tasks/share-object/verify.py index 67f064b5..eff0535e 100755 --- a/netsim/tasks/share-object/verify.py +++ b/netsim/tasks/share-object/verify.py @@ -1,30 +1,9 @@ #!/usr/bin/env python3 -"""verify share-object: storing an object ON a swarm sibling. - -The agent (on node1, acting as its User) stored an astral object ON the sibling -node2 by addressing it explicitly (`:objects.store`) and read it back from -node2. 
This check is INDEPENDENT -- it does not trust run.sh or the agent's own -read-back. It proves node2 PHYSICALLY HOLDS the object in its local repo: - - - `objects.store` writes to WriteDefault() == the "local" repo, so the object - lands in node2's "local" repo. We read it straight back from there on node2: - objects.load -id -repo local -> bytes must equal the stored payload - objects.contains -repo local -id -> corroborating bool - Both ops are UNGATED and repo-pinned, so a successful repo-local load on node2 - is decisive: the bytes came from node2's own storage, not re-fetched over the - network. node2 answers under its node identity (anonymous host-side caller, no - token) -- repo-local load/contains need no authorization. - -PASS iff node2's "local" repo returns the exact stored bytes for the agent-reported -Object ID. We also cross-check that the node the agent targeted matches node2's real -identity, and note (advisory) whether node1 also holds a copy. - -This is the write direction, unblocked by the swarm roster sync (astrald #348): -node2 now holds `User->node1`, so node2's AuthorizeRelayFor recognizes node1 and -the relayed `:objects.store` reaches op_store (which has no auth gate). - -astral-query ... -out json emits a JSON *stream* (one object per line, then an -{"Type":"eos"} terminator), so everything is parsed line-by-line, not as one doc. +"""verify share-object: prove node2 physically holds the object node1 stored on it. + +Independent host-side check (does not trust run.sh or the agent's read-back): a +repo-pinned, ungated objects.load -repo local on node2 must return the exact stored +bytes. Reaches the VMs via netsim ssh. See README.md for the full rationale. 
""" import argparse import json From 2084e9679a33d1e32c938c5537802281c945ec41 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 11:57:05 +0200 Subject: [PATCH 17/57] netsim: minimize task READMEs to a general description each Cut the per-task READMEs down to a short paragraph (what the task does + the stage it produces); dropped the execution-model, build-facts, verify-internals, deploy-key setup, and security-note sections. No behavior change. --- netsim/tasks/bootstrap-user/README.md | 85 +----------- netsim/tasks/configure-astral-agent/README.md | 84 +----------- netsim/tasks/install-astrald/README.md | 74 +--------- netsim/tasks/link-swarm/README.md | 82 +---------- netsim/tasks/share-object/README.md | 127 +----------------- 5 files changed, 25 insertions(+), 427 deletions(-) diff --git a/netsim/tasks/bootstrap-user/README.md b/netsim/tasks/bootstrap-user/README.md index 44e58831..65af9c23 100644 --- a/netsim/tasks/bootstrap-user/README.md +++ b/netsim/tasks/bootstrap-user/README.md @@ -1,83 +1,6 @@ # bootstrap-user -A netsim task that turns the operator node into a **User-controlled node**, -driven by the Qwen Code agent running inside the VM. It is the first half of the -swarm phase: it establishes identity only; it does not link or adopt anything -(that is [`link-swarm`](../link-swarm/README.md)). - -``` -bootstrap-user [--vm ] # default: node1 (the VM carrying Qwen) -``` - -After it runs, the node holds an active `mod.user.swarm_membership_action` contract -(issuer = a fresh software User, subject = this node), and a User-bound apphost -token is persisted so later tasks can act as the User. It produces a new stage -on top of the lab base — run it standalone against `astrald-lab` with: - -```sh -netsim task --stage astrald-lab --save astrald-user bootstrap-user -``` - -(`bootstrap-user` is deliberately *not* part of `lab.story`: `astrald-lab` stays -the reusable base, and each swarm step is an incremental stage layered on it.) 
- -## Execution model - -`run.sh` runs on the host (cwd = simulation root) and does almost nothing -itself: it base64-ships [`prompt.md`](prompt.md) to the guest in a single -`netsim ssh -- ` call and invokes `qwen` as user `tester`, -non-interactively (one-shot positional prompt + `-y`), against that prompt. The -astral work — minting a software User, signing and installing the node contract, -persisting a token — is carried out by the agent, not by the script. - -This is the design principle, taken one step further than the other tasks: -**tiny script, thin prompt, intelligence in the skill.** The prompt does not -spell out the contract procedure — it states the situation and the goal in plain -sentences and tells the agent to follow its **astral-agent** skill, whose -`node-setup` playbook (software-User path) is exactly this flow. The prompt -carries only what the skill cannot know: the machine-specific files `verify.sh` -will look for, idempotency, and the success criterion. If the skill is present -and sufficient, that is all the agent needs; exercising that is part of the -test. - -The agent writes its artifacts under `~tester/.netsim/`: - -| File | Purpose | -|---|---| -| `user.id` | the User's hex public key (the User identity) | -| `user.token` | a User-bound apphost access token (also exported in `~/.bashrc`) | -| `bootstrap-user.log` | the agent's run log | - -`verify.sh` is an **independent** re-check: it reads `user.id` + `user.token`, -acts as the User, and asserts `apphost.whoami` reports the User and `user.info` -returns the active contract (which the op rejects with code `2` when absent). - -## The contract flow (driven by the skill, not the prompt) - -For reference only — this is what the astral-agent `node-setup` playbook -(software User) walks the agent through; the prompt does **not** restate it: - -1. Mint a `secp256k1` User key via the `bip137sig` ops - (`new_entropy → mnemonic → seed → derive_key`). -2. 
Store the private key via `objects.store` so `crypto` indexes it as a signer. -3. Derive the User identity (`crypto.public_key`). -4. (optional) `dir.set_alias` for a readable name. -5. `user.new_node_contract -user ` (subject defaults to this node). -6. `auth.sign_contract` — co-signs as issuer + subject (both keys are local). -7. Install at tree path `/mod/user/config/active_contract` via `tree.set`. -8. `apphost.create_token` for the User; persist + export it. -9. Confirm with `apphost.whoami` + `user.info`. - -## Not yet run end-to-end - -The harness mechanics mirror the validated `configure-astral-agent` task, and -the `qwen -y ""` invocation matches what was confirmed against the live -lab. Still unverified until run on a fresh `astrald-lab` stage: - -* whether the thin prompt reliably triggers the astral-agent skill and the agent - follows the `node-setup` playbook to a passing `verify.sh`; -* that `objects.store` of a `crypto.private_key` and `tree.set` of the active - contract succeed under the node's local access without a pre-minted token; -* that the agent keeps `ASTRALD_APPHOST_TOKEN` exported within its own session - for the User-scoped steps (the skill's "Acting as the User from the CLI" - section covers this). +Drives the Qwen operator on node1 to make it a **User-controlled node** — mint a +software User and install node1's active contract — following the astral-agent +skill's node-setup playbook. `verify.sh` independently confirms node1 answers as +that User. Produces stage `astrald-user` (from `astrald-lab`). 
diff --git a/netsim/tasks/configure-astral-agent/README.md b/netsim/tasks/configure-astral-agent/README.md index 1c65b3fa..be818368 100644 --- a/netsim/tasks/configure-astral-agent/README.md +++ b/netsim/tasks/configure-astral-agent/README.md @@ -1,81 +1,7 @@ # configure-astral-agent -A netsim task that installs the `astral-agent` skill into the Qwen Code operator -on a VM, so the operator can drive astrald from the skill's knowledge (the -astral-docs corpus + playbooks) instead of having every procedure spelled out in -each task prompt. - -``` -configure-astral-agent [--vm ] [--user ] # default: node1, tester -``` - -After it runs, `~/.qwen/skills/astral-agent` exists (SKILL.md with -frontmatter, `references/`, and the `astral-docs` mount). Run standalone against -the lab stage with: - -```sh -SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy \ - netsim task --stage astrald-lab --save astrald-operator configure-astral-agent -``` - -## Setup (one-time, on the netsim host) - -The host running the sims must own a deploy key for the private repo: - -```sh -# 1. generate a keypair (keep the private half on the host) -ssh-keygen -t ed25519 -f ~/.ssh/satforge_skills_deploy -N '' -C netsim-skills-deploy - -# 2. register the PUBLIC half on GitHub: -# satforgedev/skills -> Settings -> Deploy keys -> Add -> paste -# ~/.ssh/satforge_skills_deploy.pub (read-only is enough) - -# 3. point the env at the PRIVATE key (export it, or prefix each netsim run) -export SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy -``` - -`SATFORGE_SKILLS_DEPLOY_KEY` is a **path to the private deploy-key file**. netsim -runs this task as a subprocess and passes the env through, so exporting it once -covers every `netsim story` / `netsim task` invocation. - -## How it works — deploy key, clone in the VM - -`satforgedev/skills` is **private**, so the **host** owns the deploy key and the -VM never carries GitHub credentials of its own. 
`run.sh` (host) reads the private -key from `$SATFORGE_SKILLS_DEPLOY_KEY` and base64-ships it into the VM over one -`netsim ssh` argv. The guest then, as the operator: - -1. installs the key at `~/.ssh/skills_deploy` and clones - `git@github.com:satforgedev/skills` via `GIT_SSH_COMMAND` (parent repo over - SSH/deploy-key; the `astral-docs` submodule is public HTTPS — no key needed); -2. builds the `satforge-skills` linker (Go is already on the node from - `install-astrald`); -3. `satforge-skills link astral-agent --target qwen` → installs into - `~/.qwen/skills/astral-agent` (Qwen Code reads `SKILL.md`, frontmatter intact, - from there). The clone stays in `~/satforge-skills`, so the install's symlinks - resolve and the operator can re-link/pull other skills later. - -Idempotent: re-running `git pull`s the default branch, `unlink`s, then `link`s again. - -## Environment - -| Var | Default | Meaning | -|---|---|---| -| `SATFORGE_SKILLS_DEPLOY_KEY` | *(required)* | host path to the private deploy key for the repo | -| `SATFORGE_SKILLS_REPO` | `git@github.com:satforgedev/skills` | repo SSH URL (clones the default branch) | - -## Security note - -For now the deploy key is **left in the VM** (and therefore in the saved -snapshot) — simplest, and lets the operator re-pull skills. This is a private key -inside a shareable stage; we may switch to wiping it before the snapshot (inject -→ clone/build/link → remove key) if that exposure matters. See the `NOTE` in -`run.sh`. - -If outbound SSH:22 is ever blocked in the sim, point `SATFORGE_SKILLS_REPO` at -`ssh://git@ssh.github.com:443/satforgedev/skills`. - -## Scope - -Installs exactly one skill (`astral-agent`). `node2` is untouched — only the -operator node needs it. +Installs the `astral-agent` skill into the Qwen Code operator on node1, so it can +drive astrald from the skill's playbooks + astral-docs instead of from procedures +spelled out in each prompt. 
The host ships a deploy key into the VM +(`SATFORGE_SKILLS_DEPLOY_KEY`, a host path to the private key), where the operator clones the private `satforgedev/skills` +and links the skill into `~tester/.qwen/skills/astral-agent`. Part of `lab.story`. diff --git a/netsim/tasks/install-astrald/README.md b/netsim/tasks/install-astrald/README.md index f883f155..d81fe9ca 100644 --- a/netsim/tasks/install-astrald/README.md +++ b/netsim/tasks/install-astrald/README.md @@ -1,69 +1,9 @@ # install-astrald -A netsim task that builds `astrald` from source and installs it as a systemd -service on target VMs. `run.sh` builds, installs, and enables the unit; -`verify.sh` independently confirms the node answers. The service is left enabled -and running, so the netsim stage snapshots a live node that resumes already-running -on restore. See [Running astrald as a service](../../../docs/running-as-a-service.md) -for the unit file and operational details. - -``` -install-astrald [--vm ]... [--ref ] -``` - -* No `--vm`: every running VM in the simulation, derived from - `netsim vm ls --json`. -* `--vm ` (repeatable): restrict to the named hosts. -* `--ref `: build a branch or tag via a shallow `--branch` clone instead - of the default branch. - -Each target receives, in one ssh call: `git` and `curl` ensured, Go from the -official tarball, `astrald` and `astral-query` built to `/usr/local/bin`, and a -systemd unit installed and enabled. astrald is started and confirmed to answer -`astral-query localnode:.spec`, then left running for snapshotting. - -Use the task in a story (see the [netsim README](../../README.md#lab)), or run it -standalone against an existing stage with -`netsim task --stage --save install-astrald`. - -## Execution model - -`run.sh` and `verify.sh` run on the host, with the simulation root as the working -directory. They reach each guest with `netsim ssh -- ` and land as -`root`. - -Everything after `--` is one argv element; ssh joins argv with spaces and the -guest shell re-parses it.
The whole remote program is sent as a single string: -parameters as an assignment prefix, the body in a single-quoted heredoc -(`<<'EOS'`) so host-side `$...` reach the guest unexpanded. - -```sh -netsim ssh "$vm" -- "repo='$REPO' ref='$REF' go_ver='$GO_VERSION'; $REMOTE_BODY" -``` - -## Build and run facts - -* Go is installed from the official tarball. astrald's `go.mod` requires - `go >= 1.25.0`; the apt package is older. The download is arch-aware - (`x86_64`→`amd64`, `aarch64`→`arm64`). -* The clone is `git clone --depth 1` over HTTPS, never `--recursive`. The only - submodule (`.ai/system`) is an SSH-only docs repo and is not needed for the - build. -* The build sets `CGO_ENABLED=0`. astrald uses pure-Go SQLite and needs no C - toolchain. -* Build targets carry the `./` prefix: `go build -o /usr/local/bin/astrald - ./cmd/astrald`, and the same for `./cmd/astral-query`. `go build cmd/astrald` - fails; `go build .` at the repo root builds a do-nothing stub. -* The service runs `astrald -root /var/lib/astrald` with `Environment=HOME=/root`. - Default config and data paths derive from `$HOME`, which systemd does not set. - The unit sets `KillSignal=SIGINT` so `systemctl stop` shuts astrald down - gracefully (astrald traps SIGINT, not SIGTERM). -* First start auto-generates the node identity, a `secp256k1` key at - `/var/lib/astrald/config/node_key`, with no prompt and no TTY. -* The liveness probe is `astral-query localnode:.spec`. The op is built-in and - always available; it streams the node's operation spec over the local apphost - API (`tcp:127.0.0.1:8625`, anonymous access by default). Exit code 0 means - healthy. -* apt calls pass `-o DPkg::Lock::Timeout=120`; `cloud-init` can hold the dpkg - lock on a fresh boot. Readiness is never gated on `ping`; guest ICMP is - disabled. +Builds `astrald` and `astral-query` from source and runs `astrald` as a systemd +service on the target VMs (all running VMs by default, or only those named with `--vm`; `--ref` +builds a specific git ref).
`verify.sh` confirms each node answers +`astral-query localnode:.spec`. The service is left running so the netsim stage +snapshots a live node that resumes already-running. Used by `lab.story`; see +[Running astrald as a service](../../../docs/running-as-a-service.md) for the unit +file and operational details. diff --git a/netsim/tasks/link-swarm/README.md b/netsim/tasks/link-swarm/README.md index 7569b6e4..fc80a6c0 100644 --- a/netsim/tasks/link-swarm/README.md +++ b/netsim/tasks/link-swarm/README.md @@ -1,79 +1,7 @@ # link-swarm -A netsim task that adopts the second node into the User's swarm, driven by the -Qwen Code agent on node1. It is the second half of the swarm phase: node1 is -already a User node (from [`bootstrap-user`](../bootstrap-user/README.md)); this -task brings node2 under the same User so the two share one swarm. - -``` -link-swarm [--vm ] # default: node1 (the VM carrying Qwen) -``` - -It produces a new stage on top of the bootstrapped lab — its default starting -point is `astrald-user` (the `astrald-lab` stage after `bootstrap-user`): - -```sh -netsim task --stage astrald-user --save astrald-swarm link-swarm -``` - -(Like `bootstrap-user`, it is **not** part of `lab.story`; each swarm step is an -incremental stage layered on the reusable base.) - -## Execution model - -Identical mechanic to `bootstrap-user`: **tiny script, thin prompt, intelligence -in the skill.** `run.sh` base64-ships [`prompt.md`](prompt.md) to node1 in a -single `netsim ssh` call and runs `qwen -y` as `tester`. The prompt is two -sentences — the operator is told it controls a User node, that another astrald -node is on the local network, and to adopt it following its **astral-agent** -skill's `node-adoption` playbook. The adopt flow (`user.adopt -target `, -reachability via the `nearby` module on the shared LAN) lives entirely in the -skill; the prompt restates none of it. 
- -Reachability is already de-risked (see the task doc's discovery log): `nearby` -discovery works across netsim's per-VM NAT, so the agent needs no manual -`nodes.add_endpoint`. - -## What `verify.sh` checks (independent, both ends) - -It pulls raw JSON from both nodes and asserts **four facts on the host** that -together prove the swarm from both ends, with a **symmetric roster**: - -1. **Both nodes hold an active contract issued by the same User** — `user.info` - on node1 *and* node2 each shows `Issuer == ` with `Subject ==` - that node. node2 independently confirming the same User is the key both-ends - proof that the adoption took. -2. **node1, as the User, lists node2 as a `Linked` sibling** (`user.swarm_status`). -3. **node2 lists node1 as a `Linked` sibling too** (`user.swarm_status`) — the - symmetric roster delivered by astrald **#348**. `swarm_status` derives from - node2's own active contract (not the caller), so this needs no User token. This - guards the membership-race regression: pre-#348, node2 held only `User→node2` - and its roster was `{node2}`, so it never recognized node1 — exactly the gap - that blocked storing objects on a sibling (see `share-object`). -4. **A mutual authenticated link exists** — node2's `nodes.links` shows a link - whose `RemoteIdentity` is node1. - -node1 acts as the User via its persisted token; node2 answers under its node -identity (it holds the contract after the adoption, so no token is needed there). - -`astral-query … -out json` emits a JSON **stream** (one object per line + an -`{"Type":"eos"}` terminator), so output is parsed line-by-line, not as one -document. Parsing/assertions run with host `python3`. - -## Why not a "routed query" proof? - -The first cut planned to prove routing with `astral-query :.spec`. 
That is -**not valid**: node introspection ops (`.spec`, `.id`, `.ping`) are served -locally and do **not** route to a sibling addressed by node-id — they fail even -on a fully formed swarm (verified live: every `:` returned a routing -failure while the swarm was demonstrably linked). The earlier discovery-log -hypothesis that "swarm membership unlocks `:.spec`" is therefore -**disproven**. The contract + link + sibling triple above is the correct, -reproducible both-ends proof. - -## Validated end-to-end - -Run `astrald-user → astrald-swarm` (2026-06-17): the thin prompt drove the -operator to `user.adopt` node2 into the User's swarm; both nodes ended under one -User (`02ad7ef7…`) with a mutual link, and the rewritten `verify.sh` passes. -Stage `astrald-swarm` saved. +Drives the Qwen operator on node1 to **adopt node2** into the User's swarm +(`user.adopt`), following the astral-agent skill's node-adoption playbook. +`verify.py` independently confirms both nodes hold a contract under the same User, +a mutual link, and a symmetric roster (each lists the other as a Linked sibling). +Produces stage `astrald-swarm` (from `astrald-user`). diff --git a/netsim/tasks/share-object/README.md b/netsim/tasks/share-object/README.md index 7c993ddc..6faeec9a 100644 --- a/netsim/tasks/share-object/README.md +++ b/netsim/tasks/share-object/README.md @@ -1,125 +1,6 @@ # share-object -A netsim task that has node1 (acting as its User) **store an astral object ON its -swarm sibling** (node2) and read it back, then independently proves node2 -physically holds it. It is the first scenario *past* swarm formation: where -`bootstrap-user` + `link-swarm` build the swarm, `share-object` makes the swarm -carry data the hard way — one member **writing** an object onto another — astral's -core act, "Identities exchanging Objects." 
- -``` -share-object [--vm ] # default: node1 (the VM carrying Qwen) -``` - -It produces a new stage on top of the formed swarm — its default starting point -is `astrald-swarm` (two nodes in one User Swarm): - -```sh -netsim task --stage astrald-swarm --save astrald-shared share-object -``` - -(Like the other swarm tasks, it is **not** part of `lab.story`; each step is an -incremental stage layered on the reusable base.) - -## Execution model - -Same mechanic as `bootstrap-user` / `link-swarm`: **tiny script, thin prompt, -intelligence in the skill.** `run.sh` base64-ships [`prompt.md`](prompt.md) to -node1 in a single `netsim ssh` call and runs `qwen -y` as `tester`. - -* **The agent stores ON the other node.** The prompt tells the operator — acting - as its User — to store a short, distinctive text payload as an astral object - **on the sibling**, addressing it explicitly as the query target - (`:objects.store`), then to load it back **from that node** and confirm - the bytes round-trip. This exercises the comprehension axis (find the sibling, - set an explicit query target, store + load) and the newly-unblocked write - capability in one go. It records the id, the stored payload, the read-back, and - the node id it targeted. -* **The independent proof lives in `verify.py`.** A host-side check confirms node2 - physically holds the object, deterministically, without trusting the agent. 
- -The agent writes its artifacts under `~tester/.netsim/`: - -| File | Purpose | -|---|---| -| `object.id` | the `data1…` Object ID returned by `objects.store` | -| `object.payload` | the exact bytes the agent stored (the node2-local read is compared against this) | -| `object.readback` | the bytes the agent read back from node2 (advisory cross-check) | -| `object.target` | the node id the agent stored on (cross-checked against node2's real identity) | -| `share-object.log` | the agent's run log | - -The store is, per the docs, the same `string8` form as a local store but with an -explicit target: -`echo '{"Type":"string8","Object":""}' | astral-query :objects.store -in json -out json` -run under the User token (`ASTRALD_APPHOST_TOKEN` from `~/.netsim/user.token`); the -read-back is `astral-query :objects.load -id `. - -## What `verify.py` checks (independent, decisive) - -`verify.py` does not trust `run.sh` or the agent's read-back. It reads the id + -payload the agent persisted on node1 and proves **node2 physically holds the -object in its local repo**: - -* `objects.store` writes to `WriteDefault()` — the **`local`** repo — so the object - lands in node2's `local` repo. `verify.py` reads it straight back from there, on - node2: - * `astral-query objects.load -id -repo local` → bytes must equal the stored - payload — **the decisive check**; - * `astral-query objects.contains -repo local -id ` → corroborating bool. -* Both ops are **ungated and repo-pinned**, so a successful repo-local load on node2 - is conclusive: the bytes came from node2's own storage, not a network re-fetch - from node1. node2 answers under its node identity (anonymous host-side caller, no - token — repo-local load/contains need no authorization). 
- -It also resolves node2's real identity host-side (the `Subject` of node2's active -contract, with node1's `nodes.links` `RemoteIdentity` as a fallback) to cross-check -the node the agent claims it targeted, and notes (advisory) whether node1 also -holds a copy. **PASS** iff node2's `local` repo returns the exact stored bytes. -`astral-query … -out json` emits a JSON **stream** (one object/line + an -`{"Type":"eos"}` terminator), parsed line-by-line with host `python3`. - -## Why storing on a sibling works now - -Earlier runs hit `query rejected (1)` on `:objects.store` — node2 refused to -relay node1's User-authenticated query because its swarm roster was **asymmetric**: -after `link-swarm`, node2 held only `User→node2` and never learned `User→node1`, so -node1 was absent from node2's `LocalSwarm()` and `AuthorizeRelayFor` denied the -relay before the query reached any objects op. - -astrald **#348** ("Sync full swarm roster to a newly invited node", on `master`) -fixes this: both invite paths now schedule `SyncNodesAction` against the joined -node right after indexing, so node2 converges to the full, symmetric roster -(including `User→node1`). node2 now recognizes node1 as a sibling → -`AuthorizeRelayFor` allows the relay → the query reaches `op_store`, which has **no -auth gate**, and the write lands. - -> **Caveat (recorded, not a blocker):** `objects.CreateObjectAction` is still an -> unwired stub — `op_store` performs **no** authorization. So a cross-swarm store -> works but is *unauthenticated at the op level* (any caller that can route + relay -> can write). Hardening that (wire `CreateObjectAction` + an `AuthorizeCreateObject` -> grant) is a separate, design-gated task ("Wire up the object-creation -> authorizer"). This scenario tests the *functional* write path, not the -> authorization model. 
-> -> **Pending the first live `astrald-swarm → astrald-shared` run:** this is verified -> at the code level on `master`; confirm end-to-end live, and confirm the thin -> prompt reliably drives the agent to resolve the sibling and set an explicit query -> target. - -## Memory repository — a separate task, by decision - -The docs expose a `memory` repository group (`objects.new_mem`, the `mem0`/ -`memory` repos) — an in-memory, **non-default** write target. `share-object` -deliberately does **not** use it: it must test the *default* write path (the -sibling's standard write-default `local` repo). Ephemeral / `objects.new_mem` -behaviour deserves its own focused task layered on `astrald-shared` later (captured -in Triage). - -## Skill gap this scenario exercises - -The skill has playbooks only for `swarm-management` (`node-setup`, -`node-adoption`) — there is **no objects storage/transfer playbook**. Storing on a -sibling (resolve the sibling, set an explicit query target, `objects.store` + -`objects.load`) must be reached from the protocol docs alone — a real test of "are -the docs sufficient without a playbook?". If the thin prompt proves shaky live, the -remedy is a small `objects` playbook in the skill, not a fatter prompt. +Drives the Qwen operator on node1 to **store an astral object on its swarm sibling +node2** (`:objects.store`) and read it back. `verify.py` independently +confirms node2 physically holds the object in its local repo. Produces stage +`astrald-shared` (from `astrald-swarm`). From 5566ca695367187bc3f0cc6d848ce0ea3d9858cc Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 12:14:12 +0200 Subject: [PATCH 18/57] netsim: consolidate agent artifacts into $HOME/info.json Replace the scattered ~/.netsim/{user.id,user.token,object.*} files with a single $HOME/info.json (/home/tester/info.json) holding user_id, user_token, object_id, object_payload, object_readback, object_target. 
bootstrap-user writes user_*; share-object merges object_* (keeping user_*); verifiers and smoke-checks read the JSON (python3 in-VM for shell, host-side json for verify.py). Transient prompt/log files stay under ~/.netsim. --- netsim/tasks/bootstrap-user/prompt.md | 5 +++-- netsim/tasks/bootstrap-user/run.sh | 8 +++++--- netsim/tasks/bootstrap-user/verify.sh | 12 +++++++----- netsim/tasks/link-swarm/verify.py | 17 ++++++++++++----- netsim/tasks/share-object/prompt.md | 25 +++++++++++++------------ netsim/tasks/share-object/run.sh | 26 ++++++++++++++------------ netsim/tasks/share-object/verify.py | 26 ++++++++++++++++---------- 7 files changed, 70 insertions(+), 49 deletions(-) diff --git a/netsim/tasks/bootstrap-user/prompt.md b/netsim/tasks/bootstrap-user/prompt.md index 83c12a76..757b223d 100644 --- a/netsim/tasks/bootstrap-user/prompt.md +++ b/netsim/tasks/bootstrap-user/prompt.md @@ -2,5 +2,6 @@ On this machine there is an `astrald` node running. It has its own node identity but no User. Make it a User-controlled node under a fresh software User, following your **astral-agent** skill's node-setup playbook. -Then write the User's id to `~/.netsim/user.id` and a User-bound apphost token to -`~/.netsim/user.token`. The skill won't mention this — it's how the run is checked. +Then write the User's id and a User-bound apphost token to `$HOME/info.json` as a +JSON object with keys `user_id` and `user_token`. The skill won't mention this — +it's how the run is checked. diff --git a/netsim/tasks/bootstrap-user/run.sh b/netsim/tasks/bootstrap-user/run.sh index 6d5faa39..a3b4c2a8 100755 --- a/netsim/tasks/bootstrap-user/run.sh +++ b/netsim/tasks/bootstrap-user/run.sh @@ -45,9 +45,11 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/bootstrap-user.prompt)"' \ exit 1 } -# Cheap smoke-check; verify.sh does the authoritative, independent check. 
-[ -s "$d/user.id" ] || { echo "agent recorded no User id on $(hostname)" >&2; exit 1; } -echo "bootstrap-user: agent finished on $(hostname); User id $(cat "$d/user.id")" +# Cheap smoke-check; verify.sh does the authoritative, independent check. The agent +# records its outputs in $HOME/info.json (/home/tester/info.json). +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))' 2>/dev/null || true) +[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } +echo "bootstrap-user: agent finished on $(hostname); User id $uid" EOS ) diff --git a/netsim/tasks/bootstrap-user/verify.sh b/netsim/tasks/bootstrap-user/verify.sh index b9a4a734..c4e367e2 100755 --- a/netsim/tasks/bootstrap-user/verify.sh +++ b/netsim/tasks/bootstrap-user/verify.sh @@ -16,11 +16,13 @@ done REMOTE_CHECK=$(cat <<'EOS' set -eu -d=/home/tester/.netsim -[ -s "$d/user.id" ] || { echo "no recorded User id on $(hostname)" >&2; exit 1; } -[ -s "$d/user.token" ] || { echo "no recorded User token on $(hostname)" >&2; exit 1; } -uid=$(cat "$d/user.id") -ASTRALD_APPHOST_TOKEN=$(cat "$d/user.token"); export ASTRALD_APPHOST_TOKEN +info=/home/tester/info.json +[ -s "$info" ] || { echo "no $info on $(hostname)" >&2; exit 1; } +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))') +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))') +export ASTRALD_APPHOST_TOKEN +[ -n "$uid" ] || { echo "no user_id in $info on $(hostname)" >&2; exit 1; } +[ -n "$ASTRALD_APPHOST_TOKEN" ] || { echo "no user_token in $info on $(hostname)" >&2; exit 1; } # acting as the User: whoami must report the User identity who=$(astral-query apphost.whoami -out json) \ diff --git a/netsim/tasks/link-swarm/verify.py b/netsim/tasks/link-swarm/verify.py index 24eed5ac..e5533e04 100755 --- a/netsim/tasks/link-swarm/verify.py +++ 
b/netsim/tasks/link-swarm/verify.py @@ -8,9 +8,6 @@ import subprocess import sys -TOKEN = "export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token);" - - def ssh(vm, remote): """Run `netsim ssh -- ` on the host; return stdout.""" p = subprocess.run(["netsim", "ssh", vm, "--", remote], @@ -18,6 +15,14 @@ def ssh(vm, remote): return p.stdout +def info(vm): + """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" + try: + return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + except json.JSONDecodeError: + return {} + + def objs(stream): out = [] for ln in (stream or "").splitlines(): @@ -68,7 +73,9 @@ def main(): # node1 acts as the User (token from bootstrap-user); node2 answers under its # node identity (it holds the contract after the adoption). - U = "".join(ssh(vm1, "cat /home/tester/.netsim/user.id").split()) + info1 = info(vm1) + U = "".join(str(info1.get("user_id", "")).split()) + TOKEN = f"export ASTRALD_APPHOST_TOKEN={info1.get('user_token', '')};" n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") n2_info = ssh(vm2, "astral-query user.info -out json") @@ -85,7 +92,7 @@ def main(): errs = [] if not U: - errs.append("no User id recorded on node1 (~/.netsim/user.id)") + errs.append("no user_id in node1's info.json") if i1 != U: errs.append(f"node1 contract issuer {i1} != User {U}") if i2 != U: diff --git a/netsim/tasks/share-object/prompt.md b/netsim/tasks/share-object/prompt.md index d7817499..fc385228 100644 --- a/netsim/tasks/share-object/prompt.md +++ b/netsim/tasks/share-object/prompt.md @@ -1,14 +1,15 @@ On this machine an `astrald` node is running and you control it as its User (a -User-bound apphost token is at `~/.netsim/user.token`). Your swarm has one other -node — a sibling. 
Acting as that User, store a short, distinctive text payload as -an astral object **on that other node** — address it explicitly as the query -target — via the objects protocol, following your **astral-agent** skill, and note -the Object ID it returns. Then read the object back **from that other node** by its -Object ID and confirm the bytes match what you stored. +User-bound apphost token is recorded in `$HOME/info.json` under `user_token`). Your +swarm has one other node — a sibling. Acting as that User, store a short, +distinctive text payload as an astral object **on that other node** — address it +explicitly as the query target — via the objects protocol, following your +**astral-agent** skill, and note the Object ID it returns. Then read the object +back **from that other node** by its Object ID and confirm the bytes match what you +stored. -Then write the Object ID to `~/.netsim/object.id`, the exact payload you stored to -`~/.netsim/object.payload`, the bytes you read back to `~/.netsim/object.readback`, -and the node id you stored it on to `~/.netsim/object.target`. The skill won't -mention these files — they are how the run is checked. Success means the object is -stored on the other node, read back with matching bytes, and all four files are -written. +Then add to `$HOME/info.json` (keep the existing `user_*` keys) these keys: +`object_id` (the Object ID), `object_payload` (the exact payload you stored), +`object_readback` (the bytes you read back), and `object_target` (the node id you +stored it on). The skill won't mention this — it's how the run is checked. Success +means the object is stored on the other node, read back with matching bytes, and +those keys are written. 
diff --git a/netsim/tasks/share-object/run.sh b/netsim/tasks/share-object/run.sh index 7e03e0c7..86bd6dfb 100755 --- a/netsim/tasks/share-object/run.sh +++ b/netsim/tasks/share-object/run.sh @@ -47,21 +47,23 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/share-object.prompt)"' \ } # Cheap smoke-check; verify.py does the authoritative, independent check (node2 -# physically holds the object in its local repo). The agent must have recorded an -# Object ID, the payload it stored, the bytes it read back, and the node it stored -# it on. -[ -s "$d/object.id" ] || { echo "agent recorded no Object ID on $(hostname) (~/.netsim/object.id)" >&2; exit 1; } -[ -s "$d/object.payload" ] || { echo "agent recorded no payload on $(hostname) (~/.netsim/object.payload)" >&2; exit 1; } -[ -s "$d/object.readback" ] || { echo "agent recorded no read-back on $(hostname) (~/.netsim/object.readback)" >&2; exit 1; } -[ -s "$d/object.target" ] || { echo "agent recorded no target node on $(hostname) (~/.netsim/object.target)" >&2; exit 1; } -case "$(cat "$d/object.id")" in +# physically holds the object in its local repo). The agent records its outputs in +# $HOME/info.json (/home/tester/info.json). 
+oid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_id",""))' 2>/dev/null || true) +opay=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_payload",""))' 2>/dev/null || true) +orb=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_readback",""))' 2>/dev/null || true) +otgt=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_target",""))' 2>/dev/null || true) +[ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } +[ -n "$opay" ] || { echo "agent recorded no object_payload on $(hostname)" >&2; exit 1; } +[ -n "$orb" ] || { echo "agent recorded no object_readback on $(hostname)" >&2; exit 1; } +[ -n "$otgt" ] || { echo "agent recorded no object_target on $(hostname)" >&2; exit 1; } +case "$oid" in data1*) : ;; - *) echo "WARNING $(hostname): object.id does not look like a data1… Object ID (verify.py decides)" >&2 ;; + *) echo "WARNING $(hostname): object_id does not look like a data1… Object ID (verify.py decides)" >&2 ;; esac # Advisory: the agent's own round-trip should already match (verify.py re-checks). 
-[ "$(cat "$d/object.payload")" = "$(cat "$d/object.readback")" ] \ - || echo "WARNING $(hostname): agent read-back != stored payload (verify.py decides)" >&2 -echo "share-object: agent finished on $(hostname); stored object $(cat "$d/object.id") on $(cat "$d/object.target")" +[ "$opay" = "$orb" ] || echo "WARNING $(hostname): agent read-back != stored payload (verify.py decides)" >&2 +echo "share-object: agent finished on $(hostname); stored object $oid on $otgt" EOS ) diff --git a/netsim/tasks/share-object/verify.py b/netsim/tasks/share-object/verify.py index eff0535e..722134e1 100755 --- a/netsim/tasks/share-object/verify.py +++ b/netsim/tasks/share-object/verify.py @@ -10,9 +10,6 @@ import subprocess import sys -TOKEN = "export ASTRALD_APPHOST_TOKEN=$(cat /home/tester/.netsim/user.token);" - - def ssh(vm, remote): """Run `netsim ssh -- ` on the host; return stdout (best-effort). @@ -24,6 +21,14 @@ def ssh(vm, remote): return p.stdout +def info(vm): + """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" + try: + return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + except json.JSONDecodeError: + return {} + + # ---- JSON object-stream parsing (one object/line + an eos terminator) ---------- def objs(stream): @@ -94,10 +99,11 @@ def main(): # What the agent persisted on node1. ID strips all whitespace; the text fields # tolerate a trailing newline. 
- ID = "".join(ssh(vm1, "cat /home/tester/.netsim/object.id").split()) - PAY = ssh(vm1, "cat /home/tester/.netsim/object.payload").rstrip("\n") - READBACK = ssh(vm1, "cat /home/tester/.netsim/object.readback").rstrip("\n") - TARGET = "".join(ssh(vm1, "cat /home/tester/.netsim/object.target").split()) + info1 = info(vm1) + ID = "".join(str(info1.get("object_id", "")).split()) + PAY = str(info1.get("object_payload", "")).rstrip("\n") + READBACK = str(info1.get("object_readback", "")).rstrip("\n") + TARGET = "".join(str(info1.get("object_target", "")).split()) # node2's real identity, resolved host-side: Subject of node2's active contract # (node2 answers user.info under its node identity), with node1's link-back as a @@ -122,16 +128,16 @@ def main(): errs, notes = [], [] if not ID: - errs.append("no Object ID recorded on node1 (~/.netsim/object.id)") + errs.append("no object_id in node1's info.json") if not PAY: - errs.append("no payload recorded on node1 (~/.netsim/object.payload)") + errs.append("no object_payload in node1's info.json") if READBACK and READBACK != PAY: notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") if TARGET and N2 and TARGET != N2: notes.append(f"agent stored on {TARGET[:12]}.. but node2's identity is {N2[:12]}.. 
" "(agent may have targeted the wrong node)") elif not TARGET: - notes.append("agent recorded no target node (~/.netsim/object.target)") + notes.append("agent recorded no object_target in info.json") if on_node1 is True: notes.append("object is ALSO present in node1's local repo (a local copy alongside the " "remote store -- not required, just noted)") From 81d539191eb602e4481e77b74116cb924f667111 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 12:27:00 +0200 Subject: [PATCH 19/57] netsim: add import-user task (configure node from an existing mnemonic) A drop-in alternative to bootstrap-user: instead of minting fresh entropy, the agent derives the User key from a provided BIP-39 mnemonic (ASTRAL_USER_MNEMONIC) and installs node1's active contract under that existing software User. verify.sh asserts node1 is a User node and, if ASTRAL_USER_ID is set, that the derived id matches exactly (proof the existing key was used). Produces stage astrald-user. --- netsim/README.md | 2 + netsim/stories/import-user.story | 6 +++ netsim/tasks/import-user/README.md | 8 ++++ netsim/tasks/import-user/prompt.md | 15 ++++++++ netsim/tasks/import-user/run.sh | 62 ++++++++++++++++++++++++++++++ netsim/tasks/import-user/verify.sh | 49 +++++++++++++++++++++++ 6 files changed, 142 insertions(+) create mode 100644 netsim/stories/import-user.story create mode 100644 netsim/tasks/import-user/README.md create mode 100644 netsim/tasks/import-user/prompt.md create mode 100755 netsim/tasks/import-user/run.sh create mode 100755 netsim/tasks/import-user/verify.sh diff --git a/netsim/README.md b/netsim/README.md index eefd03a6..49fd8c75 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -17,11 +17,13 @@ netsim/ install-astrald/ # build + run astrald as a service on each node configure-astral-agent/ # install the astral-agent skill into the qwen operator bootstrap-user/ # make node1 a User node -> stage astrald-user + import-user/ # make node1 a User node from an existing mnemonic 
(alt.) -> astrald-user link-swarm/ # adopt node2 into node1's swarm -> stage astrald-swarm share-object/ # store an object on the sibling -> stage astrald-shared stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab bootstrap-user.story # astrald-lab -> astrald-user + import-user.story # astrald-lab -> astrald-user (alt. to bootstrap-user) link-swarm.story # astrald-user -> astrald-swarm share-object.story # astrald-swarm -> astrald-shared link.sh # register tasks with netsim (idempotent; re-run anytime) diff --git a/netsim/stories/import-user.story b/netsim/stories/import-user.story new file mode 100644 index 00000000..2eaabd17 --- /dev/null +++ b/netsim/stories/import-user.story @@ -0,0 +1,6 @@ +# import-user.story — make node1 a User node from an EXISTING mnemonic +# (alternative to bootstrap-user). Requires env ASTRAL_USER_MNEMONIC; optional +# ASTRAL_USER_ID makes verify assert the derived id. +# start: astrald-lab save: astrald-user +# ASTRAL_USER_MNEMONIC="..." netsim story --stage astrald-lab --save astrald-user netsim/stories/import-user.story +import-user diff --git a/netsim/tasks/import-user/README.md b/netsim/tasks/import-user/README.md new file mode 100644 index 00000000..5c4155d5 --- /dev/null +++ b/netsim/tasks/import-user/README.md @@ -0,0 +1,8 @@ +# import-user + +Drives the Qwen operator on node1 to make it a **User-controlled node from an +existing software User** — deriving the User key from a provided BIP-39 mnemonic +(`ASTRAL_USER_MNEMONIC`) instead of minting a fresh one — following the +astral-agent skill's node-setup playbook. `verify.sh` confirms node1 answers as +that User (and, if `ASTRAL_USER_ID` is set, that the derived id matches exactly). +A drop-in alternative to `bootstrap-user`; produces stage `astrald-user`. 
diff --git a/netsim/tasks/import-user/prompt.md b/netsim/tasks/import-user/prompt.md new file mode 100644 index 00000000..4d4f9b8e --- /dev/null +++ b/netsim/tasks/import-user/prompt.md @@ -0,0 +1,15 @@ +On this machine there is an `astrald` node running. It has its own node identity +but no User. You already control a software User whose BIP-39 mnemonic seed phrase +is: + + __MNEMONIC__ + +Make this node a User-controlled node under THAT existing User: derive the User's +`secp256k1` key from the mnemonic above (start from the mnemonic — do NOT generate +new entropy), then build, sign, and install the node contract, following your +**astral-agent** skill's node-setup playbook (software User) but substituting the +given mnemonic for the entropy-generation step. + +Then write the User's id and a User-bound apphost token to `$HOME/info.json` as a +JSON object with keys `user_id` and `user_token`. The skill won't mention this — +it's how the run is checked. diff --git a/netsim/tasks/import-user/run.sh b/netsim/tasks/import-user/run.sh new file mode 100755 index 00000000..3371617c --- /dev/null +++ b/netsim/tasks/import-user/run.sh @@ -0,0 +1,62 @@ +#!/bin/sh +# import-user: configure the operator node as a User node from an EXISTING software +# User key — the User's BIP-39 mnemonic (env ASTRAL_USER_MNEMONIC) is derived +# instead of minting fresh entropy. Driven by the Qwen Code agent in the VM. +# import-user [--vm ] (default: node1 — the VM carrying Qwen) +# env: ASTRAL_USER_MNEMONIC (required) ASTRAL_USER_ID (optional; verify.sh asserts it) +# +# Drop-in alternative to bootstrap-user. Runs ON THE HOST (cwd = simulation root): +# substitutes the mnemonic into prompt.md, base64-ships the prompt to the agent over +# one `netsim ssh` argv, and runs `qwen -y`. Intelligence lives in the prompt and +# the agent's astral-agent skill, not here. 
+set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: import-user [--vm ] (env ASTRAL_USER_MNEMONIC required)" >&2; exit 64 ;; + esac +done + +[ -n "${ASTRAL_USER_MNEMONIC:-}" ] \ + || { echo "set ASTRAL_USER_MNEMONIC to the User's BIP-39 mnemonic seed phrase" >&2; exit 64; } + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +# Substitute the mnemonic into the prompt template (BIP-39 words are [a-z ] only, +# so no sed-delimiter or regex-metachar hazard). +prompt=$(sed "s|__MNEMONIC__|$ASTRAL_USER_MNEMONIC|" "$here/prompt.md") +prompt_b64=$(printf '%s' "$prompt" | base64 -w0) # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/import-user.prompt" +chown -R tester:tester "$d" + +# Run the agent as `tester` (qwen is installed for that user), non-interactively: +# one-shot positional prompt + `-y` (auto-approve). +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/import-user.prompt)"' \ + > "$d/import-user.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/import-user.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.sh does the authoritative, independent check. The agent +# records its outputs in $HOME/info.json (/home/tester/info.json). +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))' 2>/dev/null || true) +[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } +echo "import-user: agent finished on $(hostname); User id $uid" +EOS +) + +echo "import-user: driving Qwen operator on $VM ..." 
+# assignment prefix carries the prompt to the guest; body re-parses it +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "import-user: done on $VM" diff --git a/netsim/tasks/import-user/verify.sh b/netsim/tasks/import-user/verify.sh new file mode 100755 index 00000000..a9edeadb --- /dev/null +++ b/netsim/tasks/import-user/verify.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# verify import-user: the node must be a User node under the imported software User. +# INDEPENDENT re-check -- reads $HOME/info.json, acts AS the User, and asserts the +# node answers as a user node. If ASTRAL_USER_ID is set, the derived User id must +# equal it (proof the EXISTING key was used, not a fresh one). +set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) VM=$2; shift 2 ;; + *) shift ;; + esac +done +EXPECT=${ASTRAL_USER_ID:-} + +REMOTE_CHECK=$(cat <<'EOS' +set -eu +info=/home/tester/info.json +[ -s "$info" ] || { echo "no $info on $(hostname)" >&2; exit 1; } +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))') +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))') +export ASTRALD_APPHOST_TOKEN +[ -n "$uid" ] || { echo "no user_id in $info on $(hostname)" >&2; exit 1; } +[ -n "$ASTRALD_APPHOST_TOKEN" ] || { echo "no user_token in $info on $(hostname)" >&2; exit 1; } + +# if an expected User id was supplied, the imported key must derive exactly it +if [ -n "$expect" ] && [ "$uid" != "$expect" ]; then + echo "imported User id $uid != expected $expect on $(hostname) (wrong key derived?)" >&2 + exit 1 +fi + +# acting as the User: whoami must report the User identity +who=$(astral-query apphost.whoami -out json) \ + || { echo "apphost.whoami failed on $(hostname)" >&2; exit 1; } +echo "$who" | grep -q "$uid" \ + || { echo "whoami != User id on $(hostname): $who" >&2; exit 1; } + +# active contract present (user.info rejects with 
code 2 if none) +astral-query user.info -out json \ + || { echo "user.info failed on $(hostname) -- no active contract?" >&2; exit 1; } + +echo "$(hostname): user node OK (User $uid${expect:+ — matches expected})" +EOS +) + +netsim ssh "$VM" -- "expect='$EXPECT'; $REMOTE_CHECK" \ + || { echo "import-user verify FAILED on $VM" >&2; exit 1; } +echo "verified imported user node on: $VM" From 8e19a83b98788a4835af5875e54e7e564dcbbd46 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 12:39:13 +0200 Subject: [PATCH 20/57] netsim: rename User-setup tasks to -software-key Rename the two first-node User-setup tasks to spell out the key variant: bootstrap-user -> bootstrap-user-software-key (new soft key), import-user -> import-user-software-key (existing soft key, known mnemonic). Renames the task dirs + story files and updates every reference (internal messages, prompt/log basenames, cross-references in link-swarm/share-object, README layout/pipeline). Leaves room for hardware-key variants later. 
--- netsim/README.md | 30 +++++++++--------- .../stories/bootstrap-user-software-key.story | 4 +++ netsim/stories/bootstrap-user.story | 4 --- netsim/stories/import-user-software-key.story | 6 ++++ netsim/stories/import-user.story | 6 ---- .../README.md | 2 +- .../prompt.md | 0 .../run.sh | 20 ++++++------ .../verify.sh | 4 +-- .../README.md | 4 +-- .../prompt.md | 0 .../run.sh | 22 ++++++------- .../verify.sh | 4 +-- .../__pycache__/verify.cpython-314.pyc | Bin 0 -> 6796 bytes netsim/tasks/link-swarm/run.sh | 8 ++--- netsim/tasks/link-swarm/verify.py | 2 +- .../__pycache__/verify.cpython-314.pyc | Bin 0 -> 10246 bytes netsim/tasks/share-object/run.sh | 4 +-- 18 files changed, 60 insertions(+), 60 deletions(-) create mode 100644 netsim/stories/bootstrap-user-software-key.story delete mode 100644 netsim/stories/bootstrap-user.story create mode 100644 netsim/stories/import-user-software-key.story delete mode 100644 netsim/stories/import-user.story rename netsim/tasks/{bootstrap-user => bootstrap-user-software-key}/README.md (91%) rename netsim/tasks/{bootstrap-user => bootstrap-user-software-key}/prompt.md (100%) rename netsim/tasks/{bootstrap-user => bootstrap-user-software-key}/run.sh (76%) rename netsim/tasks/{bootstrap-user => bootstrap-user-software-key}/verify.sh (90%) rename netsim/tasks/{import-user => import-user-software-key}/README.md (77%) rename netsim/tasks/{import-user => import-user-software-key}/prompt.md (100%) rename netsim/tasks/{import-user => import-user-software-key}/run.sh (71%) rename netsim/tasks/{import-user => import-user-software-key}/verify.sh (91%) create mode 100644 netsim/tasks/link-swarm/__pycache__/verify.cpython-314.pyc create mode 100644 netsim/tasks/share-object/__pycache__/verify.cpython-314.pyc diff --git a/netsim/README.md b/netsim/README.md index 49fd8c75..7a300a15 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -13,19 +13,19 @@ tasks in one simulation and saves a named *stage*. 
`lab.story` builds the ``` netsim/ - tasks/ # each task: run.sh (+ verify.sh / verify.py) + README.md - install-astrald/ # build + run astrald as a service on each node - configure-astral-agent/ # install the astral-agent skill into the qwen operator - bootstrap-user/ # make node1 a User node -> stage astrald-user - import-user/ # make node1 a User node from an existing mnemonic (alt.) -> astrald-user - link-swarm/ # adopt node2 into node1's swarm -> stage astrald-swarm - share-object/ # store an object on the sibling -> stage astrald-shared - stories/ # one story per tested flow (start/save stage in each header) - lab.story # null -> astrald-lab - bootstrap-user.story # astrald-lab -> astrald-user - import-user.story # astrald-lab -> astrald-user (alt. to bootstrap-user) - link-swarm.story # astrald-user -> astrald-swarm - share-object.story # astrald-swarm -> astrald-shared + tasks/ # each task: run.sh (+ verify.sh / verify.py) + README.md + install-astrald/ # build + run astrald as a service on each node + configure-astral-agent/ # install the astral-agent skill into the qwen operator + bootstrap-user-software-key/ # make node1 a User node, new key -> astrald-user + import-user-software-key/ # make node1 a User node, existing mnemonic -> astrald-user + link-swarm/ # adopt node2 into node1's swarm -> astrald-swarm + share-object/ # store an object on the sibling -> astrald-shared + stories/ # one story per tested flow (start/save stage in each header) + lab.story # null -> astrald-lab + bootstrap-user-software-key.story # astrald-lab -> astrald-user + import-user-software-key.story # astrald-lab -> astrald-user (alt.) + link-swarm.story # astrald-user -> astrald-swarm + share-object.story # astrald-swarm -> astrald-shared link.sh # register tasks with netsim (idempotent; re-run anytime) README.md ``` @@ -105,11 +105,11 @@ stage (its `start`/`save` stages are in the story header). 
Intermediate stages stay reusable, so you can replay one flow without rebuilding the chain: ``` -astrald-lab ─[bootstrap-user]→ astrald-user ─[link-swarm]→ astrald-swarm ─[share-object]→ astrald-shared +astrald-lab ─[bootstrap-user-software-key]→ astrald-user ─[link-swarm]→ astrald-swarm ─[share-object]→ astrald-shared ``` ```sh -netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user.story +netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user-software-key.story netsim story --stage astrald-user --save astrald-swarm netsim/stories/link-swarm.story netsim story --stage astrald-swarm --save astrald-shared netsim/stories/share-object.story ``` diff --git a/netsim/stories/bootstrap-user-software-key.story b/netsim/stories/bootstrap-user-software-key.story new file mode 100644 index 00000000..9547643b --- /dev/null +++ b/netsim/stories/bootstrap-user-software-key.story @@ -0,0 +1,4 @@ +# bootstrap-user-software-key.story — node1 becomes a User-controlled node. +# start: astrald-lab save: astrald-user +# netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user-software-key.story +bootstrap-user-software-key diff --git a/netsim/stories/bootstrap-user.story b/netsim/stories/bootstrap-user.story deleted file mode 100644 index 4917388f..00000000 --- a/netsim/stories/bootstrap-user.story +++ /dev/null @@ -1,4 +0,0 @@ -# bootstrap-user.story — node1 becomes a User-controlled node. -# start: astrald-lab save: astrald-user -# netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user.story -bootstrap-user diff --git a/netsim/stories/import-user-software-key.story b/netsim/stories/import-user-software-key.story new file mode 100644 index 00000000..7f17fe44 --- /dev/null +++ b/netsim/stories/import-user-software-key.story @@ -0,0 +1,6 @@ +# import-user-software-key.story — make node1 a User node from an EXISTING mnemonic +# (alternative to bootstrap-user-software-key). 
Requires env ASTRAL_USER_MNEMONIC; optional +# ASTRAL_USER_ID makes verify assert the derived id. +# start: astrald-lab save: astrald-user +# ASTRAL_USER_MNEMONIC="..." netsim story --stage astrald-lab --save astrald-user netsim/stories/import-user-software-key.story +import-user-software-key diff --git a/netsim/stories/import-user.story b/netsim/stories/import-user.story deleted file mode 100644 index 2eaabd17..00000000 --- a/netsim/stories/import-user.story +++ /dev/null @@ -1,6 +0,0 @@ -# import-user.story — make node1 a User node from an EXISTING mnemonic -# (alternative to bootstrap-user). Requires env ASTRAL_USER_MNEMONIC; optional -# ASTRAL_USER_ID makes verify assert the derived id. -# start: astrald-lab save: astrald-user -# ASTRAL_USER_MNEMONIC="..." netsim story --stage astrald-lab --save astrald-user netsim/stories/import-user.story -import-user diff --git a/netsim/tasks/bootstrap-user/README.md b/netsim/tasks/bootstrap-user-software-key/README.md similarity index 91% rename from netsim/tasks/bootstrap-user/README.md rename to netsim/tasks/bootstrap-user-software-key/README.md index 65af9c23..4265d3ae 100644 --- a/netsim/tasks/bootstrap-user/README.md +++ b/netsim/tasks/bootstrap-user-software-key/README.md @@ -1,4 +1,4 @@ -# bootstrap-user +# bootstrap-user-software-key Drives the Qwen operator on node1 to make it a **User-controlled node** — mint a software User and install node1's active contract — following the astral-agent diff --git a/netsim/tasks/bootstrap-user/prompt.md b/netsim/tasks/bootstrap-user-software-key/prompt.md similarity index 100% rename from netsim/tasks/bootstrap-user/prompt.md rename to netsim/tasks/bootstrap-user-software-key/prompt.md diff --git a/netsim/tasks/bootstrap-user/run.sh b/netsim/tasks/bootstrap-user-software-key/run.sh similarity index 76% rename from netsim/tasks/bootstrap-user/run.sh rename to netsim/tasks/bootstrap-user-software-key/run.sh index a3b4c2a8..7a58a673 100755 --- a/netsim/tasks/bootstrap-user/run.sh 
+++ b/netsim/tasks/bootstrap-user-software-key/run.sh @@ -1,7 +1,7 @@ #!/bin/sh -# bootstrap-user: turn the operator node into a User-controlled node, driven by +# bootstrap-user-software-key: turn the operator node into a User-controlled node, driven by # the Qwen Code agent running INSIDE the VM. -# bootstrap-user [--vm ] (default: node1 — the VM carrying Qwen) +# bootstrap-user-software-key [--vm ] (default: node1 — the VM carrying Qwen) # # Runs ON THE HOST (cwd = simulation root). This script is deliberately tiny: it # ships prompt.md to the agent on the guest and lets the agent do the astral @@ -16,7 +16,7 @@ VM="node1" while [ $# -gt 0 ]; do case "$1" in --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; - *) echo "usage: bootstrap-user [--vm ]" >&2; exit 64 ;; + *) echo "usage: bootstrap-user-software-key [--vm ]" >&2; exit 64 ;; esac done @@ -30,7 +30,7 @@ REMOTE_BODY=$(cat <<'EOS' set -eu d=/home/tester/.netsim mkdir -p "$d" -printf '%s' "$prompt_b64" | base64 -d > "$d/bootstrap-user.prompt" +printf '%s' "$prompt_b64" | base64 -d > "$d/bootstrap-user-software-key.prompt" chown -R tester:tester "$d" # Run the agent as `tester` (qwen is installed for that user), non-interactively. @@ -38,10 +38,10 @@ chown -R tester:tester "$d" # prompt + `-y` (auto-approve). The prompt is passed positionally via command # substitution; the substituted text is used literally (not re-scanned), so the # backticks and $-signs inside it are safe. 
-su - tester -c 'qwen -y "$(cat /home/tester/.netsim/bootstrap-user.prompt)"' \ - > "$d/bootstrap-user.log" 2>&1 || { +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/bootstrap-user-software-key.prompt)"' \ + > "$d/bootstrap-user-software-key.log" 2>&1 || { echo "qwen run failed on $(hostname); tail of log:" >&2 - tail -n 40 "$d/bootstrap-user.log" >&2 + tail -n 40 "$d/bootstrap-user-software-key.log" >&2 exit 1 } @@ -49,12 +49,12 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/bootstrap-user.prompt)"' \ # records its outputs in $HOME/info.json (/home/tester/info.json). uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))' 2>/dev/null || true) [ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } -echo "bootstrap-user: agent finished on $(hostname); User id $uid" +echo "bootstrap-user-software-key: agent finished on $(hostname); User id $uid" EOS ) -echo "bootstrap-user: driving Qwen operator on $VM ..." +echo "bootstrap-user-software-key: driving Qwen operator on $VM ..." # assignment prefix carries the prompt to the guest; body re-parses it # shellcheck disable=SC2029 netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" -echo "bootstrap-user: done on $VM" +echo "bootstrap-user-software-key: done on $VM" diff --git a/netsim/tasks/bootstrap-user/verify.sh b/netsim/tasks/bootstrap-user-software-key/verify.sh similarity index 90% rename from netsim/tasks/bootstrap-user/verify.sh rename to netsim/tasks/bootstrap-user-software-key/verify.sh index c4e367e2..336d280f 100755 --- a/netsim/tasks/bootstrap-user/verify.sh +++ b/netsim/tasks/bootstrap-user-software-key/verify.sh @@ -1,5 +1,5 @@ #!/bin/sh -# verify bootstrap-user (same args as run.sh): the target node must be a +# verify bootstrap-user-software-key (same args as run.sh): the target node must be a # User-controlled node. 
INDEPENDENT re-check -- it does not trust run.sh's # output: it reads the persisted User credentials, acts AS the User, and asserts # the node answers as a user node. user.info itself rejects (code 2) when there @@ -39,5 +39,5 @@ EOS ) netsim ssh "$VM" -- "$REMOTE_CHECK" \ - || { echo "bootstrap-user verify FAILED on $VM" >&2; exit 1; } + || { echo "bootstrap-user-software-key verify FAILED on $VM" >&2; exit 1; } echo "verified user node on: $VM" diff --git a/netsim/tasks/import-user/README.md b/netsim/tasks/import-user-software-key/README.md similarity index 77% rename from netsim/tasks/import-user/README.md rename to netsim/tasks/import-user-software-key/README.md index 5c4155d5..b2361f7d 100644 --- a/netsim/tasks/import-user/README.md +++ b/netsim/tasks/import-user-software-key/README.md @@ -1,8 +1,8 @@ -# import-user +# import-user-software-key Drives the Qwen operator on node1 to make it a **User-controlled node from an existing software User** — deriving the User key from a provided BIP-39 mnemonic (`ASTRAL_USER_MNEMONIC`) instead of minting a fresh one — following the astral-agent skill's node-setup playbook. `verify.sh` confirms node1 answers as that User (and, if `ASTRAL_USER_ID` is set, that the derived id matches exactly). -A drop-in alternative to `bootstrap-user`; produces stage `astrald-user`. +A drop-in alternative to `bootstrap-user-software-key`; produces stage `astrald-user`. 
diff --git a/netsim/tasks/import-user/prompt.md b/netsim/tasks/import-user-software-key/prompt.md similarity index 100% rename from netsim/tasks/import-user/prompt.md rename to netsim/tasks/import-user-software-key/prompt.md diff --git a/netsim/tasks/import-user/run.sh b/netsim/tasks/import-user-software-key/run.sh similarity index 71% rename from netsim/tasks/import-user/run.sh rename to netsim/tasks/import-user-software-key/run.sh index 3371617c..615ef4be 100755 --- a/netsim/tasks/import-user/run.sh +++ b/netsim/tasks/import-user-software-key/run.sh @@ -1,11 +1,11 @@ #!/bin/sh -# import-user: configure the operator node as a User node from an EXISTING software +# import-user-software-key: configure the operator node as a User node from an EXISTING software # User key — the User's BIP-39 mnemonic (env ASTRAL_USER_MNEMONIC) is derived # instead of minting fresh entropy. Driven by the Qwen Code agent in the VM. -# import-user [--vm ] (default: node1 — the VM carrying Qwen) +# import-user-software-key [--vm ] (default: node1 — the VM carrying Qwen) # env: ASTRAL_USER_MNEMONIC (required) ASTRAL_USER_ID (optional; verify.sh asserts it) # -# Drop-in alternative to bootstrap-user. Runs ON THE HOST (cwd = simulation root): +# Drop-in alternative to bootstrap-user-software-key. Runs ON THE HOST (cwd = simulation root): # substitutes the mnemonic into prompt.md, base64-ships the prompt to the agent over # one `netsim ssh` argv, and runs `qwen -y`. Intelligence lives in the prompt and # the agent's astral-agent skill, not here. 
@@ -15,7 +15,7 @@ VM="node1" while [ $# -gt 0 ]; do case "$1" in --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; - *) echo "usage: import-user [--vm ] (env ASTRAL_USER_MNEMONIC required)" >&2; exit 64 ;; + *) echo "usage: import-user-software-key [--vm ] (env ASTRAL_USER_MNEMONIC required)" >&2; exit 64 ;; esac done @@ -35,15 +35,15 @@ REMOTE_BODY=$(cat <<'EOS' set -eu d=/home/tester/.netsim mkdir -p "$d" -printf '%s' "$prompt_b64" | base64 -d > "$d/import-user.prompt" +printf '%s' "$prompt_b64" | base64 -d > "$d/import-user-software-key.prompt" chown -R tester:tester "$d" # Run the agent as `tester` (qwen is installed for that user), non-interactively: # one-shot positional prompt + `-y` (auto-approve). -su - tester -c 'qwen -y "$(cat /home/tester/.netsim/import-user.prompt)"' \ - > "$d/import-user.log" 2>&1 || { +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/import-user-software-key.prompt)"' \ + > "$d/import-user-software-key.log" 2>&1 || { echo "qwen run failed on $(hostname); tail of log:" >&2 - tail -n 40 "$d/import-user.log" >&2 + tail -n 40 "$d/import-user-software-key.log" >&2 exit 1 } @@ -51,12 +51,12 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/import-user.prompt)"' \ # records its outputs in $HOME/info.json (/home/tester/info.json). uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))' 2>/dev/null || true) [ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } -echo "import-user: agent finished on $(hostname); User id $uid" +echo "import-user-software-key: agent finished on $(hostname); User id $uid" EOS ) -echo "import-user: driving Qwen operator on $VM ..." +echo "import-user-software-key: driving Qwen operator on $VM ..." 
# assignment prefix carries the prompt to the guest; body re-parses it # shellcheck disable=SC2029 netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" -echo "import-user: done on $VM" +echo "import-user-software-key: done on $VM" diff --git a/netsim/tasks/import-user/verify.sh b/netsim/tasks/import-user-software-key/verify.sh similarity index 91% rename from netsim/tasks/import-user/verify.sh rename to netsim/tasks/import-user-software-key/verify.sh index a9edeadb..6f1b82f2 100755 --- a/netsim/tasks/import-user/verify.sh +++ b/netsim/tasks/import-user-software-key/verify.sh @@ -1,5 +1,5 @@ #!/bin/sh -# verify import-user: the node must be a User node under the imported software User. +# verify import-user-software-key: the node must be a User node under the imported software User. # INDEPENDENT re-check -- reads $HOME/info.json, acts AS the User, and asserts the # node answers as a user node. If ASTRAL_USER_ID is set, the derived User id must # equal it (proof the EXISTING key was used, not a fresh one). 
@@ -45,5 +45,5 @@ EOS ) netsim ssh "$VM" -- "expect='$EXPECT'; $REMOTE_CHECK" \ - || { echo "import-user verify FAILED on $VM" >&2; exit 1; } + || { echo "import-user-software-key verify FAILED on $VM" >&2; exit 1; } echo "verified imported user node on: $VM" diff --git a/netsim/tasks/link-swarm/__pycache__/verify.cpython-314.pyc b/netsim/tasks/link-swarm/__pycache__/verify.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fad0613eaab899c766ed02d8b065cdd420111097 GIT binary patch literal 6796 zcmcIoYit|G5#Bo<$>T$OiF#PEt<%$@ZAmaK%Z_9#r5?6qIg)Ue88=lzQ{>5}Es^Zq z(T>@)po0RDk|0u=6j6Z`Re-jUf3z`BpfFINaDe_W3KXD2#bPf)lb`{D{__h(kYAnM z;~goPiqisJf_rlbkXoV>>9=-#O4C0ZDUPL@K zLdg_)(lSk+2H8NKjLh(K4U&z#0csP^KyBuYP_w)VY71|M+RC#~+g`BqmLq2oiq;{t z6TQGpz;kP9Krh%DP*;K6MX8N?(4mR^eb)2`mjy94bCsKo#V@r>SHj|44;N2Fg#%nT z9wjY5=@X({EG{RwL|ov`NP@@_-u;|(b#6|O#aM(B6Ot^59aif|JSrpwsNym=osciI zLtEk^7lgahGFZE-uJYY>tzp z3mw+62wk!|Gqeo|F%N^#$e<~`Yoe5^Ky84v@^ldKh9K2ppzH_i0MB@vvcDx2TCB`{PjwVuahhkDk zDh60Unr?5O^wEkV5>5h>5CV>5N>&(IcuDpd6swe)PKt?$AW4b=HbpV1QlN{zJg1n{ z2^A_SVZ~cpIi*3B-YJKrOHyaie4VO6JCavLye?ANpdsypDucdcJ&TUl9LwxV%c^H} z@w_BytA!h z1=I?I6%s3<>A^k2P-W0P#$fbpI4c*^uchB>TAt1}_T}oHS+6^htvhk6E9V?ucaCM9 zW4HUW?AX^EHs`vn{*JBw5u%Kqywm-f<(>gr-$*qe?3KoD^qY!q^ar{d{jnzCGJNEu zpe|=z^pF9?GN=P3sY&#>Ie0Wt>JlIv7cIiL(D8x7nF7;pEgBa-bDLdYbQc153Q`lg z!xYC(=?0%*i&U-EfP%X7Xiij*Mu!lxpxq#mI8RXmiPhRz^$E@cvtsAjg+{}u=sJ`F zF#rx`ayBNz@e(A(C_$i0DyDD}hc^+zJrdox!e)b$3TGC=*qma3!>!P>arLZnT*1nT z>5G!M6IgMimM%e+L3yh^Gmv*yExtVe^0H^$vFDCs&s~Rap3YY|7tb!7ef{})`ZK5d z?{@D!+F)_#?asxM3n$-^-%bB4z0&yhPwv`V@7s`b?>!f?J2NN0eq=;0@4B<;j`lpsaEB#SxRp1%pM|~93yBhDf z8drK&#hlAOAIMjF-#zr!q2=1QpIHffF!cV=sxQ~vm96ah*xq&Df*iH?tjJ!SWvWC6 z%)ZGnaZ~_zV;*LSUfVb(K!N{tG z^N*RB2-xTpKr(<`QfwN+&v}4fnu`$V6#^T}DM@n`aX0LZVp7K_tR`_xYy>*QCLXELPN2pn2h0F-g$i?t9ng6I{?aL^R8MGm^9SGN2`l{{9Do1#>XG&4uC?Z_ zd}YwHm6!QZjizdTg54(g9u|4L*a}UOgAZ}1THoMropIL>cAVR8HF71Fq}rf(p=%p z<9-fs^to8U=I1(8(>Zv8DK*1Q9#<{L0GPlc6;>NiVncF}btBp?gryK>4#^4eAnaxj 
zw&1{jXTj{g-dyw1TkPHDVWqN0H=J8F9LUm(1zOk~4=;iVpNzrv+aJJSxqF@`DE)B3 zHR`wHG5~s~H*uMDjr4{#*A7ugB*Zb1`UVD#q z(dJIFn|N$xc`QYqAj8}BvDQuW3Y~7-M0ek9r+TIUFRJCaKkM^W0%r9FO?o^AP_`2Vy%Y5sZ}G`HHCgrOxw}WMz*ltb zw{W|H6|mRtpvjK>`h^c8|4l~1YPX%o6Le}L`AWaBIBU@Ho%HmUjBtIY5mn_zL}3p= zM|aTm#I;rj-Jo$z(6xE3MSY_jk>v?Gp1Atjpa*2_2s)sy+qC*heJ$1BvU^XFkvskR zIfSU-)ka3g@W?69MlBQgqT5m?TDB12mETpeLtX&1a3vdc^T+qZfl)_OM*BEp>Mr#VX9-}q;%|(4*1c2#lNt@f-Ns2}>l828{%u!(` zoSK!jF2CC47jdvr%otdPV$t+2;iY6kl)1p^Nj`9DFcg@W7#=@88JZj)9U42PSP7q; zxFp0CsyE#LpT2-kXWM@WAogkrI&FvV8xDWyrhR2tHldRuYZ*=gb&DL0o2DzhPK!Kk zn|KId-e$h~c!H=5>&DQD>=a}G^l$%kH6ay2c#ME`E^Xl&j;SdwmBs%!OEcGo2m47P zD;!NEWx#1E$askW?_&}f?MpYS6TqzueA@l5fo%>FKKPqJemC zXpd@yp0R)7+j}f+>*(Nm3*yoqcsbfm7g=hWu?Y|~d;*aDx=tFU45d@?Lbg5n5>=Tmb4)+g|*sMy2NXb5@q&X(4~0d zN*q%q@epV(9>x_$2ap{~#Nv<-ltr-#zZ*$%LCw?Qk1v9|33LFR}1Q8s;$7GpO$a~BI z1QtWUg`ub<0Vd0!4U2o-!hLMva+bzSf1WYDI(}_@X=LR}W_*q5eC*xfT6Z+uaWv!{ zP1jCj2J<*Awvb-#`c=;_dgd!1u?Rj@D_$ebTG?}eo{M{bDqi!tNk@k=Nvs+XYJ9P^H^s1kqNmffw#6l=N!lke_2tr zbUs(H``YnL|3*n0zM!qO%e6UcV`gBZ;;H4~Tt!Rf_=dA?`AE*$mKpxsR*@O}%x2HC z_C?!*E$cnB#&+k;mY-hHDAoH{`PIQ2mYWSXE3@8XYi!^4-AB)5z2P->`ZHUFHr(5@ z#`Zq;Znn%vmiRYf8dr7uYGn1yjlDPdn}bhP>ML8N&e?ZUycxaKko69(u|r>(Z5ym@(Yjz=XKU}Uwb>orYwY1q z-Bt5}4K=^DZr^#wzH?b#39KAknO>f{Yu|g{fIQV-I9*>dmih7J!1DAjhn5eo>|33_ z;rShCkv(+cHhtUwH`A?`K!*wH1+0qgMXZW3M15hilkXRW4TZ2bhC)7vVh)9(iAX3U z_JLcf$viV|!KbF0H7945WL8LKq~vg7XU4MP;W;4`f}`+KOumlD6spg&N9PjJ)U41a z3c!k!V$%1af>azu-DeCG^N>Z9>r-U=6uCb|)&~qjc^^28l;?rVN;x0YnW^drUK8bd r;4x9|2cAPz#l0kj82fyB&9EcSm|q=!Wq4uo_26B`3me1Kk*WU!M+~&~ literal 0 HcmV?d00001 diff --git a/netsim/tasks/link-swarm/run.sh b/netsim/tasks/link-swarm/run.sh index baabc3fe..b5f39261 100755 --- a/netsim/tasks/link-swarm/run.sh +++ b/netsim/tasks/link-swarm/run.sh @@ -1,10 +1,10 @@ #!/bin/sh # link-swarm: adopt the second node into the User's swarm, driven by the Qwen # Code agent running INSIDE node1 (which is already a User node from -# bootstrap-user — default starting stage: astrald-user). 
+# bootstrap-user-software-key — default starting stage: astrald-user). # link-swarm [--vm ] (default: node1 — the VM carrying Qwen) # -# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user: +# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-software-key: # tiny script, thin prompt, intelligence in the agent's astral-agent skill. The # whole remote program travels as ONE argv to `netsim ssh`; the prompt rides # along base64-encoded so a multi-line file never fights shell quoting. @@ -32,7 +32,7 @@ printf '%s' "$prompt_b64" | base64 -d > "$d/link-swarm.prompt" chown -R tester:tester "$d" # Run the agent as `tester` (qwen is installed for that user), non-interactively. -# Invocation matches what was validated for bootstrap-user: one-shot positional +# Invocation matches what was validated for bootstrap-user-software-key: one-shot positional # prompt + `-y` (auto-approve). su - tester -c 'qwen -y "$(cat /home/tester/.netsim/link-swarm.prompt)"' \ > "$d/link-swarm.log" 2>&1 || { @@ -42,7 +42,7 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/link-swarm.prompt)"' \ } # Soft smoke-check only (verify.sh is the authoritative, independent check). -# node1 already holds a User token from bootstrap-user, so we can peek at the +# node1 already holds a User token from bootstrap-user-software-key, so we can peek at the # swarm here; don't fail the run on a shape mismatch — leave the verdict to # verify.sh. CONFIRM the user.swarm_status JSON field for a linked sibling. 
tok="$d/user.token" diff --git a/netsim/tasks/link-swarm/verify.py b/netsim/tasks/link-swarm/verify.py index e5533e04..06631dff 100755 --- a/netsim/tasks/link-swarm/verify.py +++ b/netsim/tasks/link-swarm/verify.py @@ -71,7 +71,7 @@ def main(): args, _ = ap.parse_known_args() vm1, vm2 = args.node1, args.node2 - # node1 acts as the User (token from bootstrap-user); node2 answers under its + # node1 acts as the User (token from bootstrap-user-software-key); node2 answers under its # node identity (it holds the contract after the adoption). info1 = info(vm1) U = "".join(str(info1.get("user_id", "")).split()) diff --git a/netsim/tasks/share-object/__pycache__/verify.cpython-314.pyc b/netsim/tasks/share-object/__pycache__/verify.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a93fd67dcabf7b1253a66d1c7c2c321648a363d4 GIT binary patch literal 10246 zcmcIKZA@HOcJICUV7_Ny_%=2duz|5*2pHQK8{621V2FV}@Uy+anG7=z7<-tRy!Qr& z*)vyY5813H3Z6eQ9lyR^Ls9>Uf2fZ(-FdPCojAo(67>ef z(idonCT~V!$Xg?6$XhFES*C)LbgTwiJ*$Pbh}A)BVD-=%*&=97tN~gxYlPOqnxM5_ zv$5u`D-`9grl=a~ns#)bN<#KsM_selQJv|5PFm^I0zD1n&t>tRgTXmo2u#f*amFj~ zt>MY*yie*z5g|Osqfpq-cc943ycqC#gTZ+;6At=CB+c+h0VELZNR+|???>Sf3P^5~ z=|afQM|fyL63h^#Rx#k`k#C0g-9XL$FfYP12}wd!l#mb&xy2b276?}FG|Xudk-&TX zt&?8g4Ochvngl))ZjA&&A>QADqM>Q8gcl=}itb?8>qo5^1O>xDC8k25dzRqmrKk`h z$oaRuJ}FJBX>wlTMK@x3FVLlIiMuGGxqugicu5S*B2k=iqcNUGY+vvBi+%1{KblID zITZ~Ck>HgA;gC1TyG_GBCd+oJ2>Kd|(mvs!6Q?FJpGI?`(xY`C3YOt0R>RS59jk?Z zx??aKKM{swMpczkhDh5$x%9CuHX1^2W@$wy=Vni#)>d><;Ag`Uf9g%l2`nlsf)l_} z!U$3Fhocf|p5#F;{1og+a)HRaq9k~Ot#3tnVIJKS0>B)^3qn}%%<`fLVnI`a7rICa z0|ylpH)jIA8FZ6J5w9TfF60e?RA52DH02EhqXIxT3$PgP0dae#!qE_jkC%LIR~ic} zfSNN{{6aVaq{w<@PqGFC5Mx?f$6btU^LZmcKJNi^k*FkVCH`&6rIAfybP`n62Q}`@)ydk#bDt((`2=2(dfO$?DD|EzGXyVjU z3uSaJ**>r(3@Zm#OIF9%jwUO*w;KU7Fb7IB1V*lKTfnF7~wrLQ`S)brCeHxosvq%eR4|NW738 zcd1&(U2H*K5qXh6;FH|3YM)m^g{Q=rAHM6-%36$8)?s@V3G-$9;MmCUdEN(Js1NI+ zix#l!6)|TU1uPp`iwP2((5=K@2B3*k549Ry$&TH*6#F3dX+vT%S$`^3eR`|<)nxUn 
zcRN$|fi3%R(ms6eY|=3N>m9Rw%Uru*u6;_;x{_^s@dw6-8tDB-tN>&`u0Klsne{03 z1#^`8+lpR?=1V&bZ6V=OPqaW-oN`kUdYpPz94s}Os}g`5KdO6nQ2po977wD`{ismb zBjl^V&|V7EiK9m|3X`2XkrDW)Av>z}>L@OwJgO4aQbT zYG1mtc;&+%EHDr3#s6k0eaL8x#oHG9(%|CY$I|WCpTt(`fBK{Qmc~bB%6{;ngRNqa3AM_J$361E_+rShhGo5sbyZ5iz0vaGY{87IKLAi5dFA*Uys^7t1p=jhgWRqa6Px zWvB~|?-6n~yISzEJ{%75aE{&xMBqdTi>R&Ox#E_!`5$ zC6G1Kyd;|fVjv_+-jI)nvqBQ?XoM=@oJ@yhCOnz1KsHP>@Asss7hC|?27lrpG~nCy zl&xgB@z%rwy$w$M$l{UZ@(+8GHJzL0PSBrit8MAh;-%%T4?PRaj=5}k^3#Sb^S({< zzHMjO2Nyt#Y}Jn}l*J*~09~G9VEe)1esYT}R9G=C(NV???zjICMZHyAP(3ng#M1cM zr?~$ixct3HJ%*VfIKQx{qf7*B8jBwH0$_ClzBCHhq=Z^%Tnyo^&;b2A3}L2#wMWFK zPh;8?^+X_|&;ny8@W&(2#HpPFuGPlRCsJnavxyb@?=Jn-rS+p?A{rN>d|SUa zDDrK@b`&XL0SyZX+O!c6F$w!&K-OY30w_8L`TnzDwn`PtJfxgX?c$UkLW_Vqf@M z`ZmzX9O*Iz4JqR{Unt!%>?!l5TNq;-rTW^4hSL;;(Wu^impA@HSVO~Vo})op;+xOl zXa+$96_NtdJnBJXQQ}@vcuI+xd{O{nH7sla2RO}B*q28|UU1{fk;_etTQ;0a1IR^b z1jBa}cbcn7__78Hh8-!feW4k8GVK$!@NPuvf$UIM^S}ihM~uQz_&ighpo)|kxvjd^O~59Uz$skdh7F?yZq35E2^_kRhs!vIh>q2b;{EP=Ch+BxJe+RS zsrIvPR?q5G_oF(Q(X#A!wWrdn!bJ2h#0-h3XE5WKXxGCUGB}G~h|~C6<21Y^UrYs7 z`aXOyPGILp9!|@eRSrZ-DQ5TKm_U7U>Gdn{yLP<`o@TX?@`(fLe72U; z=i>)1A+8+Sx(Ssq&0J#1;>wZL%fTHrQ6qdYEC|ob51km;t_?q9}YW zN2(FBN24_B?y1Q0++KOPaOh2$YrAvTEo3mwxzr2{EkeUWP5rD zmy+8>GWY3>OFqr|9O51R(E)-5D>tO#VpwztDA@`Qa(8n6HAZpgGAe>80@rQ4Q zvXwVf*ON}{WpaMaF)bdIX;;ia=BNNr4(J~KZwm-zIJU>_Mw7ro%&!hij~b-}L=-y& zADfk_vtB4CdLeLC~=gRDBMS| zPVZ}DBT&NRMO=Z**|Sn0B(=h3Z@OIQ3rFUWHwep;DkUaDiFcIE;fflrRto%E(Et=V zThMh}R)itm?}{;QckDoJ5ebb9brbTlMNuVKkJ(`BBg04xOhP<1jbiPjUZ!#>hccy5 zr<5d>k|n8CqUKl`I_q+iKp!WPn#mkjk4(E`21F?8MzP^MYSOjc{@x3(^_^D;>p-Le zJ3KN@s=0*gD(7(hR!s8+bIEN2iKAB3AA5)wz{Oxy#S1EG51PVxt8RjnWQC!a3N)Y~ z)CQFTv4W*p;DaE4fycQ6%+%cauc}pKe^7Kz7j)Y}l2B`cYA;TF5F^HnBnW{m=SX>3 z<>&P<EljT&To=*jxwzYElZHDm_iG@sZs-ex}f@2ue{*)xerXC>WCz zsai0d}`sEql;Sb%8OEBR)+(KK{p<{S$IMKi@r3hyu-F`Lp? 
zr4VBfiaGK#>)l|er(=4AngjlxSdqf09#9p9cRevvMwNO145RIlH8TOIuFeKT5k7}Q zP`L#*^~4yL$yF&Efa#>xE8BX7>F6w2*C;Oi3bMuP_j}+g8Iv6Z$a5nE4h4^j0?rf* zJ-ERkER_1ffslajdL;J*)gfFdf^sb`Aw-n&D^4hpOan>QljNmvKB8c(;itfH`|UII6$z+XqTA_=Vi@k?^W4=tNUkr z&kf1?@m}`TzHzxI)ZxKEvLV#&A=wm}8SWs%m`Ac1zN>tPN<9Sh*P=+0?c@^V{awvej+a>Qc6b z_g;;k-^OLr#aN>A=iNW+UT{7&P>?2eet03?w{0{pRV-F~>|DNd`|3wm6F*!%n<{Hd z6~D4od~~DuXsWm?Wjr1~yJNC0@JoTkK(exVwLWEQO_|(VrXw4sBWwPY=|ue8j=g00 z$nE1F9Z&9cuQ4g-;gr2|%YI_Req#N6%HA6v!1cS!iSt9er`OJ=s*a_~ zy0*$rZIqo#m7Pghd*l62O%z~iNLCzA*}CJeVzh^ol_yj7p7_9yu`1C7``m}>j|gl(Eqiy;-o0}87u}zAuliFBhu6f7hVJ+PFwR+C z5SM*#;zr7TFh1~YN$K+B!h!ht2WIQG!LnptG$%`sZ5p~Bm~ENgCpQf}4~p!|10RoU z8>~x~MN6{c%-zZ_EkNM!jc<%zPF}u}EW5gC_`!oB>+SkP`KL7*xTaP8s`yLu%Dc(V zk>n_w9OJ$oy?k%}Aw^%M&(QEHKSTcylsbFqUs;{%tg>^PhV$F7VrQzTB0Y#fdud!1 zJxl+u9#VC;2OiY#Ul5iXesVKWdh193T)ujAt>Kp^)=Tdn>bZA$L9=79CLKrCCfA*R zGre|oJ#=sEULPP~=_5Nwy4IavTv;20pQr1#>kh2wSG*5(jNSE=VvGl$=&6#5h29;d zSi5DZ*|5|k-d?3w#MS!b-q!mT_ahBJ{D<8EH9MW*{lWJJx3r}j+R|j%)TVa&8*A|b zy<;#hnHEi3hRO{?WukM1PKYb@iJJR{Mwn|W`L@Ws5dDL9^PnD5Omz@=!5C-hhkD9n z{a{bt_hi+LuMNQm*0TK3Js0jZ{7VH^lnKwq>yybzK3O%jX_)?YPt$5evbsIldH(K= z-@(&w709(JIb;pA-C9}V)SggF4k;kn2|1NC~Eex%jW z+9wu@c6>vbzoCl1p-hjp2D] (default: node1 — the VM carrying Qwen) # -# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user / +# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-software-key / # link-swarm: tiny script, thin prompt, intelligence in the agent's astral-agent # skill. The agent stores a payload ON THE OTHER node — addressing the sibling # explicitly as the query target (:objects.store) — then loads it back from @@ -37,7 +37,7 @@ printf '%s' "$prompt_b64" | base64 -d > "$d/share-object.prompt" chown -R tester:tester "$d" # Run the agent as `tester` (qwen is installed for that user), non-interactively. 
-# Invocation matches what was validated for bootstrap-user / link-swarm: one-shot +# Invocation matches what was validated for bootstrap-user-software-key / link-swarm: one-shot # positional prompt + `-y` (auto-approve). su - tester -c 'qwen -y "$(cat /home/tester/.netsim/share-object.prompt)"' \ > "$d/share-object.log" 2>&1 || { From 88ec7b1271890be7f70b2e9ed5e0171b36375b86 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 12:39:33 +0200 Subject: [PATCH 21/57] netsim: drop accidentally-committed __pycache__; ignore it The previous commit swept in py_compile byte-cache via 'git add -A'. Remove the .pyc artifacts and add netsim/.gitignore for __pycache__/*.pyc. --- netsim/.gitignore | 2 ++ .../__pycache__/verify.cpython-314.pyc | Bin 6796 -> 0 bytes .../__pycache__/verify.cpython-314.pyc | Bin 10246 -> 0 bytes 3 files changed, 2 insertions(+) create mode 100644 netsim/.gitignore delete mode 100644 netsim/tasks/link-swarm/__pycache__/verify.cpython-314.pyc delete mode 100644 netsim/tasks/share-object/__pycache__/verify.cpython-314.pyc diff --git a/netsim/.gitignore b/netsim/.gitignore new file mode 100644 index 00000000..7a60b85e --- /dev/null +++ b/netsim/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +*.pyc diff --git a/netsim/tasks/link-swarm/__pycache__/verify.cpython-314.pyc b/netsim/tasks/link-swarm/__pycache__/verify.cpython-314.pyc deleted file mode 100644 index fad0613eaab899c766ed02d8b065cdd420111097..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6796 zcmcIoYit|G5#Bo<$>T$OiF#PEt<%$@ZAmaK%Z_9#r5?6qIg)Ue88=lzQ{>5}Es^Zq z(T>@)po0RDk|0u=6j6Z`Re-jUf3z`BpfFINaDe_W3KXD2#bPf)lb`{D{__h(kYAnM z;~goPiqisJf_rlbkXoV>>9=-#O4C0ZDUPL@K zLdg_)(lSk+2H8NKjLh(K4U&z#0csP^KyBuYP_w)VY71|M+RC#~+g`BqmLq2oiq;{t z6TQGpz;kP9Krh%DP*;K6MX8N?(4mR^eb)2`mjy94bCsKo#V@r>SHj|44;N2Fg#%nT z9wjY5=@X({EG{RwL|ov`NP@@_-u;|(b#6|O#aM(B6Ot^59aif|JSrpwsNym=osciI zLtEk^7lgahGFZE-uJYY>tzp 
z3mw+62wk!|Gqeo|F%N^#$e<~`Yoe5^Ky84v@^ldKh9K2ppzH_i0MB@vvcDx2TCB`{PjwVuahhkDk zDh60Unr?5O^wEkV5>5h>5CV>5N>&(IcuDpd6swe)PKt?$AW4b=HbpV1QlN{zJg1n{ z2^A_SVZ~cpIi*3B-YJKrOHyaie4VO6JCavLye?ANpdsypDucdcJ&TUl9LwxV%c^H} z@w_BytA!h z1=I?I6%s3<>A^k2P-W0P#$fbpI4c*^uchB>TAt1}_T}oHS+6^htvhk6E9V?ucaCM9 zW4HUW?AX^EHs`vn{*JBw5u%Kqywm-f<(>gr-$*qe?3KoD^qY!q^ar{d{jnzCGJNEu zpe|=z^pF9?GN=P3sY&#>Ie0Wt>JlIv7cIiL(D8x7nF7;pEgBa-bDLdYbQc153Q`lg z!xYC(=?0%*i&U-EfP%X7Xiij*Mu!lxpxq#mI8RXmiPhRz^$E@cvtsAjg+{}u=sJ`F zF#rx`ayBNz@e(A(C_$i0DyDD}hc^+zJrdox!e)b$3TGC=*qma3!>!P>arLZnT*1nT z>5G!M6IgMimM%e+L3yh^Gmv*yExtVe^0H^$vFDCs&s~Rap3YY|7tb!7ef{})`ZK5d z?{@D!+F)_#?asxM3n$-^-%bB4z0&yhPwv`V@7s`b?>!f?J2NN0eq=;0@4B<;j`lpsaEB#SxRp1%pM|~93yBhDf z8drK&#hlAOAIMjF-#zr!q2=1QpIHffF!cV=sxQ~vm96ah*xq&Df*iH?tjJ!SWvWC6 z%)ZGnaZ~_zV;*LSUfVb(K!N{tG z^N*RB2-xTpKr(<`QfwN+&v}4fnu`$V6#^T}DM@n`aX0LZVp7K_tR`_xYy>*QCLXELPN2pn2h0F-g$i?t9ng6I{?aL^R8MGm^9SGN2`l{{9Do1#>XG&4uC?Z_ zd}YwHm6!QZjizdTg54(g9u|4L*a}UOgAZ}1THoMropIL>cAVR8HF71Fq}rf(p=%p z<9-fs^to8U=I1(8(>Zv8DK*1Q9#<{L0GPlc6;>NiVncF}btBp?gryK>4#^4eAnaxj zw&1{jXTj{g-dyw1TkPHDVWqN0H=J8F9LUm(1zOk~4=;iVpNzrv+aJJSxqF@`DE)B3 zHR`wHG5~s~H*uMDjr4{#*A7ugB*Zb1`UVD#q z(dJIFn|N$xc`QYqAj8}BvDQuW3Y~7-M0ek9r+TIUFRJCaKkM^W0%r9FO?o^AP_`2Vy%Y5sZ}G`HHCgrOxw}WMz*ltb zw{W|H6|mRtpvjK>`h^c8|4l~1YPX%o6Le}L`AWaBIBU@Ho%HmUjBtIY5mn_zL}3p= zM|aTm#I;rj-Jo$z(6xE3MSY_jk>v?Gp1Atjpa*2_2s)sy+qC*heJ$1BvU^XFkvskR zIfSU-)ka3g@W?69MlBQgqT5m?TDB12mETpeLtX&1a3vdc^T+qZfl)_OM*BEp>Mr#VX9-}q;%|(4*1c2#lNt@f-Ns2}>l828{%u!(` zoSK!jF2CC47jdvr%otdPV$t+2;iY6kl)1p^Nj`9DFcg@W7#=@88JZj)9U42PSP7q; zxFp0CsyE#LpT2-kXWM@WAogkrI&FvV8xDWyrhR2tHldRuYZ*=gb&DL0o2DzhPK!Kk zn|KId-e$h~c!H=5>&DQD>=a}G^l$%kH6ay2c#ME`E^Xl&j;SdwmBs%!OEcGo2m47P zD;!NEWx#1E$askW?_&}f?MpYS6TqzueA@l5fo%>FKKPqJemC zXpd@yp0R)7+j}f+>*(Nm3*yoqcsbfm7g=hWu?Y|~d;*aDx=tFU45d@?Lbg5n5>=Tmb4)+g|*sMy2NXb5@q&X(4~0d zN*q%q@epV(9>x_$2ap{~#Nv<-ltr-#zZ*$%LCw?Qk1v9|33LFR}1Q8s;$7GpO$a~BI z1QtWUg`ub<0Vd0!4U2o-!hLMva+bzSf1WYDI(}_@X=LR}W_*q5eC*xfT6Z+uaWv!{ 
zP1jCj2J<*Awvb-#`c=;_dgd!1u?Rj@D_$ebTG?}eo{M{bDqi!tNk@k=Nvs+XYJ9P^H^s1kqNmffw#6l=N!lke_2tr zbUs(H``YnL|3*n0zM!qO%e6UcV`gBZ;;H4~Tt!Rf_=dA?`AE*$mKpxsR*@O}%x2HC z_C?!*E$cnB#&+k;mY-hHDAoH{`PIQ2mYWSXE3@8XYi!^4-AB)5z2P->`ZHUFHr(5@ z#`Zq;Znn%vmiRYf8dr7uYGn1yjlDPdn}bhP>ML8N&e?ZUycxaKko69(u|r>(Z5ym@(Yjz=XKU}Uwb>orYwY1q z-Bt5}4K=^DZr^#wzH?b#39KAknO>f{Yu|g{fIQV-I9*>dmih7J!1DAjhn5eo>|33_ z;rShCkv(+cHhtUwH`A?`K!*wH1+0qgMXZW3M15hilkXRW4TZ2bhC)7vVh)9(iAX3U z_JLcf$viV|!KbF0H7945WL8LKq~vg7XU4MP;W;4`f}`+KOumlD6spg&N9PjJ)U41a z3c!k!V$%1af>azu-DeCG^N>Z9>r-U=6uCb|)&~qjc^^28l;?rVN;x0YnW^drUK8bd r;4x9|2cAPz#l0kj82fyB&9EcSm|q=!Wq4uo_26B`3me1Kk*WU!M+~&~ diff --git a/netsim/tasks/share-object/__pycache__/verify.cpython-314.pyc b/netsim/tasks/share-object/__pycache__/verify.cpython-314.pyc deleted file mode 100644 index a93fd67dcabf7b1253a66d1c7c2c321648a363d4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10246 zcmcIKZA@HOcJICUV7_Ny_%=2duz|5*2pHQK8{621V2FV}@Uy+anG7=z7<-tRy!Qr& z*)vyY5813H3Z6eQ9lyR^Ls9>Uf2fZ(-FdPCojAo(67>ef z(idonCT~V!$Xg?6$XhFES*C)LbgTwiJ*$Pbh}A)BVD-=%*&=97tN~gxYlPOqnxM5_ zv$5u`D-`9grl=a~ns#)bN<#KsM_selQJv|5PFm^I0zD1n&t>tRgTXmo2u#f*amFj~ zt>MY*yie*z5g|Osqfpq-cc943ycqC#gTZ+;6At=CB+c+h0VELZNR+|???>Sf3P^5~ z=|afQM|fyL63h^#Rx#k`k#C0g-9XL$FfYP12}wd!l#mb&xy2b276?}FG|Xudk-&TX zt&?8g4Ochvngl))ZjA&&A>QADqM>Q8gcl=}itb?8>qo5^1O>xDC8k25dzRqmrKk`h z$oaRuJ}FJBX>wlTMK@x3FVLlIiMuGGxqugicu5S*B2k=iqcNUGY+vvBi+%1{KblID zITZ~Ck>HgA;gC1TyG_GBCd+oJ2>Kd|(mvs!6Q?FJpGI?`(xY`C3YOt0R>RS59jk?Z zx??aKKM{swMpczkhDh5$x%9CuHX1^2W@$wy=Vni#)>d><;Ag`Uf9g%l2`nlsf)l_} z!U$3Fhocf|p5#F;{1og+a)HRaq9k~Ot#3tnVIJKS0>B)^3qn}%%<`fLVnI`a7rICa z0|ylpH)jIA8FZ6J5w9TfF60e?RA52DH02EhqXIxT3$PgP0dae#!qE_jkC%LIR~ic} zfSNN{{6aVaq{w<@PqGFC5Mx?f$6btU^LZmcKJNi^k*FkVCH`&6rIAfybP`n62Q}`@)ydk#bDt((`2=2(dfO$?DD|EzGXyVjU z3uSaJ**>r(3@Zm#OIF9%jwUO*w;KU7Fb7IB1V*lKTfnF7~wrLQ`S)brCeHxosvq%eR4|NW738 zcd1&(U2H*K5qXh6;FH|3YM)m^g{Q=rAHM6-%36$8)?s@V3G-$9;MmCUdEN(Js1NI+ 
zix#l!6)|TU1uPp`iwP2((5=K@2B3*k549Ry$&TH*6#F3dX+vT%S$`^3eR`|<)nxUn zcRN$|fi3%R(ms6eY|=3N>m9Rw%Uru*u6;_;x{_^s@dw6-8tDB-tN>&`u0Klsne{03 z1#^`8+lpR?=1V&bZ6V=OPqaW-oN`kUdYpPz94s}Os}g`5KdO6nQ2po977wD`{ismb zBjl^V&|V7EiK9m|3X`2XkrDW)Av>z}>L@OwJgO4aQbT zYG1mtc;&+%EHDr3#s6k0eaL8x#oHG9(%|CY$I|WCpTt(`fBK{Qmc~bB%6{;ngRNqa3AM_J$361E_+rShhGo5sbyZ5iz0vaGY{87IKLAi5dFA*Uys^7t1p=jhgWRqa6Px zWvB~|?-6n~yISzEJ{%75aE{&xMBqdTi>R&Ox#E_!`5$ zC6G1Kyd;|fVjv_+-jI)nvqBQ?XoM=@oJ@yhCOnz1KsHP>@Asss7hC|?27lrpG~nCy zl&xgB@z%rwy$w$M$l{UZ@(+8GHJzL0PSBrit8MAh;-%%T4?PRaj=5}k^3#Sb^S({< zzHMjO2Nyt#Y}Jn}l*J*~09~G9VEe)1esYT}R9G=C(NV???zjICMZHyAP(3ng#M1cM zr?~$ixct3HJ%*VfIKQx{qf7*B8jBwH0$_ClzBCHhq=Z^%Tnyo^&;b2A3}L2#wMWFK zPh;8?^+X_|&;ny8@W&(2#HpPFuGPlRCsJnavxyb@?=Jn-rS+p?A{rN>d|SUa zDDrK@b`&XL0SyZX+O!c6F$w!&K-OY30w_8L`TnzDwn`PtJfxgX?c$UkLW_Vqf@M z`ZmzX9O*Iz4JqR{Unt!%>?!l5TNq;-rTW^4hSL;;(Wu^impA@HSVO~Vo})op;+xOl zXa+$96_NtdJnBJXQQ}@vcuI+xd{O{nH7sla2RO}B*q28|UU1{fk;_etTQ;0a1IR^b z1jBa}cbcn7__78Hh8-!feW4k8GVK$!@NPuvf$UIM^S}ihM~uQz_&ighpo)|kxvjd^O~59Uz$skdh7F?yZq35E2^_kRhs!vIh>q2b;{EP=Ch+BxJe+RS zsrIvPR?q5G_oF(Q(X#A!wWrdn!bJ2h#0-h3XE5WKXxGCUGB}G~h|~C6<21Y^UrYs7 z`aXOyPGILp9!|@eRSrZ-DQ5TKm_U7U>Gdn{yLP<`o@TX?@`(fLe72U; z=i>)1A+8+Sx(Ssq&0J#1;>wZL%fTHrQ6qdYEC|ob51km;t_?q9}YW zN2(FBN24_B?y1Q0++KOPaOh2$YrAvTEo3mwxzr2{EkeUWP5rD zmy+8>GWY3>OFqr|9O51R(E)-5D>tO#VpwztDA@`Qa(8n6HAZpgGAe>80@rQ4Q zvXwVf*ON}{WpaMaF)bdIX;;ia=BNNr4(J~KZwm-zIJU>_Mw7ro%&!hij~b-}L=-y& zADfk_vtB4CdLeLC~=gRDBMS| zPVZ}DBT&NRMO=Z**|Sn0B(=h3Z@OIQ3rFUWHwep;DkUaDiFcIE;fflrRto%E(Et=V zThMh}R)itm?}{;QckDoJ5ebb9brbTlMNuVKkJ(`BBg04xOhP<1jbiPjUZ!#>hccy5 zr<5d>k|n8CqUKl`I_q+iKp!WPn#mkjk4(E`21F?8MzP^MYSOjc{@x3(^_^D;>p-Le zJ3KN@s=0*gD(7(hR!s8+bIEN2iKAB3AA5)wz{Oxy#S1EG51PVxt8RjnWQC!a3N)Y~ z)CQFTv4W*p;DaE4fycQ6%+%cauc}pKe^7Kz7j)Y}l2B`cYA;TF5F^HnBnW{m=SX>3 z<>&P<EljT&To=*jxwzYElZHDm_iG@sZs-ex}f@2ue{*)xerXC>WCz zsai0d}`sEql;Sb%8OEBR)+(KK{p<{S$IMKi@r3hyu-F`Lp? 
zr4VBfiaGK#>)l|er(=4AngjlxSdqf09#9p9cRevvMwNO145RIlH8TOIuFeKT5k7}Q zP`L#*^~4yL$yF&Efa#>xE8BX7>F6w2*C;Oi3bMuP_j}+g8Iv6Z$a5nE4h4^j0?rf* zJ-ERkER_1ffslajdL;J*)gfFdf^sb`Aw-n&D^4hpOan>QljNmvKB8c(;itfH`|UII6$z+XqTA_=Vi@k?^W4=tNUkr z&kf1?@m}`TzHzxI)ZxKEvLV#&A=wm}8SWs%m`Ac1zN>tPN<9Sh*P=+0?c@^V{awvej+a>Qc6b z_g;;k-^OLr#aN>A=iNW+UT{7&P>?2eet03?w{0{pRV-F~>|DNd`|3wm6F*!%n<{Hd z6~D4od~~DuXsWm?Wjr1~yJNC0@JoTkK(exVwLWEQO_|(VrXw4sBWwPY=|ue8j=g00 z$nE1F9Z&9cuQ4g-;gr2|%YI_Req#N6%HA6v!1cS!iSt9er`OJ=s*a_~ zy0*$rZIqo#m7Pghd*l62O%z~iNLCzA*}CJeVzh^ol_yj7p7_9yu`1C7``m}>j|gl(Eqiy;-o0}87u}zAuliFBhu6f7hVJ+PFwR+C z5SM*#;zr7TFh1~YN$K+B!h!ht2WIQG!LnptG$%`sZ5p~Bm~ENgCpQf}4~p!|10RoU z8>~x~MN6{c%-zZ_EkNM!jc<%zPF}u}EW5gC_`!oB>+SkP`KL7*xTaP8s`yLu%Dc(V zk>n_w9OJ$oy?k%}Aw^%M&(QEHKSTcylsbFqUs;{%tg>^PhV$F7VrQzTB0Y#fdud!1 zJxl+u9#VC;2OiY#Ul5iXesVKWdh193T)ujAt>Kp^)=Tdn>bZA$L9=79CLKrCCfA*R zGre|oJ#=sEULPP~=_5Nwy4IavTv;20pQr1#>kh2wSG*5(jNSE=VvGl$=&6#5h29;d zSi5DZ*|5|k-d?3w#MS!b-q!mT_ahBJ{D<8EH9MW*{lWJJx3r}j+R|j%)TVa&8*A|b zy<;#hnHEi3hRO{?WukM1PKYb@iJJR{Mwn|W`L@Ws5dDL9^PnD5Omz@=!5C-hhkD9n z{a{bt_hi+LuMNQm*0TK3Js0jZ{7VH^lnKwq>yybzK3O%jX_)?YPt$5evbsIldH(K= z-@(&w709(JIb;pA-C9}V)SggF4k;kn2|1NC~Eex%jW z+9wu@c6>vbzoCl1p-hjp2D Date: Mon, 22 Jun 2026 12:54:15 +0200 Subject: [PATCH 22/57] netsim: embed a real mnemonic in import-user-software-key prompt Bake a valid BIP-39 mnemonic (the canonical all-zero-entropy test vector) into prompt.md instead of an __MNEMONIC__ placeholder; the task is now self-contained and reproducible. run.sh ships prompt.md verbatim (drops the ASTRAL_USER_MNEMONIC requirement and sed substitution). verify.sh's optional ASTRAL_USER_ID assertion is unchanged. 
--- netsim/stories/import-user-software-key.story | 6 ++-- .../tasks/import-user-software-key/README.md | 4 +-- .../tasks/import-user-software-key/prompt.md | 2 +- netsim/tasks/import-user-software-key/run.sh | 28 ++++++++----------- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/netsim/stories/import-user-software-key.story b/netsim/stories/import-user-software-key.story index 7f17fe44..75857202 100644 --- a/netsim/stories/import-user-software-key.story +++ b/netsim/stories/import-user-software-key.story @@ -1,6 +1,6 @@ # import-user-software-key.story — make node1 a User node from an EXISTING mnemonic -# (alternative to bootstrap-user-software-key). Requires env ASTRAL_USER_MNEMONIC; optional -# ASTRAL_USER_ID makes verify assert the derived id. +# (embedded in the task's prompt.md; alternative to bootstrap-user-software-key). +# Optional env ASTRAL_USER_ID makes verify assert the derived id. # start: astrald-lab save: astrald-user -# ASTRAL_USER_MNEMONIC="..." netsim story --stage astrald-lab --save astrald-user netsim/stories/import-user-software-key.story +# netsim story --stage astrald-lab --save astrald-user netsim/stories/import-user-software-key.story import-user-software-key diff --git a/netsim/tasks/import-user-software-key/README.md b/netsim/tasks/import-user-software-key/README.md index b2361f7d..e8b53fce 100644 --- a/netsim/tasks/import-user-software-key/README.md +++ b/netsim/tasks/import-user-software-key/README.md @@ -1,8 +1,8 @@ # import-user-software-key Drives the Qwen operator on node1 to make it a **User-controlled node from an -existing software User** — deriving the User key from a provided BIP-39 mnemonic -(`ASTRAL_USER_MNEMONIC`) instead of minting a fresh one — following the +existing software User** — deriving the User key from a fixed, known BIP-39 +mnemonic embedded in `prompt.md` instead of minting a fresh one — following the astral-agent skill's node-setup playbook. 
`verify.sh` confirms node1 answers as that User (and, if `ASTRAL_USER_ID` is set, that the derived id matches exactly). A drop-in alternative to `bootstrap-user-software-key`; produces stage `astrald-user`. diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md index 4d4f9b8e..dc5c2758 100644 --- a/netsim/tasks/import-user-software-key/prompt.md +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -2,7 +2,7 @@ On this machine there is an `astrald` node running. It has its own node identity but no User. You already control a software User whose BIP-39 mnemonic seed phrase is: - __MNEMONIC__ + abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about Make this node a User-controlled node under THAT existing User: derive the User's `secp256k1` key from the mnemonic above (start from the mnemonic — do NOT generate diff --git a/netsim/tasks/import-user-software-key/run.sh b/netsim/tasks/import-user-software-key/run.sh index 615ef4be..0180b23f 100755 --- a/netsim/tasks/import-user-software-key/run.sh +++ b/netsim/tasks/import-user-software-key/run.sh @@ -1,35 +1,29 @@ #!/bin/sh -# import-user-software-key: configure the operator node as a User node from an EXISTING software -# User key — the User's BIP-39 mnemonic (env ASTRAL_USER_MNEMONIC) is derived -# instead of minting fresh entropy. Driven by the Qwen Code agent in the VM. -# import-user-software-key [--vm ] (default: node1 — the VM carrying Qwen) -# env: ASTRAL_USER_MNEMONIC (required) ASTRAL_USER_ID (optional; verify.sh asserts it) +# import-user-software-key: configure the operator node as a User node from an +# EXISTING software User key — the User's BIP-39 mnemonic is embedded in prompt.md +# and derived instead of minting fresh entropy. Driven by the Qwen Code agent. 
+# import-user-software-key [--vm ] (default: node1 — the VM carrying Qwen) +# env: ASTRAL_USER_ID (optional; verify.sh asserts the derived id matches it) # -# Drop-in alternative to bootstrap-user-software-key. Runs ON THE HOST (cwd = simulation root): -# substitutes the mnemonic into prompt.md, base64-ships the prompt to the agent over -# one `netsim ssh` argv, and runs `qwen -y`. Intelligence lives in the prompt and -# the agent's astral-agent skill, not here. +# Drop-in alternative to bootstrap-user-software-key. Runs ON THE HOST (cwd = +# simulation root): base64-ships prompt.md to the agent over one `netsim ssh` argv +# and runs `qwen -y`. Intelligence lives in the prompt and the agent's astral-agent +# skill, not here. set -eu VM="node1" while [ $# -gt 0 ]; do case "$1" in --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; - *) echo "usage: import-user-software-key [--vm ] (env ASTRAL_USER_MNEMONIC required)" >&2; exit 64 ;; + *) echo "usage: import-user-software-key [--vm ]" >&2; exit 64 ;; esac done -[ -n "${ASTRAL_USER_MNEMONIC:-}" ] \ - || { echo "set ASTRAL_USER_MNEMONIC to the User's BIP-39 mnemonic seed phrase" >&2; exit 64; } - # CDPATH= is an intentional one-shot env prefix for cd, not an assignment # shellcheck disable=SC1007 here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) [ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } -# Substitute the mnemonic into the prompt template (BIP-39 words are [a-z ] only, -# so no sed-delimiter or regex-metachar hazard). 
-prompt=$(sed "s|__MNEMONIC__|$ASTRAL_USER_MNEMONIC|" "$here/prompt.md") -prompt_b64=$(printf '%s' "$prompt" | base64 -w0) # GNU coreutils; -w0 = single line +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line REMOTE_BODY=$(cat <<'EOS' set -eu From 6336052c294e2a27c466ad19f4cce59eb7d93df9 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 12:56:58 +0200 Subject: [PATCH 23/57] netsim: use the provided 18-word mnemonic in import-user-software-key prompt --- netsim/tasks/import-user-software-key/prompt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md index dc5c2758..ab4e2b6b 100644 --- a/netsim/tasks/import-user-software-key/prompt.md +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -2,7 +2,7 @@ On this machine there is an `astrald` node running. It has its own node identity but no User. You already control a software User whose BIP-39 mnemonic seed phrase is: - abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about + horse soldier imitate stool square buyer verb party enjoy result jazz rabbit trigger file benefit cloth term change Make this node a User-controlled node under THAT existing User: derive the User's `secp256k1` key from the mnemonic above (start from the mnemonic — do NOT generate From dcbe480b860357f1dda4d0f10b752788bc0145e1 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 14:48:42 +0200 Subject: [PATCH 24/57] netsim: point configure-astral-agent at the migrated skills repo Skills moved off GitHub to ssh://git@git.satforge.dev/satforge/skills.git. Update the default SATFORGE_SKILLS_REPO and the comments/README; drop the GitHub-specific 443 fallback note. Host-key handling (StrictHostKeyChecking= accept-new) already covers the new host, and the deploy-key flow is unchanged (the key must now be registered on git.satforge.dev). 
--- netsim/tasks/configure-astral-agent/README.md | 5 +++-- netsim/tasks/configure-astral-agent/run.sh | 15 +++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/netsim/tasks/configure-astral-agent/README.md b/netsim/tasks/configure-astral-agent/README.md index be818368..ccc8ad60 100644 --- a/netsim/tasks/configure-astral-agent/README.md +++ b/netsim/tasks/configure-astral-agent/README.md @@ -2,6 +2,7 @@ Installs the `astral-agent` skill into the Qwen Code operator on node1, so it can drive astrald from the skill's playbooks + astral-docs instead of from procedures -spelled out in each prompt. The host clones the private `satforgedev/skills` via a -deploy key (`SATFORGE_SKILLS_DEPLOY_KEY`, a host path to the private key) and links +spelled out in each prompt. The host clones the private `satforge/skills` +(`ssh://git@git.satforge.dev/satforge/skills.git`) via a deploy key +(`SATFORGE_SKILLS_DEPLOY_KEY`, a host path to the private key) and links the skill into `~tester/.qwen/skills/astral-agent`. Part of `lab.story`. diff --git a/netsim/tasks/configure-astral-agent/run.sh b/netsim/tasks/configure-astral-agent/run.sh index 55ab77c4..d0161ed0 100755 --- a/netsim/tasks/configure-astral-agent/run.sh +++ b/netsim/tasks/configure-astral-agent/run.sh @@ -1,16 +1,16 @@ #!/bin/sh # configure-astral-agent: install the astral-agent skill into the Qwen Code -# operator by having the VM clone the (private) satforgedev/skills repo with an +# operator by having the VM clone the (private) satforge/skills repo with an # injected deploy key and run the linker itself. # configure-astral-agent [--vm ] [--user ] # Default: --vm node1 --user tester (the operator created by install-qwen-code). # -# The HOST owns the deploy key; the VM never needs GitHub credentials of its own. +# The HOST owns the deploy key; the VM never needs git credentials of its own. 
# run.sh reads the private key path from $SATFORGE_SKILLS_DEPLOY_KEY, base64-ships # it in over a single `netsim ssh` argv, and the guest then: # 1. installs the key for the operator and clones -# git@github.com:satforgedev/skills (parent over SSH via the deploy key; -# the astral-docs submodule is public HTTPS, so it needs no key), +# ssh://git@git.satforge.dev/satforge/skills.git over SSH via the deploy key +# (submodules resolve per the repo's .gitmodules), # 2. builds the satforge-skills linker (Go is already on the node from # install-astrald), # 3. runs `link astral-agent --target qwen` -> ~/.qwen/skills/astral-agent. @@ -30,7 +30,7 @@ while [ $# -gt 0 ]; do esac done -REPO=${SATFORGE_SKILLS_REPO:-git@github.com:satforgedev/skills} +REPO=${SATFORGE_SKILLS_REPO:-ssh://git@git.satforge.dev/satforge/skills.git} KEY=${SATFORGE_SKILLS_DEPLOY_KEY:-} [ -n "$KEY" ] || { echo "set SATFORGE_SKILLS_DEPLOY_KEY to the deploy key path for $REPO" >&2; exit 1; } [ -r "$KEY" ] || { echo "deploy key not readable: $KEY" >&2; exit 1; } @@ -48,9 +48,8 @@ chmod 600 "$home/.ssh/skills_deploy" chown "$u:$u" "$home/.ssh/skills_deploy" # Guest-side provisioning, run as the operator. Quoted heredoc: fully literal; -# repo + ref arrive as positional args. github's host key is auto-accepted on -# first connect. If outbound SSH:22 is ever blocked, switch the URL to -# ssh://git@ssh.github.com:443/satforgedev/skills. +# repo arrives as a positional arg. The git host's key is auto-accepted on first +# connect (StrictHostKeyChecking=accept-new). cat > "$home/.netsim/setup-skill.sh" <<'SCRIPT' #!/bin/sh set -eu From 5bf4e49c7b8c69271ff1bec95e6fc3c96904fdc6 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 15:00:01 +0200 Subject: [PATCH 25/57] netsim: rename link-swarm scenario to adopt-node Rename the second-node task to match the swarm vocabulary (user.adopt): link-swarm -> adopt-node. 
Renames the task dir + story file and updates every reference (internal messages, prompt/log basenames, README layout/pipeline, share-object cross-reference). Stage names unchanged (astrald-user -> astrald-swarm). --- netsim/README.md | 8 +++---- .../{link-swarm.story => adopt-node.story} | 6 ++--- .../{link-swarm => adopt-node}/README.md | 2 +- .../{link-swarm => adopt-node}/prompt.md | 0 .../tasks/{link-swarm => adopt-node}/run.sh | 24 +++++++++---------- .../{link-swarm => adopt-node}/verify.py | 4 ++-- .../{link-swarm => adopt-node}/verify.sh | 0 netsim/tasks/share-object/run.sh | 4 ++-- 8 files changed, 24 insertions(+), 24 deletions(-) rename netsim/stories/{link-swarm.story => adopt-node.story} (54%) rename netsim/tasks/{link-swarm => adopt-node}/README.md (96%) rename netsim/tasks/{link-swarm => adopt-node}/prompt.md (100%) rename netsim/tasks/{link-swarm => adopt-node}/run.sh (76%) rename netsim/tasks/{link-swarm => adopt-node}/verify.py (97%) rename netsim/tasks/{link-swarm => adopt-node}/verify.sh (100%) diff --git a/netsim/README.md b/netsim/README.md index 7a300a15..a7418c73 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -18,13 +18,13 @@ netsim/ configure-astral-agent/ # install the astral-agent skill into the qwen operator bootstrap-user-software-key/ # make node1 a User node, new key -> astrald-user import-user-software-key/ # make node1 a User node, existing mnemonic -> astrald-user - link-swarm/ # adopt node2 into node1's swarm -> astrald-swarm + adopt-node/ # adopt node2 into node1's swarm -> astrald-swarm share-object/ # store an object on the sibling -> astrald-shared stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab bootstrap-user-software-key.story # astrald-lab -> astrald-user import-user-software-key.story # astrald-lab -> astrald-user (alt.) 
- link-swarm.story # astrald-user -> astrald-swarm + adopt-node.story # astrald-user -> astrald-swarm share-object.story # astrald-swarm -> astrald-shared link.sh # register tasks with netsim (idempotent; re-run anytime) README.md @@ -105,12 +105,12 @@ stage (its `start`/`save` stages are in the story header). Intermediate stages stay reusable, so you can replay one flow without rebuilding the chain: ``` -astrald-lab ─[bootstrap-user-software-key]→ astrald-user ─[link-swarm]→ astrald-swarm ─[share-object]→ astrald-shared +astrald-lab ─[bootstrap-user-software-key]→ astrald-user ─[adopt-node]→ astrald-swarm ─[share-object]→ astrald-shared ``` ```sh netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user-software-key.story -netsim story --stage astrald-user --save astrald-swarm netsim/stories/link-swarm.story +netsim story --stage astrald-user --save astrald-swarm netsim/stories/adopt-node.story netsim story --stage astrald-swarm --save astrald-shared netsim/stories/share-object.story ``` diff --git a/netsim/stories/link-swarm.story b/netsim/stories/adopt-node.story similarity index 54% rename from netsim/stories/link-swarm.story rename to netsim/stories/adopt-node.story index 4572ceae..5c32097c 100644 --- a/netsim/stories/link-swarm.story +++ b/netsim/stories/adopt-node.story @@ -1,4 +1,4 @@ -# link-swarm.story — adopt node2 into node1's User swarm (symmetric roster). +# adopt-node.story — adopt node2 into node1's User swarm (symmetric roster). 
# start: astrald-user save: astrald-swarm -# netsim story --stage astrald-user --save astrald-swarm netsim/stories/link-swarm.story -link-swarm +# netsim story --stage astrald-user --save astrald-swarm netsim/stories/adopt-node.story +adopt-node diff --git a/netsim/tasks/link-swarm/README.md b/netsim/tasks/adopt-node/README.md similarity index 96% rename from netsim/tasks/link-swarm/README.md rename to netsim/tasks/adopt-node/README.md index fc80a6c0..2460ca3e 100644 --- a/netsim/tasks/link-swarm/README.md +++ b/netsim/tasks/adopt-node/README.md @@ -1,4 +1,4 @@ -# link-swarm +# adopt-node Drives the Qwen operator on node1 to **adopt node2** into the User's swarm (`user.adopt`), following the astral-agent skill's node-adoption playbook. diff --git a/netsim/tasks/link-swarm/prompt.md b/netsim/tasks/adopt-node/prompt.md similarity index 100% rename from netsim/tasks/link-swarm/prompt.md rename to netsim/tasks/adopt-node/prompt.md diff --git a/netsim/tasks/link-swarm/run.sh b/netsim/tasks/adopt-node/run.sh similarity index 76% rename from netsim/tasks/link-swarm/run.sh rename to netsim/tasks/adopt-node/run.sh index b5f39261..7e8c7d77 100755 --- a/netsim/tasks/link-swarm/run.sh +++ b/netsim/tasks/adopt-node/run.sh @@ -1,8 +1,8 @@ #!/bin/sh -# link-swarm: adopt the second node into the User's swarm, driven by the Qwen +# adopt-node: adopt the second node into the User's swarm, driven by the Qwen # Code agent running INSIDE node1 (which is already a User node from # bootstrap-user-software-key — default starting stage: astrald-user). -# link-swarm [--vm ] (default: node1 — the VM carrying Qwen) +# adopt-node [--vm ] (default: node1 — the VM carrying Qwen) # # Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-software-key: # tiny script, thin prompt, intelligence in the agent's astral-agent skill. 
The @@ -14,7 +14,7 @@ VM="node1" while [ $# -gt 0 ]; do case "$1" in --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; - *) echo "usage: link-swarm [--vm ]" >&2; exit 64 ;; + *) echo "usage: adopt-node [--vm ]" >&2; exit 64 ;; esac done @@ -28,16 +28,16 @@ REMOTE_BODY=$(cat <<'EOS' set -eu d=/home/tester/.netsim mkdir -p "$d" -printf '%s' "$prompt_b64" | base64 -d > "$d/link-swarm.prompt" +printf '%s' "$prompt_b64" | base64 -d > "$d/adopt-node.prompt" chown -R tester:tester "$d" # Run the agent as `tester` (qwen is installed for that user), non-interactively. # Invocation matches what was validated for bootstrap-user-software-key: one-shot positional # prompt + `-y` (auto-approve). -su - tester -c 'qwen -y "$(cat /home/tester/.netsim/link-swarm.prompt)"' \ - > "$d/link-swarm.log" 2>&1 || { +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/adopt-node.prompt)"' \ + > "$d/adopt-node.log" 2>&1 || { echo "qwen run failed on $(hostname); tail of log:" >&2 - tail -n 40 "$d/link-swarm.log" >&2 + tail -n 40 "$d/adopt-node.log" >&2 exit 1 } @@ -49,17 +49,17 @@ tok="$d/user.token" if [ -s "$tok" ]; then if ASTRALD_APPHOST_TOKEN=$(cat "$tok") astral-query user.swarm_status -out json 2>/dev/null \ | grep -q '"Linked":true'; then - echo "link-swarm: $(hostname) reports a linked sibling" + echo "adopt-node: $(hostname) reports a linked sibling" else - echo "link-swarm: WARNING $(hostname) shows no linked sibling yet (verify.sh decides)" >&2 + echo "adopt-node: WARNING $(hostname) shows no linked sibling yet (verify.sh decides)" >&2 fi fi -echo "link-swarm: agent finished on $(hostname)" +echo "adopt-node: agent finished on $(hostname)" EOS ) -echo "link-swarm: driving Qwen operator on $VM ..." +echo "adopt-node: driving Qwen operator on $VM ..." 
# assignment prefix carries the prompt to the guest; body re-parses it # shellcheck disable=SC2029 netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" -echo "link-swarm: done on $VM" +echo "adopt-node: done on $VM" diff --git a/netsim/tasks/link-swarm/verify.py b/netsim/tasks/adopt-node/verify.py similarity index 97% rename from netsim/tasks/link-swarm/verify.py rename to netsim/tasks/adopt-node/verify.py index 06631dff..9727f248 100755 --- a/netsim/tasks/link-swarm/verify.py +++ b/netsim/tasks/adopt-node/verify.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""verify link-swarm: node1 and node2 linked into one User swarm, symmetric roster. +"""verify adopt-node: node1 and node2 linked into one User swarm, symmetric roster. Independent both-ends check (does not trust run.sh); reaches the VMs via netsim ssh. """ @@ -110,7 +110,7 @@ def main(): errs.append(f"node2 has no active link back to node1 ({s1})") if errs: - sys.stderr.write("link-swarm verify FAILED:\n") + sys.stderr.write("adopt-node verify FAILED:\n") for e in errs: sys.stderr.write(f" - {e}\n") return 1 diff --git a/netsim/tasks/link-swarm/verify.sh b/netsim/tasks/adopt-node/verify.sh similarity index 100% rename from netsim/tasks/link-swarm/verify.sh rename to netsim/tasks/adopt-node/verify.sh diff --git a/netsim/tasks/share-object/run.sh b/netsim/tasks/share-object/run.sh index ae2c073d..c5896793 100755 --- a/netsim/tasks/share-object/run.sh +++ b/netsim/tasks/share-object/run.sh @@ -5,7 +5,7 @@ # share-object [--vm ] (default: node1 — the VM carrying Qwen) # # Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-software-key / -# link-swarm: tiny script, thin prompt, intelligence in the agent's astral-agent +# adopt-node: tiny script, thin prompt, intelligence in the agent's astral-agent # skill. 
The agent stores a payload ON THE OTHER node — addressing the sibling # explicitly as the query target (:objects.store) — then loads it back from # that node and confirms the bytes round-trip. verify.py then INDEPENDENTLY @@ -37,7 +37,7 @@ printf '%s' "$prompt_b64" | base64 -d > "$d/share-object.prompt" chown -R tester:tester "$d" # Run the agent as `tester` (qwen is installed for that user), non-interactively. -# Invocation matches what was validated for bootstrap-user-software-key / link-swarm: one-shot +# Invocation matches what was validated for bootstrap-user-software-key / adopt-node: one-shot # positional prompt + `-y` (auto-approve). su - tester -c 'qwen -y "$(cat /home/tester/.netsim/share-object.prompt)"' \ > "$d/share-object.log" 2>&1 || { From 9518d780af9302ad0918f62f7463ca95465c437c Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 15:03:35 +0200 Subject: [PATCH 26/57] netsim: rename the single-User-node stage astrald-user -> astrald-single-node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both bootstrap-user-software-key and import-user-software-key produce the same single-node stage (a node set up as a User; they differ only in the User key — random vs the embedded mnemonic). Name it astrald-single-node and point adopt-node at it. Stage-name change only (set via --save/--stage); no script logic depends on it. 
--- netsim/README.md | 20 +++++++++---------- netsim/stories/adopt-node.story | 4 ++-- .../stories/bootstrap-user-software-key.story | 4 ++-- netsim/stories/import-user-software-key.story | 4 ++-- netsim/tasks/adopt-node/README.md | 2 +- netsim/tasks/adopt-node/run.sh | 2 +- .../bootstrap-user-software-key/README.md | 2 +- .../tasks/import-user-software-key/README.md | 2 +- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/netsim/README.md b/netsim/README.md index a7418c73..e6c4807e 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -16,16 +16,16 @@ netsim/ tasks/ # each task: run.sh (+ verify.sh / verify.py) + README.md install-astrald/ # build + run astrald as a service on each node configure-astral-agent/ # install the astral-agent skill into the qwen operator - bootstrap-user-software-key/ # make node1 a User node, new key -> astrald-user - import-user-software-key/ # make node1 a User node, existing mnemonic -> astrald-user + bootstrap-user-software-key/ # make node1 a User node, new key -> astrald-single-node + import-user-software-key/ # make node1 a User node, existing mnemonic -> astrald-single-node adopt-node/ # adopt node2 into node1's swarm -> astrald-swarm share-object/ # store an object on the sibling -> astrald-shared stories/ # one story per tested flow (start/save stage in each header) - lab.story # null -> astrald-lab - bootstrap-user-software-key.story # astrald-lab -> astrald-user - import-user-software-key.story # astrald-lab -> astrald-user (alt.) - adopt-node.story # astrald-user -> astrald-swarm - share-object.story # astrald-swarm -> astrald-shared + lab.story # null -> astrald-lab + bootstrap-user-software-key.story # astrald-lab -> astrald-single-node + import-user-software-key.story # astrald-lab -> astrald-single-node (alt.) 
+ adopt-node.story # astrald-single-node -> astrald-swarm + share-object.story # astrald-swarm -> astrald-shared link.sh # register tasks with netsim (idempotent; re-run anytime) README.md ``` @@ -105,12 +105,12 @@ stage (its `start`/`save` stages are in the story header). Intermediate stages stay reusable, so you can replay one flow without rebuilding the chain: ``` -astrald-lab ─[bootstrap-user-software-key]→ astrald-user ─[adopt-node]→ astrald-swarm ─[share-object]→ astrald-shared +astrald-lab ─[bootstrap-user-software-key]→ astrald-single-node ─[adopt-node]→ astrald-swarm ─[share-object]→ astrald-shared ``` ```sh -netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user-software-key.story -netsim story --stage astrald-user --save astrald-swarm netsim/stories/adopt-node.story +netsim story --stage astrald-lab --save astrald-single-node netsim/stories/bootstrap-user-software-key.story +netsim story --stage astrald-single-node --save astrald-swarm netsim/stories/adopt-node.story netsim story --stage astrald-swarm --save astrald-shared netsim/stories/share-object.story ``` diff --git a/netsim/stories/adopt-node.story b/netsim/stories/adopt-node.story index 5c32097c..b44b55c9 100644 --- a/netsim/stories/adopt-node.story +++ b/netsim/stories/adopt-node.story @@ -1,4 +1,4 @@ # adopt-node.story — adopt node2 into node1's User swarm (symmetric roster). 
-# start: astrald-user save: astrald-swarm -# netsim story --stage astrald-user --save astrald-swarm netsim/stories/adopt-node.story +# start: astrald-single-node save: astrald-swarm +# netsim story --stage astrald-single-node --save astrald-swarm netsim/stories/adopt-node.story adopt-node diff --git a/netsim/stories/bootstrap-user-software-key.story b/netsim/stories/bootstrap-user-software-key.story index 9547643b..6a96275a 100644 --- a/netsim/stories/bootstrap-user-software-key.story +++ b/netsim/stories/bootstrap-user-software-key.story @@ -1,4 +1,4 @@ # bootstrap-user-software-key.story — node1 becomes a User-controlled node. -# start: astrald-lab save: astrald-user -# netsim story --stage astrald-lab --save astrald-user netsim/stories/bootstrap-user-software-key.story +# start: astrald-lab save: astrald-single-node +# netsim story --stage astrald-lab --save astrald-single-node netsim/stories/bootstrap-user-software-key.story bootstrap-user-software-key diff --git a/netsim/stories/import-user-software-key.story b/netsim/stories/import-user-software-key.story index 75857202..12f12d7f 100644 --- a/netsim/stories/import-user-software-key.story +++ b/netsim/stories/import-user-software-key.story @@ -1,6 +1,6 @@ # import-user-software-key.story — make node1 a User node from an EXISTING mnemonic # (embedded in the task's prompt.md; alternative to bootstrap-user-software-key). # Optional env ASTRAL_USER_ID makes verify assert the derived id. 
-# start: astrald-lab save: astrald-user -# netsim story --stage astrald-lab --save astrald-user netsim/stories/import-user-software-key.story +# start: astrald-lab save: astrald-single-node +# netsim story --stage astrald-lab --save astrald-single-node netsim/stories/import-user-software-key.story import-user-software-key diff --git a/netsim/tasks/adopt-node/README.md b/netsim/tasks/adopt-node/README.md index 2460ca3e..7ad54a8e 100644 --- a/netsim/tasks/adopt-node/README.md +++ b/netsim/tasks/adopt-node/README.md @@ -4,4 +4,4 @@ Drives the Qwen operator on node1 to **adopt node2** into the User's swarm (`user.adopt`), following the astral-agent skill's node-adoption playbook. `verify.py` independently confirms both nodes hold a contract under the same User, a mutual link, and a symmetric roster (each lists the other as a Linked sibling). -Produces stage `astrald-swarm` (from `astrald-user`). +Produces stage `astrald-swarm` (from `astrald-single-node`). diff --git a/netsim/tasks/adopt-node/run.sh b/netsim/tasks/adopt-node/run.sh index 7e8c7d77..0a218968 100755 --- a/netsim/tasks/adopt-node/run.sh +++ b/netsim/tasks/adopt-node/run.sh @@ -1,7 +1,7 @@ #!/bin/sh # adopt-node: adopt the second node into the User's swarm, driven by the Qwen # Code agent running INSIDE node1 (which is already a User node from -# bootstrap-user-software-key — default starting stage: astrald-user). +# bootstrap-user-software-key — default starting stage: astrald-single-node). # adopt-node [--vm ] (default: node1 — the VM carrying Qwen) # # Runs ON THE HOST (cwd = simulation root). 
Same mechanic as bootstrap-user-software-key: diff --git a/netsim/tasks/bootstrap-user-software-key/README.md b/netsim/tasks/bootstrap-user-software-key/README.md index 4265d3ae..65c12e91 100644 --- a/netsim/tasks/bootstrap-user-software-key/README.md +++ b/netsim/tasks/bootstrap-user-software-key/README.md @@ -3,4 +3,4 @@ Drives the Qwen operator on node1 to make it a **User-controlled node** — mint a software User and install node1's active contract — following the astral-agent skill's node-setup playbook. `verify.sh` independently confirms node1 answers as -that User. Produces stage `astrald-user` (from `astrald-lab`). +that User. Produces stage `astrald-single-node` (from `astrald-lab`). diff --git a/netsim/tasks/import-user-software-key/README.md b/netsim/tasks/import-user-software-key/README.md index e8b53fce..74e6e244 100644 --- a/netsim/tasks/import-user-software-key/README.md +++ b/netsim/tasks/import-user-software-key/README.md @@ -5,4 +5,4 @@ existing software User** — deriving the User key from a fixed, known BIP-39 mnemonic embedded in `prompt.md` instead of minting a fresh one — following the astral-agent skill's node-setup playbook. `verify.sh` confirms node1 answers as that User (and, if `ASTRAL_USER_ID` is set, that the derived id matches exactly). -A drop-in alternative to `bootstrap-user-software-key`; produces stage `astrald-user`. +A drop-in alternative to `bootstrap-user-software-key`; produces stage `astrald-single-node`. 
From 526d6e42a3e118f858fd44c3db30d5d5571b76f3 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 15:20:52 +0200 Subject: [PATCH 27/57] netsim: replace share-object with object-store + read-remote-object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the object lifecycle into two focused scenarios and drop the combined (write-direction) share-object: - object-store (0006): node1 stores an object in its OWN local repo and reads it back; agent-driven, verify re-loads -repo local. astrald-swarm -> astrald-stored. - read-remote-object (0007): node2 reads node1's stored object OVER ASTRAL; host-driven (node2 has no operator), verify runs the :objects.load ladder and asserts the bytes. astrald-stored -> astrald-read. This is the peer-reads-node1 direction that failed pre-#348 — re-probed on current master. Rewire README pipeline accordingly. --- netsim/README.md | 17 +- netsim/stories/object-store.story | 4 + netsim/stories/read-remote-object.story | 4 + netsim/stories/share-object.story | 4 - netsim/tasks/object-store/README.md | 8 + netsim/tasks/object-store/prompt.md | 12 ++ netsim/tasks/object-store/run.sh | 62 ++++++++ netsim/tasks/object-store/verify.py | 104 +++++++++++++ netsim/tasks/object-store/verify.sh | 5 + netsim/tasks/read-remote-object/README.md | 13 ++ netsim/tasks/read-remote-object/run.sh | 8 + netsim/tasks/read-remote-object/verify.py | 178 +++++++++++++++++++++ netsim/tasks/read-remote-object/verify.sh | 5 + netsim/tasks/share-object/README.md | 6 - netsim/tasks/share-object/prompt.md | 15 -- netsim/tasks/share-object/run.sh | 74 --------- netsim/tasks/share-object/verify.py | 180 ---------------------- netsim/tasks/share-object/verify.sh | 7 - 18 files changed, 413 insertions(+), 293 deletions(-) create mode 100644 netsim/stories/object-store.story create mode 100644 netsim/stories/read-remote-object.story delete mode 100644 netsim/stories/share-object.story create mode 100644 
netsim/tasks/object-store/README.md create mode 100644 netsim/tasks/object-store/prompt.md create mode 100755 netsim/tasks/object-store/run.sh create mode 100644 netsim/tasks/object-store/verify.py create mode 100755 netsim/tasks/object-store/verify.sh create mode 100644 netsim/tasks/read-remote-object/README.md create mode 100755 netsim/tasks/read-remote-object/run.sh create mode 100644 netsim/tasks/read-remote-object/verify.py create mode 100755 netsim/tasks/read-remote-object/verify.sh delete mode 100644 netsim/tasks/share-object/README.md delete mode 100644 netsim/tasks/share-object/prompt.md delete mode 100755 netsim/tasks/share-object/run.sh delete mode 100755 netsim/tasks/share-object/verify.py delete mode 100755 netsim/tasks/share-object/verify.sh diff --git a/netsim/README.md b/netsim/README.md index e6c4807e..bf10a4b9 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -19,13 +19,15 @@ netsim/ bootstrap-user-software-key/ # make node1 a User node, new key -> astrald-single-node import-user-software-key/ # make node1 a User node, existing mnemonic -> astrald-single-node adopt-node/ # adopt node2 into node1's swarm -> astrald-swarm - share-object/ # store an object on the sibling -> astrald-shared + object-store/ # node1 stores an object locally + reads it -> astrald-stored + read-remote-object/ # node2 reads node1's object over astral -> astrald-read stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab bootstrap-user-software-key.story # astrald-lab -> astrald-single-node import-user-software-key.story # astrald-lab -> astrald-single-node (alt.) 
adopt-node.story # astrald-single-node -> astrald-swarm - share-object.story # astrald-swarm -> astrald-shared + object-store.story # astrald-swarm -> astrald-stored + read-remote-object.story # astrald-stored -> astrald-read link.sh # register tasks with netsim (idempotent; re-run anytime) README.md ``` @@ -105,13 +107,14 @@ stage (its `start`/`save` stages are in the story header). Intermediate stages stay reusable, so you can replay one flow without rebuilding the chain: ``` -astrald-lab ─[bootstrap-user-software-key]→ astrald-single-node ─[adopt-node]→ astrald-swarm ─[share-object]→ astrald-shared +astrald-lab ─[bootstrap-user-software-key]→ astrald-single-node ─[adopt-node]→ astrald-swarm ─[object-store]→ astrald-stored ─[read-remote-object]→ astrald-read ``` ```sh netsim story --stage astrald-lab --save astrald-single-node netsim/stories/bootstrap-user-software-key.story netsim story --stage astrald-single-node --save astrald-swarm netsim/stories/adopt-node.story -netsim story --stage astrald-swarm --save astrald-shared netsim/stories/share-object.story +netsim story --stage astrald-swarm --save astrald-stored netsim/stories/object-store.story +netsim story --stage astrald-stored --save astrald-read netsim/stories/read-remote-object.story ``` Each story drives the Qwen operator through its `astral-agent` skill, then runs an @@ -120,6 +123,6 @@ test for that flow. ## Scope -The lab stands up two astrald nodes, links them into one User Swarm, and stores -an object on the sibling across it. Nodes discover each other on the shared L2 -LAN via UDP 8822 (`ether`/`nearby`). +The lab stands up two astrald nodes, links them into one User Swarm, stores an +object on a node, and reads it from a peer across the swarm. Nodes discover each +other on the shared L2 LAN via UDP 8822 (`ether`/`nearby`). 
diff --git a/netsim/stories/object-store.story b/netsim/stories/object-store.story new file mode 100644 index 00000000..d8de8e89 --- /dev/null +++ b/netsim/stories/object-store.story @@ -0,0 +1,4 @@ +# object-store.story — node1 stores an object locally and reads it back. +# start: astrald-swarm save: astrald-stored +# netsim story --stage astrald-swarm --save astrald-stored netsim/stories/object-store.story +object-store diff --git a/netsim/stories/read-remote-object.story b/netsim/stories/read-remote-object.story new file mode 100644 index 00000000..5198c14f --- /dev/null +++ b/netsim/stories/read-remote-object.story @@ -0,0 +1,4 @@ +# read-remote-object.story — node2 reads node1's object over astral. +# start: astrald-stored save: astrald-read +# netsim story --stage astrald-stored --save astrald-read netsim/stories/read-remote-object.story +read-remote-object diff --git a/netsim/stories/share-object.story b/netsim/stories/share-object.story deleted file mode 100644 index 9ac66f71..00000000 --- a/netsim/stories/share-object.story +++ /dev/null @@ -1,4 +0,0 @@ -# share-object.story — store an astral object on the swarm sibling (node2). -# start: astrald-swarm save: astrald-shared -# netsim story --stage astrald-swarm --save astrald-shared netsim/stories/share-object.story -share-object diff --git a/netsim/tasks/object-store/README.md b/netsim/tasks/object-store/README.md new file mode 100644 index 00000000..44fc5b17 --- /dev/null +++ b/netsim/tasks/object-store/README.md @@ -0,0 +1,8 @@ +# object-store + +Drives the Qwen operator on node1 to **store an astral object in its own local +repo and read it back** — the basic local object operations (`objects.store` then +`objects.load`) — following the astral-agent skill. `verify.py` independently +re-loads the object from node1's local repo and asserts the bytes match. Produces +stage `astrald-stored` (from `astrald-swarm`); the saved object is what +`read-remote-object` then fetches from a peer. 
diff --git a/netsim/tasks/object-store/prompt.md b/netsim/tasks/object-store/prompt.md new file mode 100644 index 00000000..809561df --- /dev/null +++ b/netsim/tasks/object-store/prompt.md @@ -0,0 +1,12 @@ +On this machine an `astrald` node is running and you control it as its User (a +User-bound apphost token is recorded in `$HOME/info.json` under `user_token`). +Acting as that User, store a short, distinctive text payload as an astral object +via the objects protocol, following your **astral-agent** skill, and note the +Object ID it returns. Then read that object back by its Object ID and confirm the +bytes match what you stored. + +Then add to `$HOME/info.json` (keep the existing `user_*` keys) the keys +`object_id` (the Object ID), `object_payload` (the exact payload you stored), and +`object_readback` (the bytes you read back). The skill won't mention this — it's +how the run is checked. Success means the object is stored, read back with matching +bytes, and those keys are written. diff --git a/netsim/tasks/object-store/run.sh b/netsim/tasks/object-store/run.sh new file mode 100755 index 00000000..4d88748b --- /dev/null +++ b/netsim/tasks/object-store/run.sh @@ -0,0 +1,62 @@ +#!/bin/sh +# object-store: have node1 store an astral object in its OWN local repo and read it +# back, driven by the Qwen Code agent running INSIDE node1. +# object-store [--vm <host>] (default: node1 — the VM carrying Qwen) +# +# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-*: tiny +# script, thin prompt, intelligence in the agent's astral-agent skill. The agent +# exercises the basic LOCAL object operations — store a payload, surface its Object +# ID, load it back by that id, confirm the round-trip. verify.py then INDEPENDENTLY +# re-reads the object from node1's local repo. The whole remote program travels as +# ONE argv to `netsim ssh`; the prompt rides along base64-encoded. 
+set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: object-store [--vm ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/object-store.prompt" +chown -R tester:tester "$d" + +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/object-store.prompt)"' \ + > "$d/object-store.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/object-store.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.py does the authoritative, independent check. The agent +# records its outputs in $HOME/info.json (/home/tester/info.json). 
+oid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_id",""))' 2>/dev/null || true) +opay=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_payload",""))' 2>/dev/null || true) +orb=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_readback",""))' 2>/dev/null || true) +[ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } +[ -n "$opay" ] || { echo "agent recorded no object_payload on $(hostname)" >&2; exit 1; } +[ -n "$orb" ] || { echo "agent recorded no object_readback on $(hostname)" >&2; exit 1; } +case "$oid" in + data1*) : ;; + *) echo "WARNING $(hostname): object_id does not look like a data1… Object ID (verify.py decides)" >&2 ;; +esac +[ "$opay" = "$orb" ] || echo "WARNING $(hostname): agent read-back != stored payload (verify.py decides)" >&2 +echo "object-store: agent finished on $(hostname); stored+read object $oid" +EOS +) + +echo "object-store: driving Qwen operator on $VM ..." +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "object-store: done on $VM" diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py new file mode 100644 index 00000000..24595005 --- /dev/null +++ b/netsim/tasks/object-store/verify.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +"""verify object-store: node1 stored an object in its local repo and can read it back. + +Independent host-side check (does not trust run.sh or the agent's read-back): a +repo-pinned, ungated objects.load -repo local on node1 must return the exact stored +bytes. Reaches the VM via netsim ssh. 
+""" +import argparse +import json +import subprocess +import sys + + +def ssh(vm, remote): + """Run `netsim ssh -- ` on the host; return stdout (best-effort).""" + p = subprocess.run(["netsim", "ssh", vm, "--", remote], + capture_output=True, text=True) + return p.stdout + + +def info(vm): + """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" + try: + return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + except json.JSONDecodeError: + return {} + + +def objs(stream): + out = [] + for ln in (stream or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + out.append(json.loads(ln)) + except json.JSONDecodeError: + pass + return out + + +def loaded_payload(stream): + """From an objects.load stream, the decoded payload string, or None.""" + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + ob = o.get("Object") + if isinstance(ob, str): + return ob + return None + + +def errors(stream): + return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", default="node1") + args, _ = ap.parse_known_args() + vm = args.vm + + info1 = info(vm) + ID = "".join(str(info1.get("object_id", "")).split()) + PAY = str(info1.get("object_payload", "")).rstrip("\n") + READBACK = str(info1.get("object_readback", "")).rstrip("\n") + + # Decisive: re-load the object from node1's local repo (repo-pinned + ungated). 
+ n1_load = ssh(vm, f"astral-query objects.load -id '{ID}' -repo local -out json") + got = loaded_payload(n1_load) + local_ok = got is not None and got.rstrip("\n") == PAY + + errs, notes = [], [] + if not ID: + errs.append("no object_id in node1's info.json") + if not PAY: + errs.append("no object_payload in node1's info.json") + if READBACK and READBACK != PAY: + notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") + + if not errs and local_ok: + print(f"object-store OK: node1 stored object {ID[:12]}.. and its local repo " + f"returns the exact bytes ({len(PAY)} B).") + for n in notes: + sys.stderr.write(f" note: {n}\n") + return 0 + + sys.stderr.write("object-store verify FAILED: node1 could not re-load its own stored object.\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + if got is None: + sys.stderr.write(" objects.load -repo local returned no payload (see error frames below).\n") + elif not local_ok: + sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") + for e in errors(n1_load): + sys.stderr.write(f" load error_message: {e}\n") + for n in notes: + sys.stderr.write(f" note: {n}\n") + sys.stderr.write(f" (id={ID} load={'hit' if got is not None else 'miss'})\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/object-store/verify.sh b/netsim/tasks/object-store/verify.sh new file mode 100755 index 00000000..9d719cca --- /dev/null +++ b/netsim/tasks/object-store/verify.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Thin shim — all verification logic lives in verify.py. netsim sets $NETSIM_TASK_DIR +# to this task's directory and only auto-runs run.sh/verify.sh, so verify.py sits +# next to us and is invoked here (the dirname fallback covers running this directly). 
+exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/read-remote-object/README.md b/netsim/tasks/read-remote-object/README.md new file mode 100644 index 00000000..8e6bbd71 --- /dev/null +++ b/netsim/tasks/read-remote-object/README.md @@ -0,0 +1,13 @@ +# read-remote-object + +Confirms a peer (node2) can read **node1's** object **over astral** — the object +that `object-store` stored in node1's local repo. Host-driven: node2 has no Qwen +operator, so `verify.py` issues the read (resolve node1's identity + the stored +`object_id`, then node2 runs `<node1>:objects.load` and asserts the exact bytes, +with transparent/`objects.find` as fallback diagnostics). No agent comprehension +axis — this is a pure implementation-axis probe. Produces stage `astrald-read` +(from `astrald-stored`). + +Note: the peer-reads-node1 direction **failed before astrald #348** (the roster +sync); this task re-probes it on current master. It may now pass (node2 knows +node1, and `op_load` is ungated) or surface the gap — either is a valid finding. diff --git a/netsim/tasks/read-remote-object/run.sh b/netsim/tasks/read-remote-object/run.sh new file mode 100755 index 00000000..1cde3c42 --- /dev/null +++ b/netsim/tasks/read-remote-object/run.sh @@ -0,0 +1,8 @@ +#!/bin/sh +# read-remote-object has no run-phase setup: node2 has no Qwen operator, so the +# remote read of node1's object IS the thing under test. verify.py performs it +# (resolve node1's identity + the object_id stored by object-store, then have node2 +# read <node1>:objects.load and assert the bytes). run.sh is a no-op placeholder so +# netsim discovers the task and hands off to verify.sh. +set -eu +echo "read-remote-object: no run-phase setup; verify.py performs the cross-read." 
diff --git a/netsim/tasks/read-remote-object/verify.py b/netsim/tasks/read-remote-object/verify.py new file mode 100644 index 00000000..129bdccd --- /dev/null +++ b/netsim/tasks/read-remote-object/verify.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +"""verify read-remote-object: node2 reads node1's object OVER ASTRAL. + +node1 stored an object in its local repo (object-store). This confirms a peer +(node2) can obtain those exact bytes across the swarm. Host-driven, since node2 has +no operator. PRE-#348 this direction (peer reads node1) failed (route_not_found / +0 providers); this task re-probes it on current master. op_load is ungated, so a +successful route returns the bytes. + +Ladder on node2 (strongest -> weakest), PASS iff node2 gets the exact stored bytes +via hop 1 or 2: + 1. explicit target <node1>:objects.load -id <id> (query-target routing) + 2. transparent objects.load -id <id> (zone-based) + 3. provider find objects.find -id <id> (discovery; identities, not bytes) +""" +import argparse +import json +import subprocess +import sys + + +def ssh(vm, remote): + """Run `netsim ssh <vm> -- <remote>` on the host; return stdout (best-effort).""" + p = subprocess.run(["netsim", "ssh", vm, "--", remote], + capture_output=True, text=True) + return p.stdout + + +def info(vm): + """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" + try: + return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + except json.JSONDecodeError: + return {} + + +def objs(stream): + out = [] + for ln in (stream or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + out.append(json.loads(ln)) + except json.JSONDecodeError: + pass + return out + + +def loaded_payload(stream): + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + ob = o.get("Object") + if isinstance(ob, str): + return ob + return None + + +def errors(stream): + return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] + + +def 
contains_local(stream): + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + if isinstance(o.get("Object"), bool): + return o["Object"] + return None + + +def find_identities(stream): + ids = [] + for o in objs(stream): + if o.get("Type") in ("eos", "error_message"): + continue + ob = o.get("Object") + if isinstance(ob, str): + ids.append(ob) + return ids + + +def contract_subject(stream): + """node1's node identity = Subject of its active contract (from user.info).""" + for o in objs(stream): + ob = o.get("Object") + if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): + c = ob["Contract"].get("Contract", {}) + if c.get("Subject"): + return c["Subject"] + return None + + +def remote_identity(stream): + """Fallback: RemoteIdentity from node2's nodes.links (the link to node1).""" + for o in objs(stream): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("RemoteIdentity"): + return ob["RemoteIdentity"] + return None + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--node1", default="node1") + ap.add_argument("--node2", default="node2") + args, _ = ap.parse_known_args() + vm1, vm2 = args.node1, args.node2 + + # The object node1 stored (object-store) and node1's node identity. node1 acts as + # the User (token from info.json) so user.info returns the contract whose Subject + # is node1's node identity; node2's link-back is the fallback. + info1 = info(vm1) + ID = "".join(str(info1.get("object_id", "")).split()) + PAY = str(info1.get("object_payload", "")).rstrip("\n") + token = info1.get("user_token", "") + n1_info = ssh(vm1, f"export ASTRALD_APPHOST_TOKEN={token}; astral-query user.info -out json") + n2_links = ssh(vm2, "astral-query nodes.links -out json") + N1 = contract_subject(n1_info) or remote_identity(n2_links) or "" + + # node2 answers under its node identity (anonymous host-side caller, no token). 
+ n2_contains = ssh(vm2, f"astral-query objects.contains -repo local -id '{ID}' -out json") + n2_explicit = ssh(vm2, f"astral-query '{N1}':objects.load -id '{ID}' -out json") if N1 else "" + n2_transparent = ssh(vm2, f"astral-query objects.load -id '{ID}' -out json") + n2_find = ssh(vm2, f"astral-query objects.find -id '{ID}' -out json") + + already_local = contains_local(n2_contains) + explicit = loaded_payload(n2_explicit) + transparent = loaded_payload(n2_transparent) + providers = find_identities(n2_find) + + explicit_ok = explicit is not None and explicit.rstrip("\n") == PAY + transparent_ok = transparent is not None and transparent.rstrip("\n") == PAY + + errs, notes = [], [] + if not ID: + errs.append("no object_id in node1's info.json (run object-store first)") + if not PAY: + errs.append("no object_payload in node1's info.json") + if not N1: + notes.append("could not resolve node1's node identity host-side (explicit-target read skipped)") + if already_local is True: + notes.append("objects.contains reports node2 may ALREADY hold this object locally; " + "the byte-match might not be a genuine remote pull") + + if not errs and (explicit_ok or transparent_ok): + path = ("explicit-target (:objects.load)" if explicit_ok + else "transparent (objects.load)") + print(f"read-remote-object OK: node2 read node1's object {ID[:12]}.. across the swarm " + f"via {path}; bytes match ({len(PAY)} B). 
providers via objects.find: {len(providers)}.") + for n in notes: + sys.stderr.write(f" note: {n}\n") + return 0 + + sys.stderr.write("read-remote-object verify FAILED: node2 did NOT obtain node1's object across the swarm.\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + if (N1 in providers) if N1 else bool(providers): + sys.stderr.write(" FINDING: objects.find DID return a provider (discovery crosses) but the byte " + "READ did not route -- record which hop in the task log.\n") + for label, stream in (("explicit-target", n2_explicit), + ("transparent", n2_transparent), + ("objects.find", n2_find)): + for e in errors(stream): + sys.stderr.write(f" {label} error_message: {e}\n") + for n in notes: + sys.stderr.write(f" note: {n}\n") + n1disp = (N1[:12] + "..") if N1 else "?" + sys.stderr.write(f" (id={ID} node1={n1disp} " + f"explicit={'hit' if explicit is not None else 'miss'} " + f"transparent={'hit' if transparent is not None else 'miss'} " + f"find_providers={len(providers)})\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/read-remote-object/verify.sh b/netsim/tasks/read-remote-object/verify.sh new file mode 100755 index 00000000..9d719cca --- /dev/null +++ b/netsim/tasks/read-remote-object/verify.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Thin shim — all verification logic lives in verify.py. netsim sets $NETSIM_TASK_DIR +# to this task's directory and only auto-runs run.sh/verify.sh, so verify.py sits +# next to us and is invoked here (the dirname fallback covers running this directly). 
+exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/share-object/README.md b/netsim/tasks/share-object/README.md deleted file mode 100644 index 6faeec9a..00000000 --- a/netsim/tasks/share-object/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# share-object - -Drives the Qwen operator on node1 to **store an astral object on its swarm sibling -node2** (`:objects.store`) and read it back. `verify.py` independently -confirms node2 physically holds the object in its local repo. Produces stage -`astrald-shared` (from `astrald-swarm`). diff --git a/netsim/tasks/share-object/prompt.md b/netsim/tasks/share-object/prompt.md deleted file mode 100644 index fc385228..00000000 --- a/netsim/tasks/share-object/prompt.md +++ /dev/null @@ -1,15 +0,0 @@ -On this machine an `astrald` node is running and you control it as its User (a -User-bound apphost token is recorded in `$HOME/info.json` under `user_token`). Your -swarm has one other node — a sibling. Acting as that User, store a short, -distinctive text payload as an astral object **on that other node** — address it -explicitly as the query target — via the objects protocol, following your -**astral-agent** skill, and note the Object ID it returns. Then read the object -back **from that other node** by its Object ID and confirm the bytes match what you -stored. - -Then add to `$HOME/info.json` (keep the existing `user_*` keys) these keys: -`object_id` (the Object ID), `object_payload` (the exact payload you stored), -`object_readback` (the bytes you read back), and `object_target` (the node id you -stored it on). The skill won't mention this — it's how the run is checked. Success -means the object is stored on the other node, read back with matching bytes, and -those keys are written. 
diff --git a/netsim/tasks/share-object/run.sh b/netsim/tasks/share-object/run.sh deleted file mode 100755 index c5896793..00000000 --- a/netsim/tasks/share-object/run.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/sh -# share-object: have node1 store an astral object ON its swarm sibling (node2) and -# read it back, driven by the Qwen Code agent running INSIDE node1 (already a User -# node in one swarm with node2 — default starting stage: astrald-swarm). -# share-object [--vm ] (default: node1 — the VM carrying Qwen) -# -# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-software-key / -# adopt-node: tiny script, thin prompt, intelligence in the agent's astral-agent -# skill. The agent stores a payload ON THE OTHER node — addressing the sibling -# explicitly as the query target (:objects.store) — then loads it back from -# that node and confirms the bytes round-trip. verify.py then INDEPENDENTLY -# confirms node2 physically holds the object in its LOCAL repo with matching bytes -# (objects.contains/load -repo local on node2). The whole remote program travels as -# ONE argv to `netsim ssh`; the prompt rides along base64-encoded so a multi-line -# file never fights shell quoting. 
-set -eu - -VM="node1" -while [ $# -gt 0 ]; do - case "$1" in - --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; - *) echo "usage: share-object [--vm ]" >&2; exit 64 ;; - esac -done - -# CDPATH= is an intentional one-shot env prefix for cd, not an assignment -# shellcheck disable=SC1007 -here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) -[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } -prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line - -REMOTE_BODY=$(cat <<'EOS' -set -eu -d=/home/tester/.netsim -mkdir -p "$d" -printf '%s' "$prompt_b64" | base64 -d > "$d/share-object.prompt" -chown -R tester:tester "$d" - -# Run the agent as `tester` (qwen is installed for that user), non-interactively. -# Invocation matches what was validated for bootstrap-user-software-key / adopt-node: one-shot -# positional prompt + `-y` (auto-approve). -su - tester -c 'qwen -y "$(cat /home/tester/.netsim/share-object.prompt)"' \ - > "$d/share-object.log" 2>&1 || { - echo "qwen run failed on $(hostname); tail of log:" >&2 - tail -n 40 "$d/share-object.log" >&2 - exit 1 - } - -# Cheap smoke-check; verify.py does the authoritative, independent check (node2 -# physically holds the object in its local repo). The agent records its outputs in -# $HOME/info.json (/home/tester/info.json). 
-oid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_id",""))' 2>/dev/null || true) -opay=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_payload",""))' 2>/dev/null || true) -orb=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_readback",""))' 2>/dev/null || true) -otgt=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_target",""))' 2>/dev/null || true) -[ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } -[ -n "$opay" ] || { echo "agent recorded no object_payload on $(hostname)" >&2; exit 1; } -[ -n "$orb" ] || { echo "agent recorded no object_readback on $(hostname)" >&2; exit 1; } -[ -n "$otgt" ] || { echo "agent recorded no object_target on $(hostname)" >&2; exit 1; } -case "$oid" in - data1*) : ;; - *) echo "WARNING $(hostname): object_id does not look like a data1… Object ID (verify.py decides)" >&2 ;; -esac -# Advisory: the agent's own round-trip should already match (verify.py re-checks). -[ "$opay" = "$orb" ] || echo "WARNING $(hostname): agent read-back != stored payload (verify.py decides)" >&2 -echo "share-object: agent finished on $(hostname); stored object $oid on $otgt" -EOS -) - -echo "share-object: driving Qwen operator on $VM ..." -# assignment prefix carries the prompt to the guest; body re-parses it -# shellcheck disable=SC2029 -netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" -echo "share-object: done on $VM" diff --git a/netsim/tasks/share-object/verify.py b/netsim/tasks/share-object/verify.py deleted file mode 100755 index 722134e1..00000000 --- a/netsim/tasks/share-object/verify.py +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env python3 -"""verify share-object: prove node2 physically holds the object node1 stored on it. 
- -Independent host-side check (does not trust run.sh or the agent's read-back): a -repo-pinned, ungated objects.load -repo local on node2 must return the exact stored -bytes. Reaches the VMs via netsim ssh. See README.md for the full rationale. -""" -import argparse -import json -import subprocess -import sys - -def ssh(vm, remote): - """Run `netsim ssh -- ` on the host; return stdout (best-effort). - - astral-query writes error_message frames to stdout (which we parse) and other - failures (route_not_found, etc.) to stderr (which we drop). - """ - p = subprocess.run(["netsim", "ssh", vm, "--", remote], - capture_output=True, text=True) - return p.stdout - - -def info(vm): - """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" - try: - return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} - except json.JSONDecodeError: - return {} - - -# ---- JSON object-stream parsing (one object/line + an eos terminator) ---------- - -def objs(stream): - out = [] - for ln in (stream or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - out.append(json.loads(ln)) - except json.JSONDecodeError: - pass - return out - - -def loaded_payload(stream): - """From an objects.load stream, the decoded payload string (the stored - string8's Object), or None. Skips eos / error_message frames.""" - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - ob = o.get("Object") - if isinstance(ob, str): - return ob - return None - - -def errors(stream): - return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] - - -def contains_local(stream): - """objects.contains stream -> a bool frame. 
Returns True/False/None.""" - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - if isinstance(o.get("Object"), bool): - return o["Object"] - return None - - -def contract_subject(stream): - """node identity = Subject of the active contract (from user.info).""" - for o in objs(stream): - ob = o.get("Object") - if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): - c = ob["Contract"].get("Contract", {}) - if c.get("Subject"): - return c["Subject"] - return None - - -def remote_identity(stream): - """Fallback: RemoteIdentity from a nodes.links stream (the link to the peer).""" - for o in objs(stream): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("RemoteIdentity"): - return ob["RemoteIdentity"] - return None - - -def main(): - ap = argparse.ArgumentParser() - ap.add_argument("--node1", default="node1") - ap.add_argument("--node2", default="node2") - args, _ = ap.parse_known_args() - vm1, vm2 = args.node1, args.node2 - - # What the agent persisted on node1. ID strips all whitespace; the text fields - # tolerate a trailing newline. - info1 = info(vm1) - ID = "".join(str(info1.get("object_id", "")).split()) - PAY = str(info1.get("object_payload", "")).rstrip("\n") - READBACK = str(info1.get("object_readback", "")).rstrip("\n") - TARGET = "".join(str(info1.get("object_target", "")).split()) - - # node2's real identity, resolved host-side: Subject of node2's active contract - # (node2 answers user.info under its node identity), with node1's link-back as a - # fallback. Used only to cross-check the node the agent claims it targeted. - n2_info = ssh(vm2, "astral-query user.info -out json") - n1_links = ssh(vm1, "astral-query nodes.links -out json") - N2 = contract_subject(n2_info) or remote_identity(n1_links) or "" - - # DECISIVE: read the object straight out of node2's "local" repo (where - # objects.store writes). 
Repo-pinned + ungated, so a hit proves node2 itself - # holds the bytes -- not a network re-fetch from node1. - n2_load = ssh(vm2, f"astral-query objects.load -id '{ID}' -repo local -out json") - n2_contains = ssh(vm2, f"astral-query objects.contains -repo local -id '{ID}' -out json") - got = loaded_payload(n2_load) - held = contains_local(n2_contains) - bytes_ok = got is not None and got.rstrip("\n") == PAY - - # Advisory: did the object ALSO land in node1's local repo? (Not required -- the - # agent targeted node2 explicitly; a copy on node1 is fine, just noted.) - n1_contains = ssh(vm1, f"astral-query objects.contains -repo local -id '{ID}' -out json") - on_node1 = contains_local(n1_contains) - - errs, notes = [], [] - if not ID: - errs.append("no object_id in node1's info.json") - if not PAY: - errs.append("no object_payload in node1's info.json") - if READBACK and READBACK != PAY: - notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") - if TARGET and N2 and TARGET != N2: - notes.append(f"agent stored on {TARGET[:12]}.. but node2's identity is {N2[:12]}.. " - "(agent may have targeted the wrong node)") - elif not TARGET: - notes.append("agent recorded no object_target in info.json") - if on_node1 is True: - notes.append("object is ALSO present in node1's local repo (a local copy alongside the " - "remote store -- not required, just noted)") - - if not errs and bytes_ok: - tgt = (N2[:12] + "..") if N2 else (TARGET[:12] + ".." if TARGET else "node2") - print(f"share-object OK: node1 stored object {ID[:12]}.. ON sibling {tgt} and node2's " - f"local repo returns the exact bytes ({len(PAY)} B). " - f"contains(local)={held}.") - for n in notes: - sys.stderr.write(f" note: {n}\n") - return 0 - - # Failure -- pinpoint what broke. 
- sys.stderr.write("share-object verify FAILED: node2 does NOT hold the object in its local repo.\n") - for e in errs: - sys.stderr.write(f" - {e}\n") - if held is False: - sys.stderr.write(" node2 objects.contains -repo local = false: the store never landed on node2 " - "(relay rejected, or the agent stored locally on node1 instead of targeting node2). " - "Check node2's journal for an objects.store and node1's for a 'query rejected'.\n") - elif got is None: - sys.stderr.write(" node2 objects.load -repo local returned no payload (see error frames below).\n") - elif not bytes_ok: - sys.stderr.write(f" node2 returned bytes that do not match: got {got!r} != stored {PAY!r}.\n") - # surface error frames (auth vs not-found vs repo-missing) without conflating. - for label, stream in (("node2 load", n2_load), ("node2 contains", n2_contains)): - for e in errors(stream): - sys.stderr.write(f" {label} error_message: {e}\n") - for n in notes: - sys.stderr.write(f" note: {n}\n") - n2disp = (N2[:12] + "..") if N2 else "?" - sys.stderr.write(f" (id={ID} node2={n2disp} target={(TARGET[:12]+'..') if TARGET else '?'} " - f"contains={held} load={'hit' if got is not None else 'miss'} " - f"on_node1={on_node1})\n") - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/netsim/tasks/share-object/verify.sh b/netsim/tasks/share-object/verify.sh deleted file mode 100755 index fac4b4ff..00000000 --- a/netsim/tasks/share-object/verify.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -# Thin shim — all verification logic lives in verify.py. Calling astral-query and -# walking its JSON streams is far cleaner in python than bash, so verify.sh just -# hands off. netsim sets $NETSIM_TASK_DIR to this task's directory and only -# auto-runs run.sh/verify.sh, so verify.py sits next to us and is invoked here -# (the dirname fallback covers running this script directly). 
-exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" From 9e41b453ef72c75d1c26e4244432ff17469d1602 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 15:48:09 +0200 Subject: [PATCH 28/57] netsim: rename stages to the node-topology scheme astrald-single-node -> one-node, astrald-swarm -> two-nodes, astrald-stored -> two-nodes-data, astrald-read -> two-nodes-data-read. Stage-name change only (set via --save/--stage in story headers + docs); no script logic depends on it. astrald-lab kept as the base build fixture. --- netsim/README.md | 32 +++++++++---------- netsim/stories/adopt-node.story | 4 +-- .../stories/bootstrap-user-software-key.story | 4 +-- netsim/stories/import-user-software-key.story | 4 +-- netsim/stories/object-store.story | 4 +-- netsim/stories/read-remote-object.story | 4 +-- netsim/tasks/adopt-node/README.md | 2 +- netsim/tasks/adopt-node/run.sh | 2 +- .../bootstrap-user-software-key/README.md | 2 +- .../tasks/import-user-software-key/README.md | 2 +- netsim/tasks/object-store/README.md | 2 +- netsim/tasks/read-remote-object/README.md | 4 +-- 12 files changed, 33 insertions(+), 33 deletions(-) diff --git a/netsim/README.md b/netsim/README.md index bf10a4b9..360a1865 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -16,18 +16,18 @@ netsim/ tasks/ # each task: run.sh (+ verify.sh / verify.py) + README.md install-astrald/ # build + run astrald as a service on each node configure-astral-agent/ # install the astral-agent skill into the qwen operator - bootstrap-user-software-key/ # make node1 a User node, new key -> astrald-single-node - import-user-software-key/ # make node1 a User node, existing mnemonic -> astrald-single-node - adopt-node/ # adopt node2 into node1's swarm -> astrald-swarm - object-store/ # node1 stores an object locally + reads it -> astrald-stored - read-remote-object/ # node2 reads node1's object over astral -> astrald-read + bootstrap-user-software-key/ # make node1 a 
User node, new key -> one-node + import-user-software-key/ # make node1 a User node, existing mnemonic -> one-node + adopt-node/ # adopt node2 into node1's swarm -> two-nodes + object-store/ # node1 stores an object locally + reads it -> two-nodes-data + read-remote-object/ # node2 reads node1's object over astral -> two-nodes-data-read stories/ # one story per tested flow (start/save stage in each header) - lab.story # null -> astrald-lab - bootstrap-user-software-key.story # astrald-lab -> astrald-single-node - import-user-software-key.story # astrald-lab -> astrald-single-node (alt.) - adopt-node.story # astrald-single-node -> astrald-swarm - object-store.story # astrald-swarm -> astrald-stored - read-remote-object.story # astrald-stored -> astrald-read + lab.story # null -> astrald-lab + bootstrap-user-software-key.story # astrald-lab -> one-node + import-user-software-key.story # astrald-lab -> one-node (alt.) + adopt-node.story # one-node -> two-nodes + object-store.story # two-nodes -> two-nodes-data + read-remote-object.story # two-nodes-data -> two-nodes-data-read link.sh # register tasks with netsim (idempotent; re-run anytime) README.md ``` @@ -107,14 +107,14 @@ stage (its `start`/`save` stages are in the story header). 
Intermediate stages stay reusable, so you can replay one flow without rebuilding the chain: ``` -astrald-lab ─[bootstrap-user-software-key]→ astrald-single-node ─[adopt-node]→ astrald-swarm ─[object-store]→ astrald-stored ─[read-remote-object]→ astrald-read +astrald-lab ─[bootstrap-user-software-key]→ one-node ─[adopt-node]→ two-nodes ─[object-store]→ two-nodes-data ─[read-remote-object]→ two-nodes-data-read ``` ```sh -netsim story --stage astrald-lab --save astrald-single-node netsim/stories/bootstrap-user-software-key.story -netsim story --stage astrald-single-node --save astrald-swarm netsim/stories/adopt-node.story -netsim story --stage astrald-swarm --save astrald-stored netsim/stories/object-store.story -netsim story --stage astrald-stored --save astrald-read netsim/stories/read-remote-object.story +netsim story --stage astrald-lab --save one-node netsim/stories/bootstrap-user-software-key.story +netsim story --stage one-node --save two-nodes netsim/stories/adopt-node.story +netsim story --stage two-nodes --save two-nodes-data netsim/stories/object-store.story +netsim story --stage two-nodes-data --save two-nodes-data-read netsim/stories/read-remote-object.story ``` Each story drives the Qwen operator through its `astral-agent` skill, then runs an diff --git a/netsim/stories/adopt-node.story b/netsim/stories/adopt-node.story index b44b55c9..98b62b06 100644 --- a/netsim/stories/adopt-node.story +++ b/netsim/stories/adopt-node.story @@ -1,4 +1,4 @@ # adopt-node.story — adopt node2 into node1's User swarm (symmetric roster). 
-# start: astrald-single-node save: astrald-swarm -# netsim story --stage astrald-single-node --save astrald-swarm netsim/stories/adopt-node.story +# start: one-node save: two-nodes +# netsim story --stage one-node --save two-nodes netsim/stories/adopt-node.story adopt-node diff --git a/netsim/stories/bootstrap-user-software-key.story b/netsim/stories/bootstrap-user-software-key.story index 6a96275a..3dbb9e63 100644 --- a/netsim/stories/bootstrap-user-software-key.story +++ b/netsim/stories/bootstrap-user-software-key.story @@ -1,4 +1,4 @@ # bootstrap-user-software-key.story — node1 becomes a User-controlled node. -# start: astrald-lab save: astrald-single-node -# netsim story --stage astrald-lab --save astrald-single-node netsim/stories/bootstrap-user-software-key.story +# start: astrald-lab save: one-node +# netsim story --stage astrald-lab --save one-node netsim/stories/bootstrap-user-software-key.story bootstrap-user-software-key diff --git a/netsim/stories/import-user-software-key.story b/netsim/stories/import-user-software-key.story index 12f12d7f..5a65dbe2 100644 --- a/netsim/stories/import-user-software-key.story +++ b/netsim/stories/import-user-software-key.story @@ -1,6 +1,6 @@ # import-user-software-key.story — make node1 a User node from an EXISTING mnemonic # (embedded in the task's prompt.md; alternative to bootstrap-user-software-key). # Optional env ASTRAL_USER_ID makes verify assert the derived id. 
-# start: astrald-lab save: astrald-single-node -# netsim story --stage astrald-lab --save astrald-single-node netsim/stories/import-user-software-key.story +# start: astrald-lab save: one-node +# netsim story --stage astrald-lab --save one-node netsim/stories/import-user-software-key.story import-user-software-key diff --git a/netsim/stories/object-store.story b/netsim/stories/object-store.story index d8de8e89..27df893a 100644 --- a/netsim/stories/object-store.story +++ b/netsim/stories/object-store.story @@ -1,4 +1,4 @@ # object-store.story — node1 stores an object locally and reads it back. -# start: astrald-swarm save: astrald-stored -# netsim story --stage astrald-swarm --save astrald-stored netsim/stories/object-store.story +# start: two-nodes save: two-nodes-data +# netsim story --stage two-nodes --save two-nodes-data netsim/stories/object-store.story object-store diff --git a/netsim/stories/read-remote-object.story b/netsim/stories/read-remote-object.story index 5198c14f..157d751a 100644 --- a/netsim/stories/read-remote-object.story +++ b/netsim/stories/read-remote-object.story @@ -1,4 +1,4 @@ # read-remote-object.story — node2 reads node1's object over astral. -# start: astrald-stored save: astrald-read -# netsim story --stage astrald-stored --save astrald-read netsim/stories/read-remote-object.story +# start: two-nodes-data save: two-nodes-data-read +# netsim story --stage two-nodes-data --save two-nodes-data-read netsim/stories/read-remote-object.story read-remote-object diff --git a/netsim/tasks/adopt-node/README.md b/netsim/tasks/adopt-node/README.md index 7ad54a8e..cdf2362f 100644 --- a/netsim/tasks/adopt-node/README.md +++ b/netsim/tasks/adopt-node/README.md @@ -4,4 +4,4 @@ Drives the Qwen operator on node1 to **adopt node2** into the User's swarm (`user.adopt`), following the astral-agent skill's node-adoption playbook. 
`verify.py` independently confirms both nodes hold a contract under the same User, a mutual link, and a symmetric roster (each lists the other as a Linked sibling). -Produces stage `astrald-swarm` (from `astrald-single-node`). +Produces stage `two-nodes` (from `one-node`). diff --git a/netsim/tasks/adopt-node/run.sh b/netsim/tasks/adopt-node/run.sh index 0a218968..1f3eec61 100755 --- a/netsim/tasks/adopt-node/run.sh +++ b/netsim/tasks/adopt-node/run.sh @@ -1,7 +1,7 @@ #!/bin/sh # adopt-node: adopt the second node into the User's swarm, driven by the Qwen # Code agent running INSIDE node1 (which is already a User node from -# bootstrap-user-software-key — default starting stage: astrald-single-node). +# bootstrap-user-software-key — default starting stage: one-node). # adopt-node [--vm ] (default: node1 — the VM carrying Qwen) # # Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-software-key: diff --git a/netsim/tasks/bootstrap-user-software-key/README.md b/netsim/tasks/bootstrap-user-software-key/README.md index 65c12e91..1a2eb316 100644 --- a/netsim/tasks/bootstrap-user-software-key/README.md +++ b/netsim/tasks/bootstrap-user-software-key/README.md @@ -3,4 +3,4 @@ Drives the Qwen operator on node1 to make it a **User-controlled node** — mint a software User and install node1's active contract — following the astral-agent skill's node-setup playbook. `verify.sh` independently confirms node1 answers as -that User. Produces stage `astrald-single-node` (from `astrald-lab`). +that User. Produces stage `one-node` (from `astrald-lab`). 
diff --git a/netsim/tasks/import-user-software-key/README.md b/netsim/tasks/import-user-software-key/README.md index 74e6e244..3a56e22c 100644 --- a/netsim/tasks/import-user-software-key/README.md +++ b/netsim/tasks/import-user-software-key/README.md @@ -5,4 +5,4 @@ existing software User** — deriving the User key from a fixed, known BIP-39 mnemonic embedded in `prompt.md` instead of minting a fresh one — following the astral-agent skill's node-setup playbook. `verify.sh` confirms node1 answers as that User (and, if `ASTRAL_USER_ID` is set, that the derived id matches exactly). -A drop-in alternative to `bootstrap-user-software-key`; produces stage `astrald-single-node`. +A drop-in alternative to `bootstrap-user-software-key`; produces stage `one-node`. diff --git a/netsim/tasks/object-store/README.md b/netsim/tasks/object-store/README.md index 44fc5b17..333e4f23 100644 --- a/netsim/tasks/object-store/README.md +++ b/netsim/tasks/object-store/README.md @@ -4,5 +4,5 @@ Drives the Qwen operator on node1 to **store an astral object in its own local repo and read it back** — the basic local object operations (`objects.store` then `objects.load`) — following the astral-agent skill. `verify.py` independently re-loads the object from node1's local repo and asserts the bytes match. Produces -stage `astrald-stored` (from `astrald-swarm`); the saved object is what +stage `two-nodes-data` (from `two-nodes`); the saved object is what `read-remote-object` then fetches from a peer. diff --git a/netsim/tasks/read-remote-object/README.md b/netsim/tasks/read-remote-object/README.md index 8e6bbd71..dd05f9a0 100644 --- a/netsim/tasks/read-remote-object/README.md +++ b/netsim/tasks/read-remote-object/README.md @@ -5,8 +5,8 @@ that `object-store` stored in node1's local repo. 
Host-driven: node2 has no Qwen operator, so `verify.py` issues the read (resolve node1's identity + the stored `object_id`, then node2 runs `:objects.load` and asserts the exact bytes, with transparent/`objects.find` as fallback diagnostics). No agent comprehension -axis — this is a pure implementation-axis probe. Produces stage `astrald-read` -(from `astrald-stored`). +axis — this is a pure implementation-axis probe. Produces stage `two-nodes-data-read` +(from `two-nodes-data`). Note: the peer-reads-node1 direction **failed before astrald #348** (the roster sync); this task re-probes it on current master. It may now pass (node2 knows From 53b412f0b7dd5bc4f47e89b3bd44ae07914a2e43 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 15:54:56 +0200 Subject: [PATCH 29/57] netsim: parametrize object-store with --target self|peer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit object-store now stores either in node1's own local repo (--target self, default) or on the sibling node2 (--target peer, via :objects.store) — so one story tests local storage and another tests storing on a peer. run.sh selects the prompt (prompt.md / prompt-peer.md); verify.py checks the holder's local repo (node1 for self, node2 for peer). Adds object-store-peer.story (two-nodes -> two-nodes-data-peer); object-store.story stays self -> two-nodes-data and feeds read-remote-object. 
--- netsim/README.md | 5 +-- netsim/stories/object-store-peer.story | 4 +++ netsim/tasks/object-store/README.md | 19 +++++++---- netsim/tasks/object-store/prompt-peer.md | 14 +++++++++ netsim/tasks/object-store/run.sh | 37 ++++++++++++---------- netsim/tasks/object-store/verify.py | 40 ++++++++++++++---------- 6 files changed, 78 insertions(+), 41 deletions(-) create mode 100644 netsim/stories/object-store-peer.story create mode 100644 netsim/tasks/object-store/prompt-peer.md diff --git a/netsim/README.md b/netsim/README.md index 360a1865..7297a4a8 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -19,14 +19,15 @@ netsim/ bootstrap-user-software-key/ # make node1 a User node, new key -> one-node import-user-software-key/ # make node1 a User node, existing mnemonic -> one-node adopt-node/ # adopt node2 into node1's swarm -> two-nodes - object-store/ # node1 stores an object locally + reads it -> two-nodes-data + object-store/ # node1 stores an object (--target self|peer) -> two-nodes-data[-peer] read-remote-object/ # node2 reads node1's object over astral -> two-nodes-data-read stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab bootstrap-user-software-key.story # astrald-lab -> one-node import-user-software-key.story # astrald-lab -> one-node (alt.) 
adopt-node.story # one-node -> two-nodes - object-store.story # two-nodes -> two-nodes-data + object-store.story # two-nodes -> two-nodes-data (store on node1) + object-store-peer.story # two-nodes -> two-nodes-data-peer (store on node2) read-remote-object.story # two-nodes-data -> two-nodes-data-read link.sh # register tasks with netsim (idempotent; re-run anytime) README.md diff --git a/netsim/stories/object-store-peer.story b/netsim/stories/object-store-peer.story new file mode 100644 index 00000000..381814a7 --- /dev/null +++ b/netsim/stories/object-store-peer.story @@ -0,0 +1,4 @@ +# object-store-peer.story — node1 stores an object ON the peer (node2) and reads it back. +# start: two-nodes save: two-nodes-data-peer +# netsim story --stage two-nodes --save two-nodes-data-peer netsim/stories/object-store-peer.story +object-store --target peer diff --git a/netsim/tasks/object-store/README.md b/netsim/tasks/object-store/README.md index 333e4f23..14d41f69 100644 --- a/netsim/tasks/object-store/README.md +++ b/netsim/tasks/object-store/README.md @@ -1,8 +1,15 @@ # object-store -Drives the Qwen operator on node1 to **store an astral object in its own local -repo and read it back** — the basic local object operations (`objects.store` then -`objects.load`) — following the astral-agent skill. `verify.py` independently -re-loads the object from node1's local repo and asserts the bytes match. Produces -stage `two-nodes-data` (from `two-nodes`); the saved object is what -`read-remote-object` then fetches from a peer. +Drives the Qwen operator on node1 to **store an astral object and read it back**, +following the astral-agent skill, in one of two modes (`--target`): + +- `self` (default): store in node1's **own local repo** — the basic local object + operations (`objects.store` then `objects.load`). +- `peer`: store **on the sibling node2** (`:objects.store`) — write to a peer. 
+ +`verify.py` independently re-loads the object from the **holder's** local repo +(`objects.load -repo local`, ungated) and asserts the bytes match. Stories: + +- `object-store.story` (self) → `two-nodes-data` (object on node1) — feeds + `read-remote-object`. +- `object-store-peer.story` (peer) → `two-nodes-data-peer` (object on node2). diff --git a/netsim/tasks/object-store/prompt-peer.md b/netsim/tasks/object-store/prompt-peer.md new file mode 100644 index 00000000..fcd1fb6f --- /dev/null +++ b/netsim/tasks/object-store/prompt-peer.md @@ -0,0 +1,14 @@ +On this machine an `astrald` node is running and you control it as its User (a +User-bound apphost token is recorded in `$HOME/info.json` under `user_token`). Your +swarm has one other node — a sibling. Acting as that User, store a short, +distinctive text payload as an astral object **on that other node** — address it +explicitly as the query target — via the objects protocol, following your +**astral-agent** skill, and note the Object ID it returns. Then read the object +back **from that other node** by its Object ID and confirm the bytes match what you +stored. + +Then add to `$HOME/info.json` (keep the existing `user_*` keys) the keys +`object_id` (the Object ID), `object_payload` (the exact payload you stored), and +`object_readback` (the bytes you read back). The skill won't mention this — it's +how the run is checked. Success means the object is stored on the other node, read +back with matching bytes, and those keys are written. diff --git a/netsim/tasks/object-store/run.sh b/netsim/tasks/object-store/run.sh index 4d88748b..6c82ee97 100755 --- a/netsim/tasks/object-store/run.sh +++ b/netsim/tasks/object-store/run.sh @@ -1,29 +1,34 @@ #!/bin/sh -# object-store: have node1 store an astral object in its OWN local repo and read it -# back, driven by the Qwen Code agent running INSIDE node1. 
-# object-store [--vm ] (default: node1 — the VM carrying Qwen) +# object-store: have node1 (the operator) store an astral object and read it back — +# either in its OWN local repo (--target self, default) or ON the peer node2 +# (--target peer, via :objects.store). Driven by the Qwen Code agent on node1. +# object-store [--vm ] [--target self|peer] (default: node1, self) # -# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-*: tiny -# script, thin prompt, intelligence in the agent's astral-agent skill. The agent -# exercises the basic LOCAL object operations — store a payload, surface its Object -# ID, load it back by that id, confirm the round-trip. verify.py then INDEPENDENTLY -# re-reads the object from node1's local repo. The whole remote program travels as -# ONE argv to `netsim ssh`; the prompt rides along base64-encoded. +# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the astral-agent skill. +# self: basic local object ops. peer: store onto the sibling (write-to-peer). verify.py +# then INDEPENDENTLY re-reads the object from the holder's local repo. The remote +# program travels as ONE argv to `netsim ssh`; the prompt rides along base64-encoded. 
set -eu -VM="node1" +VM="node1"; TARGET="self" while [ $# -gt 0 ]; do case "$1" in - --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; - *) echo "usage: object-store [--vm ]" >&2; exit 64 ;; + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --target) [ $# -ge 2 ] || { echo "need self|peer after --target" >&2; exit 64; }; TARGET=$2; shift 2 ;; + *) echo "usage: object-store [--vm ] [--target self|peer]" >&2; exit 64 ;; esac done +case "$TARGET" in + self) pf=prompt.md ;; + peer) pf=prompt-peer.md ;; + *) echo "bad --target '$TARGET' (expected self|peer)" >&2; exit 64 ;; +esac # CDPATH= is an intentional one-shot env prefix for cd, not an assignment # shellcheck disable=SC1007 here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) -[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } -prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line +[ -f "$here/$pf" ] || { echo "missing $here/$pf" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/$pf") # GNU coreutils; -w0 = single line REMOTE_BODY=$(cat <<'EOS' set -eu @@ -56,7 +61,7 @@ echo "object-store: agent finished on $(hostname); stored+read object $oid" EOS ) -echo "object-store: driving Qwen operator on $VM ..." +echo "object-store ($TARGET): driving Qwen operator on $VM ..." # shellcheck disable=SC2029 netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" -echo "object-store: done on $VM" +echo "object-store ($TARGET): done on $VM" diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index 24595005..7144f942 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -1,9 +1,12 @@ #!/usr/bin/env python3 -"""verify object-store: node1 stored an object in its local repo and can read it back. 
- -Independent host-side check (does not trust run.sh or the agent's read-back): a -repo-pinned, ungated objects.load -repo local on node1 must return the exact stored -bytes. Reaches the VM via netsim ssh. +"""verify object-store: the stored object is present in the holder's local repo. + +The agent (on node1) stored an object either locally (--target self -> node1 holds) +or on the peer (--target peer -> node2 holds). Independent host-side check (does not +trust run.sh or the agent's read-back): a repo-pinned, ungated objects.load -repo +local on the HOLDER must return the exact stored bytes. Reaches the VMs via netsim +ssh. The object id + payload are read from node1's info.json (the agent records +there regardless of where it stored). """ import argparse import json @@ -56,18 +59,20 @@ def errors(stream): def main(): ap = argparse.ArgumentParser() - ap.add_argument("--vm", default="node1") + ap.add_argument("--vm", default="node1") # the operator; records info.json here + ap.add_argument("--node2", default="node2") # the peer + ap.add_argument("--target", default="self") # self -> node1 holds; peer -> node2 holds args, _ = ap.parse_known_args() - vm = args.vm + holder = args.node2 if args.target == "peer" else args.vm - info1 = info(vm) + info1 = info(args.vm) ID = "".join(str(info1.get("object_id", "")).split()) PAY = str(info1.get("object_payload", "")).rstrip("\n") READBACK = str(info1.get("object_readback", "")).rstrip("\n") - # Decisive: re-load the object from node1's local repo (repo-pinned + ungated). - n1_load = ssh(vm, f"astral-query objects.load -id '{ID}' -repo local -out json") - got = loaded_payload(n1_load) + # Decisive: re-load the object from the holder's local repo (repo-pinned + ungated). 
+ h_load = ssh(holder, f"astral-query objects.load -id '{ID}' -repo local -out json") + got = loaded_payload(h_load) local_ok = got is not None and got.rstrip("\n") == PAY errs, notes = [], [] @@ -79,24 +84,25 @@ def main(): notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") if not errs and local_ok: - print(f"object-store OK: node1 stored object {ID[:12]}.. and its local repo " - f"returns the exact bytes ({len(PAY)} B).") + print(f"object-store OK ({args.target}): {holder}'s local repo holds object " + f"{ID[:12]}.. with the exact bytes ({len(PAY)} B).") for n in notes: sys.stderr.write(f" note: {n}\n") return 0 - sys.stderr.write("object-store verify FAILED: node1 could not re-load its own stored object.\n") + sys.stderr.write(f"object-store verify FAILED ({args.target}): {holder}'s local repo does NOT " + "hold the stored object.\n") for e in errs: sys.stderr.write(f" - {e}\n") if got is None: - sys.stderr.write(" objects.load -repo local returned no payload (see error frames below).\n") + sys.stderr.write(f" objects.load -repo local on {holder} returned no payload (see errors below).\n") elif not local_ok: sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") - for e in errors(n1_load): + for e in errors(h_load): sys.stderr.write(f" load error_message: {e}\n") for n in notes: sys.stderr.write(f" note: {n}\n") - sys.stderr.write(f" (id={ID} load={'hit' if got is not None else 'miss'})\n") + sys.stderr.write(f" (id={ID} holder={holder} load={'hit' if got is not None else 'miss'})\n") return 1 From 8a83901706cd16442a30aec76ecdaa901c584f38 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 16:04:59 +0200 Subject: [PATCH 30/57] netsim: object-store --target uses astral addresses; adopt-node registers node aliases Replace object-store's abstract --target self|peer with a real astral query target (--target, default localnode; e.g. node2). 
One prompt template (drops prompt-peer.md): the agent stores on / reads back from the target, forming the right query itself. verify.py maps target -> holder (localnode/node1 -> node1, node2 -> node2). adopt-node now registers node1/node2 directory aliases (dir.set_alias) on both nodes when the swarm forms, so tasks can address nodes by name. Also fixes adopt-node's stale soft-check (read the User token from info.json, not the removed user.token). object-store-peer.story now passes --target node2. --- netsim/README.md | 4 +-- netsim/stories/object-store-peer.story | 2 +- netsim/tasks/adopt-node/README.md | 4 ++- netsim/tasks/adopt-node/run.sh | 43 +++++++++++++++++++----- netsim/tasks/object-store/README.md | 17 ++++++---- netsim/tasks/object-store/prompt-peer.md | 14 -------- netsim/tasks/object-store/prompt.md | 10 +++--- netsim/tasks/object-store/run.sh | 39 +++++++++++---------- netsim/tasks/object-store/verify.py | 26 +++++++------- 9 files changed, 88 insertions(+), 71 deletions(-) delete mode 100644 netsim/tasks/object-store/prompt-peer.md diff --git a/netsim/README.md b/netsim/README.md index 7297a4a8..0d5d5cc0 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -18,8 +18,8 @@ netsim/ configure-astral-agent/ # install the astral-agent skill into the qwen operator bootstrap-user-software-key/ # make node1 a User node, new key -> one-node import-user-software-key/ # make node1 a User node, existing mnemonic -> one-node - adopt-node/ # adopt node2 into node1's swarm -> two-nodes - object-store/ # node1 stores an object (--target self|peer) -> two-nodes-data[-peer] + adopt-node/ # adopt node2 into swarm + register node aliases -> two-nodes + object-store/ # node1 stores an object (--target localnode|node2) -> two-nodes-data[-peer] read-remote-object/ # node2 reads node1's object over astral -> two-nodes-data-read stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab diff --git a/netsim/stories/object-store-peer.story
b/netsim/stories/object-store-peer.story index 381814a7..b32fe755 100644 --- a/netsim/stories/object-store-peer.story +++ b/netsim/stories/object-store-peer.story @@ -1,4 +1,4 @@ # object-store-peer.story — node1 stores an object ON the peer (node2) and reads it back. # start: two-nodes save: two-nodes-data-peer # netsim story --stage two-nodes --save two-nodes-data-peer netsim/stories/object-store-peer.story -object-store --target peer +object-store --target node2 diff --git a/netsim/tasks/adopt-node/README.md b/netsim/tasks/adopt-node/README.md index cdf2362f..aee42cdd 100644 --- a/netsim/tasks/adopt-node/README.md +++ b/netsim/tasks/adopt-node/README.md @@ -4,4 +4,6 @@ Drives the Qwen operator on node1 to **adopt node2** into the User's swarm (`user.adopt`), following the astral-agent skill's node-adoption playbook. `verify.py` independently confirms both nodes hold a contract under the same User, a mutual link, and a symmetric roster (each lists the other as a Linked sibling). -Produces stage `two-nodes` (from `one-node`). +Also registers `node1`/`node2` directory aliases (`dir.set_alias`) on both nodes so +later tasks can address nodes by name (e.g. `object-store --target node2`). Produces +stage `two-nodes` (from `one-node`). diff --git a/netsim/tasks/adopt-node/run.sh b/netsim/tasks/adopt-node/run.sh index 1f3eec61..696218c9 100755 --- a/netsim/tasks/adopt-node/run.sh +++ b/netsim/tasks/adopt-node/run.sh @@ -41,14 +41,13 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/adopt-node.prompt)"' \ exit 1 } -# Soft smoke-check only (verify.sh is the authoritative, independent check). -# node1 already holds a User token from bootstrap-user-software-key, so we can peek at the -# swarm here; don't fail the run on a shape mismatch — leave the verdict to -# verify.sh. CONFIRM the user.swarm_status JSON field for a linked sibling. 
-tok="$d/user.token" -if [ -s "$tok" ]; then - if ASTRALD_APPHOST_TOKEN=$(cat "$tok") astral-query user.swarm_status -out json 2>/dev/null \ - | grep -q '"Linked":true'; then +# Soft smoke-check only (verify.sh is the authoritative, independent check). node1 +# holds the User token in $HOME/info.json, so we can peek at the swarm here; don't +# fail the run on a shape mismatch — leave the verdict to verify.sh. +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))' 2>/dev/null || true) +if [ -n "$ASTRALD_APPHOST_TOKEN" ]; then + export ASTRALD_APPHOST_TOKEN + if astral-query user.swarm_status -out json 2>/dev/null | grep -q '"Linked":true'; then echo "adopt-node: $(hostname) reports a linked sibling" else echo "adopt-node: WARNING $(hostname) shows no linked sibling yet (verify.sh decides)" >&2 @@ -62,4 +61,32 @@ echo "adopt-node: driving Qwen operator on $VM ..." # assignment prefix carries the prompt to the guest; body re-parses it # shellcheck disable=SC2029 netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" + +# Register friendly node aliases (node1/node2) on BOTH nodes so later tasks can +# address nodes by name (object-store --target node2, read of :..., etc.). +# Host-side; identities resolved from the mutual link (anonymous nodes.links). +# CONFIRM (live): dir.set_alias works for the anonymous host-side caller. 
+PEER="node2" +_remote_id() { # $1 = vm; prints the first RemoteIdentity from its nodes.links + netsim ssh "$1" -- "astral-query nodes.links -out json" 2>/dev/null | python3 -c ' +import json,sys +for ln in sys.stdin: + ln=ln.strip() + if not ln: continue + try: o=json.loads(ln) + except Exception: continue + ob=o.get("Object") + if isinstance(ob,dict) and ob.get("RemoteIdentity"): + print(ob["RemoteIdentity"]); break' +} +node2_id=$(_remote_id "$VM" || true) # node1's link -> node2 +node1_id=$(_remote_id "$PEER" || true) # node2's link -> node1 +if [ -n "$node1_id" ] && [ -n "$node2_id" ]; then + for vm in "$VM" "$PEER"; do + netsim ssh "$vm" -- "astral-query dir.set_alias -id '$node1_id' -alias node1 >/dev/null 2>&1; astral-query dir.set_alias -id '$node2_id' -alias node2 >/dev/null 2>&1" || true + done + echo "adopt-node: registered aliases node1=$node1_id node2=$node2_id on $VM + $PEER" +else + echo "adopt-node: WARNING could not resolve node identities for aliases (n1='$node1_id' n2='$node2_id')" >&2 +fi echo "adopt-node: done on $VM" diff --git a/netsim/tasks/object-store/README.md b/netsim/tasks/object-store/README.md index 14d41f69..69b1fff6 100644 --- a/netsim/tasks/object-store/README.md +++ b/netsim/tasks/object-store/README.md @@ -1,15 +1,18 @@ # object-store Drives the Qwen operator on node1 to **store an astral object and read it back**, -following the astral-agent skill, in one of two modes (`--target`): +following the astral-agent skill, on a chosen **target node** — `--target` is an +astral query target: -- `self` (default): store in node1's **own local repo** — the basic local object - operations (`objects.store` then `objects.load`). -- `peer`: store **on the sibling node2** (`:objects.store`) — write to a peer. +- `localnode` (default): store on the local node (node1's own repo) — basic local + object operations (`objects.store` / `objects.load`). 
+- `node2` (a node alias registered by `adopt-node`): store on that node + (`node2:objects.store`) — write to a peer. `verify.py` independently re-loads the object from the **holder's** local repo -(`objects.load -repo local`, ungated) and asserts the bytes match. Stories: +(`objects.load -repo local`, ungated) and asserts the bytes match (`localnode`/ +`node1` → node1, `node2` → node2). Stories: -- `object-store.story` (self) → `two-nodes-data` (object on node1) — feeds +- `object-store.story` (`localnode`) → `two-nodes-data` (object on node1) — feeds `read-remote-object`. -- `object-store-peer.story` (peer) → `two-nodes-data-peer` (object on node2). +- `object-store-peer.story` (`--target node2`) → `two-nodes-data-peer` (object on node2). diff --git a/netsim/tasks/object-store/prompt-peer.md b/netsim/tasks/object-store/prompt-peer.md deleted file mode 100644 index fcd1fb6f..00000000 --- a/netsim/tasks/object-store/prompt-peer.md +++ /dev/null @@ -1,14 +0,0 @@ -On this machine an `astrald` node is running and you control it as its User (a -User-bound apphost token is recorded in `$HOME/info.json` under `user_token`). Your -swarm has one other node — a sibling. Acting as that User, store a short, -distinctive text payload as an astral object **on that other node** — address it -explicitly as the query target — via the objects protocol, following your -**astral-agent** skill, and note the Object ID it returns. Then read the object -back **from that other node** by its Object ID and confirm the bytes match what you -stored. - -Then add to `$HOME/info.json` (keep the existing `user_*` keys) the keys -`object_id` (the Object ID), `object_payload` (the exact payload you stored), and -`object_readback` (the bytes you read back). The skill won't mention this — it's -how the run is checked. Success means the object is stored on the other node, read -back with matching bytes, and those keys are written. 
diff --git a/netsim/tasks/object-store/prompt.md b/netsim/tasks/object-store/prompt.md index 809561df..b8575555 100644 --- a/netsim/tasks/object-store/prompt.md +++ b/netsim/tasks/object-store/prompt.md @@ -1,12 +1,12 @@ On this machine an `astrald` node is running and you control it as its User (a User-bound apphost token is recorded in `$HOME/info.json` under `user_token`). Acting as that User, store a short, distinctive text payload as an astral object -via the objects protocol, following your **astral-agent** skill, and note the -Object ID it returns. Then read that object back by its Object ID and confirm the -bytes match what you stored. +on `__TARGET__`, following your **astral-agent** skill, and note the Object ID it +returns. Then read that object back from `__TARGET__` by its Object ID and confirm +the bytes match what you stored. Then add to `$HOME/info.json` (keep the existing `user_*` keys) the keys `object_id` (the Object ID), `object_payload` (the exact payload you stored), and `object_readback` (the bytes you read back). The skill won't mention this — it's -how the run is checked. Success means the object is stored, read back with matching -bytes, and those keys are written. +how the run is checked. Success means the object is stored on `__TARGET__`, read +back with matching bytes, and those keys are written. diff --git a/netsim/tasks/object-store/run.sh b/netsim/tasks/object-store/run.sh index 6c82ee97..2c31818b 100755 --- a/netsim/tasks/object-store/run.sh +++ b/netsim/tasks/object-store/run.sh @@ -1,34 +1,33 @@ #!/bin/sh -# object-store: have node1 (the operator) store an astral object and read it back — -# either in its OWN local repo (--target self, default) or ON the peer node2 -# (--target peer, via :objects.store). Driven by the Qwen Code agent on node1. -# object-store [--vm ] [--target self|peer] (default: node1, self) +# object-store: have node1 (the operator) store an astral object and read it back, +# on a chosen target node. 
--target is an astral query target: +# localnode (default) store on the local node (node1's own repo) +# node2 (or any alias) store on that node (e.g. :objects.store) +# The node aliases (node1/node2) are registered by adopt-node when the swarm forms. +# object-store [--vm ] [--target ] # -# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the astral-agent skill. -# self: basic local object ops. peer: store onto the sibling (write-to-peer). verify.py -# then INDEPENDENTLY re-reads the object from the holder's local repo. The remote -# program travels as ONE argv to `netsim ssh`; the prompt rides along base64-encoded. +# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the astral-agent skill; +# the agent forms the right query for the target. verify.py then INDEPENDENTLY +# re-reads the object from the holder's local repo. The remote program travels as +# ONE argv to `netsim ssh`; the prompt rides along base64-encoded. set -eu -VM="node1"; TARGET="self" +VM="node1"; TARGET="localnode" while [ $# -gt 0 ]; do case "$1" in --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; - --target) [ $# -ge 2 ] || { echo "need self|peer after --target" >&2; exit 64; }; TARGET=$2; shift 2 ;; - *) echo "usage: object-store [--vm ] [--target self|peer]" >&2; exit 64 ;; + --target) [ $# -ge 2 ] || { echo "need an address after --target" >&2; exit 64; }; TARGET=$2; shift 2 ;; + *) echo "usage: object-store [--vm ] [--target ]" >&2; exit 64 ;; esac done -case "$TARGET" in - self) pf=prompt.md ;; - peer) pf=prompt-peer.md ;; - *) echo "bad --target '$TARGET' (expected self|peer)" >&2; exit 64 ;; -esac # CDPATH= is an intentional one-shot env prefix for cd, not an assignment # shellcheck disable=SC1007 here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) -[ -f "$here/$pf" ] || { echo "missing $here/$pf" >&2; exit 1; } -prompt_b64=$(base64 -w0 "$here/$pf") # GNU coreutils; -w0 = single line +[ -f "$here/prompt.md" ] || { echo "missing 
$here/prompt.md" >&2; exit 1; } +# Substitute the target into the prompt; sed replacement metacharacters (\ & |) in it are backslash-escaped first, so the target need not be [a-z0-9.]. +prompt=$(sed "s|__TARGET__|$(printf '%s' "$TARGET" | sed 's/[\\&|]/\\&/g')|g" "$here/prompt.md") +prompt_b64=$(printf '%s' "$prompt" | base64 -w0) # GNU coreutils; -w0 = single line REMOTE_BODY=$(cat <<'EOS' set -eu @@ -61,7 +60,7 @@ echo "object-store: agent finished on $(hostname); stored+read object $oid" EOS ) -echo "object-store ($TARGET): driving Qwen operator on $VM ..." +echo "object-store (target=$TARGET): driving Qwen operator on $VM ..." # shellcheck disable=SC2029 netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" -echo "object-store ($TARGET): done on $VM" +echo "object-store (target=$TARGET): done on $VM" diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index 7144f942..6609aa0a 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 """verify object-store: the stored object is present in the holder's local repo. -The agent (on node1) stored an object either locally (--target self -> node1 holds) -or on the peer (--target peer -> node2 holds). Independent host-side check (does not -trust run.sh or the agent's read-back): a repo-pinned, ungated objects.load -repo -local on the HOLDER must return the exact stored bytes. Reaches the VMs via netsim -ssh. The object id + payload are read from node1's info.json (the agent records -there regardless of where it stored). +The agent (on node1) stored an object on a target node (--target). Independent +host-side check (does not trust run.sh or the agent's read-back): a repo-pinned, +ungated objects.load -repo local on the HOLDER must return the exact stored bytes. +The holder is resolved from --target: localnode/node1 -> node1 (the operator vm), +node2 -> node2. The object id + payload come from node1's info.json (the agent +records there regardless of where it stored). Reaches the VMs via netsim ssh.
""" import argparse import json @@ -59,11 +59,11 @@ def errors(stream): def main(): ap = argparse.ArgumentParser() - ap.add_argument("--vm", default="node1") # the operator; records info.json here - ap.add_argument("--node2", default="node2") # the peer - ap.add_argument("--target", default="self") # self -> node1 holds; peer -> node2 holds + ap.add_argument("--vm", default="node1") # the operator; records info.json here + ap.add_argument("--node2", default="node2") # the peer + ap.add_argument("--target", default="localnode") # localnode/node1 -> node1; node2 -> node2 args, _ = ap.parse_known_args() - holder = args.node2 if args.target == "peer" else args.vm + holder = args.node2 if args.target == args.node2 else args.vm info1 = info(args.vm) ID = "".join(str(info1.get("object_id", "")).split()) @@ -84,14 +84,14 @@ def main(): notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") if not errs and local_ok: - print(f"object-store OK ({args.target}): {holder}'s local repo holds object " + print(f"object-store OK (target={args.target}): {holder}'s local repo holds object " f"{ID[:12]}.. with the exact bytes ({len(PAY)} B).") for n in notes: sys.stderr.write(f" note: {n}\n") return 0 - sys.stderr.write(f"object-store verify FAILED ({args.target}): {holder}'s local repo does NOT " - "hold the stored object.\n") + sys.stderr.write(f"object-store verify FAILED (target={args.target}): {holder}'s local repo " + "does NOT hold the stored object.\n") for e in errs: sys.stderr.write(f" - {e}\n") if got is None: From b17f21e378365c54219ccc663b6e4a205907cef9 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 16:19:42 +0200 Subject: [PATCH 31/57] netsim: minimize task prompts to natural, human-style requests Strip astral-agent/playbook/skill-location references and harness meta ('the skill won't mention this'); the operator already has the skill auto-loaded. 
Prompts now read like a person's request (still naming astral/astrald), keeping only the task plus a terse 'save results to ~/info.json' the automated check needs. --- netsim/tasks/adopt-node/prompt.md | 6 +++--- .../tasks/bootstrap-user-software-key/prompt.md | 10 ++++------ netsim/tasks/import-user-software-key/prompt.md | 17 ++++++----------- netsim/tasks/object-store/prompt.md | 17 ++++++----------- 4 files changed, 19 insertions(+), 31 deletions(-) diff --git a/netsim/tasks/adopt-node/prompt.md b/netsim/tasks/adopt-node/prompt.md index 337ff9e2..c7cceaaa 100644 --- a/netsim/tasks/adopt-node/prompt.md +++ b/netsim/tasks/adopt-node/prompt.md @@ -1,3 +1,3 @@ -On this machine an `astrald` node is running, and you control it as its User. -Another astrald node is on the local network, not yet in your swarm. Bring it -into your swarm, following your **astral-agent** skill's node-adoption playbook. +You're running an astral node here that you control as its user. There's another +astral node on the local network that isn't in your swarm yet — bring it into your +swarm. diff --git a/netsim/tasks/bootstrap-user-software-key/prompt.md b/netsim/tasks/bootstrap-user-software-key/prompt.md index 757b223d..ec728dba 100644 --- a/netsim/tasks/bootstrap-user-software-key/prompt.md +++ b/netsim/tasks/bootstrap-user-software-key/prompt.md @@ -1,7 +1,5 @@ -On this machine there is an `astrald` node running. It has its own node identity -but no User. Make it a User-controlled node under a fresh software User, following -your **astral-agent** skill's node-setup playbook. +There's an astral node running on this machine with no user set up yet. Make it a +user node under a new software user. -Then write the User's id and a User-bound apphost token to `$HOME/info.json` as a -JSON object with keys `user_id` and `user_token`. The skill won't mention this — -it's how the run is checked. 
+When you're done, save the user's id and an access token for it to `~/info.json` +(as `user_id` and `user_token`). diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md index ab4e2b6b..193376ae 100644 --- a/netsim/tasks/import-user-software-key/prompt.md +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -1,15 +1,10 @@ -On this machine there is an `astrald` node running. It has its own node identity -but no User. You already control a software User whose BIP-39 mnemonic seed phrase -is: +There's an astral node running on this machine with no user set up yet. I already +have a software user — its seed phrase is: horse soldier imitate stool square buyer verb party enjoy result jazz rabbit trigger file benefit cloth term change -Make this node a User-controlled node under THAT existing User: derive the User's -`secp256k1` key from the mnemonic above (start from the mnemonic — do NOT generate -new entropy), then build, sign, and install the node contract, following your -**astral-agent** skill's node-setup playbook (software User) but substituting the -given mnemonic for the entropy-generation step. +Set the node up as a user node under that existing user — use the seed phrase +above, don't create a new user. -Then write the User's id and a User-bound apphost token to `$HOME/info.json` as a -JSON object with keys `user_id` and `user_token`. The skill won't mention this — -it's how the run is checked. +When you're done, save the user's id and an access token for it to `~/info.json` +(as `user_id` and `user_token`). diff --git a/netsim/tasks/object-store/prompt.md b/netsim/tasks/object-store/prompt.md index b8575555..b1555e3a 100644 --- a/netsim/tasks/object-store/prompt.md +++ b/netsim/tasks/object-store/prompt.md @@ -1,12 +1,7 @@ -On this machine an `astrald` node is running and you control it as its User (a -User-bound apphost token is recorded in `$HOME/info.json` under `user_token`). 
-Acting as that User, store a short, distinctive text payload as an astral object -on `__TARGET__`, following your **astral-agent** skill, and note the Object ID it -returns. Then read that object back from `__TARGET__` by its Object ID and confirm -the bytes match what you stored. +You're running an astral node here that you control as its user. Store a short, +distinctive piece of text as an astral object on `__TARGET__`, and note the object +id you get back. Then read it back from `__TARGET__` and check the bytes match what +you stored. -Then add to `$HOME/info.json` (keep the existing `user_*` keys) the keys -`object_id` (the Object ID), `object_payload` (the exact payload you stored), and -`object_readback` (the bytes you read back). The skill won't mention this — it's -how the run is checked. Success means the object is stored on `__TARGET__`, read -back with matching bytes, and those keys are written. +When you're done, save the object id, the exact text you stored, and what you read +back to `~/info.json` (as `object_id`, `object_payload`, `object_readback`). From 8b78dab148ce3771fb9d76d90a3ff4eaa1623aa9 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 18:40:53 +0200 Subject: [PATCH 32/57] netsim: add expel-node task + story (two-nodes -> two-nodes-expel) The User on node1 permanently bans node2 from the swarm via user.expel, driven by the Qwen operator through its astral-agent skill. verify.py confirms the ban from both ends: node2 lands in user.list_expelled, drops out of user.swarm_status (OpSwarmStatus lists ActiveNodes, which filters the expelledSet), and the node1<->node2 link is torn down. README registers the new task/story and the two-nodes -> two-nodes-expel branch. 
--- netsim/README.md | 8 ++ netsim/stories/expel-node.story | 4 + netsim/tasks/expel-node/README.md | 13 +++ netsim/tasks/expel-node/prompt.md | 3 + netsim/tasks/expel-node/run.sh | 64 +++++++++++++ netsim/tasks/expel-node/verify.py | 150 ++++++++++++++++++++++++++++++ netsim/tasks/expel-node/verify.sh | 7 ++ 7 files changed, 249 insertions(+) create mode 100644 netsim/stories/expel-node.story create mode 100644 netsim/tasks/expel-node/README.md create mode 100644 netsim/tasks/expel-node/prompt.md create mode 100755 netsim/tasks/expel-node/run.sh create mode 100755 netsim/tasks/expel-node/verify.py create mode 100755 netsim/tasks/expel-node/verify.sh diff --git a/netsim/README.md b/netsim/README.md index 0d5d5cc0..7f22f0c9 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -21,6 +21,7 @@ netsim/ adopt-node/ # adopt node2 into swarm + register node aliases -> two-nodes object-store/ # node1 stores an object (--target localnode|node2) -> two-nodes-data[-peer] read-remote-object/ # node2 reads node1's object over astral -> two-nodes-data-read + expel-node/ # node1 (User) permanently bans node2 from the swarm -> two-nodes-expel stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab bootstrap-user-software-key.story # astrald-lab -> one-node @@ -29,6 +30,7 @@ netsim/ object-store.story # two-nodes -> two-nodes-data (store on node1) object-store-peer.story # two-nodes -> two-nodes-data-peer (store on node2) read-remote-object.story # two-nodes-data -> two-nodes-data-read + expel-node.story # two-nodes -> two-nodes-expel link.sh # register tasks with netsim (idempotent; re-run anytime) README.md ``` @@ -116,8 +118,14 @@ netsim story --stage astrald-lab --save one-node netsim/stories/b netsim story --stage one-node --save two-nodes netsim/stories/adopt-node.story netsim story --stage two-nodes --save two-nodes-data netsim/stories/object-store.story netsim story --stage two-nodes-data --save two-nodes-data-read 
netsim/stories/read-remote-object.story +netsim story --stage two-nodes --save two-nodes-expel netsim/stories/expel-node.story ``` +`expel-node` is a separate branch off `two-nodes`: the User on node1 permanently +bans node2, so the swarm roster shrinks (node2 drops out of `user.swarm_status`, +lands in `user.list_expelled`, and the link is torn down). It produces its own +`two-nodes-expel` stage rather than feeding the data-object chain. + Each story drives the Qwen operator through its `astral-agent` skill, then runs an independent `verify.sh`/`verify.py` check — so a story is a pass/fail integration test for that flow. diff --git a/netsim/stories/expel-node.story b/netsim/stories/expel-node.story new file mode 100644 index 00000000..c8ab8c7f --- /dev/null +++ b/netsim/stories/expel-node.story @@ -0,0 +1,4 @@ +# expel-node.story — node1 (the User) permanently bans node2 from its swarm. +# start: two-nodes save: two-nodes-expel +# netsim story --stage two-nodes --save two-nodes-expel netsim/stories/expel-node.story +expel-node diff --git a/netsim/tasks/expel-node/README.md b/netsim/tasks/expel-node/README.md new file mode 100644 index 00000000..52c3f8a6 --- /dev/null +++ b/netsim/tasks/expel-node/README.md @@ -0,0 +1,13 @@ +# expel-node + +Drives the Qwen operator on node1 (the swarm's User) to **permanently expel node2** +from the swarm (`user.expel`), following the astral-agent skill's knowledge of the +user protocol. Expelling bans the node's identity: it is dropped from the swarm +roster and its links are torn down, though its underlying membership contract is +**not** revoked — the ban is enforced by a membership filter, not contract removal. + +`verify.py` independently confirms the post-ban state from both ends: node2 is +recorded in node1's `user.list_expelled`, node2 no longer appears in node1's +`user.swarm_status` (the roster shrinks — `OpSwarmStatus` lists `ActiveNodes`, +which filters the `expelledSet`), and the node1↔node2 link is gone on both ends. 
+Produces stage `two-nodes-expel` (from `two-nodes`). diff --git a/netsim/tasks/expel-node/prompt.md b/netsim/tasks/expel-node/prompt.md new file mode 100644 index 00000000..c6a146e8 --- /dev/null +++ b/netsim/tasks/expel-node/prompt.md @@ -0,0 +1,3 @@ +You're running an astral node here that you control as its user, and another node +is currently a member of your swarm. You no longer trust that node — permanently +remove it from your swarm so it is banned and cannot rejoin. diff --git a/netsim/tasks/expel-node/run.sh b/netsim/tasks/expel-node/run.sh new file mode 100755 index 00000000..679988c1 --- /dev/null +++ b/netsim/tasks/expel-node/run.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# expel-node: the User (node1's Qwen operator) permanently bans the peer node from +# the swarm, driven by the Qwen Code agent running INSIDE node1. node1 is already a +# User node with node2 adopted into its swarm (default starting stage: two-nodes). +# expel-node [--vm ] (default: node1 — the VM carrying Qwen) +# +# Runs ON THE HOST (cwd = simulation root). Same mechanic as adopt-node: tiny script, +# thin prompt, intelligence in the agent's astral-agent skill. The whole remote +# program travels as ONE argv to `netsim ssh`; the prompt rides along base64-encoded +# so a multi-line file never fights shell quoting. 
+set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: expel-node [--vm ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/expel-node.prompt" +chown -R tester:tester "$d" + +# Run the agent as `tester` (qwen is installed for that user), non-interactively. +# Invocation matches what was validated for adopt-node: one-shot positional prompt +# + `-y` (auto-approve). +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/expel-node.prompt)"' \ + > "$d/expel-node.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/expel-node.log" >&2 + exit 1 + } + +# Soft smoke-check only (verify.py is the authoritative, independent check). node1 +# holds the User token in $HOME/info.json, so we can peek at the swarm here; don't +# fail the run on a shape mismatch — leave the verdict to verify.py. +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))' 2>/dev/null || true) +if [ -n "$ASTRALD_APPHOST_TOKEN" ]; then + export ASTRALD_APPHOST_TOKEN + if astral-query user.list_expelled -out json 2>/dev/null | grep -q '"Subject"'; then + echo "expel-node: $(hostname) records at least one expelled node" + else + echo "expel-node: WARNING $(hostname) shows no expelled node yet (verify.py decides)" >&2 + fi +fi +echo "expel-node: agent finished on $(hostname)" +EOS +) + +echo "expel-node: driving Qwen operator on $VM ..." 
+# assignment prefix carries the prompt to the guest; body re-parses it +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "expel-node: done on $VM" diff --git a/netsim/tasks/expel-node/verify.py b/netsim/tasks/expel-node/verify.py new file mode 100755 index 00000000..650d31f7 --- /dev/null +++ b/netsim/tasks/expel-node/verify.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +"""verify expel-node: node1 (the User) permanently banned node2 from the swarm. + +Independent both-ends check (does not trust run.sh); reaches the VMs via netsim ssh. +The core property — confirmed in code (user.OpSwarmStatus -> ActiveNodes filters the +expelledSet) — is that an expelled node yields FEWER swarm_status results: node2 is +gone from node1's roster, recorded in user.list_expelled, and the link is torn down. +""" +import argparse +import json +import subprocess +import sys + + +def ssh(vm, remote): + """Run `netsim ssh <vm> -- <remote>` on the host; return its stdout (stderr and the exit status are discarded).""" + p = subprocess.run(["netsim", "ssh", vm, "--", remote], + capture_output=True, text=True) + return p.stdout + + +def info(vm): + """The agent's $HOME/info.json (/home/tester/info.json) on the VM, parsed; {} when the file yields no output or is not valid JSON.""" + try: + return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + except json.JSONDecodeError: + return {} + + +def objs(stream): + """astral-query -out json emits one object per line + an eos terminator; returns the parsed lines as a list, skipping blank and non-JSON lines.""" + out = [] + for ln in (stream or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + out.append(json.loads(ln)) + except json.JSONDecodeError: + pass + return out + + +def contract(stream): + """(Issuer, Subject) of the first Contract object in a user.info stream (taken to be the active contract); (None, None) if there is none.""" + for o in objs(stream): + ob = o.get("Object") + if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): + c = ob["Contract"].get("Contract", {}) + return c.get("Issuer"), c.get("Subject") + return None, None + + +def swarm_identities(stream): + """Set of node identities
listed in a user.swarm_status stream.""" + ids = set() + for o in objs(stream): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("Identity"): + ids.add(ob["Identity"]) + return ids + + +def contains_identity(value, ident): + """True if `ident` equals a string nested anywhere in a parsed JSON value (dict values and list items are searched; dict keys are not).""" + if isinstance(value, str): + return value == ident + if isinstance(value, dict): + return any(contains_identity(v, ident) for v in value.values()) + if isinstance(value, list): + return any(contains_identity(v, ident) for v in value) + return False + + +def is_expelled(stream, ident): + """True if `ident` appears as a string value anywhere in an object of a user.list_expelled stream. NOTE(review): this matches any field, not only the SignedExpulsion Subject — confirm that is intended.""" + for o in objs(stream): + if contains_identity(o.get("Object", o), ident): + return True + return False + + +def has_link_to(links, identity): + """True if a nodes.links stream has an entry whose RemoteIdentity equals `identity`.""" + for o in objs(links): + ob = o.get("Object") + if isinstance(ob, dict) and ob.get("RemoteIdentity") == identity: + return True + return False + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--node1", default="node1") + ap.add_argument("--node2", default="node2") + args, _ = ap.parse_known_args() + vm1, vm2 = args.node1, args.node2 + + # node1 acts as the User (token from bootstrap); the expel op requires the caller + # to be the contract issuer, so list_expelled / swarm_status run under that token. NOTE(review): the token is interpolated into the remote shell command unquoted — presumably it has no shell metacharacters; confirm.
+ info1 = info(vm1) + U = "".join(str(info1.get("user_id", "")).split()) + TOKEN = f"export ASTRALD_APPHOST_TOKEN={info1.get('user_token', '')};" + + n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") + n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") + n1_expelled = ssh(vm1, TOKEN + " astral-query user.list_expelled -out json") + n1_links = ssh(vm1, "astral-query nodes.links -out json") + + # node2 still holds its membership contract (expel bans, it does not revoke the + # contract), so its identity is still readable from its own user.info. + n2_info = ssh(vm2, "astral-query user.info -out json") + n2_links = ssh(vm2, "astral-query nodes.links -out json") + + _, s1 = contract(n1_info) # node1's identity (the swarm User's own node) + _, s2 = contract(n2_info) # node2's identity (the expelled subject) + members = swarm_identities(n1_swarm) + + errs = [] + if not U: + errs.append("no user_id in node1's info.json") + if not s2: + errs.append("could not resolve node2's identity from its user.info") + if s2 and not is_expelled(n1_expelled, s2): # skipped when s2 is unresolved (already reported above) + errs.append(f"node2 {s2} is NOT in node1's user.list_expelled " + "(expulsion was never issued — agent did not expel the node)") + if s2 and s2 in members: + errs.append(f"node2 {s2} still appears in node1's user.swarm_status " + "(roster not reduced — expelledSet filter did not drop it)") + if s2 and has_link_to(n1_links, s2): + errs.append(f"node1 still holds an active link to expelled node2 {s2} " + "(applyExpulsion did not close the link)") + if s1 and has_link_to(n2_links, s1): + errs.append(f"node2 still holds an active link back to node1 {s1} " + "(link not torn down on the peer end)") + + if errs: + sys.stderr.write("expel-node verify FAILED:\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + return 1 + + print(f"expel OK: User {U[:8]}.. banned node2 {s2[:8]}..
— recorded in " + f"user.list_expelled, dropped from user.swarm_status ({len(members)} " + f"member(s) remain), and the link is torn down on both ends") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/expel-node/verify.sh b/netsim/tasks/expel-node/verify.sh new file mode 100755 index 00000000..fac4b4ff --- /dev/null +++ b/netsim/tasks/expel-node/verify.sh @@ -0,0 +1,7 @@ +#!/bin/sh +# Thin shim — all verification logic lives in verify.py. Calling astral-query and +# walking its JSON streams is far cleaner in python than bash, so verify.sh just +# hands off. netsim sets $NETSIM_TASK_DIR to this task's directory and only +# auto-runs run.sh/verify.sh, so verify.py sits next to us and is invoked here +# (the dirname fallback covers running this script directly). +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" From d3d70ea147e33970db6b0799c5b2aa53ed50b199 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 19:02:51 +0200 Subject: [PATCH 33/57] netsim: redesign read-remote-object as an agent-driven peer read The old read-remote-object was host-driven and read node2->node1 anonymously, which can't route (network zone stripped) -- it tested the wrong, unroutable direction. Now it's agent-driven on node1: the agent reads the object (id from ~/info.json, written by object-store --target node2) FROM the peer as the User -- the authenticated, routable direction -- and records what it read. verify.py independently re-reads :objects.load as the User and asserts the bytes. New read-remote-peer.story chains object-store --target node2 (store on the peer) then read-remote-object (read it back from node1). Drops the old read-remote-object.story; README pipeline updated. 
--- netsim/README.md | 8 +- netsim/stories/read-remote-object.story | 4 - netsim/stories/read-remote-peer.story | 6 + netsim/tasks/read-remote-object/README.md | 20 +-- netsim/tasks/read-remote-object/prompt.md | 5 + netsim/tasks/read-remote-object/run.sh | 59 ++++++++- netsim/tasks/read-remote-object/verify.py | 144 +++++++--------------- 7 files changed, 120 insertions(+), 126 deletions(-) delete mode 100644 netsim/stories/read-remote-object.story create mode 100644 netsim/stories/read-remote-peer.story create mode 100644 netsim/tasks/read-remote-object/prompt.md diff --git a/netsim/README.md b/netsim/README.md index 7f22f0c9..3926657d 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -20,7 +20,7 @@ netsim/ import-user-software-key/ # make node1 a User node, existing mnemonic -> one-node adopt-node/ # adopt node2 into swarm + register node aliases -> two-nodes object-store/ # node1 stores an object (--target localnode|node2) -> two-nodes-data[-peer] - read-remote-object/ # node2 reads node1's object over astral -> two-nodes-data-read + read-remote-object/ # node1's agent reads node2's object over astral (used by read-remote-peer) expel-node/ # node1 (User) permanently bans node2 from the swarm -> two-nodes-expel stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab @@ -29,7 +29,7 @@ netsim/ adopt-node.story # one-node -> two-nodes object-store.story # two-nodes -> two-nodes-data (store on node1) object-store-peer.story # two-nodes -> two-nodes-data-peer (store on node2) - read-remote-object.story # two-nodes-data -> two-nodes-data-read + read-remote-peer.story # two-nodes -> two-nodes-peer-read (store on node2, then read it) expel-node.story # two-nodes -> two-nodes-expel link.sh # register tasks with netsim (idempotent; re-run anytime) README.md @@ -110,14 +110,14 @@ stage (its `start`/`save` stages are in the story header). 
Intermediate stages stay reusable, so you can replay one flow without rebuilding the chain: ``` -astrald-lab ─[bootstrap-user-software-key]→ one-node ─[adopt-node]→ two-nodes ─[object-store]→ two-nodes-data ─[read-remote-object]→ two-nodes-data-read +astrald-lab ─[bootstrap-user-software-key]→ one-node ─[adopt-node]→ two-nodes ─[object-store]→ two-nodes-data ``` ```sh netsim story --stage astrald-lab --save one-node netsim/stories/bootstrap-user-software-key.story netsim story --stage one-node --save two-nodes netsim/stories/adopt-node.story netsim story --stage two-nodes --save two-nodes-data netsim/stories/object-store.story -netsim story --stage two-nodes-data --save two-nodes-data-read netsim/stories/read-remote-object.story +netsim story --stage two-nodes --save two-nodes-peer-read netsim/stories/read-remote-peer.story netsim story --stage two-nodes --save two-nodes-expel netsim/stories/expel-node.story ``` diff --git a/netsim/stories/read-remote-object.story b/netsim/stories/read-remote-object.story deleted file mode 100644 index 157d751a..00000000 --- a/netsim/stories/read-remote-object.story +++ /dev/null @@ -1,4 +0,0 @@ -# read-remote-object.story — node2 reads node1's object over astral. -# start: two-nodes-data save: two-nodes-data-read -# netsim story --stage two-nodes-data --save two-nodes-data-read netsim/stories/read-remote-object.story -read-remote-object diff --git a/netsim/stories/read-remote-peer.story b/netsim/stories/read-remote-peer.story new file mode 100644 index 00000000..44c6639d --- /dev/null +++ b/netsim/stories/read-remote-peer.story @@ -0,0 +1,6 @@ +# read-remote-peer.story — store an object on the peer (node2), then node1's agent +# reads it back from the peer over astral. 
+# start: two-nodes save: two-nodes-peer-read +# netsim story --stage two-nodes --save two-nodes-peer-read netsim/stories/read-remote-peer.story +object-store --target node2 +read-remote-object diff --git a/netsim/tasks/read-remote-object/README.md b/netsim/tasks/read-remote-object/README.md index dd05f9a0..3bc2a91c 100644 --- a/netsim/tasks/read-remote-object/README.md +++ b/netsim/tasks/read-remote-object/README.md @@ -1,13 +1,13 @@ # read-remote-object -Confirms a peer (node2) can read **node1's** object **over astral** — the object -that `object-store` stored in node1's local repo. Host-driven: node2 has no Qwen -operator, so `verify.py` issues the read (resolve node1's identity + the stored -`object_id`, then node2 runs `:objects.load` and asserts the exact bytes, -with transparent/`objects.find` as fallback diagnostics). No agent comprehension -axis — this is a pure implementation-axis probe. Produces stage `two-nodes-data-read` -(from `two-nodes-data`). +Has **node1's agent read an astral object that lives on the peer** (node2), over +astral. The object id is in node1's `~/info.json` (`object_id`, written by +`object-store --target node2`); the agent reads it from the peer **as the User** +(addressing the peer by its alias from `adopt-node`) and records what it read. +`verify.py` independently re-reads the peer's object as the User and asserts the +bytes match. -Note: the peer-reads-node1 direction **failed before astrald #348** (the roster -sync); this task re-probes it on current master. It may now pass (node2 knows -node1, and `op_load` is ungated) or surface the gap — either is a valid finding. +Used by `read-remote-peer.story` (which first stores the object on node2, then runs +this read). Note: an *anonymous* read of a peer's object does **not** route (the +network zone is stripped); the read must come from an authenticated identity, which +is why it's driven by the User on node1. 
diff --git a/netsim/tasks/read-remote-object/prompt.md b/netsim/tasks/read-remote-object/prompt.md new file mode 100644 index 00000000..5ea98f23 --- /dev/null +++ b/netsim/tasks/read-remote-object/prompt.md @@ -0,0 +1,5 @@ +You're running an astral node here that you control as its user. There's an astral +object stored on `__PEER__` — its id is in `~/info.json` (the `object_id` value). +Read that object from `__PEER__` and check what it contains. + +When you're done, save what you read to `~/info.json` (as `object_remote`). diff --git a/netsim/tasks/read-remote-object/run.sh b/netsim/tasks/read-remote-object/run.sh index 1cde3c42..befde1fd 100755 --- a/netsim/tasks/read-remote-object/run.sh +++ b/netsim/tasks/read-remote-object/run.sh @@ -1,8 +1,55 @@ #!/bin/sh -# read-remote-object has no run-phase setup: node2 has no Qwen operator, so the -# remote read of node1's object IS the thing under test. verify.py performs it -# (resolve node1's identity + the object_id stored by object-store, then have node2 -# read :objects.load and assert the bytes). run.sh is a no-op placeholder so -# netsim discovers the task and hands off to verify.sh. +# read-remote-object: have node1's agent read an astral object that lives on the +# peer (node2), over astral. The object's id is in node1's ~/info.json (object_id, +# written by object-store --target node2). Driven by the Qwen Code agent on node1 — +# the read is issued AS THE USER (authenticated), which routes to the peer (an +# anonymous read would not). The agent addresses the peer by its alias (registered +# by adopt-node). +# read-remote-object [--vm ] [--peer ] (default: node1, node2) +# +# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the astral-agent skill. +# verify.py then INDEPENDENTLY re-reads the peer's object as the User and asserts. set -eu -echo "read-remote-object: no run-phase setup; verify.py performs the cross-read." 
+ +VM="node1"; PEER="node2" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --peer) [ $# -ge 2 ] || { echo "need alias after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;; + *) echo "usage: read-remote-object [--vm ] [--peer ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt=$(sed "s|__PEER__|$PEER|g" "$here/prompt.md") # alias is [a-z0-9] — sed-safe +prompt_b64=$(printf '%s' "$prompt" | base64 -w0) + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/read-remote-object.prompt" +chown -R tester:tester "$d" + +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/read-remote-object.prompt)"' \ + > "$d/read-remote-object.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/read-remote-object.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.py does the authoritative, independent check. The agent +# records what it read in $HOME/info.json under object_remote. +rem=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_remote",""))' 2>/dev/null || true) +[ -n "$rem" ] || { echo "agent recorded no object_remote in /home/tester/info.json on $(hostname)" >&2; exit 1; } +echo "read-remote-object: agent finished on $(hostname); read back from peer" +EOS +) + +echo "read-remote-object: driving Qwen operator on $VM to read from $PEER ..." 
+# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "read-remote-object: done on $VM" diff --git a/netsim/tasks/read-remote-object/verify.py b/netsim/tasks/read-remote-object/verify.py index 129bdccd..564ae010 100644 --- a/netsim/tasks/read-remote-object/verify.py +++ b/netsim/tasks/read-remote-object/verify.py @@ -1,17 +1,14 @@ #!/usr/bin/env python3 -"""verify read-remote-object: node2 reads node1's object OVER ASTRAL. - -node1 stored an object in its local repo (object-store). This confirms a peer -(node2) can obtain those exact bytes across the swarm. Host-driven, since node2 has -no operator. PRE-#348 this direction (peer reads node1) failed (route_not_found / -0 providers); this task re-probes it on current master. op_load is ungated, so a -successful route returns the bytes. - -Ladder on node2 (strongest -> weakest), PASS iff node2 gets the exact stored bytes -via hop 1 or 2: - 1. explicit target :objects.load -id (query-target routing) - 2. transparent objects.load -id (zone-based) - 3. provider find objects.find -id (discovery; identities, not bytes) +"""verify read-remote-object: node1 read the peer's object over astral. + +object-store --target node2 put the object on the peer (node2) and recorded +object_id + object_payload in node1's info.json; read-remote-object's agent (on +node1, as the User) read it back from the peer and recorded object_remote. + +Independent host-side check: re-read the peer's object AS THE USER (node1 holds the +token) via :objects.load and assert the bytes equal the stored payload — this +is the authenticated, routable direction. Also cross-checks the agent's recorded +read. Reaches the VMs via netsim ssh. 
""" import argparse import json @@ -48,6 +45,7 @@ def objs(stream): def loaded_payload(stream): + """From an objects.load stream, the decoded payload string, or None.""" for o in objs(stream): if o.get("Type") in ("eos", "error_message"): continue @@ -61,116 +59,58 @@ def errors(stream): return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] -def contains_local(stream): - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - if isinstance(o.get("Object"), bool): - return o["Object"] - return None - - -def find_identities(stream): - ids = [] - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - ob = o.get("Object") - if isinstance(ob, str): - ids.append(ob) - return ids - - -def contract_subject(stream): - """node1's node identity = Subject of its active contract (from user.info).""" - for o in objs(stream): - ob = o.get("Object") - if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): - c = ob["Contract"].get("Contract", {}) - if c.get("Subject"): - return c["Subject"] - return None - - -def remote_identity(stream): - """Fallback: RemoteIdentity from node2's nodes.links (the link to node1).""" - for o in objs(stream): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("RemoteIdentity"): - return ob["RemoteIdentity"] - return None - - def main(): ap = argparse.ArgumentParser() - ap.add_argument("--node1", default="node1") - ap.add_argument("--node2", default="node2") + ap.add_argument("--vm", default="node1") # operator; reads as the User + ap.add_argument("--peer", default="node2") # the node holding the object (alias) args, _ = ap.parse_known_args() - vm1, vm2 = args.node1, args.node2 - # The object node1 stored (object-store) and node1's node identity. node1 acts as - # the User (token from info.json) so user.info returns the contract whose Subject - # is node1's node identity; node2's link-back is the fallback. 
- info1 = info(vm1) + info1 = info(args.vm) ID = "".join(str(info1.get("object_id", "")).split()) PAY = str(info1.get("object_payload", "")).rstrip("\n") + REMOTE = str(info1.get("object_remote", "")) token = info1.get("user_token", "") - n1_info = ssh(vm1, f"export ASTRALD_APPHOST_TOKEN={token}; astral-query user.info -out json") - n2_links = ssh(vm2, "astral-query nodes.links -out json") - N1 = contract_subject(n1_info) or remote_identity(n2_links) or "" - - # node2 answers under its node identity (anonymous host-side caller, no token). - n2_contains = ssh(vm2, f"astral-query objects.contains -repo local -id '{ID}' -out json") - n2_explicit = ssh(vm2, f"astral-query '{N1}':objects.load -id '{ID}' -out json") if N1 else "" - n2_transparent = ssh(vm2, f"astral-query objects.load -id '{ID}' -out json") - n2_find = ssh(vm2, f"astral-query objects.find -id '{ID}' -out json") - - already_local = contains_local(n2_contains) - explicit = loaded_payload(n2_explicit) - transparent = loaded_payload(n2_transparent) - providers = find_identities(n2_find) - explicit_ok = explicit is not None and explicit.rstrip("\n") == PAY - transparent_ok = transparent is not None and transparent.rstrip("\n") == PAY + # Independent: node1, as the User, reads the peer's object over astral. This is + # authenticated (token), so the query keeps the network zone and routes to the peer. 
+ tok = f"export ASTRALD_APPHOST_TOKEN={token};" if token else "" + out = ssh(args.vm, f"{tok} astral-query {args.peer}:objects.load -id '{ID}' -out json") + got = loaded_payload(out) + read_ok = got is not None and got.rstrip("\n") == PAY errs, notes = [], [] if not ID: - errs.append("no object_id in node1's info.json (run object-store first)") + errs.append("no object_id in node1's info.json (object-store --target node2 must run first)") if not PAY: errs.append("no object_payload in node1's info.json") - if not N1: - notes.append("could not resolve node1's node identity host-side (explicit-target read skipped)") - if already_local is True: - notes.append("objects.contains reports node2 may ALREADY hold this object locally; " - "the byte-match might not be a genuine remote pull") - - if not errs and (explicit_ok or transparent_ok): - path = ("explicit-target (:objects.load)" if explicit_ok - else "transparent (objects.load)") - print(f"read-remote-object OK: node2 read node1's object {ID[:12]}.. across the swarm " - f"via {path}; bytes match ({len(PAY)} B). providers via objects.find: {len(providers)}.") + if not token: + errs.append("no user_token in node1's info.json (can't read the peer as the User)") + if not REMOTE: + notes.append("agent recorded no object_remote (the agent's own read)") + elif PAY and PAY not in REMOTE: + notes.append(f"agent's recorded read does not contain the payload ({REMOTE!r})") + + if not errs and read_ok: + print(f"read-remote-object OK: node1 (as User) read object {ID[:12]}.. 
from " + f"{args.peer} over astral; bytes match ({len(PAY)} B).") for n in notes: sys.stderr.write(f" note: {n}\n") return 0 - sys.stderr.write("read-remote-object verify FAILED: node2 did NOT obtain node1's object across the swarm.\n") + sys.stderr.write(f"read-remote-object verify FAILED: node1 could not read the object from " + f"{args.peer} over astral.\n") for e in errs: sys.stderr.write(f" - {e}\n") - if (N1 in providers) if N1 else bool(providers): - sys.stderr.write(" FINDING: objects.find DID return a provider (discovery crosses) but the byte " - "READ did not route -- record which hop in the task log.\n") - for label, stream in (("explicit-target", n2_explicit), - ("transparent", n2_transparent), - ("objects.find", n2_find)): - for e in errors(stream): - sys.stderr.write(f" {label} error_message: {e}\n") + if got is None: + sys.stderr.write(f" {args.peer}:objects.load (as User) returned no payload " + "(route_not_found means the read didn't route — check auth/zone).\n") + elif not read_ok: + sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") + for e in errors(out): + sys.stderr.write(f" load error_message: {e}\n") for n in notes: sys.stderr.write(f" note: {n}\n") - n1disp = (N1[:12] + "..") if N1 else "?" 
- sys.stderr.write(f" (id={ID} node1={n1disp} " - f"explicit={'hit' if explicit is not None else 'miss'} " - f"transparent={'hit' if transparent is not None else 'miss'} " - f"find_providers={len(providers)})\n") + sys.stderr.write(f" (id={ID} peer={args.peer} read={'hit' if got is not None else 'miss'})\n") return 1 From 1663417e9e1bff3ac53b03338037fda0a4e1f866 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 19:32:57 +0200 Subject: [PATCH 34/57] netsim: prompts that append to info.json must keep existing entries Minimizing the prompts dropped the 'keep existing keys' hint, so object-store's agent overwrote ~/info.json with object_* and wiped the user_token bootstrap wrote -- breaking read-remote-object's verify (which reads the peer as the User). Restore a natural 'leaving the existing entries in place' instruction in object-store and read-remote-object. --- netsim/tasks/object-store/prompt.md | 5 +++-- netsim/tasks/read-remote-object/prompt.md | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/netsim/tasks/object-store/prompt.md b/netsim/tasks/object-store/prompt.md index b1555e3a..5fa83de9 100644 --- a/netsim/tasks/object-store/prompt.md +++ b/netsim/tasks/object-store/prompt.md @@ -3,5 +3,6 @@ distinctive piece of text as an astral object on `__TARGET__`, and note the obje id you get back. Then read it back from `__TARGET__` and check the bytes match what you stored. -When you're done, save the object id, the exact text you stored, and what you read -back to `~/info.json` (as `object_id`, `object_payload`, `object_readback`). +When you're done, add the object id, the exact text you stored, and what you read +back to `~/info.json` (as `object_id`, `object_payload`, `object_readback`), leaving +any existing entries in the file in place. 
diff --git a/netsim/tasks/read-remote-object/prompt.md b/netsim/tasks/read-remote-object/prompt.md index 5ea98f23..66aa97be 100644 --- a/netsim/tasks/read-remote-object/prompt.md +++ b/netsim/tasks/read-remote-object/prompt.md @@ -2,4 +2,5 @@ You're running an astral node here that you control as its user. There's an astr object stored on `__PEER__` — its id is in `~/info.json` (the `object_id` value). Read that object from `__PEER__` and check what it contains. -When you're done, save what you read to `~/info.json` (as `object_remote`). +When you're done, add what you read to `~/info.json` (as `object_remote`), leaving +the existing entries in the file in place. From 6a9de8584dc217aae6e1f8502265768ee039ae23 Mon Sep 17 00:00:00 2001 From: intern0 Date: Mon, 22 Jun 2026 19:47:37 +0200 Subject: [PATCH 35/57] netsim: per-task result files instead of one shared info.json Each task writes its own file (no shared accumulator, no merge, no clobbering): - bootstrap/import -> ~/user.json (user_id, user_token) - object-store -> ~/object.json (object_id, object_payload, object_readback) - read-remote-object -> ~/read.json (object_remote) Readers reference the specific file(s) they need: adopt-node + expel-node read user.json; object-store verify reads object.json; read-remote-object verify reads user.json + object.json + read.json. Prompts drop the 'keep existing entries' hint (own file, overwrite is fine). Updates expel-node's reads to user.json too. 
--- netsim/tasks/adopt-node/run.sh | 4 +-- netsim/tasks/adopt-node/verify.py | 6 ++-- .../bootstrap-user-software-key/prompt.md | 2 +- .../tasks/bootstrap-user-software-key/run.sh | 6 ++-- .../bootstrap-user-software-key/verify.sh | 6 ++-- netsim/tasks/expel-node/run.sh | 4 +-- netsim/tasks/expel-node/verify.py | 6 ++-- .../tasks/import-user-software-key/prompt.md | 2 +- netsim/tasks/import-user-software-key/run.sh | 6 ++-- .../tasks/import-user-software-key/verify.sh | 8 +++--- netsim/tasks/object-store/prompt.md | 5 ++-- netsim/tasks/object-store/run.sh | 10 +++---- netsim/tasks/object-store/verify.py | 12 ++++---- netsim/tasks/read-remote-object/README.md | 2 +- netsim/tasks/read-remote-object/prompt.md | 5 ++-- netsim/tasks/read-remote-object/run.sh | 8 +++--- netsim/tasks/read-remote-object/verify.py | 28 ++++++++++--------- 17 files changed, 60 insertions(+), 60 deletions(-) diff --git a/netsim/tasks/adopt-node/run.sh b/netsim/tasks/adopt-node/run.sh index 696218c9..23bdf5c5 100755 --- a/netsim/tasks/adopt-node/run.sh +++ b/netsim/tasks/adopt-node/run.sh @@ -42,9 +42,9 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/adopt-node.prompt)"' \ } # Soft smoke-check only (verify.sh is the authoritative, independent check). node1 -# holds the User token in $HOME/info.json, so we can peek at the swarm here; don't +# holds the User token in $HOME/user.json, so we can peek at the swarm here; don't # fail the run on a shape mismatch — leave the verdict to verify.sh. 
-ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))' 2>/dev/null || true) +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))' 2>/dev/null || true) if [ -n "$ASTRALD_APPHOST_TOKEN" ]; then export ASTRALD_APPHOST_TOKEN if astral-query user.swarm_status -out json 2>/dev/null | grep -q '"Linked":true'; then diff --git a/netsim/tasks/adopt-node/verify.py b/netsim/tasks/adopt-node/verify.py index 9727f248..b9b8bd68 100755 --- a/netsim/tasks/adopt-node/verify.py +++ b/netsim/tasks/adopt-node/verify.py @@ -16,9 +16,9 @@ def ssh(vm, remote): def info(vm): - """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" + """The agent's $HOME/user.json (/home/tester/user.json) on the VM, as a dict.""" try: - return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + return json.loads(ssh(vm, "cat /home/tester/user.json") or "{}") or {} except json.JSONDecodeError: return {} @@ -92,7 +92,7 @@ def main(): errs = [] if not U: - errs.append("no user_id in node1's info.json") + errs.append("no user_id in node1's user.json") if i1 != U: errs.append(f"node1 contract issuer {i1} != User {U}") if i2 != U: diff --git a/netsim/tasks/bootstrap-user-software-key/prompt.md b/netsim/tasks/bootstrap-user-software-key/prompt.md index ec728dba..bc4ace3e 100644 --- a/netsim/tasks/bootstrap-user-software-key/prompt.md +++ b/netsim/tasks/bootstrap-user-software-key/prompt.md @@ -1,5 +1,5 @@ There's an astral node running on this machine with no user set up yet. Make it a user node under a new software user. -When you're done, save the user's id and an access token for it to `~/info.json` +When you're done, save the user's id and an access token for it to `~/user.json` (as `user_id` and `user_token`). 
diff --git a/netsim/tasks/bootstrap-user-software-key/run.sh b/netsim/tasks/bootstrap-user-software-key/run.sh index 7a58a673..218180f3 100755 --- a/netsim/tasks/bootstrap-user-software-key/run.sh +++ b/netsim/tasks/bootstrap-user-software-key/run.sh @@ -46,9 +46,9 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/bootstrap-user-software-key. } # Cheap smoke-check; verify.sh does the authoritative, independent check. The agent -# records its outputs in $HOME/info.json (/home/tester/info.json). -uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))' 2>/dev/null || true) -[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } +# records its outputs in $HOME/user.json (/home/tester/user.json). +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))' 2>/dev/null || true) +[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/user.json on $(hostname)" >&2; exit 1; } echo "bootstrap-user-software-key: agent finished on $(hostname); User id $uid" EOS ) diff --git a/netsim/tasks/bootstrap-user-software-key/verify.sh b/netsim/tasks/bootstrap-user-software-key/verify.sh index 336d280f..e157fbfc 100755 --- a/netsim/tasks/bootstrap-user-software-key/verify.sh +++ b/netsim/tasks/bootstrap-user-software-key/verify.sh @@ -16,10 +16,10 @@ done REMOTE_CHECK=$(cat <<'EOS' set -eu -info=/home/tester/info.json +info=/home/tester/user.json [ -s "$info" ] || { echo "no $info on $(hostname)" >&2; exit 1; } -uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))') -ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))') +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))') +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import 
json;print(json.load(open("/home/tester/user.json")).get("user_token",""))') export ASTRALD_APPHOST_TOKEN [ -n "$uid" ] || { echo "no user_id in $info on $(hostname)" >&2; exit 1; } [ -n "$ASTRALD_APPHOST_TOKEN" ] || { echo "no user_token in $info on $(hostname)" >&2; exit 1; } diff --git a/netsim/tasks/expel-node/run.sh b/netsim/tasks/expel-node/run.sh index 679988c1..e3b18def 100755 --- a/netsim/tasks/expel-node/run.sh +++ b/netsim/tasks/expel-node/run.sh @@ -42,9 +42,9 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/expel-node.prompt)"' \ } # Soft smoke-check only (verify.py is the authoritative, independent check). node1 -# holds the User token in $HOME/info.json, so we can peek at the swarm here; don't +# holds the User token in $HOME/user.json, so we can peek at the swarm here; don't # fail the run on a shape mismatch — leave the verdict to verify.py. -ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))' 2>/dev/null || true) +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))' 2>/dev/null || true) if [ -n "$ASTRALD_APPHOST_TOKEN" ]; then export ASTRALD_APPHOST_TOKEN if astral-query user.list_expelled -out json 2>/dev/null | grep -q '"Subject"'; then diff --git a/netsim/tasks/expel-node/verify.py b/netsim/tasks/expel-node/verify.py index 650d31f7..d45e6a93 100755 --- a/netsim/tasks/expel-node/verify.py +++ b/netsim/tasks/expel-node/verify.py @@ -20,9 +20,9 @@ def ssh(vm, remote): def info(vm): - """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" + """The agent's $HOME/user.json (/home/tester/user.json) on the VM, as a dict.""" try: - return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + return json.loads(ssh(vm, "cat /home/tester/user.json") or "{}") or {} except json.JSONDecodeError: return {} @@ -118,7 +118,7 @@ def main(): errs = [] if not U: - errs.append("no user_id in 
node1's info.json") + errs.append("no user_id in node1's user.json") if not s2: errs.append("could not resolve node2's identity from its user.info") if not is_expelled(n1_expelled, s2): diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md index 193376ae..a4d35c66 100644 --- a/netsim/tasks/import-user-software-key/prompt.md +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -6,5 +6,5 @@ have a software user — its seed phrase is: Set the node up as a user node under that existing user — use the seed phrase above, don't create a new user. -When you're done, save the user's id and an access token for it to `~/info.json` +When you're done, save the user's id and an access token for it to `~/user.json` (as `user_id` and `user_token`). diff --git a/netsim/tasks/import-user-software-key/run.sh b/netsim/tasks/import-user-software-key/run.sh index 0180b23f..488af62e 100755 --- a/netsim/tasks/import-user-software-key/run.sh +++ b/netsim/tasks/import-user-software-key/run.sh @@ -42,9 +42,9 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/import-user-software-key.pro } # Cheap smoke-check; verify.sh does the authoritative, independent check. The agent -# records its outputs in $HOME/info.json (/home/tester/info.json). -uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))' 2>/dev/null || true) -[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } +# records its outputs in $HOME/user.json (/home/tester/user.json). 
+uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))' 2>/dev/null || true) +[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/user.json on $(hostname)" >&2; exit 1; } echo "import-user-software-key: agent finished on $(hostname); User id $uid" EOS ) diff --git a/netsim/tasks/import-user-software-key/verify.sh b/netsim/tasks/import-user-software-key/verify.sh index 6f1b82f2..9d991e94 100755 --- a/netsim/tasks/import-user-software-key/verify.sh +++ b/netsim/tasks/import-user-software-key/verify.sh @@ -1,6 +1,6 @@ #!/bin/sh # verify import-user-software-key: the node must be a User node under the imported software User. -# INDEPENDENT re-check -- reads $HOME/info.json, acts AS the User, and asserts the +# INDEPENDENT re-check -- reads $HOME/user.json, acts AS the User, and asserts the # node answers as a user node. If ASTRAL_USER_ID is set, the derived User id must # equal it (proof the EXISTING key was used, not a fresh one). set -eu @@ -16,10 +16,10 @@ EXPECT=${ASTRAL_USER_ID:-} REMOTE_CHECK=$(cat <<'EOS' set -eu -info=/home/tester/info.json +info=/home/tester/user.json [ -s "$info" ] || { echo "no $info on $(hostname)" >&2; exit 1; } -uid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_id",""))') -ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("user_token",""))') +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))') +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))') export ASTRALD_APPHOST_TOKEN [ -n "$uid" ] || { echo "no user_id in $info on $(hostname)" >&2; exit 1; } [ -n "$ASTRALD_APPHOST_TOKEN" ] || { echo "no user_token in $info on $(hostname)" >&2; exit 1; } diff --git a/netsim/tasks/object-store/prompt.md b/netsim/tasks/object-store/prompt.md index 5fa83de9..dd35e752 100644 --- 
a/netsim/tasks/object-store/prompt.md +++ b/netsim/tasks/object-store/prompt.md @@ -3,6 +3,5 @@ distinctive piece of text as an astral object on `__TARGET__`, and note the obje id you get back. Then read it back from `__TARGET__` and check the bytes match what you stored. -When you're done, add the object id, the exact text you stored, and what you read -back to `~/info.json` (as `object_id`, `object_payload`, `object_readback`), leaving -any existing entries in the file in place. +When you're done, save the object id, the exact text you stored, and what you read +back to `~/object.json` (as `object_id`, `object_payload`, `object_readback`). diff --git a/netsim/tasks/object-store/run.sh b/netsim/tasks/object-store/run.sh index 2c31818b..ff6ab051 100755 --- a/netsim/tasks/object-store/run.sh +++ b/netsim/tasks/object-store/run.sh @@ -44,11 +44,11 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/object-store.prompt)"' \ } # Cheap smoke-check; verify.py does the authoritative, independent check. The agent -# records its outputs in $HOME/info.json (/home/tester/info.json). -oid=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_id",""))' 2>/dev/null || true) -opay=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_payload",""))' 2>/dev/null || true) -orb=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_readback",""))' 2>/dev/null || true) -[ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/info.json on $(hostname)" >&2; exit 1; } +# records its outputs in $HOME/object.json (/home/tester/object.json). 
+oid=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_id",""))' 2>/dev/null || true) +opay=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_payload",""))' 2>/dev/null || true) +orb=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_readback",""))' 2>/dev/null || true) +[ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/object.json on $(hostname)" >&2; exit 1; } [ -n "$opay" ] || { echo "agent recorded no object_payload on $(hostname)" >&2; exit 1; } [ -n "$orb" ] || { echo "agent recorded no object_readback on $(hostname)" >&2; exit 1; } case "$oid" in diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index 6609aa0a..52989f80 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -5,7 +5,7 @@ host-side check (does not trust run.sh or the agent's read-back): a repo-pinned, ungated objects.load -repo local on the HOLDER must return the exact stored bytes. The holder is resolved from --target: localnode/node1 -> node1 (the operator vm), -node2 -> node2. The object id + payload come from node1's info.json (the agent +node2 -> node2. The object id + payload come from node1's object.json (the agent records there regardless of where it stored). Reaches the VMs via netsim ssh. 
""" import argparse @@ -22,9 +22,9 @@ def ssh(vm, remote): def info(vm): - """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" + """The agent's $HOME/object.json (/home/tester/object.json) on the VM, as a dict.""" try: - return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + return json.loads(ssh(vm, "cat /home/tester/object.json") or "{}") or {} except json.JSONDecodeError: return {} @@ -59,7 +59,7 @@ def errors(stream): def main(): ap = argparse.ArgumentParser() - ap.add_argument("--vm", default="node1") # the operator; records info.json here + ap.add_argument("--vm", default="node1") # the operator; records object.json here ap.add_argument("--node2", default="node2") # the peer ap.add_argument("--target", default="localnode") # localnode/node1 -> node1; node2 -> node2 args, _ = ap.parse_known_args() @@ -77,9 +77,9 @@ def main(): errs, notes = [], [] if not ID: - errs.append("no object_id in node1's info.json") + errs.append("no object_id in node1's object.json") if not PAY: - errs.append("no object_payload in node1's info.json") + errs.append("no object_payload in node1's object.json") if READBACK and READBACK != PAY: notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") diff --git a/netsim/tasks/read-remote-object/README.md b/netsim/tasks/read-remote-object/README.md index 3bc2a91c..1df209e5 100644 --- a/netsim/tasks/read-remote-object/README.md +++ b/netsim/tasks/read-remote-object/README.md @@ -1,7 +1,7 @@ # read-remote-object Has **node1's agent read an astral object that lives on the peer** (node2), over -astral. The object id is in node1's `~/info.json` (`object_id`, written by +astral. The object id is in node1's `~/object.json` (`object_id`, written by `object-store --target node2`); the agent reads it from the peer **as the User** (addressing the peer by its alias from `adopt-node`) and records what it read. 
`verify.py` independently re-reads the peer's object as the User and asserts the diff --git a/netsim/tasks/read-remote-object/prompt.md b/netsim/tasks/read-remote-object/prompt.md index 66aa97be..4e7a5d78 100644 --- a/netsim/tasks/read-remote-object/prompt.md +++ b/netsim/tasks/read-remote-object/prompt.md @@ -1,6 +1,5 @@ You're running an astral node here that you control as its user. There's an astral -object stored on `__PEER__` — its id is in `~/info.json` (the `object_id` value). +object stored on `__PEER__` — its id is in `~/object.json` (the `object_id` value). Read that object from `__PEER__` and check what it contains. -When you're done, add what you read to `~/info.json` (as `object_remote`), leaving -the existing entries in the file in place. +When you're done, save what you read to `~/read.json` (as `object_remote`). diff --git a/netsim/tasks/read-remote-object/run.sh b/netsim/tasks/read-remote-object/run.sh index befde1fd..8f606e80 100755 --- a/netsim/tasks/read-remote-object/run.sh +++ b/netsim/tasks/read-remote-object/run.sh @@ -1,6 +1,6 @@ #!/bin/sh # read-remote-object: have node1's agent read an astral object that lives on the -# peer (node2), over astral. The object's id is in node1's ~/info.json (object_id, +# peer (node2), over astral. The object's id is in node1's ~/object.json (object_id, # written by object-store --target node2). Driven by the Qwen Code agent on node1 — # the read is issued AS THE USER (authenticated), which routes to the peer (an # anonymous read would not). The agent addresses the peer by its alias (registered @@ -42,9 +42,9 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/read-remote-object.prompt)"' } # Cheap smoke-check; verify.py does the authoritative, independent check. The agent -# records what it read in $HOME/info.json under object_remote. 
-rem=$(python3 -c 'import json;print(json.load(open("/home/tester/info.json")).get("object_remote",""))' 2>/dev/null || true) -[ -n "$rem" ] || { echo "agent recorded no object_remote in /home/tester/info.json on $(hostname)" >&2; exit 1; } +# records what it read in $HOME/read.json under object_remote. +rem=$(python3 -c 'import json;print(json.load(open("/home/tester/read.json")).get("object_remote",""))' 2>/dev/null || true) +[ -n "$rem" ] || { echo "agent recorded no object_remote in /home/tester/read.json on $(hostname)" >&2; exit 1; } echo "read-remote-object: agent finished on $(hostname); read back from peer" EOS ) diff --git a/netsim/tasks/read-remote-object/verify.py b/netsim/tasks/read-remote-object/verify.py index 564ae010..286652f7 100644 --- a/netsim/tasks/read-remote-object/verify.py +++ b/netsim/tasks/read-remote-object/verify.py @@ -2,8 +2,8 @@ """verify read-remote-object: node1 read the peer's object over astral. object-store --target node2 put the object on the peer (node2) and recorded -object_id + object_payload in node1's info.json; read-remote-object's agent (on -node1, as the User) read it back from the peer and recorded object_remote. +object_id + object_payload in node1's object.json; read-remote-object's agent (on +node1, as the User) read it back from the peer and recorded object_remote in read.json. 
Independent host-side check: re-read the peer's object AS THE USER (node1 holds the token) via :objects.load and assert the bytes equal the stored payload — this @@ -23,10 +23,10 @@ def ssh(vm, remote): return p.stdout -def info(vm): - """The agent's $HOME/info.json (/home/tester/info.json) on the VM, as a dict.""" +def jload(vm, name): + """A JSON file under the agent's $HOME (/home/tester/) on the VM, as a dict.""" try: - return json.loads(ssh(vm, "cat /home/tester/info.json") or "{}") or {} + return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} except json.JSONDecodeError: return {} @@ -65,11 +65,13 @@ def main(): ap.add_argument("--peer", default="node2") # the node holding the object (alias) args, _ = ap.parse_known_args() - info1 = info(args.vm) - ID = "".join(str(info1.get("object_id", "")).split()) - PAY = str(info1.get("object_payload", "")).rstrip("\n") - REMOTE = str(info1.get("object_remote", "")) - token = info1.get("user_token", "") + obj = jload(args.vm, "object.json") # object-store: object_id, object_payload + user = jload(args.vm, "user.json") # bootstrap/import: user_token + rd = jload(args.vm, "read.json") # this task's agent: object_remote + ID = "".join(str(obj.get("object_id", "")).split()) + PAY = str(obj.get("object_payload", "")).rstrip("\n") + REMOTE = str(rd.get("object_remote", "")) + token = user.get("user_token", "") # Independent: node1, as the User, reads the peer's object over astral. This is # authenticated (token), so the query keeps the network zone and routes to the peer. 
@@ -80,11 +82,11 @@ def main(): errs, notes = [], [] if not ID: - errs.append("no object_id in node1's info.json (object-store --target node2 must run first)") + errs.append("no object_id in node1's object.json (object-store --target node2 must run first)") if not PAY: - errs.append("no object_payload in node1's info.json") + errs.append("no object_payload in node1's object.json") if not token: - errs.append("no user_token in node1's info.json (can't read the peer as the User)") + errs.append("no user_token in node1's user.json (can't read the peer as the User)") if not REMOTE: notes.append("agent recorded no object_remote (the agent's own read)") elif PAY and PAY not in REMOTE: From 0c7124bbf556d8031a7eaf4f7823df4dab7eff54 Mon Sep 17 00:00:00 2001 From: intern0 Date: Tue, 23 Jun 2026 14:17:32 +0200 Subject: [PATCH 36/57] netsim: ship fixed payload.txt for object-store; tighten task prompts object-store now ships a fixed payload.txt to the operator and tells the agent to store that file's contents (deterministic id/bytes) instead of inventing 'distinctive text'; verify.py (object-store and read-remote-object) uses the shipped file as ground truth. Simplify every task prompt to precise, minimal wording and name __TARGET__/__PEER__ as astral nodes. 
--- netsim/tasks/adopt-node/prompt.md | 3 +-- netsim/tasks/bootstrap-user-software-key/prompt.md | 7 +++---- netsim/tasks/expel-node/prompt.md | 5 ++--- netsim/tasks/import-user-software-key/prompt.md | 11 +++++------ netsim/tasks/object-store/README.md | 10 ++++++---- netsim/tasks/object-store/payload.txt | 1 + netsim/tasks/object-store/prompt.md | 10 ++++------ netsim/tasks/object-store/run.sh | 11 +++++++---- netsim/tasks/object-store/verify.py | 14 +++++++++----- netsim/tasks/read-remote-object/prompt.md | 7 +++---- netsim/tasks/read-remote-object/verify.py | 13 ++++++++----- 11 files changed, 49 insertions(+), 43 deletions(-) create mode 100644 netsim/tasks/object-store/payload.txt diff --git a/netsim/tasks/adopt-node/prompt.md b/netsim/tasks/adopt-node/prompt.md index c7cceaaa..32b2b6c1 100644 --- a/netsim/tasks/adopt-node/prompt.md +++ b/netsim/tasks/adopt-node/prompt.md @@ -1,3 +1,2 @@ -You're running an astral node here that you control as its user. There's another -astral node on the local network that isn't in your swarm yet — bring it into your +Another astral node on the local network isn't in your swarm yet. Bring it into your swarm. diff --git a/netsim/tasks/bootstrap-user-software-key/prompt.md b/netsim/tasks/bootstrap-user-software-key/prompt.md index bc4ace3e..22a7482d 100644 --- a/netsim/tasks/bootstrap-user-software-key/prompt.md +++ b/netsim/tasks/bootstrap-user-software-key/prompt.md @@ -1,5 +1,4 @@ -There's an astral node running on this machine with no user set up yet. Make it a -user node under a new software user. +The local astral node has no user yet. Make it a user node under a new software user. -When you're done, save the user's id and an access token for it to `~/user.json` -(as `user_id` and `user_token`). +Save the user's id and an access token to `~/user.json` (as `user_id` and +`user_token`). 
diff --git a/netsim/tasks/expel-node/prompt.md b/netsim/tasks/expel-node/prompt.md index c6a146e8..1b2c626d 100644 --- a/netsim/tasks/expel-node/prompt.md +++ b/netsim/tasks/expel-node/prompt.md @@ -1,3 +1,2 @@ -You're running an astral node here that you control as its user, and another node -is currently a member of your swarm. You no longer trust that node — permanently -remove it from your swarm so it is banned and cannot rejoin. +Another node is a member of your swarm and you no longer trust it. Permanently ban it +from the swarm so it can't rejoin. diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md index a4d35c66..7136a2d7 100644 --- a/netsim/tasks/import-user-software-key/prompt.md +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -1,10 +1,9 @@ -There's an astral node running on this machine with no user set up yet. I already -have a software user — its seed phrase is: +The local astral node has no user yet. Set it up under my existing software user — +seed phrase: horse soldier imitate stool square buyer verb party enjoy result jazz rabbit trigger file benefit cloth term change -Set the node up as a user node under that existing user — use the seed phrase -above, don't create a new user. +Use that seed phrase; don't create a new user. -When you're done, save the user's id and an access token for it to `~/user.json` -(as `user_id` and `user_token`). +Save the user's id and an access token to `~/user.json` (as `user_id` and +`user_token`). 
diff --git a/netsim/tasks/object-store/README.md b/netsim/tasks/object-store/README.md index 69b1fff6..cc84e064 100644 --- a/netsim/tasks/object-store/README.md +++ b/netsim/tasks/object-store/README.md @@ -1,8 +1,10 @@ # object-store Drives the Qwen operator on node1 to **store an astral object and read it back**, -following the astral-agent skill, on a chosen **target node** — `--target` is an -astral query target: +following the astral-agent skill, on a chosen **target node**. The bytes are fixed: +`run.sh` ships this task's `payload.txt` to the operator's home and the prompt tells +the agent to store *that file's* contents (no inventing text), so the object id and +bytes are deterministic. `--target` is an astral query target: - `localnode` (default): store on the local node (node1's own repo) — basic local object operations (`objects.store` / `objects.load`). @@ -10,8 +12,8 @@ astral query target: (`node2:objects.store`) — write to a peer. `verify.py` independently re-loads the object from the **holder's** local repo -(`objects.load -repo local`, ungated) and asserts the bytes match (`localnode`/ -`node1` → node1, `node2` → node2). Stories: +(`objects.load -repo local`, ungated) and asserts the bytes equal `payload.txt` +(`localnode`/`node1` → node1, `node2` → node2). Stories: - `object-store.story` (`localnode`) → `two-nodes-data` (object on node1) — feeds `read-remote-object`. 
diff --git a/netsim/tasks/object-store/payload.txt b/netsim/tasks/object-store/payload.txt new file mode 100644 index 00000000..2d97a433 --- /dev/null +++ b/netsim/tasks/object-store/payload.txt @@ -0,0 +1 @@ +astral netsim object-store probe — sphinx of black quartz, judge my vow 0xC0FFEE \ No newline at end of file diff --git a/netsim/tasks/object-store/prompt.md b/netsim/tasks/object-store/prompt.md index dd35e752..3c50416f 100644 --- a/netsim/tasks/object-store/prompt.md +++ b/netsim/tasks/object-store/prompt.md @@ -1,7 +1,5 @@ -You're running an astral node here that you control as its user. Store a short, -distinctive piece of text as an astral object on `__TARGET__`, and note the object -id you get back. Then read it back from `__TARGET__` and check the bytes match what -you stored. +Store the contents of `~/payload.txt` as an astral object on astral node +`__TARGET__`. Read it back from `__TARGET__` and confirm the bytes match the file. -When you're done, save the object id, the exact text you stored, and what you read -back to `~/object.json` (as `object_id`, `object_payload`, `object_readback`). +Save the object id and what you read back to `~/object.json` (as `object_id` and +`object_readback`). diff --git a/netsim/tasks/object-store/run.sh b/netsim/tasks/object-store/run.sh index ff6ab051..cf7e60d4 100755 --- a/netsim/tasks/object-store/run.sh +++ b/netsim/tasks/object-store/run.sh @@ -28,13 +28,17 @@ here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) # Substitute the target alias into the prompt (aliases are [a-z0-9.] — sed-safe). 
prompt=$(sed "s|__TARGET__|$TARGET|g" "$here/prompt.md") prompt_b64=$(printf '%s' "$prompt" | base64 -w0) # GNU coreutils; -w0 = single line +[ -f "$here/payload.txt" ] || { echo "missing $here/payload.txt" >&2; exit 1; } +payload_b64=$(base64 -w0 "$here/payload.txt") # the fixed bytes the agent stores REMOTE_BODY=$(cat <<'EOS' set -eu d=/home/tester/.netsim mkdir -p "$d" printf '%s' "$prompt_b64" | base64 -d > "$d/object-store.prompt" +printf '%s' "$payload_b64" | base64 -d > /home/tester/payload.txt chown -R tester:tester "$d" +chown tester:tester /home/tester/payload.txt su - tester -c 'qwen -y "$(cat /home/tester/.netsim/object-store.prompt)"' \ > "$d/object-store.log" 2>&1 || { @@ -46,21 +50,20 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/object-store.prompt)"' \ # Cheap smoke-check; verify.py does the authoritative, independent check. The agent # records its outputs in $HOME/object.json (/home/tester/object.json). oid=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_id",""))' 2>/dev/null || true) -opay=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_payload",""))' 2>/dev/null || true) orb=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_readback",""))' 2>/dev/null || true) +pay=$(cat /home/tester/payload.txt 2>/dev/null || true) [ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/object.json on $(hostname)" >&2; exit 1; } -[ -n "$opay" ] || { echo "agent recorded no object_payload on $(hostname)" >&2; exit 1; } [ -n "$orb" ] || { echo "agent recorded no object_readback on $(hostname)" >&2; exit 1; } case "$oid" in data1*) : ;; *) echo "WARNING $(hostname): object_id does not look like a data1… Object ID (verify.py decides)" >&2 ;; esac -[ "$opay" = "$orb" ] || echo "WARNING $(hostname): agent read-back != stored payload (verify.py decides)" >&2 +[ "$pay" = "$orb" ] || echo "WARNING $(hostname): agent read-back != 
payload.txt (verify.py decides)" >&2 echo "object-store: agent finished on $(hostname); stored+read object $oid" EOS ) echo "object-store (target=$TARGET): driving Qwen operator on $VM ..." # shellcheck disable=SC2029 -netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; payload_b64='$payload_b64'; $REMOTE_BODY" echo "object-store (target=$TARGET): done on $VM" diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index 52989f80..0d68f4b3 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -5,8 +5,9 @@ host-side check (does not trust run.sh or the agent's read-back): a repo-pinned, ungated objects.load -repo local on the HOLDER must return the exact stored bytes. The holder is resolved from --target: localnode/node1 -> node1 (the operator vm), -node2 -> node2. The object id + payload come from node1's object.json (the agent -records there regardless of where it stored). Reaches the VMs via netsim ssh. +node2 -> node2. The object id comes from node1's object.json; the ground-truth +payload is the fixed payload.txt that run.sh shipped to the operator's home (not the +agent's account of what it stored). Reaches the VMs via netsim ssh. """ import argparse import json @@ -67,8 +68,11 @@ def main(): info1 = info(args.vm) ID = "".join(str(info1.get("object_id", "")).split()) - PAY = str(info1.get("object_payload", "")).rstrip("\n") READBACK = str(info1.get("object_readback", "")).rstrip("\n") + # Canonical input: the exact bytes the agent was handed to store (run.sh shipped + # payload.txt to the operator's home). Ground truth — we don't trust the agent's + # own account of what it stored. + PAY = (ssh(args.vm, "cat /home/tester/payload.txt") or "").rstrip("\n") # Decisive: re-load the object from the holder's local repo (repo-pinned + ungated). 
h_load = ssh(holder, f"astral-query objects.load -id '{ID}' -repo local -out json") @@ -79,9 +83,9 @@ def main(): if not ID: errs.append("no object_id in node1's object.json") if not PAY: - errs.append("no object_payload in node1's object.json") + errs.append("payload.txt missing on the operator (run.sh must ship it)") if READBACK and READBACK != PAY: - notes.append(f"agent's own read-back != stored payload ({READBACK!r} != {PAY!r})") + notes.append(f"agent's own read-back != payload.txt ({READBACK!r} != {PAY!r})") if not errs and local_ok: print(f"object-store OK (target={args.target}): {holder}'s local repo holds object " diff --git a/netsim/tasks/read-remote-object/prompt.md b/netsim/tasks/read-remote-object/prompt.md index 4e7a5d78..3d5e20c0 100644 --- a/netsim/tasks/read-remote-object/prompt.md +++ b/netsim/tasks/read-remote-object/prompt.md @@ -1,5 +1,4 @@ -You're running an astral node here that you control as its user. There's an astral -object stored on `__PEER__` — its id is in `~/object.json` (the `object_id` value). -Read that object from `__PEER__` and check what it contains. +An astral object is stored on astral node `__PEER__`; its id is in `~/object.json` +(the `object_id` value). Read it from `__PEER__`. -When you're done, save what you read to `~/read.json` (as `object_remote`). +Save what you read to `~/read.json` (as `object_remote`). diff --git a/netsim/tasks/read-remote-object/verify.py b/netsim/tasks/read-remote-object/verify.py index 286652f7..cfa98b8f 100644 --- a/netsim/tasks/read-remote-object/verify.py +++ b/netsim/tasks/read-remote-object/verify.py @@ -2,8 +2,9 @@ """verify read-remote-object: node1 read the peer's object over astral. object-store --target node2 put the object on the peer (node2) and recorded -object_id + object_payload in node1's object.json; read-remote-object's agent (on -node1, as the User) read it back from the peer and recorded object_remote in read.json. 
+object_id in node1's object.json (the bytes are the fixed payload.txt it shipped to +node1); read-remote-object's agent (on node1, as the User) read it back from the peer +and recorded object_remote in read.json. Independent host-side check: re-read the peer's object AS THE USER (node1 holds the token) via :objects.load and assert the bytes equal the stored payload — this @@ -65,11 +66,13 @@ def main(): ap.add_argument("--peer", default="node2") # the node holding the object (alias) args, _ = ap.parse_known_args() - obj = jload(args.vm, "object.json") # object-store: object_id, object_payload + obj = jload(args.vm, "object.json") # object-store: object_id user = jload(args.vm, "user.json") # bootstrap/import: user_token rd = jload(args.vm, "read.json") # this task's agent: object_remote ID = "".join(str(obj.get("object_id", "")).split()) - PAY = str(obj.get("object_payload", "")).rstrip("\n") + # Ground-truth bytes: the fixed payload.txt that object-store shipped to the + # operator (node1), not the agent's account of what was stored. 
+ PAY = (ssh(args.vm, "cat /home/tester/payload.txt") or "").rstrip("\n") REMOTE = str(rd.get("object_remote", "")) token = user.get("user_token", "") @@ -84,7 +87,7 @@ def main(): if not ID: errs.append("no object_id in node1's object.json (object-store --target node2 must run first)") if not PAY: - errs.append("no object_payload in node1's object.json") + errs.append("payload.txt missing on node1 (object-store --target node2 must run first)") if not token: errs.append("no user_token in node1's user.json (can't read the peer as the User)") if not REMOTE: From a52cf3798adfd70ad4d5ed022c171bcbfe8dee6c Mon Sep 17 00:00:00 2001 From: intern0 Date: Tue, 23 Jun 2026 18:28:02 +0200 Subject: [PATCH 37/57] netsim: enable-tor task, store-only object-store, siblings.json, skills ref, minimized READMEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - enable-tor: new host task — bring up a node with a Tor endpoint and save it to /root/tor.json (validated live: real onion published + saved). - object-store: agent only stores + records object_id; verify owns the read-back and byte match against the shipped payload.txt. - adopt-node: agent records swarm siblings to ~/siblings.json (sibling_ids); verify asserts it includes the adopted node. - configure-astral-agent: SATFORGE_SKILLS_REF builds the lab against a skills branch (fails loudly if the ref can't be fetched). - prompts: terser, human-style wording (adopt/bootstrap/import/expel/object-store). - READMEs: minimized to astral-docs voice across all tasks. 
--- netsim/tasks/adopt-node/README.md | 12 +-- netsim/tasks/adopt-node/prompt.md | 5 +- netsim/tasks/adopt-node/run.sh | 5 + netsim/tasks/adopt-node/verify.py | 17 +++- .../bootstrap-user-software-key/README.md | 5 +- .../bootstrap-user-software-key/prompt.md | 2 +- netsim/tasks/configure-astral-agent/README.md | 7 +- netsim/tasks/configure-astral-agent/run.sh | 18 +++- netsim/tasks/enable-tor/README.md | 5 + netsim/tasks/enable-tor/run.sh | 89 +++++++++++++++++ netsim/tasks/enable-tor/verify.py | 97 +++++++++++++++++++ netsim/tasks/enable-tor/verify.sh | 5 + netsim/tasks/expel-node/README.md | 12 +-- netsim/tasks/expel-node/prompt.md | 3 +- .../tasks/import-user-software-key/README.md | 7 +- .../tasks/import-user-software-key/prompt.md | 9 +- netsim/tasks/install-astrald/README.md | 8 +- netsim/tasks/object-store/README.md | 19 +--- netsim/tasks/object-store/prompt.md | 5 +- netsim/tasks/object-store/run.sh | 10 +- netsim/tasks/object-store/verify.py | 15 ++- netsim/tasks/read-remote-object/README.md | 12 +-- 22 files changed, 264 insertions(+), 103 deletions(-) create mode 100644 netsim/tasks/enable-tor/README.md create mode 100755 netsim/tasks/enable-tor/run.sh create mode 100644 netsim/tasks/enable-tor/verify.py create mode 100755 netsim/tasks/enable-tor/verify.sh diff --git a/netsim/tasks/adopt-node/README.md b/netsim/tasks/adopt-node/README.md index aee42cdd..e253faee 100644 --- a/netsim/tasks/adopt-node/README.md +++ b/netsim/tasks/adopt-node/README.md @@ -1,9 +1,7 @@ # adopt-node -Drives the Qwen operator on node1 to **adopt node2** into the User's swarm -(`user.adopt`), following the astral-agent skill's node-adoption playbook. -`verify.py` independently confirms both nodes hold a contract under the same User, -a mutual link, and a symmetric roster (each lists the other as a Linked sibling). -Also registers `node1`/`node2` directory aliases (`dir.set_alias`) on both nodes so -later tasks can address nodes by name (e.g. `object-store --target node2`). 
Produces -stage `two-nodes` (from `one-node`). +Adopts node2 into node1's User swarm and registers `node1`/`node2` aliases on both nodes. + +`verify.py` proves both nodes hold a contract from the same User, node2 links back to node1, the roster is symmetric (each lists the other as a Linked sibling), and `sibling_ids` includes node2. + +Produces stage `two-nodes` (from `one-node`). diff --git a/netsim/tasks/adopt-node/prompt.md b/netsim/tasks/adopt-node/prompt.md index 32b2b6c1..8d9f03fd 100644 --- a/netsim/tasks/adopt-node/prompt.md +++ b/netsim/tasks/adopt-node/prompt.md @@ -1,2 +1,3 @@ -Another astral node on the local network isn't in your swarm yet. Bring it into your -swarm. +Adopt the other astral node from the local network into your swarm. + +Save the ids of your swarm siblings to `~/siblings.json` (as `sibling_ids`, a list). diff --git a/netsim/tasks/adopt-node/run.sh b/netsim/tasks/adopt-node/run.sh index 23bdf5c5..eb4c5fe6 100755 --- a/netsim/tasks/adopt-node/run.sh +++ b/netsim/tasks/adopt-node/run.sh @@ -44,6 +44,11 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/adopt-node.prompt)"' \ # Soft smoke-check only (verify.sh is the authoritative, independent check). node1 # holds the User token in $HOME/user.json, so we can peek at the swarm here; don't # fail the run on a shape mismatch — leave the verdict to verify.sh. 
+if [ -n "$(python3 -c 'import json;print(len(json.load(open("/home/tester/siblings.json")).get("sibling_ids") or []))' 2>/dev/null | grep -v '^0$')" ]; then + echo "adopt-node: $(hostname) recorded swarm siblings in siblings.json" +else + echo "adopt-node: WARNING $(hostname) recorded no sibling_ids in siblings.json (verify.sh decides)" >&2 +fi ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))' 2>/dev/null || true) if [ -n "$ASTRALD_APPHOST_TOKEN" ]; then export ASTRALD_APPHOST_TOKEN diff --git a/netsim/tasks/adopt-node/verify.py b/netsim/tasks/adopt-node/verify.py index b9b8bd68..9c164a1a 100755 --- a/netsim/tasks/adopt-node/verify.py +++ b/netsim/tasks/adopt-node/verify.py @@ -15,14 +15,19 @@ def ssh(vm, remote): return p.stdout -def info(vm): - """The agent's $HOME/user.json (/home/tester/user.json) on the VM, as a dict.""" +def jfile(vm, name): + """A JSON file under the agent's $HOME (/home/tester/) on the VM, as a dict.""" try: - return json.loads(ssh(vm, "cat /home/tester/user.json") or "{}") or {} + return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} except json.JSONDecodeError: return {} +def info(vm): + """The agent's $HOME/user.json (/home/tester/user.json) on the VM, as a dict.""" + return jfile(vm, "user.json") + + def objs(stream): out = [] for ln in (stream or "").splitlines(): @@ -74,6 +79,8 @@ def main(): # node1 acts as the User (token from bootstrap-user-software-key); node2 answers under its # node identity (it holds the contract after the adoption). 
info1 = info(vm1) + siblings = jfile(vm1, "siblings.json") # adopt-node agent: ids of the swarm siblings + sib_ids = ["".join(str(x).split()) for x in (siblings.get("sibling_ids") or []) if x] U = "".join(str(info1.get("user_id", "")).split()) TOKEN = f"export ASTRALD_APPHOST_TOKEN={info1.get('user_token', '')};" n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") @@ -108,6 +115,10 @@ def main(): "(node2 does not list node1 -- swarm roster not symmetric; #348 regression?)") if not linkback: errs.append(f"node2 has no active link back to node1 ({s1})") + if not sib_ids: + errs.append("node1 recorded no sibling_ids in ~/siblings.json") + elif s2 and s2 not in sib_ids: + errs.append(f"node1's recorded sibling_ids {sib_ids} do not include adopted node {s2}") if errs: sys.stderr.write("adopt-node verify FAILED:\n") diff --git a/netsim/tasks/bootstrap-user-software-key/README.md b/netsim/tasks/bootstrap-user-software-key/README.md index 1a2eb316..3ab173c1 100644 --- a/netsim/tasks/bootstrap-user-software-key/README.md +++ b/netsim/tasks/bootstrap-user-software-key/README.md @@ -1,6 +1,3 @@ # bootstrap-user-software-key -Drives the Qwen operator on node1 to make it a **User-controlled node** — mint a -software User and install node1's active contract — following the astral-agent -skill's node-setup playbook. `verify.sh` independently confirms node1 answers as -that User. Produces stage `one-node` (from `astrald-lab`). +Mints a software User and installs an active contract on the operator node, turning it into a User-controlled node. verify proves it: acting as the persisted User, `apphost.whoami` reports the User id and `user.info` succeeds (it rejects without an active contract). Produces stage `one-node` (from `astrald-lab`).
diff --git a/netsim/tasks/bootstrap-user-software-key/prompt.md b/netsim/tasks/bootstrap-user-software-key/prompt.md index 22a7482d..d790fb76 100644 --- a/netsim/tasks/bootstrap-user-software-key/prompt.md +++ b/netsim/tasks/bootstrap-user-software-key/prompt.md @@ -1,4 +1,4 @@ -The local astral node has no user yet. Make it a user node under a new software user. +Set up a new user with a software key on the local astral node. Save the user's id and an access token to `~/user.json` (as `user_id` and `user_token`). diff --git a/netsim/tasks/configure-astral-agent/README.md b/netsim/tasks/configure-astral-agent/README.md index ccc8ad60..c80e449e 100644 --- a/netsim/tasks/configure-astral-agent/README.md +++ b/netsim/tasks/configure-astral-agent/README.md @@ -1,8 +1,3 @@ # configure-astral-agent -Installs the `astral-agent` skill into the Qwen Code operator on node1, so it can -drive astrald from the skill's playbooks + astral-docs instead of from procedures -spelled out in each prompt. The host clones the private `satforge/skills` -(`ssh://git@git.satforge.dev/satforge/skills.git`) via a deploy key -(`SATFORGE_SKILLS_DEPLOY_KEY`, a host path to the private key) and links -the skill into `~tester/.qwen/skills/astral-agent`. Part of `lab.story`. +Installs the `astral-agent` skill into the Qwen Code operator. `verify.sh` asserts the linked skill exists at `~/.qwen/skills/astral-agent` with `SKILL.md` frontmatter intact, a `references/` dir, the `astral-docs/README.md` mount, and operator ownership. Part of `lab.story`. 
diff --git a/netsim/tasks/configure-astral-agent/run.sh b/netsim/tasks/configure-astral-agent/run.sh index d0161ed0..83c2016b 100755 --- a/netsim/tasks/configure-astral-agent/run.sh +++ b/netsim/tasks/configure-astral-agent/run.sh @@ -31,6 +31,7 @@ while [ $# -gt 0 ]; do done REPO=${SATFORGE_SKILLS_REPO:-ssh://git@git.satforge.dev/satforge/skills.git} +REF=${SATFORGE_SKILLS_REF:-} # optional branch to check out, resolved as origin/<ref> (default: clone's default branch) KEY=${SATFORGE_SKILLS_DEPLOY_KEY:-} [ -n "$KEY" ] || { echo "set SATFORGE_SKILLS_DEPLOY_KEY to the deploy key path for $REPO" >&2; exit 1; } [ -r "$KEY" ] || { echo "deploy key not readable: $KEY" >&2; exit 1; } @@ -56,10 +57,21 @@ set -eu export PATH=/usr/local/go/bin:$PATH export GIT_SSH_COMMAND="ssh -i $HOME/.ssh/skills_deploy -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new" repo=$1 +ref=$2 src=$HOME/satforge-skills [ -d "$src/.git" ] || git clone --recurse-submodules "$repo" "$src" cd "$src" -git pull --ff-only --quiet 2>/dev/null || true +if [ -n "$ref" ]; then + # Fail loudly if the ref can't be fetched -- otherwise we'd silently link the + # default-branch skill (missing whatever the ref was supposed to add). + git fetch --quiet origin "$ref" + git rev-parse --verify --quiet "origin/$ref" >/dev/null \ + || { echo "skills ref '$ref' not found on origin" >&2; exit 1; } + git checkout --quiet -B "$ref" "origin/$ref" + git reset --hard --quiet "origin/$ref" +else + git pull --ff-only --quiet 2>/dev/null || true +fi git submodule update --init --recursive --quiet go build -C bin/satforge-skills -o satforge-skills .
bin="$src/bin/satforge-skills/satforge-skills" @@ -68,11 +80,11 @@ bin="$src/bin/satforge-skills/satforge-skills" SCRIPT chown "$u:$u" "$home/.netsim/setup-skill.sh" -su - "$u" -c "sh '$home/.netsim/setup-skill.sh' '$repo'" +su - "$u" -c "sh '$home/.netsim/setup-skill.sh' '$repo' '$ref'" echo "configure-astral-agent: $(hostname) cloned skills + linked astral-agent (deploy key left in place)" EOS ) echo "configure-astral-agent: injecting deploy key + linking on $VM (user $USER_NAME) ..." -netsim ssh "$VM" -- "u='$USER_NAME' key_b64='$key_b64' repo='$REPO'; $REMOTE_BODY" +netsim ssh "$VM" -- "u='$USER_NAME' key_b64='$key_b64' repo='$REPO' ref='$REF'; $REMOTE_BODY" echo "configure-astral-agent: done on $VM" diff --git a/netsim/tasks/enable-tor/README.md b/netsim/tasks/enable-tor/README.md new file mode 100644 index 00000000..85d1ad32 --- /dev/null +++ b/netsim/tasks/enable-tor/README.md @@ -0,0 +1,5 @@ +# enable-tor + +Gives each target node a Tor endpoint and saves its own onion to `/root/tor.json`. + +`verify.py` proves each VM runs tor and the saved onion matches the one astrald advertises live. diff --git a/netsim/tasks/enable-tor/run.sh b/netsim/tasks/enable-tor/run.sh new file mode 100755 index 00000000..94c2783a --- /dev/null +++ b/netsim/tasks/enable-tor/run.sh @@ -0,0 +1,89 @@ +#!/bin/sh +# enable-tor: bring up an astrald node with a Tor endpoint. Three steps per node: +# 1. install Tor and enable its control port (astrald's tor module uses SOCKS +# 127.0.0.1:9050 + control 127.0.0.1:9051 with cookie auth; stock Debian tor gives +# SOCKS but leaves the control port off); +# 2. restart astrald so its tor module re-initializes against the now-present control +# port (it connects only at start, with no retry) and publishes an onion service; +# 3. read the node's own Tor endpoint and save it to /root/tor.json. +# enable-tor [--vm ]... (no --vm -> every running VM) +# +# Runs ON THE HOST (cwd = sim root); ssh lands as root. 
astrald runs as root, so it can +# read Tor's control cookie regardless of its mode. +set -eu + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + *) echo "usage: enable-tor [--vm ]..." >&2; exit 64 ;; + esac +done +if [ -z "$VMS" ]; then + VMS=$(netsim vm ls --json | python3 -c \ + 'import json,sys; print(" ".join(v["hostname"] for v in json.load(sys.stdin) if v["state"]=="running"))') +fi +[ -n "$VMS" ] || { echo "no running VMs" >&2; exit 1; } + +REMOTE_BODY=$(cat <<'EOS' +set -eu +export DEBIAN_FRONTEND=noninteractive + +# 1) install Tor and enable the control port (cookie auth, loopback) +command -v tor >/dev/null 2>&1 || { + apt-get -qq -o DPkg::Lock::Timeout=120 update + apt-get -qq -y -o DPkg::Lock::Timeout=120 install tor >/dev/null +} +torrc=/etc/tor/torrc +grep -q '^ControlPort 9051' "$torrc" || printf '\nControlPort 9051\nCookieAuthentication 1\n' >> "$torrc" +systemctl restart tor +ok= +for _ in $(seq 1 30); do + if ss -ltn 2>/dev/null | grep -q '127.0.0.1:9051'; then ok=1; break; fi + sleep 1 +done +[ -n "$ok" ] || { echo "tor control port 9051 did not open on $(hostname)" >&2; exit 1; } + +# 2) restart astrald so its tor module re-initializes against the control port +systemctl restart astrald + +# 3) read the node's own onion endpoint and save it to /root/tor.json +onion= +for _ in $(seq 1 90); do + if systemctl is-active --quiet astrald; then + onion=$(astral-query nodes.resolve_endpoints -id localnode -out json 2>/dev/null | python3 -c ' +import json,sys +def addr(ep): + if isinstance(ep, str): return ep + if isinstance(ep, dict): + o = ep.get("Object"); return o if isinstance(o, str) else "" + return "" +for ln in sys.stdin: + ln = ln.strip() + if not ln: continue + try: o = json.loads(ln) + except Exception: continue + a = addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: print(a); break') + [ -n "$onion" ] && break + fi + sleep 
2 +done +[ -n "$onion" ] || { + echo "astrald did not publish a tor onion on $(hostname)" >&2 + journalctl -u astrald --no-pager 2>&1 | tail -30 >&2 || true + exit 1 +} +python3 -c 'import json,sys; json.dump({"onion": sys.argv[1], "endpoint": "tor:"+sys.argv[1]}, open("/root/tor.json","w"))' "$onion" +echo "enable-tor: $(hostname) tor up; onion=$onion (saved /root/tor.json)" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "enable-tor: bringing up Tor on $vm ..." + # shellcheck disable=SC2029 + netsim ssh "$vm" -- "$REMOTE_BODY" +done +echo "enable-tor: done on: $VMS" diff --git a/netsim/tasks/enable-tor/verify.py b/netsim/tasks/enable-tor/verify.py new file mode 100644 index 00000000..b0afbfea --- /dev/null +++ b/netsim/tasks/enable-tor/verify.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +"""verify enable-tor: each target VM runs Tor and saved its own onion endpoint. + +Independent host-side check (does not trust run.sh): on each VM the tor service is active, +/root/tor.json holds an onion endpoint, and that saved onion matches what astrald actually +advertises now (nodes.resolve_endpoints -id localnode). Reaches the VMs via netsim ssh. 
+""" +import argparse +import json +import subprocess +import sys + + +def ssh(vm, remote): + p = subprocess.run(["netsim", "ssh", vm, "--", remote], capture_output=True, text=True) + return p.stdout + + +def all_running_vms(): + out = subprocess.run(["netsim", "vm", "ls", "--json"], capture_output=True, text=True).stdout + try: + return [v["hostname"] for v in json.loads(out or "[]") if v.get("state") == "running"] + except json.JSONDecodeError: + return [] + + +def endpoint_addr(ep): + if isinstance(ep, str): + return ep + if isinstance(ep, dict): + o = ep.get("Object") + return o if isinstance(o, str) else "" + return "" + + +def live_onion(vm): + """The onion address astrald advertises now (resolve_endpoints -id localnode), or None.""" + stream = ssh(vm, "astral-query nodes.resolve_endpoints -id localnode -out json") + for ln in (stream or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + o = json.loads(ln) + except json.JSONDecodeError: + continue + a = endpoint_addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: + return a + return None + + +def saved(vm): + """The contents of /root/tor.json on the VM, as a dict.""" + try: + return json.loads(ssh(vm, "cat /root/tor.json") or "{}") or {} + except json.JSONDecodeError: + return {} + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", action="append", default=[]) + args, _ = ap.parse_known_args() + vms = args.vm or all_running_vms() + if not vms: + sys.stderr.write("enable-tor verify FAILED: no VMs to verify\n") + return 1 + + bad = False + for vm in vms: + tor_active = ssh(vm, "systemctl is-active tor 2>/dev/null").strip() == "active" + file_onion = str(saved(vm).get("onion", "")) + live = live_onion(vm) + + errs = [] + if not tor_active: + errs.append("the tor service is not active") + if not file_onion: + errs.append("no onion in /root/tor.json") + if not live: + errs.append("astrald advertises no onion (resolve_endpoints -id localnode)") + if file_onion 
and live and file_onion != live: + errs.append(f"saved onion {file_onion} != live onion {live}") + + if errs: + bad = True + sys.stderr.write(f"enable-tor verify FAILED on {vm}:\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + else: + print(f"enable-tor OK: {vm} runs tor and saved its onion {file_onion} to /root/tor.json") + return 1 if bad else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/enable-tor/verify.sh b/netsim/tasks/enable-tor/verify.sh new file mode 100755 index 00000000..07821264 --- /dev/null +++ b/netsim/tasks/enable-tor/verify.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. netsim sets $NETSIM_TASK_DIR +# to this task's directory and only auto-runs run.sh/verify.sh (the dirname +# fallback covers running this script directly). +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/expel-node/README.md b/netsim/tasks/expel-node/README.md index 52c3f8a6..c9307976 100644 --- a/netsim/tasks/expel-node/README.md +++ b/netsim/tasks/expel-node/README.md @@ -1,13 +1,3 @@ # expel-node -Drives the Qwen operator on node1 (the swarm's User) to **permanently expel node2** -from the swarm (`user.expel`), following the astral-agent skill's knowledge of the -user protocol. Expelling bans the node's identity: it is dropped from the swarm -roster and its links are torn down, though its underlying membership contract is -**not** revoked — the ban is enforced by a membership filter, not contract removal. - -`verify.py` independently confirms the post-ban state from both ends: node2 is -recorded in node1's `user.list_expelled`, node2 no longer appears in node1's -`user.swarm_status` (the roster shrinks — `OpSwarmStatus` lists `ActiveNodes`, -which filters the `expelledSet`), and the node1↔node2 link is gone on both ends. -Produces stage `two-nodes-expel` (from `two-nodes`). 
+The User (node1) permanently bans node2 from the swarm via `user.expel`. `verify.py` proves node2 is in `user.list_expelled`, gone from `user.swarm_status`, and the node1↔node2 link is torn down on both ends. From `two-nodes`; produces `two-nodes-expel`. diff --git a/netsim/tasks/expel-node/prompt.md b/netsim/tasks/expel-node/prompt.md index 1b2c626d..3d9b433a 100644 --- a/netsim/tasks/expel-node/prompt.md +++ b/netsim/tasks/expel-node/prompt.md @@ -1,2 +1 @@ -Another node is a member of your swarm and you no longer trust it. Permanently ban it -from the swarm so it can't rejoin. +Expel the other node from your local astral swarm. diff --git a/netsim/tasks/import-user-software-key/README.md b/netsim/tasks/import-user-software-key/README.md index 3a56e22c..2ff03cb5 100644 --- a/netsim/tasks/import-user-software-key/README.md +++ b/netsim/tasks/import-user-software-key/README.md @@ -1,8 +1,3 @@ # import-user-software-key -Drives the Qwen operator on node1 to make it a **User-controlled node from an -existing software User** — deriving the User key from a fixed, known BIP-39 -mnemonic embedded in `prompt.md` instead of minting a fresh one — following the -astral-agent skill's node-setup playbook. `verify.sh` confirms node1 answers as -that User (and, if `ASTRAL_USER_ID` is set, that the derived id matches exactly). -A drop-in alternative to `bootstrap-user-software-key`; produces stage `one-node`. +Makes the target node a User node from an existing software User, deriving the key from the BIP-39 mnemonic in `prompt.md` rather than minting fresh entropy. Verify asserts `apphost.whoami` reports that User id and `user.info` finds an active contract; if `ASTRAL_USER_ID` is set, the derived id must equal it. Drop-in alternative to `bootstrap-user-software-key`; produces stage `one-node`. 
diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md index 7136a2d7..302c99ee 100644 --- a/netsim/tasks/import-user-software-key/prompt.md +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -1,9 +1,8 @@ -The local astral node has no user yet. Set it up under my existing software user — -seed phrase: +Set up user on local astral node with given seed phrase: - horse soldier imitate stool square buyer verb party enjoy result jazz rabbit trigger file benefit cloth term change - -Use that seed phrase; don't create a new user. +``` +horse soldier imitate stool square buyer verb party enjoy result jazz rabbit trigger file benefit cloth term change +``` Save the user's id and an access token to `~/user.json` (as `user_id` and `user_token`). diff --git a/netsim/tasks/install-astrald/README.md b/netsim/tasks/install-astrald/README.md index d81fe9ca..5481023c 100644 --- a/netsim/tasks/install-astrald/README.md +++ b/netsim/tasks/install-astrald/README.md @@ -1,9 +1,3 @@ # install-astrald -Builds `astrald` and `astral-query` from source and runs `astrald` as a systemd -service on the target VMs (all running VMs by default, or `--vm `; `--ref` -builds a specific git ref). `verify.sh` confirms each node answers -`astral-query localnode:.spec`. The service is left running so the netsim stage -snapshots a live node that resumes already-running. Used by `lab.story`; see -[Running astrald as a service](../../../docs/running-as-a-service.md) for the unit -file and operational details. +Builds `astrald` and `astral-query` from source and runs `astrald` as a systemd service on the target VMs (all running, or `--vm `; `--ref` picks a git ref). Verify proves the unit is enabled and each node answers `astral-query localnode:.spec`. Left running for the snapshot; used by `lab.story`. 
diff --git a/netsim/tasks/object-store/README.md b/netsim/tasks/object-store/README.md index cc84e064..49fd4d05 100644 --- a/netsim/tasks/object-store/README.md +++ b/netsim/tasks/object-store/README.md @@ -1,20 +1,5 @@ # object-store -Drives the Qwen operator on node1 to **store an astral object and read it back**, -following the astral-agent skill, on a chosen **target node**. The bytes are fixed: -`run.sh` ships this task's `payload.txt` to the operator's home and the prompt tells -the agent to store *that file's* contents (no inventing text), so the object id and -bytes are deterministic. `--target` is an astral query target: +The operator on node1 stores `payload.txt` as an astral object on `--target` (default `localnode`; an alias like `node2` stores on that peer) and records the id. `verify.py` loads the object from the holder's local repo and asserts the bytes equal `payload.txt`. -- `localnode` (default): store on the local node (node1's own repo) — basic local - object operations (`objects.store` / `objects.load`). -- `node2` (a node alias registered by `adopt-node`): store on that node - (`node2:objects.store`) — write to a peer. - -`verify.py` independently re-loads the object from the **holder's** local repo -(`objects.load -repo local`, ungated) and asserts the bytes equal `payload.txt` -(`localnode`/`node1` → node1, `node2` → node2). Stories: - -- `object-store.story` (`localnode`) → `two-nodes-data` (object on node1) — feeds - `read-remote-object`. -- `object-store-peer.story` (`--target node2`) → `two-nodes-data-peer` (object on node2). +Stories: `object-store.story` (`localnode`) produces `two-nodes-data` and feeds `read-remote-object`; `object-store-peer.story` (`--target node2`) produces `two-nodes-data-peer`. 
diff --git a/netsim/tasks/object-store/prompt.md b/netsim/tasks/object-store/prompt.md index 3c50416f..ff63f443 100644 --- a/netsim/tasks/object-store/prompt.md +++ b/netsim/tasks/object-store/prompt.md @@ -1,5 +1,4 @@ Store the contents of `~/payload.txt` as an astral object on astral node -`__TARGET__`. Read it back from `__TARGET__` and confirm the bytes match the file. +`__TARGET__`. -Save the object id and what you read back to `~/object.json` (as `object_id` and -`object_readback`). +Save the object id to `~/object.json` (as `object_id`). diff --git a/netsim/tasks/object-store/run.sh b/netsim/tasks/object-store/run.sh index cf7e60d4..fd06cd04 100755 --- a/netsim/tasks/object-store/run.sh +++ b/netsim/tasks/object-store/run.sh @@ -47,19 +47,15 @@ su - tester -c 'qwen -y "$(cat /home/tester/.netsim/object-store.prompt)"' \ exit 1 } -# Cheap smoke-check; verify.py does the authoritative, independent check. The agent -# records its outputs in $HOME/object.json (/home/tester/object.json). +# Cheap smoke-check; verify.py does the authoritative read-back + byte match. The +# agent only stores and records the id in $HOME/object.json (/home/tester/object.json). 
oid=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_id",""))' 2>/dev/null || true) -orb=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_readback",""))' 2>/dev/null || true) -pay=$(cat /home/tester/payload.txt 2>/dev/null || true) [ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/object.json on $(hostname)" >&2; exit 1; } -[ -n "$orb" ] || { echo "agent recorded no object_readback on $(hostname)" >&2; exit 1; } case "$oid" in data1*) : ;; *) echo "WARNING $(hostname): object_id does not look like a data1… Object ID (verify.py decides)" >&2 ;; esac -[ "$pay" = "$orb" ] || echo "WARNING $(hostname): agent read-back != payload.txt (verify.py decides)" >&2 -echo "object-store: agent finished on $(hostname); stored+read object $oid" +echo "object-store: agent finished on $(hostname); stored object $oid" EOS ) diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index 0d68f4b3..66d7e222 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 """verify object-store: the stored object is present in the holder's local repo. -The agent (on node1) stored an object on a target node (--target). Independent -host-side check (does not trust run.sh or the agent's read-back): a repo-pinned, +The agent (on node1) only stored an object on a target node (--target) and recorded +its id. Reading it back and confirming the bytes is verify's job: a repo-pinned, ungated objects.load -repo local on the HOLDER must return the exact stored bytes. The holder is resolved from --target: localnode/node1 -> node1 (the operator vm), node2 -> node2. The object id comes from node1's object.json; the ground-truth -payload is the fixed payload.txt that run.sh shipped to the operator's home (not the -agent's account of what it stored). Reaches the VMs via netsim ssh. 
+payload is the fixed payload.txt that run.sh shipped to the operator's home. Reaches +the VMs via netsim ssh. """ import argparse import json @@ -68,13 +68,14 @@ def main(): info1 = info(args.vm) ID = "".join(str(info1.get("object_id", "")).split()) - READBACK = str(info1.get("object_readback", "")).rstrip("\n") # Canonical input: the exact bytes the agent was handed to store (run.sh shipped # payload.txt to the operator's home). Ground truth — we don't trust the agent's # own account of what it stored. PAY = (ssh(args.vm, "cat /home/tester/payload.txt") or "").rstrip("\n") - # Decisive: re-load the object from the holder's local repo (repo-pinned + ungated). + # Decisive: re-load the object from the holder's local repo (repo-pinned + ungated) + # and confirm the bytes match payload.txt — the read-back is verify's job, not the + # agent's (the agent only stores and records the id). h_load = ssh(holder, f"astral-query objects.load -id '{ID}' -repo local -out json") got = loaded_payload(h_load) local_ok = got is not None and got.rstrip("\n") == PAY @@ -84,8 +85,6 @@ def main(): errs.append("no object_id in node1's object.json") if not PAY: errs.append("payload.txt missing on the operator (run.sh must ship it)") - if READBACK and READBACK != PAY: - notes.append(f"agent's own read-back != payload.txt ({READBACK!r} != {PAY!r})") if not errs and local_ok: print(f"object-store OK (target={args.target}): {holder}'s local repo holds object " diff --git a/netsim/tasks/read-remote-object/README.md b/netsim/tasks/read-remote-object/README.md index 1df209e5..a87367ea 100644 --- a/netsim/tasks/read-remote-object/README.md +++ b/netsim/tasks/read-remote-object/README.md @@ -1,13 +1,3 @@ # read-remote-object -Has **node1's agent read an astral object that lives on the peer** (node2), over -astral. 
The object id is in node1's `~/object.json` (`object_id`, written by -`object-store --target node2`); the agent reads it from the peer **as the User** -(addressing the peer by its alias from `adopt-node`) and records what it read. -`verify.py` independently re-reads the peer's object as the User and asserts the -bytes match. - -Used by `read-remote-peer.story` (which first stores the object on node2, then runs -this read). Note: an *anonymous* read of a peer's object does **not** route (the -network zone is stripped); the read must come from an authenticated identity, which -is why it's driven by the User on node1. +node1's agent reads a peer's object (id from `~/object.json`) over astral as the User and records the bytes to `~/read.json`. verify re-reads the peer's object via `<peer>:objects.load` and asserts the bytes equal node1's stored `payload.txt`. Produces the remote read in `read-remote-peer.story`. From 8de785f7be6ffb890945c992b7cb5ff110fa40e9 Mon Sep 17 00:00:00 2001 From: intern0 Date: Tue, 23 Jun 2026 19:06:29 +0200 Subject: [PATCH 38/57] netsim: restore leave-lan + link-over-tor tasks + tor-link.story (Tor scenario 0004) Restore the two parked tasks for Tor scenario 0004 ("a node leaves the LAN and links over Tor"), completing the scenario alongside the already-committed enable-tor building block. Sequenced by tor-link.story (two-nodes -> two-nodes-tor): enable-tor -> leave-lan -> link-over-tor. - leave-lan (host): seed node1 with node2's onion while the LAN is up, then nftables-drop the LAN path between them (WAN/Tor egress stays up). verify.py asserts node2 can no longer TCP-connect to node1:1791 (only a timeout passes). - link-over-tor (agent): thin prompt drives the Qwen operator to force the swarm link over Tor (nodes.new_link -strategies tor) per the astral-agent skill's linking-over-tor playbook; verify.py independently asserts a nodes.links entry with Network=tor.
The linking-over-tor playbook is now on skills main (PR #4), so the lab builds against main with no SATFORGE_SKILLS_REF override. Checkpoint: not yet validated against the real Tor network (the original parking gate -- VM WAN NAT -> Tor reachability + the agent's per-turn tool-call cap). --- netsim/stories/tor-link.story | 9 +++ netsim/tasks/leave-lan/README.md | 19 ++++++ netsim/tasks/leave-lan/run.sh | 83 ++++++++++++++++++++++++++ netsim/tasks/leave-lan/verify.py | 67 +++++++++++++++++++++ netsim/tasks/leave-lan/verify.sh | 3 + netsim/tasks/link-over-tor/README.md | 14 +++++ netsim/tasks/link-over-tor/prompt.md | 5 ++ netsim/tasks/link-over-tor/run.sh | 52 ++++++++++++++++ netsim/tasks/link-over-tor/verify.py | 89 ++++++++++++++++++++++++++++ netsim/tasks/link-over-tor/verify.sh | 3 + 10 files changed, 344 insertions(+) create mode 100644 netsim/stories/tor-link.story create mode 100644 netsim/tasks/leave-lan/README.md create mode 100755 netsim/tasks/leave-lan/run.sh create mode 100644 netsim/tasks/leave-lan/verify.py create mode 100755 netsim/tasks/leave-lan/verify.sh create mode 100644 netsim/tasks/link-over-tor/README.md create mode 100644 netsim/tasks/link-over-tor/prompt.md create mode 100755 netsim/tasks/link-over-tor/run.sh create mode 100644 netsim/tasks/link-over-tor/verify.py create mode 100755 netsim/tasks/link-over-tor/verify.sh diff --git a/netsim/stories/tor-link.story b/netsim/stories/tor-link.story new file mode 100644 index 00000000..92736939 --- /dev/null +++ b/netsim/stories/tor-link.story @@ -0,0 +1,9 @@ +# tor-link.story — a node leaves the LAN and links over Tor (scenario 0004). +# Both nodes get system Tor (so astrald's tor module can publish/dial onions); node2 +# drops its LAN path to node1 (after node1 is seeded with node2's onion); then node1's +# agent re-establishes the swarm link over Tor. 
+# start: two-nodes save: two-nodes-tor +# netsim story --stage two-nodes --save two-nodes-tor netsim/stories/tor-link.story +enable-tor --vm node1 --vm node2 +leave-lan --vm node2 --peer node1 +link-over-tor --vm node1 --peer node2 diff --git a/netsim/tasks/leave-lan/README.md b/netsim/tasks/leave-lan/README.md new file mode 100644 index 00000000..9409464f --- /dev/null +++ b/netsim/tasks/leave-lan/README.md @@ -0,0 +1,19 @@ +# leave-lan + +Makes `--vm` (node2) **leave the LAN** with respect to `--peer` (node1). Two host-side +steps: + +1. **Seed the peer with the leaver's onion while the LAN is still up.** Once the LAN is + cut, node1 could no longer ask node2 for its address, so `leave-lan` first records + node2's `.onion` on node1 (`<vm>:nodes.resolve_endpoints` → `nodes.add_endpoint`), + keeping node2 reachable over Tor. +2. **Sever the LAN.** An nftables drop (a dedicated `netsimcut` table) blackholes all + traffic to/from the peer's LAN (`10.77.0.0/24`) address. The NIC and Internet/Tor + egress (the WAN NAT) stay up — only the direct LAN path is cut. + +astrald has no link keepalive, so the dead LAN link lingers as a (blackholed) stale +entry rather than closing — which is why the agent (`link-over-tor`) must *force* the +Tor link rather than wait for an automatic reconnect. `verify.py` independently confirms +`--vm` can no longer open a TCP connection to the peer's astral port (1791) — only a +connect **timeout** counts (a refusal/reset would be inconclusive, not a pass). +Host-driven. Used by `tor-link.story` after `enable-tor`. diff --git a/netsim/tasks/leave-lan/run.sh b/netsim/tasks/leave-lan/run.sh new file mode 100755 index 00000000..2e1080d9 --- /dev/null +++ b/netsim/tasks/leave-lan/run.sh @@ -0,0 +1,83 @@ +#!/bin/sh +# leave-lan: sever the LAN path between <vm> (node2, the node that "leaves") and +# <peer> (node1). astrald's tor module + the swarm link maintainer will then re-link over Tor. +# +# Two steps, both on the host: +# 1.
Seed <peer> with <vm>'s onion WHILE THE LAN IS STILL UP — once the LAN is gone the +# peer can no longer ask <vm> for its address, so it needs the .onion cached first. +# 2. nftables-drop all traffic between them on the LAN. The NIC stays up and Internet +# egress (the WAN NAT, used for Tor) is untouched — only the direct LAN path is cut. +# leave-lan [--vm <host>] [--peer <host>] (default: node2 leaves, peer node1) +# +# Both nodes must have Tor up (enable-tor) and the <vm> alias must resolve on +# <peer> (adopt-node). astral-query ops here (resolve_endpoints / add_endpoint) are ungated. +set -eu + +VM="node2"; PEER="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --peer) [ $# -ge 2 ] || { echo "need host after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;; + *) echo "usage: leave-lan [--vm ] [--peer ]" >&2; exit 64 ;; + esac +done + +# 1) seed <peer> with <vm>'s onion before the LAN goes away +SEED_BODY=$(cat <<'EOS' +set -eu +torof() { # read a .onion endpoint address from a resolve_endpoints json stream on stdin + python3 -c ' +import json,sys +def addr(ep): + if isinstance(ep,str): return ep + if isinstance(ep,dict): + o=ep.get("Object"); return o if isinstance(o,str) else "" + return "" +for ln in sys.stdin: + ln=ln.strip() + if not ln: continue + try: o=json.loads(ln) + except Exception: continue + a=addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: print(a); break' +} +# prefer the local cache (auto-synced over the live link); else ask the leaver directly +onion=$(astral-query nodes.resolve_endpoints -id "$leaver" -out json 2>/dev/null | torof || true) +[ -n "$onion" ] || onion=$(astral-query "$leaver":nodes.resolve_endpoints -id "$leaver" -out json 2>/dev/null | torof || true) +[ -n "$onion" ] || { echo "leave-lan: $(hostname) could not learn $leaver's onion before the cut" >&2; exit 1; } +astral-query nodes.add_endpoint -id "$leaver" -endpoint "tor:$onion" >/dev/null 2>&1 || true +echo "leave-lan:
$(hostname) seeded $leaver onion=$onion" +EOS +) +echo "leave-lan: seeding $PEER with $VM's onion ..." +# shellcheck disable=SC2029 +netsim ssh "$PEER" -- "leaver='$VM'; $SEED_BODY" + +# 2) resolve <peer>'s LAN address and drop it on <vm> +peer_ip=$(netsim ssh "$PEER" -- "hostname -I" | tr ' ' '\n' | grep '^10\.77\.' | head -1) +[ -n "$peer_ip" ] || { echo "leave-lan: could not find $PEER's 10.77 LAN address" >&2; exit 1; } + +CUT_BODY=$(cat <<'EOS' +set -eu +export DEBIAN_FRONTEND=noninteractive +command -v nft >/dev/null 2>&1 || { + apt-get -qq -o DPkg::Lock::Timeout=120 update + apt-get -qq -y -o DPkg::Lock::Timeout=120 install nftables >/dev/null +} +# A dedicated table so the cut is self-contained and easy to reason about. Chains are +# named netout/netin (not the nft scanner keywords in/out). Flush before adding so a +# re-run yields exactly one rule per direction. +nft add table ip netsimcut 2>/dev/null || true +nft 'add chain ip netsimcut netout { type filter hook output priority 0 ; }' 2>/dev/null || true +nft 'add chain ip netsimcut netin { type filter hook input priority 0 ; }' 2>/dev/null || true +nft flush chain ip netsimcut netout 2>/dev/null || true +nft flush chain ip netsimcut netin 2>/dev/null || true +nft add rule ip netsimcut netout ip daddr "$peer_ip" drop +nft add rule ip netsimcut netin ip saddr "$peer_ip" drop +echo "leave-lan: $(hostname) dropped LAN traffic to/from $peer_ip" +EOS +) +echo "leave-lan: severing LAN path $VM <-> $PEER ($peer_ip) ..." +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "peer_ip='$peer_ip'; $CUT_BODY" +echo "leave-lan: done on $VM" diff --git a/netsim/tasks/leave-lan/verify.py b/netsim/tasks/leave-lan/verify.py new file mode 100644 index 00000000..2111ca6b --- /dev/null +++ b/netsim/tasks/leave-lan/verify.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +"""verify leave-lan: <vm> can no longer reach <peer> over the LAN.
+ +Independent host-side check: from <vm>, a TCP connect to the peer's LAN address on the +astral port (1791) must NOT succeed (the nftables drop blackholes it -> timeout). The +peer's LAN IP is resolved from the peer. Reaches the VMs via netsim ssh. +""" +import argparse +import subprocess +import sys + +PORT = 1791 + + +def ssh(vm, remote): + p = subprocess.run(["netsim", "ssh", vm, "--", remote], capture_output=True, text=True) + return p.stdout + + +def peer_lan_ip(peer): + for tok in (ssh(peer, "hostname -I") or "").split(): + if tok.startswith("10.77."): + return tok + return "" + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", default="node2") # the node that left the LAN + ap.add_argument("--peer", default="node1") # the node it can no longer reach + args, _ = ap.parse_known_args() + + ip = peer_lan_ip(args.peer) + if not ip: + sys.stderr.write(f"leave-lan verify FAILED: could not resolve {args.peer}'s 10.77 LAN IP.\n") + return 1 + + # Only a TIMEOUT proves the nftables DROP blackholed the path. A connect that + # succeeds means the LAN is not severed; a refusal/reset (or any other error) means + # the path is reachable but the port is closed for another reason -> inconclusive, + # NOT a pass (would otherwise false-pass if the drop rule were missing).
+ probe = ( + "python3 -c 'import socket\n" + "s=socket.socket(); s.settimeout(3)\n" + f"try:\n s.connect((\"{ip}\",{PORT})); print(\"open\")\n" + "except socket.timeout:\n print(\"timeout\")\n" + "except Exception as e:\n print(\"err:\"+type(e).__name__)'" + ) + result = (ssh(args.vm, probe) or "").strip() + + if result == "timeout": + print(f"leave-lan OK: {args.vm} can no longer reach {args.peer} ({ip}:{PORT}) over the LAN " + "(connect times out — blackholed)") + return 0 + + if result == "open": + sys.stderr.write(f"leave-lan verify FAILED: {args.vm} still reaches {args.peer} " + f"({ip}:{PORT}) over the LAN (connect succeeded).\n") + else: + sys.stderr.write(f"leave-lan verify FAILED: probe to {args.peer} ({ip}:{PORT}) was " + f"inconclusive ({result!r}) — expected a timeout from the drop, not a " + "refusal/reset.\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/leave-lan/verify.sh b/netsim/tasks/leave-lan/verify.sh new file mode 100755 index 00000000..042470ab --- /dev/null +++ b/netsim/tasks/leave-lan/verify.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/link-over-tor/README.md b/netsim/tasks/link-over-tor/README.md new file mode 100644 index 00000000..b5223df8 --- /dev/null +++ b/netsim/tasks/link-over-tor/README.md @@ -0,0 +1,14 @@ +# link-over-tor + +Drives the Qwen operator on node1 to **re-establish the swarm link to the peer (node2) +over Tor** after node2 left the LAN, and to confirm the link rides over Tor — following +the astral-agent skill's *linking-over-tor* playbook. The dead LAN link still shows as +up (astrald has no keepalive), so the agent must *force* the Tor link +(`nodes.new_link -strategies tor`) using node2's onion, which `leave-lan` cached on node1 +before the cut. 
The agent records the peer's onion address and the link's transport in +`~/tor.json` (`peer_onion`, `link_network`). + +`verify.py` independently confirms node1 holds a link to the peer whose `Network` is +`tor` (`nodes.links`) — it asserts the transport, not the `.onion` endpoint string, since +an inbound tor link legitimately has no remote onion — and cross-checks the agent's +record. Agent-driven. Final task of `tor-link.story`; produces the `two-nodes-tor` stage. diff --git a/netsim/tasks/link-over-tor/prompt.md b/netsim/tasks/link-over-tor/prompt.md new file mode 100644 index 00000000..7754c5bd --- /dev/null +++ b/netsim/tasks/link-over-tor/prompt.md @@ -0,0 +1,5 @@ +Your swarm peer `__PEER__` has left the local network and is now reachable only over +Tor. Re-establish your link to `__PEER__` over Tor, and confirm the link is over Tor. + +Save `__PEER__`'s onion address and the link's transport to `~/tor.json` (as +`peer_onion` and `link_network`). diff --git a/netsim/tasks/link-over-tor/run.sh b/netsim/tasks/link-over-tor/run.sh new file mode 100755 index 00000000..5df1571e --- /dev/null +++ b/netsim/tasks/link-over-tor/run.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# link-over-tor: have node1's Qwen agent re-establish the swarm link to the peer +# (node2) over Tor after node2 left the LAN, and confirm the link rides over Tor. +# Driven by the agent following the astral-agent skill's linking-over-tor playbook. +# link-over-tor [--vm <host>] [--peer <alias>] (default: node1, node2) +# +# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the skill. verify.py +# then INDEPENDENTLY confirms node1 holds a tor link to the peer.
+set -eu + +VM="node1"; PEER="node2" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --peer) [ $# -ge 2 ] || { echo "need alias after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;; + *) echo "usage: link-over-tor [--vm ] [--peer ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt=$(sed "s|__PEER__|$PEER|g" "$here/prompt.md") # alias is [a-z0-9] — sed-safe +prompt_b64=$(printf '%s' "$prompt" | base64 -w0) + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/link-over-tor.prompt" +chown -R tester:tester "$d" + +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/link-over-tor.prompt)"' \ + > "$d/link-over-tor.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/link-over-tor.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.py does the authoritative, independent check. The agent +# records what it read in $HOME/tor.json under link_network (and peer_onion). +net=$(python3 -c 'import json;print(json.load(open("/home/tester/tor.json")).get("link_network",""))' 2>/dev/null || true) +[ -n "$net" ] || { echo "agent recorded no link_network in /home/tester/tor.json on $(hostname)" >&2; exit 1; } +echo "link-over-tor: agent finished on $(hostname); recorded link_network=$net" +EOS +) + +echo "link-over-tor: driving Qwen operator on $VM to link with $PEER over Tor ..." 
+# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "link-over-tor: done on $VM" diff --git a/netsim/tasks/link-over-tor/verify.py b/netsim/tasks/link-over-tor/verify.py new file mode 100644 index 00000000..8305f743 --- /dev/null +++ b/netsim/tasks/link-over-tor/verify.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +"""verify link-over-tor: node1 holds a live link to the peer over Tor. + +Independent host-side check (does not trust the agent): nodes.links on node1 must list a +link whose Network is "tor". (We assert the transport, not the .onion endpoint string — +an inbound tor link legitimately has no remote onion, so requiring ".onion" would +false-negative; node2 is the only sibling, so a tor link is a tor link to node2.) Also +cross-checks the agent's record. Reaches the VM via netsim ssh. +""" +import argparse +import json +import subprocess +import sys + + +def ssh(vm, remote): + p = subprocess.run(["netsim", "ssh", vm, "--", remote], capture_output=True, text=True) + return p.stdout + + +def jfile(vm, name): + try: + return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} + except json.JSONDecodeError: + return {} + + +def ep_addr(e): + """Address string of an exonet.Endpoint, whether it marshals bare or as {Type,Object}.""" + if isinstance(e, str): + return e + if isinstance(e, dict): + o = e.get("Object") + return o if isinstance(o, str) else "" + return "" + + +def tor_links(stream): + """(RemoteIdentity, endpoint-address) for links whose Network == 'tor'.""" + out = [] + for ln in (stream or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + o = json.loads(ln) + except json.JSONDecodeError: + continue + ob = o.get("Object") or {} + if str(ob.get("Network")) == "tor": + out.append((str(ob.get("RemoteIdentity", "")), ep_addr(ob.get("RemoteEndpoint")))) + return out + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", default="node1") # the operator; records 
tor.json here + ap.add_argument("--peer", default="node2") # the node that left the LAN + args, _ = ap.parse_known_args() + + tor = jfile(args.vm, "tor.json") # agent: peer_onion, link_network + net = str(tor.get("link_network", "")) + onion = str(tor.get("peer_onion", "")) + + # Decisive: an actual link over Tor from node1 (to the only sibling, the peer). + links = tor_links(ssh(args.vm, "astral-query nodes.links -out json")) + + notes = [] + if net != "tor": + notes.append(f"agent recorded link_network={net!r} (expected 'tor')") + if not onion: + notes.append("agent recorded no peer_onion") + + if links: + ep = links[0][1] or "(inbound, no remote onion)" + print(f"link-over-tor OK: {args.vm} holds a link to {args.peer} over Tor (endpoint {ep}).") + for n in notes: + sys.stderr.write(f" note: {n}\n") + return 0 + + sys.stderr.write(f"link-over-tor verify FAILED: {args.vm} has no link to {args.peer} over Tor.\n") + for n in notes: + sys.stderr.write(f" note: {n}\n") + sys.stderr.write(f" nodes.links:\n{ssh(args.vm, 'astral-query nodes.links -out json')}\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/link-over-tor/verify.sh b/netsim/tasks/link-over-tor/verify.sh new file mode 100755 index 00000000..042470ab --- /dev/null +++ b/netsim/tasks/link-over-tor/verify.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. 
+exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" From d0d78e2576d7de0aaa3271dde6f5f151b4f9e7a7 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 25 Jun 2026 03:16:44 +0200 Subject: [PATCH 39/57] netsim: disable apt-daily in the lab image; drop per-task apt quiescing A resumed stage runs astrald + all userspace live; netsim's sync_clock corrects the stale snapshot clock with a ~day forward jump, which makes systemd's Persistent apt-daily/apt-daily-upgrade timers fire and unattended-upgrades saturate the 1-vCPU VMs for minutes -- breaking every resumed scenario (node unreachable, QMP save timeouts). astrald itself tolerates the jump. Fix it once in the image (standard ephemeral-VM hygiene): install-astrald masks apt-daily/apt-daily-upgrade/unattended-upgrades on the fresh build boot, so every stage is born quiet. The per-task quiescing in enable-tor/leave-lan is now redundant and removed (DPkg::Lock::Timeout kept). Validated by a full rebuild: the resumed scenarios no longer saturate (object-store-peer/read-remote/tor-link green, no QMP/ssh-banner timeouts); tor-link clears the real-Tor path end to end. 
--- netsim/tasks/enable-tor/run.sh | 4 ++-- netsim/tasks/install-astrald/run.sh | 9 +++++++++ netsim/tasks/leave-lan/run.sh | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/netsim/tasks/enable-tor/run.sh b/netsim/tasks/enable-tor/run.sh index 94c2783a..3ded7237 100755 --- a/netsim/tasks/enable-tor/run.sh +++ b/netsim/tasks/enable-tor/run.sh @@ -31,8 +31,8 @@ export DEBIAN_FRONTEND=noninteractive # 1) install Tor and enable the control port (cookie auth, loopback) command -v tor >/dev/null 2>&1 || { - apt-get -qq -o DPkg::Lock::Timeout=120 update - apt-get -qq -y -o DPkg::Lock::Timeout=120 install tor >/dev/null + apt-get -qq -o DPkg::Lock::Timeout=300 update + apt-get -qq -y -o DPkg::Lock::Timeout=300 install tor >/dev/null } torrc=/etc/tor/torrc grep -q '^ControlPort 9051' "$torrc" || printf '\nControlPort 9051\nCookieAuthentication 1\n' >> "$torrc" diff --git a/netsim/tasks/install-astrald/run.sh b/netsim/tasks/install-astrald/run.sh index 760be837..19a4e83e 100755 --- a/netsim/tasks/install-astrald/run.sh +++ b/netsim/tasks/install-astrald/run.sh @@ -37,6 +37,15 @@ if [ -n "$need" ]; then apt-get -qq -o DPkg::Lock::Timeout=120 update apt-get -qq -y -o DPkg::Lock::Timeout=120 install $need ca-certificates >/dev/null fi + +# Ephemeral test-VM hygiene: disable the apt periodic machinery so a clock jump on +# resume (netsim corrects the stale snapshot clock) can't wake apt-daily / +# unattended-upgrades and saturate this 1-vCPU VM. Baked into the saved snapshot; +# mask the timers too so a later apt-get update can't re-arm them. Intentional — these +# are throwaway VMs that never need background package refreshes/security upgrades. +systemctl disable --now apt-daily.timer apt-daily-upgrade.timer >/dev/null 2>&1 || true +systemctl mask apt-daily.timer apt-daily-upgrade.timer apt-daily.service apt-daily-upgrade.service unattended-upgrades.service >/dev/null 2>&1 || true + if ! 
/usr/local/go/bin/go version 2>/dev/null | grep -q "go$go_ver "; then case "$(uname -m)" in x86_64) ga=amd64 ;; aarch64) ga=arm64 ;; diff --git a/netsim/tasks/leave-lan/run.sh b/netsim/tasks/leave-lan/run.sh index 2e1080d9..7f15fcc8 100755 --- a/netsim/tasks/leave-lan/run.sh +++ b/netsim/tasks/leave-lan/run.sh @@ -61,8 +61,8 @@ CUT_BODY=$(cat <<'EOS' set -eu export DEBIAN_FRONTEND=noninteractive command -v nft >/dev/null 2>&1 || { - apt-get -qq -o DPkg::Lock::Timeout=120 update - apt-get -qq -y -o DPkg::Lock::Timeout=120 install nftables >/dev/null + apt-get -qq -o DPkg::Lock::Timeout=300 update + apt-get -qq -y -o DPkg::Lock::Timeout=300 install nftables >/dev/null } # A dedicated table so the cut is self-contained and easy to reason about. Chains are # named netout/netin (not the nft scanner keywords in/out). Flush before adding so a From 031137160adcc8c92e4863d7109135ef837a87e7 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 25 Jun 2026 03:16:44 +0200 Subject: [PATCH 40/57] netsim: expel-node verify asserts swarm membership, not link teardown Expulsion is a membership change, not a disconnect -- a lingering link is permitted -- so verify no longer checks nodes.links. It asserts node2 is in user.list_expelled and gone from user.swarm_status. node2's identity now comes from node1's siblings.json (recorded by adopt-node), not from node2: once expelled, node2 rejects user.info (query rejected (2) untokened, auth_failed with the User token), so it can't identify itself. Verified live against a post-expel stage. 
--- netsim/tasks/expel-node/README.md | 2 +- netsim/tasks/expel-node/verify.py | 75 +++++++++++-------------------- 2 files changed, 26 insertions(+), 51 deletions(-) diff --git a/netsim/tasks/expel-node/README.md b/netsim/tasks/expel-node/README.md index c9307976..b29eb8dc 100644 --- a/netsim/tasks/expel-node/README.md +++ b/netsim/tasks/expel-node/README.md @@ -1,3 +1,3 @@ # expel-node -The User (node1) permanently bans node2 from the swarm via `user.expel`. `verify.py` proves node2 is in `user.list_expelled`, gone from `user.swarm_status`, and the node1↔node2 link is torn down on both ends. From `two-nodes`; produces `two-nodes-expel`. +The User (node1) permanently bans node2 from the swarm via `user.expel`. `verify.py` proves node2 is in `user.list_expelled` and gone from `user.swarm_status`. From `two-nodes`; produces `two-nodes-expel`. diff --git a/netsim/tasks/expel-node/verify.py b/netsim/tasks/expel-node/verify.py index d45e6a93..e1b0d16f 100755 --- a/netsim/tasks/expel-node/verify.py +++ b/netsim/tasks/expel-node/verify.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 """verify expel-node: node1 (the User) permanently banned node2 from the swarm. -Independent both-ends check (does not trust run.sh); reaches the VMs via netsim ssh. -The core property — confirmed in code (user.OpSwarmStatus -> ActiveNodes filters the -expelledSet) — is that an expelled node yields FEWER swarm_status results: node2 is -gone from node1's roster, recorded in user.list_expelled, and the link is torn down. +Independent check (does not trust run.sh); reaches the VMs via netsim ssh. Asserts node2 +is recorded in user.list_expelled and is gone from node1's user.swarm_status roster +(user.OpSwarmStatus -> ActiveNodes filters the expelledSet). Link state is not asserted. 
+ +node2's identity comes from node1's siblings.json (recorded by adopt-node), NOT from node2 +itself: once expelled, node2 rejects user.info (query rejected (2) untokened, auth_failed +with the User token — it no longer accepts the User it was banned from), so it is not a +usable identity source. """ import argparse import json @@ -19,10 +23,10 @@ def ssh(vm, remote): return p.stdout -def info(vm): - """The agent's $HOME/user.json (/home/tester/user.json) on the VM, as a dict.""" +def jfile(vm, name): + """/home/tester/<name> on the VM, parsed as a dict.""" try: - return json.loads(ssh(vm, "cat /home/tester/user.json") or "{}") or {} + return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} except json.JSONDecodeError: return {} @@ -41,16 +45,6 @@ def objs(stream): return out -def contract(stream): - """(Issuer, Subject) of the active contract from a user.info stream.""" - for o in objs(stream): - ob = o.get("Object") - if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): - c = ob["Contract"].get("Contract", {}) - return c.get("Issuer"), c.get("Subject") - return None, None - - def swarm_identities(stream): """Set of node identities listed in a user.swarm_status stream.""" ids = set() @@ -80,15 +74,6 @@ def is_expelled(stream, ident): return False -def has_link_to(links, identity): - """True if a nodes.links stream contains an active link to `identity`.""" - for o in objs(links): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("RemoteIdentity") == identity: - return True - return False - - def main(): ap = argparse.ArgumentParser() ap.add_argument("--node1", default="node1") @@ -96,43 +81,33 @@ def main(): args, _ = ap.parse_known_args() vm1, vm2 = args.node1, args.node2 - # node1 acts as the User (token from bootstrap); the expel op requires the caller - # to be the contract issuer, so list_expelled / swarm_status run under that token.
- info1 = info(vm1) + # node1 acts as the User (token from bootstrap); list_expelled / swarm_status require + # the caller to be the contract issuer, so they run under that token. + info1 = jfile(vm1, "user.json") U = "".join(str(info1.get("user_id", "")).split()) TOKEN = f"export ASTRALD_APPHOST_TOKEN={info1.get('user_token', '')};" - n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") - n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") - n1_expelled = ssh(vm1, TOKEN + " astral-query user.list_expelled -out json") - n1_links = ssh(vm1, "astral-query nodes.links -out json") + # node2's identity from node1's siblings.json (recorded by adopt-node) — a stable + # source. The expelled node itself can't be queried (post-ban node2 rejects user.info). + sibs = jfile(vm1, "siblings.json") + sib_ids = ["".join(str(x).split()) for x in (sibs.get("sibling_ids") or []) if x] + s2 = sib_ids[0] if sib_ids else None - # node2 still holds its membership contract (expel bans, it does not revoke the - # contract), so its identity is still readable from its own user.info. 
- n2_info = ssh(vm2, "astral-query user.info -out json") - n2_links = ssh(vm2, "astral-query nodes.links -out json") - - _, s1 = contract(n1_info) # node1's identity (the swarm User's own node) - _, s2 = contract(n2_info) # node2's identity (the expelled subject) + n1_expelled = ssh(vm1, TOKEN + " astral-query user.list_expelled -out json") + n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") members = swarm_identities(n1_swarm) errs = [] if not U: errs.append("no user_id in node1's user.json") if not s2: - errs.append("could not resolve node2's identity from its user.info") - if not is_expelled(n1_expelled, s2): + errs.append("no sibling_ids in node1's siblings.json — can't identify the expelled node") + if s2 and not is_expelled(n1_expelled, s2): errs.append(f"node2 {s2} is NOT in node1's user.list_expelled " "(expulsion was never issued — agent did not expel the node)") if s2 and s2 in members: errs.append(f"node2 {s2} still appears in node1's user.swarm_status " "(roster not reduced — expelledSet filter did not drop it)") - if s2 and has_link_to(n1_links, s2): - errs.append(f"node1 still holds an active link to expelled node2 {s2} " - "(applyExpulsion did not close the link)") - if s1 and has_link_to(n2_links, s1): - errs.append(f"node2 still holds an active link back to node1 {s1} " - "(link not torn down on the peer end)") if errs: sys.stderr.write("expel-node verify FAILED:\n") @@ -141,8 +116,8 @@ def main(): return 1 print(f"expel OK: User {U[:8]}.. banned node2 {s2[:8]}.. 
— recorded in " - f"user.list_expelled, dropped from user.swarm_status ({len(members)} " - f"member(s) remain), and the link is torn down on both ends") + f"user.list_expelled and dropped from user.swarm_status " + f"({len(members)} member(s) remain).") return 0 From d78c366b5044108f5dc2a41799543874252ad47b Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 25 Jun 2026 03:16:44 +0200 Subject: [PATCH 41/57] netsim: node-setup prompts require the active contract, not just the key The import/bootstrap prompts said "set up user ... save id + token", which the agent could satisfy by importing the key + minting a token without installing the node's active contract (user.info then rejects). State the end goal -- make this a User node, install the active contract -- so the agent runs the full node-setup flow. Validated: import-user now passes. --- netsim/tasks/bootstrap-user-software-key/prompt.md | 2 +- netsim/tasks/import-user-software-key/prompt.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/netsim/tasks/bootstrap-user-software-key/prompt.md b/netsim/tasks/bootstrap-user-software-key/prompt.md index d790fb76..3fa02e32 100644 --- a/netsim/tasks/bootstrap-user-software-key/prompt.md +++ b/netsim/tasks/bootstrap-user-software-key/prompt.md @@ -1,4 +1,4 @@ -Set up a new user with a software key on the local astral node. +Make this node a User node with a new software key — install its active user contract, not just create the key. Save the user's id and an access token to `~/user.json` (as `user_id` and `user_token`). 
diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md index 302c99ee..a6692499 100644 --- a/netsim/tasks/import-user-software-key/prompt.md +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -1,4 +1,4 @@ -Set up user on local astral node with given seed phrase: +Make this node a User node from the given seed phrase — install its active user contract, not just import the key: ``` horse soldier imitate stool square buyer verb party enjoy result jazz rabbit trigger file benefit cloth term change From 2a2136b642e4eeeb4fe04be022ab75017d72dcd0 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 25 Jun 2026 13:19:22 +0200 Subject: [PATCH 42/57] netsim: minimize task READMEs to a single tight paragraph each --- netsim/tasks/adopt-node/README.md | 6 +----- .../bootstrap-user-software-key/README.md | 2 +- netsim/tasks/configure-astral-agent/README.md | 2 +- netsim/tasks/enable-tor/README.md | 4 +--- netsim/tasks/expel-node/README.md | 2 +- .../tasks/import-user-software-key/README.md | 2 +- netsim/tasks/install-astrald/README.md | 2 +- netsim/tasks/leave-lan/README.md | 18 +----------------- netsim/tasks/link-over-tor/README.md | 13 +------------ netsim/tasks/object-store/README.md | 4 +--- netsim/tasks/read-remote-object/README.md | 2 +- 11 files changed, 11 insertions(+), 46 deletions(-) diff --git a/netsim/tasks/adopt-node/README.md b/netsim/tasks/adopt-node/README.md index e253faee..6562bf7a 100644 --- a/netsim/tasks/adopt-node/README.md +++ b/netsim/tasks/adopt-node/README.md @@ -1,7 +1,3 @@ # adopt-node -Adopts node2 into node1's User swarm and registers `node1`/`node2` aliases on both nodes. - -`verify.py` proves both nodes hold a contract from the same User, node2 links back to node1, the roster is symmetric (each lists the other as a Linked sibling), and `sibling_ids` includes node2. - -Produces stage `two-nodes` (from `one-node`). 
+node1's agent adopts node2 into its User swarm and saves the sibling ids to `~/siblings.json`; the host then registers the `node1`/`node2` aliases. verify.py asserts both nodes hold a contract from the same User, each lists the other as a Linked sibling, and `sibling_ids` includes node2. one-node → two-nodes. diff --git a/netsim/tasks/bootstrap-user-software-key/README.md b/netsim/tasks/bootstrap-user-software-key/README.md index 3ab173c1..52dbb17d 100644 --- a/netsim/tasks/bootstrap-user-software-key/README.md +++ b/netsim/tasks/bootstrap-user-software-key/README.md @@ -1,3 +1,3 @@ # bootstrap-user-software-key -Mints a software User and installs an active contract on the operator node, turning it into a User-controlled node. verify proves it: acting as the persisted User, `apphost.whoami` reports the User id and `user.info` succeeds (it rejects without an active contract). Produces stage `one-node` in `astrald-lab`. +node1's agent creates a software User and installs an active contract, making node1 a User node. verify.sh acts as the User and asserts `apphost.whoami` reports the User id and `user.info` succeeds. astrald-lab → one-node. diff --git a/netsim/tasks/configure-astral-agent/README.md b/netsim/tasks/configure-astral-agent/README.md index c80e449e..202becfd 100644 --- a/netsim/tasks/configure-astral-agent/README.md +++ b/netsim/tasks/configure-astral-agent/README.md @@ -1,3 +1,3 @@ # configure-astral-agent -Installs the `astral-agent` skill into the Qwen Code operator. `verify.sh` asserts the linked skill exists at `~/.qwen/skills/astral-agent` with `SKILL.md` frontmatter intact, a `references/` dir, the `astral-docs/README.md` mount, and operator ownership. Part of `lab.story`. +On node1, the host clones the satforge/skills repo with a deploy key, builds the linker, and links the astral-agent skill into the Qwen operator at `~/.qwen/skills/astral-agent`. verify.sh asserts the linked skill is present and owned by the operator. 
diff --git a/netsim/tasks/enable-tor/README.md b/netsim/tasks/enable-tor/README.md index 85d1ad32..547caeeb 100644 --- a/netsim/tasks/enable-tor/README.md +++ b/netsim/tasks/enable-tor/README.md @@ -1,5 +1,3 @@ # enable-tor -Gives each target node a Tor endpoint and saves its own onion to `/root/tor.json`. - -`verify.py` proves each VM runs tor and the saved onion matches the one astrald advertises live. +On each target VM, installs Tor with its control port, restarts astrald to publish an onion, and saves the node's own endpoint to `/root/tor.json`. verify.py asserts each VM runs tor and its saved onion matches the one astrald advertises via `nodes.resolve_endpoints`. diff --git a/netsim/tasks/expel-node/README.md b/netsim/tasks/expel-node/README.md index b29eb8dc..271b1ca5 100644 --- a/netsim/tasks/expel-node/README.md +++ b/netsim/tasks/expel-node/README.md @@ -1,3 +1,3 @@ # expel-node -The User (node1) permanently bans node2 from the swarm via `user.expel`. `verify.py` proves node2 is in `user.list_expelled` and gone from `user.swarm_status`. From `two-nodes`; produces `two-nodes-expel`. +node1's agent expels node2 from the swarm via `user.expel`. verify.py asserts node2 is in `user.list_expelled` and gone from `user.swarm_status`. two-nodes → two-nodes-expel. diff --git a/netsim/tasks/import-user-software-key/README.md b/netsim/tasks/import-user-software-key/README.md index 2ff03cb5..9cd86ef8 100644 --- a/netsim/tasks/import-user-software-key/README.md +++ b/netsim/tasks/import-user-software-key/README.md @@ -1,3 +1,3 @@ # import-user-software-key -Makes the target node a User node from an existing software User, deriving the key from the BIP-39 mnemonic in `prompt.md` rather than minting fresh entropy. Verify asserts `apphost.whoami` reports that User id and `user.info` finds an active contract; if `ASTRAL_USER_ID` is set, the derived id must equal it. Drop-in alternative to `bootstrap-user-software-key`; produces stage `one-node`. 
+node1's agent makes node1 a User node from the BIP-39 mnemonic in `prompt.md`, deriving the existing key and installing its active contract. verify.sh asserts `apphost.whoami` reports that User id and `user.info` finds an active contract (matching `ASTRAL_USER_ID` if set). astrald-lab → one-node. diff --git a/netsim/tasks/install-astrald/README.md b/netsim/tasks/install-astrald/README.md index 5481023c..bfd68283 100644 --- a/netsim/tasks/install-astrald/README.md +++ b/netsim/tasks/install-astrald/README.md @@ -1,3 +1,3 @@ # install-astrald -Builds `astrald` and `astral-query` from source and runs `astrald` as a systemd service on the target VMs (all running, or `--vm `; `--ref` picks a git ref). Verify proves the unit is enabled and each node answers `astral-query localnode:.spec`. Left running for the snapshot; used by `lab.story`. +Builds `astrald` and `astral-query` from source and runs `astrald` as a systemd service on the target VMs (all running, or `--vm `; `--ref` picks a git ref). verify.sh asserts the unit is enabled and each node answers `astral-query localnode:.spec`. diff --git a/netsim/tasks/leave-lan/README.md b/netsim/tasks/leave-lan/README.md index 9409464f..47d4b9f4 100644 --- a/netsim/tasks/leave-lan/README.md +++ b/netsim/tasks/leave-lan/README.md @@ -1,19 +1,3 @@ # leave-lan -Makes `--vm` (node2) **leave the LAN** with respect to `--peer` (node1). Two host-side -steps: - -1. **Seed the peer with the leaver's onion while the LAN is still up.** Once the LAN is - cut, node1 could no longer ask node2 for its address, so `leave-lan` first records - node2's `.onion` on node1 (`:nodes.resolve_endpoints` → `nodes.add_endpoint`), - keeping node2 reachable over Tor. -2. **Sever the LAN.** An nftables drop (a dedicated `netsimcut` table) blackholes all - traffic to/from the peer's LAN (`10.77.0.0/24`) address. The NIC and Internet/Tor - egress (the WAN NAT) stay up — only the direct LAN path is cut. 
- -astrald has no link keepalive, so the dead LAN link lingers as a (blackholed) stale -entry rather than closing — which is why the agent (`link-over-tor`) must *force* the -Tor link rather than wait for an automatic reconnect. `verify.py` independently confirms -`--vm` can no longer open a TCP connection to the peer's astral port (1791) — only a -connect **timeout** counts (a refusal/reset would be inconclusive, not a pass). -Host-driven. Used by `tor-link.story` after `enable-tor`. +On the host, seeds `--peer` (node1) with `--vm` (node2)'s onion (`nodes.resolve_endpoints` → `nodes.add_endpoint`), then nftables-drops the LAN path between them, leaving node2 reachable from node1 only over Tor. verify.py asserts node2 can no longer TCP-connect to node1's LAN address on port 1791. diff --git a/netsim/tasks/link-over-tor/README.md b/netsim/tasks/link-over-tor/README.md index b5223df8..1bd8a2fb 100644 --- a/netsim/tasks/link-over-tor/README.md +++ b/netsim/tasks/link-over-tor/README.md @@ -1,14 +1,3 @@ # link-over-tor -Drives the Qwen operator on node1 to **re-establish the swarm link to the peer (node2) -over Tor** after node2 left the LAN, and to confirm the link rides over Tor — following -the astral-agent skill's *linking-over-tor* playbook. The dead LAN link still shows as -up (astrald has no keepalive), so the agent must *force* the Tor link -(`nodes.new_link -strategies tor`) using node2's onion, which `leave-lan` cached on node1 -before the cut. The agent records the peer's onion address and the link's transport in -`~/tor.json` (`peer_onion`, `link_network`). - -`verify.py` independently confirms node1 holds a link to the peer whose `Network` is -`tor` (`nodes.links`) — it asserts the transport, not the `.onion` endpoint string, since -an inbound tor link legitimately has no remote onion — and cross-checks the agent's -record. Agent-driven. Final task of `tor-link.story`; produces the `two-nodes-tor` stage. +node1's agent re-links to node2 over Tor. 
verify.py asserts node1 holds a link to node2 with `Network=tor`. two-nodes → two-nodes-tor. diff --git a/netsim/tasks/object-store/README.md b/netsim/tasks/object-store/README.md index 49fd4d05..548f9a7a 100644 --- a/netsim/tasks/object-store/README.md +++ b/netsim/tasks/object-store/README.md @@ -1,5 +1,3 @@ # object-store -The operator on node1 stores `payload.txt` as an astral object on `--target` (default `localnode`; an alias like `node2` stores on that peer) and records the id. `verify.py` loads the object from the holder's local repo and asserts the bytes equal `payload.txt`. - -Stories: `object-store.story` (`localnode`) produces `two-nodes-data` and feeds `read-remote-object`; `object-store-peer.story` (`--target node2`) produces `two-nodes-data-peer`. +node1's agent stores `payload.txt` as an Object via `objects.store` on `--target` (default `localnode`; an alias stores on that peer) and records the id. verify.py re-loads the id with `objects.load -repo local` on the holder and asserts the bytes equal `payload.txt`. two-nodes → two-nodes-data (localnode) or two-nodes-data-peer (`--target node2`). diff --git a/netsim/tasks/read-remote-object/README.md b/netsim/tasks/read-remote-object/README.md index a87367ea..15ac018b 100644 --- a/netsim/tasks/read-remote-object/README.md +++ b/netsim/tasks/read-remote-object/README.md @@ -1,3 +1,3 @@ # read-remote-object -node1's agent reads a peer's object (id from `~/object.json`) over astral as the User and records the bytes to `~/read.json`. verify re-reads the peer's object via `:objects.load` and asserts the bytes equal node1's stored `payload.txt`. Produces the remote read in `read-remote-peer.story`. +node1's agent reads node2's Object (id from `~/object.json`) over astral as the User and records it to `~/read.json`. verify.py independently re-reads it via `node2:objects.load` as the User and asserts the bytes equal node1's stored `payload.txt`. 
From 4d7c165aa7406bef3d408f0e1563626784a34fd5 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 25 Jun 2026 13:19:22 +0200 Subject: [PATCH 43/57] netsim: trim link-over-tor prompt; verify owns the Tor confirmation --- netsim/tasks/link-over-tor/prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netsim/tasks/link-over-tor/prompt.md b/netsim/tasks/link-over-tor/prompt.md index 7754c5bd..95333693 100644 --- a/netsim/tasks/link-over-tor/prompt.md +++ b/netsim/tasks/link-over-tor/prompt.md @@ -1,5 +1,5 @@ -Your swarm peer `__PEER__` has left the local network and is now reachable only over -Tor. Re-establish your link to `__PEER__` over Tor, and confirm the link is over Tor. +Your swarm peer `__PEER__` is reachable over Tor. Re-establish your link to +`__PEER__` over Tor. Save `__PEER__`'s onion address and the link's transport to `~/tor.json` (as `peer_onion` and `link_network`). From 50d4081b1893cc07c7d58058c1b5176f64c7bc88 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 25 Jun 2026 19:22:56 +0200 Subject: [PATCH 44/57] netsim: add tasks/_lib astral-py verify library (client+tunnel, CLI fallback, offline tests) --- netsim/tasks/_lib/netsim_astral.py | 303 ++++++++++++++++++++++++ netsim/tasks/_lib/test_netsim_astral.py | 134 +++++++++++ 2 files changed, 437 insertions(+) create mode 100644 netsim/tasks/_lib/netsim_astral.py create mode 100644 netsim/tasks/_lib/test_netsim_astral.py diff --git a/netsim/tasks/_lib/netsim_astral.py b/netsim/tasks/_lib/netsim_astral.py new file mode 100644 index 00000000..228316e3 --- /dev/null +++ b/netsim/tasks/_lib/netsim_astral.py @@ -0,0 +1,303 @@ +"""Shared host-side verify library for the netsim astral scenarios. 
+ +Each task's verify.py reaches this through a realpath shim that crosses netsim's +per-task symlink: + + import os, sys + sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) + import netsim_astral as na + +It centralises the two halves every verifier shares: + + * transport -- ssh()/file readers/all_running_vms()/peer_lan_ip(): unchanged + subprocess plumbing for reading the agent's recorded artifacts and probing + inside a VM. + + * queries -- connect(vm, token=...) yields a Node whose .call(op, ...) returns + a list of astral.AstralObject. Queries go through the astral-py typed client + (reached host-side over an ssh -L forward of the VM's WebSocket apphost + port), falling back to the lockstep Go `astral-query` CLI -- same JSON, + parsed with astral-py's from_json_envelope -- whenever the client can't + serve an op (pinned in SHELL_OPS, or it raised). Both paths return the same + list[AstralObject], so the interrogators below are transport-agnostic. + +astral-py is imported from an editable checkout (no pip needed on this host): +$ASTRALPY_SRC, else ~/work/satforge/astral-py/master/src. +""" +import contextlib +import json +import os +import socket +import subprocess +import sys +import time + +# --- astral-py (editable checkout; pip-free) --------------------------------- +_ASTRALPY_SRC = os.environ.get("ASTRALPY_SRC") or os.path.expanduser( + "~/work/satforge/astral-py/master/src") +if os.path.isdir(_ASTRALPY_SRC) and _ASTRALPY_SRC not in sys.path: + sys.path.insert(0, _ASTRALPY_SRC) +import astral # noqa: E402 +from astral.encoding import from_json_envelope # noqa: E402 + +# apphost WebSocket port inside each VM (binds 0.0.0.0; reachable via ssh -L). +WS_PORT = 8624 + +# Ops to keep on the Go astral-query CLI instead of the astral-py client. +# Populated by the smoke-test triage when the client disagrees with the CLI on a +# specific op (a silent mismatch the auto-fallback can't catch). 
Empty => every + # op tries the client first. +SHELL_OPS = set() + + +# --- transport: subprocess into the VM --------------------------------------- +def ssh(vm, remote): + """Run `netsim ssh <vm> -- <remote>` on the host; return stdout (best-effort).""" + p = subprocess.run(["netsim", "ssh", vm, "--", remote], + capture_output=True, text=True) + return p.stdout + + +def read_file(vm, path): + """Contents of <path> on the VM, trailing newline stripped ("" on error).""" + return (ssh(vm, f"cat {path}") or "").rstrip("\n") + + +def read_json(vm, path): + """<path> on the VM parsed as a dict ({} on error).""" + try: + return json.loads(ssh(vm, f"cat {path}") or "{}") or {} + except json.JSONDecodeError: + return {} + + +def home_json(vm, name): + """An agent artifact under /home/tester/, parsed as a dict.""" + return read_json(vm, f"/home/tester/{name}") + + +def all_running_vms(): + """Hostnames of the running VMs in the current simulation.""" + out = subprocess.run(["netsim", "vm", "ls", "--json"], + capture_output=True, text=True).stdout + try: + return [v["hostname"] for v in json.loads(out or "[]") + if v.get("state") == "running"] + except json.JSONDecodeError: + return [] + + +def peer_lan_ip(peer): + """The 10.77.* LAN address of <peer> ("" if none).""" + for tok in (ssh(peer, "hostname -I") or "").split(): + if tok.startswith("10.77."): + return tok + return "" + + +# --- queries: astral-py client over an ssh -L forward, Go-CLI fallback ------- +def parse_cli(raw): + """Parse `astral-query -out json` output into AstralObjects (eos dropped).""" + out = [] + for ln in (raw or "").splitlines(): + ln = ln.strip() + if not ln: + continue + try: + obj = from_json_envelope(json.loads(ln)) + except Exception: + continue + if not obj.is_eos: + out.append(obj) + return out + + +def _free_port(): + s = socket.socket() + s.bind(("127.0.0.1", 0)) + port = s.getsockname()[1] + s.close() + return port + + +def _wait_port(port, timeout=10.0): + deadline = time.time() + timeout + while time.time() <
deadline: + try: + with socket.create_connection(("127.0.0.1", port), timeout=0.5): + return True + except OSError: + time.sleep(0.1) + return False + + +class Node: + """A handle to one VM's apphost: .call(op, ...) -> list[AstralObject].""" + + def __init__(self, vm, client, token): + self.vm = vm + self._client = client + self._token = token + + @property + def uses_client(self): + return self._client is not None + + def _via_shell(self, op, args, target): + q = f"{target}:{op}" if target else op + flags = "".join(f" -{k} '{v}'" for k, v in (args or {}).items()) + tok = f"export ASTRALD_APPHOST_TOKEN={self._token}; " if self._token else "" + return parse_cli(ssh(self.vm, f"{tok}astral-query {q}{flags} -out json")) + + def call(self, op, args=None, target=None): + """Run an apphost op; return its result objects (eos dropped, errors kept). + + Routes through the astral-py client unless the op is pinned in SHELL_OPS + or no client is available; on any client error, falls back to the Go CLI. + """ + base = op.split(":")[-1] + if self._client is None or op in SHELL_OPS or base in SHELL_OPS: + return self._via_shell(op, args, target) + try: + with self._client.query(op, args or None, target=target) as st: + return list(st) + except Exception: + return self._via_shell(op, args, target) + + +@contextlib.contextmanager +def connect(vm, token=None): + """Yield a Node for <vm>. + + Opens an ssh -L forward of the VM's WebSocket apphost port (using netsim's + own $NETSIM_SSH_CONFIG) and an astral-py client over it. If the forward or + client can't be established, yields a shell-only Node so verification still + runs via the Go CLI.
+ """ + cfg = os.environ.get("NETSIM_SSH_CONFIG") + client = None + tunnel = None + if cfg: + try: + port = _free_port() + tunnel = subprocess.Popen( + ["ssh", "-F", cfg, "-o", "ExitOnForwardFailure=yes", + "-L", f"{port}:127.0.0.1:{WS_PORT}", "-N", vm], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + if _wait_port(port): + client = astral.connect(f"ws://127.0.0.1:{port}/.ws", token=token) + except Exception: + client = None + try: + yield Node(vm, client, token) + finally: + try: + if client is not None: + client.close() + except Exception: + pass + if tunnel is not None: + tunnel.terminate() + try: + tunnel.wait(timeout=3) + except Exception: + tunnel.kill() + + +# --- interrogators: list[AstralObject] -> extracted value -------------------- +def _values(objs): + return [o.value for o in objs if not o.is_eos] + + +def contract(objs): + """(Issuer, Subject) of the active contract from a user.info result.""" + for v in _values(objs): + if isinstance(v, dict) and isinstance(v.get("Contract"), dict): + c = v["Contract"].get("Contract", {}) + return c.get("Issuer"), c.get("Subject") + return None, None + + +def linked_sibling(objs): + """Identity of the first Linked sibling in a user.swarm_status result.""" + for v in _values(objs): + if isinstance(v, dict) and v.get("Linked"): + return v.get("Identity") + return None + + +def swarm_identities(objs): + """Set of node identities in a user.swarm_status result.""" + ids = set() + for v in _values(objs): + if isinstance(v, dict) and v.get("Identity"): + ids.add(v["Identity"]) + return ids + + +def has_link_to(objs, ident): + """True if a nodes.links result holds an active link to <ident>.""" + return any(isinstance(v, dict) and v.get("RemoteIdentity") == ident + for v in _values(objs)) + + +def _contains_identity(value, ident): + if isinstance(value, str): + return value == ident + if isinstance(value, dict): + return any(_contains_identity(v, ident) for v in value.values()) + if isinstance(value, list): + return
any(_contains_identity(v, ident) for v in value) + return False + + +def is_expelled(objs, ident): + """True if a user.list_expelled result bans (nested Subject match).""" + return any(_contains_identity(o.value, ident) for o in objs + if o.type not in ("eos", "error_message")) + + +def loaded_payload(objs): + """The decoded string payload from an objects.load result, or None.""" + for o in objs: + if o.type in ("eos", "error_message"): + continue + if isinstance(o.value, str): + return o.value + return None + + +def error_messages(objs): + """The error_message strings in a result stream.""" + return [o.value for o in objs if o.type == "error_message"] + + +def endpoint_addr(ep): + """Address string of an exonet.Endpoint (bare or {Type,Object}).""" + if isinstance(ep, str): + return ep + if isinstance(ep, dict): + o = ep.get("Object") + return o if isinstance(o, str) else "" + return "" + + +def tor_links(objs): + """(RemoteIdentity, endpoint-address) for links whose Network == 'tor'.""" + out = [] + for v in _values(objs): + if isinstance(v, dict) and str(v.get("Network")) == "tor": + out.append((str(v.get("RemoteIdentity", "")), + endpoint_addr(v.get("RemoteEndpoint")))) + return out + + +def resolve_onion(objs): + """The .onion address from a nodes.resolve_endpoints result, or None.""" + for v in _values(objs): + if isinstance(v, dict): + a = endpoint_addr(v.get("Endpoint")) + if ".onion" in a: + return a + return None diff --git a/netsim/tasks/_lib/test_netsim_astral.py b/netsim/tasks/_lib/test_netsim_astral.py new file mode 100644 index 00000000..a1573bc1 --- /dev/null +++ b/netsim/tasks/_lib/test_netsim_astral.py @@ -0,0 +1,134 @@ +"""Offline tests for netsim_astral -- no VM, no live astrald. + +Exercises the interrogators against synthetic AstralObjects, parse_cli's +stream handling, and the Go-CLI fallback command construction. 
Run with: + + python3 -m unittest -v # from this directory +""" +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.realpath(__file__))) +import netsim_astral as na # noqa: E402 (also bootstraps astral onto sys.path) +import astral # noqa: E402 + + +def O(type, value=None): + return astral.obj(type, value) + + +class InterrogatorTests(unittest.TestCase): + def test_contract(self): + objs = [O("mod.user.contract", + {"Contract": {"Contract": {"Issuer": "02aa", "Subject": "03bb"}}})] + self.assertEqual(na.contract(objs), ("02aa", "03bb")) + self.assertEqual(na.contract([O("x", {})]), (None, None)) + + def test_linked_sibling_and_identities(self): + objs = [O("s", {"Identity": "03bb", "Linked": True}), + O("s", {"Identity": "03cc", "Linked": False})] + self.assertEqual(na.linked_sibling(objs), "03bb") + self.assertEqual(na.swarm_identities(objs), {"03bb", "03cc"}) + self.assertIsNone(na.linked_sibling([O("s", {"Identity": "03cc", "Linked": False})])) + + def test_has_link_to(self): + objs = [O("l", {"RemoteIdentity": "03bb", "Network": "tcp"})] + self.assertTrue(na.has_link_to(objs, "03bb")) + self.assertFalse(na.has_link_to(objs, "03cc")) + + def test_is_expelled_nested(self): + objs = [O("mod.user.signed_expulsion", {"Expulsion": {"Subject": "03bb"}})] + self.assertTrue(na.is_expelled(objs, "03bb")) + self.assertFalse(na.is_expelled(objs, "03cc")) + # an error_message naming the id must not count as an expulsion record + self.assertFalse(na.is_expelled([O("error_message", "03bb not found")], "03bb")) + + def test_loaded_payload_and_errors(self): + objs = [O("error_message", "boom"), O("string8", "hello")] + self.assertEqual(na.loaded_payload(objs), "hello") + self.assertEqual(na.error_messages(objs), ["boom"]) + self.assertIsNone(na.loaded_payload([O("error_message", "boom")])) + + def test_tor_links_and_endpoint(self): + objs = [O("l", {"Network": "tor", "RemoteIdentity": "03bb", + "RemoteEndpoint": {"Object": "abc.onion:1791"}}), 
+ O("l", {"Network": "tcp", "RemoteIdentity": "03cc"})] + self.assertEqual(na.tor_links(objs), [("03bb", "abc.onion:1791")]) + self.assertEqual(na.endpoint_addr("x.onion"), "x.onion") + self.assertEqual(na.endpoint_addr({"Object": "y.onion"}), "y.onion") + self.assertEqual(na.endpoint_addr(None), "") + + def test_resolve_onion(self): + objs = [O("e", {"Endpoint": "10.0.0.1:1791"}), + O("e", {"Endpoint": {"Object": "abc.onion:1791"}})] + self.assertEqual(na.resolve_onion(objs), "abc.onion:1791") + self.assertIsNone(na.resolve_onion([O("e", {"Endpoint": "10.0.0.1:1791"})])) + + +class ParseCliTests(unittest.TestCase): + def test_drops_eos_keeps_error(self): + raw = ('{"Type":"string8","Object":"hi"}\n' + '{"Type":"error_message","Object":"nope"}\n' + '\n' + 'not-json\n' + '{"Type":"eos","Object":null}\n') + objs = na.parse_cli(raw) + self.assertEqual([o.type for o in objs], ["string8", "error_message"]) + self.assertEqual(na.loaded_payload(objs), "hi") + self.assertEqual(na.error_messages(objs), ["nope"]) + + def test_empty(self): + self.assertEqual(na.parse_cli(""), []) + self.assertEqual(na.parse_cli(None), []) + + +class ShellRoutingTests(unittest.TestCase): + """Node with no client must build the exact Go astral-query command.""" + + def setUp(self): + self.calls = [] + self._orig = na.ssh + + def fake_ssh(vm, remote): + self.calls.append((vm, remote)) + return '{"Type":"string8","Object":"hi"}\n{"Type":"eos","Object":null}\n' + + na.ssh = fake_ssh + + def tearDown(self): + na.ssh = self._orig + + def test_untokened(self): + node = na.Node("node1", None, "") + objs = node.call("user.info") + self.assertEqual(self.calls[-1], ("node1", "astral-query user.info -out json")) + self.assertEqual(na.loaded_payload(objs), "hi") + + def test_tokened_with_args(self): + na.Node("node1", None, "TKN").call("objects.load", {"id": "X", "repo": "local"}) + self.assertEqual( + self.calls[-1][1], + "export ASTRALD_APPHOST_TOKEN=TKN; " + "astral-query objects.load -id 'X' -repo 
'local' -out json") + + def test_peer_target(self): + na.Node("node1", None, "TKN").call("objects.load", {"id": "X"}, target="node2") + self.assertEqual( + self.calls[-1][1], + "export ASTRALD_APPHOST_TOKEN=TKN; " + "astral-query node2:objects.load -id 'X' -out json") + + def test_shell_ops_pin_forces_cli(self): + # even with a (truthy sentinel) client, a pinned op must go to the shell + na.SHELL_OPS.add("user.info") + try: + node = na.Node("node1", object(), "") + node.call("user.info") + self.assertEqual(self.calls[-1][1], "astral-query user.info -out json") + finally: + na.SHELL_OPS.discard("user.info") + + +if __name__ == "__main__": + unittest.main() From f3e9f23f94306a8ed28c76ca03e8c4ecc28dbe4e Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 25 Jun 2026 19:58:53 +0200 Subject: [PATCH 45/57] netsim: migrate verifiers to the astral-py client (CLI fallback for anonymous WS sessions) --- netsim/tasks/_lib/netsim_astral.py | 14 ++- netsim/tasks/adopt-node/verify.py | 98 ++++-------------- netsim/tasks/enable-tor/verify.py | 70 +++---------- netsim/tasks/expel-node/verify.py | 116 ++++++---------------- netsim/tasks/leave-lan/verify.py | 33 +++--- netsim/tasks/link-over-tor/verify.py | 64 +++--------- netsim/tasks/object-store/verify.py | 85 +++++----------- netsim/tasks/read-remote-object/verify.py | 83 +++++----------- 8 files changed, 157 insertions(+), 406 deletions(-) diff --git a/netsim/tasks/_lib/netsim_astral.py b/netsim/tasks/_lib/netsim_astral.py index 228316e3..f29cbf14 100644 --- a/netsim/tasks/_lib/netsim_astral.py +++ b/netsim/tasks/_lib/netsim_astral.py @@ -186,7 +186,19 @@ def connect(vm, token=None): "-L", f"{port}:127.0.0.1:{WS_PORT}", "-N", vm], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) if _wait_port(port): - client = astral.connect(f"ws://127.0.0.1:{port}/.ws", token=token) + c = astral.connect(f"ws://127.0.0.1:{port}/.ws", token=token) + # Some astrald builds reject anonymous WS route_query with a + # ProtocolError; probe once so 
such a session degrades to the CLI + # wholesale instead of failing-then-falling-back on every call. + try: + c.whoami() + client = c + except Exception: + try: + c.close() + except Exception: + pass + client = None except Exception: client = None try: diff --git a/netsim/tasks/adopt-node/verify.py b/netsim/tasks/adopt-node/verify.py index 9c164a1a..6dd1234c 100755 --- a/netsim/tasks/adopt-node/verify.py +++ b/netsim/tasks/adopt-node/verify.py @@ -1,72 +1,17 @@ #!/usr/bin/env python3 """verify adopt-node: node1 and node2 linked into one User swarm, symmetric roster. -Independent both-ends check (does not trust run.sh); reaches the VMs via netsim ssh. +Independent both-ends check (does not trust run.sh). Queries reach each VM's +apphost through the shared astral-py client (tasks/_lib/netsim_astral.py), which +forwards to the lockstep Go astral-query CLI for any op it can't serve. """ import argparse -import json -import subprocess +import os import sys -def ssh(vm, remote): - """Run `netsim ssh -- ` on the host; return stdout.""" - p = subprocess.run(["netsim", "ssh", vm, "--", remote], - capture_output=True, text=True) - return p.stdout - - -def jfile(vm, name): - """A JSON file under the agent's $HOME (/home/tester/) on the VM, as a dict.""" - try: - return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} - except json.JSONDecodeError: - return {} - - -def info(vm): - """The agent's $HOME/user.json (/home/tester/user.json) on the VM, as a dict.""" - return jfile(vm, "user.json") - - -def objs(stream): - out = [] - for ln in (stream or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - out.append(json.loads(ln)) - except json.JSONDecodeError: - pass - return out - - -def contract(info): - """(Issuer, Subject) of the active contract from a user.info stream.""" - for o in objs(info): - ob = o.get("Object") - if isinstance(ob, dict) and isinstance(ob.get("Contract"), dict): - c = ob["Contract"].get("Contract", {}) - return 
c.get("Issuer"), c.get("Subject") - return None, None - - -def linked_sibling(swarm): - """Identity of the first Linked sibling in a user.swarm_status stream.""" - for o in objs(swarm): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("Linked"): - return ob.get("Identity") - return None - - -def has_link_to(links, identity): - """True if a nodes.links stream contains an active link to `identity`.""" - for o in objs(links): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("RemoteIdentity") == identity: - return True - return False +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import netsim_astral as na # noqa: E402 def main(): @@ -76,26 +21,23 @@ def main(): args, _ = ap.parse_known_args() vm1, vm2 = args.node1, args.node2 - # node1 acts as the User (token from bootstrap-user-software-key); node2 answers under its - # node identity (it holds the contract after the adoption). - info1 = info(vm1) - siblings = jfile(vm1, "siblings.json") # adopt-node agent: ids of the swarm siblings + info1 = na.home_json(vm1, "user.json") + siblings = na.home_json(vm1, "siblings.json") # adopt-node agent: swarm sibling ids sib_ids = ["".join(str(x).split()) for x in (siblings.get("sibling_ids") or []) if x] U = "".join(str(info1.get("user_id", "")).split()) - TOKEN = f"export ASTRALD_APPHOST_TOKEN={info1.get('user_token', '')};" - n1_info = ssh(vm1, TOKEN + " astral-query user.info -out json") - n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") - n2_info = ssh(vm2, "astral-query user.info -out json") - n2_links = ssh(vm2, "astral-query nodes.links -out json") + token = info1.get("user_token", "") + + # node1 acts as the User (token from bootstrap-user-software-key); node2 answers + # under its node identity (it holds the contract after the adoption). 
+ with na.connect(vm1, token=token) as n1: + i1, s1 = na.contract(n1.call("user.info")) + sib = na.linked_sibling(n1.call("user.swarm_status")) # node2's own swarm view: swarm_status derives from node2's active contract, # not the caller, so no token is needed; post-#348 it must list node1 too. - n2_swarm = ssh(vm2, "astral-query user.swarm_status -out json") - - i1, s1 = contract(n1_info) - i2, s2 = contract(n2_info) - sib = linked_sibling(n1_swarm) - n2_sib = linked_sibling(n2_swarm) - linkback = has_link_to(n2_links, s1) + with na.connect(vm2) as n2: + i2, s2 = na.contract(n2.call("user.info")) + linkback = na.has_link_to(n2.call("nodes.links"), s1) + n2_sib = na.linked_sibling(n2.call("user.swarm_status")) errs = [] if not U: diff --git a/netsim/tasks/enable-tor/verify.py b/netsim/tasks/enable-tor/verify.py index b0afbfea..c6a4b569 100644 --- a/netsim/tasks/enable-tor/verify.py +++ b/netsim/tasks/enable-tor/verify.py @@ -1,77 +1,37 @@ #!/usr/bin/env python3 """verify enable-tor: each target VM runs Tor and saved its own onion endpoint. -Independent host-side check (does not trust run.sh): on each VM the tor service is active, -/root/tor.json holds an onion endpoint, and that saved onion matches what astrald actually -advertises now (nodes.resolve_endpoints -id localnode). Reaches the VMs via netsim ssh. +Independent host-side check (does not trust run.sh): on each VM the tor service is +active, /root/tor.json holds an onion endpoint, and that saved onion matches what +astrald actually advertises now (nodes.resolve_endpoints -id localnode). + +Queries reach each VM's apphost through the shared astral-py client +(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. 
""" import argparse -import json -import subprocess +import os import sys - -def ssh(vm, remote): - p = subprocess.run(["netsim", "ssh", vm, "--", remote], capture_output=True, text=True) - return p.stdout - - -def all_running_vms(): - out = subprocess.run(["netsim", "vm", "ls", "--json"], capture_output=True, text=True).stdout - try: - return [v["hostname"] for v in json.loads(out or "[]") if v.get("state") == "running"] - except json.JSONDecodeError: - return [] - - -def endpoint_addr(ep): - if isinstance(ep, str): - return ep - if isinstance(ep, dict): - o = ep.get("Object") - return o if isinstance(o, str) else "" - return "" - - -def live_onion(vm): - """The onion address astrald advertises now (resolve_endpoints -id localnode), or None.""" - stream = ssh(vm, "astral-query nodes.resolve_endpoints -id localnode -out json") - for ln in (stream or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - o = json.loads(ln) - except json.JSONDecodeError: - continue - a = endpoint_addr((o.get("Object") or {}).get("Endpoint")) - if ".onion" in a: - return a - return None - - -def saved(vm): - """The contents of /root/tor.json on the VM, as a dict.""" - try: - return json.loads(ssh(vm, "cat /root/tor.json") or "{}") or {} - except json.JSONDecodeError: - return {} +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import netsim_astral as na # noqa: E402 def main(): ap = argparse.ArgumentParser() ap.add_argument("--vm", action="append", default=[]) args, _ = ap.parse_known_args() - vms = args.vm or all_running_vms() + vms = args.vm or na.all_running_vms() if not vms: sys.stderr.write("enable-tor verify FAILED: no VMs to verify\n") return 1 bad = False for vm in vms: - tor_active = ssh(vm, "systemctl is-active tor 2>/dev/null").strip() == "active" - file_onion = str(saved(vm).get("onion", "")) - live = live_onion(vm) + tor_active = na.ssh(vm, "systemctl is-active tor 2>/dev/null").strip() == "active" + 
file_onion = str(na.read_json(vm, "/root/tor.json").get("onion", "")) + with na.connect(vm) as node: + live = na.resolve_onion(node.call("nodes.resolve_endpoints", {"id": "localnode"})) errs = [] if not tor_active: diff --git a/netsim/tasks/expel-node/verify.py b/netsim/tasks/expel-node/verify.py index e1b0d16f..7d41aea9 100755 --- a/netsim/tasks/expel-node/verify.py +++ b/netsim/tasks/expel-node/verify.py @@ -1,77 +1,26 @@ #!/usr/bin/env python3 """verify expel-node: node1 (the User) permanently banned node2 from the swarm. -Independent check (does not trust run.sh); reaches the VMs via netsim ssh. Asserts node2 -is recorded in user.list_expelled and is gone from node1's user.swarm_status roster -(user.OpSwarmStatus -> ActiveNodes filters the expelledSet). Link state is not asserted. - -node2's identity comes from node1's siblings.json (recorded by adopt-node), NOT from node2 -itself: once expelled, node2 rejects user.info (query rejected (2) untokened, auth_failed -with the User token — it no longer accepts the User it was banned from), so it is not a -usable identity source. +Independent check (does not trust run.sh). Asserts node2 is recorded in +user.list_expelled and is gone from node1's user.swarm_status roster +(user.OpSwarmStatus -> ActiveNodes filters the expelledSet). Link state is not +asserted. + +node2's identity comes from node1's siblings.json (recorded by adopt-node), NOT +from node2 itself: once expelled, node2 rejects user.info (query rejected (2) +untokened, auth_failed with the User token -- it no longer accepts the User it +was banned from), so it is not a usable identity source. + +Queries reach node1's apphost through the shared astral-py client +(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. 
""" import argparse -import json -import subprocess +import os import sys - -def ssh(vm, remote): - """Run `netsim ssh -- ` on the host; return stdout.""" - p = subprocess.run(["netsim", "ssh", vm, "--", remote], - capture_output=True, text=True) - return p.stdout - - -def jfile(vm, name): - """/home/tester/ on the VM, parsed as a dict.""" - try: - return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} - except json.JSONDecodeError: - return {} - - -def objs(stream): - """astral-query -out json emits one object per line + an eos terminator.""" - out = [] - for ln in (stream or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - out.append(json.loads(ln)) - except json.JSONDecodeError: - pass - return out - - -def swarm_identities(stream): - """Set of node identities listed in a user.swarm_status stream.""" - ids = set() - for o in objs(stream): - ob = o.get("Object") - if isinstance(ob, dict) and ob.get("Identity"): - ids.add(ob["Identity"]) - return ids - - -def contains_identity(value, ident): - """True if `ident` appears anywhere in a parsed JSON value (string match).""" - if isinstance(value, str): - return value == ident - if isinstance(value, dict): - return any(contains_identity(v, ident) for v in value.values()) - if isinstance(value, list): - return any(contains_identity(v, ident) for v in value) - return False - - -def is_expelled(stream, ident): - """True if a user.list_expelled stream bans `ident` (as a SignedExpulsion Subject).""" - for o in objs(stream): - if contains_identity(o.get("Object", o), ident): - return True - return False +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import netsim_astral as na # noqa: E402 def main(): @@ -79,35 +28,36 @@ def main(): ap.add_argument("--node1", default="node1") ap.add_argument("--node2", default="node2") args, _ = ap.parse_known_args() - vm1, vm2 = args.node1, args.node2 + vm1 = args.node1 - # node1 acts as the User 
(token from bootstrap); list_expelled / swarm_status require - # the caller to be the contract issuer, so they run under that token. - info1 = jfile(vm1, "user.json") + # node1 acts as the User (token from bootstrap); list_expelled / swarm_status + # require the caller to be the contract issuer, so they run under that token. + info1 = na.home_json(vm1, "user.json") U = "".join(str(info1.get("user_id", "")).split()) - TOKEN = f"export ASTRALD_APPHOST_TOKEN={info1.get('user_token', '')};" + token = info1.get("user_token", "") - # node2's identity from node1's siblings.json (recorded by adopt-node) — a stable - # source. The expelled node itself can't be queried (post-ban node2 rejects user.info). - sibs = jfile(vm1, "siblings.json") + # node2's identity from node1's siblings.json (recorded by adopt-node) -- a + # stable source. The expelled node itself can't be queried (post-ban node2 + # rejects user.info). + sibs = na.home_json(vm1, "siblings.json") sib_ids = ["".join(str(x).split()) for x in (sibs.get("sibling_ids") or []) if x] s2 = sib_ids[0] if sib_ids else None - n1_expelled = ssh(vm1, TOKEN + " astral-query user.list_expelled -out json") - n1_swarm = ssh(vm1, TOKEN + " astral-query user.swarm_status -out json") - members = swarm_identities(n1_swarm) + with na.connect(vm1, token=token) as n1: + n1_expelled = n1.call("user.list_expelled") + members = na.swarm_identities(n1.call("user.swarm_status")) errs = [] if not U: errs.append("no user_id in node1's user.json") if not s2: - errs.append("no sibling_ids in node1's siblings.json — can't identify the expelled node") - if s2 and not is_expelled(n1_expelled, s2): + errs.append("no sibling_ids in node1's siblings.json -- can't identify the expelled node") + if s2 and not na.is_expelled(n1_expelled, s2): errs.append(f"node2 {s2} is NOT in node1's user.list_expelled " - "(expulsion was never issued — agent did not expel the node)") + "(expulsion was never issued -- agent did not expel the node)") if s2 and s2 in 
members: errs.append(f"node2 {s2} still appears in node1's user.swarm_status " - "(roster not reduced — expelledSet filter did not drop it)") + "(roster not reduced -- expelledSet filter did not drop it)") if errs: sys.stderr.write("expel-node verify FAILED:\n") @@ -115,7 +65,7 @@ def main(): sys.stderr.write(f" - {e}\n") return 1 - print(f"expel OK: User {U[:8]}.. banned node2 {s2[:8]}.. — recorded in " + print(f"expel OK: User {U[:8]}.. banned node2 {s2[:8]}.. -- recorded in " f"user.list_expelled and dropped from user.swarm_status " f"({len(members)} member(s) remain).") return 0 diff --git a/netsim/tasks/leave-lan/verify.py b/netsim/tasks/leave-lan/verify.py index 2111ca6b..a4e7d199 100644 --- a/netsim/tasks/leave-lan/verify.py +++ b/netsim/tasks/leave-lan/verify.py @@ -1,27 +1,20 @@ #!/usr/bin/env python3 """verify leave-lan: can no longer reach over the LAN. -Independent host-side check: from , a TCP connect to the peer's LAN address on the -astral port (1791) must NOT succeed (the nftables drop blackholes it -> timeout). The -peer's LAN IP is resolved from the peer. Reaches the VMs via netsim ssh. +Independent host-side check: from , a TCP connect to the peer's LAN address on +the astral port (1791) must NOT succeed (the nftables drop blackholes it -> +timeout). The peer's LAN IP is resolved from the peer. No astral-query here -- this +is a raw socket probe, run through tasks/_lib/netsim_astral.py's ssh transport. 
""" import argparse -import subprocess +import os import sys -PORT = 1791 - - -def ssh(vm, remote): - p = subprocess.run(["netsim", "ssh", vm, "--", remote], capture_output=True, text=True) - return p.stdout +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import netsim_astral as na # noqa: E402 - -def peer_lan_ip(peer): - for tok in (ssh(peer, "hostname -I") or "").split(): - if tok.startswith("10.77."): - return tok - return "" +PORT = 1791 def main(): @@ -30,7 +23,7 @@ def main(): ap.add_argument("--peer", default="node1") # the node it can no longer reach args, _ = ap.parse_known_args() - ip = peer_lan_ip(args.peer) + ip = na.peer_lan_ip(args.peer) if not ip: sys.stderr.write(f"leave-lan verify FAILED: could not resolve {args.peer}'s 10.77 LAN IP.\n") return 1 @@ -46,11 +39,11 @@ def main(): "except socket.timeout:\n print(\"timeout\")\n" "except Exception as e:\n print(\"err:\"+type(e).__name__)'" ) - result = (ssh(args.vm, probe) or "").strip() + result = (na.ssh(args.vm, probe) or "").strip() if result == "timeout": print(f"leave-lan OK: {args.vm} can no longer reach {args.peer} ({ip}:{PORT}) over the LAN " - "(connect times out — blackholed)") + "(connect times out -- blackholed)") return 0 if result == "open": @@ -58,7 +51,7 @@ def main(): f"({ip}:{PORT}) over the LAN (connect succeeded).\n") else: sys.stderr.write(f"leave-lan verify FAILED: probe to {args.peer} ({ip}:{PORT}) was " - f"inconclusive ({result!r}) — expected a timeout from the drop, not a " + f"inconclusive ({result!r}) -- expected a timeout from the drop, not a " "refusal/reset.\n") return 1 diff --git a/netsim/tasks/link-over-tor/verify.py b/netsim/tasks/link-over-tor/verify.py index 8305f743..1df54c81 100644 --- a/netsim/tasks/link-over-tor/verify.py +++ b/netsim/tasks/link-over-tor/verify.py @@ -1,55 +1,22 @@ #!/usr/bin/env python3 """verify link-over-tor: node1 holds a live link to the peer over Tor. 
-Independent host-side check (does not trust the agent): nodes.links on node1 must list a -link whose Network is "tor". (We assert the transport, not the .onion endpoint string — -an inbound tor link legitimately has no remote onion, so requiring ".onion" would -false-negative; node2 is the only sibling, so a tor link is a tor link to node2.) Also -cross-checks the agent's record. Reaches the VM via netsim ssh. +Independent host-side check (does not trust the agent): nodes.links on node1 must +list a link whose Network is "tor". (We assert the transport, not the .onion +endpoint string -- an inbound tor link legitimately has no remote onion, so +requiring ".onion" would false-negative; node2 is the only sibling, so a tor link +is a tor link to node2.) Also cross-checks the agent's record. + +Queries reach node1's apphost through the shared astral-py client +(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. """ import argparse -import json -import subprocess +import os import sys - -def ssh(vm, remote): - p = subprocess.run(["netsim", "ssh", vm, "--", remote], capture_output=True, text=True) - return p.stdout - - -def jfile(vm, name): - try: - return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} - except json.JSONDecodeError: - return {} - - -def ep_addr(e): - """Address string of an exonet.Endpoint, whether it marshals bare or as {Type,Object}.""" - if isinstance(e, str): - return e - if isinstance(e, dict): - o = e.get("Object") - return o if isinstance(o, str) else "" - return "" - - -def tor_links(stream): - """(RemoteIdentity, endpoint-address) for links whose Network == 'tor'.""" - out = [] - for ln in (stream or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - o = json.loads(ln) - except json.JSONDecodeError: - continue - ob = o.get("Object") or {} - if str(ob.get("Network")) == "tor": - out.append((str(ob.get("RemoteIdentity", "")), ep_addr(ob.get("RemoteEndpoint")))) - return out +sys.path.insert(0, 
os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import netsim_astral as na # noqa: E402 def main(): @@ -58,12 +25,13 @@ def main(): ap.add_argument("--peer", default="node2") # the node that left the LAN args, _ = ap.parse_known_args() - tor = jfile(args.vm, "tor.json") # agent: peer_onion, link_network + tor = na.home_json(args.vm, "tor.json") # agent: peer_onion, link_network net = str(tor.get("link_network", "")) onion = str(tor.get("peer_onion", "")) # Decisive: an actual link over Tor from node1 (to the only sibling, the peer). - links = tor_links(ssh(args.vm, "astral-query nodes.links -out json")) + with na.connect(args.vm) as node: + links = na.tor_links(node.call("nodes.links")) notes = [] if net != "tor": @@ -81,7 +49,7 @@ def main(): sys.stderr.write(f"link-over-tor verify FAILED: {args.vm} has no link to {args.peer} over Tor.\n") for n in notes: sys.stderr.write(f" note: {n}\n") - sys.stderr.write(f" nodes.links:\n{ssh(args.vm, 'astral-query nodes.links -out json')}\n") + sys.stderr.write(f" nodes.links:\n{na.ssh(args.vm, 'astral-query nodes.links -out json')}\n") return 1 diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index 66d7e222..952eed52 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -1,83 +1,46 @@ #!/usr/bin/env python3 """verify object-store: the stored object is present in the holder's local repo. -The agent (on node1) only stored an object on a target node (--target) and recorded -its id. Reading it back and confirming the bytes is verify's job: a repo-pinned, -ungated objects.load -repo local on the HOLDER must return the exact stored bytes. -The holder is resolved from --target: localnode/node1 -> node1 (the operator vm), -node2 -> node2. The object id comes from node1's object.json; the ground-truth -payload is the fixed payload.txt that run.sh shipped to the operator's home. Reaches -the VMs via netsim ssh. 
+The agent (on node1) only stored an object on a target node (--target) and +recorded its id. Reading it back and confirming the bytes is verify's job: a +repo-pinned, ungated objects.load -repo local on the HOLDER must return the exact +stored bytes. The holder is resolved from --target: localnode/node1 -> node1 (the +operator vm), node2 -> node2. The object id comes from node1's object.json; the +ground-truth payload is the fixed payload.txt that run.sh shipped to the +operator's home. + +Queries reach the holder's apphost through the shared astral-py client +(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. """ import argparse -import json -import subprocess +import os import sys - -def ssh(vm, remote): - """Run `netsim ssh -- ` on the host; return stdout (best-effort).""" - p = subprocess.run(["netsim", "ssh", vm, "--", remote], - capture_output=True, text=True) - return p.stdout - - -def info(vm): - """The agent's $HOME/object.json (/home/tester/object.json) on the VM, as a dict.""" - try: - return json.loads(ssh(vm, "cat /home/tester/object.json") or "{}") or {} - except json.JSONDecodeError: - return {} - - -def objs(stream): - out = [] - for ln in (stream or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - out.append(json.loads(ln)) - except json.JSONDecodeError: - pass - return out - - -def loaded_payload(stream): - """From an objects.load stream, the decoded payload string, or None.""" - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - ob = o.get("Object") - if isinstance(ob, str): - return ob - return None - - -def errors(stream): - return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import netsim_astral as na # noqa: E402 def main(): ap = argparse.ArgumentParser() - ap.add_argument("--vm", default="node1") # the operator; records 
object.json here - ap.add_argument("--node2", default="node2") # the peer + ap.add_argument("--vm", default="node1") # the operator; records object.json here + ap.add_argument("--node2", default="node2") # the peer ap.add_argument("--target", default="localnode") # localnode/node1 -> node1; node2 -> node2 args, _ = ap.parse_known_args() holder = args.node2 if args.target == args.node2 else args.vm - info1 = info(args.vm) - ID = "".join(str(info1.get("object_id", "")).split()) + ID = "".join(str(na.home_json(args.vm, "object.json").get("object_id", "")).split()) # Canonical input: the exact bytes the agent was handed to store (run.sh shipped - # payload.txt to the operator's home). Ground truth — we don't trust the agent's + # payload.txt to the operator's home). Ground truth -- we don't trust the agent's # own account of what it stored. - PAY = (ssh(args.vm, "cat /home/tester/payload.txt") or "").rstrip("\n") + PAY = na.read_file(args.vm, "/home/tester/payload.txt") # Decisive: re-load the object from the holder's local repo (repo-pinned + ungated) - # and confirm the bytes match payload.txt — the read-back is verify's job, not the + # and confirm the bytes match payload.txt -- the read-back is verify's job, not the # agent's (the agent only stores and records the id). 
- h_load = ssh(holder, f"astral-query objects.load -id '{ID}' -repo local -out json") - got = loaded_payload(h_load) + with na.connect(holder) as h: + h_load = h.call("objects.load", {"id": ID, "repo": "local"}) + got = na.loaded_payload(h_load) local_ok = got is not None and got.rstrip("\n") == PAY errs, notes = [], [] @@ -101,7 +64,7 @@ def main(): sys.stderr.write(f" objects.load -repo local on {holder} returned no payload (see errors below).\n") elif not local_ok: sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") - for e in errors(h_load): + for e in na.error_messages(h_load): sys.stderr.write(f" load error_message: {e}\n") for n in notes: sys.stderr.write(f" note: {n}\n") diff --git a/netsim/tasks/read-remote-object/verify.py b/netsim/tasks/read-remote-object/verify.py index cfa98b8f..3c05fd2c 100644 --- a/netsim/tasks/read-remote-object/verify.py +++ b/netsim/tasks/read-remote-object/verify.py @@ -2,62 +2,25 @@ """verify read-remote-object: node1 read the peer's object over astral. object-store --target node2 put the object on the peer (node2) and recorded -object_id in node1's object.json (the bytes are the fixed payload.txt it shipped to -node1); read-remote-object's agent (on node1, as the User) read it back from the peer -and recorded object_remote in read.json. +object_id in node1's object.json (the bytes are the fixed payload.txt it shipped +to node1); read-remote-object's agent (on node1, as the User) read it back from +the peer and recorded object_remote in read.json. -Independent host-side check: re-read the peer's object AS THE USER (node1 holds the -token) via :objects.load and assert the bytes equal the stored payload — this -is the authenticated, routable direction. Also cross-checks the agent's recorded -read. Reaches the VMs via netsim ssh. 
+Independent host-side check: re-read the peer's object AS THE USER (node1 holds +the token) via <peer>:objects.load and assert the bytes equal the stored payload +-- this is the authenticated, routable direction. Also cross-checks the agent's +recorded read. + +Queries reach node1's apphost through the shared astral-py client +(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. """ import argparse -import json -import subprocess +import os import sys - -def ssh(vm, remote): - """Run `netsim ssh <vm> -- <remote>` on the host; return stdout (best-effort).""" - p = subprocess.run(["netsim", "ssh", vm, "--", remote], - capture_output=True, text=True) - return p.stdout - - -def jload(vm, name): - """A JSON file under the agent's $HOME (/home/tester/<name>) on the VM, as a dict.""" - try: - return json.loads(ssh(vm, f"cat /home/tester/{name}") or "{}") or {} - except json.JSONDecodeError: - return {} - - -def objs(stream): - out = [] - for ln in (stream or "").splitlines(): - ln = ln.strip() - if not ln: - continue - try: - out.append(json.loads(ln)) - except json.JSONDecodeError: - pass - return out - - -def loaded_payload(stream): - """From an objects.load stream, the decoded payload string, or None.""" - for o in objs(stream): - if o.get("Type") in ("eos", "error_message"): - continue - ob = o.get("Object") - if isinstance(ob, str): - return ob - return None - - -def errors(stream): - return [o.get("Object") for o in objs(stream) if o.get("Type") == "error_message"] +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import netsim_astral as na # noqa: E402 def main(): @@ -66,21 +29,21 @@ def main(): ap.add_argument("--peer", default="node2") # the node holding the object (alias) args, _ = ap.parse_known_args() - obj = jload(args.vm, "object.json") # object-store: object_id - user = jload(args.vm, "user.json") # bootstrap/import: user_token - rd = jload(args.vm, "read.json") # this task's agent: object_remote + obj = 
na.home_json(args.vm, "object.json") # object-store: object_id + user = na.home_json(args.vm, "user.json") # bootstrap/import: user_token + rd = na.home_json(args.vm, "read.json") # this task's agent: object_remote ID = "".join(str(obj.get("object_id", "")).split()) # Ground-truth bytes: the fixed payload.txt that object-store shipped to the # operator (node1), not the agent's account of what was stored. - PAY = (ssh(args.vm, "cat /home/tester/payload.txt") or "").rstrip("\n") + PAY = na.read_file(args.vm, "/home/tester/payload.txt") REMOTE = str(rd.get("object_remote", "")) token = user.get("user_token", "") # Independent: node1, as the User, reads the peer's object over astral. This is # authenticated (token), so the query keeps the network zone and routes to the peer. - tok = f"export ASTRALD_APPHOST_TOKEN={token};" if token else "" - out = ssh(args.vm, f"{tok} astral-query {args.peer}:objects.load -id '{ID}' -out json") - got = loaded_payload(out) + with na.connect(args.vm, token=token) as n1: + out = n1.call("objects.load", {"id": ID}, target=args.peer) + got = na.loaded_payload(out) read_ok = got is not None and got.rstrip("\n") == PAY errs, notes = [], [] @@ -108,10 +71,10 @@ def main(): sys.stderr.write(f" - {e}\n") if got is None: sys.stderr.write(f" {args.peer}:objects.load (as User) returned no payload " - "(route_not_found means the read didn't route — check auth/zone).\n") + "(route_not_found means the read didn't route -- check auth/zone).\n") elif not read_ok: sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") - for e in errors(out): + for e in na.error_messages(out): sys.stderr.write(f" load error_message: {e}\n") for n in notes: sys.stderr.write(f" note: {n}\n") From 7f5488e3b13035c9ac2de173c0c6cfa5b791bf17 Mon Sep 17 00:00:00 2001 From: intern0 Date: Fri, 26 Jun 2026 12:06:44 +0200 Subject: [PATCH 46/57] netsim: readability pass on verifier Python (shlex-quote CLI args, drop dead code, intent comments) --- 
netsim/tasks/_lib/netsim_astral.py | 17 ++++++++++------- netsim/tasks/_lib/test_netsim_astral.py | 10 ++++++++-- netsim/tasks/adopt-node/verify.py | 1 + netsim/tasks/enable-tor/verify.py | 1 + netsim/tasks/expel-node/verify.py | 15 ++++++++------- netsim/tasks/leave-lan/verify.py | 1 + netsim/tasks/link-over-tor/verify.py | 1 + netsim/tasks/object-store/verify.py | 7 ++----- netsim/tasks/read-remote-object/verify.py | 1 + 9 files changed, 33 insertions(+), 21 deletions(-) diff --git a/netsim/tasks/_lib/netsim_astral.py b/netsim/tasks/_lib/netsim_astral.py index f29cbf14..496ef684 100644 --- a/netsim/tasks/_lib/netsim_astral.py +++ b/netsim/tasks/_lib/netsim_astral.py @@ -28,6 +28,7 @@ import contextlib import json import os +import shlex import socket import subprocess import sys @@ -146,7 +147,7 @@ def uses_client(self): def _via_shell(self, op, args, target): q = f"{target}:{op}" if target else op - flags = "".join(f" -{k} '{v}'" for k, v in (args or {}).items()) + flags = "".join(f" -{k} {shlex.quote(str(v))}" for k, v in (args or {}).items()) tok = f"export ASTRALD_APPHOST_TOKEN={self._token}; " if self._token else "" return parse_cli(ssh(self.vm, f"{tok}astral-query {q}{flags} -out json")) @@ -156,13 +157,14 @@ def call(self, op, args=None, target=None): Routes through the astral-py client unless the op is pinned in SHELL_OPS or no client is available; on any client error, falls back to the Go CLI. """ - base = op.split(":")[-1] - if self._client is None or op in SHELL_OPS or base in SHELL_OPS: + if self._client is None or op in SHELL_OPS: return self._via_shell(op, args, target) try: with self._client.query(op, args or None, target=target) as st: return list(st) except Exception: + # why: anonymous WS sessions and any client error fall back to the + # lockstep astral-query so verification still runs. 
return self._via_shell(op, args, target) @@ -187,9 +189,8 @@ def connect(vm, token=None): stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) if _wait_port(port): c = astral.connect(f"ws://127.0.0.1:{port}/.ws", token=token) - # Some astrald builds reject anonymous WS route_query with a - # ProtocolError; probe once so such a session degrades to the CLI - # wholesale instead of failing-then-falling-back on every call. + # why: some astrald builds reject anonymous WS route_query (ProtocolError); + # probe once so the session degrades to the Go CLI wholesale, not per call. try: c.whoami() client = c @@ -219,7 +220,8 @@ def connect(vm, token=None): # --- interrogators: list[AstralObject] -> extracted value -------------------- def _values(objs): - return [o.value for o in objs if not o.is_eos] + # note: interrogators below test isinstance(v, dict), so eos/error values are skipped + return [o.value for o in objs] def contract(objs): @@ -255,6 +257,7 @@ def has_link_to(objs, ident): def _contains_identity(value, ident): + # why: expulsion records nest the Subject at varying depth; recurse the dict/list tree if isinstance(value, str): return value == ident if isinstance(value, dict): diff --git a/netsim/tasks/_lib/test_netsim_astral.py b/netsim/tasks/_lib/test_netsim_astral.py index a1573bc1..da7a56e2 100644 --- a/netsim/tasks/_lib/test_netsim_astral.py +++ b/netsim/tasks/_lib/test_netsim_astral.py @@ -110,14 +110,20 @@ def test_tokened_with_args(self): self.assertEqual( self.calls[-1][1], "export ASTRALD_APPHOST_TOKEN=TKN; " - "astral-query objects.load -id 'X' -repo 'local' -out json") + "astral-query objects.load -id X -repo local -out json") def test_peer_target(self): na.Node("node1", None, "TKN").call("objects.load", {"id": "X"}, target="node2") self.assertEqual( self.calls[-1][1], "export ASTRALD_APPHOST_TOKEN=TKN; " - "astral-query node2:objects.load -id 'X' -out json") + "astral-query node2:objects.load -id X -out json") + + def 
test_arg_value_is_shell_quoted(self): + import shlex + v = "a b'c" # a value with a space and a quote + na.Node("node1", None, "").call("objects.load", {"id": v}) + self.assertIn(f"-id {shlex.quote(v)}", self.calls[-1][1]) def test_shell_ops_pin_forces_cli(self): # even with a (truthy sentinel) client, a pinned op must go to the shell diff --git a/netsim/tasks/adopt-node/verify.py b/netsim/tasks/adopt-node/verify.py index 6dd1234c..3eba6049 100755 --- a/netsim/tasks/adopt-node/verify.py +++ b/netsim/tasks/adopt-node/verify.py @@ -9,6 +9,7 @@ import os import sys +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import netsim_astral as na # noqa: E402 diff --git a/netsim/tasks/enable-tor/verify.py b/netsim/tasks/enable-tor/verify.py index c6a4b569..117a3b5f 100644 --- a/netsim/tasks/enable-tor/verify.py +++ b/netsim/tasks/enable-tor/verify.py @@ -12,6 +12,7 @@ import os import sys +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import netsim_astral as na # noqa: E402 diff --git a/netsim/tasks/expel-node/verify.py b/netsim/tasks/expel-node/verify.py index 7d41aea9..47037b97 100755 --- a/netsim/tasks/expel-node/verify.py +++ b/netsim/tasks/expel-node/verify.py @@ -18,6 +18,7 @@ import os import sys +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import netsim_astral as na # noqa: E402 @@ -41,7 +42,7 @@ def main(): # rejects user.info). 
sibs = na.home_json(vm1, "siblings.json") sib_ids = ["".join(str(x).split()) for x in (sibs.get("sibling_ids") or []) if x] - s2 = sib_ids[0] if sib_ids else None + expelled_id = sib_ids[0] if sib_ids else None with na.connect(vm1, token=token) as n1: n1_expelled = n1.call("user.list_expelled") @@ -50,13 +51,13 @@ def main(): errs = [] if not U: errs.append("no user_id in node1's user.json") - if not s2: + if not expelled_id: errs.append("no sibling_ids in node1's siblings.json -- can't identify the expelled node") - if s2 and not na.is_expelled(n1_expelled, s2): - errs.append(f"node2 {s2} is NOT in node1's user.list_expelled " + if expelled_id and not na.is_expelled(n1_expelled, expelled_id): + errs.append(f"node2 {expelled_id} is NOT in node1's user.list_expelled " "(expulsion was never issued -- agent did not expel the node)") - if s2 and s2 in members: - errs.append(f"node2 {s2} still appears in node1's user.swarm_status " + if expelled_id and expelled_id in members: + errs.append(f"node2 {expelled_id} still appears in node1's user.swarm_status " "(roster not reduced -- expelledSet filter did not drop it)") if errs: @@ -65,7 +66,7 @@ def main(): sys.stderr.write(f" - {e}\n") return 1 - print(f"expel OK: User {U[:8]}.. banned node2 {s2[:8]}.. -- recorded in " + print(f"expel OK: User {U[:8]}.. banned node2 {expelled_id[:8]}.. 
-- recorded in " f"user.list_expelled and dropped from user.swarm_status " f"({len(members)} member(s) remain).") return 0 diff --git a/netsim/tasks/leave-lan/verify.py b/netsim/tasks/leave-lan/verify.py index a4e7d199..815b345e 100644 --- a/netsim/tasks/leave-lan/verify.py +++ b/netsim/tasks/leave-lan/verify.py @@ -10,6 +10,7 @@ import os import sys +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import netsim_astral as na # noqa: E402 diff --git a/netsim/tasks/link-over-tor/verify.py b/netsim/tasks/link-over-tor/verify.py index 1df54c81..a8b3f200 100644 --- a/netsim/tasks/link-over-tor/verify.py +++ b/netsim/tasks/link-over-tor/verify.py @@ -14,6 +14,7 @@ import os import sys +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import netsim_astral as na # noqa: E402 diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index 952eed52..d8865a8b 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -16,6 +16,7 @@ import os import sys +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import netsim_astral as na # noqa: E402 @@ -43,7 +44,7 @@ def main(): got = na.loaded_payload(h_load) local_ok = got is not None and got.rstrip("\n") == PAY - errs, notes = [], [] + errs = [] if not ID: errs.append("no object_id in node1's object.json") if not PAY: @@ -52,8 +53,6 @@ def main(): if not errs and local_ok: print(f"object-store OK (target={args.target}): {holder}'s local repo holds object " f"{ID[:12]}.. 
with the exact bytes ({len(PAY)} B).") - for n in notes: - sys.stderr.write(f" note: {n}\n") return 0 sys.stderr.write(f"object-store verify FAILED (target={args.target}): {holder}'s local repo " @@ -66,8 +65,6 @@ def main(): sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") for e in na.error_messages(h_load): sys.stderr.write(f" load error_message: {e}\n") - for n in notes: - sys.stderr.write(f" note: {n}\n") sys.stderr.write(f" (id={ID} holder={holder} load={'hit' if got is not None else 'miss'})\n") return 1 diff --git a/netsim/tasks/read-remote-object/verify.py b/netsim/tasks/read-remote-object/verify.py index 3c05fd2c..54becaf5 100644 --- a/netsim/tasks/read-remote-object/verify.py +++ b/netsim/tasks/read-remote-object/verify.py @@ -18,6 +18,7 @@ import os import sys +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import netsim_astral as na # noqa: E402 From 60a8409025f53425c736a8aa46fd0dbbb5fddb8e Mon Sep 17 00:00:00 2001 From: intern0 Date: Fri, 26 Jun 2026 12:33:01 +0200 Subject: [PATCH 47/57] netsim: vendor astral-py as a submodule under tasks/_lib (replaces external-checkout path) --- .gitmodules | 3 +++ netsim/README.md | 15 +++++++++++++++ netsim/tasks/_lib/astral-py | 1 + netsim/tasks/_lib/netsim_astral.py | 20 +++++++++++++------- 4 files changed, 32 insertions(+), 7 deletions(-) create mode 160000 netsim/tasks/_lib/astral-py diff --git a/.gitmodules b/.gitmodules index 727da6bf..a899076e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule ".ai/system"] path = .ai/system url = git@github.com:cryptopunkscc/astral-docs.git +[submodule "netsim/tasks/_lib/astral-py"] + path = netsim/tasks/_lib/astral-py + url = ssh://git@git.satforge.dev/satforge/astral-py.git diff --git a/netsim/README.md b/netsim/README.md index 3926657d..f3aa904c 100644 --- a/netsim/README.md +++ 
b/netsim/README.md @@ -22,6 +22,7 @@ netsim/ object-store/ # node1 stores an object (--target localnode|node2) -> two-nodes-data[-peer] read-remote-object/ # node1's agent reads node2's object over astral (used by read-remote-peer) expel-node/ # node1 (User) permanently bans node2 from the swarm -> two-nodes-expel + _lib/ # shared verify library (netsim_astral.py) + astral-py submodule stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab bootstrap-user-software-key.story # astrald-lab -> one-node @@ -47,6 +48,20 @@ shipped builtins intact. netsim tasks # confirm: install-astrald is listed as a user task ``` +## Verifier library + +The `verify.py` oracles share `tasks/_lib/netsim_astral.py`, which reaches each +VM's apphost through the **astral-py** client vendored as a submodule at +`tasks/_lib/astral-py`. Initialize it once per worktree (`workon.sh` does not +`--recurse`); a missing submodule fails with a loud `ImportError`: + +```sh +git submodule update --init netsim/tasks/_lib/astral-py +``` + +The verifiers fall back to the Go `astral-query` CLI for any op the client can't +serve, but the submodule must be present for `verify.py` to import. + ## Lab `lab.story` builds the full lab in one simulation: two nodes running astrald and diff --git a/netsim/tasks/_lib/astral-py b/netsim/tasks/_lib/astral-py new file mode 160000 index 00000000..c68919ff --- /dev/null +++ b/netsim/tasks/_lib/astral-py @@ -0,0 +1 @@ +Subproject commit c68919ffc1acc071ee91ef3f529e6677f1adbd54 diff --git a/netsim/tasks/_lib/netsim_astral.py b/netsim/tasks/_lib/netsim_astral.py index 496ef684..ba4f4316 100644 --- a/netsim/tasks/_lib/netsim_astral.py +++ b/netsim/tasks/_lib/netsim_astral.py @@ -22,8 +22,8 @@ serve an op (pinned in SHELL_OPS, or it raised). Both paths return the same list[AstralObject], so the interrogators below are transport-agnostic. 
-astral-py is imported from an editable checkout (no pip needed on this host): -$ASTRALPY_SRC, else ~/work/satforge/astral-py/master/src. +astral-py is the submodule at _lib/astral-py (package under src/); imported without +pip. $ASTRALPY_SRC overrides the src dir for local dev against another checkout. """ import contextlib import json @@ -34,11 +34,17 @@ import sys import time -# --- astral-py (editable checkout; pip-free) --------------------------------- -_ASTRALPY_SRC = os.environ.get("ASTRALPY_SRC") or os.path.expanduser( - "~/work/satforge/astral-py/master/src") -if os.path.isdir(_ASTRALPY_SRC) and _ASTRALPY_SRC not in sys.path: - sys.path.insert(0, _ASTRALPY_SRC) +# --- astral-py (submodule at _lib/astral-py; pip-free) ----------------------- +# why: realpath resolves _lib through netsim's per-task symlink; the submodule's +# package lives under src/. $ASTRALPY_SRC overrides for local dev. +_ASTRALPY_SRC = os.environ.get("ASTRALPY_SRC") or os.path.join( + os.path.dirname(os.path.realpath(__file__)), "astral-py", "src") +if not os.path.isdir(os.path.join(_ASTRALPY_SRC, "astral")): + raise ImportError( + f"astral-py not found at {_ASTRALPY_SRC} -- run " + "`git submodule update --init netsim/tasks/_lib/astral-py` " + "(or set $ASTRALPY_SRC to an astral-py checkout's src/)") +sys.path.insert(0, _ASTRALPY_SRC) import astral # noqa: E402 from astral.encoding import from_json_envelope # noqa: E402 From 04d9df0144a4bade5355ac14dd5c0eaf90581013 Mon Sep 17 00:00:00 2001 From: intern0 Date: Fri, 26 Jun 2026 14:50:16 +0200 Subject: [PATCH 48/57] netsim: rename verify lib netsim_astral.py -> astralapi.py --- netsim/README.md | 4 +- .../_lib/{netsim_astral.py => astralapi.py} | 2 +- ...est_netsim_astral.py => test_astralapi.py} | 74 +++++++++---------- netsim/tasks/adopt-node/verify.py | 22 +++--- netsim/tasks/enable-tor/verify.py | 14 ++-- netsim/tasks/expel-node/verify.py | 14 ++-- netsim/tasks/leave-lan/verify.py | 8 +- netsim/tasks/link-over-tor/verify.py | 12 +-- 
netsim/tasks/object-store/verify.py | 14 ++-- netsim/tasks/read-remote-object/verify.py | 18 ++--- 10 files changed, 91 insertions(+), 91 deletions(-) rename netsim/tasks/_lib/{netsim_astral.py => astralapi.py} (99%) rename netsim/tasks/_lib/{test_netsim_astral.py => test_astralapi.py} (58%) diff --git a/netsim/README.md b/netsim/README.md index f3aa904c..ee7e300e 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -22,7 +22,7 @@ netsim/ object-store/ # node1 stores an object (--target localnode|node2) -> two-nodes-data[-peer] read-remote-object/ # node1's agent reads node2's object over astral (used by read-remote-peer) expel-node/ # node1 (User) permanently bans node2 from the swarm -> two-nodes-expel - _lib/ # shared verify library (netsim_astral.py) + astral-py submodule + _lib/ # shared verify library (astralapi.py) + astral-py submodule stories/ # one story per tested flow (start/save stage in each header) lab.story # null -> astrald-lab bootstrap-user-software-key.story # astrald-lab -> one-node @@ -50,7 +50,7 @@ netsim tasks # confirm: install-astrald is listed as a user task ## Verifier library -The `verify.py` oracles share `tasks/_lib/netsim_astral.py`, which reaches each +The `verify.py` oracles share `tasks/_lib/astralapi.py`, which reaches each VM's apphost through the **astral-py** client vendored as a submodule at `tasks/_lib/astral-py`. 
Initialize it once per worktree (`workon.sh` does not `--recurse`); a missing submodule fails with a loud `ImportError`: diff --git a/netsim/tasks/_lib/netsim_astral.py b/netsim/tasks/_lib/astralapi.py similarity index 99% rename from netsim/tasks/_lib/netsim_astral.py rename to netsim/tasks/_lib/astralapi.py index ba4f4316..c02a9c78 100644 --- a/netsim/tasks/_lib/netsim_astral.py +++ b/netsim/tasks/_lib/astralapi.py @@ -6,7 +6,7 @@ import os, sys sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) - import netsim_astral as na + import astralapi It centralises the two halves every verifier shares: diff --git a/netsim/tasks/_lib/test_netsim_astral.py b/netsim/tasks/_lib/test_astralapi.py similarity index 58% rename from netsim/tasks/_lib/test_netsim_astral.py rename to netsim/tasks/_lib/test_astralapi.py index da7a56e2..bea73117 100644 --- a/netsim/tasks/_lib/test_netsim_astral.py +++ b/netsim/tasks/_lib/test_astralapi.py @@ -1,4 +1,4 @@ -"""Offline tests for netsim_astral -- no VM, no live astrald. +"""Offline tests for astralapi -- no VM, no live astrald. Exercises the interrogators against synthetic AstralObjects, parse_cli's stream handling, and the Go-CLI fallback command construction. 
Run with: @@ -10,7 +10,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.realpath(__file__))) -import netsim_astral as na # noqa: E402 (also bootstraps astral onto sys.path) +import astralapi # noqa: E402 (also bootstraps astral onto sys.path) import astral # noqa: E402 @@ -22,48 +22,48 @@ class InterrogatorTests(unittest.TestCase): def test_contract(self): objs = [O("mod.user.contract", {"Contract": {"Contract": {"Issuer": "02aa", "Subject": "03bb"}}})] - self.assertEqual(na.contract(objs), ("02aa", "03bb")) - self.assertEqual(na.contract([O("x", {})]), (None, None)) + self.assertEqual(astralapi.contract(objs), ("02aa", "03bb")) + self.assertEqual(astralapi.contract([O("x", {})]), (None, None)) def test_linked_sibling_and_identities(self): objs = [O("s", {"Identity": "03bb", "Linked": True}), O("s", {"Identity": "03cc", "Linked": False})] - self.assertEqual(na.linked_sibling(objs), "03bb") - self.assertEqual(na.swarm_identities(objs), {"03bb", "03cc"}) - self.assertIsNone(na.linked_sibling([O("s", {"Identity": "03cc", "Linked": False})])) + self.assertEqual(astralapi.linked_sibling(objs), "03bb") + self.assertEqual(astralapi.swarm_identities(objs), {"03bb", "03cc"}) + self.assertIsNone(astralapi.linked_sibling([O("s", {"Identity": "03cc", "Linked": False})])) def test_has_link_to(self): objs = [O("l", {"RemoteIdentity": "03bb", "Network": "tcp"})] - self.assertTrue(na.has_link_to(objs, "03bb")) - self.assertFalse(na.has_link_to(objs, "03cc")) + self.assertTrue(astralapi.has_link_to(objs, "03bb")) + self.assertFalse(astralapi.has_link_to(objs, "03cc")) def test_is_expelled_nested(self): objs = [O("mod.user.signed_expulsion", {"Expulsion": {"Subject": "03bb"}})] - self.assertTrue(na.is_expelled(objs, "03bb")) - self.assertFalse(na.is_expelled(objs, "03cc")) + self.assertTrue(astralapi.is_expelled(objs, "03bb")) + self.assertFalse(astralapi.is_expelled(objs, "03cc")) # an error_message naming the id must not count as an expulsion record - 
self.assertFalse(na.is_expelled([O("error_message", "03bb not found")], "03bb")) + self.assertFalse(astralapi.is_expelled([O("error_message", "03bb not found")], "03bb")) def test_loaded_payload_and_errors(self): objs = [O("error_message", "boom"), O("string8", "hello")] - self.assertEqual(na.loaded_payload(objs), "hello") - self.assertEqual(na.error_messages(objs), ["boom"]) - self.assertIsNone(na.loaded_payload([O("error_message", "boom")])) + self.assertEqual(astralapi.loaded_payload(objs), "hello") + self.assertEqual(astralapi.error_messages(objs), ["boom"]) + self.assertIsNone(astralapi.loaded_payload([O("error_message", "boom")])) def test_tor_links_and_endpoint(self): objs = [O("l", {"Network": "tor", "RemoteIdentity": "03bb", "RemoteEndpoint": {"Object": "abc.onion:1791"}}), O("l", {"Network": "tcp", "RemoteIdentity": "03cc"})] - self.assertEqual(na.tor_links(objs), [("03bb", "abc.onion:1791")]) - self.assertEqual(na.endpoint_addr("x.onion"), "x.onion") - self.assertEqual(na.endpoint_addr({"Object": "y.onion"}), "y.onion") - self.assertEqual(na.endpoint_addr(None), "") + self.assertEqual(astralapi.tor_links(objs), [("03bb", "abc.onion:1791")]) + self.assertEqual(astralapi.endpoint_addr("x.onion"), "x.onion") + self.assertEqual(astralapi.endpoint_addr({"Object": "y.onion"}), "y.onion") + self.assertEqual(astralapi.endpoint_addr(None), "") def test_resolve_onion(self): objs = [O("e", {"Endpoint": "10.0.0.1:1791"}), O("e", {"Endpoint": {"Object": "abc.onion:1791"}})] - self.assertEqual(na.resolve_onion(objs), "abc.onion:1791") - self.assertIsNone(na.resolve_onion([O("e", {"Endpoint": "10.0.0.1:1791"})])) + self.assertEqual(astralapi.resolve_onion(objs), "abc.onion:1791") + self.assertIsNone(astralapi.resolve_onion([O("e", {"Endpoint": "10.0.0.1:1791"})])) class ParseCliTests(unittest.TestCase): @@ -73,14 +73,14 @@ def test_drops_eos_keeps_error(self): '\n' 'not-json\n' '{"Type":"eos","Object":null}\n') - objs = na.parse_cli(raw) + objs = 
astralapi.parse_cli(raw) self.assertEqual([o.type for o in objs], ["string8", "error_message"]) - self.assertEqual(na.loaded_payload(objs), "hi") - self.assertEqual(na.error_messages(objs), ["nope"]) + self.assertEqual(astralapi.loaded_payload(objs), "hi") + self.assertEqual(astralapi.error_messages(objs), ["nope"]) def test_empty(self): - self.assertEqual(na.parse_cli(""), []) - self.assertEqual(na.parse_cli(None), []) + self.assertEqual(astralapi.parse_cli(""), []) + self.assertEqual(astralapi.parse_cli(None), []) class ShellRoutingTests(unittest.TestCase): @@ -88,32 +88,32 @@ class ShellRoutingTests(unittest.TestCase): def setUp(self): self.calls = [] - self._orig = na.ssh + self._orig = astralapi.ssh def fake_ssh(vm, remote): self.calls.append((vm, remote)) return '{"Type":"string8","Object":"hi"}\n{"Type":"eos","Object":null}\n' - na.ssh = fake_ssh + astralapi.ssh = fake_ssh def tearDown(self): - na.ssh = self._orig + astralapi.ssh = self._orig def test_untokened(self): - node = na.Node("node1", None, "") + node = astralapi.Node("node1", None, "") objs = node.call("user.info") self.assertEqual(self.calls[-1], ("node1", "astral-query user.info -out json")) - self.assertEqual(na.loaded_payload(objs), "hi") + self.assertEqual(astralapi.loaded_payload(objs), "hi") def test_tokened_with_args(self): - na.Node("node1", None, "TKN").call("objects.load", {"id": "X", "repo": "local"}) + astralapi.Node("node1", None, "TKN").call("objects.load", {"id": "X", "repo": "local"}) self.assertEqual( self.calls[-1][1], "export ASTRALD_APPHOST_TOKEN=TKN; " "astral-query objects.load -id X -repo local -out json") def test_peer_target(self): - na.Node("node1", None, "TKN").call("objects.load", {"id": "X"}, target="node2") + astralapi.Node("node1", None, "TKN").call("objects.load", {"id": "X"}, target="node2") self.assertEqual( self.calls[-1][1], "export ASTRALD_APPHOST_TOKEN=TKN; " @@ -122,18 +122,18 @@ def test_peer_target(self): def test_arg_value_is_shell_quoted(self): import 
shlex v = "a b'c" # a value with a space and a quote - na.Node("node1", None, "").call("objects.load", {"id": v}) + astralapi.Node("node1", None, "").call("objects.load", {"id": v}) self.assertIn(f"-id {shlex.quote(v)}", self.calls[-1][1]) def test_shell_ops_pin_forces_cli(self): # even with a (truthy sentinel) client, a pinned op must go to the shell - na.SHELL_OPS.add("user.info") + astralapi.SHELL_OPS.add("user.info") try: - node = na.Node("node1", object(), "") + node = astralapi.Node("node1", object(), "") node.call("user.info") self.assertEqual(self.calls[-1][1], "astral-query user.info -out json") finally: - na.SHELL_OPS.discard("user.info") + astralapi.SHELL_OPS.discard("user.info") if __name__ == "__main__": diff --git a/netsim/tasks/adopt-node/verify.py b/netsim/tasks/adopt-node/verify.py index 3eba6049..1864aac1 100755 --- a/netsim/tasks/adopt-node/verify.py +++ b/netsim/tasks/adopt-node/verify.py @@ -2,7 +2,7 @@ """verify adopt-node: node1 and node2 linked into one User swarm, symmetric roster. Independent both-ends check (does not trust run.sh). Queries reach each VM's -apphost through the shared astral-py client (tasks/_lib/netsim_astral.py), which +apphost through the shared astral-py client (tasks/_lib/astralapi.py), which forwards to the lockstep Go astral-query CLI for any op it can't serve. 
""" import argparse @@ -12,7 +12,7 @@ # why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) -import netsim_astral as na # noqa: E402 +import astralapi # noqa: E402 def main(): @@ -22,23 +22,23 @@ def main(): args, _ = ap.parse_known_args() vm1, vm2 = args.node1, args.node2 - info1 = na.home_json(vm1, "user.json") - siblings = na.home_json(vm1, "siblings.json") # adopt-node agent: swarm sibling ids + info1 = astralapi.home_json(vm1, "user.json") + siblings = astralapi.home_json(vm1, "siblings.json") # adopt-node agent: swarm sibling ids sib_ids = ["".join(str(x).split()) for x in (siblings.get("sibling_ids") or []) if x] U = "".join(str(info1.get("user_id", "")).split()) token = info1.get("user_token", "") # node1 acts as the User (token from bootstrap-user-software-key); node2 answers # under its node identity (it holds the contract after the adoption). - with na.connect(vm1, token=token) as n1: - i1, s1 = na.contract(n1.call("user.info")) - sib = na.linked_sibling(n1.call("user.swarm_status")) + with astralapi.connect(vm1, token=token) as n1: + i1, s1 = astralapi.contract(n1.call("user.info")) + sib = astralapi.linked_sibling(n1.call("user.swarm_status")) # node2's own swarm view: swarm_status derives from node2's active contract, # not the caller, so no token is needed; post-#348 it must list node1 too. 
- with na.connect(vm2) as n2: - i2, s2 = na.contract(n2.call("user.info")) - linkback = na.has_link_to(n2.call("nodes.links"), s1) - n2_sib = na.linked_sibling(n2.call("user.swarm_status")) + with astralapi.connect(vm2) as n2: + i2, s2 = astralapi.contract(n2.call("user.info")) + linkback = astralapi.has_link_to(n2.call("nodes.links"), s1) + n2_sib = astralapi.linked_sibling(n2.call("user.swarm_status")) errs = [] if not U: diff --git a/netsim/tasks/enable-tor/verify.py b/netsim/tasks/enable-tor/verify.py index 117a3b5f..8c5efb6e 100644 --- a/netsim/tasks/enable-tor/verify.py +++ b/netsim/tasks/enable-tor/verify.py @@ -6,7 +6,7 @@ astrald actually advertises now (nodes.resolve_endpoints -id localnode). Queries reach each VM's apphost through the shared astral-py client -(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. """ import argparse import os @@ -15,24 +15,24 @@ # why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) -import netsim_astral as na # noqa: E402 +import astralapi # noqa: E402 def main(): ap = argparse.ArgumentParser() ap.add_argument("--vm", action="append", default=[]) args, _ = ap.parse_known_args() - vms = args.vm or na.all_running_vms() + vms = args.vm or astralapi.all_running_vms() if not vms: sys.stderr.write("enable-tor verify FAILED: no VMs to verify\n") return 1 bad = False for vm in vms: - tor_active = na.ssh(vm, "systemctl is-active tor 2>/dev/null").strip() == "active" - file_onion = str(na.read_json(vm, "/root/tor.json").get("onion", "")) - with na.connect(vm) as node: - live = na.resolve_onion(node.call("nodes.resolve_endpoints", {"id": "localnode"})) + tor_active = astralapi.ssh(vm, "systemctl is-active tor 2>/dev/null").strip() == "active" + file_onion = str(astralapi.read_json(vm, "/root/tor.json").get("onion", 
"")) + with astralapi.connect(vm) as node: + live = astralapi.resolve_onion(node.call("nodes.resolve_endpoints", {"id": "localnode"})) errs = [] if not tor_active: diff --git a/netsim/tasks/expel-node/verify.py b/netsim/tasks/expel-node/verify.py index 47037b97..05136970 100755 --- a/netsim/tasks/expel-node/verify.py +++ b/netsim/tasks/expel-node/verify.py @@ -12,7 +12,7 @@ was banned from), so it is not a usable identity source. Queries reach node1's apphost through the shared astral-py client -(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. """ import argparse import os @@ -21,7 +21,7 @@ # why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) -import netsim_astral as na # noqa: E402 +import astralapi # noqa: E402 def main(): @@ -33,27 +33,27 @@ def main(): # node1 acts as the User (token from bootstrap); list_expelled / swarm_status # require the caller to be the contract issuer, so they run under that token. - info1 = na.home_json(vm1, "user.json") + info1 = astralapi.home_json(vm1, "user.json") U = "".join(str(info1.get("user_id", "")).split()) token = info1.get("user_token", "") # node2's identity from node1's siblings.json (recorded by adopt-node) -- a # stable source. The expelled node itself can't be queried (post-ban node2 # rejects user.info). 
- sibs = na.home_json(vm1, "siblings.json") + sibs = astralapi.home_json(vm1, "siblings.json") sib_ids = ["".join(str(x).split()) for x in (sibs.get("sibling_ids") or []) if x] expelled_id = sib_ids[0] if sib_ids else None - with na.connect(vm1, token=token) as n1: + with astralapi.connect(vm1, token=token) as n1: n1_expelled = n1.call("user.list_expelled") - members = na.swarm_identities(n1.call("user.swarm_status")) + members = astralapi.swarm_identities(n1.call("user.swarm_status")) errs = [] if not U: errs.append("no user_id in node1's user.json") if not expelled_id: errs.append("no sibling_ids in node1's siblings.json -- can't identify the expelled node") - if expelled_id and not na.is_expelled(n1_expelled, expelled_id): + if expelled_id and not astralapi.is_expelled(n1_expelled, expelled_id): errs.append(f"node2 {expelled_id} is NOT in node1's user.list_expelled " "(expulsion was never issued -- agent did not expel the node)") if expelled_id and expelled_id in members: diff --git a/netsim/tasks/leave-lan/verify.py b/netsim/tasks/leave-lan/verify.py index 815b345e..3d58d9a1 100644 --- a/netsim/tasks/leave-lan/verify.py +++ b/netsim/tasks/leave-lan/verify.py @@ -4,7 +4,7 @@ Independent host-side check: from , a TCP connect to the peer's LAN address on the astral port (1791) must NOT succeed (the nftables drop blackholes it -> timeout). The peer's LAN IP is resolved from the peer. No astral-query here -- this -is a raw socket probe, run through tasks/_lib/netsim_astral.py's ssh transport. +is a raw socket probe, run through tasks/_lib/astralapi.py's ssh transport. 
""" import argparse import os @@ -13,7 +13,7 @@ # why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) -import netsim_astral as na # noqa: E402 +import astralapi # noqa: E402 PORT = 1791 @@ -24,7 +24,7 @@ def main(): ap.add_argument("--peer", default="node1") # the node it can no longer reach args, _ = ap.parse_known_args() - ip = na.peer_lan_ip(args.peer) + ip = astralapi.peer_lan_ip(args.peer) if not ip: sys.stderr.write(f"leave-lan verify FAILED: could not resolve {args.peer}'s 10.77 LAN IP.\n") return 1 @@ -40,7 +40,7 @@ def main(): "except socket.timeout:\n print(\"timeout\")\n" "except Exception as e:\n print(\"err:\"+type(e).__name__)'" ) - result = (na.ssh(args.vm, probe) or "").strip() + result = (astralapi.ssh(args.vm, probe) or "").strip() if result == "timeout": print(f"leave-lan OK: {args.vm} can no longer reach {args.peer} ({ip}:{PORT}) over the LAN " diff --git a/netsim/tasks/link-over-tor/verify.py b/netsim/tasks/link-over-tor/verify.py index a8b3f200..89252549 100644 --- a/netsim/tasks/link-over-tor/verify.py +++ b/netsim/tasks/link-over-tor/verify.py @@ -8,7 +8,7 @@ is a tor link to node2.) Also cross-checks the agent's record. Queries reach node1's apphost through the shared astral-py client -(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. 
""" import argparse import os @@ -17,7 +17,7 @@ # why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) -import netsim_astral as na # noqa: E402 +import astralapi # noqa: E402 def main(): @@ -26,13 +26,13 @@ def main(): ap.add_argument("--peer", default="node2") # the node that left the LAN args, _ = ap.parse_known_args() - tor = na.home_json(args.vm, "tor.json") # agent: peer_onion, link_network + tor = astralapi.home_json(args.vm, "tor.json") # agent: peer_onion, link_network net = str(tor.get("link_network", "")) onion = str(tor.get("peer_onion", "")) # Decisive: an actual link over Tor from node1 (to the only sibling, the peer). - with na.connect(args.vm) as node: - links = na.tor_links(node.call("nodes.links")) + with astralapi.connect(args.vm) as node: + links = astralapi.tor_links(node.call("nodes.links")) notes = [] if net != "tor": @@ -50,7 +50,7 @@ def main(): sys.stderr.write(f"link-over-tor verify FAILED: {args.vm} has no link to {args.peer} over Tor.\n") for n in notes: sys.stderr.write(f" note: {n}\n") - sys.stderr.write(f" nodes.links:\n{na.ssh(args.vm, 'astral-query nodes.links -out json')}\n") + sys.stderr.write(f" nodes.links:\n{astralapi.ssh(args.vm, 'astral-query nodes.links -out json')}\n") return 1 diff --git a/netsim/tasks/object-store/verify.py b/netsim/tasks/object-store/verify.py index d8865a8b..b211bbc0 100644 --- a/netsim/tasks/object-store/verify.py +++ b/netsim/tasks/object-store/verify.py @@ -10,7 +10,7 @@ operator's home. Queries reach the holder's apphost through the shared astral-py client -(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. 
""" import argparse import os @@ -19,7 +19,7 @@ # why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) -import netsim_astral as na # noqa: E402 +import astralapi # noqa: E402 def main(): @@ -30,18 +30,18 @@ def main(): args, _ = ap.parse_known_args() holder = args.node2 if args.target == args.node2 else args.vm - ID = "".join(str(na.home_json(args.vm, "object.json").get("object_id", "")).split()) + ID = "".join(str(astralapi.home_json(args.vm, "object.json").get("object_id", "")).split()) # Canonical input: the exact bytes the agent was handed to store (run.sh shipped # payload.txt to the operator's home). Ground truth -- we don't trust the agent's # own account of what it stored. - PAY = na.read_file(args.vm, "/home/tester/payload.txt") + PAY = astralapi.read_file(args.vm, "/home/tester/payload.txt") # Decisive: re-load the object from the holder's local repo (repo-pinned + ungated) # and confirm the bytes match payload.txt -- the read-back is verify's job, not the # agent's (the agent only stores and records the id). 
- with na.connect(holder) as h: + with astralapi.connect(holder) as h: h_load = h.call("objects.load", {"id": ID, "repo": "local"}) - got = na.loaded_payload(h_load) + got = astralapi.loaded_payload(h_load) local_ok = got is not None and got.rstrip("\n") == PAY errs = [] @@ -63,7 +63,7 @@ def main(): sys.stderr.write(f" objects.load -repo local on {holder} returned no payload (see errors below).\n") elif not local_ok: sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") - for e in na.error_messages(h_load): + for e in astralapi.error_messages(h_load): sys.stderr.write(f" load error_message: {e}\n") sys.stderr.write(f" (id={ID} holder={holder} load={'hit' if got is not None else 'miss'})\n") return 1 diff --git a/netsim/tasks/read-remote-object/verify.py b/netsim/tasks/read-remote-object/verify.py index 54becaf5..a0014af6 100644 --- a/netsim/tasks/read-remote-object/verify.py +++ b/netsim/tasks/read-remote-object/verify.py @@ -12,7 +12,7 @@ recorded read. Queries reach node1's apphost through the shared astral-py client -(tasks/_lib/netsim_astral.py), CLI fallback for anything it can't serve. +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. 
""" import argparse import os @@ -21,7 +21,7 @@ # why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib sys.path.insert(0, os.path.join( os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) -import netsim_astral as na # noqa: E402 +import astralapi # noqa: E402 def main(): @@ -30,21 +30,21 @@ def main(): ap.add_argument("--peer", default="node2") # the node holding the object (alias) args, _ = ap.parse_known_args() - obj = na.home_json(args.vm, "object.json") # object-store: object_id - user = na.home_json(args.vm, "user.json") # bootstrap/import: user_token - rd = na.home_json(args.vm, "read.json") # this task's agent: object_remote + obj = astralapi.home_json(args.vm, "object.json") # object-store: object_id + user = astralapi.home_json(args.vm, "user.json") # bootstrap/import: user_token + rd = astralapi.home_json(args.vm, "read.json") # this task's agent: object_remote ID = "".join(str(obj.get("object_id", "")).split()) # Ground-truth bytes: the fixed payload.txt that object-store shipped to the # operator (node1), not the agent's account of what was stored. - PAY = na.read_file(args.vm, "/home/tester/payload.txt") + PAY = astralapi.read_file(args.vm, "/home/tester/payload.txt") REMOTE = str(rd.get("object_remote", "")) token = user.get("user_token", "") # Independent: node1, as the User, reads the peer's object over astral. This is # authenticated (token), so the query keeps the network zone and routes to the peer. 
- with na.connect(args.vm, token=token) as n1: + with astralapi.connect(args.vm, token=token) as n1: out = n1.call("objects.load", {"id": ID}, target=args.peer) - got = na.loaded_payload(out) + got = astralapi.loaded_payload(out) read_ok = got is not None and got.rstrip("\n") == PAY errs, notes = [], [] @@ -75,7 +75,7 @@ def main(): "(route_not_found means the read didn't route -- check auth/zone).\n") elif not read_ok: sys.stderr.write(f" bytes mismatch: got {got!r} != stored {PAY!r}.\n") - for e in na.error_messages(out): + for e in astralapi.error_messages(out): sys.stderr.write(f" load error_message: {e}\n") for n in notes: sys.stderr.write(f" note: {n}\n") From 3832741cf797a33f3ab9028e65fe9de0a4421428 Mon Sep 17 00:00:00 2001 From: intern0 Date: Fri, 26 Jun 2026 19:38:53 +0200 Subject: [PATCH 49/57] netsim: leave-lan withdraws the LAN address instead of nftables-dropping it astrald has no carrier/operstate monitor -- mod/ip polls net.InterfaceAddrs() every 3s and advertises one tcp endpoint per assigned IP. An nftables DROP (or a bare link/carrier down, which retains the IPv4 address) is invisible to it: it never withdraws the 10.77 endpoint and never exercises the re-link-over-Tor path. Flush node2's own 10.77 address instead (RTM_DELADDR drops the address and its connected route; the NIC is downed too) -- exactly what astrald observes as leaving the network. verify.py becomes a blind, deterministic check (node2 has no 10.77 address or route) instead of a TCP-timeout probe, which would be unreliable anyway: with a WAN default route the LAN connect falls through to the WAN NAT and times out rather than returning ENETUNREACH. The Tor re-link stays asserted by link-over-tor. Offline-validated (sh -n, py_compile, 14/14 _lib tests); live validation deferred to the next full rebuild. 
--- netsim/tasks/leave-lan/README.md | 2 +- netsim/tasks/leave-lan/run.sh | 47 ++++++++++------------ netsim/tasks/leave-lan/verify.py | 68 +++++++++++++++----------------- 3 files changed, 53 insertions(+), 64 deletions(-) diff --git a/netsim/tasks/leave-lan/README.md b/netsim/tasks/leave-lan/README.md index 47d4b9f4..9d130504 100644 --- a/netsim/tasks/leave-lan/README.md +++ b/netsim/tasks/leave-lan/README.md @@ -1,3 +1,3 @@ # leave-lan -On the host, seeds `--peer` (node1) with `--vm` (node2)'s onion (`nodes.resolve_endpoints` → `nodes.add_endpoint`), then nftables-drops the LAN path between them, leaving node2 reachable from node1 only over Tor. verify.py asserts node2 can no longer TCP-connect to node1's LAN address on port 1791. +On the host, seeds `--peer` (node1) with `--vm` (node2)'s onion (`nodes.resolve_endpoints` → `nodes.add_endpoint`) while the LAN is still up, then makes node2 **leave** the LAN by withdrawing its own 10.77 address (`ip addr flush`, which also drops the connected route; the NIC is taken down too). astrald polls `net.InterfaceAddrs()` and advertises one tcp endpoint per address, so the withdrawal is what it observes as a network change — it drops the 10.77 endpoint and the swarm link re-forms over Tor. SSH/management rides the separate WAN NIC, so it stays up. verify.py asserts (blind, deterministic) that node2 no longer holds a 10.77 LAN address or route; the Tor re-link is asserted by link-over-tor. diff --git a/netsim/tasks/leave-lan/run.sh b/netsim/tasks/leave-lan/run.sh index 7f15fcc8..e93cbdcb 100755 --- a/netsim/tasks/leave-lan/run.sh +++ b/netsim/tasks/leave-lan/run.sh @@ -1,12 +1,16 @@ #!/bin/sh -# leave-lan: sever the LAN path between (node2, the node that "leaves") and -# (node1). astrald's tor module + the swarm link maintainer will then re-link over Tor. +# leave-lan: make (node2, the node that "leaves") genuinely leave the 10.77 LAN, so +# astrald's tor module + the swarm link maintainer re-link to (node1) over Tor. 
# # Two steps, both on the host: # 1. Seed <peer> with <vm>'s onion WHILE THE LAN IS STILL UP — once the LAN is gone the # peer can no longer ask for its address, so it needs the .onion cached first. -# 2. nftables-drop all traffic between them on the LAN. The NIC stays up and Internet -# egress (the WAN NAT, used for Tor) is untouched — only the direct LAN path is cut. +# 2. Withdraw <vm>'s own 10.77 LAN address (ip addr flush). astrald has no carrier/ +# operstate monitor: it polls net.InterfaceAddrs() every 3s and advertises one tcp +# endpoint per assigned IP, so removing the address is what it observes as "left the +# network" — it drops the 10.77 endpoint and re-links over Tor. (A packet-filter DROP, +# or even a link/carrier down, leaves the IPv4 address in place and is invisible to +# that monitor.) SSH/management rides the separate WAN NIC and is untouched. # leave-lan [--vm <host>] [--peer <host>] (default: node2 leaves, peer node1) # # Both nodes must have Tor up (enable-tor) and the <vm> alias must resolve on @@ -53,31 +57,22 @@ echo "leave-lan: seeding $PEER with $VM's onion ..." # shellcheck disable=SC2029 netsim ssh "$PEER" -- "leaver='$VM'; $SEED_BODY" -# 2) resolve <peer>'s LAN address and drop it on <vm> -peer_ip=$(netsim ssh "$PEER" -- "hostname -I" | tr ' ' '\n' | grep '^10\.77\.' | head -1) -[ -n "$peer_ip" ] || { echo "leave-lan: could not find $PEER's 10.77 LAN address" >&2; exit 1; } - +# 2) make <vm> leave the LAN: withdraw its own 10.77 address (and drop the NIC for realism). +# Removing the address takes its connected /24 route with it, so <vm> has no address on +# and no route to the LAN — it has genuinely left at the IP layer, which is exactly what +# astrald observes (see the header). No peer IP needed: <vm> drops its own membership.
CUT_BODY=$(cat <<'EOS' set -eu -export DEBIAN_FRONTEND=noninteractive -command -v nft >/dev/null 2>&1 || { - apt-get -qq -o DPkg::Lock::Timeout=300 update - apt-get -qq -y -o DPkg::Lock::Timeout=300 install nftables >/dev/null -} -# A dedicated table so the cut is self-contained and easy to reason about. Chains are -# named netout/netin (not the nft scanner keywords in/out). Flush before adding so a -# re-run yields exactly one rule per direction. -nft add table ip netsimcut 2>/dev/null || true -nft 'add chain ip netsimcut netout { type filter hook output priority 0 ; }' 2>/dev/null || true -nft 'add chain ip netsimcut netin { type filter hook input priority 0 ; }' 2>/dev/null || true -nft flush chain ip netsimcut netout 2>/dev/null || true -nft flush chain ip netsimcut netin 2>/dev/null || true -nft add rule ip netsimcut netout ip daddr "$peer_ip" drop -nft add rule ip netsimcut netin ip saddr "$peer_ip" drop -echo "leave-lan: $(hostname) dropped LAN traffic to/from $peer_ip" +# the NIC holding the 10.77 LAN address is nic2; SSH rides the separate WAN NIC, untouched. +lan_if=$(ip -o -4 addr show | awk '$4 ~ /^10\.77\./ {print $2; exit}') +[ -n "$lan_if" ] || { echo "leave-lan: no 10.77 LAN interface on $(hostname)" >&2; exit 1; } +lan_ip=$(ip -o -4 addr show dev "$lan_if" | awk '$4 ~ /^10\.77\./ {print $4; exit}') +ip addr flush dev "$lan_if" # RTM_DELADDR: drops the address AND its connected /24 route +ip link set "$lan_if" down # carrier/admin down too, so the NIC is faithfully "gone" +echo "leave-lan: $(hostname) withdrew $lan_ip from $lan_if (left the LAN)" EOS ) -echo "leave-lan: severing LAN path $VM <-> $PEER ($peer_ip) ..." +echo "leave-lan: $VM leaving the LAN (withdrawing its 10.77 address) ..." 
# shellcheck disable=SC2029 -netsim ssh "$VM" -- "peer_ip='$peer_ip'; $CUT_BODY" +netsim ssh "$VM" -- "$CUT_BODY" echo "leave-lan: done on $VM" diff --git a/netsim/tasks/leave-lan/verify.py b/netsim/tasks/leave-lan/verify.py index 3d58d9a1..94fa25e4 100644 --- a/netsim/tasks/leave-lan/verify.py +++ b/netsim/tasks/leave-lan/verify.py @@ -1,10 +1,19 @@ #!/usr/bin/env python3 -"""verify leave-lan: <vm> can no longer reach <peer> over the LAN. - -Independent host-side check: from <vm>, a TCP connect to the peer's LAN address on -the astral port (1791) must NOT succeed (the nftables drop blackholes it -> -timeout). The peer's LAN IP is resolved from the peer. No astral-query here -- this -is a raw socket probe, run through tasks/_lib/astralapi.py's ssh transport. +"""verify leave-lan: <vm> has withdrawn its LAN identity, so it genuinely left the +10.77 LAN (not merely had its packets filtered). + +The cut (run.sh) flushes <vm>'s own 10.77 address, which is what astrald observes as +"left the network": it polls net.InterfaceAddrs() every 3s and advertises one tcp +endpoint per assigned IP, so removing the address fires EventNetworkAddressChanged and +withdraws the 10.77 tcp endpoint. (A packet-filter DROP -- or a bare link/carrier down -- +leaves the IPv4 address in place and is invisible to that monitor.) + +This is a blind, deterministic host-side check: it reads <vm>'s own network state over +ssh, independent of astral, and asserts two consequences of the address withdrawal -- +<vm> has (1) no 10.77 LAN address and (2) no route into the 10.77 subnet. Neither depends +on a TCP probe's error code, which would vary with the WAN default route (a connect to +the LAN falls through to the WAN NAT and times out rather than returning ENETUNREACH). +astrald's reaction -- re-linking over Tor -- is asserted separately by link-over-tor.
""" import argparse import os @@ -15,8 +24,6 @@ os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) import astralapi # noqa: E402 -PORT = 1791 - def main(): ap = argparse.ArgumentParser() @@ -24,37 +31,24 @@ def main(): ap.add_argument("--peer", default="node1") # the node it can no longer reach args, _ = ap.parse_known_args() - ip = astralapi.peer_lan_ip(args.peer) - if not ip: - sys.stderr.write(f"leave-lan verify FAILED: could not resolve {args.peer}'s 10.77 LAN IP.\n") + # 1) the leaver no longer holds any 10.77 LAN address (the thing astrald keys on) + lan_ip = astralapi.peer_lan_ip(args.vm) + # 2) and has no route into the 10.77 subnet (the connected route went with the address) + lan_routes = [ln for ln in (astralapi.ssh(args.vm, "ip -o route show") or "").splitlines() + if "10.77." in ln] + + if lan_ip: + sys.stderr.write(f"leave-lan verify FAILED: {args.vm} still holds a LAN address " + f"({lan_ip}) -- it has not left the 10.77 LAN.\n") + return 1 + if lan_routes: + sys.stderr.write(f"leave-lan verify FAILED: {args.vm} still has a route into the " + "10.77 LAN:\n " + "\n ".join(lan_routes) + "\n") return 1 - # Only a TIMEOUT proves the nftables DROP blackholed the path. A connect that - # succeeds means the LAN is not severed; a refusal/reset (or any other error) means - # the path is reachable but the port is closed for another reason -> inconclusive, - # NOT a pass (would otherwise false-pass if the drop rule were missing). 
- probe = ( - "python3 -c 'import socket\n" - "s=socket.socket(); s.settimeout(3)\n" - f"try:\n s.connect((\"{ip}\",{PORT})); print(\"open\")\n" - "except socket.timeout:\n print(\"timeout\")\n" - "except Exception as e:\n print(\"err:\"+type(e).__name__)'" - ) - result = (astralapi.ssh(args.vm, probe) or "").strip() - - if result == "timeout": - print(f"leave-lan OK: {args.vm} can no longer reach {args.peer} ({ip}:{PORT}) over the LAN " - "(connect times out -- blackholed)") - return 0 - - if result == "open": - sys.stderr.write(f"leave-lan verify FAILED: {args.vm} still reaches {args.peer} " - f"({ip}:{PORT}) over the LAN (connect succeeded).\n") - else: - sys.stderr.write(f"leave-lan verify FAILED: probe to {args.peer} ({ip}:{PORT}) was " - f"inconclusive ({result!r}) -- expected a timeout from the drop, not a " - "refusal/reset.\n") - return 1 + print(f"leave-lan OK: {args.vm} withdrew its 10.77 LAN address and route -- it has left " + f"the LAN (astrald re-links to {args.peer} over Tor; asserted by link-over-tor).") + return 0 if __name__ == "__main__": From 7e128b2f84bf4de86e07055d2cbe89b9ea629912 Mon Sep 17 00:00:00 2001 From: intern0 Date: Wed, 1 Jul 2026 13:18:19 +0200 Subject: [PATCH 50/57] netsim: install qemu-guest-agent in the lab image (pairs with netsim QGA time-sync) Install + enable qemu-guest-agent so the host can correct the guest clock out-of-band over virtio-serial on snapshot resume (netsim's qga.sync_time -> guest-set-time), instead of racing sshd while the resume clock-jump storms the 1-vCPU VM. Folded into the existing deps install; enabled so it is baked running into the snapshot and answers on resume. Requires the netsim guest-agent virtio-serial device (satforge/netsim branch intern0/dev/guest-agent-time-sync); inert without it. Adding that device changes the machine model, so this only takes effect on a from-scratch rebuild. 
--- netsim/tasks/install-astrald/run.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/netsim/tasks/install-astrald/run.sh b/netsim/tasks/install-astrald/run.sh index 19a4e83e..301104f9 100755 --- a/netsim/tasks/install-astrald/run.sh +++ b/netsim/tasks/install-astrald/run.sh @@ -30,13 +30,20 @@ REMOTE_BODY=$(cat <<'EOS' set -eu export DEBIAN_FRONTEND=noninteractive -# deps: git + curl (Go comes from the official tarball, not apt -> need >= 1.25) -need=""; command -v git >/dev/null 2>&1 || need="$need git" - command -v curl >/dev/null 2>&1 || need="$need curl" +# deps: git + curl (Go comes from the official tarball, not apt -> need >= 1.25). +# qemu-guest-agent lets the host correct the guest clock out-of-band over +# virtio-serial on snapshot resume (netsim's qga guest-set-time), instead of +# racing sshd while the resume clock-jump storms this 1-vCPU VM. +need=""; command -v git >/dev/null 2>&1 || need="$need git" + command -v curl >/dev/null 2>&1 || need="$need curl" + command -v qemu-ga >/dev/null 2>&1 || need="$need qemu-guest-agent" if [ -n "$need" ]; then apt-get -qq -o DPkg::Lock::Timeout=120 update apt-get -qq -y -o DPkg::Lock::Timeout=120 install $need ca-certificates >/dev/null fi +# Bind the agent to netsim's guest-agent virtio-serial port (present from boot); +# left running so it is baked into the snapshot and answers on resume. +systemctl enable --now qemu-guest-agent >/dev/null 2>&1 || true # Ephemeral test-VM hygiene: disable the apt periodic machinery so a clock jump on # resume (netsim corrects the stale snapshot clock) can't wake apt-daily / From 963554c79c71a8f4100aa1052990321eac27efa1 Mon Sep 17 00:00:00 2001 From: intern0 Date: Wed, 1 Jul 2026 18:54:31 +0200 Subject: [PATCH 51/57] netsim: add configure-nat-tor task (relocate Tor into the NAT'd node's netns) enter-nat moves astrald into netns "priv", so its 127.0.0.1 becomes the netns loopback and it loses the root-ns Tor. 
astrald's tor module needs Tor at 127.0.0.1:9050/9051 AND its onion service's local listener is hardcoded 127.0.0.1:0 (mod/tor/src/server.go), which Tor dials on inbound -- so a config knob can't fix it; Tor must run in the same netns. configure-nat-tor (runs after enable-tor + enter-nat, per --vm): 1. WAN masquerade for 192.168.99.0/24 so Tor-in-netns reaches the real Tor network (enter-nat's LAN SNAT still handles 198.51.100.x peer traffic; routing splits by destination); 2. move tor@default.service into netns "priv" via a NetworkNamespacePath drop-in (same idiom enter-nat uses for astrald); 3. restart Tor (in netns) then astrald; self-validates by confirming astrald re-publishes its onion. No astrald source/config change. Deferred-phase task for the NAT-punch line (enable-tor . enter-nat . configure-nat-tor . add-reflector . punch-nat). Syntax-validated (sh -n + embedded python); live validation deferred to the punch-phase bring-up (needs the enable-tor + enter-nat preconditions). --- netsim/tasks/configure-nat-tor/README.md | 26 +++++ netsim/tasks/configure-nat-tor/run.sh | 115 +++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 netsim/tasks/configure-nat-tor/README.md create mode 100755 netsim/tasks/configure-nat-tor/run.sh diff --git a/netsim/tasks/configure-nat-tor/README.md b/netsim/tasks/configure-nat-tor/README.md new file mode 100644 index 00000000..e38b9084 --- /dev/null +++ b/netsim/tasks/configure-nat-tor/README.md @@ -0,0 +1,26 @@ +# configure-nat-tor + +Relocates a NAT'd node's **Tor into the same network namespace as its astrald** — the +piece `enter-nat` can't do, needed before the pair can re-link (and punch) over Tor. + +`enter-nat` moves astrald into netns `priv`, so its `127.0.0.1` becomes the *netns* +loopback. 
astrald's `tor` module needs Tor at `127.0.0.1:9050`/`:9051`, and — the part a +config knob can't fix — its onion service's **local listener is hardcoded `127.0.0.1:0`** +(`mod/tor/src/server.go`), which Tor dials on **inbound** onion connections. So a root-ns +Tor can neither be reached nor deliver inbound onion to a netns'd astrald; **Tor must live +in the netns too.** + +On each `--vm` (default `node1 node2`), run **after** `enable-tor` and `enter-nat`: + +1. **WAN masquerade** for `192.168.99.0/24` out the default-route (slirp WAN) NIC, so + Tor-in-netns can reach the real Tor network. Routing splits by destination: + `198.51.100.0/24` (peers) → `lan0` via `enter-nat`'s SNAT; internet (Tor) → WAN. +2. **Move `tor@default.service` into netns `priv`** via a `NetworkNamespacePath` systemd + drop-in (same idiom `enter-nat` uses for astrald; net ns only, so `torrc` is untouched). +3. Restart **Tor first** (binds netns `127.0.0.1:9050/9051`), then **astrald** (its tor + module connects to the control port once at start, no retry). + +Self-validating: waits for the control port inside the netns, then confirms astrald +**re-publishes its onion** — the end-to-end proof that bootstrap (via the WAN masquerade), +control, and the netns-local onion listener all work. **No astrald source/config change.** +Host-driven. Used by the NAT-punch story after `enable-tor` + `enter-nat`. diff --git a/netsim/tasks/configure-nat-tor/run.sh b/netsim/tasks/configure-nat-tor/run.sh new file mode 100755 index 00000000..cd9eb23f --- /dev/null +++ b/netsim/tasks/configure-nat-tor/run.sh @@ -0,0 +1,115 @@ +#!/bin/sh +# configure-nat-tor: relocate a NAT'd node's Tor into its private netns so astrald +# (moved into netns "priv" by enter-nat) regains full Tor -- inbound AND outbound onion. 
+# +# Why a dedicated task (not folded into enter-nat): astrald's tor module reaches Tor at +# 127.0.0.1:9050 (SOCKS) / 127.0.0.1:9051 (control), AND its onion service's local +# listener is hardcoded 127.0.0.1:0 (mod/tor/src/server.go) which Tor dials on inbound. +# Once enter-nat moves astrald into netns "priv", that 127.0.0.1 is the netns loopback, so +# a root-ns Tor can neither be reached for SOCKS/control nor deliver inbound onion. Fix +# (no astrald change): run Tor INSIDE the same netns, and give the netns internet egress +# (WAN masquerade) so Tor can still reach the real Tor network. On each --vm: +# * WAN masquerade for 192.168.99.0/24 (Tor's internet path). enter-nat's LAN SNAT to +# 198.51.100.x still handles peer traffic -- routing splits by destination. +# * move tor@default.service into netns "priv" via a systemd drop-in, restart it there; +# * restart astrald (already in the netns) so its tor module re-inits against the now +# netns-local control port, then confirm it re-publishes its onion (end-to-end proof). +# +# Run AFTER enable-tor (Tor installed + control port) and enter-nat (netns + astrald in it). +# configure-nat-tor [--vm ]... (default: node1 node2) +set -eu + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + *) echo "usage: configure-nat-tor [--vm ]..." >&2; exit 64 ;; + esac +done +[ -n "$VMS" ] || VMS="node1 node2" + +REMOTE_BODY=$(cat <<'EOS' +set -eu + +# preconditions from enter-nat / enable-tor +ip netns list 2>/dev/null | grep -qw priv \ + || { echo "configure-nat-tor: netns priv missing on $(hostname) (run enter-nat first)" >&2; exit 1; } +systemctl cat tor@default.service >/dev/null 2>&1 \ + || { echo "configure-nat-tor: tor@default.service not found on $(hostname) (run enable-tor first)" >&2; exit 1; } + +# 1) WAN egress for the netns so Tor-in-netns can reach the real Tor network. 
The slirp WAN +# NIC is the default-route interface (it keeps its kernel name; only lan0 is renamed). +wan=$(ip route show default | awk '{print $5; exit}') +[ -n "$wan" ] || { echo "configure-nat-tor: no default route / WAN nic on $(hostname)" >&2; exit 1; } +# idempotent append to enter-nat's existing ip/nat postrouting chain (keeps the LAN SNAT). +nft list chain ip nat postrouting 2>/dev/null | grep -q "oifname \"$wan\" masquerade" \ + || nft add rule ip nat postrouting ip saddr 192.168.99.0/24 oifname "$wan" masquerade + +# 2) move the Tor daemon into netns "priv" (Debian runs it as tor@default.service; the +# tor.service wrapper pulls it in). Same NetworkNamespacePath idiom enter-nat used for +# astrald -- joins only the NET ns, so torrc (mount ns) is untouched. +mkdir -p /etc/systemd/system/tor@default.service.d +cat > /etc/systemd/system/tor@default.service.d/netns.conf </dev/null | grep -q '127.0.0.1:9051'; then ok=1; break; fi + sleep 1 +done +[ -n "$ok" ] || { + echo "configure-nat-tor: tor control 9051 did not open in netns priv on $(hostname)" >&2 + journalctl -u tor@default --no-pager 2>&1 | tail -20 >&2 || true + exit 1 +} + +# 4) restart astrald (already in the netns) so its tor module re-inits against the now +# netns-local control port, then confirm it re-publishes an onion. Success here proves +# Tor-in-netns end to end: bootstrap via the WAN masquerade + control + the onion local +# listener are ALL netns-local. astrald's onion key persists under -root (shared mount +# ns), so it comes back as the same onion. 
+systemctl restart astrald +onion= +for _ in $(seq 1 90); do + if systemctl is-active --quiet astrald; then + onion=$(astral-query nodes.resolve_endpoints -id localnode -out json 2>/dev/null | python3 -c ' +import json,sys +def addr(ep): + if isinstance(ep, str): return ep + if isinstance(ep, dict): + o = ep.get("Object"); return o if isinstance(o, str) else "" + return "" +for ln in sys.stdin: + ln = ln.strip() + if not ln: continue + try: o = json.loads(ln) + except Exception: continue + a = addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: print(a); break') + [ -n "$onion" ] && break + fi + sleep 2 +done +[ -n "$onion" ] || { + echo "configure-nat-tor: astrald did not re-publish a tor onion in netns on $(hostname)" >&2 + journalctl -u tor@default --no-pager 2>&1 | tail -20 >&2 || true + journalctl -u astrald --no-pager 2>&1 | tail -20 >&2 || true + exit 1 +} +echo "configure-nat-tor: $(hostname) Tor now in netns priv (onion=$onion, wan=$wan)" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "configure-nat-tor: relocating Tor into $vm's netns ..." + # shellcheck disable=SC2029 + netsim ssh "$vm" -- "$REMOTE_BODY" +done +echo "configure-nat-tor: done ($VMS)" From f7c06d485ebe9fcc0e18baa98f463080fd7f8a86 Mon Sep 17 00:00:00 2001 From: intern0 Date: Wed, 1 Jul 2026 19:19:14 +0200 Subject: [PATCH 52/57] netsim: add NAT scenario milestone tasks (enter-nat, add-reflector) + kcp helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WIP checkpoint of the NAT hole-punch scenario (see the "simulate NAT hole punching" task doc), milestone phase — no punch yet. - enter-nat: put a peer's astrald behind its own symmetric true-masquerade NAT (private netns "priv" + veth + port-preserving SNAT to a public TEST-NET alias 198.51.100.; astrald relaunched inside the netns via a NetworkNamespacePath drop-in). 
The leave-lan analog for the NAT line. - add-reflector: wire a public reflector node so each NAT'd peer learns its own public endpoint by reflection (ObservedEndpointMessage), arming its nat module. - _lib/astralapi.py: links_by_network() + kcp_links() — the verify helper for the eventual punch (a kcp link is the unique punch signal). nat-eim-probe (throwaway de-risk probe) and the throwaway stories are held back deliberately. No milestone verify yet; enter-nat/add-reflector have no verify.sh. Committed on dev--netsim-scenarios atop configure-nat-tor (963554c7). --- netsim/tasks/_lib/astralapi.py | 21 ++++++ netsim/tasks/add-reflector/run.sh | 70 ++++++++++++++++++++ netsim/tasks/enter-nat/README.md | 34 ++++++++++ netsim/tasks/enter-nat/run.sh | 104 ++++++++++++++++++++++++++++++ 4 files changed, 229 insertions(+) create mode 100755 netsim/tasks/add-reflector/run.sh create mode 100644 netsim/tasks/enter-nat/README.md create mode 100755 netsim/tasks/enter-nat/run.sh diff --git a/netsim/tasks/_lib/astralapi.py b/netsim/tasks/_lib/astralapi.py index c02a9c78..0814488b 100644 --- a/netsim/tasks/_lib/astralapi.py +++ b/netsim/tasks/_lib/astralapi.py @@ -314,6 +314,27 @@ def tor_links(objs): return out +def links_by_network(objs, network): + """(RemoteIdentity, endpoint-address) for links whose Network == .""" + out = [] + for v in _values(objs): + if isinstance(v, dict) and str(v.get("Network")) == network: + out.append((str(v.get("RemoteIdentity", "")), + endpoint_addr(v.get("RemoteEndpoint")))) + return out + + +def kcp_links(objs): + """(RemoteIdentity, endpoint-address) for links whose Network == 'kcp'. + + A 'kcp' link is the unique signal of a completed NAT hole-punch: mod/nodes' + NATLinkStrategy is the only path that dials a kcp.Endpoint (BasicLinkStrategy + dials only tcp, and kcp endpoints are never advertised for an ordinary peer + dial), so a kcp link to a sibling means the punch succeeded and was promoted + to a direct link. Mirrors tor_links(); cf. 
links_by_network(objs, "kcp").""" + return links_by_network(objs, "kcp") + + def resolve_onion(objs): """The .onion address from a nodes.resolve_endpoints result, or None.""" for v in _values(objs): diff --git a/netsim/tasks/add-reflector/run.sh b/netsim/tasks/add-reflector/run.sh new file mode 100755 index 00000000..6d6c8076 --- /dev/null +++ b/netsim/tasks/add-reflector/run.sh @@ -0,0 +1,70 @@ +#!/bin/sh +# add-reflector: wire the public reflector node so both NAT'd peers learn their own +# public endpoint by reflection, arming each peer's `nat` module. +# +# Symmetric-masquerade NAT hides a node's public address from itself (it only exists as a +# conntrack translation), so astrald can only learn it when a directly-reachable peer +# observes the SNAT'd source and reflects it back (`reflectLink` -> ObservedEndpointMessage, +# accepted only for a public tcp/utp endpoint). Two masqueraded peers can't reflect each +# other before the punch, so a non-NAT'd reflector does it. The reflector VM itself is made +# by add-vm + install-astrald; this task does the reflector-specific wiring: +# 1. give the reflector a public TEST-NET alias 198.51.100. and read its id; +# 2. on each peer: register that endpoint and force a tcp link to it -> the reflector +# observes the peer's 198.51.100. source and reflects it -> the peer's nat arms. +# Run AFTER enter-nat (the peer must already be behind its NAT so the reflected source is +# its public alias, not its private 192.168.99.2). +# add-reflector [--reflector ] [--vm ]... (default: reflector; peers node1 node2) +set -eu + +REFL="reflector"; PEERS="" +while [ $# -gt 0 ]; do + case "$1" in + --reflector) [ $# -ge 2 ] || { echo "need host after --reflector" >&2; exit 64; }; REFL=$2; shift 2 ;; + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; PEERS="${PEERS:+$PEERS }$2"; shift 2 ;; + *) echo "usage: add-reflector [--reflector ] [--vm ]..." 
>&2; exit 64 ;; + esac +done +[ -n "$PEERS" ] || PEERS="node1 node2" + +# 1) give the reflector a public alias and read its node identity +REFL_SETUP=$(cat <<'EOS' +set -eu +lan=$(ip -o -4 addr show | awk '$4 ~ /^10\.77\./ {print $2; exit}') +[ -n "$lan" ] || { echo "add-reflector: no 10.77 LAN nic on $(hostname)" >&2; exit 1; } +oct=$(ip -o -4 addr show dev "$lan" | awk '$4 ~ /^10\.77\./ {n=$4; sub(/\/.*/,"",n); split(n,a,"."); print a[4]; exit}') +pub="198.51.100.$oct" +ip addr add "$pub/24" dev "$lan" 2>/dev/null || true +# the reflector's own node identity (host sees the local anonymous caller as the node) +rid=$(astral-query apphost.whoami -out json 2>/dev/null | python3 -c ' +import json,sys +for ln in sys.stdin: + ln=ln.strip() + if not ln: continue + try: o=json.loads(ln) + except Exception: continue + v=o.get("Object") + if isinstance(v,str) and len(v)>=64: print(v); break + if isinstance(v,dict) and isinstance(v.get("Identity"),str): print(v["Identity"]); break') +[ -n "$rid" ] || { echo "add-reflector: could not read reflector identity via apphost.whoami on $(hostname)" >&2; exit 1; } +echo "$pub $rid" # LAST stdout line: +EOS +) +echo "add-reflector: configuring reflector on $REFL ..." >&2 +out=$(netsim ssh "$REFL" -- "$REFL_SETUP" | tail -n1) +REFL_PUB=$(echo "$out" | awk '{print $1}') +REFL_ID=$(echo "$out" | awk '{print $2}') +case "$REFL_PUB" in 198.51.100.*) : ;; *) echo "add-reflector: bad reflector pub '$REFL_PUB' (out: $out)" >&2; exit 1 ;; esac +[ -n "$REFL_ID" ] || { echo "add-reflector: no reflector identity (out: $out)" >&2; exit 1; } +echo "add-reflector: reflector '$REFL' at tcp:$REFL_PUB:1791 id=$REFL_ID" >&2 + +# 2) seed each peer with the reflector endpoint and force a tcp link so it gets reflected +for p in $PEERS; do + echo "add-reflector: linking $p -> reflector (for endpoint reflection) ..." 
>&2 + # shellcheck disable=SC2029 + netsim ssh "$p" -- " + astral-query nodes.add_endpoint -id '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' >/dev/null 2>&1 || true + astral-query dir.set_alias -id '$REFL_ID' -alias reflector >/dev/null 2>&1 || true + astral-query nodes.new_link -target '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' -out json 2>&1 | tail -3 + " || echo "add-reflector: WARNING new_link to reflector failed on $p (bring-up diagnoses)" >&2 +done +echo "add-reflector: done (reflector=$REFL id=$REFL_ID pub=$REFL_PUB; peers: $PEERS)" diff --git a/netsim/tasks/enter-nat/README.md b/netsim/tasks/enter-nat/README.md new file mode 100644 index 00000000..66c7ee94 --- /dev/null +++ b/netsim/tasks/enter-nat/README.md @@ -0,0 +1,34 @@ +# enter-nat + +Puts a node's astrald behind its own **symmetric, true-masquerade NAT** — the `leave-lan` +analog for the NAT scenario. + +On each `--vm` (default `node1 node2`): + +- creates netns **`priv`** holding the private host `192.168.99.2`, wired to the VM by a + `veth` pair (`192.168.99.1` on the VM side); +- installs a **port-preserving SNAT** of `192.168.99.0/24` to a per-node public TEST-NET + alias **`198.51.100.x`** (`x` = last octet of the VM's `10.77` address) on the LAN NIC + (`lan0`) — validated as endpoint-independent (cone) by `nat-eim-probe`; +- relaunches astrald **inside the netns** via a systemd drop-in + (`NetworkNamespacePath=/run/netns/priv`), which joins only the *network* namespace. + `astral-query` dials `tcp:127.0.0.1:8625`, which is now the netns loopback, so run it as + `ip netns exec priv astral-query …`. astrald keeps its `-root` and identity. + +Moving astrald off the flat `10.77` address withdraws its direct LAN endpoint (astrald +polls `InterfaceAddrs`; in the netns it sees only `192.168.99.2`), so the pair re-links +over Tor — exactly the `leave-lan` dynamic.
+ +**This task only builds the NAT; it does not punch.** astrald cannot see its own public +alias (that is what masquerade means), so its `nat` module stays **disabled** until the +`reflector` node reflects that endpoint back — see `add-reflector`. The pre-punch +milestone is: after `enter-nat` + `add-reflector`, `nat` reports **enabled** on both peers. + +## Notes / follow-ups + +- The netns currently routes only to the LAN (for reflection and, later, the peer punch). + The **punch increment** will additionally need the netns routed to the WAN (slirp) so + the Tor signaling link can form from inside the netns — add a `masquerade` rule for the + WAN NIC then. +- Verify via `ip netns exec priv astral-query …`; plain `astral-query` from the root ns + cannot reach astrald, because its apphost ports now live inside the netns. diff --git a/netsim/tasks/enter-nat/run.sh b/netsim/tasks/enter-nat/run.sh new file mode 100755 index 00000000..ec1148be --- /dev/null +++ b/netsim/tasks/enter-nat/run.sh @@ -0,0 +1,104 @@ +#!/bin/sh +# enter-nat: put a node's astrald behind its own (symmetric, true-masquerade) NAT. +# +# The leave-lan analog: relocating astrald into a private network namespace severs its +# direct 10.77 LAN path, so the swarm link maintainer re-links the pair over Tor -- and +# the node is now a genuine NAT'd peer. On each --vm: +# * create netns "priv" (192.168.99.2) wired to the VM by a veth pair; +# * port-preserving SNAT of 192.168.99.0/24 to a per-node public TEST-NET alias +# 198.51.100. on the LAN NIC (validated as endpoint-independent/cone by +# nat-eim-probe); +# * relaunch astrald INSIDE the netns (same -root, so same identity) via a systemd +# drop-in (NetworkNamespacePath -- joins only the NET ns, leaving the apphost unix +# socket in the shared mount ns so `astral-query` still reaches it from the root ns).
+# +# astrald cannot see its own public alias -- that is what masquerade means -- so its nat +# module stays disabled until the reflector node reflects that endpoint back (see +# add-reflector). This task only builds the NAT; it does NOT punch. +# enter-nat [--vm ]... (default: node1 node2; one call NATs each peer) +set -eu + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + *) echo "usage: enter-nat [--vm ]..." >&2; exit 64 ;; + esac +done +[ -n "$VMS" ] || VMS="node1 node2" + +REMOTE_BODY=$(cat <<'EOS' +set -eu +export DEBIAN_FRONTEND=noninteractive +command -v nft >/dev/null 2>&1 || { + apt-get -qq -o DPkg::Lock::Timeout=120 update + apt-get -qq -y -o DPkg::Lock::Timeout=120 install nftables >/dev/null +} + +# the LAN NIC carries the 10.77 address; its last octet indexes our public alias. +lan=$(ip -o -4 addr show | awk '$4 ~ /^10\.77\./ {print $2; exit}') +[ -n "$lan" ] || { echo "enter-nat: no 10.77 LAN interface on $(hostname)" >&2; exit 1; } +oct=$(ip -o -4 addr show dev "$lan" | awk '$4 ~ /^10\.77\./ {n=$4; sub(/\/.*/,"",n); split(n,a,"."); print a[4]; exit}') +[ -n "$oct" ] || { echo "enter-nat: could not read 10.77 octet on $(hostname)" >&2; exit 1; } +pub="198.51.100.$oct" +ip addr add "$pub/24" dev "$lan" 2>/dev/null || true + +# private host 192.168.99.2 in netns "priv"; this VM is its only way out +ip netns add priv 2>/dev/null || true +ip link add veth0 type veth peer name veth0p 2>/dev/null || true +ip link set veth0p netns priv 2>/dev/null || true +ip addr add 192.168.99.1/24 dev veth0 2>/dev/null || true +ip link set veth0 up +ip -n priv addr add 192.168.99.2/24 dev veth0p 2>/dev/null || true +ip -n priv link set veth0p up; ip -n priv link set lo up +ip -n priv route replace default via 192.168.99.1 +sysctl -wq net.ipv4.ip_forward=1 +sysctl -wq net.ipv4.conf.all.rp_filter=2 +sysctl -wq net.netfilter.nf_conntrack_udp_timeout=60 2>/dev/null || 
true +sysctl -wq net.netfilter.nf_conntrack_udp_timeout_stream=180 2>/dev/null || true + +# port-preserving SNAT to the public alias (idempotent: rebuild the nat table) +nft add table ip nat 2>/dev/null || true +nft flush table ip nat +nft add chain ip nat postrouting '{ type nat hook postrouting priority 100 ; }' +nft add rule ip nat postrouting ip saddr 192.168.99.0/24 oifname "$lan" snat ip to "$pub" + +# move astrald into the netns: join only the NET namespace (mount ns untouched, so the +# apphost unix socket stays reachable from the root ns for astral-query). +mkdir -p /etc/systemd/system/astrald.service.d +cat > /etc/systemd/system/astrald.service.d/netns.conf </dev/null 2>&1; then + ok=1; break + fi + n=$((n + 1)); sleep 1 +done +if [ -z "$ok" ]; then + echo "enter-nat: astrald did not come back up in netns on $(hostname) after ${n}s" >&2 + systemctl status astrald --no-pager >&2 2>&1 || true + journalctl -u astrald --no-pager 2>&1 | tail -30 >&2 || true + exit 1 +fi + +# sanity: astrald must now be in the netns (its own 10.77 endpoint withdrawn) and see 192.168.99.2 +in_ns=$(ip netns identify "$(pgrep -x astrald | head -1)" 2>/dev/null || true) +echo "enter-nat: $(hostname) astrald behind NAT (priv 192.168.99.2 -> public $pub via $lan; netns=${in_ns:-?})" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "enter-nat: putting $vm behind its NAT ..." 
+ # shellcheck disable=SC2029 + netsim ssh "$vm" -- "$REMOTE_BODY" +done +echo "enter-nat: done ($VMS)" From 2e2e64e7c8bb11609136e8851774845ca65bcccd Mon Sep 17 00:00:00 2001 From: intern0 Date: Wed, 1 Jul 2026 19:38:22 +0200 Subject: [PATCH 53/57] =?UTF-8?q?netsim:=20add-reflector=20verify=20?= =?UTF-8?q?=E2=80=94=20assert=20nat=20armed=20(public=20198.51.100.x)=20on?= =?UTF-8?q?=20both=20peers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current NAT milestone is "nat module enabled on both peers", but add-reflector had no verify, so the wiring could silently fail. Add a blind host-side check: for each peer, derive its public TEST-NET alias 198.51.100.<10.77-octet>, query the peer's astrald `ip.public_ip_candidates`, and assert that address is present -- which is exactly what flips nat.enabled (evaluateEnabled = setting-default-on AND len(PublicIPCandidates())>0). Queries via the Go astral-query CLI over the apphost unix socket (not the astral-py WS client): astrald runs inside netns "priv" so its WS port is netns-local, but the unix socket is in the shared mount ns and crosses the net-ns boundary. Syntax-validated; live validation comes with the milestone story (needs enter-nat + add-reflector applied to a NAT'd pair -- no stage carries that yet). --- netsim/tasks/add-reflector/verify.py | 62 ++++++++++++++++++++++++++++ netsim/tasks/add-reflector/verify.sh | 3 ++ 2 files changed, 65 insertions(+) create mode 100644 netsim/tasks/add-reflector/verify.py create mode 100755 netsim/tasks/add-reflector/verify.sh diff --git a/netsim/tasks/add-reflector/verify.py b/netsim/tasks/add-reflector/verify.py new file mode 100644 index 00000000..4e39dfef --- /dev/null +++ b/netsim/tasks/add-reflector/verify.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +"""verify add-reflector: both NAT'd peers learned their public endpoint by reflection, +arming astrald's nat module. 
+ +After enter-nat hides each peer behind a symmetric masquerade NAT, the peer can't see its +own public address; add-reflector wires a public reflector that observes the peer's SNAT'd +source and reflects it back. The observable result -- and exactly what flips astrald's nat +module on (`evaluateEnabled`: the `enabled` setting defaults on AND +len(PublicIPCandidates())>0, `mod/nat/src/module.go`) -- is that each peer's public IP +candidates now include its TEST-NET alias 198.51.100.. + +Blind host-side check: for each peer, derive its 198.51.100. from its 10.77 LAN +octet, then query the peer's astrald `ip.public_ip_candidates` and assert that address is +present. That address being a public candidate == the peer is armed (nat can/does enable). + +Note: the peer's astrald runs INSIDE netns "priv" (enter-nat), so its WS apphost port is +netns-local and NOT reachable over the ssh -L forward -- we query via the Go `astral-query` +CLI over the apphost unix socket (shared mount ns), which crosses the net-ns boundary. So +this verify uses astralapi.ssh directly, not the astral-py client. +""" +import argparse +import os +import sys + +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import astralapi # noqa: E402 + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--reflector", default="reflector") # accepted (same argv as run.sh), unused here + ap.add_argument("--vm", dest="vms", action="append", default=[]) + args, _ = ap.parse_known_args() + peers = args.vms or ["node1", "node2"] + + failed = [] + for p in peers: + lan = astralapi.peer_lan_ip(p) # e.g. "10.77.0.12" + if not lan: + failed.append(f"{p}: could not read its 10.77 LAN address") + continue + want = "198.51.100." + lan.split(".")[-1] # the peer's public TEST-NET alias + # local introspection op, ungated; astrald is in netns priv -> unix-socket CLI. 
+ raw = astralapi.ssh(p, "astral-query ip.public_ip_candidates -out json") or "" + if want in raw: + print(f"add-reflector OK: {p} nat armed -- public candidate {want} present.") + else: + failed.append(f"{p}: public candidate {want} NOT among ip.public_ip_candidates") + sys.stderr.write(f" {p} ip.public_ip_candidates:\n{raw or '(empty)'}\n") + + if failed: + for f in failed: + sys.stderr.write(f"add-reflector verify FAILED: {f}\n") + return 1 + print(f"add-reflector verified: nat armed on all peers ({', '.join(peers)})") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/add-reflector/verify.sh b/netsim/tasks/add-reflector/verify.sh new file mode 100755 index 00000000..042470ab --- /dev/null +++ b/netsim/tasks/add-reflector/verify.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" From 6b928c8500e8eade6db7cd5bf774fb0d7c658254 Mon Sep 17 00:00:00 2001 From: intern0 Date: Wed, 1 Jul 2026 21:03:29 +0200 Subject: [PATCH 54/57] netsim NAT: run astral-query INSIDE the netns for NAT'd nodes (fixes enter-nat) Root cause of the long-standing enter-nat failure ("astrald did not come back up in netns"): astral-query defaults to tcp:127.0.0.1:8625 (lib/apphost DefaultEndpoint; only the token is env-overridable, not the endpoint). Once enter-nat moves astrald into netns "priv", that 127.0.0.1 is the netns loopback, unreachable from the root ns -- so the readiness probe (and add-reflector's peer calls, the armed verify, configure-nat-tor's onion check) all silently failed. Fix: prefix every astral-query targeting a NAT'd node with `ip netns exec priv` (astral-query in the netns hits astrald's netns-local 127.0.0.1:8625). Corrected the enter-nat header comment (the unix-socket-from-root-ns assumption was wrong). No astrald change. 
--- netsim/tasks/add-reflector/run.sh | 8 +++++--- netsim/tasks/add-reflector/verify.py | 5 +++-- netsim/tasks/configure-nat-tor/run.sh | 3 ++- netsim/tasks/enter-nat/run.sh | 12 +++++++++--- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/netsim/tasks/add-reflector/run.sh b/netsim/tasks/add-reflector/run.sh index 6d6c8076..4bfb17f9 100755 --- a/netsim/tasks/add-reflector/run.sh +++ b/netsim/tasks/add-reflector/run.sh @@ -61,10 +61,12 @@ echo "add-reflector: reflector '$REFL' at tcp:$REFL_PUB:1791 id=$REFL_ID" >&2 for p in $PEERS; do echo "add-reflector: linking $p -> reflector (for endpoint reflection) ..." >&2 # shellcheck disable=SC2029 + # the peer's astrald is in netns "priv" (enter-nat); astral-query defaults to + # tcp:127.0.0.1:8625 which is netns-local, so run it inside the netns. netsim ssh "$p" -- " - astral-query nodes.add_endpoint -id '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' >/dev/null 2>&1 || true - astral-query dir.set_alias -id '$REFL_ID' -alias reflector >/dev/null 2>&1 || true - astral-query nodes.new_link -target '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' -out json 2>&1 | tail -3 + ip netns exec priv astral-query nodes.add_endpoint -id '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' >/dev/null 2>&1 || true + ip netns exec priv astral-query dir.set_alias -id '$REFL_ID' -alias reflector >/dev/null 2>&1 || true + ip netns exec priv astral-query nodes.new_link -target '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' -out json 2>&1 | tail -3 " || echo "add-reflector: WARNING new_link to reflector failed on $p (bring-up diagnoses)" >&2 done echo "add-reflector: done (reflector=$REFL id=$REFL_ID pub=$REFL_PUB; peers: $PEERS)" diff --git a/netsim/tasks/add-reflector/verify.py b/netsim/tasks/add-reflector/verify.py index 4e39dfef..c57184e0 100644 --- a/netsim/tasks/add-reflector/verify.py +++ b/netsim/tasks/add-reflector/verify.py @@ -42,8 +42,9 @@ def main(): failed.append(f"{p}: could not read its 10.77 LAN address") continue want = "198.51.100." 
+ lan.split(".")[-1] # the peer's public TEST-NET alias - # local introspection op, ungated; astrald is in netns priv -> unix-socket CLI. - raw = astralapi.ssh(p, "astral-query ip.public_ip_candidates -out json") or "" + # astrald is in netns "priv"; astral-query defaults to tcp:127.0.0.1:8625 (netns-local), + # so run it inside the netns. Local introspection op -> ungated, no token needed. + raw = astralapi.ssh(p, "ip netns exec priv astral-query ip.public_ip_candidates -out json") or "" if want in raw: print(f"add-reflector OK: {p} nat armed -- public candidate {want} present.") else: diff --git a/netsim/tasks/configure-nat-tor/run.sh b/netsim/tasks/configure-nat-tor/run.sh index cd9eb23f..3cea11d6 100755 --- a/netsim/tasks/configure-nat-tor/run.sh +++ b/netsim/tasks/configure-nat-tor/run.sh @@ -77,7 +77,8 @@ systemctl restart astrald onion= for _ in $(seq 1 90); do if systemctl is-active --quiet astrald; then - onion=$(astral-query nodes.resolve_endpoints -id localnode -out json 2>/dev/null | python3 -c ' + # astrald is in netns "priv"; astral-query defaults to tcp:127.0.0.1:8625 (netns-local). + onion=$(ip netns exec priv astral-query nodes.resolve_endpoints -id localnode -out json 2>/dev/null | python3 -c ' import json,sys def addr(ep): if isinstance(ep, str): return ep diff --git a/netsim/tasks/enter-nat/run.sh b/netsim/tasks/enter-nat/run.sh index ec1148be..b4fd3139 100755 --- a/netsim/tasks/enter-nat/run.sh +++ b/netsim/tasks/enter-nat/run.sh @@ -9,8 +9,14 @@ # 198.51.100. on the LAN NIC (validated as endpoint-independent/cone by # nat-eim-probe); # * relaunch astrald INSIDE the netns (same -root, so same identity) via a systemd -# drop-in (NetworkNamespacePath -- joins only the NET ns, leaving the apphost unix -# socket in the shared mount ns so `astral-query` still reaches it from the root ns). +# drop-in (NetworkNamespacePath -- joins only the NET ns; the -root/apphost files stay +# in the shared mount ns). 
+# +# Reaching the netns'd astrald: `astral-query` defaults to tcp:127.0.0.1:8625 +# (lib/apphost DefaultEndpoint; only the TOKEN is env-overridable, not the endpoint), and +# once astrald is in "priv" that 127.0.0.1 is the NETNS loopback -- unreachable from the +# root ns. So EVERY astral-query against a NAT'd node must run inside the netns: +# `ip netns exec priv astral-query ...` (see add-reflector / verify / configure-nat-tor). # # astrald cannot see its own public alias -- that is what masquerade means -- so its nat # module stays disabled until the reflector node reflects that endpoint back (see @@ -76,7 +82,7 @@ systemctl restart astrald # wait for astrald to come back up inside the netns ok=; n=0 while [ "$n" -lt 90 ]; do - if systemctl is-active --quiet astrald && timeout 5 astral-query localnode:.spec -out json >/dev/null 2>&1; then + if systemctl is-active --quiet astrald && timeout 5 ip netns exec priv astral-query localnode:.spec -out json >/dev/null 2>&1; then ok=1; break fi n=$((n + 1)); sleep 1 From 316674b82fccd1b0e324e7ab70f9943fbfc46d2b Mon Sep 17 00:00:00 2001 From: intern0 Date: Wed, 1 Jul 2026 21:26:34 +0200 Subject: [PATCH 55/57] netsim: add punch-nat task + kcp verify + nat-punch story (full NAT scenario) Completes the NAT hole-punch line. From a source-grounded design pass (signaling is Tor: the tcp-only Basic link strategy can't form for two symmetric masquerade NATs, and the punch client sets no relay hint, so nat.node_punch/peerSupportsNAT route over a Tor link -> configure-nat-tor is required): - punch-nat: host-driven. Resolves both node identities, host-brokers mutual onion knowledge (nodes.add_endpoint tor:, not trusting auto-sync), ensures a live Tor signaling link, then triggers `nodes.new_link -strategies nat` on node1 (drives NATLinkStrategy end-to-end; NOT nat.punch which yields no kcp link). Confirms a durable kcp link on BOTH peers; diagnosis dump on failure (nodes.links / nat.list_holes / conntrack / ss / journal). 
- verify.py: blind kcp assertion on both peers -- a kcp link to the sibling (unique punch signal), and NEGATIVE: no direct/10.77 tcp link to the sibling. - nat-punch.story: two-nodes -> enable-tor -> enter-nat -> add-reflector -> configure-nat-tor -> punch-nat -> two-nodes-nat (sibling of tor-link). All astral-query targeting a NAT'd node runs inside its netns (ip netns exec priv). Milestone (enter-nat + add-reflector -> nat armed on both) is now live-GREEN. Syntax-validated; the full punch run is next. --- netsim/stories/nat-punch.story | 21 ++++++ netsim/tasks/punch-nat/run.sh | 117 +++++++++++++++++++++++++++++++ netsim/tasks/punch-nat/verify.py | 93 ++++++++++++++++++++++++ netsim/tasks/punch-nat/verify.sh | 3 + 4 files changed, 234 insertions(+) create mode 100644 netsim/stories/nat-punch.story create mode 100755 netsim/tasks/punch-nat/run.sh create mode 100644 netsim/tasks/punch-nat/verify.py create mode 100755 netsim/tasks/punch-nat/verify.sh diff --git a/netsim/stories/nat-punch.story b/netsim/stories/nat-punch.story new file mode 100644 index 00000000..200ff80f --- /dev/null +++ b/netsim/stories/nat-punch.story @@ -0,0 +1,21 @@ +# nat-punch.story — two NAT'd peers hole-punch to a direct kcp link (sibling of tor-link). +# +# Both nodes get Tor WHILE the LAN link is still live (so their onions publish and sync), +# then each enters its own symmetric true-masquerade NAT (astrald in netns priv + port- +# preserving SNAT to 198.51.100., severing the direct 10.77 path). A public reflector +# arms each peer's `nat` module by reflecting its public endpoint back. Tor is relocated +# INTO the netns (with WAN egress) so the pair can signal over Tor. Then node1 triggers the +# NAT hole-punch to node2 -> a direct kcp link on BOTH peers. +# +# Signaling is over Tor (source-verified: the tcp-only Basic strategy can't form for two +# symmetric NATs, and the punch client sets no relay hint), so configure-nat-tor is required. 
+# +# start: two-nodes save: two-nodes-nat +# netsim story --stage two-nodes --save two-nodes-nat netsim/stories/nat-punch.story +add-vm --hostname reflector +install-astrald --vm reflector +enable-tor --vm node1 --vm node2 +enter-nat --vm node1 --vm node2 +add-reflector --reflector reflector --vm node1 --vm node2 +configure-nat-tor --vm node1 --vm node2 +punch-nat --vm node1 --peer node2 diff --git a/netsim/tasks/punch-nat/run.sh b/netsim/tasks/punch-nat/run.sh new file mode 100755 index 00000000..8592cc54 --- /dev/null +++ b/netsim/tasks/punch-nat/run.sh @@ -0,0 +1,117 @@ +#!/bin/sh +# punch-nat: trigger astrald's NAT hole-punch between two NAT'd peers, leaving them with a +# direct kcp link. Final step of the nat-punch line (sibling of link-over-tor). +# +# Preconditions (the nat-punch story order): both peers behind a symmetric true-masquerade +# NAT (enter-nat: astrald in netns priv, port-preserving SNAT to 198.51.100.), nat-armed +# by reflection (add-reflector), and Tor relocated INTO the netns with WAN egress +# (configure-nat-tor). The punch's nat.node_punch signaling + peerSupportsNAT discovery route +# over a Tor link node1<->node2 (source-verified: tcp-only Basic strategy can't form for +# symmetric NAT, and the punch client sets no relay hint -> Tor is the sole mutual transport). +# On success the punch is promoted to a direct kcp link on BOTH peers (verify.py asserts it). +# +# Trigger is `nodes.new_link -strategies nat` (drives NATLinkStrategy end-to-end), NOT +# `nat.punch` (which only registers a Hole and yields no kcp link). Every astral-query targets +# a NAT'd node -> runs inside its netns (astral-query defaults to tcp:127.0.0.1:8625, which is +# netns-local; see enter-nat's header). 
+# punch-nat [--vm ] [--peer ] (default: node1 punches to node2) +set -eu + +VM=node1; PEER=node2 +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --peer) [ $# -ge 2 ] || { echo "need host after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;; + *) echo "usage: punch-nat [--vm ] [--peer ]" >&2; exit 64 ;; + esac +done + +# --- host-side helpers (astral-query runs in the target's netns; parse on the host) ------ +nid() { # a node's own identity hex (>=64 hex) via apphost.whoami + netsim ssh "$1" -- "ip netns exec priv astral-query apphost.whoami -out json" 2>/dev/null | python3 -c ' +import json,sys +for ln in sys.stdin: + ln=ln.strip() + if not ln: continue + try: o=json.loads(ln) + except Exception: continue + v=o.get("Object") + if isinstance(v,str) and len(v)>=64: print(v); break + if isinstance(v,dict) and isinstance(v.get("Identity"),str): print(v["Identity"]); break' +} +onion_of() { # a node's own .onion via resolve_endpoints localnode + netsim ssh "$1" -- "ip netns exec priv astral-query nodes.resolve_endpoints -id localnode -out json" 2>/dev/null | python3 -c ' +import json,sys +def addr(ep): + if isinstance(ep,str): return ep + if isinstance(ep,dict): + o=ep.get("Object"); return o if isinstance(o,str) else "" + return "" +for ln in sys.stdin: + ln=ln.strip() + if not ln: continue + try: o=json.loads(ln) + except Exception: continue + a=addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: print(a); break' +} +has_link() { # -> prints "yes" if that link exists + netsim ssh "$1" -- "ip netns exec priv astral-query nodes.links -out json" 2>/dev/null | python3 -c ' +import json,sys +net,want=sys.argv[1],sys.argv[2] +for ln in sys.stdin: + ln=ln.strip() + if not ln: continue + try: o=json.loads(ln) + except Exception: continue + v=o.get("Object") or {} + if str(v.get("Network"))==net and str(v.get("RemoteIdentity",""))==want: print("yes"); break' "$2" "$3" +} +diag() { # 
per-peer failure diagnosis (see the task doc "live_diagnostics") + for v in "$VM" "$PEER"; do + echo "--- diag $v ---" >&2 + netsim ssh "$v" -- ' + echo "[nodes.links]"; ip netns exec priv astral-query nodes.links -out json 2>&1 | tail -20 + echo "[nat.list_holes]"; ip netns exec priv astral-query nat.list_holes -out json 2>&1 | tail -5 + echo "[public_ip]"; ip netns exec priv astral-query ip.public_ip_candidates -out json 2>&1 | tail -5 + echo "[tor ctl 9051]"; ip netns exec priv ss -ltn 2>/dev/null | grep 9051 || echo none + echo "[conntrack 198.51.100]"; (conntrack -L -p udp 2>/dev/null | grep 198.51.100 || grep 198.51.100 /proc/net/nf_conntrack 2>/dev/null) | head -6 + echo "[astrald journal]"; journalctl -u astrald --no-pager 2>&1 | tail -40 + ' >&2 2>&1 || true + done +} + +echo "punch-nat: resolving identities ($VM initiator -> $PEER target) ..." +VMID=$(nid "$VM"); [ -n "$VMID" ] || { echo "punch-nat: could not resolve $VM identity" >&2; exit 1; } +PEERID=$(nid "$PEER"); [ -n "$PEERID" ] || { echo "punch-nat: could not resolve $PEER identity" >&2; exit 1; } + +# 1) ensure mutual onion knowledge (host-brokered; do NOT trust auto-sync -- risk per doc) +O_PEER=$(onion_of "$PEER"); O_VM=$(onion_of "$VM") +[ -n "$O_PEER" ] || { echo "punch-nat: $PEER published no onion (Tor-in-netns down? run configure-nat-tor)" >&2; diag; exit 1; } +[ -n "$O_VM" ] || { echo "punch-nat: $VM published no onion (Tor-in-netns down? 
run configure-nat-tor)" >&2; diag; exit 1; } +netsim ssh "$VM" -- "ip netns exec priv astral-query nodes.add_endpoint -id '$PEERID' -endpoint 'tor:$O_PEER' >/dev/null 2>&1 || true" +netsim ssh "$PEER" -- "ip netns exec priv astral-query nodes.add_endpoint -id '$VMID' -endpoint 'tor:$O_VM' >/dev/null 2>&1 || true" +echo "punch-nat: seeded onions ($VM<->$PEER)" + +# 2) readiness: a live tor signaling link $VM->$PEER (form one if absent; ~60s bound) +tor_up= +for _ in $(seq 1 20); do + [ "$(has_link "$VM" tor "$PEERID")" = yes ] && { tor_up=1; break; } + netsim ssh "$VM" -- "timeout 60 ip netns exec priv astral-query nodes.new_link -target '$PEERID' -strategies tor -out json >/dev/null 2>&1 || true" + sleep 3 +done +[ -n "$tor_up" ] || { echo "punch-nat: no tor link $VM->$PEER (signaling path down)" >&2; diag; exit 1; } +echo "punch-nat: tor signaling link up ($VM->$PEER)" + +# 3) trigger the punch (initiator only; node2's side runs automatically over nat.node_punch) +echo "punch-nat: triggering NAT punch $VM -> $PEER ..." 
+netsim ssh "$VM" -- "timeout 180 ip netns exec priv astral-query nodes.new_link -target '$PEERID' -strategies nat -out json 2>&1 | tail -3" || true + +# 4) confirm a durable kcp link on BOTH peers (~60s bound) +ok= +for _ in $(seq 1 20); do + if [ "$(has_link "$VM" kcp "$PEERID")" = yes ] && [ "$(has_link "$PEER" kcp "$VMID")" = yes ]; then ok=1; break; fi + sleep 3 +done +[ -n "$ok" ] || { echo "punch-nat: no kcp link between $VM and $PEER after the punch" >&2; diag; exit 1; } +echo "punch-nat: kcp link established ($VM<->$PEER); done" diff --git a/netsim/tasks/punch-nat/verify.py b/netsim/tasks/punch-nat/verify.py new file mode 100644 index 00000000..c6b62124 --- /dev/null +++ b/netsim/tasks/punch-nat/verify.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +"""verify punch-nat: both NAT'd peers hold a direct kcp link to each other -- the hole-punch +completed and was promoted to a real link -- and NOT a direct/LAN tcp link (the NAT was +genuinely entered). + +A kcp link is the unique signal of a completed+promoted punch: only NATLinkStrategy dials +kcp, and kcp endpoints are never advertised for an ordinary peer dial. Assert on +Network+RemoteIdentity, NOT the endpoint address (the passive/inbound side has swapped +endpoints). Negatives: no tcp link to the sibling, and none at a 10.77 LAN address (the only +tcp links present should be to the reflector at 198.51.100.). + +astrald is in netns "priv" on both peers -> astral-query runs inside the netns (it defaults +to tcp:127.0.0.1:8625, which is netns-local). Uses the Go CLI over ssh, not the astral-py +WS client (the WS port is netns-local too). 
+""" +import argparse +import json +import os +import sys + +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import astralapi # noqa: E402 + + +def node_id(vm): + """The node's own identity hex via apphost.whoami (inside its netns).""" + raw = astralapi.ssh(vm, "ip netns exec priv astral-query apphost.whoami -out json") or "" + for ln in raw.splitlines(): + ln = ln.strip() + if not ln: + continue + try: + o = json.loads(ln) + except json.JSONDecodeError: + continue + v = o.get("Object") + if isinstance(v, str) and len(v) >= 64: + return v + if isinstance(v, dict) and isinstance(v.get("Identity"), str): + return v["Identity"] + return "" + + +def links(vm): + return astralapi.parse_cli( + astralapi.ssh(vm, "ip netns exec priv astral-query nodes.links -out json") or "") + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", default="node1") # initiator + ap.add_argument("--peer", default="node2") # target + args, _ = ap.parse_known_args() + peers = [args.vm, args.peer] + ids = {p: node_id(p) for p in peers} + + failed = [] + for p in peers: + sib = args.peer if p == args.vm else args.vm + sib_id = ids.get(sib, "") + if not sib_id: + failed.append(f"{p}: could not resolve sibling {sib} identity") + continue + objs = links(p) + kcp = astralapi.kcp_links(objs) # [(RemoteIdentity, endpoint)] + tcp = astralapi.links_by_network(objs, "tcp") + # positive: a direct kcp link to the sibling (the promoted punch) + if not any(rid == sib_id for rid, _ in kcp): + failed.append(f"{p}: no kcp link to {sib} -- punch not promoted (kcp={kcp})") + sys.stderr.write(f" {p} tcp links: {tcp}\n") + continue + # negative: the sibling must be reachable ONLY via the punch, never a direct tcp link + if any(rid == sib_id for rid, _ in tcp): + failed.append(f"{p}: has a direct tcp link to {sib} -- not a NAT traversal") + continue + # 
negative: no LAN (10.77) tcp link at all -- the NAT must be genuinely entered + if any("10.77." in str(addr) for _rid, addr in tcp): + failed.append(f"{p}: has a 10.77 LAN tcp link -- NAT not genuinely entered (tcp={tcp})") + continue + print(f"punch-nat OK: {p} holds a direct kcp link to {sib} (no direct/LAN tcp link).") + + if failed: + for f in failed: + sys.stderr.write(f"punch-nat verify FAILED: {f}\n") + return 1 + print(f"punch-nat verified: direct kcp link on both peers ({', '.join(peers)})") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/punch-nat/verify.sh b/netsim/tasks/punch-nat/verify.sh new file mode 100755 index 00000000..042470ab --- /dev/null +++ b/netsim/tasks/punch-nat/verify.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" From ad7b50e91982ae0c9537902e6384590128fe8370 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 2 Jul 2026 01:46:38 +0200 Subject: [PATCH 56/57] =?UTF-8?q?netsim=20NAT:=20fix=20the=20hole-punch=20?= =?UTF-8?q?=E2=80=94=20inbound=20DNAT=20+=20arm-after-Tor=20ordering?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two defects blocked the punch from promoting to a kcp link; both fixed and confirmed live (kcp link on both peers; full nat-punch story green end to end). enter-nat: add an inbound DNAT of the public alias into the netns (iif <lan> ip daddr <pub> dnat to 192.168.99.2). The alias is a LOCAL address on the collapsed NAT+endpoint VM, so with SNAT only, inbound punch packets hit the local INPUT (no listener) and never reach the netns puncher — the conntrack-reply reverse-SNAT that should forward them fails under a source-port realloc clash. A 3-point packet trace (netns veth / root veth / lan0) showed both punchers emit and packets reach each peer's lan0, but inbound is never delivered inward.
The DNAT makes the box a real cone-NAT gateway (inbound and outbound become one conntrack flow). This makes the NAT full-cone — the permissive punchable type; "symmetric" was always a misnomer since astrald's cone puncher cannot traverse a symmetric NAT. nat-punch.story: run configure-nat-tor BEFORE add-reflector. add-reflector arms nat via an in-memory reflected endpoint; configure-nat-tor restarts astrald, which wiped it, so both peers were disarmed at punch time ("does not support NAT traversal"). Arm last, after the final restart. --- netsim/stories/nat-punch.story | 6 +++++- netsim/tasks/enter-nat/run.sh | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/netsim/stories/nat-punch.story b/netsim/stories/nat-punch.story index 200ff80f..51373e20 100644 --- a/netsim/stories/nat-punch.story +++ b/netsim/stories/nat-punch.story @@ -16,6 +16,10 @@ add-vm --hostname reflector install-astrald --vm reflector enable-tor --vm node1 --vm node2 enter-nat --vm node1 --vm node2 -add-reflector --reflector reflector --vm node1 --vm node2 configure-nat-tor --vm node1 --vm node2 +add-reflector --reflector reflector --vm node1 --vm node2 punch-nat --vm node1 --peer node2 +# NOTE order: configure-nat-tor (which RESTARTS astrald) must run BEFORE add-reflector. +# add-reflector arms `nat` via an in-memory reflected endpoint; an astrald restart after +# it would wipe that endpoint and disarm nat -> the punch aborts "does not support NAT +# traversal". So arm LAST, after the final restart. diff --git a/netsim/tasks/enter-nat/run.sh b/netsim/tasks/enter-nat/run.sh index b4fd3139..0100dd02 100755 --- a/netsim/tasks/enter-nat/run.sh +++ b/netsim/tasks/enter-nat/run.sh @@ -7,7 +7,9 @@ # * create netns "priv" (192.168.99.2) wired to the VM by a veth pair; # * port-preserving SNAT of 192.168.99.0/24 to a per-node public TEST-NET alias # 198.51.100. 
on the LAN NIC (validated as endpoint-independent/cone by -# nat-eim-probe); +# nat-eim-probe), plus an inbound DNAT of that alias back into the netns so the box is a +# real cone-NAT gateway (without it inbound punch packets hit the local INPUT and the +# punch never completes -- see the nat table section below); # * relaunch astrald INSIDE the netns (same -root, so same identity) via a systemd # drop-in (NetworkNamespacePath -- joins only the NET ns; the -root/apphost files stay # in the shared mount ns). @@ -69,6 +71,21 @@ nft flush table ip nat nft add chain ip nat postrouting '{ type nat hook postrouting priority 100 ; }' nft add rule ip nat postrouting ip saddr 192.168.99.0/24 oifname "$lan" snat ip to "$pub" +# Inbound DNAT of the public alias into the netns -- REQUIRED for the punch to complete. +# The alias 198.51.100. is a LOCAL address on this box (NAT and endpoint are collapsed +# onto one VM), so inbound punch packets are delivered to the local INPUT (no listener) and +# never reach the netns puncher; the conntrack-reply reverse-SNAT that should forward them +# fails under a source-port-realloc clash (the inbound-to-local-alias creates a conntrack +# entry that collides with the outbound SNAT). DNAT the alias to the netns host (port +# preserved) so the box acts as a real cone-NAT gateway -- inbound and outbound become one +# conntrack flow, no clash. Confirmed live: with this rule the punch promotes to a kcp link +# on BOTH peers; without it both punchers emit and packets reach each peer's lan0, but +# inbound is never delivered to the netns and the punch times out ("context deadline +# exceeded"). This makes the NAT a full-cone (endpoint-independent) NAT -- the permissive +# punchable type; a restricted-cone/symmetric simulation would need a separate router VM. 
+nft add chain ip nat prerouting '{ type nat hook prerouting priority -100 ; }' +nft add rule ip nat prerouting iif "$lan" ip daddr "$pub" dnat to 192.168.99.2 + # move astrald into the netns: join only the NET namespace (mount ns untouched, so the # apphost unix socket stays reachable from the root ns for astral-query). mkdir -p /etc/systemd/system/astrald.service.d From 59bf438c1ff96a96c105f60afc4e904b87322ac3 Mon Sep 17 00:00:00 2001 From: intern0 Date: Thu, 2 Jul 2026 11:53:40 +0200 Subject: [PATCH 57/57] netsim: give each scenario its own directory with a plain-words README Move netsim/stories/<name>.story -> netsim/scenarios/<name>/<name>.story and add a short, high-level README.md per scenario: one-line summary, kind (fixture vs scenario) + family, start->save chain, task steps, the run command, and a plain- words explanation. Mirrors the task convention (a directory holding the recipe + its README). Update netsim/README.md tree and invocation examples to the new paths; nat-punch README notes the current full-cone NAT caveat.
--- netsim/README.md | 39 ++++++++++--------- netsim/scenarios/adopt-node/README.md | 10 +++++ .../adopt-node}/adopt-node.story | 2 +- .../bootstrap-user-software-key/README.md | 10 +++++ .../bootstrap-user-software-key.story | 2 +- netsim/scenarios/expel-node/README.md | 10 +++++ .../expel-node}/expel-node.story | 2 +- .../import-user-software-key/README.md | 10 +++++ .../import-user-software-key.story | 2 +- netsim/scenarios/lab/README.md | 10 +++++ netsim/{stories => scenarios/lab}/lab.story | 0 netsim/scenarios/nat-punch/README.md | 12 ++++++ .../nat-punch}/nat-punch.story | 2 +- netsim/scenarios/object-store-peer/README.md | 10 +++++ .../object-store-peer.story | 2 +- netsim/scenarios/object-store/README.md | 10 +++++ .../object-store}/object-store.story | 2 +- netsim/scenarios/read-remote-peer/README.md | 10 +++++ .../read-remote-peer}/read-remote-peer.story | 2 +- netsim/scenarios/tor-link/README.md | 10 +++++ .../tor-link}/tor-link.story | 2 +- 21 files changed, 132 insertions(+), 27 deletions(-) create mode 100644 netsim/scenarios/adopt-node/README.md rename netsim/{stories => scenarios/adopt-node}/adopt-node.story (56%) create mode 100644 netsim/scenarios/bootstrap-user-software-key/README.md rename netsim/{stories => scenarios/bootstrap-user-software-key}/bootstrap-user-software-key.story (52%) create mode 100644 netsim/scenarios/expel-node/README.md rename netsim/{stories => scenarios/expel-node}/expel-node.story (83%) create mode 100644 netsim/scenarios/import-user-software-key/README.md rename netsim/{stories => scenarios/import-user-software-key}/import-user-software-key.story (70%) create mode 100644 netsim/scenarios/lab/README.md rename netsim/{stories => scenarios/lab}/lab.story (100%) create mode 100644 netsim/scenarios/nat-punch/README.md rename netsim/{stories => scenarios/nat-punch}/nat-punch.story (97%) create mode 100644 netsim/scenarios/object-store-peer/README.md rename netsim/{stories => 
scenarios/object-store-peer}/object-store-peer.story (80%) create mode 100644 netsim/scenarios/object-store/README.md rename netsim/{stories => scenarios/object-store}/object-store.story (82%) create mode 100644 netsim/scenarios/read-remote-peer/README.md rename netsim/{stories => scenarios/read-remote-peer}/read-remote-peer.story (83%) create mode 100644 netsim/scenarios/tor-link/README.md rename netsim/{stories => scenarios/tor-link}/tor-link.story (93%) diff --git a/netsim/README.md b/netsim/README.md index ee7e300e..cdcc28f2 100644 --- a/netsim/README.md +++ b/netsim/README.md @@ -23,15 +23,17 @@ netsim/ read-remote-object/ # node1's agent reads node2's object over astral (used by read-remote-peer) expel-node/ # node1 (User) permanently bans node2 from the swarm -> two-nodes-expel _lib/ # shared verify library (astralapi.py) + astral-py submodule - stories/ # one story per tested flow (start/save stage in each header) - lab.story # null -> astrald-lab - bootstrap-user-software-key.story # astrald-lab -> one-node - import-user-software-key.story # astrald-lab -> one-node (alt.) - adopt-node.story # one-node -> two-nodes - object-store.story # two-nodes -> two-nodes-data (store on node1) - object-store-peer.story # two-nodes -> two-nodes-data-peer (store on node2) - read-remote-peer.story # two-nodes -> two-nodes-peer-read (store on node2, then read it) - expel-node.story # two-nodes -> two-nodes-expel + scenarios/ # one dir per scenario: <name>.story + README.md (plain-words) + lab/ # null -> astrald-lab (fixture) + bootstrap-user-software-key/ # astrald-lab -> one-node (fixture) + import-user-software-key/ # astrald-lab -> one-node (alt.)
+ adopt-node/ # one-node -> two-nodes (fixture) + object-store/ # two-nodes -> two-nodes-data (store on node1) + object-store-peer/ # two-nodes -> two-nodes-data-peer (store on node2) + read-remote-peer/ # two-nodes -> two-nodes-peer-read (store on node2, then read it) + expel-node/ # two-nodes -> two-nodes-expel + tor-link/ # two-nodes -> two-nodes-tor (re-link over Tor) + nat-punch/ # two-nodes -> two-nodes-nat (NAT hole-punch) link.sh # register tasks with netsim (idempotent; re-run anytime) README.md ``` @@ -111,7 +113,7 @@ then build the lab: ```sh ./netsim/link.sh export SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy # see tasks/configure-astral-agent -netsim story --stage null --save astrald-lab netsim/stories/lab.story +netsim story --stage null --save astrald-lab netsim/scenarios/lab/lab.story ``` The result is the stage `astrald-lab`: `node1` and `node2` running astrald, with a @@ -120,20 +122,21 @@ with `netsim shell --stage astrald-lab`. ## Swarm pipeline -Each post-lab flow is its own story under `stories/`, layered on the previous -stage (its `start`/`save` stages are in the story header). Intermediate stages -stay reusable, so you can replay one flow without rebuilding the chain: +Each post-lab flow is its own scenario under `scenarios/<name>/` — a `<name>.story` +plus a plain-words `README.md` — layered on the previous stage (its `start`/`save` +stages are in the story header and README).
Intermediate stages stay reusable, so +you can replay one flow without rebuilding the chain: ``` astrald-lab ─[bootstrap-user-software-key]→ one-node ─[adopt-node]→ two-nodes ─[object-store]→ two-nodes-data ``` ```sh -netsim story --stage astrald-lab --save one-node netsim/stories/bootstrap-user-software-key.story -netsim story --stage one-node --save two-nodes netsim/stories/adopt-node.story -netsim story --stage two-nodes --save two-nodes-data netsim/stories/object-store.story -netsim story --stage two-nodes --save two-nodes-peer-read netsim/stories/read-remote-peer.story -netsim story --stage two-nodes --save two-nodes-expel netsim/stories/expel-node.story +netsim story --stage astrald-lab --save one-node netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story +netsim story --stage one-node --save two-nodes netsim/scenarios/adopt-node/adopt-node.story +netsim story --stage two-nodes --save two-nodes-data netsim/scenarios/object-store/object-store.story +netsim story --stage two-nodes --save two-nodes-peer-read netsim/scenarios/read-remote-peer/read-remote-peer.story +netsim story --stage two-nodes --save two-nodes-expel netsim/scenarios/expel-node/expel-node.story ``` `expel-node` is a separate branch off `two-nodes`: the User on node1 permanently diff --git a/netsim/scenarios/adopt-node/README.md b/netsim/scenarios/adopt-node/README.md new file mode 100644 index 00000000..d3fa0f0a --- /dev/null +++ b/netsim/scenarios/adopt-node/README.md @@ -0,0 +1,10 @@ +# adopt-node + +Joins two nodes into the same user's network so they trust each other. + +- **Kind:** fixture · **Family:** foundation +- **Chain:** `one-node` → `two-nodes` +- **Steps:** adopt-node +- **Run:** `netsim story --stage one-node --save two-nodes netsim/scenarios/adopt-node/adopt-node.story` + +One node brings a second node into its personal network as a sibling, then verifies both share the same user contract and see each other as linked peers. 
This is the stable two-node baseline the multi-node scenarios start from. diff --git a/netsim/stories/adopt-node.story b/netsim/scenarios/adopt-node/adopt-node.story similarity index 56% rename from netsim/stories/adopt-node.story rename to netsim/scenarios/adopt-node/adopt-node.story index 98b62b06..86e8747a 100644 --- a/netsim/stories/adopt-node.story +++ b/netsim/scenarios/adopt-node/adopt-node.story @@ -1,4 +1,4 @@ # adopt-node.story — adopt node2 into node1's User swarm (symmetric roster). # start: one-node save: two-nodes -# netsim story --stage one-node --save two-nodes netsim/stories/adopt-node.story +# netsim story --stage one-node --save two-nodes netsim/scenarios/adopt-node/adopt-node.story adopt-node diff --git a/netsim/scenarios/bootstrap-user-software-key/README.md b/netsim/scenarios/bootstrap-user-software-key/README.md new file mode 100644 index 00000000..d73dd003 --- /dev/null +++ b/netsim/scenarios/bootstrap-user-software-key/README.md @@ -0,0 +1,10 @@ +# bootstrap-user-software-key + +Turns a node into a user-controlled node by creating a fresh user identity. + +- **Kind:** fixture · **Family:** identity +- **Chain:** `astrald-lab` → `one-node` +- **Steps:** bootstrap-user-software-key +- **Run:** `netsim story --stage astrald-lab --save one-node netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story` + +Creates a user account on the node and activates it with a contract, then confirms the node recognizes the user and accepts user commands. The result is a working user-controlled node that later scenarios build on. 
diff --git a/netsim/stories/bootstrap-user-software-key.story b/netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story similarity index 52% rename from netsim/stories/bootstrap-user-software-key.story rename to netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story index 3dbb9e63..c6a32525 100644 --- a/netsim/stories/bootstrap-user-software-key.story +++ b/netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story @@ -1,4 +1,4 @@ # bootstrap-user-software-key.story — node1 becomes a User-controlled node. # start: astrald-lab save: one-node -# netsim story --stage astrald-lab --save one-node netsim/stories/bootstrap-user-software-key.story +# netsim story --stage astrald-lab --save one-node netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story bootstrap-user-software-key diff --git a/netsim/scenarios/expel-node/README.md b/netsim/scenarios/expel-node/README.md new file mode 100644 index 00000000..933a9f5a --- /dev/null +++ b/netsim/scenarios/expel-node/README.md @@ -0,0 +1,10 @@ +# expel-node + +Tests permanently banning another node from a shared swarm. + +- **Kind:** scenario · **Family:** network +- **Chain:** `two-nodes` → `two-nodes-expel` +- **Steps:** expel-node +- **Run:** `netsim story --stage two-nodes --save two-nodes-expel netsim/scenarios/expel-node/expel-node.story` + +One node expels another: it goes on a blocklist and is dropped from the active members, and the test confirms it is blocked and no longer on the roster. This is how a swarm enforces membership and keeps out unwanted nodes. 
diff --git a/netsim/stories/expel-node.story b/netsim/scenarios/expel-node/expel-node.story similarity index 83% rename from netsim/stories/expel-node.story rename to netsim/scenarios/expel-node/expel-node.story index c8ab8c7f..9ea68db2 100644 --- a/netsim/stories/expel-node.story +++ b/netsim/scenarios/expel-node/expel-node.story @@ -1,4 +1,4 @@ # expel-node.story — node1 (the User) permanently bans node2 from its swarm. # start: two-nodes save: two-nodes-expel -# netsim story --stage two-nodes --save two-nodes-expel netsim/stories/expel-node.story +# netsim story --stage two-nodes --save two-nodes-expel netsim/scenarios/expel-node/expel-node.story expel-node diff --git a/netsim/scenarios/import-user-software-key/README.md b/netsim/scenarios/import-user-software-key/README.md new file mode 100644 index 00000000..97e8ad62 --- /dev/null +++ b/netsim/scenarios/import-user-software-key/README.md @@ -0,0 +1,10 @@ +# import-user-software-key + +Tests importing an existing user identity into a node from a recovery phrase. + +- **Kind:** scenario · **Family:** identity +- **Chain:** `astrald-lab` → `one-node` +- **Steps:** import-user-software-key +- **Run:** `netsim story --stage astrald-lab --save one-node netsim/scenarios/import-user-software-key/import-user-software-key.story` + +Takes an existing recovery phrase and uses it to rebuild the keys that make the node a user node with an active contract, then confirms the node identifies as that user. This is the alternative to bootstrap-user-software-key (both yield the one-node state) for the recover-an-identity path. 
diff --git a/netsim/stories/import-user-software-key.story b/netsim/scenarios/import-user-software-key/import-user-software-key.story similarity index 70% rename from netsim/stories/import-user-software-key.story rename to netsim/scenarios/import-user-software-key/import-user-software-key.story index 5a65dbe2..7e074096 100644 --- a/netsim/stories/import-user-software-key.story +++ b/netsim/scenarios/import-user-software-key/import-user-software-key.story @@ -2,5 +2,5 @@ # (embedded in the task's prompt.md; alternative to bootstrap-user-software-key). # Optional env ASTRAL_USER_ID makes verify assert the derived id. # start: astrald-lab save: one-node -# netsim story --stage astrald-lab --save one-node netsim/stories/import-user-software-key.story +# netsim story --stage astrald-lab --save one-node netsim/scenarios/import-user-software-key/import-user-software-key.story import-user-software-key diff --git a/netsim/scenarios/lab/README.md b/netsim/scenarios/lab/README.md new file mode 100644 index 00000000..371a1763 --- /dev/null +++ b/netsim/scenarios/lab/README.md @@ -0,0 +1,10 @@ +# lab + +Builds the shared test lab: two astrald nodes plus an AI operator. + +- **Kind:** fixture · **Family:** foundation +- **Chain:** `null` → `astrald-lab` +- **Steps:** add-vm · install-astrald · install-qwen-code · configure-astral-agent +- **Run:** `netsim story --stage null --save astrald-lab netsim/scenarios/lab/lab.story` + +Creates two virtual machines and installs astrald on each so they can work together over a network. It also sets up Qwen Code, an AI assistant, on the first machine with a skill for talking to astrald. This baseline is the foundation every other scenario starts from. 
diff --git a/netsim/stories/lab.story b/netsim/scenarios/lab/lab.story similarity index 100% rename from netsim/stories/lab.story rename to netsim/scenarios/lab/lab.story diff --git a/netsim/scenarios/nat-punch/README.md b/netsim/scenarios/nat-punch/README.md new file mode 100644 index 00000000..ffd75bfc --- /dev/null +++ b/netsim/scenarios/nat-punch/README.md @@ -0,0 +1,12 @@ +# nat-punch + +Two NAT'd peers hole-punch to a direct connection, coordinating over Tor. + +- **Kind:** scenario · **Family:** network +- **Chain:** `two-nodes` → `two-nodes-nat` +- **Steps:** add-vm · install-astrald · enable-tor · enter-nat · configure-nat-tor · add-reflector · punch-nat +- **Run:** `netsim story --stage two-nodes --save two-nodes-nat netsim/scenarios/nat-punch/nat-punch.story` + +Both nodes are put behind their own NAT so they have no direct path to each other. A public reflector node tells each one its outside address, and they use a Tor link to coordinate a simultaneous connection attempt. On success the pair ends up talking over a direct kcp link instead of relaying through Tor. + +> **Status:** works end to end (direct kcp link verified on both peers). The simulated NAT is currently a permissive *full-cone* NAT; a stricter, filtered NAT that a punch must genuinely defeat is under evaluation. diff --git a/netsim/stories/nat-punch.story b/netsim/scenarios/nat-punch/nat-punch.story similarity index 97% rename from netsim/stories/nat-punch.story rename to netsim/scenarios/nat-punch/nat-punch.story index 51373e20..2ae01f60 100644 --- a/netsim/stories/nat-punch.story +++ b/netsim/scenarios/nat-punch/nat-punch.story @@ -11,7 +11,7 @@ # symmetric NATs, and the punch client sets no relay hint), so configure-nat-tor is required. 
# # start: two-nodes save: two-nodes-nat -# netsim story --stage two-nodes --save two-nodes-nat netsim/stories/nat-punch.story +# netsim story --stage two-nodes --save two-nodes-nat netsim/scenarios/nat-punch/nat-punch.story add-vm --hostname reflector install-astrald --vm reflector enable-tor --vm node1 --vm node2 diff --git a/netsim/scenarios/object-store-peer/README.md b/netsim/scenarios/object-store-peer/README.md new file mode 100644 index 00000000..4cb7e8c3 --- /dev/null +++ b/netsim/scenarios/object-store-peer/README.md @@ -0,0 +1,10 @@ +# object-store-peer + +Tests that one node can store a data object on a peer and get it back. + +- **Kind:** scenario · **Family:** objectstore +- **Chain:** `two-nodes` → `two-nodes-data-peer` +- **Steps:** object-store --target node2 +- **Run:** `netsim story --stage two-nodes --save two-nodes-data-peer netsim/scenarios/object-store-peer/object-store-peer.story` + +One node creates a file-based object and stores it on a connected peer node, then confirms the peer can serve back the exact same data. Shows the object store works across two connected nodes. diff --git a/netsim/stories/object-store-peer.story b/netsim/scenarios/object-store-peer/object-store-peer.story similarity index 80% rename from netsim/stories/object-store-peer.story rename to netsim/scenarios/object-store-peer/object-store-peer.story index b32fe755..b2e19861 100644 --- a/netsim/stories/object-store-peer.story +++ b/netsim/scenarios/object-store-peer/object-store-peer.story @@ -1,4 +1,4 @@ # object-store-peer.story — node1 stores an object ON the peer (node2) and reads it back. 
# start: two-nodes save: two-nodes-data-peer -# netsim story --stage two-nodes --save two-nodes-data-peer netsim/stories/object-store-peer.story +# netsim story --stage two-nodes --save two-nodes-data-peer netsim/scenarios/object-store-peer/object-store-peer.story object-store --target node2 diff --git a/netsim/scenarios/object-store/README.md b/netsim/scenarios/object-store/README.md new file mode 100644 index 00000000..08de4a81 --- /dev/null +++ b/netsim/scenarios/object-store/README.md @@ -0,0 +1,10 @@ +# object-store + +Stores a file as an object and reads it back to check the data is intact. + +- **Kind:** scenario · **Family:** objectstore +- **Chain:** `two-nodes` → `two-nodes-data` +- **Steps:** object-store +- **Run:** `netsim story --stage two-nodes --save two-nodes-data netsim/scenarios/object-store/object-store.story` + +A node stores a file as an object and records its id, then reads it back by that id and checks the bytes match the original. Confirms the object store can reliably save and retrieve data on a single node. diff --git a/netsim/stories/object-store.story b/netsim/scenarios/object-store/object-store.story similarity index 82% rename from netsim/stories/object-store.story rename to netsim/scenarios/object-store/object-store.story index 27df893a..30a47134 100644 --- a/netsim/stories/object-store.story +++ b/netsim/scenarios/object-store/object-store.story @@ -1,4 +1,4 @@ # object-store.story — node1 stores an object locally and reads it back. 
# start: two-nodes save: two-nodes-data -# netsim story --stage two-nodes --save two-nodes-data netsim/stories/object-store.story +# netsim story --stage two-nodes --save two-nodes-data netsim/scenarios/object-store/object-store.story object-store diff --git a/netsim/scenarios/read-remote-peer/README.md b/netsim/scenarios/read-remote-peer/README.md new file mode 100644 index 00000000..9e5ac789 --- /dev/null +++ b/netsim/scenarios/read-remote-peer/README.md @@ -0,0 +1,10 @@ +# read-remote-peer + +Stores data on a peer, then reads it back over the network. + +- **Kind:** scenario · **Family:** objectstore +- **Chain:** `two-nodes` → `two-nodes-peer-read` +- **Steps:** object-store · read-remote-object +- **Run:** `netsim story --stage two-nodes --save two-nodes-peer-read netsim/scenarios/read-remote-peer/read-remote-peer.story` + +One node stores a file on a peer and notes which object was created, then reads that object back from the peer across the link. Verifies astrald can move and fetch data between different nodes. diff --git a/netsim/stories/read-remote-peer.story b/netsim/scenarios/read-remote-peer/read-remote-peer.story similarity index 83% rename from netsim/stories/read-remote-peer.story rename to netsim/scenarios/read-remote-peer/read-remote-peer.story index 44c6639d..33592e78 100644 --- a/netsim/stories/read-remote-peer.story +++ b/netsim/scenarios/read-remote-peer/read-remote-peer.story @@ -1,6 +1,6 @@ # read-remote-peer.story — store an object on the peer (node2), then node1's agent # reads it back from the peer over astral. 
# start: two-nodes save: two-nodes-peer-read -# netsim story --stage two-nodes --save two-nodes-peer-read netsim/stories/read-remote-peer.story +# netsim story --stage two-nodes --save two-nodes-peer-read netsim/scenarios/read-remote-peer/read-remote-peer.story object-store --target node2 read-remote-object diff --git a/netsim/scenarios/tor-link/README.md b/netsim/scenarios/tor-link/README.md new file mode 100644 index 00000000..986c8816 --- /dev/null +++ b/netsim/scenarios/tor-link/README.md @@ -0,0 +1,10 @@ +# tor-link + +A node automatically reconnects over Tor when it loses the local network. + +- **Kind:** scenario · **Family:** network +- **Chain:** `two-nodes` → `two-nodes-tor` +- **Steps:** enable-tor · leave-lan · link-over-tor +- **Run:** `netsim story --stage two-nodes --save two-nodes-tor netsim/scenarios/tor-link/tor-link.story` + +Two nodes start connected on a local network and both set up Tor. Then one node leaves the LAN entirely, and the scenario checks that the pair automatically re-links over Tor with no help. Tests that astrald finds an alternate path when the primary one disappears. diff --git a/netsim/stories/tor-link.story b/netsim/scenarios/tor-link/tor-link.story similarity index 93% rename from netsim/stories/tor-link.story rename to netsim/scenarios/tor-link/tor-link.story index 92736939..3e189829 100644 --- a/netsim/stories/tor-link.story +++ b/netsim/scenarios/tor-link/tor-link.story @@ -3,7 +3,7 @@ # drops its LAN path to node1 (after node1 is seeded with node2's onion); then node1's # agent re-establishes the swarm link over Tor. # start: two-nodes save: two-nodes-tor -# netsim story --stage two-nodes --save two-nodes-tor netsim/stories/tor-link.story +# netsim story --stage two-nodes --save two-nodes-tor netsim/scenarios/tor-link/tor-link.story enable-tor --vm node1 --vm node2 leave-lan --vm node2 --peer node1 link-over-tor --vm node1 --peer node2