diff --git a/.gitmodules b/.gitmodules index 727da6bf..a899076e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule ".ai/system"] path = .ai/system url = git@github.com:cryptopunkscc/astral-docs.git +[submodule "netsim/tasks/_lib/astral-py"] + path = netsim/tasks/_lib/astral-py + url = ssh://git@git.satforge.dev/satforge/astral-py.git diff --git a/docs/running-as-a-service.md b/docs/running-as-a-service.md new file mode 100644 index 00000000..da9c591f --- /dev/null +++ b/docs/running-as-a-service.md @@ -0,0 +1,91 @@ +# Running astrald as a service + +`astrald` is a long-running daemon. Run it under systemd on Linux. + +## Build + +```shell +CGO_ENABLED=0 go build -o /usr/local/bin/astrald ./cmd/astrald +CGO_ENABLED=0 go build -o /usr/local/bin/astral-query ./cmd/astral-query +``` + +Go >= 1.25.0 is required. astrald uses pure-Go SQLite, so `CGO_ENABLED=0` builds a +static binary. The `./` prefix is required; `go build .` at the repo root builds +an empty stub. + +## Root directory + +astrald stores config, identity, and data under a root directory derived from +`$HOME`. A systemd service has no `$HOME`. Pass `-root ` to set the root +explicitly, or set `Environment=HOME=`. The first start generates the node +identity — a `secp256k1` key at `/config/node_key` — with no interaction. + +## Unit + +`/etc/systemd/system/astrald.service`: + +```ini +[Unit] +Description=astral daemon + +[Service] +ExecStart=/usr/local/bin/astrald -root /var/lib/astrald +Environment=HOME=/root +Restart=on-failure +KillSignal=SIGINT + +[Install] +WantedBy=multi-user.target +``` + +`Type=simple` is the systemd default and is omitted. astrald traps `SIGINT`, not +`SIGTERM`; `KillSignal=SIGINT` makes `systemctl stop` shut it down gracefully. + +```shell +systemctl enable --now astrald +``` + +This unit runs astrald as root — the simplest setup. 
To run it as your own user +instead, install it as a user service: place the unit at +`~/.config/systemd/user/astrald.service`, drop `Environment=HOME=` and the `-root` +flag (config and data then default to `~/.config/astrald` and +`~/.local/share/astrald`), and run `systemctl --user enable --now astrald`. +`loginctl enable-linger $USER` keeps it running without an active login session. + +## Health check + +```shell +astral-query localnode:.spec +``` + +The local API listens on `tcp:127.0.0.1:8625` with anonymous access. `.spec` is a +built-in, always-available op. Exit code 0 means the node is up. + +## Ports + +Default transports bind all interfaces. + +| Port | Proto | Purpose | +|---|---|---| +| 1791 | TCP | node links | +| 1792 | UDP | KCP transport | +| 1791 | UDP | UTP transport | +| 8822 | UDP | `ether` LAN discovery | +| 8625 | TCP 127.0.0.1 | local apphost API | +| 8624 | TCP 0.0.0.0 | apphost HTTP API | + +## Imaging and snapshots + +Which step you take depends on the capture type: + +- **Disk image (cold):** stop astrald first for a clean on-disk state; keep the + unit enabled so it autostarts on boot. +- **Live RAM snapshot (e.g. netsim):** leave astrald running so it resumes + already-running on restore. + +```shell +systemctl enable astrald +systemctl stop astrald # disk image only — skip for a live RAM snapshot +``` + +The identity at `/config/node_key` persists across either capture. diff --git a/netsim/.gitignore b/netsim/.gitignore new file mode 100644 index 00000000..7a60b85e --- /dev/null +++ b/netsim/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +*.pyc diff --git a/netsim/README.md b/netsim/README.md new file mode 100644 index 00000000..cdcc28f2 --- /dev/null +++ b/netsim/README.md @@ -0,0 +1,155 @@ +# netsim scenarios for astrald + +Test scaffolding that drives `netsim` to build and run `astrald` on a simulated +LAN. It contains no astrald Go source and modifies none. 
+ +`netsim` boots Ubuntu 26.04 cloud-image VMs on `10.77.0.0/24` with per-VM NAT. A +*task* is a host-side script that configures the VMs. A *story* runs a list of +tasks in one simulation and saves a named *stage*. `lab.story` builds the +`astrald-lab` stage: two nodes running astrald, with a Qwen Code operator on +`node1`. + +## Layout + +``` +netsim/ + tasks/ # each task: run.sh (+ verify.sh / verify.py) + README.md + install-astrald/ # build + run astrald as a service on each node + configure-astral-agent/ # install the astral-agent skill into the qwen operator + bootstrap-user-software-key/ # make node1 a User node, new key -> one-node + import-user-software-key/ # make node1 a User node, existing mnemonic -> one-node + adopt-node/ # adopt node2 into swarm + register node aliases -> two-nodes + object-store/ # node1 stores an object (--target localnode|node2) -> two-nodes-data[-peer] + read-remote-object/ # node1's agent reads node2's object over astral (used by read-remote-peer) + expel-node/ # node1 (User) permanently bans node2 from the swarm -> two-nodes-expel + _lib/ # shared verify library (astralapi.py) + astral-py submodule + scenarios/ # one dir per scenario: .story + README.md (plain-words) + lab/ # null -> astrald-lab (fixture) + bootstrap-user-software-key/ # astrald-lab -> one-node (fixture) + import-user-software-key/ # astrald-lab -> one-node (alt.) 
+ adopt-node/ # one-node -> two-nodes (fixture) + object-store/ # two-nodes -> two-nodes-data (store on node1) + object-store-peer/ # two-nodes -> two-nodes-data-peer (store on node2) + read-remote-peer/ # two-nodes -> two-nodes-peer-read (store on node2, then read it) + expel-node/ # two-nodes -> two-nodes-expel + tor-link/ # two-nodes -> two-nodes-tor (re-link over Tor) + nat-punch/ # two-nodes -> two-nodes-nat (NAT hole-punch) + link.sh # register tasks with netsim (idempotent; re-run anytime) + README.md +``` + +## Registering tasks + +`netsim` discovers tasks only under `~/.local/share/netsim/tasks/`. `link.sh` +symlinks every task under `tasks/` — each folder containing a `run.sh` — there. +It is idempotent; re-run it after adding a task. The symlinks leave netsim's +shipped builtins intact. + +```sh +./netsim/link.sh +netsim tasks # confirm: install-astrald is listed as a user task +``` + +## Verifier library + +The `verify.py` oracles share `tasks/_lib/astralapi.py`, which reaches each +VM's apphost through the **astral-py** client vendored as a submodule at +`tasks/_lib/astral-py`. Initialize it once per worktree (`workon.sh` does not +`--recurse`); a missing submodule fails with a loud `ImportError`: + +```sh +git submodule update --init netsim/tasks/_lib/astral-py +``` + +The verifiers fall back to the Go `astral-query` CLI for any op the client can't +serve, but the submodule must be present for `verify.py` to import. + +## Lab + +`lab.story` builds the full lab in one simulation: two nodes running astrald and +a Qwen Code operator on `node1`, equipped with the `astral-agent` skill. + +``` +# lab.story — the astrald lab, built in one netsim simulation. +# Result: a single stage with two nodes running astrald and a Qwen Code +# operator on node1, equipped with the astral-agent skill. 
+add-vm --hostname node1 +add-vm --hostname node2 +install-astrald +install-qwen-code --vm node1 --create-user +configure-astral-agent --vm node1 +``` + +A story is a plain-text file with one `task [args...]` per line, shell-style +quoting, and `#` for full-line or trailing comments. `netsim story` boots one +simulation, runs the listed tasks in order in the same VMs, and saves a single +stage at the end. It stops at the first failing task. Order is significant: + +* `add-vm --hostname node1` and `add-vm --hostname node2` use the `add-vm` + builtin; they create the two plain Ubuntu VMs on the LAN. +* `install-astrald` is the [custom task](tasks/install-astrald/README.md); with no + `--vm` it installs astrald on every running VM, so on both nodes. It runs + `run.sh` then `verify.sh` and fails the story unless astrald builds, starts, and + answers `astral-query localnode:.spec` on every node. The service is left + enabled and running, so the stage snapshots a live node that resumes + already-running on restore. +* `install-qwen-code --vm node1 --create-user` uses the `install-qwen-code` + builtin; it installs the Qwen Code CLI on `node1` and points it at the + inference endpoint. The builtin installs for user `tester`, which does not + exist on a fresh cloud image, so `--create-user` is required. `node2` stays a + plain astrald peer. +* `configure-astral-agent --vm node1` is a [custom task](tasks/configure-astral-agent/README.md); + it installs the `astral-agent` skill into the Qwen Code operator so it can drive + astrald from the skill's knowledge. The host must have `SATFORGE_SKILLS_DEPLOY_KEY` + set (a deploy key for the private skills repo) — see its README. + +Both VMs must exist and run before `install-astrald`, astrald must be present +before the Qwen Code operator is layered on `node1`, and the operator must exist +before its skill is configured. 
+ +Register the custom tasks once (see [Registering tasks](#registering-tasks)), +then build the lab: + +```sh +./netsim/link.sh +export SATFORGE_SKILLS_DEPLOY_KEY=~/.ssh/satforge_skills_deploy # see tasks/configure-astral-agent +netsim story --stage null --save astrald-lab netsim/scenarios/lab/lab.story +``` + +The result is the stage `astrald-lab`: `node1` and `node2` running astrald, with a +Qwen Code operator on `node1` equipped with the `astral-agent` skill. Re-enter it +with `netsim shell --stage astrald-lab`. + +## Swarm pipeline + +Each post-lab flow is its own scenario under `scenarios/<name>/` — a `<name>.story` +plus a plain-words `README.md` — layered on the previous stage (its `start`/`save` +stages are in the story header and README). Intermediate stages stay reusable, so +you can replay one flow without rebuilding the chain: + +``` +astrald-lab ─[bootstrap-user-software-key]→ one-node ─[adopt-node]→ two-nodes ─[object-store]→ two-nodes-data +``` + +```sh +netsim story --stage astrald-lab --save one-node netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story +netsim story --stage one-node --save two-nodes netsim/scenarios/adopt-node/adopt-node.story +netsim story --stage two-nodes --save two-nodes-data netsim/scenarios/object-store/object-store.story +netsim story --stage two-nodes --save two-nodes-peer-read netsim/scenarios/read-remote-peer/read-remote-peer.story +netsim story --stage two-nodes --save two-nodes-expel netsim/scenarios/expel-node/expel-node.story +``` + +`expel-node` is a separate branch off `two-nodes`: the User on node1 permanently +bans node2, so the swarm roster shrinks (node2 drops out of `user.swarm_status`, +lands in `user.list_expelled`, and the link is torn down). It produces its own +`two-nodes-expel` stage rather than feeding the data-object chain. 
#!/bin/sh
# link.sh — expose each task directory under tasks/ to netsim as a user task.
# netsim looks for tasks only in ~/.local/share/netsim/tasks/, so every folder
# under tasks/ that holds a run.sh gets a symlink there. Safe to re-run.
set -eu

# CDPATH= is a deliberate one-shot environment prefix for cd, not an assignment
# shellcheck disable=SC1007
here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
# $NETSIM_HOME, when set, replaces the default ~/.local/share/netsim base.
target_dir="${NETSIM_HOME:-$HOME/.local/share/netsim}/tasks"
mkdir -p "$target_dir"

count=0
for script in "$here"/tasks/*/run.sh; do
  # an unmatched glob stays literal, so skip anything that is not a real file
  if [ ! -f "$script" ]; then
    continue
  fi
  task_dir=$(dirname "$script")
  task_name=$(basename "$task_dir")
  # -f replaces an existing link, -n avoids descending into a linked directory
  ln -sfn "$task_dir" "$target_dir/$task_name"
  echo "linked $task_name"
  count=$((count + 1))
done

# Nothing linked means the tasks/ tree is missing or empty: report and fail.
if [ "$count" -le 0 ]; then
  echo "no tasks (folders with run.sh) found in $here/tasks" >&2
  exit 1
fi
echo "done: $count task(s) registered — run 'netsim tasks' to confirm"
+ +- **Kind:** fixture · **Family:** foundation +- **Chain:** `one-node` → `two-nodes` +- **Steps:** adopt-node +- **Run:** `netsim story --stage one-node --save two-nodes netsim/scenarios/adopt-node/adopt-node.story` + +One node brings a second node into its personal network as a sibling, then verifies both share the same user contract and see each other as linked peers. This is the stable two-node baseline the multi-node scenarios start from. diff --git a/netsim/scenarios/adopt-node/adopt-node.story b/netsim/scenarios/adopt-node/adopt-node.story new file mode 100644 index 00000000..86e8747a --- /dev/null +++ b/netsim/scenarios/adopt-node/adopt-node.story @@ -0,0 +1,4 @@ +# adopt-node.story — adopt node2 into node1's User swarm (symmetric roster). +# start: one-node save: two-nodes +# netsim story --stage one-node --save two-nodes netsim/scenarios/adopt-node/adopt-node.story +adopt-node diff --git a/netsim/scenarios/bootstrap-user-software-key/README.md b/netsim/scenarios/bootstrap-user-software-key/README.md new file mode 100644 index 00000000..d73dd003 --- /dev/null +++ b/netsim/scenarios/bootstrap-user-software-key/README.md @@ -0,0 +1,10 @@ +# bootstrap-user-software-key + +Turns a node into a user-controlled node by creating a fresh user identity. + +- **Kind:** fixture · **Family:** identity +- **Chain:** `astrald-lab` → `one-node` +- **Steps:** bootstrap-user-software-key +- **Run:** `netsim story --stage astrald-lab --save one-node netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story` + +Creates a user account on the node and activates it with a contract, then confirms the node recognizes the user and accepts user commands. The result is a working user-controlled node that later scenarios build on. 
diff --git a/netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story b/netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story new file mode 100644 index 00000000..c6a32525 --- /dev/null +++ b/netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story @@ -0,0 +1,4 @@ +# bootstrap-user-software-key.story — node1 becomes a User-controlled node. +# start: astrald-lab save: one-node +# netsim story --stage astrald-lab --save one-node netsim/scenarios/bootstrap-user-software-key/bootstrap-user-software-key.story +bootstrap-user-software-key diff --git a/netsim/scenarios/expel-node/README.md b/netsim/scenarios/expel-node/README.md new file mode 100644 index 00000000..933a9f5a --- /dev/null +++ b/netsim/scenarios/expel-node/README.md @@ -0,0 +1,10 @@ +# expel-node + +Tests permanently banning another node from a shared swarm. + +- **Kind:** scenario · **Family:** network +- **Chain:** `two-nodes` → `two-nodes-expel` +- **Steps:** expel-node +- **Run:** `netsim story --stage two-nodes --save two-nodes-expel netsim/scenarios/expel-node/expel-node.story` + +One node expels another: it goes on a blocklist and is dropped from the active members, and the test confirms it is blocked and no longer on the roster. This is how a swarm enforces membership and keeps out unwanted nodes. diff --git a/netsim/scenarios/expel-node/expel-node.story b/netsim/scenarios/expel-node/expel-node.story new file mode 100644 index 00000000..9ea68db2 --- /dev/null +++ b/netsim/scenarios/expel-node/expel-node.story @@ -0,0 +1,4 @@ +# expel-node.story — node1 (the User) permanently bans node2 from its swarm. 
+# start: two-nodes save: two-nodes-expel +# netsim story --stage two-nodes --save two-nodes-expel netsim/scenarios/expel-node/expel-node.story +expel-node diff --git a/netsim/scenarios/import-user-software-key/README.md b/netsim/scenarios/import-user-software-key/README.md new file mode 100644 index 00000000..97e8ad62 --- /dev/null +++ b/netsim/scenarios/import-user-software-key/README.md @@ -0,0 +1,10 @@ +# import-user-software-key + +Tests importing an existing user identity into a node from a recovery phrase. + +- **Kind:** scenario · **Family:** identity +- **Chain:** `astrald-lab` → `one-node` +- **Steps:** import-user-software-key +- **Run:** `netsim story --stage astrald-lab --save one-node netsim/scenarios/import-user-software-key/import-user-software-key.story` + +Takes an existing recovery phrase and uses it to rebuild the keys that make the node a user node with an active contract, then confirms the node identifies as that user. This is the alternative to bootstrap-user-software-key (both yield the one-node state) for the recover-an-identity path. diff --git a/netsim/scenarios/import-user-software-key/import-user-software-key.story b/netsim/scenarios/import-user-software-key/import-user-software-key.story new file mode 100644 index 00000000..7e074096 --- /dev/null +++ b/netsim/scenarios/import-user-software-key/import-user-software-key.story @@ -0,0 +1,6 @@ +# import-user-software-key.story — make node1 a User node from an EXISTING mnemonic +# (embedded in the task's prompt.md; alternative to bootstrap-user-software-key). +# Optional env ASTRAL_USER_ID makes verify assert the derived id. 
+# start: astrald-lab save: one-node +# netsim story --stage astrald-lab --save one-node netsim/scenarios/import-user-software-key/import-user-software-key.story +import-user-software-key diff --git a/netsim/scenarios/lab/README.md b/netsim/scenarios/lab/README.md new file mode 100644 index 00000000..371a1763 --- /dev/null +++ b/netsim/scenarios/lab/README.md @@ -0,0 +1,10 @@ +# lab + +Builds the shared test lab: two astrald nodes plus an AI operator. + +- **Kind:** fixture · **Family:** foundation +- **Chain:** `null` → `astrald-lab` +- **Steps:** add-vm · install-astrald · install-qwen-code · configure-astral-agent +- **Run:** `netsim story --stage null --save astrald-lab netsim/scenarios/lab/lab.story` + +Creates two virtual machines and installs astrald on each so they can work together over a network. It also sets up Qwen Code, an AI assistant, on the first machine with a skill for talking to astrald. This baseline is the foundation every other scenario starts from. diff --git a/netsim/scenarios/lab/lab.story b/netsim/scenarios/lab/lab.story new file mode 100644 index 00000000..b5401d50 --- /dev/null +++ b/netsim/scenarios/lab/lab.story @@ -0,0 +1,9 @@ +# lab.story — the astrald lab, built in one netsim simulation. +# start: null save: astrald-lab +# Result: a single stage with two nodes running astrald and a Qwen Code +# operator on node1, equipped with the astral-agent skill. +add-vm --hostname node1 +add-vm --hostname node2 +install-astrald +install-qwen-code --vm node1 --create-user +configure-astral-agent --vm node1 diff --git a/netsim/scenarios/nat-punch/README.md b/netsim/scenarios/nat-punch/README.md new file mode 100644 index 00000000..ffd75bfc --- /dev/null +++ b/netsim/scenarios/nat-punch/README.md @@ -0,0 +1,12 @@ +# nat-punch + +Two NAT'd peers hole-punch to a direct connection, coordinating over Tor. 
+ +- **Kind:** scenario · **Family:** network +- **Chain:** `two-nodes` → `two-nodes-nat` +- **Steps:** add-vm · install-astrald · enable-tor · enter-nat · configure-nat-tor · add-reflector · punch-nat +- **Run:** `netsim story --stage two-nodes --save two-nodes-nat netsim/scenarios/nat-punch/nat-punch.story` + +Both nodes are put behind their own NAT so they have no direct path to each other. A public reflector node tells each one its outside address, and they use a Tor link to coordinate a simultaneous connection attempt. On success the pair ends up talking over a direct kcp link instead of relaying through Tor. + +> **Status:** works end to end (direct kcp link verified on both peers). The simulated NAT is currently a permissive *full-cone* NAT; a stricter, filtered NAT that a punch must genuinely defeat is under evaluation. diff --git a/netsim/scenarios/nat-punch/nat-punch.story b/netsim/scenarios/nat-punch/nat-punch.story new file mode 100644 index 00000000..2ae01f60 --- /dev/null +++ b/netsim/scenarios/nat-punch/nat-punch.story @@ -0,0 +1,25 @@ +# nat-punch.story — two NAT'd peers hole-punch to a direct kcp link (sibling of tor-link). +# +# Both nodes get Tor WHILE the LAN link is still live (so their onions publish and sync), +# then each enters its own symmetric true-masquerade NAT (astrald in netns priv + port- +# preserving SNAT to 198.51.100., severing the direct 10.77 path). A public reflector +# arms each peer's `nat` module by reflecting its public endpoint back. Tor is relocated +# INTO the netns (with WAN egress) so the pair can signal over Tor. Then node1 triggers the +# NAT hole-punch to node2 -> a direct kcp link on BOTH peers. +# +# Signaling is over Tor (source-verified: the tcp-only Basic strategy can't form for two +# symmetric NATs, and the punch client sets no relay hint), so configure-nat-tor is required. 
+# +# start: two-nodes save: two-nodes-nat +# netsim story --stage two-nodes --save two-nodes-nat netsim/scenarios/nat-punch/nat-punch.story +add-vm --hostname reflector +install-astrald --vm reflector +enable-tor --vm node1 --vm node2 +enter-nat --vm node1 --vm node2 +configure-nat-tor --vm node1 --vm node2 +add-reflector --reflector reflector --vm node1 --vm node2 +punch-nat --vm node1 --peer node2 +# NOTE order: configure-nat-tor (which RESTARTS astrald) must run BEFORE add-reflector. +# add-reflector arms `nat` via an in-memory reflected endpoint; an astrald restart after +# it would wipe that endpoint and disarm nat -> the punch aborts "does not support NAT +# traversal". So arm LAST, after the final restart. diff --git a/netsim/scenarios/object-store-peer/README.md b/netsim/scenarios/object-store-peer/README.md new file mode 100644 index 00000000..4cb7e8c3 --- /dev/null +++ b/netsim/scenarios/object-store-peer/README.md @@ -0,0 +1,10 @@ +# object-store-peer + +Tests that one node can store a data object on a peer and get it back. + +- **Kind:** scenario · **Family:** objectstore +- **Chain:** `two-nodes` → `two-nodes-data-peer` +- **Steps:** object-store --target node2 +- **Run:** `netsim story --stage two-nodes --save two-nodes-data-peer netsim/scenarios/object-store-peer/object-store-peer.story` + +One node creates a file-based object and stores it on a connected peer node, then confirms the peer can serve back the exact same data. Shows the object store works across two connected nodes. diff --git a/netsim/scenarios/object-store-peer/object-store-peer.story b/netsim/scenarios/object-store-peer/object-store-peer.story new file mode 100644 index 00000000..b2e19861 --- /dev/null +++ b/netsim/scenarios/object-store-peer/object-store-peer.story @@ -0,0 +1,4 @@ +# object-store-peer.story — node1 stores an object ON the peer (node2) and reads it back. 
+# start: two-nodes save: two-nodes-data-peer +# netsim story --stage two-nodes --save two-nodes-data-peer netsim/scenarios/object-store-peer/object-store-peer.story +object-store --target node2 diff --git a/netsim/scenarios/object-store/README.md b/netsim/scenarios/object-store/README.md new file mode 100644 index 00000000..08de4a81 --- /dev/null +++ b/netsim/scenarios/object-store/README.md @@ -0,0 +1,10 @@ +# object-store + +Stores a file as an object and reads it back to check the data is intact. + +- **Kind:** scenario · **Family:** objectstore +- **Chain:** `two-nodes` → `two-nodes-data` +- **Steps:** object-store +- **Run:** `netsim story --stage two-nodes --save two-nodes-data netsim/scenarios/object-store/object-store.story` + +A node stores a file as an object and records its id, then reads it back by that id and checks the bytes match the original. Confirms the object store can reliably save and retrieve data on a single node. diff --git a/netsim/scenarios/object-store/object-store.story b/netsim/scenarios/object-store/object-store.story new file mode 100644 index 00000000..30a47134 --- /dev/null +++ b/netsim/scenarios/object-store/object-store.story @@ -0,0 +1,4 @@ +# object-store.story — node1 stores an object locally and reads it back. +# start: two-nodes save: two-nodes-data +# netsim story --stage two-nodes --save two-nodes-data netsim/scenarios/object-store/object-store.story +object-store diff --git a/netsim/scenarios/read-remote-peer/README.md b/netsim/scenarios/read-remote-peer/README.md new file mode 100644 index 00000000..9e5ac789 --- /dev/null +++ b/netsim/scenarios/read-remote-peer/README.md @@ -0,0 +1,10 @@ +# read-remote-peer + +Stores data on a peer, then reads it back over the network. 
+ +- **Kind:** scenario · **Family:** objectstore +- **Chain:** `two-nodes` → `two-nodes-peer-read` +- **Steps:** object-store · read-remote-object +- **Run:** `netsim story --stage two-nodes --save two-nodes-peer-read netsim/scenarios/read-remote-peer/read-remote-peer.story` + +One node stores a file on a peer and notes which object was created, then reads that object back from the peer across the link. Verifies astrald can move and fetch data between different nodes. diff --git a/netsim/scenarios/read-remote-peer/read-remote-peer.story b/netsim/scenarios/read-remote-peer/read-remote-peer.story new file mode 100644 index 00000000..33592e78 --- /dev/null +++ b/netsim/scenarios/read-remote-peer/read-remote-peer.story @@ -0,0 +1,6 @@ +# read-remote-peer.story — store an object on the peer (node2), then node1's agent +# reads it back from the peer over astral. +# start: two-nodes save: two-nodes-peer-read +# netsim story --stage two-nodes --save two-nodes-peer-read netsim/scenarios/read-remote-peer/read-remote-peer.story +object-store --target node2 +read-remote-object diff --git a/netsim/scenarios/tor-link/README.md b/netsim/scenarios/tor-link/README.md new file mode 100644 index 00000000..986c8816 --- /dev/null +++ b/netsim/scenarios/tor-link/README.md @@ -0,0 +1,10 @@ +# tor-link + +A node automatically reconnects over Tor when it loses the local network. + +- **Kind:** scenario · **Family:** network +- **Chain:** `two-nodes` → `two-nodes-tor` +- **Steps:** enable-tor · leave-lan · link-over-tor +- **Run:** `netsim story --stage two-nodes --save two-nodes-tor netsim/scenarios/tor-link/tor-link.story` + +Two nodes start connected on a local network and both set up Tor. Then one node leaves the LAN entirely, and the scenario checks that the pair automatically re-links over Tor with no help. Tests that astrald finds an alternate path when the primary one disappears. 
diff --git a/netsim/scenarios/tor-link/tor-link.story b/netsim/scenarios/tor-link/tor-link.story new file mode 100644 index 00000000..3e189829 --- /dev/null +++ b/netsim/scenarios/tor-link/tor-link.story @@ -0,0 +1,9 @@ +# tor-link.story — a node leaves the LAN and links over Tor (scenario 0004). +# Both nodes get system Tor (so astrald's tor module can publish/dial onions); node2 +# drops its LAN path to node1 (after node1 is seeded with node2's onion); then node1's +# agent re-establishes the swarm link over Tor. +# start: two-nodes save: two-nodes-tor +# netsim story --stage two-nodes --save two-nodes-tor netsim/scenarios/tor-link/tor-link.story +enable-tor --vm node1 --vm node2 +leave-lan --vm node2 --peer node1 +link-over-tor --vm node1 --peer node2 diff --git a/netsim/tasks/_lib/astral-py b/netsim/tasks/_lib/astral-py new file mode 160000 index 00000000..c68919ff --- /dev/null +++ b/netsim/tasks/_lib/astral-py @@ -0,0 +1 @@ +Subproject commit c68919ffc1acc071ee91ef3f529e6677f1adbd54 diff --git a/netsim/tasks/_lib/astralapi.py b/netsim/tasks/_lib/astralapi.py new file mode 100644 index 00000000..0814488b --- /dev/null +++ b/netsim/tasks/_lib/astralapi.py @@ -0,0 +1,345 @@ +"""Shared host-side verify library for the netsim astral scenarios. + +Each task's verify.py reaches this through a realpath shim that crosses netsim's +per-task symlink: + + import os, sys + sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) + import astralapi + +It centralises the two halves every verifier shares: + + * transport -- ssh()/file readers/all_running_vms()/peer_lan_ip(): unchanged + subprocess plumbing for reading the agent's recorded artifacts and probing + inside a VM. + + * queries -- connect(vm, token=...) yields a Node whose .call(op, ...) returns + a list of astral.AstralObject. 
Queries go through the astral-py typed client + (reached host-side over an ssh -L forward of the VM's WebSocket apphost + port), falling back to the lockstep Go `astral-query` CLI -- same JSON, + parsed with astral-py's from_json_envelope -- whenever the client can't + serve an op (pinned in SHELL_OPS, or it raised). Both paths return the same + list[AstralObject], so the interrogators below are transport-agnostic. + +astral-py is the submodule at _lib/astral-py (package under src/); imported without +pip. $ASTRALPY_SRC overrides the src dir for local dev against another checkout. +""" +import contextlib +import json +import os +import shlex +import socket +import subprocess +import sys +import time + +# --- astral-py (submodule at _lib/astral-py; pip-free) ----------------------- +# why: realpath resolves _lib through netsim's per-task symlink; the submodule's +# package lives under src/. $ASTRALPY_SRC overrides for local dev. +_ASTRALPY_SRC = os.environ.get("ASTRALPY_SRC") or os.path.join( + os.path.dirname(os.path.realpath(__file__)), "astral-py", "src") +if not os.path.isdir(os.path.join(_ASTRALPY_SRC, "astral")): + raise ImportError( + f"astral-py not found at {_ASTRALPY_SRC} -- run " + "`git submodule update --init netsim/tasks/_lib/astral-py` " + "(or set $ASTRALPY_SRC to an astral-py checkout's src/)") +sys.path.insert(0, _ASTRALPY_SRC) +import astral # noqa: E402 +from astral.encoding import from_json_envelope # noqa: E402 + +# apphost WebSocket port inside each VM (binds 0.0.0.0; reachable via ssh -L). +WS_PORT = 8624 + +# Ops to keep on the Go astral-query CLI instead of the astral-py client. +# Populated by the smoke-test triage when the client disagrees with the CLI on a +# specific op (a silent mismatch the auto-fallback can't catch). Empty => every +# op tries the client first. 
SHELL_OPS = set()

# Object types that mark end-of-stream or a failure instead of carrying result data.
_NON_DATA_TYPES = ("eos", "error_message")


# --- transport: subprocess into the VM ---------------------------------------
def ssh(vm, remote):
    """Run `netsim ssh <vm> -- <remote>` on the host and return its stdout.

    Best-effort: the exit status and stderr are ignored, so a failing command
    simply yields whatever (possibly empty) stdout it produced.
    """
    p = subprocess.run(["netsim", "ssh", vm, "--", remote],
                       capture_output=True, text=True)
    return p.stdout


def read_file(vm, path):
    """Return the contents of `path` on the VM, trailing newlines stripped ("" on error)."""
    # note: `path` reaches the remote shell as-is (not quoted).
    return (ssh(vm, f"cat {path}") or "").rstrip("\n")


def read_json(vm, path):
    """Return `path` on the VM parsed as a dict.

    Yields {} when the file is missing/empty, is not valid JSON, or holds a
    JSON value that is not an object -- callers use `.get()` on the result, so
    a list or scalar must never leak through.
    """
    try:
        data = json.loads(ssh(vm, f"cat {path}") or "{}")
    except json.JSONDecodeError:
        return {}
    return data if isinstance(data, dict) else {}


def home_json(vm, name):
    """An agent artifact under /home/tester/, parsed as a dict ({} on error)."""
    return read_json(vm, f"/home/tester/{name}")


def all_running_vms():
    """Hostnames of the running VMs in the current simulation ([] on error).

    Entries that are not objects, are not in state "running", or carry no
    hostname are skipped instead of raising.
    """
    out = subprocess.run(["netsim", "vm", "ls", "--json"],
                         capture_output=True, text=True).stdout
    try:
        vms = json.loads(out or "[]")
    except json.JSONDecodeError:
        return []
    if not isinstance(vms, list):
        return []
    return [v["hostname"] for v in vms
            if isinstance(v, dict) and v.get("state") == "running" and v.get("hostname")]


def peer_lan_ip(peer):
    """The first 10.77.* address reported by `hostname -I` on `peer` ("" if none)."""
    for tok in (ssh(peer, "hostname -I") or "").split():
        if tok.startswith("10.77."):
            return tok
    return ""


# --- queries: astral-py client over an ssh -L forward, Go-CLI fallback -------
def parse_cli(raw):
    """Parse `astral-query -out json` output into AstralObjects (eos dropped).

    One JSON envelope is expected per line; blank lines and lines that fail to
    decode are skipped so stray CLI noise does not abort the whole stream.
    """
    out = []
    for ln in (raw or "").splitlines():
        ln = ln.strip()
        if not ln:
            continue
        try:
            obj = from_json_envelope(json.loads(ln))
        except Exception:
            # why: best-effort -- a non-JSON or unknown-shape line is ignored
            continue
        if not obj.is_eos:
            out.append(obj)
    return out


def _free_port():
    """Ask the kernel for a currently unused loopback TCP port and return it."""
    # why: the context manager closes the socket even if bind() raises
    with socket.socket() as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]


def _wait_port(port, timeout=10.0, proc=None):
    """Poll until 127.0.0.1:`port` accepts a TCP connection.

    Returns True once a connection succeeds, False after `timeout` seconds.
    If `proc` (a subprocess.Popen) is given and has already exited, returns
    False immediately instead of waiting out the timeout.
    """
    # why: monotonic clock -- a wall-clock adjustment must not stretch or cut the wait
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if proc is not None and proc.poll() is not None:
            return False
        try:
            with socket.create_connection(("127.0.0.1", port), timeout=0.5):
                return True
        except OSError:
            time.sleep(0.1)
    return False


class Node:
    """A handle to one VM's apphost: .call(op, ...) -> list[AstralObject]."""

    def __init__(self, vm, client, token):
        self.vm = vm            # netsim hostname used for the ssh fallback
        self._client = client   # astral-py client, or None for shell-only mode
        self._token = token     # apphost token exported for the CLI ("" / None = anonymous)

    @property
    def uses_client(self):
        """True when an astral-py client is attached (not shell-only)."""
        return self._client is not None

    def _via_shell(self, op, args, target):
        """Run the op through the Go `astral-query` CLI on the VM and parse its output."""
        q = f"{target}:{op}" if target else op
        flags = "".join(f" -{k} {shlex.quote(str(v))}" for k, v in (args or {}).items())
        # why: token and query are quoted like the arg values so shell
        # metacharacters in either cannot alter the remote command line
        tok = (f"export ASTRALD_APPHOST_TOKEN={shlex.quote(str(self._token))}; "
               if self._token else "")
        return parse_cli(ssh(self.vm, f"{tok}astral-query {shlex.quote(q)}{flags} -out json"))

    def call(self, op, args=None, target=None):
        """Run an apphost op; return its result objects (eos dropped, errors kept).

        Routes through the astral-py client unless the op is pinned in SHELL_OPS
        or no client is available; on any client error, falls back to the Go CLI.
        """
        if self._client is None or op in SHELL_OPS:
            return self._via_shell(op, args, target)
        try:
            with self._client.query(op, args or None, target=target) as st:
                return list(st)
        except Exception:
            # why: anonymous WS sessions and any client error fall back to the
            # lockstep astral-query so verification still runs.
            return self._via_shell(op, args, target)


@contextlib.contextmanager
def connect(vm, token=None):
    """Yield a Node for `vm`.

    Opens an ssh -L forward of the VM's WebSocket apphost port (using netsim's
    own $NETSIM_SSH_CONFIG) and an astral-py client over it. If the forward or
    client can't be established, yields a shell-only Node so verification still
    runs via the Go CLI. The client and the tunnel are torn down on exit.
    """
    cfg = os.environ.get("NETSIM_SSH_CONFIG")
    client = None
    tunnel = None
    if cfg:
        try:
            port = _free_port()
            tunnel = subprocess.Popen(
                ["ssh", "-F", cfg, "-o", "ExitOnForwardFailure=yes",
                 "-L", f"{port}:127.0.0.1:{WS_PORT}", "-N", vm],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            # why: passing the tunnel lets the wait end early if ssh already exited
            if _wait_port(port, proc=tunnel):
                c = astral.connect(f"ws://127.0.0.1:{port}/.ws", token=token)
                # why: some astrald builds reject anonymous WS route_query (ProtocolError);
                # probe once so the session degrades to the Go CLI wholesale, not per call.
                try:
                    c.whoami()
                    client = c
                except Exception:
                    try:
                        c.close()
                    except Exception:
                        pass
                    client = None
        except Exception:
            # why: any setup failure degrades to the shell-only Node
            client = None
    try:
        yield Node(vm, client, token)
    finally:
        try:
            if client is not None:
                client.close()
        except Exception:
            pass
        if tunnel is not None:
            tunnel.terminate()
            try:
                tunnel.wait(timeout=3)
            except subprocess.TimeoutExpired:
                tunnel.kill()
                tunnel.wait()  # why: reap the killed ssh so it does not linger as a zombie


# --- interrogators: list[AstralObject] -> extracted value --------------------
def _values(objs):
    """The raw `.value` of every object, in order."""
    # note: interrogators below test isinstance(v, dict), so eos/error values are skipped
    return [o.value for o in objs]


def contract(objs):
    """(Issuer, Subject) of the active contract from a user.info result.

    Returns (None, None) when no object carries a dict under "Contract"; a
    missing or non-dict inner "Contract" yields (None, None) as well.
    """
    for v in _values(objs):
        if isinstance(v, dict) and isinstance(v.get("Contract"), dict):
            inner = v["Contract"].get("Contract")
            if not isinstance(inner, dict):
                inner = {}
            return inner.get("Issuer"), inner.get("Subject")
    return None, None


def linked_sibling(objs):
    """Identity of the first Linked sibling in a user.swarm_status result (None if none)."""
    for v in _values(objs):
        if isinstance(v, dict) and v.get("Linked"):
            return v.get("Identity")
    return None


def swarm_identities(objs):
    """Set of node identities in a user.swarm_status result."""
    return {v["Identity"] for v in _values(objs)
            if isinstance(v, dict) and v.get("Identity")}


def has_link_to(objs, ident):
    """True if a nodes.links result holds a link whose RemoteIdentity is `ident`.

    An empty/None `ident` never matches: otherwise every entry that lacks a
    RemoteIdentity would compare equal to None and report a link.
    """
    if not ident:
        return False
    return any(isinstance(v, dict) and v.get("RemoteIdentity") == ident
               for v in _values(objs))


def _contains_identity(value, ident):
    """True if `ident` appears as a string anywhere in the dict/list tree `value`."""
    # why: expulsion records nest the Subject at varying depth; recurse the dict/list tree
    if isinstance(value, str):
        return value == ident
    if isinstance(value, dict):
        return any(_contains_identity(v, ident) for v in value.values())
    if isinstance(value, list):
        return any(_contains_identity(v, ident) for v in value)
    return False


def is_expelled(objs, ident):
    """True if a user.list_expelled result names `ident` (nested match; eos/errors ignored)."""
    return any(_contains_identity(o.value, ident) for o in objs
               if o.type not in _NON_DATA_TYPES)


def loaded_payload(objs):
    """The first string payload from an objects.load result, or None (eos/errors ignored)."""
    for o in objs:
        if o.type in _NON_DATA_TYPES:
            continue
        if isinstance(o.value, str):
            return o.value
    return None


def error_messages(objs):
    """The error_message values in a result stream."""
    return [o.value for o in objs if o.type == "error_message"]


def endpoint_addr(ep):
    """Address string of an endpoint given bare (str) or as a dict with "Object" ("" otherwise)."""
    if isinstance(ep, str):
        return ep
    if isinstance(ep, dict):
        o = ep.get("Object")
        return o if isinstance(o, str) else ""
    return ""


def links_by_network(objs, network):
    """(RemoteIdentity, endpoint-address) for links whose Network == `network`."""
    return [(str(v.get("RemoteIdentity", "")), endpoint_addr(v.get("RemoteEndpoint")))
            for v in _values(objs)
            if isinstance(v, dict) and str(v.get("Network")) == network]


def tor_links(objs):
    """(RemoteIdentity, endpoint-address) for links whose Network == 'tor'."""
    return links_by_network(objs, "tor")


def kcp_links(objs):
    """(RemoteIdentity, endpoint-address) for links whose Network == 'kcp'.

    A 'kcp' link is the unique signal of a completed NAT hole-punch: mod/nodes'
    NATLinkStrategy is the only path that dials a kcp.Endpoint (BasicLinkStrategy
    dials only tcp, and kcp endpoints are never advertised for an ordinary peer
    dial), so a kcp link to a sibling means the punch succeeded and was promoted
    to a direct link. Mirrors tor_links(); cf. links_by_network(objs, "kcp")."""
    return links_by_network(objs, "kcp")


def resolve_onion(objs):
    """The first .onion address from a nodes.resolve_endpoints result, or None."""
    for v in _values(objs):
        if isinstance(v, dict):
            a = endpoint_addr(v.get("Endpoint"))
            if ".onion" in a:
                return a
    return None
"""Offline tests for astralapi -- no VM, no live astrald.

Exercises the interrogators against synthetic AstralObjects, parse_cli's
stream handling, and the Go-CLI fallback command construction. Run with:

    python3 -m unittest -v        # from this directory
"""
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)))
import astralapi  # noqa: E402 (also bootstraps astral onto sys.path)
import astral  # noqa: E402


def O(kind, value=None):
    # shorthand: build one synthetic AstralObject of the given type
    return astral.obj(kind, value)


class InterrogatorTests(unittest.TestCase):
    def test_contract(self):
        signed = O("mod.user.contract",
                   {"Contract": {"Contract": {"Issuer": "02aa", "Subject": "03bb"}}})
        self.assertEqual(astralapi.contract([signed]), ("02aa", "03bb"))
        self.assertEqual(astralapi.contract([O("x", {})]), (None, None))

    def test_linked_sibling_and_identities(self):
        linked = O("s", {"Identity": "03bb", "Linked": True})
        unlinked = O("s", {"Identity": "03cc", "Linked": False})
        roster = [linked, unlinked]
        self.assertEqual(astralapi.linked_sibling(roster), "03bb")
        self.assertEqual(astralapi.swarm_identities(roster), {"03bb", "03cc"})
        lonely = [O("s", {"Identity": "03cc", "Linked": False})]
        self.assertIsNone(astralapi.linked_sibling(lonely))

    def test_has_link_to(self):
        links = [O("l", {"RemoteIdentity": "03bb", "Network": "tcp"})]
        self.assertTrue(astralapi.has_link_to(links, "03bb"))
        self.assertFalse(astralapi.has_link_to(links, "03cc"))

    def test_is_expelled_nested(self):
        records = [O("mod.user.signed_expulsion", {"Expulsion": {"Subject": "03bb"}})]
        self.assertTrue(astralapi.is_expelled(records, "03bb"))
        self.assertFalse(astralapi.is_expelled(records, "03cc"))
        # an error_message naming the id must not count as an expulsion record
        only_error = [O("error_message", "03bb not found")]
        self.assertFalse(astralapi.is_expelled(only_error, "03bb"))

    def test_loaded_payload_and_errors(self):
        stream = [O("error_message", "boom"), O("string8", "hello")]
        self.assertEqual(astralapi.loaded_payload(stream), "hello")
        self.assertEqual(astralapi.error_messages(stream), ["boom"])
        self.assertIsNone(astralapi.loaded_payload([O("error_message", "boom")]))

    def test_tor_links_and_endpoint(self):
        over_tor = O("l", {"Network": "tor", "RemoteIdentity": "03bb",
                           "RemoteEndpoint": {"Object": "abc.onion:1791"}})
        over_tcp = O("l", {"Network": "tcp", "RemoteIdentity": "03cc"})
        self.assertEqual(astralapi.tor_links([over_tor, over_tcp]),
                         [("03bb", "abc.onion:1791")])
        self.assertEqual(astralapi.endpoint_addr("x.onion"), "x.onion")
        self.assertEqual(astralapi.endpoint_addr({"Object": "y.onion"}), "y.onion")
        self.assertEqual(astralapi.endpoint_addr(None), "")

    def test_resolve_onion(self):
        clearnet = O("e", {"Endpoint": "10.0.0.1:1791"})
        onion = O("e", {"Endpoint": {"Object": "abc.onion:1791"}})
        self.assertEqual(astralapi.resolve_onion([clearnet, onion]), "abc.onion:1791")
        self.assertIsNone(astralapi.resolve_onion([O("e", {"Endpoint": "10.0.0.1:1791"})]))


class ParseCliTests(unittest.TestCase):
    def test_drops_eos_keeps_error(self):
        raw = ('{"Type":"string8","Object":"hi"}\n'
               '{"Type":"error_message","Object":"nope"}\n'
               '\n'
               'not-json\n'
               '{"Type":"eos","Object":null}\n')
        parsed = astralapi.parse_cli(raw)
        kinds = [o.type for o in parsed]
        self.assertEqual(kinds, ["string8", "error_message"])
        self.assertEqual(astralapi.loaded_payload(parsed), "hi")
        self.assertEqual(astralapi.error_messages(parsed), ["nope"])

    def test_empty(self):
        for nothing in ("", None):
            self.assertEqual(astralapi.parse_cli(nothing), [])


class ShellRoutingTests(unittest.TestCase):
    """Node with no client must build the exact Go astral-query command."""

    def setUp(self):
        self.calls = []
        original_ssh = astralapi.ssh

        def fake_ssh(vm, remote):
            self.calls.append((vm, remote))
            return '{"Type":"string8","Object":"hi"}\n{"Type":"eos","Object":null}\n'

        astralapi.ssh = fake_ssh
        # restore the real transport after each test
        self.addCleanup(setattr, astralapi, "ssh", original_ssh)

    def test_untokened(self):
        result = astralapi.Node("node1", None, "").call("user.info")
        self.assertEqual(self.calls[-1], ("node1", "astral-query user.info -out json"))
        self.assertEqual(astralapi.loaded_payload(result), "hi")

    def test_tokened_with_args(self):
        node = astralapi.Node("node1", None, "TKN")
        node.call("objects.load", {"id": "X", "repo": "local"})
        _, command = self.calls[-1]
        self.assertEqual(
            command,
            "export ASTRALD_APPHOST_TOKEN=TKN; "
            "astral-query objects.load -id X -repo local -out json")

    def test_peer_target(self):
        node = astralapi.Node("node1", None, "TKN")
        node.call("objects.load", {"id": "X"}, target="node2")
        _, command = self.calls[-1]
        self.assertEqual(
            command,
            "export ASTRALD_APPHOST_TOKEN=TKN; "
            "astral-query node2:objects.load -id X -out json")

    def test_arg_value_is_shell_quoted(self):
        import shlex
        tricky = "a b'c"  # a value with a space and a quote
        astralapi.Node("node1", None, "").call("objects.load", {"id": tricky})
        self.assertIn(f"-id {shlex.quote(tricky)}", self.calls[-1][1])

    def test_shell_ops_pin_forces_cli(self):
        # even with a (truthy sentinel) client, a pinned op must go to the shell
        self.addCleanup(astralapi.SHELL_OPS.discard, "user.info")
        astralapi.SHELL_OPS.add("user.info")
        astralapi.Node("node1", object(), "").call("user.info")
        self.assertEqual(self.calls[-1][1], "astral-query user.info -out json")


if __name__ == "__main__":
    unittest.main()
# The reflector VM itself is made
# by add-vm + install-astrald; this task does the reflector-specific wiring:
#   1. give the reflector a public TEST-NET alias 198.51.100.<oct> and read its id;
#   2. on each peer: register that endpoint and force a tcp link to it -> the reflector
#      observes the peer's 198.51.100.<oct> source and reflects it -> the peer's nat arms.
# Run AFTER enter-nat (the peer must already be behind its NAT so the reflected source is
# its public alias, not its private 192.168.99.2).
#   add-reflector [--reflector <host>] [--vm <host>]...   (default: reflector; peers node1 node2)
set -eu

# Defaults: reflector VM name, and an empty peer list (filled from --vm, else node1 node2).
REFL="reflector"; PEERS=""
while [ $# -gt 0 ]; do
  case "$1" in
    --reflector) [ $# -ge 2 ] || { echo "need host after --reflector" >&2; exit 64; }; REFL=$2; shift 2 ;;
    # --vm may repeat; each host is appended to the space-separated PEERS list
    --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; PEERS="${PEERS:+$PEERS }$2"; shift 2 ;;
    *) echo "usage: add-reflector [--reflector ] [--vm ]..." >&2; exit 64 ;;
  esac
done
[ -n "$PEERS" ] || PEERS="node1 node2"

# 1) give the reflector a public alias and read its node identity
# The quoted heredoc ('EOS') keeps the text literal on the host; it is expanded only
# by the shell on the reflector VM.
REFL_SETUP=$(cat <<'EOS'
set -eu
# name of the interface carrying the 10.77.* LAN address
lan=$(ip -o -4 addr show | awk '$4 ~ /^10\.77\./ {print $2; exit}')
[ -n "$lan" ] || { echo "add-reflector: no 10.77 LAN nic on $(hostname)" >&2; exit 1; }
# last octet of that LAN address, reused as the last octet of the public alias
oct=$(ip -o -4 addr show dev "$lan" | awk '$4 ~ /^10\.77\./ {n=$4; sub(/\/.*/,"",n); split(n,a,"."); print a[4]; exit}')
pub="198.51.100.$oct"
# errors (e.g. the address already being present on a re-run) are ignored
ip addr add "$pub/24" dev "$lan" 2>/dev/null || true
# the reflector's own node identity (host sees the local anonymous caller as the node)
# The inline python prints the first "Object" that is either a string of >= 64 chars
# or a dict with a string "Identity".
rid=$(astral-query apphost.whoami -out json 2>/dev/null | python3 -c '
import json,sys
for ln in sys.stdin:
    ln=ln.strip()
    if not ln: continue
    try: o=json.loads(ln)
    except Exception: continue
    v=o.get("Object")
    if isinstance(v,str) and len(v)>=64: print(v); break
    if isinstance(v,dict) and isinstance(v.get("Identity"),str): print(v["Identity"]); break')
[ -n "$rid" ] || { echo "add-reflector: could not read reflector identity via apphost.whoami on $(hostname)" >&2; exit 1; }
echo "$pub $rid"          # LAST stdout line: 
EOS
)
echo "add-reflector: configuring reflector on $REFL ..." >&2
# Only the last stdout line is kept; it is validated below before use.
out=$(netsim ssh "$REFL" -- "$REFL_SETUP" | tail -n1)
REFL_PUB=$(echo "$out" | awk '{print $1}')
REFL_ID=$(echo "$out" | awk '{print $2}')
case "$REFL_PUB" in 198.51.100.*) : ;; *) echo "add-reflector: bad reflector pub '$REFL_PUB' (out: $out)" >&2; exit 1 ;; esac
[ -n "$REFL_ID" ] || { echo "add-reflector: no reflector identity (out: $out)" >&2; exit 1; }
echo "add-reflector: reflector '$REFL' at tcp:$REFL_PUB:1791 id=$REFL_ID" >&2

# 2) seed each peer with the reflector endpoint and force a tcp link so it gets reflected
for p in $PEERS; do
  echo "add-reflector: linking $p -> reflector (for endpoint reflection) ..." >&2
  # shellcheck disable=SC2029
  # the peer's astrald is in netns "priv" (enter-nat); astral-query defaults to
  # tcp:127.0.0.1:8625 which is netns-local, so run it inside the netns.
  # add_endpoint and set_alias are best-effort (|| true); a failing new_link only warns.
  netsim ssh "$p" -- "
    ip netns exec priv astral-query nodes.add_endpoint -id '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' >/dev/null 2>&1 || true
    ip netns exec priv astral-query dir.set_alias -id '$REFL_ID' -alias reflector >/dev/null 2>&1 || true
    ip netns exec priv astral-query nodes.new_link -target '$REFL_ID' -endpoint 'tcp:$REFL_PUB:1791' -out json 2>&1 | tail -3
  " || echo "add-reflector: WARNING new_link to reflector failed on $p (bring-up diagnoses)" >&2
done
echo "add-reflector: done (reflector=$REFL id=$REFL_ID pub=$REFL_PUB; peers: $PEERS)"
def _has_ip(raw, ip):
    """True if `ip` occurs in `raw` as a complete dotted address.

    A plain substring test is not enough: "198.51.100.1" is a prefix of
    "198.51.100.12", so the match must not be preceded by a digit or dot nor
    followed by another digit. `raw` may be None or empty.
    """
    import re  # local import: only this helper needs it
    pattern = r"(?<![0-9.])" + re.escape(ip) + r"(?![0-9])"
    return re.search(pattern, raw or "") is not None


def main():
    """Check every peer lists its TEST-NET alias among ip.public_ip_candidates.

    For each peer (--vm, repeatable; default node1 and node2) the expected
    alias is 198.51.100.<last octet of the peer's 10.77 LAN address>. Prints
    one OK line per armed peer; returns 0 when all peers pass, 1 otherwise
    (failures and the raw query output go to stderr).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--reflector", default="reflector")   # accepted (same argv as run.sh), unused here
    ap.add_argument("--vm", dest="vms", action="append", default=[])
    args, _ = ap.parse_known_args()
    peers = args.vms or ["node1", "node2"]

    failed = []
    for p in peers:
        lan = astralapi.peer_lan_ip(p)                     # e.g. "10.77.0.12"
        if not lan:
            failed.append(f"{p}: could not read its 10.77 LAN address")
            continue
        want = "198.51.100." + lan.rsplit(".", 1)[-1]      # the peer's public TEST-NET alias
        # astrald is in netns "priv"; astral-query defaults to tcp:127.0.0.1:8625 (netns-local),
        # so run it inside the netns. Local introspection op -> ungated, no token needed.
        raw = astralapi.ssh(p, "ip netns exec priv astral-query ip.public_ip_candidates -out json") or ""
        # why: whole-address match, so .1 is not satisfied by .12 in the output
        if _has_ip(raw, want):
            print(f"add-reflector OK: {p} nat armed -- public candidate {want} present.")
        else:
            failed.append(f"{p}: public candidate {want} NOT among ip.public_ip_candidates")
            sys.stderr.write(f"  {p} ip.public_ip_candidates:\n{raw or '(empty)'}\n")

    if failed:
        for f in failed:
            sys.stderr.write(f"add-reflector verify FAILED: {f}\n")
        return 1
    print(f"add-reflector verified: nat armed on all peers ({', '.join(peers)})")
    return 0


if __name__ == "__main__":
    sys.exit(main())
#!/bin/sh
# adopt-node: adopt the second node into the User's swarm, driven by the Qwen
# Code agent running INSIDE node1 (which is already a User node from
# bootstrap-user-software-key — default starting stage: one-node).
#   adopt-node [--vm <host>]      (default: node1 — the VM carrying Qwen)
#
# Runs ON THE HOST (cwd = simulation root). Same mechanic as bootstrap-user-software-key:
# tiny script, thin prompt, intelligence in the agent's astral-agent skill. The
# whole remote program travels as ONE argv to `netsim ssh`; the prompt rides
# along base64-encoded so a multi-line file never fights shell quoting.
set -eu

VM="node1"
while [ $# -gt 0 ]; do
  case "$1" in
    --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;;
    *) echo "usage: adopt-node [--vm ]" >&2; exit 64 ;;
  esac
done

# CDPATH= is an intentional one-shot env prefix for cd, not an assignment
# shellcheck disable=SC1007
here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; }
prompt_b64=$(base64 -w0 "$here/prompt.md")   # GNU coreutils; -w0 = single line

# Program executed on the guest. The quoted heredoc ('EOS') keeps it literal here;
# $prompt_b64 inside it is resolved on the guest from the assignment prefixed below.
REMOTE_BODY=$(cat <<'EOS'
set -eu
d=/home/tester/.netsim
mkdir -p "$d"
printf '%s' "$prompt_b64" | base64 -d > "$d/adopt-node.prompt"
chown -R tester:tester "$d"

# Run the agent as `tester` (qwen is installed for that user), non-interactively.
# Invocation matches what was validated for bootstrap-user-software-key: one-shot positional
# prompt + `-y` (auto-approve).
su - tester -c 'qwen -y "$(cat /home/tester/.netsim/adopt-node.prompt)"' \
  > "$d/adopt-node.log" 2>&1 || {
    echo "qwen run failed on $(hostname); tail of log:" >&2
    tail -n 40 "$d/adopt-node.log" >&2
    exit 1
  }

# Soft smoke-check only (verify.sh is the authoritative, independent check). node1
# holds the User token in $HOME/user.json, so we can peek at the swarm here; don't
# fail the run on a shape mismatch — leave the verdict to verify.sh.
# The python one-liner prints the number of recorded sibling_ids; grep -v drops a
# bare "0", so the test is non-empty only for a positive count (a read/parse error
# prints nothing and is treated like zero).
if [ -n "$(python3 -c 'import json;print(len(json.load(open("/home/tester/siblings.json")).get("sibling_ids") or []))' 2>/dev/null | grep -v '^0$')" ]; then
  echo "adopt-node: $(hostname) recorded swarm siblings in siblings.json"
else
  echo "adopt-node: WARNING $(hostname) recorded no sibling_ids in siblings.json (verify.sh decides)" >&2
fi
ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))' 2>/dev/null || true)
if [ -n "$ASTRALD_APPHOST_TOKEN" ]; then
  export ASTRALD_APPHOST_TOKEN
  if astral-query user.swarm_status -out json 2>/dev/null | grep -q '"Linked":true'; then
    echo "adopt-node: $(hostname) reports a linked sibling"
  else
    echo "adopt-node: WARNING $(hostname) shows no linked sibling yet (verify.sh decides)" >&2
  fi
fi
echo "adopt-node: agent finished on $(hostname)"
EOS
)

echo "adopt-node: driving Qwen operator on $VM ..."
# assignment prefix carries the prompt to the guest; body re-parses it
# shellcheck disable=SC2029
netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY"

# Register friendly node aliases (node1/node2) on BOTH nodes so later tasks can
# address nodes by name (object-store --target node2, read of :..., etc.).
# Host-side; identities resolved from the mutual link (anonymous nodes.links).
# CONFIRM (live): dir.set_alias works for the anonymous host-side caller.
PEER="node2"
_remote_id() {  # $1 = vm; prints the first RemoteIdentity from its nodes.links
  netsim ssh "$1" -- "astral-query nodes.links -out json" 2>/dev/null | python3 -c '
import json,sys
for ln in sys.stdin:
    ln=ln.strip()
    if not ln: continue
    try: o=json.loads(ln)
    except Exception: continue
    ob=o.get("Object")
    if isinstance(ob,dict) and ob.get("RemoteIdentity"):
        print(ob["RemoteIdentity"]); break'
}
# NOTE(review): takes the FIRST link's peer; presumably each node has exactly one
# link at this stage -- confirm if more links can exist here.
node2_id=$(_remote_id "$VM" || true)     # node1's link -> node2
node1_id=$(_remote_id "$PEER" || true)   # node2's link -> node1
if [ -n "$node1_id" ] && [ -n "$node2_id" ]; then
  for vm in "$VM" "$PEER"; do
    # alias registration is best-effort: failures are silenced and do not fail the task
    netsim ssh "$vm" -- "astral-query dir.set_alias -id '$node1_id' -alias node1 >/dev/null 2>&1; astral-query dir.set_alias -id '$node2_id' -alias node2 >/dev/null 2>&1" || true
  done
  echo "adopt-node: registered aliases node1=$node1_id node2=$node2_id on $VM + $PEER"
else
  echo "adopt-node: WARNING could not resolve node identities for aliases (n1='$node1_id' n2='$node2_id')" >&2
fi
echo "adopt-node: done on $VM"
def _clean(value):
    """Return `value` as a string with all whitespace removed ("" for None)."""
    # why: str(None) would otherwise yield the truthy literal "None"
    if value is None:
        return ""
    return "".join(str(value).split())


def _collect_errors(user, i1, s1, sib, i2, s2, n2_sib, linkback, sib_ids):
    """Judge the gathered swarm facts; return a list of failure messages ([] = pass).

    user      -- User id recorded in node1's user.json ("" if missing)
    i1, s1    -- issuer / subject of node1's active contract (None if absent)
    sib       -- node1's first Linked sibling
    i2, s2    -- issuer / subject of node2's active contract
    n2_sib    -- node2's first Linked sibling
    linkback  -- whether node2 holds a link to node1's subject
    sib_ids   -- sibling ids the agent recorded in ~/siblings.json
    """
    errs = []
    if not user:
        errs.append("no user_id in node1's user.json")
    if i1 != user:
        errs.append(f"node1 contract issuer {i1} != User {user}")
    if i2 != user:
        errs.append(f"node2 contract issuer {i2} != User {user} (node2 not adopted under this User)")
    if not s1:
        errs.append("node1 has no active contract subject")
    if not s2:
        errs.append("node2 has no active contract subject")
    if s2 and sib != s2:
        errs.append(f"node1's linked sibling {sib} != node2 {s2}")
    if s1 and n2_sib != s1:
        errs.append(f"node2's linked sibling {n2_sib} != node1 {s1} "
                    "(node2 does not list node1 -- swarm roster not symmetric; #348 regression?)")
    if not linkback:
        errs.append(f"node2 has no active link back to node1 ({s1})")
    if not sib_ids:
        errs.append("node1 recorded no sibling_ids in ~/siblings.json")
    elif s2 and s2 not in sib_ids:
        errs.append(f"node1's recorded sibling_ids {sib_ids} do not include adopted node {s2}")
    return errs


def main():
    """Verify node1 and node2 form one User swarm with a symmetric roster.

    Reads node1's user.json / siblings.json, queries both nodes' apphosts, and
    returns 0 when every check passes, 1 otherwise (failures go to stderr).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--node1", default="node1")
    ap.add_argument("--node2", default="node2")
    args, _ = ap.parse_known_args()
    vm1, vm2 = args.node1, args.node2

    info1 = astralapi.home_json(vm1, "user.json")
    siblings = astralapi.home_json(vm1, "siblings.json")   # adopt-node agent: swarm sibling ids
    recorded = siblings.get("sibling_ids")
    # why: the prompt asks for a list; iterating a bare string would yield characters
    if not isinstance(recorded, list):
        recorded = []
    sib_ids = [_clean(x) for x in recorded if x]
    U = _clean(info1.get("user_id"))
    token = info1.get("user_token") or ""

    # node1 acts as the User (token from bootstrap-user-software-key); node2 answers
    # under its node identity (it holds the contract after the adoption).
    with astralapi.connect(vm1, token=token) as n1:
        i1, s1 = astralapi.contract(n1.call("user.info"))
        sib = astralapi.linked_sibling(n1.call("user.swarm_status"))
    # node2's own swarm view: swarm_status derives from node2's active contract,
    # not the caller, so no token is needed; post-#348 it must list node1 too.
    with astralapi.connect(vm2) as n2:
        i2, s2 = astralapi.contract(n2.call("user.info"))
        # why: without a node1 subject there is nothing to link back to; never
        # let a None identity match link entries that lack RemoteIdentity
        linkback = bool(s1) and astralapi.has_link_to(n2.call("nodes.links"), s1)
        n2_sib = astralapi.linked_sibling(n2.call("user.swarm_status"))

    errs = _collect_errors(U, i1, s1, sib, i2, s2, n2_sib, linkback, sib_ids)
    if errs:
        sys.stderr.write("adopt-node verify FAILED:\n")
        for e in errs:
            sys.stderr.write(f"  - {e}\n")
        return 1

    print(f"swarm OK: User {U[:8]}.. ; node1 {s1[:8]}.. <-link-> node2 {s2[:8]}.. ; "
          f"both under one User; each lists the other as a Linked sibling (symmetric roster)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
#!/bin/sh
# bootstrap-user-software-key: turn the operator node into a User-controlled node, driven by
# the Qwen Code agent running INSIDE the VM.
#   bootstrap-user-software-key [--vm <host>]      (default: node1 — the VM carrying Qwen)
#
# Runs ON THE HOST (cwd = simulation root). This script is deliberately tiny: it
# ships prompt.md to the agent on the guest and lets the agent do the astral
# work via astral-query against the local node API. The intelligence lives in
# the prompt and — by design — in the agent's astral-agent skill, not here. The
# whole remote program travels as ONE argv to `netsim ssh` (no reliance on stdin
# forwarding); the prompt rides along base64-encoded so a multi-line file never
# fights shell quoting.
set -eu

VM="node1"
while [ $# -gt 0 ]; do
  case "$1" in
    --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;;
    *) echo "usage: bootstrap-user-software-key [--vm ]" >&2; exit 64 ;;
  esac
done

# CDPATH= is an intentional one-shot env prefix for cd, not an assignment
# shellcheck disable=SC1007
here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; }
prompt_b64=$(base64 -w0 "$here/prompt.md")   # GNU coreutils; -w0 = single line

# Program executed on the guest. The quoted heredoc ('EOS') keeps it literal here;
# $prompt_b64 inside it is resolved on the guest from the assignment prefixed below.
REMOTE_BODY=$(cat <<'EOS'
set -eu
d=/home/tester/.netsim
mkdir -p "$d"
printf '%s' "$prompt_b64" | base64 -d > "$d/bootstrap-user-software-key.prompt"
chown -R tester:tester "$d"

# Run the agent as `tester` (qwen is installed for that user), non-interactively.
# Invocation matches what was validated against the live lab: one-shot positional
# prompt + `-y` (auto-approve). The prompt is passed positionally via command
# substitution; the substituted text is used literally (not re-scanned), so the
# backticks and $-signs inside it are safe.
su - tester -c 'qwen -y "$(cat /home/tester/.netsim/bootstrap-user-software-key.prompt)"' \
  > "$d/bootstrap-user-software-key.log" 2>&1 || {
    echo "qwen run failed on $(hostname); tail of log:" >&2
    tail -n 40 "$d/bootstrap-user-software-key.log" >&2
    exit 1
  }

# Cheap smoke-check; verify.sh does the authoritative, independent check. The agent
# records its outputs in $HOME/user.json (/home/tester/user.json).
# A missing or unparsable user.json yields an empty uid (|| true) and fails the run below.
uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))' 2>/dev/null || true)
[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/user.json on $(hostname)" >&2; exit 1; }
echo "bootstrap-user-software-key: agent finished on $(hostname); User id $uid"
EOS
)

echo "bootstrap-user-software-key: driving Qwen operator on $VM ..."
# assignment prefix carries the prompt to the guest; body re-parses it
# shellcheck disable=SC2029
netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY"
echo "bootstrap-user-software-key: done on $VM"
+set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) VM=$2; shift 2 ;; + *) shift ;; + esac +done + +REMOTE_CHECK=$(cat <<'EOS' +set -eu +info=/home/tester/user.json +[ -s "$info" ] || { echo "no $info on $(hostname)" >&2; exit 1; } +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))') +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))') +export ASTRALD_APPHOST_TOKEN +[ -n "$uid" ] || { echo "no user_id in $info on $(hostname)" >&2; exit 1; } +[ -n "$ASTRALD_APPHOST_TOKEN" ] || { echo "no user_token in $info on $(hostname)" >&2; exit 1; } + +# acting as the User: whoami must report the User identity +who=$(astral-query apphost.whoami -out json) \ + || { echo "apphost.whoami failed on $(hostname)" >&2; exit 1; } +echo "$who" | grep -q "$uid" \ + || { echo "whoami != User id on $(hostname): $who" >&2; exit 1; } + +# active contract present (user.info rejects with code 2 if none) +astral-query user.info -out json \ + || { echo "user.info failed on $(hostname) -- no active contract?" >&2; exit 1; } + +echo "$(hostname): user node OK (User $uid)" +EOS +) + +netsim ssh "$VM" -- "$REMOTE_CHECK" \ + || { echo "bootstrap-user-software-key verify FAILED on $VM" >&2; exit 1; } +echo "verified user node on: $VM" diff --git a/netsim/tasks/configure-astral-agent/README.md b/netsim/tasks/configure-astral-agent/README.md new file mode 100644 index 00000000..202becfd --- /dev/null +++ b/netsim/tasks/configure-astral-agent/README.md @@ -0,0 +1,3 @@ +# configure-astral-agent + +On node1, the host clones the satforge/skills repo with a deploy key, builds the linker, and links the astral-agent skill into the Qwen operator at `~/.qwen/skills/astral-agent`. verify.sh asserts the linked skill is present and owned by the operator. 
diff --git a/netsim/tasks/configure-astral-agent/run.sh b/netsim/tasks/configure-astral-agent/run.sh new file mode 100755 index 00000000..83c2016b --- /dev/null +++ b/netsim/tasks/configure-astral-agent/run.sh @@ -0,0 +1,90 @@ +#!/bin/sh +# configure-astral-agent: install the astral-agent skill into the Qwen Code +# operator by having the VM clone the (private) satforge/skills repo with an +# injected deploy key and run the linker itself. +# configure-astral-agent [--vm ] [--user ] +# Default: --vm node1 --user tester (the operator created by install-qwen-code). +# +# The HOST owns the deploy key; the VM never needs git credentials of its own. +# run.sh reads the private key path from $SATFORGE_SKILLS_DEPLOY_KEY, base64-ships +# it in over a single `netsim ssh` argv, and the guest then: +# 1. installs the key for the operator and clones +# ssh://git@git.satforge.dev/satforge/skills.git over SSH via the deploy key +# (submodules resolve per the repo's .gitmodules), +# 2. builds the satforge-skills linker (Go is already on the node from +# install-astrald), +# 3. runs `link astral-agent --target qwen` -> ~/.qwen/skills/astral-agent. +# +# NOTE: for now the deploy key is LEFT in the VM (simpler; lets the operator +# re-clone/pull skills later), which means it also lives in the saved snapshot. +# We may switch to wiping the key before the snapshot if that exposure matters. 
+set -eu + +VM=node1 +USER_NAME=tester +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --user) [ $# -ge 2 ] || { echo "need name after --user" >&2; exit 64; }; USER_NAME=$2; shift 2 ;; + *) echo "usage: configure-astral-agent [--vm ] [--user ]" >&2; exit 64 ;; + esac +done + +REPO=${SATFORGE_SKILLS_REPO:-ssh://git@git.satforge.dev/satforge/skills.git} +REF=${SATFORGE_SKILLS_REF:-} # optional branch/tag/sha to check out (default: clone's default branch) +KEY=${SATFORGE_SKILLS_DEPLOY_KEY:-} +[ -n "$KEY" ] || { echo "set SATFORGE_SKILLS_DEPLOY_KEY to the deploy key path for $REPO" >&2; exit 1; } +[ -r "$KEY" ] || { echo "deploy key not readable: $KEY" >&2; exit 1; } +key_b64=$(base64 -w0 "$KEY") + +REMOTE_BODY=$(cat <<'EOS' +set -eu +home=$(getent passwd "$u" | cut -d: -f6) +[ -n "$home" ] || { echo "user '$u' not found on $(hostname)" >&2; exit 1; } +command -v git >/dev/null 2>&1 || { echo "git missing on $(hostname)" >&2; exit 1; } + +install -d -m 700 -o "$u" -g "$u" "$home/.ssh" "$home/.netsim" +printf '%s' "$key_b64" | base64 -d > "$home/.ssh/skills_deploy" +chmod 600 "$home/.ssh/skills_deploy" +chown "$u:$u" "$home/.ssh/skills_deploy" + +# Guest-side provisioning, run as the operator. Quoted heredoc: fully literal; +# repo arrives as a positional arg. The git host's key is auto-accepted on first +# connect (StrictHostKeyChecking=accept-new). +cat > "$home/.netsim/setup-skill.sh" <<'SCRIPT' +#!/bin/sh +set -eu +export PATH=/usr/local/go/bin:$PATH +export GIT_SSH_COMMAND="ssh -i $HOME/.ssh/skills_deploy -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new" +repo=$1 +ref=$2 +src=$HOME/satforge-skills +[ -d "$src/.git" ] || git clone --recurse-submodules "$repo" "$src" +cd "$src" +if [ -n "$ref" ]; then + # Fail loudly if the ref can't be fetched -- otherwise we'd silently link the + # default-branch skill (missing whatever the ref was supposed to add). 
+ git fetch --quiet origin "$ref" + git rev-parse --verify --quiet "origin/$ref" >/dev/null \ + || { echo "skills ref '$ref' not found on origin" >&2; exit 1; } + git checkout --quiet -B "$ref" "origin/$ref" + git reset --hard --quiet "origin/$ref" +else + git pull --ff-only --quiet 2>/dev/null || true +fi +git submodule update --init --recursive --quiet +go build -C bin/satforge-skills -o satforge-skills . +bin="$src/bin/satforge-skills/satforge-skills" +"$bin" unlink astral-agent --target qwen >/dev/null 2>&1 || true # idempotent re-run +"$bin" link astral-agent --target qwen +SCRIPT +chown "$u:$u" "$home/.netsim/setup-skill.sh" + +su - "$u" -c "sh '$home/.netsim/setup-skill.sh' '$repo' '$ref'" +echo "configure-astral-agent: $(hostname) cloned skills + linked astral-agent (deploy key left in place)" +EOS +) + +echo "configure-astral-agent: injecting deploy key + linking on $VM (user $USER_NAME) ..." +netsim ssh "$VM" -- "u='$USER_NAME' key_b64='$key_b64' repo='$REPO' ref='$REF'; $REMOTE_BODY" +echo "configure-astral-agent: done on $VM" diff --git a/netsim/tasks/configure-astral-agent/verify.sh b/netsim/tasks/configure-astral-agent/verify.sh new file mode 100755 index 00000000..64994880 --- /dev/null +++ b/netsim/tasks/configure-astral-agent/verify.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# verify configure-astral-agent (same args as run.sh): the astral-agent skill is +# installed for the operator where Qwen Code reads it +# (~/.qwen/skills/astral-agent), with SKILL.md frontmatter intact, the +# references/ dir, and the astral-docs mount present, owned by the operator. 
+set -eu + +VM=node1 +USER_NAME=tester +while [ $# -gt 0 ]; do + case "$1" in + --vm) VM=$2; shift 2 ;; + --user) USER_NAME=$2; shift 2 ;; + *) shift ;; + esac +done + +REMOTE_CHECK=$(cat <<'EOS' +set -eu +home=$(getent passwd "$u" | cut -d: -f6) +d="$home/.qwen/skills/astral-agent" +[ -f "$d/SKILL.md" ] || { echo "missing $d/SKILL.md on $(hostname)" >&2; exit 1; } +head -n1 "$d/SKILL.md" | grep -qx -- '---' || { echo "SKILL.md frontmatter missing on $(hostname)" >&2; exit 1; } +[ -d "$d/references" ] || { echo "missing references/ on $(hostname)" >&2; exit 1; } +[ -f "$d/astral-docs/README.md" ] || { echo "astral-docs mount missing on $(hostname)" >&2; exit 1; } +owner=$(stat -c '%U' "$d") +[ "$owner" = "$u" ] || { echo "astral-agent owned by '$owner', expected '$u' on $(hostname)" >&2; exit 1; } +echo "$(hostname): astral-agent present for $u ($(find -L "$d" -type f 2>/dev/null | wc -l) files via symlinks), frontmatter intact" +EOS +) + +netsim ssh "$VM" -- "u='$USER_NAME'; $REMOTE_CHECK" \ + || { echo "configure-astral-agent verify FAILED on $VM" >&2; exit 1; } +echo "verified astral-agent skill on: $VM" diff --git a/netsim/tasks/configure-nat-tor/README.md b/netsim/tasks/configure-nat-tor/README.md new file mode 100644 index 00000000..e38b9084 --- /dev/null +++ b/netsim/tasks/configure-nat-tor/README.md @@ -0,0 +1,26 @@ +# configure-nat-tor + +Relocates a NAT'd node's **Tor into the same network namespace as its astrald** — the +piece `enter-nat` can't do, needed before the pair can re-link (and punch) over Tor. + +`enter-nat` moves astrald into netns `priv`, so its `127.0.0.1` becomes the *netns* +loopback. astrald's `tor` module needs Tor at `127.0.0.1:9050`/`:9051`, and — the part a +config knob can't fix — its onion service's **local listener is hardcoded `127.0.0.1:0`** +(`mod/tor/src/server.go`), which Tor dials on **inbound** onion connections. 
So a root-ns +Tor can neither be reached nor deliver inbound onion to a netns'd astrald; **Tor must live +in the netns too.** + +On each `--vm` (default `node1 node2`), run **after** `enable-tor` and `enter-nat`: + +1. **WAN masquerade** for `192.168.99.0/24` out the default-route (slirp WAN) NIC, so + Tor-in-netns can reach the real Tor network. Routing splits by destination: + `198.51.100.0/24` (peers) → `lan0` via `enter-nat`'s SNAT; internet (Tor) → WAN. +2. **Move `tor@default.service` into netns `priv`** via a `NetworkNamespacePath` systemd + drop-in (same idiom `enter-nat` uses for astrald; net ns only, so `torrc` is untouched). +3. Restart **Tor first** (binds netns `127.0.0.1:9050/9051`), then **astrald** (its tor + module connects to the control port once at start, no retry). + +Self-validating: waits for the control port inside the netns, then confirms astrald +**re-publishes its onion** — the end-to-end proof that bootstrap (via the WAN masquerade), +control, and the netns-local onion listener all work. **No astrald source/config change.** +Host-driven. Used by the NAT-punch story after `enable-tor` + `enter-nat`. diff --git a/netsim/tasks/configure-nat-tor/run.sh b/netsim/tasks/configure-nat-tor/run.sh new file mode 100755 index 00000000..3cea11d6 --- /dev/null +++ b/netsim/tasks/configure-nat-tor/run.sh @@ -0,0 +1,116 @@ +#!/bin/sh +# configure-nat-tor: relocate a NAT'd node's Tor into its private netns so astrald +# (moved into netns "priv" by enter-nat) regains full Tor -- inbound AND outbound onion. +# +# Why a dedicated task (not folded into enter-nat): astrald's tor module reaches Tor at +# 127.0.0.1:9050 (SOCKS) / 127.0.0.1:9051 (control), AND its onion service's local +# listener is hardcoded 127.0.0.1:0 (mod/tor/src/server.go) which Tor dials on inbound. +# Once enter-nat moves astrald into netns "priv", that 127.0.0.1 is the netns loopback, so +# a root-ns Tor can neither be reached for SOCKS/control nor deliver inbound onion. 
Fix +# (no astrald change): run Tor INSIDE the same netns, and give the netns internet egress +# (WAN masquerade) so Tor can still reach the real Tor network. On each --vm: +# * WAN masquerade for 192.168.99.0/24 (Tor's internet path). enter-nat's LAN SNAT to +# 198.51.100.x still handles peer traffic -- routing splits by destination. +# * move tor@default.service into netns "priv" via a systemd drop-in, restart it there; +# * restart astrald (already in the netns) so its tor module re-inits against the now +# netns-local control port, then confirm it re-publishes its onion (end-to-end proof). +# +# Run AFTER enable-tor (Tor installed + control port) and enter-nat (netns + astrald in it). +# configure-nat-tor [--vm ]... (default: node1 node2) +set -eu + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + *) echo "usage: configure-nat-tor [--vm ]..." >&2; exit 64 ;; + esac +done +[ -n "$VMS" ] || VMS="node1 node2" + +REMOTE_BODY=$(cat <<'EOS' +set -eu + +# preconditions from enter-nat / enable-tor +ip netns list 2>/dev/null | grep -qw priv \ + || { echo "configure-nat-tor: netns priv missing on $(hostname) (run enter-nat first)" >&2; exit 1; } +systemctl cat tor@default.service >/dev/null 2>&1 \ + || { echo "configure-nat-tor: tor@default.service not found on $(hostname) (run enable-tor first)" >&2; exit 1; } + +# 1) WAN egress for the netns so Tor-in-netns can reach the real Tor network. The slirp WAN +# NIC is the default-route interface (it keeps its kernel name; only lan0 is renamed). +wan=$(ip route show default | awk '{print $5; exit}') +[ -n "$wan" ] || { echo "configure-nat-tor: no default route / WAN nic on $(hostname)" >&2; exit 1; } +# idempotent append to enter-nat's existing ip/nat postrouting chain (keeps the LAN SNAT). 
+nft list chain ip nat postrouting 2>/dev/null | grep -q "oifname \"$wan\" masquerade" \ + || nft add rule ip nat postrouting ip saddr 192.168.99.0/24 oifname "$wan" masquerade + +# 2) move the Tor daemon into netns "priv" (Debian runs it as tor@default.service; the +# tor.service wrapper pulls it in). Same NetworkNamespacePath idiom enter-nat used for +# astrald -- joins only the NET ns, so torrc (mount ns) is untouched. +mkdir -p /etc/systemd/system/tor@default.service.d +cat > /etc/systemd/system/tor@default.service.d/netns.conf </dev/null | grep -q '127.0.0.1:9051'; then ok=1; break; fi + sleep 1 +done +[ -n "$ok" ] || { + echo "configure-nat-tor: tor control 9051 did not open in netns priv on $(hostname)" >&2 + journalctl -u tor@default --no-pager 2>&1 | tail -20 >&2 || true + exit 1 +} + +# 4) restart astrald (already in the netns) so its tor module re-inits against the now +# netns-local control port, then confirm it re-publishes an onion. Success here proves +# Tor-in-netns end to end: bootstrap via the WAN masquerade + control + the onion local +# listener are ALL netns-local. astrald's onion key persists under -root (shared mount +# ns), so it comes back as the same onion. +systemctl restart astrald +onion= +for _ in $(seq 1 90); do + if systemctl is-active --quiet astrald; then + # astrald is in netns "priv"; astral-query defaults to tcp:127.0.0.1:8625 (netns-local). 
+ onion=$(ip netns exec priv astral-query nodes.resolve_endpoints -id localnode -out json 2>/dev/null | python3 -c ' +import json,sys +def addr(ep): + if isinstance(ep, str): return ep + if isinstance(ep, dict): + o = ep.get("Object"); return o if isinstance(o, str) else "" + return "" +for ln in sys.stdin: + ln = ln.strip() + if not ln: continue + try: o = json.loads(ln) + except Exception: continue + a = addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: print(a); break') + [ -n "$onion" ] && break + fi + sleep 2 +done +[ -n "$onion" ] || { + echo "configure-nat-tor: astrald did not re-publish a tor onion in netns on $(hostname)" >&2 + journalctl -u tor@default --no-pager 2>&1 | tail -20 >&2 || true + journalctl -u astrald --no-pager 2>&1 | tail -20 >&2 || true + exit 1 +} +echo "configure-nat-tor: $(hostname) Tor now in netns priv (onion=$onion, wan=$wan)" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "configure-nat-tor: relocating Tor into $vm's netns ..." + # shellcheck disable=SC2029 + netsim ssh "$vm" -- "$REMOTE_BODY" +done +echo "configure-nat-tor: done ($VMS)" diff --git a/netsim/tasks/enable-tor/README.md b/netsim/tasks/enable-tor/README.md new file mode 100644 index 00000000..547caeeb --- /dev/null +++ b/netsim/tasks/enable-tor/README.md @@ -0,0 +1,3 @@ +# enable-tor + +On each target VM, installs Tor with its control port, restarts astrald to publish an onion, and saves the node's own endpoint to `/root/tor.json`. verify.py asserts each VM runs tor and its saved onion matches the one astrald advertises via `nodes.resolve_endpoints`. diff --git a/netsim/tasks/enable-tor/run.sh b/netsim/tasks/enable-tor/run.sh new file mode 100755 index 00000000..3ded7237 --- /dev/null +++ b/netsim/tasks/enable-tor/run.sh @@ -0,0 +1,89 @@ +#!/bin/sh +# enable-tor: bring up an astrald node with a Tor endpoint. Three steps per node: +# 1. 
install Tor and enable its control port (astrald's tor module uses SOCKS +# 127.0.0.1:9050 + control 127.0.0.1:9051 with cookie auth; stock Debian tor gives +# SOCKS but leaves the control port off); +# 2. restart astrald so its tor module re-initializes against the now-present control +# port (it connects only at start, with no retry) and publishes an onion service; +# 3. read the node's own Tor endpoint and save it to /root/tor.json. +# enable-tor [--vm ]... (no --vm -> every running VM) +# +# Runs ON THE HOST (cwd = sim root); ssh lands as root. astrald runs as root, so it can +# read Tor's control cookie regardless of its mode. +set -eu + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + *) echo "usage: enable-tor [--vm ]..." >&2; exit 64 ;; + esac +done +if [ -z "$VMS" ]; then + VMS=$(netsim vm ls --json | python3 -c \ + 'import json,sys; print(" ".join(v["hostname"] for v in json.load(sys.stdin) if v["state"]=="running"))') +fi +[ -n "$VMS" ] || { echo "no running VMs" >&2; exit 1; } + +REMOTE_BODY=$(cat <<'EOS' +set -eu +export DEBIAN_FRONTEND=noninteractive + +# 1) install Tor and enable the control port (cookie auth, loopback) +command -v tor >/dev/null 2>&1 || { + apt-get -qq -o DPkg::Lock::Timeout=300 update + apt-get -qq -y -o DPkg::Lock::Timeout=300 install tor >/dev/null +} +torrc=/etc/tor/torrc +grep -q '^ControlPort 9051' "$torrc" || printf '\nControlPort 9051\nCookieAuthentication 1\n' >> "$torrc" +systemctl restart tor +ok= +for _ in $(seq 1 30); do + if ss -ltn 2>/dev/null | grep -q '127.0.0.1:9051'; then ok=1; break; fi + sleep 1 +done +[ -n "$ok" ] || { echo "tor control port 9051 did not open on $(hostname)" >&2; exit 1; } + +# 2) restart astrald so its tor module re-initializes against the control port +systemctl restart astrald + +# 3) read the node's own onion endpoint and save it to /root/tor.json +onion= +for _ in $(seq 1 90); do + if 
systemctl is-active --quiet astrald; then + onion=$(astral-query nodes.resolve_endpoints -id localnode -out json 2>/dev/null | python3 -c ' +import json,sys +def addr(ep): + if isinstance(ep, str): return ep + if isinstance(ep, dict): + o = ep.get("Object"); return o if isinstance(o, str) else "" + return "" +for ln in sys.stdin: + ln = ln.strip() + if not ln: continue + try: o = json.loads(ln) + except Exception: continue + a = addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: print(a); break') + [ -n "$onion" ] && break + fi + sleep 2 +done +[ -n "$onion" ] || { + echo "astrald did not publish a tor onion on $(hostname)" >&2 + journalctl -u astrald --no-pager 2>&1 | tail -30 >&2 || true + exit 1 +} +python3 -c 'import json,sys; json.dump({"onion": sys.argv[1], "endpoint": "tor:"+sys.argv[1]}, open("/root/tor.json","w"))' "$onion" +echo "enable-tor: $(hostname) tor up; onion=$onion (saved /root/tor.json)" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "enable-tor: bringing up Tor on $vm ..." + # shellcheck disable=SC2029 + netsim ssh "$vm" -- "$REMOTE_BODY" +done +echo "enable-tor: done on: $VMS" diff --git a/netsim/tasks/enable-tor/verify.py b/netsim/tasks/enable-tor/verify.py new file mode 100644 index 00000000..8c5efb6e --- /dev/null +++ b/netsim/tasks/enable-tor/verify.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""verify enable-tor: each target VM runs Tor and saved its own onion endpoint. + +Independent host-side check (does not trust run.sh): on each VM the tor service is +active, /root/tor.json holds an onion endpoint, and that saved onion matches what +astrald actually advertises now (nodes.resolve_endpoints -id localnode). + +Queries reach each VM's apphost through the shared astral-py client +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. 
+""" +import argparse +import os +import sys + +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import astralapi # noqa: E402 + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", action="append", default=[]) + args, _ = ap.parse_known_args() + vms = args.vm or astralapi.all_running_vms() + if not vms: + sys.stderr.write("enable-tor verify FAILED: no VMs to verify\n") + return 1 + + bad = False + for vm in vms: + tor_active = astralapi.ssh(vm, "systemctl is-active tor 2>/dev/null").strip() == "active" + file_onion = str(astralapi.read_json(vm, "/root/tor.json").get("onion", "")) + with astralapi.connect(vm) as node: + live = astralapi.resolve_onion(node.call("nodes.resolve_endpoints", {"id": "localnode"})) + + errs = [] + if not tor_active: + errs.append("the tor service is not active") + if not file_onion: + errs.append("no onion in /root/tor.json") + if not live: + errs.append("astrald advertises no onion (resolve_endpoints -id localnode)") + if file_onion and live and file_onion != live: + errs.append(f"saved onion {file_onion} != live onion {live}") + + if errs: + bad = True + sys.stderr.write(f"enable-tor verify FAILED on {vm}:\n") + for e in errs: + sys.stderr.write(f" - {e}\n") + else: + print(f"enable-tor OK: {vm} runs tor and saved its onion {file_onion} to /root/tor.json") + return 1 if bad else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/enable-tor/verify.sh b/netsim/tasks/enable-tor/verify.sh new file mode 100755 index 00000000..07821264 --- /dev/null +++ b/netsim/tasks/enable-tor/verify.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. netsim sets $NETSIM_TASK_DIR +# to this task's directory and only auto-runs run.sh/verify.sh (the dirname +# fallback covers running this script directly). 
+exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/enter-nat/README.md b/netsim/tasks/enter-nat/README.md new file mode 100644 index 00000000..66c7ee94 --- /dev/null +++ b/netsim/tasks/enter-nat/README.md @@ -0,0 +1,34 @@ +# enter-nat + +Puts a node's astrald behind its own **symmetric, true-masquerade NAT** — the `leave-lan` +analog for the NAT scenario. + +On each `--vm` (default `node1 node2`): + +- creates netns **`priv`** holding the private host `192.168.99.2`, wired to the VM by a + `veth` pair (`192.168.99.1` on the VM side); +- installs a **port-preserving SNAT** of `192.168.99.0/24` to a per-node public TEST-NET + alias **`198.51.100.<octet>`** (the last octet of the node's `10.77` address) on the LAN NIC (`lan0`) — validated as + endpoint-independent (cone) by `nat-eim-probe` — plus an inbound DNAT of that alias back into the netns; +- relaunches astrald **inside the netns** via a systemd drop-in + (`NetworkNamespacePath=/run/netns/priv`), which joins only the *network* namespace, so + its `-root` files stay in the shared mount namespace and astrald keeps its identity. Its + apphost endpoint (`tcp:127.0.0.1:8625`) is now netns-local: query it with `ip netns exec priv astral-query …`. + +Moving astrald off the flat `10.77` address withdraws its direct LAN endpoint (astrald +polls `InterfaceAddrs`; in the netns it sees only `192.168.99.2`), so the pair re-links +over Tor — exactly the `leave-lan` dynamic. + +**This task only builds the NAT; it does not punch.** astrald cannot see its own public +alias (that is what masquerade means), so its `nat` module stays **disabled** until the +`reflector` node reflects that endpoint back — see `add-reflector`. The pre-punch +milestone is: after `enter-nat` + `add-reflector`, `nat` reports **enabled** on both peers. + +## Notes / follow-ups + +- The netns currently routes only to the LAN (for reflection and, later, the peer punch).
+ The **punch increment** additionally needs the netns routed to the WAN (slirp) so + the Tor signaling link can form from inside the netns — the `masquerade` rule for the + WAN NIC is added by `configure-nat-tor`, run after this task. +- Verify via `ip netns exec priv astral-query …`: `astral-query` dials `tcp:127.0.0.1:8625`, which is the + netns loopback once astrald is relocated; the apphost WS port now lives inside the netns too. diff --git a/netsim/tasks/enter-nat/run.sh b/netsim/tasks/enter-nat/run.sh new file mode 100755 index 00000000..0100dd02 --- /dev/null +++ b/netsim/tasks/enter-nat/run.sh @@ -0,0 +1,127 @@ +#!/bin/sh +# enter-nat: put a node's astrald behind its own (symmetric, true-masquerade) NAT. +# +# The leave-lan analog: relocating astrald into a private network namespace severs its +# direct 10.77 LAN path, so the swarm link maintainer re-links the pair over Tor -- and +# the node is now a genuine NAT'd peer. On each --vm: +# * create netns "priv" (192.168.99.2) wired to the VM by a veth pair; +# * port-preserving SNAT of 192.168.99.0/24 to a per-node public TEST-NET alias +# 198.51.100.<octet> on the LAN NIC (validated as endpoint-independent/cone by +# nat-eim-probe), plus an inbound DNAT of that alias back into the netns so the box is a +# real cone-NAT gateway (without it inbound punch packets hit the local INPUT and the +# punch never completes -- see the nat table section below); +# * relaunch astrald INSIDE the netns (same -root, so same identity) via a systemd +# drop-in (NetworkNamespacePath -- joins only the NET ns; the -root/apphost files stay +# in the shared mount ns). +# +# Reaching the netns'd astrald: `astral-query` defaults to tcp:127.0.0.1:8625 +# (lib/apphost DefaultEndpoint; only the TOKEN is env-overridable, not the endpoint), and +# once astrald is in "priv" that 127.0.0.1 is the NETNS loopback -- unreachable from the +# root ns. So EVERY astral-query against a NAT'd node must run inside the netns: +# `ip netns exec priv astral-query ...` (see add-reflector / verify / configure-nat-tor).
+# +# astrald cannot see its own public alias -- that is what masquerade means -- so its nat +# module stays disabled until the reflector node reflects that endpoint back (see +# add-reflector). This task only builds the NAT; it does NOT punch. +# enter-nat [--vm ]... (default: node1 node2; one call NATs each peer) +set -eu + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + *) echo "usage: enter-nat [--vm ]..." >&2; exit 64 ;; + esac +done +[ -n "$VMS" ] || VMS="node1 node2" + +REMOTE_BODY=$(cat <<'EOS' +set -eu +export DEBIAN_FRONTEND=noninteractive +command -v nft >/dev/null 2>&1 || { + apt-get -qq -o DPkg::Lock::Timeout=120 update + apt-get -qq -y -o DPkg::Lock::Timeout=120 install nftables >/dev/null +} + +# the LAN NIC carries the 10.77 address; its last octet indexes our public alias. +lan=$(ip -o -4 addr show | awk '$4 ~ /^10\.77\./ {print $2; exit}') +[ -n "$lan" ] || { echo "enter-nat: no 10.77 LAN interface on $(hostname)" >&2; exit 1; } +oct=$(ip -o -4 addr show dev "$lan" | awk '$4 ~ /^10\.77\./ {n=$4; sub(/\/.*/,"",n); split(n,a,"."); print a[4]; exit}') +[ -n "$oct" ] || { echo "enter-nat: could not read 10.77 octet on $(hostname)" >&2; exit 1; } +pub="198.51.100.$oct" +ip addr add "$pub/24" dev "$lan" 2>/dev/null || true + +# private host 192.168.99.2 in netns "priv"; this VM is its only way out +ip netns add priv 2>/dev/null || true +ip link add veth0 type veth peer name veth0p 2>/dev/null || true +ip link set veth0p netns priv 2>/dev/null || true +ip addr add 192.168.99.1/24 dev veth0 2>/dev/null || true +ip link set veth0 up +ip -n priv addr add 192.168.99.2/24 dev veth0p 2>/dev/null || true +ip -n priv link set veth0p up; ip -n priv link set lo up +ip -n priv route replace default via 192.168.99.1 +sysctl -wq net.ipv4.ip_forward=1 +sysctl -wq net.ipv4.conf.all.rp_filter=2 +sysctl -wq net.netfilter.nf_conntrack_udp_timeout=60 2>/dev/null || 
true +sysctl -wq net.netfilter.nf_conntrack_udp_timeout_stream=180 2>/dev/null || true + +# port-preserving SNAT to the public alias (idempotent: rebuild the nat table) +nft add table ip nat 2>/dev/null || true +nft flush table ip nat +nft add chain ip nat postrouting '{ type nat hook postrouting priority 100 ; }' +nft add rule ip nat postrouting ip saddr 192.168.99.0/24 oifname "$lan" snat ip to "$pub" + +# Inbound DNAT of the public alias into the netns -- REQUIRED for the punch to complete. +# The alias 198.51.100. is a LOCAL address on this box (NAT and endpoint are collapsed +# onto one VM), so inbound punch packets are delivered to the local INPUT (no listener) and +# never reach the netns puncher; the conntrack-reply reverse-SNAT that should forward them +# fails under a source-port-realloc clash (the inbound-to-local-alias creates a conntrack +# entry that collides with the outbound SNAT). DNAT the alias to the netns host (port +# preserved) so the box acts as a real cone-NAT gateway -- inbound and outbound become one +# conntrack flow, no clash. Confirmed live: with this rule the punch promotes to a kcp link +# on BOTH peers; without it both punchers emit and packets reach each peer's lan0, but +# inbound is never delivered to the netns and the punch times out ("context deadline +# exceeded"). This makes the NAT a full-cone (endpoint-independent) NAT -- the permissive +# punchable type; a restricted-cone/symmetric simulation would need a separate router VM. +nft add chain ip nat prerouting '{ type nat hook prerouting priority -100 ; }' +nft add rule ip nat prerouting iif "$lan" ip daddr "$pub" dnat to 192.168.99.2 + +# move astrald into the netns: join only the NET namespace (mount ns untouched, so the +# apphost unix socket stays reachable from the root ns for astral-query). 
+mkdir -p /etc/systemd/system/astrald.service.d +cat > /etc/systemd/system/astrald.service.d/netns.conf </dev/null 2>&1; then + ok=1; break + fi + n=$((n + 1)); sleep 1 +done +if [ -z "$ok" ]; then + echo "enter-nat: astrald did not come back up in netns on $(hostname) after ${n}s" >&2 + systemctl status astrald --no-pager >&2 2>&1 || true + journalctl -u astrald --no-pager 2>&1 | tail -30 >&2 || true + exit 1 +fi + +# sanity: astrald must now be in the netns (its own 10.77 endpoint withdrawn) and see 192.168.99.2 +in_ns=$(ip netns identify "$(pgrep -x astrald | head -1)" 2>/dev/null || true) +echo "enter-nat: $(hostname) astrald behind NAT (priv 192.168.99.2 -> public $pub via $lan; netns=${in_ns:-?})" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "enter-nat: putting $vm behind its NAT ..." + # shellcheck disable=SC2029 + netsim ssh "$vm" -- "$REMOTE_BODY" +done +echo "enter-nat: done ($VMS)" diff --git a/netsim/tasks/expel-node/README.md b/netsim/tasks/expel-node/README.md new file mode 100644 index 00000000..271b1ca5 --- /dev/null +++ b/netsim/tasks/expel-node/README.md @@ -0,0 +1,3 @@ +# expel-node + +node1's agent expels node2 from the swarm via `user.expel`. verify.py asserts node2 is in `user.list_expelled` and gone from `user.swarm_status`. two-nodes → two-nodes-expel. diff --git a/netsim/tasks/expel-node/prompt.md b/netsim/tasks/expel-node/prompt.md new file mode 100644 index 00000000..3d9b433a --- /dev/null +++ b/netsim/tasks/expel-node/prompt.md @@ -0,0 +1 @@ +Expel the other node from your local astral swarm. diff --git a/netsim/tasks/expel-node/run.sh b/netsim/tasks/expel-node/run.sh new file mode 100755 index 00000000..e3b18def --- /dev/null +++ b/netsim/tasks/expel-node/run.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# expel-node: the User (node1's Qwen operator) permanently bans the peer node from +# the swarm, driven by the Qwen Code agent running INSIDE node1. 
node1 is already a +# User node with node2 adopted into its swarm (default starting stage: two-nodes). +# expel-node [--vm ] (default: node1 — the VM carrying Qwen) +# +# Runs ON THE HOST (cwd = simulation root). Same mechanic as adopt-node: tiny script, +# thin prompt, intelligence in the agent's astral-agent skill. The whole remote +# program travels as ONE argv to `netsim ssh`; the prompt rides along base64-encoded +# so a multi-line file never fights shell quoting. +set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: expel-node [--vm ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/expel-node.prompt" +chown -R tester:tester "$d" + +# Run the agent as `tester` (qwen is installed for that user), non-interactively. +# Invocation matches what was validated for adopt-node: one-shot positional prompt +# + `-y` (auto-approve). +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/expel-node.prompt)"' \ + > "$d/expel-node.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/expel-node.log" >&2 + exit 1 + } + +# Soft smoke-check only (verify.py is the authoritative, independent check). node1 +# holds the User token in $HOME/user.json, so we can peek at the swarm here; don't +# fail the run on a shape mismatch — leave the verdict to verify.py. 
+ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))' 2>/dev/null || true) +if [ -n "$ASTRALD_APPHOST_TOKEN" ]; then + export ASTRALD_APPHOST_TOKEN + if astral-query user.list_expelled -out json 2>/dev/null | grep -q '"Subject"'; then + echo "expel-node: $(hostname) records at least one expelled node" + else + echo "expel-node: WARNING $(hostname) shows no expelled node yet (verify.py decides)" >&2 + fi +fi +echo "expel-node: agent finished on $(hostname)" +EOS +) + +echo "expel-node: driving Qwen operator on $VM ..." +# assignment prefix carries the prompt to the guest; body re-parses it +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "expel-node: done on $VM" diff --git a/netsim/tasks/expel-node/verify.py b/netsim/tasks/expel-node/verify.py new file mode 100755 index 00000000..05136970 --- /dev/null +++ b/netsim/tasks/expel-node/verify.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +"""verify expel-node: node1 (the User) permanently banned node2 from the swarm. + +Independent check (does not trust run.sh). Asserts node2 is recorded in +user.list_expelled and is gone from node1's user.swarm_status roster +(user.OpSwarmStatus -> ActiveNodes filters the expelledSet). Link state is not +asserted. + +node2's identity comes from node1's siblings.json (recorded by adopt-node), NOT +from node2 itself: once expelled, node2 rejects user.info (query rejected (2) +untokened, auth_failed with the User token -- it no longer accepts the User it +was banned from), so it is not a usable identity source. + +Queries reach node1's apphost through the shared astral-py client +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. 
+""" +import argparse +import os +import sys + +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import astralapi # noqa: E402 + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--node1", default="node1") + ap.add_argument("--node2", default="node2") + args, _ = ap.parse_known_args() + vm1 = args.node1 + + # node1 acts as the User (token from bootstrap); list_expelled / swarm_status + # require the caller to be the contract issuer, so they run under that token. + info1 = astralapi.home_json(vm1, "user.json") + U = "".join(str(info1.get("user_id", "")).split()) + token = info1.get("user_token", "") + + # node2's identity from node1's siblings.json (recorded by adopt-node) -- a + # stable source. The expelled node itself can't be queried (post-ban node2 + # rejects user.info). + sibs = astralapi.home_json(vm1, "siblings.json") + sib_ids = ["".join(str(x).split()) for x in (sibs.get("sibling_ids") or []) if x] + expelled_id = sib_ids[0] if sib_ids else None + + with astralapi.connect(vm1, token=token) as n1: + n1_expelled = n1.call("user.list_expelled") + members = astralapi.swarm_identities(n1.call("user.swarm_status")) + + errs = [] + if not U: + errs.append("no user_id in node1's user.json") + if not expelled_id: + errs.append("no sibling_ids in node1's siblings.json -- can't identify the expelled node") + if expelled_id and not astralapi.is_expelled(n1_expelled, expelled_id): + errs.append(f"node2 {expelled_id} is NOT in node1's user.list_expelled " + "(expulsion was never issued -- agent did not expel the node)") + if expelled_id and expelled_id in members: + errs.append(f"node2 {expelled_id} still appears in node1's user.swarm_status " + "(roster not reduced -- expelledSet filter did not drop it)") + + if errs: + sys.stderr.write("expel-node verify FAILED:\n") + for e in errs: + sys.stderr.write(f" - 
{e}\n") + return 1 + + print(f"expel OK: User {U[:8]}.. banned node2 {expelled_id[:8]}.. -- recorded in " + f"user.list_expelled and dropped from user.swarm_status " + f"({len(members)} member(s) remain).") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/expel-node/verify.sh b/netsim/tasks/expel-node/verify.sh new file mode 100755 index 00000000..fac4b4ff --- /dev/null +++ b/netsim/tasks/expel-node/verify.sh @@ -0,0 +1,7 @@ +#!/bin/sh +# Thin shim — all verification logic lives in verify.py. Calling astral-query and +# walking its JSON streams is far cleaner in python than bash, so verify.sh just +# hands off. netsim sets $NETSIM_TASK_DIR to this task's directory and only +# auto-runs run.sh/verify.sh, so verify.py sits next to us and is invoked here +# (the dirname fallback covers running this script directly). +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/import-user-software-key/README.md b/netsim/tasks/import-user-software-key/README.md new file mode 100644 index 00000000..9cd86ef8 --- /dev/null +++ b/netsim/tasks/import-user-software-key/README.md @@ -0,0 +1,3 @@ +# import-user-software-key + +node1's agent makes node1 a User node from the BIP-39 mnemonic in `prompt.md`, deriving the existing key and installing its active contract. verify.sh asserts `apphost.whoami` reports that User id and `user.info` finds an active contract (matching `ASTRAL_USER_ID` if set). astrald-lab → one-node. 
diff --git a/netsim/tasks/import-user-software-key/prompt.md b/netsim/tasks/import-user-software-key/prompt.md new file mode 100644 index 00000000..a6692499 --- /dev/null +++ b/netsim/tasks/import-user-software-key/prompt.md @@ -0,0 +1,8 @@ +Make this node a User node from the given seed phrase — install its active user contract, not just import the key: + +``` +horse soldier imitate stool square buyer verb party enjoy result jazz rabbit trigger file benefit cloth term change +``` + +Save the user's id and an access token to `~/user.json` (as `user_id` and +`user_token`). diff --git a/netsim/tasks/import-user-software-key/run.sh b/netsim/tasks/import-user-software-key/run.sh new file mode 100755 index 00000000..488af62e --- /dev/null +++ b/netsim/tasks/import-user-software-key/run.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# import-user-software-key: configure the operator node as a User node from an +# EXISTING software User key — the User's BIP-39 mnemonic is embedded in prompt.md +# and derived instead of minting fresh entropy. Driven by the Qwen Code agent. +# import-user-software-key [--vm ] (default: node1 — the VM carrying Qwen) +# env: ASTRAL_USER_ID (optional; verify.sh asserts the derived id matches it) +# +# Drop-in alternative to bootstrap-user-software-key. Runs ON THE HOST (cwd = +# simulation root): base64-ships prompt.md to the agent over one `netsim ssh` argv +# and runs `qwen -y`. Intelligence lives in the prompt and the agent's astral-agent +# skill, not here. 
+set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + *) echo "usage: import-user-software-key [--vm ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt_b64=$(base64 -w0 "$here/prompt.md") # GNU coreutils; -w0 = single line + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/import-user-software-key.prompt" +chown -R tester:tester "$d" + +# Run the agent as `tester` (qwen is installed for that user), non-interactively: +# one-shot positional prompt + `-y` (auto-approve). +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/import-user-software-key.prompt)"' \ + > "$d/import-user-software-key.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/import-user-software-key.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.sh does the authoritative, independent check. The agent +# records its outputs in $HOME/user.json (/home/tester/user.json). +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))' 2>/dev/null || true) +[ -n "$uid" ] || { echo "agent recorded no user_id in /home/tester/user.json on $(hostname)" >&2; exit 1; } +echo "import-user-software-key: agent finished on $(hostname); User id $uid" +EOS +) + +echo "import-user-software-key: driving Qwen operator on $VM ..." 
+# assignment prefix carries the prompt to the guest; body re-parses it +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "import-user-software-key: done on $VM" diff --git a/netsim/tasks/import-user-software-key/verify.sh b/netsim/tasks/import-user-software-key/verify.sh new file mode 100755 index 00000000..9d991e94 --- /dev/null +++ b/netsim/tasks/import-user-software-key/verify.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# verify import-user-software-key: the node must be a User node under the imported software User. +# INDEPENDENT re-check -- reads $HOME/user.json, acts AS the User, and asserts the +# node answers as a user node. If ASTRAL_USER_ID is set, the derived User id must +# equal it (proof the EXISTING key was used, not a fresh one). +set -eu + +VM="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) VM=$2; shift 2 ;; + *) shift ;; + esac +done +EXPECT=${ASTRAL_USER_ID:-} + +REMOTE_CHECK=$(cat <<'EOS' +set -eu +info=/home/tester/user.json +[ -s "$info" ] || { echo "no $info on $(hostname)" >&2; exit 1; } +uid=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_id",""))') +ASTRALD_APPHOST_TOKEN=$(python3 -c 'import json;print(json.load(open("/home/tester/user.json")).get("user_token",""))') +export ASTRALD_APPHOST_TOKEN +[ -n "$uid" ] || { echo "no user_id in $info on $(hostname)" >&2; exit 1; } +[ -n "$ASTRALD_APPHOST_TOKEN" ] || { echo "no user_token in $info on $(hostname)" >&2; exit 1; } + +# if an expected User id was supplied, the imported key must derive exactly it +if [ -n "$expect" ] && [ "$uid" != "$expect" ]; then + echo "imported User id $uid != expected $expect on $(hostname) (wrong key derived?)" >&2 + exit 1 +fi + +# acting as the User: whoami must report the User identity +who=$(astral-query apphost.whoami -out json) \ + || { echo "apphost.whoami failed on $(hostname)" >&2; exit 1; } +echo "$who" | grep -q "$uid" \ + || { echo "whoami != User id on $(hostname): $who" 
>&2; exit 1; } + +# active contract present (user.info rejects with code 2 if none) +astral-query user.info -out json \ + || { echo "user.info failed on $(hostname) -- no active contract?" >&2; exit 1; } + +echo "$(hostname): user node OK (User $uid${expect:+ — matches expected})" +EOS +) + +netsim ssh "$VM" -- "expect='$EXPECT'; $REMOTE_CHECK" \ + || { echo "import-user-software-key verify FAILED on $VM" >&2; exit 1; } +echo "verified imported user node on: $VM" diff --git a/netsim/tasks/install-astrald/README.md b/netsim/tasks/install-astrald/README.md new file mode 100644 index 00000000..bfd68283 --- /dev/null +++ b/netsim/tasks/install-astrald/README.md @@ -0,0 +1,3 @@ +# install-astrald + +Builds `astrald` and `astral-query` from source and runs `astrald` as a systemd service on the target VMs (all running, or `--vm `; `--ref` picks a git ref). verify.sh asserts the unit is enabled and each node answers `astral-query localnode:.spec`. diff --git a/netsim/tasks/install-astrald/run.sh b/netsim/tasks/install-astrald/run.sh new file mode 100755 index 00000000..301104f9 --- /dev/null +++ b/netsim/tasks/install-astrald/run.sh @@ -0,0 +1,125 @@ +#!/bin/sh +# install-astrald: build astrald from source, install it as a systemd service on VMs. +# install-astrald [--vm ]... [--ref ] +# No --vm -> every running VM in the simulation. +# +# Runs ON THE HOST (cwd = simulation root). Reaches each VM with a single +# `netsim ssh -- ` call: the whole remote script travels as ONE +# argument (assignment prefix + single-quoted heredoc body, so host-side $... are +# left for the guest to expand). ssh lands as root on the guest. 
+set -eu +REPO="https://github.com/cryptopunkscc/astrald" +GO_VERSION="1.25.1" # must be >= 1.25.0 (astrald go.mod); pin to current 1.25.x +REF="" + +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VMS="${VMS:+$VMS }$2"; shift 2 ;; + --ref) [ $# -ge 2 ] || { echo "need ref after --ref" >&2; exit 64; }; REF=$2; shift 2 ;; + *) echo "usage: install-astrald [--vm ]... [--ref ]" >&2; exit 64 ;; + esac +done +if [ -z "$VMS" ]; then + VMS=$(netsim vm ls --json | python3 -c \ + 'import json,sys; print(" ".join(v["hostname"] for v in json.load(sys.stdin) if v["state"]=="running"))') +fi +[ -n "$VMS" ] || { echo "no running VMs" >&2; exit 1; } + +REMOTE_BODY=$(cat <<'EOS' +set -eu +export DEBIAN_FRONTEND=noninteractive + +# deps: git + curl (Go comes from the official tarball, not apt -> need >= 1.25). +# qemu-guest-agent lets the host correct the guest clock out-of-band over +# virtio-serial on snapshot resume (netsim's qga guest-set-time), instead of +# racing sshd while the resume clock-jump storms this 1-vCPU VM. +need=""; command -v git >/dev/null 2>&1 || need="$need git" + command -v curl >/dev/null 2>&1 || need="$need curl" + command -v qemu-ga >/dev/null 2>&1 || need="$need qemu-guest-agent" +if [ -n "$need" ]; then + apt-get -qq -o DPkg::Lock::Timeout=120 update + apt-get -qq -y -o DPkg::Lock::Timeout=120 install $need ca-certificates >/dev/null +fi +# Bind the agent to netsim's guest-agent virtio-serial port (present from boot); +# left running so it is baked into the snapshot and answers on resume. +systemctl enable --now qemu-guest-agent >/dev/null 2>&1 || true + +# Ephemeral test-VM hygiene: disable the apt periodic machinery so a clock jump on +# resume (netsim corrects the stale snapshot clock) can't wake apt-daily / +# unattended-upgrades and saturate this 1-vCPU VM. Baked into the saved snapshot; +# mask the timers too so a later apt-get update can't re-arm them. 
Intentional — these +# are throwaway VMs that never need background package refreshes/security upgrades. +systemctl disable --now apt-daily.timer apt-daily-upgrade.timer >/dev/null 2>&1 || true +systemctl mask apt-daily.timer apt-daily-upgrade.timer apt-daily.service apt-daily-upgrade.service unattended-upgrades.service >/dev/null 2>&1 || true + +if ! /usr/local/go/bin/go version 2>/dev/null | grep -q "go$go_ver "; then + case "$(uname -m)" in + x86_64) ga=amd64 ;; aarch64) ga=arm64 ;; + *) echo "unsupported arch $(uname -m)" >&2; exit 1 ;; + esac + t=$(mktemp); curl -fsSL -o "$t" "https://go.dev/dl/go${go_ver}.linux-${ga}.tar.gz" + rm -rf /usr/local/go; tar -C /usr/local -xzf "$t"; rm -f "$t" +fi +export PATH=/usr/local/go/bin:$PATH CGO_ENABLED=0 + +# build (plain clone, NO --recursive; subpackages need the ./ prefix) +src=/opt/astrald-src +[ -d "$src/.git" ] || git clone --depth 1 ${ref:+--branch "$ref"} "$repo" "$src" +cd "$src" +go build -o /usr/local/bin/astrald ./cmd/astrald +go build -o /usr/local/bin/astral-query ./cmd/astral-query + +# run as a service: explicit -root and HOME (default paths break without HOME) +install -d -m 700 /var/lib/astrald +cat > /etc/systemd/system/astrald.service </dev/null 2>&1; then + ok=1; break + fi + n=$((n + 1)); sleep 1 +done +if [ -z "$ok" ]; then + echo "astrald did not come up on $(hostname) after ${n}s" >&2 + echo "--- systemctl status astrald ---" >&2 + systemctl status astrald --no-pager >&2 2>&1 || true + echo "--- journalctl -u astrald (tail 40) ---" >&2 + journalctl -u astrald --no-pager 2>&1 | tail -40 >&2 || true + exit 1 +fi + +# leave astrald running: netsim snapshots live RAM, so the node resumes +# already-running when the stage is restored (a stopped service would not +# restart, as resume is not a boot). astrald's footprint is tiny (~17 MB peak), +# so it does not stall the live snapshot against a sane qmp timeout. 
+echo "astrald installed, verified, and left running on $(hostname)" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + echo "installing astrald on $vm ..." + netsim ssh "$vm" -- "repo='$REPO' ref='$REF' go_ver='$GO_VERSION'; $REMOTE_BODY" +done diff --git a/netsim/tasks/install-astrald/verify.sh b/netsim/tasks/install-astrald/verify.sh new file mode 100755 index 00000000..951d3618 --- /dev/null +++ b/netsim/tasks/install-astrald/verify.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# verify install-astrald (same args as run.sh): on every target VM the astrald +# unit must be enabled and answer its local API. INDEPENDENT re-check -- it +# re-derives the VM list, ensures the service is running, probes it, and leaves +# it running for the snapshot; it does not trust run.sh's output. +set -eu +VMS="" +while [ $# -gt 0 ]; do + case "$1" in + --vm) VMS="${VMS:+$VMS }$2"; shift 2 ;; + --ref) shift 2 ;; + *) shift ;; + esac +done +if [ -z "$VMS" ]; then + VMS=$(netsim vm ls --json | python3 -c \ + 'import json,sys; print(" ".join(v["hostname"] for v in json.load(sys.stdin) if v["state"]=="running"))') +fi +[ -n "$VMS" ] || { echo "no running VMs to verify" >&2; exit 1; } + +REMOTE_CHECK=$(cat <<'EOS' +set -eu +systemctl is-enabled --quiet astrald +systemctl start astrald +ok= +for _ in 1 2 3 4 5 6 7 8 9 10; do + if systemctl is-active --quiet astrald && timeout 5 astral-query localnode:.spec -out json >/dev/null 2>&1; then + ok=1; break + fi + sleep 1 +done +[ -n "$ok" ] || { echo "astrald did not answer on $(hostname)" >&2; exit 1; } +echo "$(hostname): astrald healthy" +EOS +) + +# $VMS is a space-separated list -> intentional word-splitting +# shellcheck disable=SC2086 +for vm in $VMS; do + netsim ssh "$vm" -- "$REMOTE_CHECK" \ + || { echo "astrald NOT healthy on $vm" >&2; exit 1; } +done +echo "verified astrald on: $VMS" diff --git a/netsim/tasks/leave-lan/README.md b/netsim/tasks/leave-lan/README.md new file 
mode 100644 index 00000000..9d130504 --- /dev/null +++ b/netsim/tasks/leave-lan/README.md @@ -0,0 +1,3 @@ +# leave-lan + +On the host, seeds `--peer` (node1) with `--vm` (node2)'s onion (`nodes.resolve_endpoints` → `nodes.add_endpoint`) while the LAN is still up, then makes node2 **leave** the LAN by withdrawing its own 10.77 address (`ip addr flush`, which also drops the connected route; the NIC is taken down too). astrald polls `net.InterfaceAddrs()` and advertises one tcp endpoint per address, so the withdrawal is what it observes as a network change — it drops the 10.77 endpoint and the swarm link re-forms over Tor. SSH/management rides the separate WAN NIC, so it stays up. verify.py asserts (blind, deterministic) that node2 no longer holds a 10.77 LAN address or route; the Tor re-link is asserted by link-over-tor. diff --git a/netsim/tasks/leave-lan/run.sh b/netsim/tasks/leave-lan/run.sh new file mode 100755 index 00000000..e93cbdcb --- /dev/null +++ b/netsim/tasks/leave-lan/run.sh @@ -0,0 +1,78 @@ +#!/bin/sh +# leave-lan: make (node2, the node that "leaves") genuinely leave the 10.77 LAN, so +# astrald's tor module + the swarm link maintainer re-link to (node1) over Tor. +# +# Two steps, both on the host: +# 1. Seed with 's onion WHILE THE LAN IS STILL UP — once the LAN is gone the +# peer can no longer ask for its address, so it needs the .onion cached first. +# 2. Withdraw 's own 10.77 LAN address (ip addr flush). astrald has no carrier/ +# operstate monitor: it polls net.InterfaceAddrs() every 3s and advertises one tcp +# endpoint per assigned IP, so removing the address is what it observes as "left the +# network" — it drops the 10.77 endpoint and re-links over Tor. (A packet-filter DROP, +# or even a link/carrier down, leaves the IPv4 address in place and is invisible to +# that monitor.) SSH/management rides the separate WAN NIC and is untouched. 
+# leave-lan [--vm ] [--peer ] (default: node2 leaves, peer node1) +# +# Both nodes must have Tor up (enable-tor) and the alias must resolve on +# (adopt-node). astral-query ops here (resolve_endpoints / add_endpoint) are ungated. +set -eu + +VM="node2"; PEER="node1" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --peer) [ $# -ge 2 ] || { echo "need host after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;; + *) echo "usage: leave-lan [--vm ] [--peer ]" >&2; exit 64 ;; + esac +done + +# 1) seed with 's onion before the LAN goes away +SEED_BODY=$(cat <<'EOS' +set -eu +torof() { # read a .onion endpoint address from a resolve_endpoints json stream on stdin + python3 -c ' +import json,sys +def addr(ep): + if isinstance(ep,str): return ep + if isinstance(ep,dict): + o=ep.get("Object"); return o if isinstance(o,str) else "" + return "" +for ln in sys.stdin: + ln=ln.strip() + if not ln: continue + try: o=json.loads(ln) + except Exception: continue + a=addr((o.get("Object") or {}).get("Endpoint")) + if ".onion" in a: print(a); break' +} +# prefer the local cache (auto-synced over the live link); else ask the leaver directly +onion=$(astral-query nodes.resolve_endpoints -id "$leaver" -out json 2>/dev/null | torof || true) +[ -n "$onion" ] || onion=$(astral-query "$leaver":nodes.resolve_endpoints -id "$leaver" -out json 2>/dev/null | torof || true) +[ -n "$onion" ] || { echo "leave-lan: $(hostname) could not learn $leaver's onion before the cut" >&2; exit 1; } +astral-query nodes.add_endpoint -id "$leaver" -endpoint "tor:$onion" >/dev/null 2>&1 || true +echo "leave-lan: $(hostname) seeded $leaver onion=$onion" +EOS +) +echo "leave-lan: seeding $PEER with $VM's onion ..." +# shellcheck disable=SC2029 +netsim ssh "$PEER" -- "leaver='$VM'; $SEED_BODY" + +# 2) make leave the LAN: withdraw its own 10.77 address (and drop the NIC for realism). 
+# Removing the address takes its connected /24 route with it, so has no address on +# and no route to the LAN — it has genuinely left at the IP layer, which is exactly what +# astrald observes (see the header). No peer IP needed: drops its own membership. +CUT_BODY=$(cat <<'EOS' +set -eu +# the NIC holding the 10.77 LAN address is nic2; SSH rides the separate WAN NIC, untouched. +lan_if=$(ip -o -4 addr show | awk '$4 ~ /^10\.77\./ {print $2; exit}') +[ -n "$lan_if" ] || { echo "leave-lan: no 10.77 LAN interface on $(hostname)" >&2; exit 1; } +lan_ip=$(ip -o -4 addr show dev "$lan_if" | awk '$4 ~ /^10\.77\./ {print $4; exit}') +ip addr flush dev "$lan_if" # RTM_DELADDR: drops the address AND its connected /24 route +ip link set "$lan_if" down # carrier/admin down too, so the NIC is faithfully "gone" +echo "leave-lan: $(hostname) withdrew $lan_ip from $lan_if (left the LAN)" +EOS +) +echo "leave-lan: $VM leaving the LAN (withdrawing its 10.77 address) ..." +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "$CUT_BODY" +echo "leave-lan: done on $VM" diff --git a/netsim/tasks/leave-lan/verify.py b/netsim/tasks/leave-lan/verify.py new file mode 100644 index 00000000..94fa25e4 --- /dev/null +++ b/netsim/tasks/leave-lan/verify.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +"""verify leave-lan: has withdrawn its LAN identity, so it genuinely left the +10.77 LAN (not merely had its packets filtered). + +The cut (run.sh) flushes 's own 10.77 address, which is what astrald observes as +"left the network": it polls net.InterfaceAddrs() every 3s and advertises one tcp +endpoint per assigned IP, so removing the address fires EventNetworkAddressChanged and +withdraws the 10.77 tcp endpoint. (A packet-filter DROP -- or a bare link/carrier down -- +leaves the IPv4 address in place and is invisible to that monitor.) 
+ +This is a blind, deterministic host-side check: it reads 's own network state over +ssh, independent of astral, and asserts two consequences of the address withdrawal -- + has (1) no 10.77 LAN address and (2) no route into the 10.77 subnet. Neither depends +on a TCP probe's error code, which would vary with the WAN default route (a connect to +the LAN falls through to the WAN NAT and times out rather than returning ENETUNREACH). +astrald's reaction -- re-linking over Tor -- is asserted separately by link-over-tor. +""" +import argparse +import os +import sys + +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import astralapi # noqa: E402 + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", default="node2") # the node that left the LAN + ap.add_argument("--peer", default="node1") # the node it can no longer reach + args, _ = ap.parse_known_args() + + # 1) the leaver no longer holds any 10.77 LAN address (the thing astrald keys on) + lan_ip = astralapi.peer_lan_ip(args.vm) + # 2) and has no route into the 10.77 subnet (the connected route went with the address) + lan_routes = [ln for ln in (astralapi.ssh(args.vm, "ip -o route show") or "").splitlines() + if "10.77." 
in ln] + + if lan_ip: + sys.stderr.write(f"leave-lan verify FAILED: {args.vm} still holds a LAN address " + f"({lan_ip}) -- it has not left the 10.77 LAN.\n") + return 1 + if lan_routes: + sys.stderr.write(f"leave-lan verify FAILED: {args.vm} still has a route into the " + "10.77 LAN:\n " + "\n ".join(lan_routes) + "\n") + return 1 + + print(f"leave-lan OK: {args.vm} withdrew its 10.77 LAN address and route -- it has left " + f"the LAN (astrald re-links to {args.peer} over Tor; asserted by link-over-tor).") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/leave-lan/verify.sh b/netsim/tasks/leave-lan/verify.sh new file mode 100755 index 00000000..042470ab --- /dev/null +++ b/netsim/tasks/leave-lan/verify.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@" diff --git a/netsim/tasks/link-over-tor/README.md b/netsim/tasks/link-over-tor/README.md new file mode 100644 index 00000000..1bd8a2fb --- /dev/null +++ b/netsim/tasks/link-over-tor/README.md @@ -0,0 +1,3 @@ +# link-over-tor + +node1's agent re-links to node2 over Tor. verify.py asserts node1 holds a link to node2 with `Network=tor`. two-nodes → two-nodes-tor. diff --git a/netsim/tasks/link-over-tor/prompt.md b/netsim/tasks/link-over-tor/prompt.md new file mode 100644 index 00000000..95333693 --- /dev/null +++ b/netsim/tasks/link-over-tor/prompt.md @@ -0,0 +1,5 @@ +Your swarm peer `__PEER__` is reachable over Tor. Re-establish your link to +`__PEER__` over Tor. + +Save `__PEER__`'s onion address and the link's transport to `~/tor.json` (as +`peer_onion` and `link_network`). 
diff --git a/netsim/tasks/link-over-tor/run.sh b/netsim/tasks/link-over-tor/run.sh new file mode 100755 index 00000000..5df1571e --- /dev/null +++ b/netsim/tasks/link-over-tor/run.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# link-over-tor: have node1's Qwen agent re-establish the swarm link to the peer +# (node2) over Tor after node2 left the LAN, and confirm the link rides over Tor. +# Driven by the agent following the astral-agent skill's linking-over-tor playbook. +# link-over-tor [--vm ] [--peer ] (default: node1, node2) +# +# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the skill. verify.py +# then INDEPENDENTLY confirms node1 holds a tor link to the peer. +set -eu + +VM="node1"; PEER="node2" +while [ $# -gt 0 ]; do + case "$1" in + --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;; + --peer) [ $# -ge 2 ] || { echo "need alias after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;; + *) echo "usage: link-over-tor [--vm ] [--peer ]" >&2; exit 64 ;; + esac +done + +# CDPATH= is an intentional one-shot env prefix for cd, not an assignment +# shellcheck disable=SC1007 +here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) +[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; } +prompt=$(sed "s|__PEER__|$PEER|g" "$here/prompt.md") # alias is [a-z0-9] — sed-safe +prompt_b64=$(printf '%s' "$prompt" | base64 -w0) + +REMOTE_BODY=$(cat <<'EOS' +set -eu +d=/home/tester/.netsim +mkdir -p "$d" +printf '%s' "$prompt_b64" | base64 -d > "$d/link-over-tor.prompt" +chown -R tester:tester "$d" + +su - tester -c 'qwen -y "$(cat /home/tester/.netsim/link-over-tor.prompt)"' \ + > "$d/link-over-tor.log" 2>&1 || { + echo "qwen run failed on $(hostname); tail of log:" >&2 + tail -n 40 "$d/link-over-tor.log" >&2 + exit 1 + } + +# Cheap smoke-check; verify.py does the authoritative, independent check. The agent +# records what it read in $HOME/tor.json under link_network (and peer_onion). 
+net=$(python3 -c 'import json;print(json.load(open("/home/tester/tor.json")).get("link_network",""))' 2>/dev/null || true) +[ -n "$net" ] || { echo "agent recorded no link_network in /home/tester/tor.json on $(hostname)" >&2; exit 1; } +echo "link-over-tor: agent finished on $(hostname); recorded link_network=$net" +EOS +) + +echo "link-over-tor: driving Qwen operator on $VM to link with $PEER over Tor ..." +# shellcheck disable=SC2029 +netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY" +echo "link-over-tor: done on $VM" diff --git a/netsim/tasks/link-over-tor/verify.py b/netsim/tasks/link-over-tor/verify.py new file mode 100644 index 00000000..89252549 --- /dev/null +++ b/netsim/tasks/link-over-tor/verify.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""verify link-over-tor: node1 holds a live link to the peer over Tor. + +Independent host-side check (does not trust the agent): nodes.links on node1 must +list a link whose Network is "tor". (We assert the transport, not the .onion +endpoint string -- an inbound tor link legitimately has no remote onion, so +requiring ".onion" would false-negative; node2 is the only sibling, so a tor link +is a tor link to node2.) Also cross-checks the agent's record. + +Queries reach node1's apphost through the shared astral-py client +(tasks/_lib/astralapi.py), CLI fallback for anything it can't serve. 
+""" +import argparse +import os +import sys + +# why: realpath crosses netsim's per-task symlink to reach the sibling tasks/_lib +sys.path.insert(0, os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "_lib")) +import astralapi # noqa: E402 + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vm", default="node1") # the operator; records tor.json here + ap.add_argument("--peer", default="node2") # the node that left the LAN + args, _ = ap.parse_known_args() + + tor = astralapi.home_json(args.vm, "tor.json") # agent: peer_onion, link_network + net = str(tor.get("link_network", "")) + onion = str(tor.get("peer_onion", "")) + + # Decisive: an actual link over Tor from node1 (to the only sibling, the peer). + with astralapi.connect(args.vm) as node: + links = astralapi.tor_links(node.call("nodes.links")) + + notes = [] + if net != "tor": + notes.append(f"agent recorded link_network={net!r} (expected 'tor')") + if not onion: + notes.append("agent recorded no peer_onion") + + if links: + ep = links[0][1] or "(inbound, no remote onion)" + print(f"link-over-tor OK: {args.vm} holds a link to {args.peer} over Tor (endpoint {ep}).") + for n in notes: + sys.stderr.write(f" note: {n}\n") + return 0 + + sys.stderr.write(f"link-over-tor verify FAILED: {args.vm} has no link to {args.peer} over Tor.\n") + for n in notes: + sys.stderr.write(f" note: {n}\n") + sys.stderr.write(f" nodes.links:\n{astralapi.ssh(args.vm, 'astral-query nodes.links -out json')}\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/netsim/tasks/link-over-tor/verify.sh b/netsim/tasks/link-over-tor/verify.sh new file mode 100755 index 00000000..042470ab --- /dev/null +++ b/netsim/tasks/link-over-tor/verify.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Thin shim — verification logic lives in verify.py. 
#!/bin/sh
# object-store: have node1 (the operator) store an astral object on a chosen
# target node and record its id. --target is an astral query target:
#   localnode (default)   store on the local node (node1's own repo)
#   node2 (or any alias)  store on that node
# The node aliases (node1/node2) are registered by adopt-node when the swarm forms.
#   object-store [--vm HOST] [--target ALIAS]
#
# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the astral-agent skill;
# the agent forms the right query for the target. verify.py then INDEPENDENTLY
# re-reads the object from the holder's local repo. The remote program travels as
# ONE argv to `netsim ssh`; the prompt and the payload ride along base64-encoded.
set -eu

VM="node1"; TARGET="localnode"
while [ $# -gt 0 ]; do
  case "$1" in
    --vm)     [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;;
    --target) [ $# -ge 2 ] || { echo "need an address after --target" >&2; exit 64; }; TARGET=$2; shift 2 ;;
    *) echo "usage: object-store [--vm HOST] [--target ALIAS]" >&2; exit 64 ;;
  esac
done

# The target is spliced into a sed replacement (where '|', '&' and '\' are special)
# and from there into the agent's prompt. Enforce the alias alphabet here instead of
# assuming it, so a stray character is a usage error and not a corrupted prompt.
case "$TARGET" in
  ''|*[!A-Za-z0-9._-]*)
    echo "invalid --target '$TARGET': expected an alias matching [A-Za-z0-9._-]+" >&2
    exit 64 ;;
esac

# CDPATH= is an intentional one-shot env prefix for cd, not an assignment
# shellcheck disable=SC1007
here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; }
# Substitute the (validated) target alias into the prompt.
prompt=$(sed "s|__TARGET__|$TARGET|g" "$here/prompt.md")
prompt_b64=$(printf '%s' "$prompt" | base64 -w0)   # GNU coreutils; -w0 = single line
[ -f "$here/payload.txt" ] || { echo "missing $here/payload.txt" >&2; exit 1; }
payload_b64=$(base64 -w0 "$here/payload.txt")      # the fixed bytes the agent stores

# The program executed on the VM. The heredoc is quoted, so nothing below is expanded
# on the host; $prompt_b64 / $payload_b64 are assigned on the remote command line.
REMOTE_BODY=$(cat <<'EOS'
set -eu
d=/home/tester/.netsim
mkdir -p "$d"
printf '%s' "$prompt_b64" | base64 -d > "$d/object-store.prompt"
printf '%s' "$payload_b64" | base64 -d > /home/tester/payload.txt
chown -R tester:tester "$d"
chown tester:tester /home/tester/payload.txt

su - tester -c 'qwen -y "$(cat /home/tester/.netsim/object-store.prompt)"' \
  > "$d/object-store.log" 2>&1 || {
    echo "qwen run failed on $(hostname); tail of log:" >&2
    tail -n 40 "$d/object-store.log" >&2
    exit 1
  }

# Cheap smoke-check; verify.py does the authoritative read-back + byte match. The
# agent only stores and records the id in $HOME/object.json (/home/tester/object.json).
oid=$(python3 -c 'import json;print(json.load(open("/home/tester/object.json")).get("object_id",""))' 2>/dev/null || true)
[ -n "$oid" ] || { echo "agent recorded no object_id in /home/tester/object.json on $(hostname)" >&2; exit 1; }
case "$oid" in
  data1*) : ;;
  *) echo "WARNING $(hostname): object_id does not look like a data1… Object ID (verify.py decides)" >&2 ;;
esac
echo "object-store: agent finished on $(hostname); stored object $oid"
EOS
)

echo "object-store (target=$TARGET): driving Qwen operator on $VM ..."
# base64 output contains no single quotes, so the '...' assignments below are safe.
# shellcheck disable=SC2029
netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; payload_b64='$payload_b64'; $REMOTE_BODY"
echo "object-store (target=$TARGET): done on $VM"
def main():
    """Verify that the holder's local repo returns the exact stored payload.

    The holder is ``--node2`` when ``--target`` names it, otherwise the operator
    (``--vm``). The object id is taken from the operator's ``object.json`` and the
    expected bytes from the ``payload.txt`` in the operator's home; the object is
    then loaded from the holder with ``objects.load`` pinned to the local repo.

    Returns:
        0 when an id and a payload are on record and the loaded bytes (ignoring
        trailing newlines) equal the payload, 1 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--vm", default="node1")          # the operator; object.json lives here
    parser.add_argument("--node2", default="node2")       # the peer
    parser.add_argument("--target", default="localnode")  # localnode/node1 -> node1; node2 -> node2
    opts, _unknown = parser.parse_known_args()

    if opts.target == opts.node2:
        holder = opts.node2
    else:
        holder = opts.vm

    # The recorded id, with every whitespace character removed.
    recorded = astralapi.home_json(opts.vm, "object.json").get("object_id", "")
    object_id = "".join(str(recorded).split())
    # Ground truth: the bytes the agent was handed, not its account of them.
    payload = astralapi.read_file(opts.vm, "/home/tester/payload.txt")

    # Decisive check: read the object back from the holder's local repo.
    with astralapi.connect(holder) as client:
        reply = client.call("objects.load", {"id": object_id, "repo": "local"})
    loaded = astralapi.loaded_payload(reply)
    matches = loaded is not None and loaded.rstrip("\n") == payload

    problems = []
    if not object_id:
        problems.append("no object_id in node1's object.json")
    if not payload:
        problems.append("payload.txt missing on the operator (run.sh must ship it)")

    if matches and not problems:
        print(f"object-store OK (target={opts.target}): {holder}'s local repo holds object "
              f"{object_id[:12]}.. with the exact bytes ({len(payload)} B).")
        return 0

    report = sys.stderr.write
    report(f"object-store verify FAILED (target={opts.target}): {holder}'s local repo "
           "does NOT hold the stored object.\n")
    for problem in problems:
        report(f" - {problem}\n")
    if loaded is None:
        report(f" objects.load -repo local on {holder} returned no payload (see errors below).\n")
    elif not matches:
        report(f" bytes mismatch: got {loaded!r} != stored {payload!r}.\n")
    for message in astralapi.error_messages(reply):
        report(f" load error_message: {message}\n")
    outcome = 'hit' if loaded is not None else 'miss'
    report(f" (id={object_id} holder={holder} load={outcome})\n")
    return 1


if __name__ == "__main__":
    sys.exit(main())
# The punch's nat.node_punch signaling + peerSupportsNAT discovery route
# over a Tor link node1<->node2 (source-verified: tcp-only Basic strategy can't form for
# symmetric NAT, and the punch client sets no relay hint -> Tor is the sole mutual transport).
# On success the punch is promoted to a direct kcp link on BOTH peers (verify.py asserts it).
#
# Trigger is `nodes.new_link -strategies nat` (drives NATLinkStrategy end-to-end), NOT
# `nat.punch` (which only registers a Hole and yields no kcp link). Every astral-query targets
# a NAT'd node -> runs inside its netns (astral-query defaults to tcp:127.0.0.1:8625, which is
# netns-local; see enter-nat's header).
#   punch-nat [--vm VM] [--peer PEER]   (default: node1 punches to node2)
set -eu

VM=node1; PEER=node2
while [ $# -gt 0 ]; do
  case "$1" in
    --vm) [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;;
    --peer) [ $# -ge 2 ] || { echo "need host after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;;
    *) echo "usage: punch-nat [--vm ] [--peer ]" >&2; exit 64 ;;
  esac
done

# --- host-side helpers (astral-query runs in the target's netns; parse on the host) ------
# Each helper takes the VM name as $1, runs one astral-query over `netsim ssh` with
# ssh's stderr discarded, and scans the line-delimited JSON on the host. Lines that
# are not valid JSON are skipped. Each prints at most one line and prints nothing
# when no line matches.

nid() { # nid VM -> a node's own identity hex (>=64 hex) via apphost.whoami
  # Accepts "Object" either as the identity string itself or as a dict with "Identity".
  netsim ssh "$1" -- "ip netns exec priv astral-query apphost.whoami -out json" 2>/dev/null | python3 -c '
import json,sys
for ln in sys.stdin:
  ln=ln.strip()
  if not ln: continue
  try: o=json.loads(ln)
  except Exception: continue
  v=o.get("Object")
  if isinstance(v,str) and len(v)>=64: print(v); break
  if isinstance(v,dict) and isinstance(v.get("Identity"),str): print(v["Identity"]); break'
}
onion_of() { # onion_of VM -> a node's own .onion via resolve_endpoints localnode
  # Prints the first endpoint address containing ".onion".
  netsim ssh "$1" -- "ip netns exec priv astral-query nodes.resolve_endpoints -id localnode -out json" 2>/dev/null | python3 -c '
import json,sys
def addr(ep):
  if isinstance(ep,str): return ep
  if isinstance(ep,dict):
    o=ep.get("Object"); return o if isinstance(o,str) else ""
  return ""
for ln in sys.stdin:
  ln=ln.strip()
  if not ln: continue
  try: o=json.loads(ln)
  except Exception: continue
  a=addr((o.get("Object") or {}).get("Endpoint"))
  if ".onion" in a: print(a); break'
}
has_link() { # has_link VM NETWORK REMOTE_ID -> prints "yes" if that link exists
  # $2 and $3 reach the parser as sys.argv[1] / sys.argv[2]; a link matches when both
  # its Network and its RemoteIdentity are equal to them.
  netsim ssh "$1" -- "ip netns exec priv astral-query nodes.links -out json" 2>/dev/null | python3 -c '
import json,sys
net,want=sys.argv[1],sys.argv[2]
for ln in sys.stdin:
  ln=ln.strip()
  if not ln: continue
  try: o=json.loads(ln)
  except Exception: continue
  v=o.get("Object") or {}
  if str(v.get("Network"))==net and str(v.get("RemoteIdentity",""))==want: print("yes"); break' "$2" "$3"
}
diag() { # per-peer failure diagnosis (see the task doc "live_diagnostics")
  # Best effort: everything goes to stderr and a failing ssh never aborts the script
  # (|| true), so the caller's own error message and exit code stay authoritative.
  for v in "$VM" "$PEER"; do
    echo "--- diag $v ---" >&2
    netsim ssh "$v" -- '
      echo "[nodes.links]"; ip netns exec priv astral-query nodes.links -out json 2>&1 | tail -20
      echo "[nat.list_holes]"; ip netns exec priv astral-query nat.list_holes -out json 2>&1 | tail -5
      echo "[public_ip]"; ip netns exec priv astral-query ip.public_ip_candidates -out json 2>&1 | tail -5
      echo "[tor ctl 9051]"; ip netns exec priv ss -ltn 2>/dev/null | grep 9051 || echo none
      echo "[conntrack 198.51.100]"; (conntrack -L -p udp 2>/dev/null | grep 198.51.100 || grep 198.51.100 /proc/net/nf_conntrack 2>/dev/null) | head -6
      echo "[astrald journal]"; journalctl -u astrald --no-pager 2>&1 | tail -40
    ' >&2 2>&1 || true
  done
}

echo "punch-nat: resolving identities ($VM initiator -> $PEER target) ..."
# An empty identity means the whoami query or its parse produced nothing; nothing
# below can work without both ids, so stop here.
VMID=$(nid "$VM"); [ -n "$VMID" ] || { echo "punch-nat: could not resolve $VM identity" >&2; exit 1; }
PEERID=$(nid "$PEER"); [ -n "$PEERID" ] || { echo "punch-nat: could not resolve $PEER identity" >&2; exit 1; }

# 1) ensure mutual onion knowledge (host-brokered; do NOT trust auto-sync -- risk per doc)
O_PEER=$(onion_of "$PEER"); O_VM=$(onion_of "$VM")
[ -n "$O_PEER" ] || { echo "punch-nat: $PEER published no onion (Tor-in-netns down? run configure-nat-tor)" >&2; diag; exit 1; }
[ -n "$O_VM" ] || { echo "punch-nat: $VM published no onion (Tor-in-netns down? run configure-nat-tor)" >&2; diag; exit 1; }
# Seeding is best effort (|| true, output discarded); step 2 is what detects a
# signaling path that did not come up.
netsim ssh "$VM" -- "ip netns exec priv astral-query nodes.add_endpoint -id '$PEERID' -endpoint 'tor:$O_PEER' >/dev/null 2>&1 || true"
netsim ssh "$PEER" -- "ip netns exec priv astral-query nodes.add_endpoint -id '$VMID' -endpoint 'tor:$O_VM' >/dev/null 2>&1 || true"
echo "punch-nat: seeded onions ($VM<->$PEER)"

# 2) readiness: a live tor signaling link $VM->$PEER (form one if absent). Up to 20
#    attempts with a 3 s pause between them; each new_link attempt may itself block
#    for up to 60 s (timeout 60), so the worst case is far longer than the pauses alone.
tor_up=
for _ in $(seq 1 20); do
  [ "$(has_link "$VM" tor "$PEERID")" = yes ] && { tor_up=1; break; }
  netsim ssh "$VM" -- "timeout 60 ip netns exec priv astral-query nodes.new_link -target '$PEERID' -strategies tor -out json >/dev/null 2>&1 || true"
  sleep 3
done
[ -n "$tor_up" ] || { echo "punch-nat: no tor link $VM->$PEER (signaling path down)" >&2; diag; exit 1; }
echo "punch-nat: tor signaling link up ($VM->$PEER)"

# 3) trigger the punch (initiator only; node2's side runs automatically over nat.node_punch)
echo "punch-nat: triggering NAT punch $VM -> $PEER ..."
# The command's own exit status is deliberately ignored (|| true): step 4 decides
# success from the links that actually exist.
netsim ssh "$VM" -- "timeout 180 ip netns exec priv astral-query nodes.new_link -target '$PEERID' -strategies nat -out json 2>&1 | tail -3" || true

# 4) confirm a durable kcp link on BOTH peers (20 polls, 3 s apart)
ok=
for _ in $(seq 1 20); do
  if [ "$(has_link "$VM" kcp "$PEERID")" = yes ] && [ "$(has_link "$PEER" kcp "$VMID")" = yes ]; then ok=1; break; fi
  sleep 3
done
[ -n "$ok" ] || { echo "punch-nat: no kcp link between $VM and $PEER after the punch" >&2; diag; exit 1; }
echo "punch-nat: kcp link established ($VM<->$PEER); done"
def _identity_from_whoami(raw):
    """Return the identity found in ``apphost.whoami -out json`` output, or "".

    ``raw`` is line-delimited JSON. Blank lines, lines that are not valid JSON and
    JSON values that are not objects (a bare string, number or list) are skipped
    rather than crashing the verifier. The identity is accepted either as
    ``Object`` itself (a string of at least 64 characters) or as
    ``Object["Identity"]``.
    """
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(obj, dict):
            # Valid JSON but not an object: calling .get on it would raise.
            continue
        value = obj.get("Object")
        if isinstance(value, str) and len(value) >= 64:
            return value
        if isinstance(value, dict) and isinstance(value.get("Identity"), str):
            return value["Identity"]
    return ""


def _is_lan_addr(addr):
    """Return True when ``addr`` mentions an address starting with ``10.77.``.

    A plain substring test would also accept ``210.77.0.1`` or ``1.10.77.5``, so
    the match only counts when ``10.77.`` is not preceded by a digit or a dot.
    """
    text = str(addr)
    pos = text.find("10.77.")
    while pos != -1:
        if pos == 0 or not (text[pos - 1].isdigit() or text[pos - 1] == "."):
            return True
        pos = text.find("10.77.", pos + 1)
    return False


def node_id(vm):
    """The node's own identity hex via apphost.whoami (inside its netns).

    Returns "" when the query produced no usable identity.
    """
    raw = astralapi.ssh(vm, "ip netns exec priv astral-query apphost.whoami -out json") or ""
    return _identity_from_whoami(raw)


def links(vm):
    """Return the parsed ``nodes.links`` CLI output of ``vm`` (queried inside its netns)."""
    return astralapi.parse_cli(
        astralapi.ssh(vm, "ip netns exec priv astral-query nodes.links -out json") or "")


def main():
    """Verify that both peers hold a kcp link to each other and no direct/LAN tcp link.

    For each of ``--vm`` and ``--peer`` the sibling's identity must resolve, a kcp
    link to that identity must exist, no tcp link to that identity may exist, and
    no tcp link may point at a 10.77 LAN address.

    Returns:
        0 when every check passes on both peers, 1 otherwise (each failure is
        written to stderr).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--vm", default="node1")    # initiator
    ap.add_argument("--peer", default="node2")  # target
    args, _ = ap.parse_known_args()
    peers = [args.vm, args.peer]
    ids = {p: node_id(p) for p in peers}

    failed = []
    for p in peers:
        sib = args.peer if p == args.vm else args.vm
        sib_id = ids.get(sib, "")
        if not sib_id:
            failed.append(f"{p}: could not resolve sibling {sib} identity")
            continue
        objs = links(p)
        kcp = astralapi.kcp_links(objs)  # [(RemoteIdentity, endpoint)]
        tcp = astralapi.links_by_network(objs, "tcp")
        # positive: a direct kcp link to the sibling (the promoted punch)
        if not any(rid == sib_id for rid, _ in kcp):
            failed.append(f"{p}: no kcp link to {sib} -- punch not promoted (kcp={kcp})")
            sys.stderr.write(f" {p} tcp links: {tcp}\n")
            continue
        # negative: the sibling must be reachable ONLY via the punch, never a direct tcp link
        if any(rid == sib_id for rid, _ in tcp):
            failed.append(f"{p}: has a direct tcp link to {sib} -- not a NAT traversal")
            continue
        # negative: no LAN (10.77) tcp link at all -- the NAT must be genuinely entered
        if any(_is_lan_addr(addr) for _rid, addr in tcp):
            failed.append(f"{p}: has a 10.77 LAN tcp link -- NAT not genuinely entered (tcp={tcp})")
            continue
        print(f"punch-nat OK: {p} holds a direct kcp link to {sib} (no direct/LAN tcp link).")

    if failed:
        for f in failed:
            sys.stderr.write(f"punch-nat verify FAILED: {f}\n")
        return 1
    print(f"punch-nat verified: direct kcp link on both peers ({', '.join(peers)})")
    return 0


if __name__ == "__main__":
    sys.exit(main())
#!/bin/sh
# read-remote-object: have node1's agent read an astral object that lives on the
# peer (node2), over astral. The object's id is in node1's ~/object.json (object_id,
# written by object-store --target node2). Driven by the Qwen Code agent on node1 —
# the read is issued AS THE USER (authenticated), which routes to the peer (an
# anonymous read would not). The agent addresses the peer by its alias (registered
# by adopt-node).
#   read-remote-object [--vm HOST] [--peer ALIAS]   (default: node1, node2)
#
# Runs ON THE HOST. Tiny script, thin prompt, intelligence in the astral-agent skill.
# verify.py then INDEPENDENTLY re-reads the peer's object as the User and asserts.
set -eu

VM="node1"; PEER="node2"
while [ $# -gt 0 ]; do
  case "$1" in
    --vm)   [ $# -ge 2 ] || { echo "need host after --vm" >&2; exit 64; }; VM=$2; shift 2 ;;
    --peer) [ $# -ge 2 ] || { echo "need alias after --peer" >&2; exit 64; }; PEER=$2; shift 2 ;;
    *) echo "usage: read-remote-object [--vm HOST] [--peer ALIAS]" >&2; exit 64 ;;
  esac
done

# The alias is spliced into a sed replacement (where '|', '&' and '\' are special)
# and from there into the agent's prompt. Enforce the alias alphabet here instead of
# assuming it, so a stray character is a usage error and not a corrupted prompt.
case "$PEER" in
  ''|*[!A-Za-z0-9._-]*)
    echo "invalid --peer '$PEER': expected an alias matching [A-Za-z0-9._-]+" >&2
    exit 64 ;;
esac

# CDPATH= is an intentional one-shot env prefix for cd, not an assignment
# shellcheck disable=SC1007
here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
[ -f "$here/prompt.md" ] || { echo "missing $here/prompt.md" >&2; exit 1; }
prompt=$(sed "s|__PEER__|$PEER|g" "$here/prompt.md")   # alias validated above
prompt_b64=$(printf '%s' "$prompt" | base64 -w0)       # GNU coreutils; -w0 = single line

# The program executed on the VM. The heredoc is quoted, so nothing below is expanded
# on the host; $prompt_b64 is assigned on the remote command line.
REMOTE_BODY=$(cat <<'EOS'
set -eu
d=/home/tester/.netsim
mkdir -p "$d"
printf '%s' "$prompt_b64" | base64 -d > "$d/read-remote-object.prompt"
chown -R tester:tester "$d"

su - tester -c 'qwen -y "$(cat /home/tester/.netsim/read-remote-object.prompt)"' \
  > "$d/read-remote-object.log" 2>&1 || {
    echo "qwen run failed on $(hostname); tail of log:" >&2
    tail -n 40 "$d/read-remote-object.log" >&2
    exit 1
  }

# Cheap smoke-check; verify.py does the authoritative, independent check. The agent
# records what it read in $HOME/read.json under object_remote.
rem=$(python3 -c 'import json;print(json.load(open("/home/tester/read.json")).get("object_remote",""))' 2>/dev/null || true)
[ -n "$rem" ] || { echo "agent recorded no object_remote in /home/tester/read.json on $(hostname)" >&2; exit 1; }
echo "read-remote-object: agent finished on $(hostname); read back from peer"
EOS
)

echo "read-remote-object: driving Qwen operator on $VM to read from $PEER ..."
# base64 output contains no single quotes, so the '...' assignment below is safe.
# shellcheck disable=SC2029
netsim ssh "$VM" -- "prompt_b64='$prompt_b64'; $REMOTE_BODY"
echo "read-remote-object: done on $VM"
def main():
    """Verify that the operator, acting as the User, can read the peer's object.

    Collects the object id (``object.json``), the user token (``user.json``) and
    the agent's own read (``read.json``) from the operator's home, plus the
    expected bytes from ``payload.txt``. It then issues ``objects.load`` through
    the operator's node with the token, targeted at the peer. The agent's recorded
    read is only cross-checked and reported as a note.

    Returns:
        0 when id, payload and token are all on record and the loaded bytes
        (ignoring trailing newlines) equal the payload, 1 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--vm", default="node1")    # operator; reads as the User
    parser.add_argument("--peer", default="node2")  # alias of the node holding the object
    opts, _unknown = parser.parse_known_args()

    object_record = astralapi.home_json(opts.vm, "object.json")  # object-store: object_id
    user_record = astralapi.home_json(opts.vm, "user.json")      # bootstrap/import: user_token
    read_record = astralapi.home_json(opts.vm, "read.json")      # this task's agent: object_remote

    # The recorded id, with every whitespace character removed.
    object_id = "".join(str(object_record.get("object_id", "")).split())
    # Ground truth: the payload.txt on the operator, not the agent's account of it.
    payload = astralapi.read_file(opts.vm, "/home/tester/payload.txt")
    agent_read = str(read_record.get("object_remote", ""))
    token = user_record.get("user_token", "")

    # Independent read: authenticated with the token and targeted at the peer.
    with astralapi.connect(opts.vm, token=token) as operator:
        reply = operator.call("objects.load", {"id": object_id}, target=opts.peer)
    loaded = astralapi.loaded_payload(reply)
    matches = loaded is not None and loaded.rstrip("\n") == payload

    problems = []
    if not object_id:
        problems.append(
            "no object_id in node1's object.json (object-store --target node2 must run first)")
    if not payload:
        problems.append(
            "payload.txt missing on node1 (object-store --target node2 must run first)")
    if not token:
        problems.append("no user_token in node1's user.json (can't read the peer as the User)")

    remarks = []
    if not agent_read:
        remarks.append("agent recorded no object_remote (the agent's own read)")
    elif payload and payload not in agent_read:
        remarks.append(f"agent's recorded read does not contain the payload ({agent_read!r})")

    report = sys.stderr.write

    def emit_remarks():
        for remark in remarks:
            report(f" note: {remark}\n")

    if matches and not problems:
        print(f"read-remote-object OK: node1 (as User) read object {object_id[:12]}.. from "
              f"{opts.peer} over astral; bytes match ({len(payload)} B).")
        emit_remarks()
        return 0

    report(f"read-remote-object verify FAILED: node1 could not read the object from "
           f"{opts.peer} over astral.\n")
    for problem in problems:
        report(f" - {problem}\n")
    if loaded is None:
        report(f" {opts.peer}:objects.load (as User) returned no payload "
               "(route_not_found means the read didn't route -- check auth/zone).\n")
    elif not matches:
        report(f" bytes mismatch: got {loaded!r} != stored {payload!r}.\n")
    for message in astralapi.error_messages(reply):
        report(f" load error_message: {message}\n")
    emit_remarks()
    outcome = 'hit' if loaded is not None else 'miss'
    report(f" (id={object_id} peer={opts.peer} read={outcome})\n")
    return 1


if __name__ == "__main__":
    sys.exit(main())
verification logic lives in verify.py. netsim sets $NETSIM_TASK_DIR +# to this task's directory and only auto-runs run.sh/verify.sh, so verify.py sits +# next to us and is invoked here (the dirname fallback covers running this directly). +exec python3 "${NETSIM_TASK_DIR:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}/verify.py" "$@"