diff --git a/.gitignore b/.gitignore index f6e73b6..71e0d97 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ build/ -.vscode/settings.json \ No newline at end of file +.vscode/settings.json diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 65f85e5..c83a17f 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -23,7 +23,7 @@ "args": [ "build", "-o", "${workspaceFolder}/build/devicecode.elf", - "-stack-size=3KB", + "-stack-size=8KB", "-serial=none", "-target=pico2", "-tags", "pico_bb_proto_1", diff --git a/README.md b/README.md index a94e2d6..5d77520 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ tinygo flash -stack-size=3KB -monitor -scheduler tasks -target=pico -tags "pico_bb_proto_1" main.go ## Flashing ISOC Power Board via USB port on Pico2 -tinygo flash -stack-size=3KB -monitor -scheduler tasks -target=pico2 -tags "pico_bb_proto_1" main.go +tinygo flash -stack-size=8KB -monitor -scheduler tasks -target=pico2 -tags "pico_bb_proto_1" main.go ------------------- diff --git a/go.mod b/go.mod index f60f86a..6962427 100644 --- a/go.mod +++ b/go.mod @@ -1,11 +1,14 @@ module devicecode-go -go 1.25.0 +go 1.25.1 require ( + pico2-a-b v0.0.0 github.com/jangala-dev/tinygo-uartx v0.0.0-20251028085354-58b6258234b3 golang.org/x/exp v0.0.0-20251002181428-27f1f14c8bb9 tinygo.org/x/drivers v0.33.0 ) require github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect + +replace pico2-a-b => ../pico2-a-b diff --git a/go.sum b/go.sum index b2f089d..a00618c 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,5 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= -github.com/jangala-dev/tinygo-uartx v0.0.0-20251008020047-bc80b114e3cc h1:HU2VI0lw5wlu1rUgjzSuVH7IWQMNdZEbpDaoxCTVMmY= -github.com/jangala-dev/tinygo-uartx v0.0.0-20251008020047-bc80b114e3cc/go.mod 
h1:e3HxjGzBZBIsn/oYvWr707ug3IbkglEyivyYVxHRph4= github.com/jangala-dev/tinygo-uartx v0.0.0-20251028085354-58b6258234b3 h1:b6mCDQEeeICoGpsbKyh/kfIRnr2DMK/wACLLi0t8uoU= github.com/jangala-dev/tinygo-uartx v0.0.0-20251028085354-58b6258234b3/go.mod h1:e3HxjGzBZBIsn/oYvWr707ug3IbkglEyivyYVxHRph4= golang.org/x/exp v0.0.0-20251002181428-27f1f14c8bb9 h1:TQwNpfvNkxAVlItJf6Cr5JTsVZoC/Sj7K3OZv2Pc14A= diff --git a/main.go b/main.go index 1a77591..74b705d 100644 --- a/main.go +++ b/main.go @@ -27,7 +27,12 @@ func main() { ctx := context.Background() log.Println("[main] bootstrapping bus …") - b := bus.NewBus(3, "+", "#") + // Queue length must cover the retained replay burst when fabric + // subscribes to wildcard export patterns (hal/cap/env/#, + // hal/cap/power/#). Each capability publishes retained info + + // status + value; pico_bb_proto_1 has ~26 retained topics across + // env and power domains. 32 provides margin for growth. + b := bus.NewBus(32, "+", "#") halConn := b.NewConnection("hal") uiConn := b.NewConnection("ui") @@ -43,7 +48,7 @@ func main() { } // Reactor - r := reactor.NewReactor(uiConn) + r := reactor.NewReactor(b, uiConn) r.Run(ctx) } diff --git a/services/fabric/config.go b/services/fabric/config.go new file mode 100644 index 0000000..28b7cb1 --- /dev/null +++ b/services/fabric/config.go @@ -0,0 +1,124 @@ +package fabric + +import ( + "encoding/json" + + "devicecode-go/types" +) + +// decodeHALConfig extracts a HALConfig from an arbitrary payload, +// normalizing Lua empty-table encoding ({} → []) for known slice fields. 
+func decodeHALConfig(payload any) (types.HALConfig, string) {
+	// The second result is a reason tag: "" means success, any other
+	// value explains why the payload could not be decoded.
+	switch v := payload.(type) {
+	case types.HALConfig:
+		return v, ""
+	case *types.HALConfig:
+		if v == nil {
+			return types.HALConfig{}, "nil_hal_config"
+		}
+		return *v, ""
+	case json.RawMessage:
+		return decodeHALConfigBytes(v)
+	case []byte:
+		return decodeHALConfigBytes(v)
+	default:
+		// Any other shape is round-tripped through JSON so it receives
+		// the same validation and normalization as raw bytes.
+		b, err := json.Marshal(v)
+		if err != nil {
+			return types.HALConfig{}, "payload_marshal_failed: " + err.Error()
+		}
+		return decodeHALConfigBytes(b)
+	}
+}
+
+// decodeHALConfigBytes decodes a JSON object into a HALConfig. The object
+// must carry a "devices" key. On failure it returns the zero HALConfig and
+// a non-empty reason string.
+func decodeHALConfigBytes(b []byte) (types.HALConfig, string) {
+	var probe map[string]json.RawMessage
+	if err := json.Unmarshal(b, &probe); err != nil {
+		return types.HALConfig{}, "json_unmarshal_failed: " + err.Error() + "; raw=" + truncateRawJSON(b)
+	}
+	if _, ok := probe["devices"]; !ok {
+		return types.HALConfig{}, "missing_devices_field; raw=" + truncateRawJSON(b)
+	}
+
+	// Lua encodes empty tables as {} (object) not [] (array).
+	// Normalize known slice fields so Go unmarshal accepts them.
+	for _, key := range []string{"devices", "pollers"} {
+		if raw, ok := probe[key]; ok && isEmptyJSONObject(raw) {
+			probe[key] = json.RawMessage("[]")
+		}
+	}
+	fixed, err := json.Marshal(probe)
+	if err != nil {
+		return types.HALConfig{}, "normalize_failed: " + err.Error()
+	}
+
+	var out types.HALConfig
+	if err := json.Unmarshal(fixed, &out); err != nil {
+		return types.HALConfig{}, "hal_config_unmarshal_failed: " + err.Error() + "; raw=" + truncateRawJSON(fixed)
+	}
+	return out, ""
+}
+
+// isEmptyJSONObject reports whether raw is a JSON object with no members,
+// i.e. "{}" with optional JSON whitespace between the braces. raw is
+// expected to be a value already accepted by json.Unmarshal.
+func isEmptyJSONObject(raw json.RawMessage) bool {
+	if len(raw) < 2 || raw[0] != '{' || raw[len(raw)-1] != '}' {
+		return false
+	}
+	for _, c := range raw[1 : len(raw)-1] {
+		switch c {
+		case ' ', '\t', '\r', '\n':
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// decodeHALState extracts a HALState from an arbitrary payload. The bool
+// reports success; on failure the zero HALState is returned.
+func decodeHALState(payload any) (types.HALState, bool) {
+	switch v := payload.(type) {
+	case types.HALState:
+		return v, true
+	case *types.HALState:
+		if v == nil {
+			return types.HALState{}, false
+		}
+		return *v, true
+	case json.RawMessage:
+		return decodeHALStateBytes(v)
+	case []byte:
+		return decodeHALStateBytes(v)
+	default:
+		b, err := json.Marshal(v)
+		if err != nil {
+			return types.HALState{}, false
+		}
+		return decodeHALStateBytes(b)
+	}
+}
+
+// decodeHALStateBytes unmarshals b into a HALState.
+//
+// The decode is a separate statement on purpose: in
+// `return out, json.Unmarshal(b, &out) == nil` the Go spec does not order
+// the read of out relative to the call, so a conforming compiler may
+// return the zero value together with ok=true.
+func decodeHALStateBytes(b []byte) (types.HALState, bool) {
+	var out types.HALState
+	if err := json.Unmarshal(b, &out); err != nil {
+		return types.HALState{}, false
+	}
+	return out, true
+}
+
+// decodePayload turns a wire payload into a generic value. Raw JSON
+// (json.RawMessage or []byte) is decoded when it parses; empty input
+// yields nil; unparseable input is returned as bytes. Every other payload
+// type is returned unchanged.
+func decodePayload(payload any) any {
+	switch v := payload.(type) {
+	case nil:
+		return nil
+	case json.RawMessage:
+		if len(v) == 0 {
+			return nil
+		}
+		var out any
+		if err := json.Unmarshal(v, &out); err == nil {
+			return out
+		}
+		// Type conversion only: the result shares v's backing array.
+		return []byte(v)
+	case []byte:
+		if len(v) == 0 {
+			return nil
+		}
+		var out any
+		if err := json.Unmarshal(v, &out); err == nil {
+			return out
+		}
+		// Unlike the RawMessage case, hand back a private copy.
+		cp := make([]byte, len(v))
+		copy(cp, v)
+		return cp
+	default:
+		return v
+	}
+}
+
+// truncateRawJSON renders b for inclusion in an error string, keeping at
+// most the first 160 bytes and appending "..." when it had to cut.
+func truncateRawJSON(b []byte) string {
+	if len(b) == 0 {
+		return ""
+	}
+	// Named limit rather than max so the builtin max is not shadowed.
+	const limit = 160
+	if len(b) <= limit {
+		return string(b)
+	}
+	return string(b[:limit]) + "..."
+}
diff --git a/services/fabric/fabric.go b/services/fabric/fabric.go
new file mode 100644
index 0000000..0a2ab70
--- /dev/null
+++ b/services/fabric/fabric.go
@@ -0,0 +1,128 @@
+package fabric
+
+import (
+	"context"
+	"sync/atomic"
+	"time"
+
+	"devicecode-go/bus"
+	"devicecode-go/x/strconvx"
+)
+
+// Transport abstracts the byte stream as newline-delimited JSON lines.
+type Transport interface {
+	ReadLine() ([]byte, error)
+	WriteLine(data []byte) error
+	Close() error
+}
+
+const protoVersion = 1
+const defaultLinkID = "mcu0"
+
+// LinkConfig carries the fabric link parameters that the CM5 publishes
+// alongside its own session/transfer-mgr instances. Mirrors the relevant
+// keys in `bigbox-v1-cm-2.json` `service.fabric.links.` for the
+// MCU-facing link. Missing fields fall back to release defaults via
+// applyDefaults so callers can pass `LinkConfig{}` to mean "release".
+type LinkConfig struct {
+	// ChunkSize is the expected raw-byte payload per xfer_chunk. The MCU
+	// is receive-only for transfers, so this is informational/validation
+	// only on the Go side. Release: 2048 bytes.
+	ChunkSize uint32
+	// PhaseTimeout is the idle-chunk watchdog: an active inbound transfer
+	// is aborted with reason="timeout" if no xfer_chunk arrives within
+	// this window. Mirrors transfer_mgr.lua's `phase_timeout`.
+	// Release: 15s.
+	PhaseTimeout time.Duration
+	// PingInterval drives the unconditional outbound ping cadence after
+	// the link is established (`session_ctl.lua` resets next_ping_at =
+	// now + ping_interval after every send; not TX-activity-based).
+	// Release: 10s.
+	PingInterval time.Duration
+	// LivenessTimeout tears the link down if no frame arrives within
+	// this window once established. Mirrors session_ctl.lua's
+	// liveness_timeout_s. Release: 30s.
+	LivenessTimeout time.Duration
+	// MaxInboundHelpers caps the number of in-flight inbound RPC calls.
+	// Excess inbound calls reply `{ok=false, err="busy"}` per
+	// rpc_bridge.lua's `spawn_local_call_helper`. Lua default is 64
+	// (falls back to max_pending_calls); we keep that for parity.
+	MaxInboundHelpers int
+	// RPCQuantum and BulkQuantum control the writer's weighted
+	// round-robin between the rpc and bulk lanes after the control
+	// lane drains. Mirrors writer.lua's lane scheduler. Release: 4 and 1.
+	RPCQuantum  int
+	BulkQuantum int
+}
+
+// DefaultLinkConfig returns the release values documented on each
+// LinkConfig field.
+func DefaultLinkConfig() LinkConfig {
+	return LinkConfig{
+		ChunkSize:         2048,
+		PhaseTimeout:      15 * time.Second,
+		PingInterval:      10 * time.Second,
+		LivenessTimeout:   30 * time.Second,
+		MaxInboundHelpers: 64,
+		RPCQuantum:        4,
+		BulkQuantum:       1,
+	}
+}
+
+// applyDefaults replaces every zero-valued field of c with the matching
+// value from DefaultLinkConfig. Only exact zero triggers the fallback;
+// any other value, including a negative one, is kept as given.
+func (c *LinkConfig) applyDefaults() {
+	d := DefaultLinkConfig()
+	if c.ChunkSize == 0 {
+		c.ChunkSize = d.ChunkSize
+	}
+	if c.PhaseTimeout == 0 {
+		c.PhaseTimeout = d.PhaseTimeout
+	}
+	if c.PingInterval == 0 {
+		c.PingInterval = d.PingInterval
+	}
+	if c.LivenessTimeout == 0 {
+		c.LivenessTimeout = d.LivenessTimeout
+	}
+	if c.MaxInboundHelpers == 0 {
+		c.MaxInboundHelpers = d.MaxInboundHelpers
+	}
+	if c.RPCQuantum == 0 {
+		c.RPCQuantum = d.RPCQuantum
+	}
+	if c.BulkQuantum == 0 {
+		c.BulkQuantum = d.BulkQuantum
+	}
+}
+
+// nextSessionID is the process-wide counter behind newLocalSID; it is
+// atomic so concurrent callers never receive the same value.
+var nextSessionID atomic.Uint64
+
+// newLocalSID returns a fresh local session id of the form
+// "mcu-sid-" followed by the incremented counter value.
+func newLocalSID() string {
+	return "mcu-sid-" + strconvx.Utoa64(nextSessionID.Add(1))
+}
+
+// Run starts the fabric session. Blocks until ctx is cancelled or the
+// transport returns an unrecoverable error. The MCU is a hello
+// responder (CM5 always initiates hello/hello_ack), but otherwise
+// runs the symmetric session_ctl semantics: once established, it
+// sends pings every PingInterval and tears the link down if no frame
+// arrives within LivenessTimeout. Mirrors session_ctl.lua at
+// devicecode-lua@2c88090.
+func Run(ctx context.Context, tr Transport, conn *bus.Connection, nodeID, peerID string, cfg LinkConfig) { + s := session{ + linkID: defaultLinkID, + nodeID: nodeID, + peerID: peerID, + localSID: newLocalSID(), + tr: tr, + conn: conn, + cfg: cfg, + } + s.run(ctx) +} diff --git a/services/fabric/fabric_test.go b/services/fabric/fabric_test.go new file mode 100644 index 0000000..361495c --- /dev/null +++ b/services/fabric/fabric_test.go @@ -0,0 +1,1861 @@ +package fabric + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io" + "strings" + "testing" + "time" + + "devicecode-go/bus" + "devicecode-go/types" + "devicecode-go/x/shmring" +) + +func pipePair() (*rwTransport, *rwTransport) { + r1, w1 := io.Pipe() + r2, w2 := io.Pipe() + return newRWTransport(r2, w1), newRWTransport(r1, w2) +} + +func newBus() *bus.Bus { return bus.NewBus(3, "+", "#") } + +type captureTransport struct { + writes [][]byte + writeErr error +} + +func (t *captureTransport) ReadLine() ([]byte, error) { return nil, io.EOF } + +func (t *captureTransport) WriteLine(data []byte) error { + if t.writeErr != nil { + return t.writeErr + } + cp := append([]byte(nil), data...) 
+ t.writes = append(t.writes, cp) + return nil +} + +func (t *captureTransport) Close() error { return nil } + +func readMsg[T any](t *testing.T, tr Transport) T { + t.Helper() + line, err := tr.ReadLine() + if err != nil { + t.Fatalf("ReadLine: %v", err) + } + var msg T + if err := json.Unmarshal(line, &msg); err != nil { + t.Fatalf("Unmarshal %q: %v", line, err) + } + return msg +} + +func sendMsg(t *testing.T, tr Transport, v any) { + t.Helper() + b := marshal(v) + if err := tr.WriteLine(b[:len(b)-1]); err != nil { + t.Fatalf("WriteLine: %v", err) + } +} + +const testCM5SID = "s1" + +func bringUp(t *testing.T, cm5 Transport) protoHelloAck { + t.Helper() + sendMsg(t, cm5, protoHello{ + Type: "hello", Node: "cm5-local", Peer: "mcu-1", SID: testCM5SID, Proto: protoVersion, + }) + ack := readMsg[protoHelloAck](t, cm5) + if !ack.OK || ack.Node != "mcu-1" || ack.SID == "" || ack.Proto != protoVersion { + t.Fatalf("bad hello_ack: %+v", ack) + } + time.Sleep(50 * time.Millisecond) + return ack +} + +func unlockExports(t *testing.T, cm5 Transport) { + t.Helper() + sendMsg(t, cm5, protoPing{Type: "ping", TS: 77, SID: testCM5SID}) + pong := readMsg[protoPong](t, cm5) + if pong.Type != "pong" { + t.Fatalf("expected pong, got %q", pong.Type) + } +} + +// ---- codec ---- + +func TestCodecRoundTrip(t *testing.T) { + orig := protoHello{Type: "hello", Node: "mcu-1", Peer: "cm5-local", SID: "abc", Proto: protoVersion} + data := marshal(orig) + if !bytes.HasSuffix(data, []byte("\n")) { + t.Error("marshal should end with newline") + } + jsonPart := data[:len(data)-1] + if bytes.Contains(jsonPart, []byte("\n")) { + t.Error("JSON should not contain embedded newlines") + } + if protoType(jsonPart) != "hello" { + t.Errorf("protoType = %q", protoType(jsonPart)) + } + var dec protoHello + json.Unmarshal(jsonPart, &dec) + if dec != orig { + t.Errorf("round-trip: %+v vs %+v", dec, orig) + } +} + +func TestCodecAllTypes(t *testing.T) { + for _, tc := range []struct { + v any + want string + 
}{ + {protoHello{Type: "hello"}, "hello"}, + {protoHelloAck{Type: "hello_ack"}, "hello_ack"}, + {protoPing{Type: "ping", TS: 1}, "ping"}, + {protoPong{Type: "pong", TS: 2}, "pong"}, + {protoPub{Type: "pub", Topic: []string{"a"}}, "pub"}, + {protoUnretain{Type: "unretain", Topic: []string{"a"}}, "unretain"}, + {protoCall{Type: "call", ID: "c1"}, "call"}, + {protoReply{Type: "reply", Corr: "c1", OK: true}, "reply"}, + {protoXferBegin{Type: "xfer_begin", XferID: "x1"}, "xfer_begin"}, + {protoXferReady{Type: "xfer_ready", XferID: "x1"}, "xfer_ready"}, + {protoXferChunk{Type: "xfer_chunk", XferID: "x1"}, "xfer_chunk"}, + {protoXferNeed{Type: "xfer_need", XferID: "x1"}, "xfer_need"}, + {protoXferCommit{Type: "xfer_commit", XferID: "x1"}, "xfer_commit"}, + {protoXferDone{Type: "xfer_done", XferID: "x1"}, "xfer_done"}, + {protoXferAbort{Type: "xfer_abort", XferID: "x1", Err: "aborted"}, "xfer_abort"}, + } { + b := marshal(tc.v) + if got := protoType(b[:len(b)-1]); got != tc.want { + t.Errorf("protoType = %q, want %q", got, tc.want) + } + } +} + +func TestWireTypeBadInput(t *testing.T) { + for _, b := range [][]byte{[]byte("not json"), []byte(`{"no_type":true}`), nil} { + if got := protoType(b); got != "" { + t.Errorf("protoType(%q) = %q, want empty", b, got) + } + } +} + +func TestWireTypeIgnoresNestedTypeKeys(t *testing.T) { + // protoType must return the top-level discriminator, not a nested + // payload.type / meta.type key. The previous heuristic-only scan + // would mis-route e.g. a `pub` with a payload that happened to + // contain its own "type" field. 
+ for _, tc := range []struct { + line []byte + want string + }{ + // Nested payload object with its own "type": + {[]byte(`{"payload":{"type":"x"},"type":"pub"}`), "pub"}, + // Nested type appears before the real top-level type: + {[]byte(`{"meta":{"type":"firmware"},"type":"xfer_begin","xfer_id":"a"}`), "xfer_begin"}, + // Type buried inside an array element: + {[]byte(`{"topic":["a","type","b"],"type":"unretain"}`), "unretain"}, + // Type as a substring of a value (must NOT match): + {[]byte(`{"id":"my-type-here","type":"call"}`), "call"}, + // Real-world hello shape from CM5 (regression for the malformed-frame bug): + {[]byte(`{"sid":"a08590c4-afb8-4a23-ae39-ded871a3d433","node":"cm5","type":"hello"}`), "hello"}, + } { + if got := protoType(tc.line); got != tc.want { + t.Errorf("protoType(%s) = %q, want %q", tc.line, got, tc.want) + } + } +} + +// ---- transport ---- + +func TestTransportRoundTrip(t *testing.T) { + a, b := pipePair() + done := make(chan struct{}) + go func() { + defer close(done) + line, err := b.ReadLine() + if err != nil { + t.Errorf("ReadLine: %v", err) + return + } + if string(line) != `{"type":"ping","ts":99}` { + t.Errorf("got %q", line) + } + }() + sendMsg(t, a, protoPing{Type: "ping", TS: 99}) + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("timeout") + } +} + +func TestOversizeLineRecovery(t *testing.T) { + big := `{"type":"ping","ts":0,"x":"` + strings.Repeat("x", maxLineLen+100) + `"}` + input := big + "\n" + `{"type":"ping","ts":3}` + "\n" + tr := newRWTransport(strings.NewReader(input), io.Discard) + _, err := tr.ReadLine() + if !errors.Is(err, ErrLineTooLong) { + t.Fatalf("expected ErrLineTooLong, got %v", err) + } + line, err := tr.ReadLine() + if err != nil { + t.Fatalf("second ReadLine: %v", err) + } + if string(line) != `{"type":"ping","ts":3}` { + t.Errorf("got %q", line) + } +} + +// ---- shmring transport ---- + +func TestShmringTransportRoundTrip(t *testing.T) { + rx := shmring.New(256) + tx := 
shmring.New(256) + mcuTr := NewShmringTransport(rx, tx) + defer mcuTr.Close() + + rx.TryWriteFrom([]byte(`{"type":"ping","ts":42}` + "\n")) + line, err := mcuTr.ReadLine() + if err != nil { + t.Fatalf("ReadLine: %v", err) + } + if string(line) != `{"type":"ping","ts":42}` { + t.Errorf("got %q", line) + } + + if err := mcuTr.WriteLine([]byte(`{"type":"pong","ts":42}`)); err != nil { + t.Fatalf("WriteLine: %v", err) + } + var out [128]byte + n := tx.TryReadInto(out[:]) + if string(out[:n]) != `{"type":"pong","ts":42}`+"\n" { + t.Errorf("tx got %q", out[:n]) + } +} + +func TestShmringTransportMultiLine(t *testing.T) { + rx := shmring.New(256) + tr := NewShmringTransport(rx, shmring.New(256)) + defer tr.Close() + rx.TryWriteFrom([]byte(`{"type":"ping","ts":1}` + "\n" + `{"type":"ping","ts":2}` + "\n")) + line1, _ := tr.ReadLine() + line2, _ := tr.ReadLine() + if string(line1) != `{"type":"ping","ts":1}` { + t.Errorf("line1 = %q", line1) + } + if string(line2) != `{"type":"ping","ts":2}` { + t.Errorf("line2 = %q", line2) + } +} + +func TestShmringTransportReadLineWrapsAcrossSegments(t *testing.T) { + rx := shmring.New(8) + tr := NewShmringTransport(rx, shmring.New(8)) + defer tr.Close() + + rx.TryWriteFrom([]byte("123456")) + var discard [6]byte + if n := rx.TryReadInto(discard[:]); n != len(discard) { + t.Fatalf("priming read = %d, want %d", n, len(discard)) + } + if n := rx.TryWriteFrom([]byte("ab\n")); n != 3 { + t.Fatalf("wrapped write = %d, want 3", n) + } + + line, err := tr.ReadLine() + if err != nil { + t.Fatalf("ReadLine: %v", err) + } + if string(line) != "ab" { + t.Errorf("got %q", line) + } +} + +func TestShmringTransportWriteLineWrapsAcrossSegments(t *testing.T) { + tx := shmring.New(8) + tr := NewShmringTransport(shmring.New(8), tx) + defer tr.Close() + + tx.TryWriteFrom([]byte("123456")) + var discard [6]byte + if n := tx.TryReadInto(discard[:]); n != len(discard) { + t.Fatalf("priming read = %d, want %d", n, len(discard)) + } + + if err := 
tr.WriteLine([]byte("ab")); err != nil { + t.Fatalf("WriteLine: %v", err) + } + var out [3]byte + if n := tx.TryReadInto(out[:]); n != len(out) { + t.Fatalf("wrapped read = %d, want %d", n, len(out)) + } + if string(out[:]) != "ab\n" { + t.Errorf("tx got %q", out[:]) + } +} + +func TestShmringTransportOversize(t *testing.T) { + // Ring must be larger than maxLineLen+100 + newline + the trailing ping + // frame so the producer can deposit both lines without blocking. The rx + // ring used to be 4096 when maxLineLen=2048, leaving comfortable + // headroom; now that maxLineLen=4096, bump to 8192. + rx := shmring.New(8192) + tr := NewShmringTransport(rx, shmring.New(256)) + defer tr.Close() + big := make([]byte, maxLineLen+100) + for i := range big { + big[i] = 'x' + } + rx.TryWriteFrom(big) + rx.TryWriteFrom([]byte("\n")) + rx.TryWriteFrom([]byte(`{"type":"ping","ts":7}` + "\n")) + _, err := tr.ReadLine() + if !errors.Is(err, ErrLineTooLong) { + t.Fatalf("expected ErrLineTooLong, got %v", err) + } + line, err := tr.ReadLine() + if err != nil { + t.Fatalf("second ReadLine: %v", err) + } + if string(line) != `{"type":"ping","ts":7}` { + t.Errorf("got %q", line) + } +} + +func TestShmringTransportCloseUnblocks(t *testing.T) { + tr := NewShmringTransport(shmring.New(256), shmring.New(256)) + done := make(chan struct{}) + go func() { tr.ReadLine(); close(done) }() + tr.Close() + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("ReadLine did not unblock") + } +} + +// ---- handshake ---- + +func TestHandshake(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + + sendMsg(t, cm5, protoHello{ + Type: "hello", Node: "cm5-local", Peer: "mcu-1", SID: "s1", Proto: protoVersion, + }) + ack := readMsg[protoHelloAck](t, cm5) + if !ack.OK || ack.Node != "mcu-1" || ack.SID == "" || ack.Proto != 
protoVersion { + t.Errorf("bad ack: %+v", ack) + } + time.Sleep(50 * time.Millisecond) + sendMsg(t, cm5, protoPing{Type: "ping", TS: 99, SID: "s1"}) + pong := readMsg[protoPong](t, cm5) + if pong.TS != 99 || pong.SID != ack.SID { + t.Errorf("bad pong: %+v ack=%+v", pong, ack) + } +} + +func TestSessionReset(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + sendMsg(t, cm5, protoHello{Type: "hello", Node: "cm5-local", Peer: "mcu-1", SID: "s2", Proto: protoVersion}) + ack := readMsg[protoHelloAck](t, cm5) + if !ack.OK || ack.SID == "" || ack.Proto != protoVersion { + t.Error("hello_ack.OK = false") + } + sendMsg(t, cm5, protoPing{Type: "ping", TS: 55, SID: "s2"}) + pong := readMsg[protoPong](t, cm5) + if pong.TS != 55 || pong.SID != ack.SID { + t.Errorf("bad pong: %+v ack=%+v", pong, ack) + } +} + +func TestRejectsWrongPeer(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + + sendMsg(t, cm5, protoHello{Type: "hello", Node: "cm5-local", Peer: "mcu-999", SID: "s1", Proto: protoVersion}) + gotLine := make(chan readResult, 1) + go func() { + line, err := cm5.ReadLine() + gotLine <- readResult{line: line, err: err} + }() + select { + case <-gotLine: + t.Fatal("got response to wrong-peer hello") + case <-time.After(200 * time.Millisecond): + } + sendMsg(t, cm5, protoHello{Type: "hello", Node: "cm5-local", Peer: "mcu-1", SID: "s2", Proto: protoVersion}) + select { + case res := <-gotLine: + if res.err != nil { + t.Fatalf("ReadLine error: %v", res.err) + } + var ack protoHelloAck + if err := json.Unmarshal(res.line, &ack); err != nil { + t.Fatalf("expected hello_ack: %v", err) + } + if !ack.OK { + 
t.Fatal("hello_ack.OK = false") + } + case <-time.After(2 * time.Second): + t.Fatal("no hello_ack for correct peer") + } +} + +func TestRejectsMissingNodeWhenPeerPinned(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + + gotLine := make(chan readResult, 1) + go func() { + line, err := cm5.ReadLine() + gotLine <- readResult{line: line, err: err} + }() + + sendMsg(t, cm5, protoHello{Type: "hello", Peer: "mcu-1", SID: "s1", Proto: protoVersion}) + select { + case <-gotLine: + t.Fatal("got response to hello without node") + case <-time.After(200 * time.Millisecond): + } + + sendMsg(t, cm5, protoHello{Type: "hello", Node: "cm5-local", Peer: "mcu-1", SID: "s2", Proto: protoVersion}) + select { + case res := <-gotLine: + if res.err != nil { + t.Fatalf("ReadLine error: %v", res.err) + } + var ack protoHelloAck + if err := json.Unmarshal(res.line, &ack); err != nil { + t.Fatalf("expected hello_ack: %v", err) + } + if !ack.OK { + t.Fatal("hello_ack.OK = false") + } + case <-time.After(2 * time.Second): + t.Fatal("no hello_ack for correct peer") + } +} + +func TestPingPong(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + ack := bringUp(t, cm5) + sendMsg(t, cm5, protoPing{Type: "ping", TS: 42, SID: "s1"}) + pong := readMsg[protoPong](t, cm5) + if pong.TS != 42 || pong.SID != ack.SID { + t.Errorf("bad pong: %+v ack=%+v", pong, ack) + } +} + +func TestMCUNeverInitiates(t *testing.T) { + // Pre-handshake the MCU is silent; tickPing only fires once the link + // is up. Active outbound pings post-handshake are covered by + // TestSessionPingsUnconditionally. 
+ mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + gotLine := make(chan struct{}) + go func() { cm5.ReadLine(); close(gotLine) }() + select { + case <-gotLine: + t.Fatal("MCU sent unsolicited message") + case <-time.After(2 * time.Second): + } + cancel() +} + +func TestSessionPingsUnconditionally(t *testing.T) { + // session_ctl.lua resets next_ping_at = now + ping_interval after + // every send, with no TX-activity dependency. Once the link is up, + // pings must keep flowing even if neither side talks otherwise. + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", LinkConfig{PingInterval: 150 * time.Millisecond}) + bringUp(t, cm5) + + for i := 0; i < 3; i++ { + ping := readMsg[protoPing](t, cm5) + if ping.Type != msgPing { + t.Fatalf("ping[%d] type = %q, want %q", i, ping.Type, msgPing) + } + } +} + +func TestReadyHeldUntilExportHoldoff(t *testing.T) { + // session_ctl.lua / rpc_bridge.lua: ready == established and rpc_ready, + // where rpc_ready edges true only after retained replay completes. + // The Go side gates rpcReady on exportReadyAt elapsing post-handshake. 
+ mcu, cm5 := pipePair() + b := newBus() + observer := b.NewConnection("observer") + sub := observer.Subscribe(bus.T("state", "fabric", "link", "mcu0")) + defer observer.Unsubscribe(sub) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + var sawNotReady, sawReady bool + deadline := time.After(3 * time.Second) + for !sawReady { + select { + case msg := <-sub.Channel(): + payload, ok := msg.Payload.(linkStatePayload) + if !ok { + t.Fatalf("payload type = %T", msg.Payload) + } + if payload.Established && !payload.Ready { + sawNotReady = true + } + if payload.Ready { + if !sawNotReady { + t.Fatalf("Ready edge raised without prior Established+!Ready state") + } + sawReady = true + } + case <-deadline: + t.Fatal("timeout waiting for Ready=true") + } + } +} + +func TestSessionResetUnretainsImports(t *testing.T) { + // rpc_bridge.lua's invalidate_imported_retained clears every imported + // retained slot on session-generation bump. The Go side mirrors this + // in promoteLink/teardownImportedRetained: each tracked local topic + // gets a nil-payload retained publish that clears the bus's retain + // store, so consumers don't see stale CM5-session data. + mcu, cm5 := pipePair() + b := newBus() + observer := b.NewConnection("observer") + cfgSub := observer.Subscribe(tConfigHAL) + defer observer.Unsubscribe(cfgSub) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + // Push a config via the import pub path so config/hal becomes a + // tracked imported retain. + sendMsg(t, cm5, protoPub{ + Type: msgPub, + Topic: []string{"config", "device"}, + Payload: json.RawMessage(`{"devices":[]}`), + Retain: true, + }) + + // Observe the local retain (non-nil payload). 
+ deadline := time.After(2 * time.Second) + gotInitial := false + for !gotInitial { + select { + case msg := <-cfgSub.Channel(): + if msg.Retained && msg.Payload != nil { + gotInitial = true + } + case <-deadline: + t.Fatal("timeout waiting for initial config/hal retain") + } + } + + // Force a session reset: hello with a new SID. Concurrent reader + // drains the new hello_ack the MCU sends back; pipePair is + // synchronous so without this the MCU's sendControl would block, + // promoteLink would never fire, and teardownImportedRetained would + // not run. + go func() { _ = readMsg[protoHelloAck](t, cm5) }() + sendMsg(t, cm5, protoHello{ + Type: msgHello, + Node: "cm5-local", + Peer: "mcu-1", + SID: "cm5-sid-new", + }) + + // Expect a nil-payload retained publish on config/hal. + deadline = time.After(2 * time.Second) + for { + select { + case msg := <-cfgSub.Channel(): + if msg.Retained && msg.Payload == nil { + return + } + case <-deadline: + t.Fatal("timeout waiting for unretain after session reset") + } + } +} + +func TestSessionResetUnretainsImportsAfterTransientPub(t *testing.T) { + // Regression: a non-retained pub arriving on the same imported topic + // after an earlier retained pub must NOT untrack — the bus retain + // store still holds the prior retained value (the bus only clears it + // on explicit unretain/retained-nil). Without this, the stale retain + // would survive a session-reset because we'd think nothing was tracked. + prev := importPublishRules + importPublishRules = append([]importRule{}, prev...) 
+ importPublishRules = append(importPublishRules, importRule{ + wire: []string{"telem", "device", "fast"}, + local: []string{"telem", "hal", "fast"}, + }) + t.Cleanup(func() { importPublishRules = prev }) + + mcu, cm5 := pipePair() + b := newBus() + observer := b.NewConnection("observer") + subFast := observer.Subscribe(bus.T("telem", "hal", "fast")) + defer observer.Unsubscribe(subFast) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + // 1) Retained import — establishes the bus retain + tracking entry. + sendMsg(t, cm5, protoPub{ + Type: msgPub, + Topic: []string{"telem", "device", "fast"}, + Payload: json.RawMessage(`{"v":1}`), + Retain: true, + }) + + // Drain until we see the retained payload. + deadline := time.After(2 * time.Second) + gotRetain := false + for !gotRetain { + select { + case msg := <-subFast.Channel(): + if msg.Retained && msg.Payload != nil { + gotRetain = true + } + case <-deadline: + t.Fatal("timeout waiting for initial retained pub") + } + } + + // 2) Non-retained pub on same topic — must not untrack. + sendMsg(t, cm5, protoPub{ + Type: msgPub, + Topic: []string{"telem", "device", "fast"}, + Payload: json.RawMessage(`{"v":2}`), + Retain: false, + }) + // Best-effort drain so the next subFast read sees the unretain edge. + deadline = time.After(500 * time.Millisecond) + draining := true + for draining { + select { + case <-subFast.Channel(): + case <-deadline: + draining = false + } + } + + // 3) Session reset → expect the original retain to be cleared. 
+ go func() { _ = readMsg[protoHelloAck](t, cm5) }() + sendMsg(t, cm5, protoHello{ + Type: msgHello, + Node: "cm5-local", + Peer: "mcu-1", + SID: "cm5-sid-new", + }) + + deadline = time.After(2 * time.Second) + for { + select { + case msg := <-subFast.Channel(): + if msg.Retained && msg.Payload == nil { + return + } + case <-deadline: + t.Fatal("timeout waiting for unretain after session reset") + } + } +} + +func TestWriterControlPreemptsRPCAndBulk(t *testing.T) { + // writer.lua drains the control lane first (no fairness); then + // weighted RR between rpc and bulk. Pre-load all three lanes and + // assert the drain order is: all control, then 4 rpc, then 1 bulk, + // then any remaining rpc/bulk (default rpc_quantum=4, bulk_quantum=1). + tr := &captureTransport{} + s := session{tr: tr, cfg: DefaultLinkConfig()} + s.txBulk.push([]byte(`{"type":"xfer_chunk","i":0}`)) + s.txBulk.push([]byte(`{"type":"xfer_chunk","i":1}`)) + for i := 0; i < 5; i++ { + s.txRPC.push([]byte(`{"type":"pub","i":` + string(rune('0'+i)) + `}`)) + } + s.txControl.push([]byte(`{"type":"ping"}`)) + s.txControl.push([]byte(`{"type":"xfer_need"}`)) + + if !s.flushWriter() { + t.Fatal("flushWriter returned false") + } + if len(tr.writes) != 9 { + t.Fatalf("writes = %d, want 9", len(tr.writes)) + } + // Control drains first. + want := []string{ + `{"type":"ping"}`, + `{"type":"xfer_need"}`, + // Then RR: 4 rpc, 1 bulk, 1 rpc, 1 bulk, 0 (no more bulk; remaining rpc). + `{"type":"pub","i":0}`, + `{"type":"pub","i":1}`, + `{"type":"pub","i":2}`, + `{"type":"pub","i":3}`, + `{"type":"xfer_chunk","i":0}`, + `{"type":"pub","i":4}`, + `{"type":"xfer_chunk","i":1}`, + } + for i, w := range want { + if string(tr.writes[i]) != w { + t.Fatalf("write[%d] = %q, want %q", i, tr.writes[i], w) + } + } +} + +func TestInboundCallBusyAtCapacity(t *testing.T) { + // rpc_bridge.lua's spawn_local_call_helper rejects with err="busy" + // when inbound_helpers >= max_inbound_helpers, before the route check. 
+ // With MaxInboundHelpers=1, the second concurrent inbound call must + // reply busy without going through routing. + prev := importCallRules + importCallRules = append([]importRule{}, prev...) + importCallRules = append(importCallRules, importRule{ + wire: []string{"rpc", "test", "noop"}, + local: []string{"rpc", "test", "noop"}, + }) + t.Cleanup(func() { importCallRules = prev }) + + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", LinkConfig{MaxInboundHelpers: 1}) + bringUp(t, cm5) + + // First call holds the only helper slot. The bus has no handler, so + // the call sits as a pending request until timeout. + sendMsg(t, cm5, protoCall{ + Type: msgCall, + ID: "c1", + Topic: []string{"rpc", "test", "noop"}, + Payload: json.RawMessage(`{}`), + TimeoutMs: 5000, + }) + + // Second call arrives while the helper is full → busy reply. + sendMsg(t, cm5, protoCall{ + Type: msgCall, + ID: "c2", + Topic: []string{"rpc", "test", "noop"}, + Payload: json.RawMessage(`{}`), + }) + + reply := readMsg[protoReply](t, cm5) + if reply.Corr != "c2" { + t.Fatalf("first reply corr = %q, want c2", reply.Corr) + } + if reply.OK || reply.Err != "busy" { + t.Fatalf("expected busy reply for c2, got %+v", reply) + } +} + +func TestUnknownTypeIgnored(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + cm5.WriteLine([]byte(`{"type":"future_msg"}`)) + sendMsg(t, cm5, protoPing{Type: "ping", TS: 1}) + pong := readMsg[protoPong](t, cm5) + if pong.TS != 1 { + t.Errorf("pong.TS = %d", pong.TS) + } +} + +func TestMalformedJSONIgnored(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, 
b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + cm5.WriteLine([]byte("not json")) + sendMsg(t, cm5, protoPing{Type: "ping", TS: 2}) + pong := readMsg[protoPong](t, cm5) + if pong.TS != 2 { + t.Errorf("pong.TS = %d", pong.TS) + } +} + +func TestCancelClosesCleanly(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan struct{}) + go func() { + Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + close(done) + }() + bringUp(t, cm5) + cancel() + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("Run did not return") + } +} + +func TestLinkStatePublishedOnHandshake(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + observer := b.NewConnection("observer") + sub := observer.Subscribe(bus.T("state", "fabric", "link", "mcu0")) + defer observer.Unsubscribe(sub) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + + ack := bringUp(t, cm5) + + var sawOpening bool + deadline := time.After(2 * time.Second) + for { + select { + case msg := <-sub.Channel(): + if msg == nil { + t.Fatal("nil link-state message") + } + payload, ok := msg.Payload.(linkStatePayload) + if !ok { + t.Fatalf("payload type = %T, want linkStatePayload", msg.Payload) + } + if payload.Status == "opening" { + sawOpening = true + } + if payload.Status == "ready" { + if payload.LinkID != "mcu0" { + t.Fatalf("link_id = %q, want mcu0", payload.LinkID) + } + if !payload.Ready || !payload.Established { + t.Fatalf("expected ready/established link state, got %+v", payload) + } + if payload.PeerID != "cm5-local" { + t.Fatalf("peer_id = %q, want cm5-local", payload.PeerID) + } + if payload.LocalSID != ack.SID { + t.Fatalf("local_sid = %q, want %q", payload.LocalSID, ack.SID) + } + if payload.PeerSID != "s1" { + 
t.Fatalf("peer_sid = %q, want s1", payload.PeerSID) + } + if !sawOpening { + t.Fatal("did not observe opening link state before ready") + } + return + } + case <-deadline: + t.Fatal("timed out waiting for ready link state") + } + } +} + +// ---- remap ---- + +func topicString(t bus.Topic) string { + if t == nil { + return "" + } + var parts []string + for i := 0; i < t.Len(); i++ { + parts = append(parts, t.At(i).(string)) + } + return strings.Join(parts, "/") +} + +func TestImportPublishTopic(t *testing.T) { + for _, tc := range []struct { + wire []string + want string + }{ + {[]string{"config", "device"}, "config/hal"}, + {[]string{"config", "other"}, ""}, + {[]string{"unknown", "x"}, ""}, + {nil, ""}, + } { + got := importPublishTopic(tc.wire) + if gotStr := topicString(got); gotStr != tc.want { + t.Errorf("importPublishTopic(%v) = %q, want %q", tc.wire, gotStr, tc.want) + } + } +} + +func TestImportCallTopic(t *testing.T) { + for _, tc := range []struct { + wire []string + want string + }{ + // rpc/hal/dump is handled directly by onCall, not via import rules. 
+ {[]string{"rpc", "hal", "other"}, ""}, + {[]string{"config", "device"}, ""}, + {nil, ""}, + } { + got := importCallTopic(tc.wire) + if gotStr := topicString(got); gotStr != tc.want { + t.Errorf("importCallTopic(%v) = %q, want %q", tc.wire, gotStr, tc.want) + } + } +} + +func TestExportTopic(t *testing.T) { + for _, tc := range []struct { + bus bus.Topic + want []string + }{ + {bus.T("hal", "cap", "env", "temperature", "core", "value"), []string{"state", "env", "temperature", "core", "value"}}, + {bus.T("hal", "cap", "power", "battery", "internal", "value"), []string{"state", "power", "battery", "internal", "value"}}, + {bus.T("hal", "state"), []string{"state", "hal"}}, + {bus.T("hal", "cap", "gpio", "fan", "value"), nil}, + {bus.T("other", "topic"), nil}, + } { + got := exportTopic(tc.bus) + if tc.want == nil { + if got != nil { + t.Errorf("exportTopic(%v) = %v, want nil", tc.bus, got) + } + } else { + if !slicesEqual(got, tc.want) { + t.Errorf("exportTopic(%v) = %v, want %v", tc.bus, got, tc.want) + } + } + } +} + +func TestExportCallTopic(t *testing.T) { + for _, tc := range []struct { + bus bus.Topic + want []string + }{ + {bus.T("fabric", "out", "rpc", "hal", "dump"), []string{"rpc", "hal", "dump"}}, + {bus.T("fabric", "out", "rpc", "hal"), nil}, + {bus.T("other", "topic"), nil}, + } { + got := exportCallTopic(tc.bus) + if tc.want == nil { + if got != nil { + t.Errorf("exportCallTopic(%v) = %v, want nil", tc.bus, got) + } + } else if !slicesEqual(got, tc.want) { + t.Errorf("exportCallTopic(%v) = %v, want %v", tc.bus, got, tc.want) + } + } +} + +func TestExportCallPatterns(t *testing.T) { + patterns := exportCallPatterns() + if len(patterns) != 1 { + t.Fatalf("len(exportCallPatterns()) = %d, want 1", len(patterns)) + } + if got := topicString(patterns[0]); got != "fabric/out/rpc/hal/dump" { + t.Fatalf("exportCallPatterns()[0] = %q, want fabric/out/rpc/hal/dump", got) + } +} + +func slicesEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + 
for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +// ---- pub import ---- + +func TestPubImport(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + conn := b.NewConnection("fabric") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, conn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + reader := b.NewConnection("test") + sub := reader.Subscribe(bus.T("config", "hal")) + + sendMsg(t, cm5, protoPub{ + Type: "pub", + Topic: []string{"config", "device"}, + Payload: json.RawMessage(`{"devices":[],"pollers":[]}`), + Retain: true, + }) + + select { + case m := <-sub.Channel(): + if m == nil { + t.Fatal("nil message") + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for imported config on config/hal") + } +} + +// ---- pub export ---- + +func TestPubExport(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + publishConn := b.NewConnection("hal") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + unlockExports(t, cm5) + + publishConn.Publish(publishConn.NewMessage( + bus.T("hal", "cap", "env", "temperature", "core", "value"), + map[string]int{"deci_c": 412}, + true, + )) + + msg := readMsg[protoPub](t, cm5) + if msg.Type != "pub" { + t.Fatalf("expected pub, got %q", msg.Type) + } + want := []string{"state", "env", "temperature", "core", "value"} + if !slicesEqual(msg.Topic, want) { + t.Errorf("topic = %v, want %v", msg.Topic, want) + } + if !msg.Retain { + t.Error("expected retain=true") + } +} + +func TestUnretainExport(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + publishConn := b.NewConnection("hal") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", 
DefaultLinkConfig()) + bringUp(t, cm5) + unlockExports(t, cm5) + + // Publish retained value first. + publishConn.Publish(publishConn.NewMessage( + bus.T("hal", "cap", "env", "temperature", "core", "value"), + map[string]int{"deci_c": 412}, + true, + )) + pub := readMsg[protoPub](t, cm5) + if pub.Type != "pub" || !pub.Retain { + t.Fatalf("expected retained pub, got t=%q retain=%v", pub.Type, pub.Retain) + } + + // Clear retained state (retain=true, payload=nil). + publishConn.Publish(publishConn.NewMessage( + bus.T("hal", "cap", "env", "temperature", "core", "value"), + nil, + true, + )) + unr := readMsg[protoUnretain](t, cm5) + if unr.Type != "unretain" { + t.Fatalf("expected unretain, got %q", unr.Type) + } + want := []string{"state", "env", "temperature", "core", "value"} + if !slicesEqual(unr.Topic, want) { + t.Errorf("topic = %v, want %v", unr.Topic, want) + } +} + +func TestDrainExportsReturnsWhenSubscriptionClosed(t *testing.T) { + b := newBus() + conn := b.NewConnection("fabric") + sub := conn.Subscribe(bus.T("state", "#")) + conn.Unsubscribe(sub) + + s := session{ + link: linkUp, + exportSubs: []*bus.Subscription{sub}, + } + + done := make(chan struct{}) + go func() { + s.drainExports() + close(done) + }() + + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("drainExports did not return") + } +} + +func TestDrainExportsWaitsForStartupHoldoff(t *testing.T) { + b := newBus() + conn := b.NewConnection("fabric") + pub := b.NewConnection("hal") + sub := conn.Subscribe(bus.T("hal", "cap", "env", "#")) + defer conn.Unsubscribe(sub) + + msg := pub.NewMessage( + bus.T("hal", "cap", "env", "temperature", "core", "value"), + map[string]int{"deci_c": 412}, + true, + ) + + s := session{ + link: linkUp, + exportsEnabled: true, + exportSubs: []*bus.Subscription{sub}, + exportReadyAt: time.Now().Add(time.Second), + } + + pub.Publish(msg) + + done := make(chan struct{}) + go func() { + s.drainExports() + close(done) + }() + + select { + case <-done: 
+ case <-time.After(2 * time.Second): + t.Fatal("drainExports did not return") + } +} + +// ---- unretain ---- + +func TestPubIgnoredBeforeHandshake(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + + sendMsg(t, cm5, protoPub{ + Type: "pub", Topic: []string{"config", "device"}, + Payload: json.RawMessage(`{"v":1}`), Retain: true, + }) + time.Sleep(50 * time.Millisecond) + + reader := b.NewConnection("test") + sub := reader.Subscribe(bus.T("config", "device")) + defer reader.Unsubscribe(sub) + select { + case m := <-sub.Channel(): + t.Fatalf("unexpected pre-handshake publish: %+v", m) + case <-time.After(100 * time.Millisecond): + } +} + +func TestUnretainIgnoredBeforeHandshake(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + + writer := b.NewConnection("writer") + writer.Publish(writer.NewMessage(bus.T("config", "device"), json.RawMessage(`{"v":1}`), true)) + + reader := b.NewConnection("test") + sub := reader.Subscribe(bus.T("config", "device")) + defer reader.Unsubscribe(sub) + select { + case m := <-sub.Channel(): + if m == nil || m.Payload == nil { + t.Fatalf("expected retained config/device, got %+v", m) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for retained config/device") + } + + sendMsg(t, cm5, protoUnretain{Type: "unretain", Topic: []string{"config", "device"}}) + select { + case m := <-sub.Channel(): + t.Fatalf("unexpected pre-handshake unretain effect: %+v", m) + case <-time.After(100 * time.Millisecond): + } +} + +func TestUnretain(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + conn := b.NewConnection("fabric") + ctx, cancel := context.WithCancel(context.Background()) + 
defer cancel() + go Run(ctx, mcu, conn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + sendMsg(t, cm5, protoPub{ + Type: "pub", Topic: []string{"config", "device"}, + Payload: json.RawMessage(`{"v":1}`), Retain: true, + }) + time.Sleep(50 * time.Millisecond) + sendMsg(t, cm5, protoUnretain{Type: "unretain", Topic: []string{"config", "device"}}) + time.Sleep(50 * time.Millisecond) + + reader := b.NewConnection("test") + sub := reader.Subscribe(bus.T("config", "device")) + select { + case m := <-sub.Channel(): + if m != nil && m.Payload != nil { + t.Errorf("expected no retained message, got %+v", m) + } + case <-time.After(100 * time.Millisecond): + } +} + +// ---- call import ---- + +func TestCallIgnoredBeforeHandshake(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + + handler := b.NewConnection("handler") + sub := handler.Subscribe(bus.T("rpc", "hal", "dump")) + defer handler.Unsubscribe(sub) + + sendMsg(t, cm5, protoCall{ + Type: "call", ID: "pre-hello-1", Topic: []string{"rpc", "hal", "dump"}, + Payload: json.RawMessage(`{}`), TimeoutMs: 5000, + }) + + select { + case m := <-sub.Channel(): + t.Fatalf("unexpected pre-handshake call dispatch: %+v", m) + case <-time.After(100 * time.Millisecond): + } +} + +func TestCallImport(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + handler := b.NewConnection("handler") + sub := handler.Subscribe(bus.T("rpc", "hal", "dump")) + go func() { + for m := range sub.Channel() { + handler.Reply(m, map[string]string{"result": "ok"}, false) + } + }() + + sendMsg(t, cm5, protoCall{ + Type: "call", ID: 
"test-corr-1", Topic: []string{"rpc", "hal", "dump"}, + Payload: json.RawMessage(`{}`), TimeoutMs: 5000, + }) + + reply := readMsg[protoReply](t, cm5) + if reply.Corr != "test-corr-1" { + t.Errorf("corr = %q", reply.Corr) + } + if !reply.OK { + t.Errorf("reply not ok: %s", reply.Err) + } +} + +func TestCallNoRoute(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, b.NewConnection("fabric"), "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + sendMsg(t, cm5, protoCall{ + Type: "call", ID: "no-route-1", Topic: []string{"unknown", "endpoint"}, + Payload: json.RawMessage(`{}`), TimeoutMs: 1000, + }) + + reply := readMsg[protoReply](t, cm5) + if reply.Corr != "no-route-1" { + t.Errorf("corr = %q", reply.Corr) + } + if reply.OK { + t.Error("expected ok=false") + } + if reply.Err != "no_route" { + t.Errorf("err = %q, want no_route", reply.Err) + } +} + +func TestDumpCallReturnsConfigState(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + // Send config first so the session has state. + sendMsg(t, cm5, protoPub{ + Type: "pub", + Topic: []string{"config", "device"}, + Payload: json.RawMessage(`{"devices":[],"pollers":[]}`), + Retain: true, + }) + time.Sleep(100 * time.Millisecond) + + // Call dump. 
+ sendMsg(t, cm5, protoCall{ + Type: "call", ID: "dump-1", Topic: []string{"rpc", "hal", "dump"}, + Payload: json.RawMessage(`{"ask":"status"}`), TimeoutMs: 5000, + }) + + reply := readMsg[protoReply](t, cm5) + if reply.Corr != "dump-1" { + t.Errorf("corr = %q", reply.Corr) + } + if !reply.OK { + t.Errorf("expected ok=true, got err=%q", reply.Err) + } + var dump dumpReply + if err := json.Unmarshal(reply.Value, &dump); err != nil { + t.Fatalf("unmarshal dump reply: %v", err) + } + if !dump.Applied { + t.Error("expected applied=true") + } + if dump.ConfigCount != 1 { + t.Errorf("config_count = %d, want 1", dump.ConfigCount) + } +} + +func TestDumpCallDoesNotBlockPing(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + + // Send dump call and ping back-to-back. + sendMsg(t, cm5, protoCall{ + Type: "call", ID: "dump-1", Topic: []string{"rpc", "hal", "dump"}, + Payload: json.RawMessage(`{}`), TimeoutMs: 1000, + }) + sendMsg(t, cm5, protoPing{Type: "ping", TS: 77, SID: testCM5SID}) + + type readResult struct { + line []byte + err error + } + type wireHeader struct { + Type string `json:"type"` + } + var gotReply, gotPong bool + for i := 0; i < 2; i++ { + msg := readMsg[wireHeader](t, cm5) + switch msg.Type { + case msgReply: + gotReply = true + case msgPong: + gotPong = true + default: + t.Fatalf("unexpected message type %q", msg.Type) + } + } + if !gotReply { + t.Error("missing dump reply") + } + if !gotPong { + t.Error("missing pong") + } +} + +func TestCallExport(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + reqConn := b.NewConnection("caller") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + 
bringUp(t, cm5) + unlockExports(t, cm5) + + type result struct { + msg *bus.Message + err error + } + done := make(chan result, 1) + go func() { + msg, err := reqConn.RequestWait(context.Background(), reqConn.NewMessage( + bus.T("fabric", "out", "rpc", "hal", "dump"), + map[string]string{"ask": "status"}, + false, + )) + done <- result{msg: msg, err: err} + }() + + call := readMsg[protoCall](t, cm5) + if call.Type != "call" { + t.Fatalf("expected call, got %q", call.Type) + } + want := []string{"rpc", "hal", "dump"} + if !slicesEqual(call.Topic, want) { + t.Fatalf("topic = %v, want %v", call.Topic, want) + } + var payload map[string]string + if err := json.Unmarshal(call.Payload, &payload); err != nil { + t.Fatalf("Unmarshal payload: %v", err) + } + if payload["ask"] != "status" { + t.Fatalf("payload.ask = %q, want status", payload["ask"]) + } + + sendMsg(t, cm5, protoReply{ + Type: "reply", + Corr: call.ID, + OK: true, + Value: json.RawMessage(`{"ok":true,"remote":"cm5"}`), + }) + + select { + case res := <-done: + if res.err != nil { + t.Fatalf("RequestWait: %v", res.err) + } + if res.msg == nil { + t.Fatal("nil bus reply") + } + reply, ok := res.msg.Payload.(map[string]any) + if !ok { + t.Fatalf("payload type = %T, want map[string]any", res.msg.Payload) + } + if reply["remote"] != "cm5" { + t.Fatalf("reply.remote = %#v", reply["remote"]) + } + if reply["ok"] != true { + t.Fatalf("reply.ok = %#v", reply["ok"]) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for local reply") + } +} + +func TestCallExportOnlyConfiguredRule(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + reqConn := b.NewConnection("caller") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + unlockExports(t, cm5) + + // Use an unconfigured topic — only fabric/out/rpc/hal/dump is routed. 
+ reqCtx, reqCancel := context.WithTimeout(context.Background(), 250*time.Millisecond) + defer reqCancel() + go func() { + _, _ = reqConn.RequestWait(reqCtx, reqConn.NewMessage( + bus.T("fabric", "out", "rpc", "hal", "not_configured"), + map[string]string{"ask": "status"}, + false, + )) + }() + + gotLine := make(chan struct{}) + go func() { + _, _ = cm5.ReadLine() + close(gotLine) + }() + + select { + case <-gotLine: + t.Fatal("got wire call for unconfigured export rule") + case <-time.After(200 * time.Millisecond): + } +} + +func TestPendingWireCallsTimeout(t *testing.T) { + b := newBus() + fabricConn := b.NewConnection("fabric") + reqConn := b.NewConnection("caller") + msg := reqConn.NewMessage( + bus.T("fabric", "out", "rpc", "hal", "dump"), + map[string]string{"ask": "status"}, + false, + ) + sub := reqConn.Request(msg) + defer reqConn.Unsubscribe(sub) + + s := session{ + conn: fabricConn, + outboundCalls: []*outboundCall{ + {id: "wire-1", req: msg, deadline: time.Now().Add(-time.Millisecond)}, + }, + } + + s.drainOutbound(time.Now()) + + select { + case reply := <-sub.Channel(): + if reply == nil { + t.Fatal("nil timeout reply") + } + out, ok := reply.Payload.(types.ErrorReply) + if !ok { + t.Fatalf("payload type = %T, want types.ErrorReply", reply.Payload) + } + if out.OK { + t.Fatal("expected ok=false") + } + if out.Error != "timeout" { + t.Fatalf("error = %q, want timeout", out.Error) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for timeout reply") + } +} + +func TestDrainExportsDropsUnmarshalablePayload(t *testing.T) { + b := newBus() + fabricConn := b.NewConnection("fabric") + pubConn := b.NewConnection("publisher") + tr := &captureTransport{} + s := session{ + conn: fabricConn, + tr: tr, + link: linkUp, + } + + s.setupExports() + defer s.teardownExports() + + pubConn.Publish(pubConn.NewMessage(bus.T("hal", "state"), make(chan int), false)) + s.drainExports() + + if len(tr.writes) != 0 { + t.Fatalf("writes = %d, want 0", 
len(tr.writes)) + } +} + +func TestDrainPendingCallsReportsMarshalFailure(t *testing.T) { + b := newBus() + fabricConn := b.NewConnection("fabric") + handlerConn := b.NewConnection("handler") + tr := &captureTransport{} + + sub := handlerConn.Subscribe(bus.T("rpc", "hal", "dump")) + defer handlerConn.Unsubscribe(sub) + req := fabricConn.NewMessage(bus.T("rpc", "hal", "dump"), map[string]string{"ask": "status"}, false) + replySub := fabricConn.Request(req) + + var msg *bus.Message + select { + case msg = <-sub.Channel(): + if msg == nil { + t.Fatal("nil request message") + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for request message") + } + handlerConn.Reply(msg, make(chan int), false) + + s := session{ + conn: fabricConn, + tr: tr, + inboundCalls: []*inboundCall{{ + id: "call-1", + sub: replySub, + deadline: time.Now().Add(time.Second), + }}, + } + + s.drainInbound(time.Now()) + + if len(tr.writes) != 1 { + t.Fatalf("writes = %d, want 1", len(tr.writes)) + } + var reply protoReply + if err := json.Unmarshal(tr.writes[0], &reply); err != nil { + t.Fatalf("Unmarshal reply: %v", err) + } + if reply.Corr != "call-1" { + t.Fatalf("corr = %q, want call-1", reply.Corr) + } + if reply.OK { + t.Fatal("expected ok=false") + } + if reply.Err != errPayloadMarshal { + t.Fatalf("err = %q, want %q", reply.Err, errPayloadMarshal) + } +} + +func TestDrainOutgoingWireCallsReportsMarshalFailure(t *testing.T) { + b := newBus() + fabricConn := b.NewConnection("fabric") + reqConn := b.NewConnection("caller") + tr := &captureTransport{} + s := session{ + conn: fabricConn, + tr: tr, + link: linkUp, + } + + s.setupExports() + defer s.teardownExports() + + msg := reqConn.NewMessage( + bus.T("fabric", "out", "rpc", "hal", "dump"), + make(chan int), + false, + ) + replySub := reqConn.Request(msg) + defer reqConn.Unsubscribe(replySub) + + s.drainOutbound(time.Now()) + + if len(tr.writes) != 0 { + t.Fatalf("writes = %d, want 0", len(tr.writes)) + } + if 
len(s.outboundCalls) != 0 { + t.Fatalf("outboundCalls = %d, want 0", len(s.outboundCalls)) + } + + select { + case reply := <-replySub.Channel(): + if reply == nil { + t.Fatal("nil reply") + } + out, ok := reply.Payload.(types.ErrorReply) + if !ok { + t.Fatalf("payload type = %T, want types.ErrorReply", reply.Payload) + } + if out.OK { + t.Fatal("expected ok=false") + } + if out.Error != errPayloadMarshal { + t.Fatalf("error = %q, want %q", out.Error, errPayloadMarshal) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for marshal failure reply") + } +} + +func TestDrainOutgoingWireCallsReportsWriteFailure(t *testing.T) { + b := newBus() + fabricConn := b.NewConnection("fabric") + reqConn := b.NewConnection("caller") + tr := &captureTransport{writeErr: errors.New("boom")} + s := session{ + conn: fabricConn, + tr: tr, + link: linkUp, + } + + s.setupExports() + defer s.teardownExports() + + msg := reqConn.NewMessage( + bus.T("fabric", "out", "rpc", "hal", "dump"), + map[string]string{"ask": "status"}, + false, + ) + replySub := reqConn.Request(msg) + defer reqConn.Unsubscribe(replySub) + + s.drainOutbound(time.Now()) + + if s.link != linkDown { + t.Fatalf("link = %v, want %v", s.link, linkDown) + } + if len(s.outboundCalls) != 0 { + t.Fatalf("outboundCalls = %d, want 0", len(s.outboundCalls)) + } + + select { + case reply := <-replySub.Channel(): + if reply == nil { + t.Fatal("nil reply") + } + out, ok := reply.Payload.(types.ErrorReply) + if !ok { + t.Fatalf("payload type = %T, want types.ErrorReply", reply.Payload) + } + if out.OK { + t.Fatal("expected ok=false") + } + if out.Error != "transport_write_failed" { + t.Fatalf("error = %q, want transport_write_failed", out.Error) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for write failure reply") + } +} + +func TestCallExportPeerReset(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + reqConn := b.NewConnection("caller") + 
ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + bringUp(t, cm5) + unlockExports(t, cm5) + + type result struct { + msg *bus.Message + err error + } + done := make(chan result, 1) + go func() { + msg, err := reqConn.RequestWait(context.Background(), reqConn.NewMessage( + bus.T("fabric", "out", "rpc", "hal", "dump"), + map[string]string{"ask": "status"}, + false, + )) + done <- result{msg: msg, err: err} + }() + + call := readMsg[protoCall](t, cm5) + if call.Type != "call" { + t.Fatalf("expected call, got %q", call.Type) + } + + sendMsg(t, cm5, protoHello{ + Type: "hello", Node: "cm5-local", Peer: "mcu-1", SID: "fresh-session", Proto: protoVersion, + }) + _ = readMsg[protoHelloAck](t, cm5) + + select { + case res := <-done: + if res.err != nil { + t.Fatalf("RequestWait: %v", res.err) + } + if res.msg == nil { + t.Fatal("nil bus reply") + } + out, ok := res.msg.Payload.(types.ErrorReply) + if !ok { + t.Fatalf("payload type = %T, want types.ErrorReply", res.msg.Payload) + } + if out.OK { + t.Fatal("expected ok=false") + } + if out.Error != "session_reset" { + t.Fatalf("error = %q, want session_reset", out.Error) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for peer-reset reply") + } +} + +func TestEchoedHelloAckIgnoredDuringOutgoingCall(t *testing.T) { + mcu, cm5 := pipePair() + b := newBus() + fabricConn := b.NewConnection("fabric") + reqConn := b.NewConnection("caller") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go Run(ctx, mcu, fabricConn, "mcu-1", "cm5-local", DefaultLinkConfig()) + ack := bringUp(t, cm5) + unlockExports(t, cm5) + + type result struct { + msg *bus.Message + err error + } + done := make(chan result, 1) + go func() { + msg, err := reqConn.RequestWait(context.Background(), reqConn.NewMessage( + bus.T("fabric", "out", "rpc", "hal", "dump"), + map[string]string{"ask": "status"}, + false, + )) + 
done <- result{msg: msg, err: err} + }() + + call := readMsg[protoCall](t, cm5) + if call.Type != "call" { + t.Fatalf("expected call, got %q", call.Type) + } + + // Send an echoed hello_ack (our own SID) — should be ignored. + sendMsg(t, cm5, protoHelloAck{ + Type: "hello_ack", Node: "mcu-1", SID: ack.SID, Proto: protoVersion, OK: true, + }) + + sendMsg(t, cm5, protoReply{ + Type: "reply", + Corr: call.ID, + OK: true, + Value: json.RawMessage(`{"ok":true,"remote":"cm5"}`), + }) + + select { + case res := <-done: + if res.err != nil { + t.Fatalf("RequestWait: %v", res.err) + } + if res.msg == nil { + t.Fatal("nil bus reply") + } + reply, ok := res.msg.Payload.(map[string]any) + if !ok { + t.Fatalf("payload type = %T, want map[string]any", res.msg.Payload) + } + if reply["remote"] != "cm5" || reply["ok"] != true { + t.Fatalf("unexpected reply payload: %#v", reply) + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for local reply after echoed hello_ack") + } +} diff --git a/services/fabric/protocol.go b/services/fabric/protocol.go new file mode 100644 index 0000000..023b0ec --- /dev/null +++ b/services/fabric/protocol.go @@ -0,0 +1,343 @@ +package fabric + +import "encoding/json" + +// ---- Wire message type identifiers ---- +// +// Wire schema mirrors devicecode-lua/src/services/fabric/protocol.lua at +// update-migration tip (commit 2c88090). The frame discriminator field is +// "type" (not "t"). Reply frames carry {id, ok, value, err}. Transfer frames +// use xfer_id/offset/checksum/data with a minimal xfer_chunk shape and +// xxHash32 hex wire integrity (no algorithm field; Lua source treats checksum +// as opaque hex). 
// Wire message type identifiers: the string values carried in the "type"
// field of the frame structs declared below.
const (
	msgHello      = "hello"
	msgHelloAck   = "hello_ack"
	msgPing       = "ping"
	msgPong       = "pong"
	msgPub        = "pub"
	msgUnretain   = "unretain"
	msgCall       = "call"
	msgReply      = "reply"
	msgXferBegin  = "xfer_begin"
	msgXferReady  = "xfer_ready"
	msgXferChunk  = "xfer_chunk"
	msgXferNeed   = "xfer_need"
	msgXferCommit = "xfer_commit"
	msgXferDone   = "xfer_done"
	msgXferAbort  = "xfer_abort"
)

// ---- Wire message structs ----

// protoCaps is carried in hello for forward compatibility. The Lua side
// sends caps but neither side enforces them in v1.
type protoCaps struct {
	Pub  bool `json:"pub,omitempty"`
	Call bool `json:"call,omitempty"`
}

// protoHello is the handshake frame ("hello"). Proto and Caps are left out
// of the JSON when zero/nil (omitempty); every other field is always sent.
//
// NOTE(review): Node/Peer look like "sender node id" / "addressed node id"
// and SID like the sender's session id — inferred from field names and
// fixtures; confirm against the handshake handler.
type protoHello struct {
	Type  string     `json:"type"`
	Node  string     `json:"node"`
	Peer  string     `json:"peer"`
	SID   string     `json:"sid"`
	Proto int        `json:"proto,omitempty"`
	Caps  *protoCaps `json:"caps,omitempty"`
}

// protoHelloAck is the "hello_ack" frame sent in response to a hello. OK is
// always encoded (no omitempty), so a false value appears on the wire as
// "ok":false; SID and Proto are dropped when empty/zero.
type protoHelloAck struct {
	Type  string `json:"type"`
	Node  string `json:"node"`
	SID   string `json:"sid,omitempty"`
	Proto int    `json:"proto,omitempty"`
	OK    bool   `json:"ok"`
}

// protoPing is the "ping" frame. TS is an opaque 64-bit value chosen by the
// sender; SID is optional and omitted from the JSON when empty.
type protoPing struct {
	Type string `json:"type"`
	TS   int64  `json:"ts"`
	SID  string `json:"sid,omitempty"`
}

// protoPong is the "pong" frame and has the same shape as protoPing.
// NOTE(review): TS presumably echoes the ping's ts — confirm in the ping
// handler.
type protoPong struct {
	Type string `json:"type"`
	TS   int64  `json:"ts"`
	SID  string `json:"sid,omitempty"`
}

// protoPub is the "pub" frame: a publish of Payload on Topic. Topic is the
// topic split into segments; Payload is held as raw JSON so this layer does
// not decode it; Retain is always encoded, including when false.
type protoPub struct {
	Type    string          `json:"type"`
	Topic   []string        `json:"topic"`
	Payload json.RawMessage `json:"payload"`
	Retain  bool            `json:"retain"`
}

// protoUnretain is the "unretain" frame: it names a Topic (as segments) and
// carries no payload.
type protoUnretain struct {
	Type  string   `json:"type"`
	Topic []string `json:"topic"`
}

// protoCall is the "call" frame: a request on Topic identified by ID, which
// the matching reply frame carries back in its own "id" field.
type protoCall struct {
	Type    string          `json:"type"`
	ID      string          `json:"id"`
	Topic   []string        `json:"topic"`
	Payload json.RawMessage `json:"payload"`
	// Always encoded, so an unset timeout goes out as "timeout_ms":0.
	// NOTE(review): presumably milliseconds, per the name — confirm how the
	// receiver treats 0.
	TimeoutMs int `json:"timeout_ms"`
}

// protoReply mirrors Lua's reply frame: {type, id, ok, value, err}. The Go
// field for the correlation id keeps the name "Corr" for readability — the
// wire spelling is "id" because the reply correlates to a prior call.id.
type protoReply struct {
	Type  string          `json:"type"`
	Corr  string          `json:"id"`
	OK    bool            `json:"ok"`
	Value json.RawMessage `json:"value,omitempty"`
	Err   string          `json:"err,omitempty"`
}

// Transfer frames, xfer_begin through xfer_commit.
type (
	// protoXferBegin (control lane). protocol.lua's validate_control
	// requires xfer_id, size and checksum (xxHash32 hex). meta is optional
	// yet used by the source: transfer_mgr.lua sends it on xfer_begin and
	// later does conn:call(meta.receiver, …) before xfer_done, so the blob
	// is kept opaque for fabric-update's receiver to read meta.receiver.
	protoXferBegin struct {
		Type     string          `json:"type"`
		XferID   string          `json:"xfer_id"`
		Size     uint32          `json:"size"`
		Checksum string          `json:"checksum"`
		Meta     json.RawMessage `json:"meta,omitempty"`
	}

	// protoXferReady (control) holds nothing but xfer_id. Success is
	// implied; a failure is reported with xfer_abort instead.
	protoXferReady struct {
		Type   string `json:"type"`
		XferID string `json:"xfer_id"`
	}

	// protoXferChunk (bulk) is the minimal {xfer_id, offset, data} shape:
	// no per-chunk checksum and no sequence number. Chunks are acked by
	// byte offset through xfer_need.next.
	protoXferChunk struct {
		Type   string `json:"type"`
		XferID string `json:"xfer_id"`
		Offset uint32 `json:"offset"`
		Data   string `json:"data"`
	}

	// protoXferNeed (control) reports the next byte offset the receiver
	// expects.
	protoXferNeed struct {
		Type   string `json:"type"`
		XferID string `json:"xfer_id"`
		Next   uint32 `json:"next"`
	}

	// protoXferCommit (control) repeats xfer_begin's integrity fields:
	// xfer_id, size and checksum (xxHash32 hex over the payload bytes).
	protoXferCommit struct {
		Type     string `json:"type"`
		XferID   string `json:"xfer_id"`
		Size     uint32 `json:"size"`
		Checksum string `json:"checksum"`
	}
)

// protoXferDone (control) carries only xfer_id; failure is signalled via
// xfer_abort.
type protoXferDone struct {
	Type   string `json:"type"`
	XferID string `json:"xfer_id"`
}

// protoXferAbort (control) names the transfer by xfer_id and may add an
// err string explaining why it was aborted.
type protoXferAbort struct {
	Type   string `json:"type"`
	XferID string `json:"xfer_id"`
	Err    string `json:"err,omitempty"`
}

// ---- codec helpers ----

// marshal encodes v as compact JSON terminated by a single '\n'.
// An encode failure panics; for the wire structs that should never happen.
func marshal(v any) []byte {
	encoded, err := json.Marshal(v)
	if err == nil {
		return append(encoded, '\n')
	}
	panic("fabric: marshal: " + err.Error())
}

// protoType extracts the wire-discriminator "type" field from a JSON
// envelope via a depth-aware scan. We avoid json.Unmarshal here because
// TinyGo's reflect path was observed silently leaving the field empty
// for tagged anonymous-struct targets when the envelope had preceding
// sibling keys.
//
// Returns the value of the FIRST top-level (object-depth 1) "type" key,
// ignoring any nested "type" keys inside payload/meta sub-objects —
// e.g. for `{"payload":{"type":"x"},"type":"pub"}` the result is "pub".
// Returns "" if the line isn't a JSON object, the top-level "type" key
// is missing, or its value isn't a string.
+func protoType(line []byte) string { + n := len(line) + i := skipJSONSpace(line, 0) + if i >= n || line[i] != '{' { + return "" + } + i++ + for { + i = skipJSONSpace(line, i) + if i >= n { + return "" + } + switch line[i] { + case '}': + return "" + case ',': + i++ + continue + } + if line[i] != '"' { + return "" + } + keyStart := i + 1 + keyEnd, ok := scanJSONString(line, i) + if !ok { + return "" + } + i = keyEnd + i = skipJSONSpace(line, i) + if i >= n || line[i] != ':' { + return "" + } + i++ + i = skipJSONSpace(line, i) + if i >= n { + return "" + } + isType := keyEnd-1-keyStart == 4 && + line[keyStart] == 't' && line[keyStart+1] == 'y' && + line[keyStart+2] == 'p' && line[keyStart+3] == 'e' + if isType { + if line[i] != '"' { + return "" + } + valStart := i + 1 + valEnd, ok := scanJSONString(line, i) + if !ok { + return "" + } + return string(line[valStart : valEnd-1]) + } + i, ok = skipJSONValue(line, i) + if !ok { + return "" + } + } +} + +func skipJSONSpace(line []byte, i int) int { + for i < len(line) { + switch line[i] { + case ' ', '\t', '\n', '\r': + i++ + default: + return i + } + } + return i +} + +// scanJSONString walks an opening-`"` at line[i] to its closing `"`, +// honouring backslash escapes. Returns the index immediately after the +// closing quote, or false on a malformed string. +func scanJSONString(line []byte, i int) (int, bool) { + n := len(line) + if i >= n || line[i] != '"' { + return 0, false + } + i++ + for i < n { + switch line[i] { + case '\\': + if i+1 >= n { + return 0, false + } + i += 2 + case '"': + return i + 1, true + default: + i++ + } + } + return 0, false +} + +// skipJSONValue advances past a value starting at line[i], whatever +// its kind (string, number, bool, null, object, array). Returns the +// index past the value, or false on parse error. 
+func skipJSONValue(line []byte, i int) (int, bool) { + n := len(line) + if i >= n { + return 0, false + } + switch line[i] { + case '"': + return scanJSONString(line, i) + case '{', '[': + return skipJSONContainer(line, i) + } + // number / true / false / null — walk to the next structural byte. + for i < n { + switch line[i] { + case ',', '}', ']', ' ', '\t', '\n', '\r': + return i, true + } + i++ + } + return i, true +} + +// skipJSONContainer walks past a balanced { … } or [ … ] block starting +// at line[i], tracking string state so quoted braces don't disturb the +// depth count. Returns the index past the closing brace, or false. +func skipJSONContainer(line []byte, i int) (int, bool) { + n := len(line) + if i >= n { + return 0, false + } + depth := 0 + inString := false + for i < n { + c := line[i] + if inString { + if c == '\\' { + if i+1 >= n { + return 0, false + } + i += 2 + continue + } + if c == '"' { + inString = false + } + i++ + continue + } + switch c { + case '"': + inString = true + case '{', '[': + depth++ + case '}', ']': + depth-- + if depth == 0 { + return i + 1, true + } + } + i++ + } + return 0, false +} diff --git a/services/fabric/remap.go b/services/fabric/remap.go new file mode 100644 index 0000000..175c44d --- /dev/null +++ b/services/fabric/remap.go @@ -0,0 +1,183 @@ +package fabric + +import "devicecode-go/bus" + +// Topic remapping rules matching the shipped Lua fabric link contract. +// +// These rules are hardcoded and exact-match for v1. The Lua (CM5) side +// uses config-driven wildcard rules, but the MCU only needs a fixed set +// of routes. If new routes are required, add them here and on the Lua +// config side. +// +// CM5 -> MCU wire publish: +// ["config","device"] -> config/hal (with Lua empty-table normalization) +// +// CM5 -> MCU wire call: +// ["rpc","hal","dump"] -> handled directly by session (not via import rules) +// +// MCU local bus publish -> wire: +// hal/cap/env/# -> ["state","env",...] 
+// hal/cap/power/# -> ["state","power",...] +// hal/state -> ["state","hal"] + +type importRule struct { + wire []string + local []string +} + +type busExportRule struct { + localPrefix []string + remotePrefix []string + suffix bool +} + +var importPublishRules = []importRule{ + { + wire: []string{"config", "device"}, + local: []string{"config", "hal"}, + }, +} + +// rpc/hal/dump is handled directly by onCall, not via import rules. +var importCallRules = []importRule{} + +var exportPublishRules = []busExportRule{ + { + localPrefix: []string{"hal", "cap", "env"}, + remotePrefix: []string{"state", "env"}, + suffix: true, + }, + { + localPrefix: []string{"hal", "cap", "power"}, + remotePrefix: []string{"state", "power"}, + suffix: true, + }, + { + localPrefix: []string{"hal", "state"}, + remotePrefix: []string{"state", "hal"}, + }, +} + +var exportCallRules = []busExportRule{ + { + localPrefix: []string{"fabric", "out", "rpc", "hal", "dump"}, + remotePrefix: []string{"rpc", "hal", "dump"}, + }, +} + +func importPublishTopic(wire []string) bus.Topic { + return importMatch(wire, importPublishRules) +} + +func importCallTopic(wire []string) bus.Topic { + return importMatch(wire, importCallRules) +} + +func exportTopic(t bus.Topic) []string { + return busExport(t, exportPublishRules) +} + +func exportPatterns() []bus.Topic { + return exportPatternsFor(exportPublishRules) +} + +func exportCallTopic(t bus.Topic) []string { + return busExport(t, exportCallRules) +} + +func exportCallPatterns() []bus.Topic { + return exportPatternsFor(exportCallRules) +} + +func importMatch(wire []string, rules []importRule) bus.Topic { + for _, rule := range rules { + if slicesEqualStrings(wire, rule.wire) { + return stringsToTopic(rule.local) + } + } + return nil +} + +func busExport(t bus.Topic, rules []busExportRule) []string { + for _, rule := range rules { + out, ok := applyBusExportRule(t, rule) + if ok { + return out + } + } + return nil +} + +func applyBusExportRule(t bus.Topic, 
rule busExportRule) ([]string, bool) { + if t.Len() < len(rule.localPrefix) { + return nil, false + } + for i, want := range rule.localPrefix { + if str(t, i) != want { + return nil, false + } + } + if !rule.suffix && t.Len() != len(rule.localPrefix) { + return nil, false + } + + out := make([]string, 0, len(rule.remotePrefix)+maxInt(0, t.Len()-len(rule.localPrefix))) + out = append(out, rule.remotePrefix...) + if rule.suffix { + for i := len(rule.localPrefix); i < t.Len(); i++ { + s := str(t, i) + if s == "" { + return nil, false + } + out = append(out, s) + } + } + return out, true +} + +func exportPatternsFor(rules []busExportRule) []bus.Topic { + out := make([]bus.Topic, 0, len(rules)) + for _, rule := range rules { + tokens := make([]bus.Token, 0, len(rule.localPrefix)+1) + for _, s := range rule.localPrefix { + tokens = append(tokens, s) + } + if rule.suffix { + tokens = append(tokens, "#") + } + out = append(out, bus.T(tokens...)) + } + return out +} + +func stringsToTopic(parts []string) bus.Topic { + tokens := make([]bus.Token, len(parts)) + for i, s := range parts { + tokens[i] = s + } + return bus.T(tokens...) 
+} + +func slicesEqualStrings(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +func maxInt(a, b int) int { + if a > b { + return a + } + return b +} + +func str(t bus.Topic, i int) string { + s, _ := t.At(i).(string) + return s +} diff --git a/services/fabric/session.go b/services/fabric/session.go new file mode 100644 index 0000000..91a4908 --- /dev/null +++ b/services/fabric/session.go @@ -0,0 +1,1040 @@ +package fabric + +import ( + "context" + "encoding/json" + "errors" + "time" + + "devicecode-go/bus" + "devicecode-go/types" + "devicecode-go/x/strconvx" +) + +// ---- link state ---- + +type linkState int + +const ( + linkDown linkState = iota + linkUp +) + +// ---- link status strings (published in link state payload) ---- + +const ( + statusReady = "ready" + statusOpening = "opening" + statusDown = "down" + lineQueueSize = 32 +) + +// ---- timeouts (local policy) ---- +// +// LinkConfig drives the ping cadence (PingInterval) and liveness-stale +// detection (LivenessTimeout). Mirrors session_ctl.lua at +// devicecode-lua@2c88090: pings fire unconditionally every +// ping_interval_s; the link is torn down if no frame arrives within +// liveness_timeout_s. Exports are enabled immediately on link-up +// (after exportStartHoldoff). + +const ( + callTimeoutDef = 5 * time.Second + waitLogEvery = 2 * time.Second + exportStartHoldoff = 1 * time.Second + // exportMaxPerTick caps the total export messages sent per drain + // cycle across all subscriptions, keeping UART throughput within + // the 115200-baud link capacity. 
+ exportMaxPerTick = 1 + exportTickInterval = 50 * time.Millisecond + errPayloadMarshal = "payload_marshal_failed" +) + +// ---- link reasons and error strings ---- + +const ( + reasonLinkDown = "link_down" + reasonPeerStale = "peer_stale" + reasonPeerReset = "peer_reset" + reasonSessionReset = "session_reset" + reasonHelloRejected = "hello_rejected" + reasonTransportDown = "transport_down" + reasonTransportWrite = "transport_write_failed" + reasonNoRoute = "no_route" + reasonBusy = "busy" + reasonTimeout = "timeout" +) + +// ---- bus topics for config handling ---- + +var ( + tConfigHAL = bus.T("config", "hal") + dumpCallTopic = []string{"rpc", "hal", "dump"} +) + +// ---- types ---- + +type dumpReply struct { + OK bool `json:"ok"` + Method string `json:"method"` + Echo any `json:"echo,omitempty"` + HAL *types.HALState `json:"hal,omitempty"` + Applied bool `json:"applied"` + ConfigCount int `json:"config_count,omitempty"` + ConfigError string `json:"config_error,omitempty"` +} + +type inboundCall struct { + id string + sub *bus.Subscription + deadline time.Time +} + +type outboundCall struct { + id string + req *bus.Message + deadline time.Time +} + +type readResult struct { + line []byte + err error +} + +type linkStatePayload struct { + LinkID string `json:"link_id"` + Status string `json:"status"` + Ready bool `json:"ready"` + Established bool `json:"established"` + PeerID string `json:"peer_id"` + LocalSID string `json:"local_sid"` + PeerSID string `json:"peer_sid,omitempty"` + PeerNode string `json:"peer_node,omitempty"` + PeerProto int `json:"peer_proto,omitempty"` + LastRxUnixMilli int64 `json:"last_rx_unix_ms,omitempty"` + LastTxUnixMilli int64 `json:"last_tx_unix_ms,omitempty"` + LastPongUnixMilli int64 `json:"last_pong_unix_ms,omitempty"` + InboundCalls int `json:"inbound_calls"` + OutboundCalls int `json:"outbound_calls"` + Reason string `json:"reason,omitempty"` + Err string `json:"err,omitempty"` +} + +// session manages the fabric link state machine 
over a Transport. +// +// All bus access happens in the main loop goroutine only. TinyGo's +// cooperative scheduler panics if multiple goroutines contend on +// the bus's internal sync.Mutex. +type session struct { + linkID string + nodeID string + peerID string + localSID string + tr Transport + conn *bus.Connection + cfg LinkConfig + + link linkState + peerNode string + peerSID string + peerProto int + lastRxAt time.Time + lastTxAt time.Time + lastPongAt time.Time + exportReadyAt time.Time + exportsEnabled bool + + exportSubs []*bus.Subscription + exportCallSubs []*bus.Subscription + inboundCalls []*inboundCall + outboundCalls []*outboundCall + nextOutboundID uint64 + nextPingAt time.Time + txControl txLane + txRPC txLane + txBulk txLane + importedRetained []bus.Topic // local topics currently retained on the bus due to wire imports + rpcReady bool // bridge replay complete; gates linkStatePayload.Ready + incomingTransfer *incomingTransfer + beginTransfer func(transferMeta) (transferSink, error) + + // Config state — tracks config/device → config/hal translation. + configApplied bool + configCount int + lastConfigErr string +} + +func (s *session) log(msg string) { + println("[fabric]", "sid", s.localSID, msg) +} + +func (s *session) logKV(msg, key, value string) { + println("[fabric]", "sid", s.localSID, msg, key, value) +} + +// run is the main loop. Blocks until ctx is cancelled. 
+func (s *session) run(ctx context.Context) { + s.cfg.applyDefaults() + lines := make(chan readResult, lineQueueSize) + + go func() { + defer close(lines) + for { + line, err := s.tr.ReadLine() + if err != nil { + if errors.Is(err, ErrLineTooLong) { + s.log("oversized line dropped") + continue + } + select { + case lines <- readResult{err: err}: + case <-ctx.Done(): + } + return + } + cp := make([]byte, len(line)) + copy(cp, line) + select { + case lines <- readResult{line: cp}: + case <-ctx.Done(): + return + } + } + }() + + defer s.tr.Close() + defer s.teardownExports() + defer s.teardownInbound() + defer s.teardownOutbound(reasonLinkDown) + defer s.abortTransfer(reasonLinkDown) + defer s.log("run stop") + + stale := time.NewTimer(s.cfg.LivenessTimeout) + defer stale.Stop() + + waitTick := time.NewTicker(waitLogEvery) + defer waitTick.Stop() + + // Poll subscription channels periodically. Needed because select + // blocks until a line/timer fires; without this, exported bus + // messages and async call replies would sit in subscription channels. 
+ exportTick := time.NewTicker(exportTickInterval) + defer exportTick.Stop() + + s.publishLinkState("", "") + s.log("run start") + + for { + select { + case <-ctx.Done(): + return + + case res, ok := <-lines: + if !ok { + return + } + if res.err != nil { + s.handleLinkDown(reasonTransportDown, res.err.Error()) + return + } + s.dispatch(res.line) + resetTimer(stale, s.cfg.LivenessTimeout) + + case <-exportTick.C: + now := time.Now() + s.drainExports() + s.drainInbound(now) + s.drainOutbound(now) + s.checkTransferTimeout(now) + s.tickPing(now) + s.tickReady(now) + + case <-waitTick.C: + s.logWaiting() + + case <-stale.C: + if s.link == linkUp { + s.handleLinkDown(reasonPeerStale, "") + } else { + stale.Reset(s.cfg.LivenessTimeout) + } + } + } +} + +func resetTimer(t *time.Timer, d time.Duration) { + if !t.Stop() { + select { + case <-t.C: + default: + } + } + t.Reset(d) +} + +func unixMilli(t time.Time) int64 { + if t.IsZero() { + return 0 + } + return t.UnixMilli() +} + +func (s *session) currentStatus() string { + if s.link == linkUp && s.rpcReady { + return statusReady + } + return statusOpening +} + +func (s *session) publishLinkState(reason, err string) { + if s.conn == nil { + return + } + status := s.currentStatus() + if s.link != linkUp && (reason != "" || err != "") { + status = statusDown + } + s.conn.Publish(s.conn.NewMessage( + bus.T("state", "fabric", "link", s.linkID), + linkStatePayload{ + LinkID: s.linkID, + Status: status, + Ready: s.link == linkUp && s.rpcReady, + Established: s.link == linkUp, + PeerID: s.peerID, + LocalSID: s.localSID, + PeerSID: s.peerSID, + PeerNode: s.peerNode, + PeerProto: s.peerProto, + LastRxUnixMilli: unixMilli(s.lastRxAt), + LastTxUnixMilli: unixMilli(s.lastTxAt), + LastPongUnixMilli: unixMilli(s.lastPongAt), + InboundCalls: len(s.inboundCalls), + OutboundCalls: len(s.outboundCalls), + Reason: reason, + Err: err, + }, + true, + )) +} + +func (s *session) markRx() { + s.lastRxAt = time.Now() +} + +func (s *session) markTx() 
{ + s.lastTxAt = time.Now() +} + +func (s *session) handleLinkDown(reason, err string) { + pendingReason := reason + if pendingReason == "" { + pendingReason = reasonLinkDown + } + s.link = linkDown + s.peerNode = "" + s.peerSID = "" + s.peerProto = 0 + s.exportReadyAt = time.Time{} + s.exportsEnabled = false + s.rpcReady = false + s.teardownExports() + s.teardownInbound() + s.teardownOutbound(pendingReason) + s.teardownImportedRetained() + s.abortTransfer(pendingReason) + s.publishLinkState(reason, err) + if err != "" { + s.logKV("link down", "err", err) + } else if reason != "" { + s.logKV("link down", "reason", reason) + } +} + +// promoteLink transitions to linkUp, tearing down any prior session state. +// `reason` carries the link-state telemetry tag (e.g. session_reset) and +// is also used as the err string on any pending outbound calls cancelled +// by the transition, matching rpc_bridge.lua's session-replace behaviour. +// +// On a session-reset transition (re-promote with the link already up), +// imported retained facts are unretained locally so consumers don't see +// stale data from the previous CM5 session — mirrors rpc_bridge.lua's +// invalidate_imported_retained on generation bump. rpcReady is held low +// until the export holdoff elapses (see tickReady), gating +// linkStatePayload.Ready. +func (s *session) promoteLink(reason string) { + if s.link == linkUp { + if reason == "" { + reason = reasonPeerReset + } + s.abortTransfer(reason) + s.teardownExports() + s.teardownInbound() + s.teardownOutbound(reason) + s.teardownImportedRetained() + } + s.link = linkUp + s.rpcReady = false + s.setupExports() + s.exportsEnabled = true + s.exportReadyAt = time.Now().Add(exportStartHoldoff) + s.nextPingAt = time.Now().Add(s.cfg.PingInterval) + s.log("exports enabled") + s.publishLinkState(reason, "") +} + +// teardownImportedRetained clears every local retained slot we populated +// from a wire import. Mirrors rpc_bridge.lua's invalidate_imported_retained. 
+func (s *session) teardownImportedRetained() { + for _, t := range s.importedRetained { + s.conn.Publish(s.conn.NewMessage(t, nil, true)) + } + s.importedRetained = nil +} + +func (s *session) trackImportedRetain(t bus.Topic) { + for _, ex := range s.importedRetained { + if topicEquals(ex, t) { + return + } + } + s.importedRetained = append(s.importedRetained, t) +} + +func (s *session) untrackImportedRetain(t bus.Topic) { + for i, ex := range s.importedRetained { + if topicEquals(ex, t) { + s.importedRetained = append(s.importedRetained[:i], s.importedRetained[i+1:]...) + return + } + } +} + +// tickReady promotes rpcReady once the post-handshake export holdoff has +// elapsed, mirroring rpc_bridge.lua's emit_rpc_ready(true) after retained +// replay. Re-publishes link state so consumers observe the ready edge. +func (s *session) tickReady(now time.Time) { + if s.link != linkUp || s.rpcReady { + return + } + if s.exportReadyAt.IsZero() || now.Before(s.exportReadyAt) { + return + } + s.rpcReady = true + s.publishLinkState("", "") +} + +// ---- dispatch ---- + +func (s *session) dispatch(line []byte) { + t := protoType(line) + if t == "" { + s.logMalformed(line, nil) + return + } + s.markRx() + + switch t { + case msgHello: + typedDispatch(s, line, s.onHello) + return + case msgHelloAck: + typedDispatch(s, line, s.onHelloAck) + return + } + + if !s.requireLinkUp(t) { + return + } + + switch t { + case msgPing: + typedDispatch(s, line, s.onPing) + case msgPong: + typedDispatch(s, line, s.onPong) + case msgPub: + typedDispatch(s, line, s.onPub) + case msgUnretain: + typedDispatch(s, line, s.onUnretain) + case msgCall: + typedDispatch(s, line, s.onCall) + case msgReply: + typedDispatch(s, line, s.onReply) + case msgXferBegin: + typedDispatch(s, line, s.onTransferBegin) + case msgXferChunk: + typedDispatch(s, line, s.onTransferChunk) + case msgXferCommit: + typedDispatch(s, line, s.onTransferCommit) + case msgXferAbort: + typedDispatch(s, line, s.onTransferAbort) + 
default: + s.logKV("unknown message type dropped", "type", t) + } +} + +func typedDispatch[T any](s *session, line []byte, handler func(*T)) { + var msg T + if err := json.Unmarshal(line, &msg); err != nil { + s.logMalformed(line, err) + return + } + handler(&msg) +} + +func (s *session) requireLinkUp(t string) bool { + if s.link != linkUp { + s.logKV("dropped before handshake", "type", t) + return false + } + return true +} + +func (s *session) logMalformed(line []byte, err error) { + errStr := "" + if err != nil { + errStr = err.Error() + } + println( + "[fabric]", "sid", s.localSID, + "malformed frame dropped", + "line_len", strconvx.Itoa(len(line)), + "line_head", tracePreview(line), + "err", errStr, + ) +} + +// notePeerIdentity records the remote peer's node, SID, and proto version. +// If the SID changes mid-session, the returned reason triggers a full +// teardown of exports and pending calls on the Go side. Note: the Lua +// side only tears down pending calls on SID change, not exports — this +// asymmetry is intentional since the CM5 re-subscribes on reconnect. 
func (s *session) notePeerIdentity(node, sid string, proto int) string {
	reason := ""
	// A different non-empty SID while the link is up means the peer restarted.
	if s.link == linkUp && s.peerSID != "" && sid != "" && s.peerSID != sid {
		reason = reasonSessionReset
	}
	// Empty / zero fields never overwrite what is already recorded.
	if node != "" {
		s.peerNode = node
	}
	if sid != "" {
		s.peerSID = sid
	}
	if proto > 0 {
		s.peerProto = proto
	}
	return reason
}

// isSelfControlFrame reports whether a control frame carries this
// session's own SID or node id, i.e. it is our own frame seen again.
// Empty node or sid values are not compared.
func (s *session) isSelfControlFrame(node, sid string) bool {
	if sid != "" && sid == s.localSID {
		return true
	}
	if node != "" && node == s.nodeID {
		return true
	}
	return false
}

// hasWirePrefix reports whether topic starts with every token of prefix.
// An empty prefix matches any topic.
func hasWirePrefix(topic, prefix []string) bool {
	if len(topic) < len(prefix) {
		return false
	}
	for i := range prefix {
		if topic[i] != prefix[i] {
			return false
		}
	}
	return true
}

// onHello handles a peer hello. Frames addressed to another node, or sent
// by a node other than the configured peer, are dropped. Otherwise the
// peer identity is recorded, a hello_ack is sent, and the link is promoted
// only if that send succeeded.
func (s *session) onHello(msg *protoHello) {
	if msg.Peer != "" && msg.Peer != s.nodeID {
		s.log("hello dropped: wrong peer")
		return
	}
	if s.peerID != "" && msg.Node != s.peerID {
		s.log("hello dropped: wrong node")
		return
	}
	reason := s.notePeerIdentity(msg.Node, msg.SID, msg.Proto)
	s.logKV("hello rx", "peer_sid", msg.SID)

	if !s.sendControl(marshal(protoHelloAck{
		Type:  msgHelloAck,
		Node:  s.nodeID,
		SID:   s.localSID,
		Proto: protoVersion,
		OK:    true,
	})) {
		return
	}
	s.log("hello_ack tx")
	s.promoteLink(reason)
}

// onHelloAck handles a hello_ack. Our own echoed ack is ignored, a
// rejecting ack (ok=false) takes the link down, and an accepting ack
// records the peer identity and promotes the link.
func (s *session) onHelloAck(msg *protoHelloAck) {
	if s.isSelfControlFrame(msg.Node, msg.SID) {
		s.log("echoed hello_ack ignored")
		return
	}
	if !msg.OK {
		s.log("hello_ack rejected by peer")
		s.handleLinkDown(reasonHelloRejected, "")
		return
	}
	reason := s.notePeerIdentity(msg.Node, msg.SID, msg.Proto)
	s.logKV("hello_ack rx", "peer_sid", msg.SID)
	s.promoteLink(reason)
}

// onPing answers a ping with a pong that echoes the ping's TS and
// carries the local SID.
func (s *session) onPing(msg *protoPing) {
	s.logKV("ping rx", "peer_sid", msg.SID)
	if !s.sendControl(marshal(protoPong{Type: msgPong, TS: msg.TS, SID: s.localSID})) {
		return
	}
	s.log("pong tx")
}

// tickPing sends an outbound ping if the link is established and the
// PingInterval cadence has elapsed. Mirrors session_ctl.lua: pings fire
// unconditionally every ping_interval after each send (NOT TX-activity-based).
func (s *session) tickPing(now time.Time) {
	if s.link != linkUp {
		return
	}
	if s.nextPingAt.IsZero() || now.Before(s.nextPingAt) {
		return
	}
	// nextPingAt only advances after a successful send, so a failed send
	// is retried on the next tick.
	if !s.sendControl(marshal(protoPing{Type: msgPing, TS: now.UnixMilli(), SID: s.localSID})) {
		return
	}
	s.nextPingAt = now.Add(s.cfg.PingInterval)
}

// onPong stamps lastPongAt with the receive time recorded for the
// current frame, unless the pong carries our own SID (an echo).
func (s *session) onPong(msg *protoPong) {
	if s.isSelfControlFrame("", msg.SID) {
		s.log("echoed pong ignored")
		return
	}
	s.lastPongAt = s.lastRxAt
}

// onPub imports a wire publish onto the local bus. Topics with no import
// rule are dropped (those under "state" are logged as echoes of our own
// exports). config/hal payloads go through decodeHALConfig first and are
// always published retained; other topics are published as received.
func (s *session) onPub(msg *protoPub) {
	localTopic := importPublishTopic(msg.Topic)
	if localTopic == nil {
		if hasWirePrefix(msg.Topic, []string{"state"}) {
			s.log("echoed state pub ignored")
			return
		}
		s.log("incoming pub dropped: no_route")
		return
	}

	// config/device → config/hal: normalize and track.
	if topicEquals(localTopic, tConfigHAL) {
		// decodeHALConfig reports failure as a non-empty error string.
		cfg, err := decodeHALConfig(msg.Payload)
		if err != "" {
			s.lastConfigErr = err
			s.log("config/device rejected: " + err)
			return
		}
		s.configApplied = true
		s.configCount++
		s.lastConfigErr = ""
		s.log("config/device applied to config/hal")
		s.conn.Publish(s.conn.NewMessage(localTopic, cfg, true))
		s.trackImportedRetain(localTopic)
		return
	}

	s.conn.Publish(s.conn.NewMessage(localTopic, msg.Payload, msg.Retain))
	if msg.Retain {
		s.trackImportedRetain(localTopic)
	}
	// A non-retained pub on the same topic must NOT untrack: the bus
	// retain store is only cleared by an explicit unretain (or a
	// retained-nil publish), so the prior retained value is still live
	// and must be cleaned up on session reset. Mirrors rpc_bridge.lua,
	// which only mutates imported_retained on retain set/clear.
}

// onUnretain clears the local retained value for an imported topic by
// publishing a retained nil payload, then stops tracking the topic.
func (s *session) onUnretain(msg *protoUnretain) {
	localTopic := importPublishTopic(msg.Topic)
	if localTopic == nil {
		s.log("incoming unretain dropped: no_route")
		return
	}
	s.conn.Publish(s.conn.NewMessage(localTopic, nil, true))
	s.untrackImportedRetain(localTopic)
}

// onCall handles an inbound RPC. rpc/hal/dump is answered inline. Any
// other call is rejected with "busy" once MaxInboundHelpers calls are
// pending, rejected with "no_route" when no import rule matches, and
// otherwise forwarded as a bus request whose reply is collected later
// by drainInbound.
func (s *session) onCall(msg *protoCall) {
	// rpc/hal/dump: handle directly — reply with config and HAL state.
	if slicesEqualStrings(msg.Topic, dumpCallTopic) {
		var halState *types.HALState
		sub := s.conn.Subscribe(bus.T("hal", "state"))
		// Non-blocking read: HAL is left nil unless a message is already
		// waiting on the new subscription.
		// NOTE(review): presumably relies on the bus replaying a retained
		// hal/state synchronously on Subscribe — confirm.
		select {
		case m := <-sub.Channel():
			if m != nil {
				if st, ok := decodeHALState(m.Payload); ok {
					halState = &st
				}
			}
		default:
		}
		s.conn.Unsubscribe(sub)

		reply := dumpReply{
			OK:          true,
			Method:      "dump",
			Echo:        decodePayload(msg.Payload),
			HAL:         halState,
			Applied:     s.configApplied,
			ConfigCount: s.configCount,
			ConfigError: s.lastConfigErr,
		}
		s.sendRPC(marshal(protoReply{Type: msgReply, Corr: msg.ID, OK: true, Value: mustMarshal(reply)}))
		return
	}

	if len(s.inboundCalls) >= s.cfg.MaxInboundHelpers {
		s.log("incoming call dropped: busy")
		s.sendRPC(marshal(protoReply{Type: msgReply, Corr: msg.ID, OK: false, Err: reasonBusy}))
		return
	}

	localTopic := importCallTopic(msg.Topic)
	if localTopic == nil {
		s.log("incoming call dropped: no_route")
		s.sendRPC(marshal(protoReply{Type: msgReply, Corr: msg.ID, OK: false, Err: reasonNoRoute}))
		return
	}

	// The caller's timeout_ms wins when positive; otherwise use the default.
	timeout := callTimeoutDef
	if msg.TimeoutMs > 0 {
		timeout = time.Duration(msg.TimeoutMs) * time.Millisecond
	}
	busMsg := s.conn.NewMessage(localTopic, msg.Payload, false)
	sub := s.conn.Request(busMsg)
	s.inboundCalls = append(s.inboundCalls, &inboundCall{
		id:       msg.ID,
		sub:      sub,
		deadline: time.Now().Add(timeout),
	})
}

// onReply matches a wire reply to the pending outbound call with the same
// correlation id, removes that call, and answers the original bus request
// with the decoded value (or an ErrorReply when ok=false). Replies with
// no matching call are logged and dropped.
func (s *session) onReply(msg *protoReply) {
	for i, call := range s.outboundCalls {
		if call.id != msg.Corr {
			continue
		}
		// Every path below returns, so removing during the range is safe.
		s.outboundCalls = append(s.outboundCalls[:i], s.outboundCalls[i+1:]...)
		if !call.req.CanReply() {
			return
		}
		if !msg.OK {
			s.conn.Reply(call.req, types.ErrorReply{OK: false, Error: msg.Err}, false)
			return
		}
		s.conn.Reply(call.req, decodePayload(msg.Value), false)
		return
	}

	s.logKV("unexpected reply dropped", "corr", msg.Corr)
}

// checkBusError returns the error text carried by a bus reply payload, or
// "" when the payload does not describe a failure. A payload counts as a
// failure when it is a types.ErrorReply, or JSON-encodes to an object,
// with ok=false and a non-empty error string.
func checkBusError(payload any) string {
	if e, ok := payload.(types.ErrorReply); ok && !e.OK && e.Error != "" {
		return e.Error
	}
	// Fall back to JSON probe for handlers that reply with ad-hoc structs.
	b, err := json.Marshal(payload)
	if err != nil {
		return ""
	}
	var probe struct {
		OK    bool   `json:"ok"`
		Error string `json:"error"`
	}
	if json.Unmarshal(b, &probe) == nil && !probe.OK && probe.Error != "" {
		return probe.Error
	}
	return ""
}

// mustMarshal JSON-encodes v. It never fails: an encode error yields the
// fixed object {"error":"marshal_failed"} instead.
func mustMarshal(v any) json.RawMessage {
	b, err := json.Marshal(v)
	if err != nil {
		return json.RawMessage(`{"error":"marshal_failed"}`)
	}
	return json.RawMessage(b)
}

// topicEquals reports whether two topics have the same length and equal
// tokens position by position. Tokens are compared as strings; a
// non-string token compares as "".
func topicEquals(t bus.Topic, expected bus.Topic) bool {
	if t.Len() != expected.Len() {
		return false
	}
	for i := 0; i < t.Len(); i++ {
		a, _ := t.At(i).(string)
		b, _ := expected.At(i).(string)
		if a != b {
			return false
		}
	}
	return true
}

// marshalPayload JSON-encodes a bus payload for the wire, returning the
// encode error to the caller.
func marshalPayload(payload any) (json.RawMessage, error) {
	b, err := json.Marshal(payload)
	if err != nil {
		return nil, err
	}
	return json.RawMessage(b), nil
}

// ---- export lifecycle ----
//
// Exports are drained inline in the main loop (no extra goroutines)
// to avoid TinyGo cooperative scheduler mutex panics.
+ +func (s *session) setupExports() { + if s.conn == nil { + return + } + for _, p := range exportPatterns() { + s.exportSubs = append(s.exportSubs, s.conn.Subscribe(p)) + } + for _, p := range exportCallPatterns() { + s.exportCallSubs = append(s.exportCallSubs, s.conn.Subscribe(p)) + } +} + +func (s *session) teardownExports() { + for _, sub := range s.exportSubs { + s.conn.Unsubscribe(sub) + } + s.exportSubs = nil + for _, sub := range s.exportCallSubs { + s.conn.Unsubscribe(sub) + } + s.exportCallSubs = nil +} + +func (s *session) teardownInbound() { + for _, call := range s.inboundCalls { + if call.sub != nil { + s.conn.Unsubscribe(call.sub) + call.sub = nil + } + } + s.inboundCalls = nil +} + +func (s *session) teardownOutbound(reason string) { + for _, call := range s.outboundCalls { + if call.req != nil && call.req.CanReply() { + s.conn.Reply(call.req, types.ErrorReply{OK: false, Error: reason}, false) + } + } + s.outboundCalls = nil +} + +// drainExports does a non-blocking read of each export subscription +// and writes any messages to the wire. Called from the main loop. 
func (s *session) drainExports() {
	// Exports flow only on an established link, with exports enabled,
	// and after the post-handshake holdoff (exportReadyAt) has passed.
	if s.link != linkUp {
		return
	}
	if !s.exportsEnabled {
		return
	}
	if !s.exportReadyAt.IsZero() && time.Now().Before(s.exportReadyAt) {
		return
	}
	// total counts frames sent in this call across all subscriptions;
	// the call stops once exportMaxPerTick is reached.
	total := 0
	for _, sub := range s.exportSubs {
		for {
			if total >= exportMaxPerTick {
				return
			}
			select {
			case m, ok := <-sub.Channel():
				if !ok || m == nil {
					goto nextSub
				}
				wire := exportTopic(m.Topic)
				if wire == nil {
					// No export rule for this topic: skip it without
					// counting it against the per-tick budget.
					continue
				}
				// A retained message with a nil payload is a retain
				// clear; it goes out as an unretain frame.
				if m.Retained && m.Payload == nil {
					if !s.sendRPC(marshal(protoUnretain{
						Type:  msgUnretain,
						Topic: wire,
					})) {
						return
					}
					total++
					continue
				}
				payload, err := marshalPayload(m.Payload)
				if err != nil {
					// Unencodable payload: log and drop this message only.
					s.logKV("export payload dropped", "err", err.Error())
					continue
				}
				if !s.sendRPC(marshal(protoPub{
					Type:    msgPub,
					Topic:   wire,
					Payload: payload,
					Retain:  m.Retained,
				})) {
					return
				}
				total++
			default:
				// Nothing queued on this subscription right now.
				goto nextSub
			}
		}
	nextSub:
	}
}

// drainInbound polls every pending inbound call without blocking. A call
// whose bus reply has arrived is answered on the wire: an error reply if
// the payload describes a failure or cannot be encoded, the encoded value
// otherwise. A closed channel, a nil reply, or an expired deadline is
// answered with "timeout". Calls still waiting are kept for the next tick.
func (s *session) drainInbound(now time.Time) {
	if len(s.inboundCalls) == 0 {
		return
	}

	// Filter in place: keep shares its backing array with s.inboundCalls.
	// NOTE(review): each early return below leaves s.inboundCalls
	// un-compacted, possibly still holding calls whose sub is already nil.
	// Presumably a failed send has already torn the link down (which
	// clears the list via teardownInbound) — confirm in enqueueFrame.
	keep := s.inboundCalls[:0]
	for _, call := range s.inboundCalls {
		select {
		case reply, ok := <-call.sub.Channel():
			s.conn.Unsubscribe(call.sub)
			call.sub = nil // prevent double-unsubscribe in teardownInbound
			if !ok || reply == nil {
				if !s.sendRPC(marshal(protoReply{Type: msgReply, Corr: call.id, OK: false, Err: reasonTimeout})) {
					return
				}
				continue
			}
			if errStr := checkBusError(reply.Payload); errStr != "" {
				if !s.sendRPC(marshal(protoReply{Type: msgReply, Corr: call.id, OK: false, Err: errStr})) {
					return
				}
				continue
			}
			payload, err := marshalPayload(reply.Payload)
			if err != nil {
				if !s.sendRPC(marshal(protoReply{Type: msgReply, Corr: call.id, OK: false, Err: errPayloadMarshal})) {
					return
				}
				continue
			}
			if !s.sendRPC(marshal(protoReply{Type: msgReply, Corr: call.id, OK: true, Value: payload})) {
				return
			}
			continue
		default:
		}

		// No reply yet: expire the call once its deadline is reached.
		if !now.Before(call.deadline) {
			s.conn.Unsubscribe(call.sub)
			call.sub = nil
			if !s.sendRPC(marshal(protoReply{Type: msgReply, Corr: call.id, OK: false, Err: reasonTimeout})) {
				return
			}
			continue
		}

		keep = append(keep, call)
	}

	s.inboundCalls = keep
}

// drainOutbound does two things per tick: it forwards requests queued on
// the export-call subscriptions to the wire as call frames, remembering
// each repliable request under a fresh "wire-<n>" correlation id, and it
// fails pending outbound calls whose deadline has passed with "timeout".
func (s *session) drainOutbound(now time.Time) {
	// Forward new outgoing calls from the local bus onto the wire.
	if s.link == linkUp && len(s.exportCallSubs) > 0 {
		for _, sub := range s.exportCallSubs {
			for {
				select {
				case msg, ok := <-sub.Channel():
					if !ok || msg == nil {
						goto nextSub
					}

					wireTopic := exportCallTopic(msg.Topic)
					if wireTopic == nil {
						continue
					}

					payload, err := marshalPayload(msg.Payload)
					if err != nil {
						s.logKV("outgoing call dropped", "err", err.Error())
						if msg.CanReply() {
							s.conn.Reply(msg, types.ErrorReply{OK: false, Error: errPayloadMarshal}, false)
						}
						continue
					}
					id := s.nextOutboundID
					s.nextOutboundID++
					corr := "wire-" + strconvx.Utoa64(id)
					// The call is registered before the send, so if the
					// send fails it stays pending until it is expired or
					// torn down.
					if msg.CanReply() {
						s.outboundCalls = append(s.outboundCalls, &outboundCall{
							id:       corr,
							req:      msg,
							deadline: now.Add(callTimeoutDef),
						})
					}
					if !s.sendRPC(marshal(protoCall{
						Type:      msgCall,
						ID:        corr,
						Topic:     wireTopic,
						Payload:   payload,
						TimeoutMs: int(callTimeoutDef / time.Millisecond),
					})) {
						return
					}
				default:
					goto nextSub
				}
			}
		nextSub:
		}
	}

	// Expire outbound calls that have timed out waiting for a remote reply.
	if len(s.outboundCalls) > 0 {
		keep := s.outboundCalls[:0]
		for _, call := range s.outboundCalls {
			if !now.Before(call.deadline) {
				if call.req != nil && call.req.CanReply() {
					s.conn.Reply(call.req, types.ErrorReply{OK: false, Error: reasonTimeout}, false)
				}
				continue
			}
			keep = append(keep, call)
		}
		s.outboundCalls = keep
	}
}

// ---- transport write ----

// sendControl, sendRPC, sendBulk are the lane-tagged enqueue entry
// points used at every send site. They wrap enqueueFrame (defined in
// writer.go) so the lane intent is explicit at the call site.
+// +// Lane assignment per protocol.lua's FRAME_CLASS: +// +// control: hello, hello_ack, ping, pong, xfer_{begin,ready,need,commit,done,abort} +// rpc: pub, unretain, call, reply +// bulk: xfer_chunk (MCU does not originate; bulk lane unused on MCU) +func (s *session) sendControl(data []byte) bool { return s.enqueueFrame(laneControl, data) } +func (s *session) sendRPC(data []byte) bool { return s.enqueueFrame(laneRPC, data) } + +func (s *session) logWaiting() { + if s.peerSID != "" { + return + } + s.log("waiting for connection start") +} diff --git a/services/fabric/session_timer_test.go b/services/fabric/session_timer_test.go new file mode 100644 index 0000000..f846c9b --- /dev/null +++ b/services/fabric/session_timer_test.go @@ -0,0 +1,20 @@ +package fabric + +import ( + "testing" + "time" +) + +func TestResetTimerDrainsExpiredTick(t *testing.T) { + timer := time.NewTimer(5 * time.Millisecond) + defer timer.Stop() + + <-timer.C + resetTimer(timer, 40*time.Millisecond) + + select { + case <-timer.C: + t.Fatal("timer fired from a stale tick") + case <-time.After(15 * time.Millisecond): + } +} diff --git a/services/fabric/trace.go b/services/fabric/trace.go new file mode 100644 index 0000000..4c2637b --- /dev/null +++ b/services/fabric/trace.go @@ -0,0 +1,45 @@ +package fabric + +func traceLine(dir string, data []byte) { + if !fabricTraceEnabled { + return + } + println("[fabric-trace]", dir, "len", len(data), "line", tracePreview(data)) +} + +func tracePreview(data []byte) string { + const max = 200 + if len(data) > max { + data = data[:max] + } + out := make([]byte, 0, len(data)*2+3) + for _, b := range data { + switch b { + case '\n': + out = append(out, '\\', 'n') + case '\r': + out = append(out, '\\', 'r') + case '\t': + out = append(out, '\\', 't') + default: + if b < 0x20 || b > 0x7e { + out = append(out, '\\', 'x') + out = append(out, hexNibble(b>>4), hexNibble(b)) + } else { + out = append(out, b) + } + } + } + if len(data) == max { + out = append(out, 
'.', '.', '.') + } + return string(out) +} + +func hexNibble(v byte) byte { + v &= 0x0f + if v < 10 { + return '0' + v + } + return 'a' + (v - 10) +} diff --git a/services/fabric/trace_disabled.go b/services/fabric/trace_disabled.go new file mode 100644 index 0000000..734daec --- /dev/null +++ b/services/fabric/trace_disabled.go @@ -0,0 +1,5 @@ +//go:build !fabric_trace + +package fabric + +const fabricTraceEnabled = false diff --git a/services/fabric/trace_enabled.go b/services/fabric/trace_enabled.go new file mode 100644 index 0000000..4ed89ca --- /dev/null +++ b/services/fabric/trace_enabled.go @@ -0,0 +1,5 @@ +//go:build fabric_trace + +package fabric + +const fabricTraceEnabled = true diff --git a/services/fabric/transfer.go b/services/fabric/transfer.go new file mode 100644 index 0000000..59919b8 --- /dev/null +++ b/services/fabric/transfer.go @@ -0,0 +1,343 @@ +package fabric + +import ( + "encoding/base64" + "encoding/json" + "runtime" + "strings" + "time" + + "devicecode-go/x/strconvx" + "devicecode-go/x/xxhash" +) + +const postTransferDoneSettle = 250 * time.Millisecond +const transferProgressLogEvery = 32 + +// transferMeta captures xfer_begin contents. The required Lua wire shape is +// {xfer_id, size, checksum}; meta is optional but source-used (transfer_mgr +// passes it through to the receiver, where meta.receiver names a local +// endpoint to call after xfer_commit and before xfer_done). Preserve meta +// as an opaque blob — interpretation lives in fabric-update. +type transferMeta struct { + ID string + Size uint32 + Checksum string // xxHash32 hex (8 lower-case hex chars), no algorithm field + Meta json.RawMessage +} + +// transferInfo is internal-only state returned by the sink on Commit. It is +// no longer wire-visible — xfer_done carries only xfer_id in the canonical +// schema; size/checksum reconciliation lives on xfer_commit. 
+type transferInfo struct { + BytesWritten uint32 + SlotXIPAddr uint32 +} + +// transferSink is the firmware-side write target for an incoming transfer. +// WriteChunk receives bytes at the given byte offset (matching xfer_chunk's +// canonical wire fields). No sequence number is passed — the caller has +// already validated offset against expected progress. +type transferSink interface { + WriteChunk(offset uint32, data []byte) error + Commit() (transferInfo, error) + Apply() error + Abort(reason string) error +} + +type incomingTransfer struct { + meta transferMeta + sink transferSink + bytesWritten uint32 + chunksSeen uint32 + hasher *xxhash.Hasher + // deadline is the idle-chunk watchdog: bumped on every accepted chunk + // and on initial xfer_begin. checkTransferTimeout fires if now > deadline. + // Mirrors transfer_mgr.lua: `active.deadline = runtime.now() + phase_timeout`. + deadline time.Time +} + +func lowerHex(s string) string { + return strings.ToLower(strings.TrimSpace(s)) +} + +func u32s(v uint32) string { + return strconvx.Itoa(int(v)) +} + +func (s *session) sendTransferReady(id string) bool { + return s.sendControl(marshal(protoXferReady{ + Type: msgXferReady, + XferID: id, + })) +} + +func (s *session) sendTransferNeed(id string, next uint32) bool { + return s.sendControl(marshal(protoXferNeed{ + Type: msgXferNeed, + XferID: id, + Next: next, + })) +} + +func (s *session) sendTransferDone(id string) bool { + return s.sendControl(marshal(protoXferDone{ + Type: msgXferDone, + XferID: id, + })) +} + +func (s *session) sendTransferAbort(id, reason string) bool { + return s.sendControl(marshal(protoXferAbort{ + Type: msgXferAbort, + XferID: id, + Err: reason, + })) +} + +func (s *session) clearTransfer() *incomingTransfer { + cur := s.incomingTransfer + s.incomingTransfer = nil + return cur +} + +func (s *session) abortTransfer(reason string) { + cur := s.clearTransfer() + if cur == nil { + return + } + if err := cur.sink.Abort(reason); err != nil { + 
s.logKV("transfer abort failed", "err", err.Error()) + } +} + +// checkTransferTimeout enforces the idle-chunk watchdog. Fires once per +// drain tick from the session run loop; cheap when no transfer is active. +// On expiry both the local sink is aborted and an xfer_abort frame is sent +// to the peer (matching Lua transfer_mgr.lua's `clear_active('timeout')` + +// outbound xfer_abort). +func (s *session) checkTransferTimeout(now time.Time) { + cur := s.incomingTransfer + if cur == nil { + return + } + if !now.After(cur.deadline) { + return + } + id := cur.meta.ID + println("[fabric]", "sid", s.localSID, "xfer_phase_timeout", + "id", id, "phase_s", u32s(uint32(s.cfg.PhaseTimeout/time.Second))) + s.abortTransfer("timeout") + s.sendTransferAbort(id, "timeout") +} + +func validateTransferBegin(msg *protoXferBegin) (transferMeta, string) { + if msg.XferID == "" { + return transferMeta{}, "xfer_begin.xfer_id" + } + if msg.Size == 0 { + return transferMeta{}, "xfer_begin.size" + } + if msg.Checksum == "" { + return transferMeta{}, "xfer_begin.checksum" + } + return transferMeta{ + ID: msg.XferID, + Size: msg.Size, + Checksum: lowerHex(msg.Checksum), + Meta: append(json.RawMessage(nil), msg.Meta...), + }, "" +} + +func (s *session) onTransferBegin(msg *protoXferBegin) { + meta, errStr := validateTransferBegin(msg) + if errStr != "" { + if msg.XferID != "" { + s.sendTransferAbort(msg.XferID, "bad_message: "+errStr) + } + s.logKV("xfer_begin dropped", "err", errStr) + return + } + if s.incomingTransfer != nil { + s.sendTransferAbort(meta.ID, "busy") + return + } + beginFn := s.beginTransfer + if beginFn == nil { + beginFn = beginTransfer + } + sink, err := beginFn(meta) + if err != nil { + s.sendTransferAbort(meta.ID, err.Error()) + return + } + s.incomingTransfer = &incomingTransfer{ + meta: meta, + sink: sink, + hasher: xxhash.New(0), + deadline: time.Now().Add(s.cfg.PhaseTimeout), + } + println( + "[fabric]", "sid", s.localSID, + "xfer_begin accepted", + "id", meta.ID, 
+ "size", u32s(meta.Size), + "checksum", meta.Checksum, + ) + s.sendTransferReady(meta.ID) +} + +func (s *session) onTransferChunk(msg *protoXferChunk) { + cur := s.incomingTransfer + if cur == nil || cur.meta.ID != msg.XferID { + s.logKV("xfer_chunk dropped", "id", msg.XferID) + return + } + // Lua transfer_mgr.lua aborts and clears the active transfer on any + // chunk-level fault (unexpected offset, decode failure, size mismatch). + // Match that — do not send xfer_need + keep alive. + id := cur.meta.ID + if msg.Offset != cur.bytesWritten { + println("[fabric]", "sid", s.localSID, "xfer_chunk aborted", + "id", id, "err", "unexpected_offset", + "off", u32s(msg.Offset), "want_off", u32s(cur.bytesWritten)) + s.abortTransfer("unexpected_offset") + s.sendTransferAbort(id, "unexpected_offset") + return + } + raw, err := base64.RawURLEncoding.DecodeString(msg.Data) + if err != nil { + println("[fabric]", "sid", s.localSID, "xfer_chunk aborted", + "id", id, "err", "decode_failed", + "off", u32s(msg.Offset), "data_len", u32s(uint32(len(msg.Data)))) + s.abortTransfer("decode_failed") + s.sendTransferAbort(id, "decode_failed") + return + } + if len(raw) == 0 { + println("[fabric]", "sid", s.localSID, "xfer_chunk aborted", + "id", id, "err", "empty_chunk", "off", u32s(msg.Offset)) + s.abortTransfer("empty_chunk") + s.sendTransferAbort(id, "empty_chunk") + return + } + if cur.bytesWritten+uint32(len(raw)) > cur.meta.Size { + println("[fabric]", "sid", s.localSID, "xfer_chunk aborted", + "id", id, "err", "size_overflow", + "bytes_written", u32s(cur.bytesWritten), + "raw_len", u32s(uint32(len(raw))), + "total", u32s(cur.meta.Size)) + s.abortTransfer("size_overflow") + s.sendTransferAbort(id, "size_overflow") + return + } + if err := cur.sink.WriteChunk(msg.Offset, raw); err != nil { + reason := err.Error() + s.logKV("transfer write failed", "err", reason) + s.abortTransfer(reason) + s.sendTransferAbort(id, reason) + return + } + _, _ = cur.hasher.Write(raw) + cur.bytesWritten 
+= uint32(len(raw)) + cur.chunksSeen++ + cur.deadline = time.Now().Add(s.cfg.PhaseTimeout) + if cur.chunksSeen == 1 || (cur.chunksSeen%transferProgressLogEvery) == 0 { + println( + "[fabric]", "sid", s.localSID, + "xfer_chunk accepted", + "id", cur.meta.ID, + "off", u32s(msg.Offset), + "data_len", u32s(uint32(len(raw))), + "bytes_written", u32s(cur.bytesWritten), + ) + } + raw = nil + // Forced GC after each absorbed chunk eliminates firmware-transfer byte + // drops on the safe-window allocator. Do NOT remove this without + // reproducing the regression in firmware-mono/docs/old/FABRIC_TRANSFER_FIX.md. + runtime.GC() + s.sendTransferNeed(cur.meta.ID, cur.bytesWritten) +} + +func (s *session) onTransferCommit(msg *protoXferCommit) { + cur := s.incomingTransfer + if cur == nil || cur.meta.ID != msg.XferID { + s.logKV("xfer_commit dropped", "id", msg.XferID) + return + } + id := cur.meta.ID + if msg.Size != cur.meta.Size || cur.bytesWritten != cur.meta.Size { + println("[fabric]", "sid", s.localSID, "xfer_commit failed", + "id", id, "err", "size_mismatch", + "bytes_written", u32s(cur.bytesWritten), + "msg_size", u32s(msg.Size), "meta_size", u32s(cur.meta.Size)) + s.abortTransfer("size_mismatch") + s.sendTransferAbort(id, "size_mismatch") + return + } + streamedHex := xxhashHex(cur.hasher.Sum32()) + commitChecksum := lowerHex(msg.Checksum) + if commitChecksum != cur.meta.Checksum || streamedHex != cur.meta.Checksum { + println("[fabric]", "sid", s.localSID, "xfer_commit failed", + "id", id, "err", "checksum_mismatch", + "begin", cur.meta.Checksum, + "commit", commitChecksum, + "streamed", streamedHex, + ) + s.abortTransfer("checksum_mismatch") + s.sendTransferAbort(id, "checksum_mismatch") + return + } + info, err := cur.sink.Commit() + if err != nil { + s.logKV("transfer commit failed", "err", err.Error()) + reason := err.Error() + s.abortTransfer(reason) + s.sendTransferAbort(id, reason) + return + } + sink := cur.sink + s.clearTransfer() + println( + "[fabric]", 
"sid", s.localSID, + "xfer_commit accepted", + "id", id, + "bytes_written", u32s(info.BytesWritten), + ) + if !s.sendTransferDone(id) { + return + } + time.Sleep(postTransferDoneSettle) + if err := sink.Apply(); err != nil { + s.logKV("transfer apply failed", "err", err.Error()) + return + } + println("[fabric]", "sid", s.localSID, "transfer apply ok", "id", id) +} + +func (s *session) onTransferAbort(msg *protoXferAbort) { + cur := s.incomingTransfer + if cur == nil || cur.meta.ID != msg.XferID { + s.logKV("xfer_abort dropped", "id", msg.XferID) + return + } + reason := msg.Err + if reason == "" { + reason = "remote_abort" + } + println("[fabric]", "sid", s.localSID, "xfer_abort received", "id", cur.meta.ID, "reason", reason) + s.abortTransfer(reason) +} + +// xxhashHex formats a uint32 xxHash32 digest as 8 lower-case hex characters, +// matching the wire format used by the Lua reference's M.digest_hex. +func xxhashHex(v uint32) string { + const digits = "0123456789abcdef" + var buf [8]byte + for i := 7; i >= 0; i-- { + buf[i] = digits[v&0xf] + v >>= 4 + } + return string(buf[:]) +} diff --git a/services/fabric/transfer_sink_rp2350.go b/services/fabric/transfer_sink_rp2350.go new file mode 100644 index 0000000..3360aa6 --- /dev/null +++ b/services/fabric/transfer_sink_rp2350.go @@ -0,0 +1,19 @@ +//go:build tinygo && rp2350 + +// Default RP2350 transfer sink for the fabric-protocol baseline. Rejects all +// transfers at xfer_begin: signed-image verification and staged flash writes +// land in fabric-update via the receiver topic +// `raw/member/mcu/cap/updater/main/rpc/receive` and `pico2-a-b/imagev1/`. Until +// that path lands, the safe default is to refuse incoming transfers rather +// than flash unverified bytes directly into the inactive slot. 
+ +package fabric + +import "errors" + +var errTransferUnsupported = errors.New("staging_unavailable: signed-image receiver not present in this build") + +func beginTransfer(meta transferMeta) (transferSink, error) { + _ = meta + return nil, errTransferUnsupported +} diff --git a/services/fabric/transfer_sink_stub.go b/services/fabric/transfer_sink_stub.go new file mode 100644 index 0000000..6386f0a --- /dev/null +++ b/services/fabric/transfer_sink_stub.go @@ -0,0 +1,11 @@ +//go:build !(tinygo && rp2350) + +package fabric + +import "errors" + +var errTransferUnsupported = errors.New("unsupported") + +func beginTransfer(meta transferMeta) (transferSink, error) { + return nil, errTransferUnsupported +} diff --git a/services/fabric/transfer_test.go b/services/fabric/transfer_test.go new file mode 100644 index 0000000..7837980 --- /dev/null +++ b/services/fabric/transfer_test.go @@ -0,0 +1,445 @@ +package fabric + +import ( + "context" + "encoding/base64" + "encoding/json" + "strings" + "testing" + "time" + + "devicecode-go/bus" + "devicecode-go/x/xxhash" +) + +type fakeTransferSink struct { + offs []uint32 + writes [][]byte + writeErr error + commitErr error + applyErr error + commitInfo transferInfo + committed bool + applied bool + abortReasons []string +} + +func (s *fakeTransferSink) WriteChunk(off uint32, data []byte) error { + if s.writeErr != nil { + return s.writeErr + } + s.offs = append(s.offs, off) + s.writes = append(s.writes, append([]byte(nil), data...)) + return nil +} + +func (s *fakeTransferSink) Commit() (transferInfo, error) { + if s.commitErr != nil { + return transferInfo{}, s.commitErr + } + s.committed = true + return s.commitInfo, nil +} + +func (s *fakeTransferSink) Apply() error { + s.applied = true + return s.applyErr +} + +func (s *fakeTransferSink) Abort(reason string) error { + s.abortReasons = append(s.abortReasons, reason) + return nil +} + +func runSessionWithSink(ctx context.Context, tr Transport, conn *bus.Connection, sink 
*fakeTransferSink) { + s := session{ + linkID: defaultLinkID, + nodeID: "mcu-1", + peerID: "cm5-local", + localSID: "mcu-sid-test", + tr: tr, + conn: conn, + beginTransfer: func(meta transferMeta) (transferSink, error) { + return sink, nil + }, + } + s.run(ctx) +} + +func rawURL(data []byte) string { + return base64.RawURLEncoding.EncodeToString(data) +} + +// xxhashStr is the wire-format checksum: lower-case hex, 8 chars, no algorithm +// field. Mirrors the Lua reference's M.digest_hex. +func xxhashStr(data []byte) string { + return xxhashHex(xxhash.Sum32(data, 0)) +} + +func TestTransferBeginPreservesMeta(t *testing.T) { + // xfer_begin's meta is opaque to fabric-protocol but must be preserved + // for fabric-update's receiver, which pulls meta.receiver out of it. + b := newBus() + cm5, mcu := pipePair() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var captured transferMeta + sink := &fakeTransferSink{} + s := session{ + linkID: defaultLinkID, + nodeID: "mcu-1", + peerID: "cm5-local", + localSID: "mcu-sid-test", + tr: mcu, + conn: b.NewConnection("fabric"), + beginTransfer: func(meta transferMeta) (transferSink, error) { + captured = meta + return sink, nil + }, + } + go s.run(ctx) + bringUp(t, cm5) + + payload := []byte("abcd") + metaBlob := json.RawMessage(`{"receiver":["raw","member","mcu","cap","updater","main","rpc","receive"],"version":"1.2.3"}`) + + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-meta", + Size: uint32(len(payload)), + Checksum: xxhashStr(payload), + Meta: metaBlob, + }) + _ = readMsg[protoXferReady](t, cm5) + + if string(captured.Meta) != string(metaBlob) { + t.Fatalf("transferMeta.Meta = %q, want %q", captured.Meta, metaBlob) + } + if captured.ID != "xfer-meta" || captured.Size != uint32(len(payload)) { + t.Fatalf("transferMeta basic fields wrong: %+v", captured) + } +} + +func TestTransferReceiveSuccess(t *testing.T) { + b := newBus() + cm5, mcu := pipePair() + ctx, cancel := 
context.WithCancel(context.Background()) + defer cancel() + + sink := &fakeTransferSink{ + commitInfo: transferInfo{ + BytesWritten: 10, + SlotXIPAddr: 0x10280000, + }, + } + + go runSessionWithSink(ctx, mcu, b.NewConnection("fabric"), sink) + bringUp(t, cm5) + + payload := []byte("abcdefghij") + checksum := xxhashStr(payload) + + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-2", + Size: uint32(len(payload)), + Checksum: checksum, + }) + + ready := readMsg[protoXferReady](t, cm5) + if ready.Type != msgXferReady || ready.XferID != "xfer-2" { + t.Fatalf("bad xfer_ready: %+v", ready) + } + + parts := [][]byte{payload[:4], payload[4:8], payload[8:]} + off := uint32(0) + for i, part := range parts { + sendMsg(t, cm5, protoXferChunk{ + Type: msgXferChunk, + XferID: "xfer-2", + Offset: off, + Data: rawURL(part), + }) + need := readMsg[protoXferNeed](t, cm5) + want := off + uint32(len(part)) + if need.Next != want { + t.Fatalf("xfer_need[%d].next = %d, want %d", i, need.Next, want) + } + off = want + } + + sendMsg(t, cm5, protoXferCommit{ + Type: msgXferCommit, + XferID: "xfer-2", + Size: uint32(len(payload)), + Checksum: checksum, + }) + + done := readMsg[protoXferDone](t, cm5) + if done.Type != msgXferDone || done.XferID != "xfer-2" { + t.Fatalf("bad xfer_done: %+v", done) + } + + time.Sleep(postTransferDoneSettle + 50*time.Millisecond) + + if got := string(sink.writes[0]) + string(sink.writes[1]) + string(sink.writes[2]); got != string(payload) { + t.Fatalf("sink writes = %q, want %q", got, payload) + } + if !sink.committed { + t.Fatal("sink.Commit was not called") + } + if !sink.applied { + t.Fatal("sink.Apply was not called") + } +} + +func TestTransferChunkBadOffsetAborts(t *testing.T) { + // Lua transfer_mgr aborts and clears the active transfer on chunk faults + // (unexpected_offset, decode_failed, size_overflow). Match that — do not + // keep the transfer alive with an xfer_need. 
+ b := newBus() + cm5, mcu := pipePair() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sink := &fakeTransferSink{} + go runSessionWithSink(ctx, mcu, b.NewConnection("fabric"), sink) + bringUp(t, cm5) + + payload := []byte("abcd") + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-3", + Size: uint32(len(payload)), + Checksum: xxhashStr(payload), + }) + _ = readMsg[protoXferReady](t, cm5) + + // Send a chunk at the wrong byte offset; expect xfer_abort and + // sink.Abort, not an xfer_need retry. + sendMsg(t, cm5, protoXferChunk{ + Type: msgXferChunk, + XferID: "xfer-3", + Offset: 7, + Data: rawURL(payload), + }) + + abort := readMsg[protoXferAbort](t, cm5) + if abort.Type != msgXferAbort || abort.XferID != "xfer-3" || abort.Err != "unexpected_offset" { + t.Fatalf("bad xfer_abort: %+v", abort) + } + if len(sink.writes) != 0 { + t.Fatalf("sink received %d writes, want 0", len(sink.writes)) + } + if len(sink.abortReasons) == 0 { + t.Fatal("expected sink.Abort to be called on chunk fault") + } +} + +func TestTransferChunkDecodeFailureAborts(t *testing.T) { + b := newBus() + cm5, mcu := pipePair() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sink := &fakeTransferSink{} + go runSessionWithSink(ctx, mcu, b.NewConnection("fabric"), sink) + bringUp(t, cm5) + + payload := []byte("abcd") + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-d1", + Size: uint32(len(payload)), + Checksum: xxhashStr(payload), + }) + _ = readMsg[protoXferReady](t, cm5) + + // Bogus base64 (uses non-base64url chars). 
+ sendMsg(t, cm5, protoXferChunk{ + Type: msgXferChunk, + XferID: "xfer-d1", + Offset: 0, + Data: "!!!not-base64!!!", + }) + + abort := readMsg[protoXferAbort](t, cm5) + if abort.Err != "decode_failed" { + t.Fatalf("bad xfer_abort: %+v", abort) + } + if len(sink.abortReasons) == 0 { + t.Fatal("expected sink.Abort on decode failure") + } +} + +func TestTransferChunkSizeOverflowAborts(t *testing.T) { + b := newBus() + cm5, mcu := pipePair() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sink := &fakeTransferSink{} + go runSessionWithSink(ctx, mcu, b.NewConnection("fabric"), sink) + bringUp(t, cm5) + + payload := []byte("abcd") + // Advertise size=4 but send 6 bytes in the first chunk. + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-d2", + Size: uint32(len(payload)), + Checksum: xxhashStr(payload), + }) + _ = readMsg[protoXferReady](t, cm5) + + sendMsg(t, cm5, protoXferChunk{ + Type: msgXferChunk, + XferID: "xfer-d2", + Offset: 0, + Data: rawURL([]byte("abcdef")), + }) + + abort := readMsg[protoXferAbort](t, cm5) + if abort.Err != "size_overflow" { + t.Fatalf("bad xfer_abort: %+v", abort) + } +} + +func TestTransferCommitChecksumMismatchAborts(t *testing.T) { + b := newBus() + cm5, mcu := pipePair() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sink := &fakeTransferSink{} + go runSessionWithSink(ctx, mcu, b.NewConnection("fabric"), sink) + bringUp(t, cm5) + + payload := []byte("abcd") + // Begin with the wrong-checksum advertised. The only way to surface a + // commit-time mismatch is for begin/commit checksums to disagree, OR for + // the streamed bytes to disagree with the begin checksum. Use the + // latter: claim a bogus begin/commit checksum but stream the real bytes. 
+ bogus := strings.Repeat("0", 8) + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-4", + Size: uint32(len(payload)), + Checksum: bogus, + }) + _ = readMsg[protoXferReady](t, cm5) + + sendMsg(t, cm5, protoXferChunk{ + Type: msgXferChunk, + XferID: "xfer-4", + Offset: 0, + Data: rawURL(payload), + }) + _ = readMsg[protoXferNeed](t, cm5) + + sendMsg(t, cm5, protoXferCommit{ + Type: msgXferCommit, + XferID: "xfer-4", + Size: uint32(len(payload)), + Checksum: bogus, + }) + + abort := readMsg[protoXferAbort](t, cm5) + if abort.Type != msgXferAbort || abort.Err != "checksum_mismatch" { + t.Fatalf("bad xfer_abort: %+v", abort) + } + if len(sink.abortReasons) == 0 { + t.Fatal("expected sink abort on checksum mismatch") + } +} + +func TestTransferIdleChunkWatchdog(t *testing.T) { + // transfer_mgr.lua refreshes active.deadline = now + phase_timeout on + // each accepted chunk and aborts with reason="timeout" if the deadline + // passes. With a tight PhaseTimeout, dropping the wire after xfer_begin + // must produce an unsolicited xfer_abort within ~one drain tick. + b := newBus() + cm5, mcu := pipePair() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sink := &fakeTransferSink{} + s := session{ + linkID: defaultLinkID, + nodeID: "mcu-1", + peerID: "cm5-local", + localSID: "mcu-sid-test", + tr: mcu, + conn: b.NewConnection("fabric"), + cfg: LinkConfig{PhaseTimeout: 100 * time.Millisecond}, + beginTransfer: func(meta transferMeta) (transferSink, error) { + return sink, nil + }, + } + go s.run(ctx) + bringUp(t, cm5) + + payload := []byte("abcd") + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-wd", + Size: uint32(len(payload)), + Checksum: xxhashStr(payload), + }) + _ = readMsg[protoXferReady](t, cm5) + + // Stop sending chunks; watchdog should fire within ~PhaseTimeout + + // one exportTickInterval (50ms). 
+ abort := readMsg[protoXferAbort](t, cm5) + if abort.Type != msgXferAbort || abort.XferID != "xfer-wd" || abort.Err != "timeout" { + t.Fatalf("bad xfer_abort: %+v", abort) + } + if len(sink.abortReasons) == 0 || sink.abortReasons[0] != "timeout" { + t.Fatalf("sink.Abort reasons = %v, want [\"timeout\"]", sink.abortReasons) + } +} + +func TestTransferCommitChecksumMismatchOnCommitFrameAborts(t *testing.T) { + // xfer_begin and xfer_commit must agree on the checksum. If they + // disagree (even when the streamed bytes match begin), commit aborts. + b := newBus() + cm5, mcu := pipePair() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sink := &fakeTransferSink{} + go runSessionWithSink(ctx, mcu, b.NewConnection("fabric"), sink) + bringUp(t, cm5) + + payload := []byte("abcd") + good := xxhashStr(payload) + sendMsg(t, cm5, protoXferBegin{ + Type: msgXferBegin, + XferID: "xfer-5", + Size: uint32(len(payload)), + Checksum: good, + }) + _ = readMsg[protoXferReady](t, cm5) + + sendMsg(t, cm5, protoXferChunk{ + Type: msgXferChunk, + XferID: "xfer-5", + Offset: 0, + Data: rawURL(payload), + }) + _ = readMsg[protoXferNeed](t, cm5) + + // Commit advertises a different checksum than begin: must abort. + sendMsg(t, cm5, protoXferCommit{ + Type: msgXferCommit, + XferID: "xfer-5", + Size: uint32(len(payload)), + Checksum: strings.Repeat("0", 8), + }) + + abort := readMsg[protoXferAbort](t, cm5) + if abort.Type != msgXferAbort || abort.Err != "checksum_mismatch" { + t.Fatalf("bad xfer_abort: %+v", abort) + } +} diff --git a/services/fabric/transport_limits.go b/services/fabric/transport_limits.go new file mode 100644 index 0000000..7f5afec --- /dev/null +++ b/services/fabric/transport_limits.go @@ -0,0 +1,11 @@ +package fabric + +import "fmt" + +// maxLineLen caps a single fabric frame (line-delimited JSON) end-to-end. +// It must clear the release transfer chunk: 1024 raw bytes becomes about +// 1366 base64url chars, plus JSON envelope and newline. 
4096 leaves margin +// while keeping malformed lines bounded. +const maxLineLen = 4096 + +var ErrLineTooLong = fmt.Errorf("line exceeds %d bytes", maxLineLen) diff --git a/services/fabric/transport_rw_test.go b/services/fabric/transport_rw_test.go new file mode 100644 index 0000000..bcbeb1e --- /dev/null +++ b/services/fabric/transport_rw_test.go @@ -0,0 +1,84 @@ +package fabric + +import ( + "bufio" + "io" + "sync" +) + +type rwTransport struct { + r *bufio.Reader + mu sync.Mutex + w *bufio.Writer + closers []io.Closer +} + +func newRWTransport(r io.Reader, w io.Writer) *rwTransport { + t := &rwTransport{ + r: bufio.NewReaderSize(r, maxLineLen), + w: bufio.NewWriter(w), + } + var rc io.Closer + if c, ok := r.(io.Closer); ok { + rc = c + t.closers = append(t.closers, c) + } + if c, ok := w.(io.Closer); ok { + if c != rc { + t.closers = append(t.closers, c) + } + } + return t +} + +func (t *rwTransport) ReadLine() ([]byte, error) { + var buf []byte + for { + seg, more, err := t.r.ReadLine() + if err != nil { + return nil, err + } + buf = append(buf, seg...) 
+ if !more { + break + } + if len(buf) > maxLineLen { + for more { + _, more, err = t.r.ReadLine() + if err != nil { + return nil, err + } + } + return nil, ErrLineTooLong + } + } + if len(buf) > maxLineLen { + return nil, ErrLineTooLong + } + return buf, nil +} + +func (t *rwTransport) WriteLine(data []byte) error { + if len(data) > maxLineLen { + return ErrLineTooLong + } + t.mu.Lock() + defer t.mu.Unlock() + if _, err := t.w.Write(data); err != nil { + return err + } + if err := t.w.WriteByte('\n'); err != nil { + return err + } + return t.w.Flush() +} + +func (t *rwTransport) Close() error { + var first error + for _, c := range t.closers { + if err := c.Close(); err != nil && first == nil { + first = err + } + } + return first +} diff --git a/services/fabric/transport_shmring.go b/services/fabric/transport_shmring.go new file mode 100644 index 0000000..dece826 --- /dev/null +++ b/services/fabric/transport_shmring.go @@ -0,0 +1,148 @@ +package fabric + +import ( + "context" + "fmt" + + "devicecode-go/x/shmring" +) + +// ShmringTransport implements Transport over two shmring rings (RX + TX). +// Used for UART0 in production (main.go). +type ShmringTransport struct { + rx *shmring.Ring + tx *shmring.Ring + cancel context.CancelFunc + ctx context.Context + buf []byte + over bool // draining an oversize line +} + +func NewShmringTransport(rx, tx *shmring.Ring) *ShmringTransport { + ctx, cancel := context.WithCancel(context.Background()) + return &ShmringTransport{ + rx: rx, + tx: tx, + cancel: cancel, + ctx: ctx, + buf: make([]byte, 0, 256), + } +} + +func (t *ShmringTransport) ReadLine() ([]byte, error) { + t.buf = t.buf[:0] + t.over = false + + for { + p1, p2 := t.rx.ReadAcquire() + if len(p1)+len(p2) == 0 { + select { + case <-t.ctx.Done(): + return nil, fmt.Errorf("transport closed") + case <-t.rx.Readable(): + continue + } + } + + // Scan p1 for newline. + if idx := findByte(p1, '\n'); idx >= 0 { + if !t.over { + t.buf = append(t.buf, p1[:idx]...) 
+ } + t.rx.ReadRelease(idx + 1) + if t.over { + t.buf = t.buf[:0] + t.over = false + return nil, ErrLineTooLong + } + if len(t.buf) > maxLineLen { + return nil, ErrLineTooLong + } + out := make([]byte, len(t.buf)) + copy(out, t.buf) + traceLine("rx", out) + return out, nil + } + + // No newline in p1 — consume it, check p2. + if !t.over { + t.buf = append(t.buf, p1...) + } + + if idx := findByte(p2, '\n'); idx >= 0 { + if !t.over { + t.buf = append(t.buf, p2[:idx]...) + } + t.rx.ReadRelease(len(p1) + idx + 1) + if t.over { + t.buf = t.buf[:0] + t.over = false + return nil, ErrLineTooLong + } + if len(t.buf) > maxLineLen { + return nil, ErrLineTooLong + } + out := make([]byte, len(t.buf)) + copy(out, t.buf) + traceLine("rx", out) + return out, nil + } + + // No newline — consume everything, wait for more. + if !t.over { + t.buf = append(t.buf, p2...) + } + t.rx.ReadRelease(len(p1) + len(p2)) + + // Check for oversize. + if len(t.buf) > maxLineLen { + t.buf = t.buf[:0] + t.over = true + } + } +} + +func (t *ShmringTransport) WriteLine(data []byte) error { + if len(data) > maxLineLen { + return ErrLineTooLong + } + line := append(data, '\n') + written := 0 + + for written < len(line) { + p1, p2 := t.tx.WriteAcquire() + if len(p1)+len(p2) == 0 { + select { + case <-t.ctx.Done(): + return fmt.Errorf("transport closed") + case <-t.tx.Writable(): + continue + } + } + + remaining := line[written:] + n := copy(p1, remaining) + remaining = remaining[n:] + if len(remaining) > 0 && len(p2) > 0 { + n += copy(p2, remaining) + } + t.tx.WriteCommit(n) + written += n + } + traceLine("tx", data) + return nil +} + +func (t *ShmringTransport) Close() error { + t.cancel() + return nil +} + +func findByte(b []byte, c byte) int { + for i, v := range b { + if v == c { + return i + } + } + return -1 +} diff --git a/services/fabric/writer.go b/services/fabric/writer.go new file mode 100644 index 0000000..5d3f596 --- /dev/null +++ b/services/fabric/writer.go @@ -0,0 +1,113 @@ +package fabric 
+ +import "errors" + +// Outbound frame scheduler — control / rpc / bulk lanes per +// devicecode-lua@2c88090 src/services/fabric/writer.lua. Control bypasses +// fairness and drains first; rpc and bulk share remaining bandwidth via +// weighted round-robin (defaults rpc_quantum=4, bulk_quantum=1). +// +// Lane assignment for outbound MCU frames mirrors protocol.lua's +// FRAME_CLASS map. The MCU never originates xfer_chunk so the bulk lane +// is currently unused on the MCU side; it is wired in for symmetry and +// for future fabric-update telemetry that may want to route bulk frames. + +type lane uint8 + +const ( + laneControl lane = iota + laneRPC + laneBulk +) + +// txLane is a single FIFO of pending wire frames. +type txLane struct { + frames [][]byte +} + +func (l *txLane) push(data []byte) { l.frames = append(l.frames, data) } +func (l *txLane) len() int { return len(l.frames) } +func (l *txLane) pop() []byte { + f := l.frames[0] + l.frames = l.frames[1:] + if len(l.frames) == 0 { + l.frames = nil + } + return f +} + +// enqueueFrame routes data into the lane and immediately drains the +// writer in priority order. With a single producer goroutine the queue +// is normally drained empty before the next caller, but the lane +// discipline kicks in when multiple frames are queued in a single tick +// (e.g. drainExports + drainOutbound generating frames back-to-back). +func (s *session) enqueueFrame(l lane, data []byte) bool { + s.lane(l).push(data) + return s.flushWriter() +} + +func (s *session) lane(l lane) *txLane { + switch l { + case laneControl: + return &s.txControl + case laneRPC: + return &s.txRPC + case laneBulk: + return &s.txBulk + default: + return &s.txRPC + } +} + +// flushWriter writes queued frames to the transport in priority order: +// 1. drain controlQ fully (no fairness), +// 2. weighted RR between rpcQ and bulkQ until both empty. +// Returns false on transport-write failure (link torn down). 
+func (s *session) flushWriter() bool { + rpcQ, bulkQ := s.cfg.RPCQuantum, s.cfg.BulkQuantum + // Defensive: guarantee forward progress even if a caller bypasses + // applyDefaults (e.g. unit tests constructing session{} directly). + // Without this, a zero quantum would spin the outer loop forever. + if rpcQ <= 0 { + rpcQ = 1 + } + if bulkQ <= 0 { + bulkQ = 1 + } + for s.txControl.len() > 0 { + if !s.writeFrame(s.txControl.pop()) { + return false + } + } + for s.txRPC.len() > 0 || s.txBulk.len() > 0 { + for i := 0; i < rpcQ && s.txRPC.len() > 0; i++ { + if !s.writeFrame(s.txRPC.pop()) { + return false + } + } + for i := 0; i < bulkQ && s.txBulk.len() > 0; i++ { + if !s.writeFrame(s.txBulk.pop()) { + return false + } + } + } + return true +} + +// writeFrame is the actual transport write. Mirrors what the prior +// sendFrame did inline; isolated so flushWriter can call it per-frame. +func (s *session) writeFrame(data []byte) bool { + if len(data) > 0 && data[len(data)-1] == '\n' { + data = data[:len(data)-1] + } + if err := s.tr.WriteLine(data); err != nil { + if errors.Is(err, ErrLineTooLong) { + s.log("oversized write dropped") + return true + } + s.handleLinkDown(reasonTransportWrite, err.Error()) + return false + } + s.markTx() + return true +} diff --git a/services/hal/devices/serial_raw/builder.go b/services/hal/devices/serial_raw/builder.go index 5726798..f417760 100644 --- a/services/hal/devices/serial_raw/builder.go +++ b/services/hal/devices/serial_raw/builder.go @@ -9,6 +9,7 @@ import ( "devicecode-go/services/hal/internal/core" "devicecode-go/types" "devicecode-go/x/shmring" + "devicecode-go/x/strconvx" ) // ---- Parameters ---- @@ -50,6 +51,11 @@ type session struct { txHandle shmring.Handle txRing *shmring.Ring + // Reactor-owned observability. Single writer only. + rxRingFull uint32 + rxLogAt time.Time + rxLogHits uint32 + // Single worker (reactor) for the port. 
ctx context.Context cancel context.CancelFunc @@ -169,6 +175,12 @@ func (d *Device) Control(_ core.CapAddr, verb string, payload any) (core.Enqueue } d.startSession(rxSize, txSize) + println( + "[serial-raw]", "session_open", + "uart", d.a.Name, + "rx_size", strconvx.Itoa(rxSize), + "tx_size", strconvx.Itoa(txSize), + ) // --- Device-level hygiene: drain spurious RX before signalling link up --- // Discard any pre-existing or immediately-arriving bytes on the UART RX path. @@ -311,6 +323,32 @@ func (d *Device) stopSession() { // ---- Reactor (single goroutine) ---- +func (d *Device) logRingFullChange(s *session, force bool) { + const rxLogMinInterval = 1 * time.Second + + hits := s.rxRingFull + + if !force { + now := time.Now() + if now.Sub(s.rxLogAt) < rxLogMinInterval { + return + } + if hits == s.rxLogHits { + return + } + s.rxLogAt = now + } else { + s.rxLogAt = time.Now() + } + + println( + "[serial-raw]", "rx_ring_full", + "uart", d.a.Name, + "hits", strconvx.Utoa64(uint64(hits)), + ) + s.rxLogHits = hits +} + func (d *Device) reactor(s *session) { defer close(s.done) @@ -325,6 +363,7 @@ func (d *Device) reactor(s *session) { for { p1, p2 := rxR.WriteAcquire() if len(p1) == 0 { + s.rxRingFull++ break } n1 := u.TryRead(p1) @@ -372,8 +411,10 @@ func (d *Device) reactor(s *session) { } // Idle: wait for any edge, then re-check. 
+ d.logRingFullChange(s, false) select { case <-s.ctx.Done(): + d.logRingFullChange(s, true) return case <-u.Readable(): case <-u.Writable(): diff --git a/services/hal/internal/provider/resources_host.go b/services/hal/internal/provider/resources_host.go new file mode 100644 index 0000000..884ab54 --- /dev/null +++ b/services/hal/internal/provider/resources_host.go @@ -0,0 +1,52 @@ +//go:build !(rp2040 || rp2350) + +package provider + +import ( + "time" + + "devicecode-go/errcode" + "devicecode-go/services/hal/internal/core" + "devicecode-go/services/hal/internal/provider/setups" + + "tinygo.org/x/drivers" +) + +var ( + SelectedPlan setups.ResourcePlan + InitialHALConfig core.HALConfig +) + +type hostRegistry struct{} + +func NewResources() core.Resources { + return core.Resources{Reg: hostRegistry{}} +} + +func (hostRegistry) ClassOf(id core.ResourceID) (core.BusClass, bool) { + return 0, false +} + +func (hostRegistry) ClaimI2C(devID string, id core.ResourceID) (drivers.I2C, error) { + return nil, errcode.Unsupported +} + +func (hostRegistry) ReleaseI2C(devID string, id core.ResourceID) {} + +func (hostRegistry) ClaimSerial(devID string, id core.ResourceID) (core.SerialPort, error) { + return nil, errcode.Unsupported +} + +func (hostRegistry) ReleaseSerial(devID string, id core.ResourceID) {} + +func (hostRegistry) ClaimPin(devID string, pin int, fn core.PinFunc) (core.PinHandle, error) { + return nil, errcode.Unsupported +} + +func (hostRegistry) ReleasePin(devID string, pin int) {} + +func (hostRegistry) SubscribeGPIOEdges(devID string, pin int, sel core.GPIOEdge, debounce time.Duration, buf int) (core.GPIOEdgeStream, error) { + return nil, errcode.Unsupported +} + +func (hostRegistry) UnsubscribeGPIOEdges(devID string, pin int) {} diff --git a/services/hal/internal/provider/setup_none.go b/services/hal/internal/provider/setup_none.go index 2863103..9bfa5b8 100644 --- a/services/hal/internal/provider/setup_none.go +++ 
b/services/hal/internal/provider/setup_none.go @@ -1,4 +1,4 @@ -//go:build !((rp2040 || rp2350) && (pico_rich_dev || pico_bb_proto_1)) +//go:build (rp2040 || rp2350) && !(pico_rich_dev || pico_bb_proto_1) package provider diff --git a/services/hal/internal/provider/setups/pico_bb_proto_1.go b/services/hal/internal/provider/setups/pico_bb_proto_1.go index 58a8101..ae3d94d 100644 --- a/services/hal/internal/provider/setups/pico_bb_proto_1.go +++ b/services/hal/internal/provider/setups/pico_bb_proto_1.go @@ -58,8 +58,8 @@ var SelectedSetup = types.HALConfig{ Domain: "io", Name: "uart1", Baud: 115_200, - RXSize: 32, - TXSize: 512, + RXSize: 256, + TXSize: 2048, }}, {ID: "charger0", Type: "ltc4015", Params: ltc4015dev.Params{ diff --git a/services/reactor/qa_reactor.go b/services/reactor/qa_reactor.go index 20d7638..c615063 100644 --- a/services/reactor/qa_reactor.go +++ b/services/reactor/qa_reactor.go @@ -128,11 +128,11 @@ const ( ) type Reactor struct { + bus *bus.Bus uiConn *bus.Connection // UART jsonOut *shmring.Ring // telemetry (JSON UART TX) - // Logger UART1 already handled by global logger (see SetUART1) // inputs (latest) vin_mV, vbat_mV int32 @@ -167,9 +167,10 @@ type Reactor struct { droppedUART0Bytes int } -func NewReactor(uiConn *bus.Connection) *Reactor { +func NewReactor(b *bus.Bus, uiConn *bus.Connection) *Reactor { return &Reactor{ - uiConn: uiConn, + bus: b, + uiConn: uiConn, levelUp: true, state: stateOff, now: time.Now(), diff --git a/services/reactor/reactor.go b/services/reactor/reactor.go index 67d2acd..9429b2c 100644 --- a/services/reactor/reactor.go +++ b/services/reactor/reactor.go @@ -7,13 +7,16 @@ import ( "runtime" "time" - "devicecode-go/utilities" "devicecode-go/bus" + "devicecode-go/services/fabric" "devicecode-go/types" + "devicecode-go/utilities" "devicecode-go/x/shmring" "devicecode-go/x/strconvx" ) +const fabricWaitLogInterval = 2 * time.Second + // ----------------------------------------------------------------------------- // 
Thresholds & timing // ----------------------------------------------------------------------------- @@ -128,12 +131,9 @@ const ( ) type Reactor struct { + bus *bus.Bus uiConn *bus.Connection - // UART - jsonOut *shmring.Ring // telemetry (JSON UART TX) - // Logger UART1 already handled by global logger (see SetUART1) - // inputs (latest) vin_mV, vbat_mV int32 iin_mA, ibat_mA int32 @@ -162,14 +162,12 @@ type Reactor struct { // misc now time.Time - - // telemetry drop counters (bytes) - droppedUART0Bytes int } -func NewReactor(uiConn *bus.Connection) *Reactor { +func NewReactor(b *bus.Bus, uiConn *bus.Connection) *Reactor { return &Reactor{ - uiConn: uiConn, + bus: b, + uiConn: uiConn, levelUp: true, state: stateOff, now: time.Now(), @@ -373,97 +371,23 @@ func (r *Reactor) OnCharger(v types.ChargerValue) { r.vin_mV = v.VIN_mV r.iin_mA = v.IIn_mA r.tsVIN = r.now - - // JSON: {"power/charger/internal/vin":..,"vsys":..,"iin":..} - if r.jsonOut != nil { - var w utilities.JSONWriter - w.Write = r.jsonWrite - w.Begin() - w.KvInt("power/charger/internal/vin", int(v.VIN_mV)) - w.KvInt("power/charger/internal/vsys", int(v.VSYS_mV)) - w.KvInt("power/charger/internal/iin", int(v.IIn_mA)) - // Full bitfield maps (0/1) for LOCF pipelines - { - it := types.NewBitIter(types.SystemStatus(v.Sys), types.SystemStatusTable[:]) - for { - bitName, set, ok := it.NextAny() - if !ok { - break - } - if set { - w.KvInt("power/charger/internal/system/"+bitName, 1) - } else { - w.KvInt("power/charger/internal/system/"+bitName, 0) - } - } - } - { - it := types.NewBitIter(types.ChargeStatusBits(v.Status), types.ChargeStatusTable[:]) - for { - bitName, set, ok := it.NextAny() - if !ok { - break - } - if set { - w.KvInt("power/charger/internal/status/"+bitName, 1) - } else { - w.KvInt("power/charger/internal/status/"+bitName, 0) - } - } - } - { - it := types.NewBitIter(types.ChargerStateBits(v.State), types.ChargerStateTable[:]) - for { - bitName, set, ok := it.NextAny() - if !ok { - break - } - 
if set { - w.KvInt("power/charger/internal/state/"+bitName, 1) - } else { - w.KvInt("power/charger/internal/state/"+bitName, 0) - } - } - } - w.End() - } } func (r *Reactor) OnBattery(v types.BatteryValue) { r.vbat_mV = v.PackMilliV r.ibat_mA = v.IBatMilliA r.tsVBAT = r.now - - // JSON: {"power/battery/internal/vbat":..,"ibat":..} - if r.jsonOut != nil { - var w utilities.JSONWriter - w.Write = r.jsonWrite - w.Begin() - w.KvInt("power/battery/internal/vbat", int(v.PackMilliV)) - w.KvInt("power/battery/internal/ibat", int(v.IBatMilliA)) - w.KvInt("power/battery/internal/bsr", int(v.BSR_uOhmPerCell)) - w.End() - } } -func (r *Reactor) OnTempDeciC(label string, deci int, jsonKey string) { +func (r *Reactor) OnTempDeciC(label string, deci int, _ string) { log.Deci(label, deci) - if r.jsonOut != nil { - var w utilities.JSONWriter - w.Write = r.jsonWrite - w.Begin() - w.KvInt(jsonKey, deci) - w.End() - } } -// ---- memory snapshot telemetry (every ~2 s in main loop) ---- +// ---- memory snapshot (every ~3 s in main loop) ---- func (r *Reactor) emitMemSnapshot() { var ms runtime.MemStats - runtime.GC() runtime.ReadMemStats(&ms) - // log line log.Println( "[mem] ", "alloc:", int(ms.Alloc), " ", @@ -471,18 +395,10 @@ func (r *Reactor) emitMemSnapshot() { "mallocs:", int(ms.Mallocs), " ", "frees:", int(ms.Frees), ) - // JSON (minimal to keep overhead low) - if r.jsonOut != nil { - var w utilities.JSONWriter - w.Write = r.jsonWrite - w.Begin() - w.KvInt("sys/mem/alloc", int(ms.Alloc)) - w.End() - } } func (r *Reactor) Run(ctx context.Context) { -// Subscriptions (env + power) + // Subscriptions (env + power) log.Println("[main] subscribing env + power …") tempSub := r.uiConn.Subscribe(tTempValue) tempDieSub := r.uiConn.Subscribe(tDieTempValue) @@ -491,24 +407,33 @@ func (r *Reactor) Run(ctx context.Context) { stSub := r.uiConn.Subscribe(stTopic) evSub := r.uiConn.Subscribe(evTopic) - // UART sessions (TX only needed for our use) - const ( - uartTele = "uart0" // telemetry 
JSON - uartLog = "uart1" // log mirror - ) - subSessOpenTele := r.uiConn.Subscribe(tSessOpened(uartTele)) - subSessOpenLog := r.uiConn.Subscribe(tSessOpened(uartLog)) - subSessClosedTele := r.uiConn.Subscribe(tSessClosed(uartTele)) - subSessClosedLog := r.uiConn.Subscribe(tSessClosed(uartLog)) - - // Kick open requests (fire-and-forget; events carry handles) - r.uiConn.Publish(r.uiConn.NewMessage(tSessOpen(uartTele), nil, false)) - r.uiConn.Publish(r.uiConn.NewMessage(tSessOpen(uartLog), nil, false)) + // UART session for the CM5 Fabric link on proto_1 hardware. + const uartFabric = "uart1" + subSessOpenFabric := r.uiConn.Subscribe(tSessOpened(uartFabric)) + subSessClosedFabric := r.uiConn.Subscribe(tSessClosed(uartFabric)) + r.uiConn.Publish(r.uiConn.NewMessage(tSessOpen(uartFabric), nil, false)) // Retry back-off guards - var retryTeleAt, retryLogAt time.Time + var retryFabricAt time.Time + + // Fabric session lifecycle state + var fabricCancel context.CancelFunc + var fabricDone chan struct{} + var fabricSessionOpen bool + nextFabricWaitLog := time.Now() + + stopFabricSession := func() { + if fabricCancel == nil { + return + } + fabricCancel() + fabricCancel = nil + if fabricDone != nil { + <-fabricDone + fabricDone = nil + } + } - // Supervisory ticker ticker := time.NewTicker(TICK) defer ticker.Stop() @@ -518,33 +443,39 @@ func (r *Reactor) Run(ctx context.Context) { for { select { // ---- UART session opened/closed ---- - case m := <-subSessOpenTele.Channel(): - if ev, ok := m.Payload.(types.SerialSessionOpened); ok { - r.jsonOut = shmring.Get(shmring.Handle(ev.TXHandle)) - log.Println("[uart0] telemetry session opened") - } - case m := <-subSessOpenLog.Channel(): + case m := <-subSessOpenFabric.Channel(): if ev, ok := m.Payload.(types.SerialSessionOpened); ok { - log.SetUART1(shmring.Get(shmring.Handle(ev.TXHandle))) - log.Println("[uart1] log session opened") + // Tear down any previous fabric session before starting a new one. 
+ stopFabricSession() + rx := shmring.Get(shmring.Handle(ev.RXHandle)) + tx := shmring.Get(shmring.Handle(ev.TXHandle)) + tr := fabric.NewShmringTransport(rx, tx) + fabricConn := r.bus.NewConnection("fabric") + fabricCtx, cancel := context.WithCancel(ctx) + done := make(chan struct{}) + fabricCancel = cancel + fabricDone = done + fabricSessionOpen = true + go func() { + defer close(done) + fabric.Run(fabricCtx, tr, fabricConn, "mcu-1", "cm5", fabric.DefaultLinkConfig()) + }() + log.Println("[uart1] fabric session opened") } - case <-subSessClosedTele.Channel(): - r.jsonOut = nil - log.Println("[uart0] telemetry session closed") - // Auto-reopen with back-off - if time.Now().After(retryTeleAt) { - r.uiConn.Publish(r.uiConn.NewMessage(tSessOpen(uartTele), nil, false)) - retryTeleAt = time.Now().Add(2 * time.Second) + case <-subSessClosedFabric.Channel(): + // Ignore stale close events — the open handler already tears down + // the previous session before starting a new one. + if !fabricSessionOpen { + continue } - case <-subSessClosedLog.Channel(): - log.SetUART1(nil) - log.Println("[uart1] log session closed") - // Auto-reopen with back-off - if time.Now().After(retryLogAt) { - r.uiConn.Publish(r.uiConn.NewMessage(tSessOpen(uartLog), nil, false)) - retryLogAt = time.Now().Add(2 * time.Second) + stopFabricSession() + fabricSessionOpen = false + nextFabricWaitLog = time.Now() + log.Println("[uart1] fabric session closed") + if time.Now().After(retryFabricAt) { + r.uiConn.Publish(r.uiConn.NewMessage(tSessOpen(uartFabric), nil, false)) + retryFabricAt = time.Now().Add(2 * time.Second) } - // ---- Env prints ---- case m := <-tempSub.Channel(): if v, ok := m.Payload.(types.TemperatureValue); ok { @@ -560,14 +491,6 @@ func (r *Reactor) Run(ctx context.Context) { case m := <-humidSub.Channel(): if v, ok := m.Payload.(types.HumidityValue); ok { log.Hundredths("[value] env/humidity/core %RH=", int(v.RHx100)) - // JSON - if r.jsonOut != nil { - var w utilities.JSONWriter - 
w.Write = r.jsonWrite - w.Begin() - w.KvInt("env/humidity/core", int(v.RHx100)) - w.End() - } } // ---- Die Temp Backup ---- @@ -602,25 +525,16 @@ func (r *Reactor) Run(ctx context.Context) { case m := <-evSub.Channel(): printCapEvent(m) - // JSON: {"///event":""} - if r.jsonOut != nil { - dom, _ := m.Topic.At(2).(string) - kind, _ := m.Topic.At(3).(string) - name, _ := m.Topic.At(4).(string) - tag, _ := m.Topic.At(6).(string) - if dom != "" && kind != "" && name != "" && tag != "" { - var w utilities.JSONWriter - w.Write = r.jsonWrite - w.Begin() - w.KvStr(dom+"/"+kind+"/"+name+"/event", tag) - w.End() - } - } // ---- Supervisory tick ---- case <-ticker.C: r.now = time.Now() + if !fabricSessionOpen && !r.now.Before(nextFabricWaitLog) { + log.Println("[main] waiting for fabric connection start") + nextFabricWaitLog = r.now.Add(fabricWaitLogInterval) + } + // 1) Run FSM (includes symmetric reversal) r.stepFSM() @@ -641,26 +555,6 @@ func (r *Reactor) Run(ctx context.Context) { } } -// ----------------------------------------------------------------------------- -// Centralised UART write helpers (handle partial writes) -// ----------------------------------------------------------------------------- - -// uart0 (telemetry JSON) — returns bytes written; tracks dropped bytes on partial writes. 
-func (r *Reactor) jsonWrite(b []byte) int { - if r == nil || r.jsonOut == nil || len(b) == 0 { - return 0 - } - n := r.jsonOut.TryWriteFrom(b) - if n < len(b) { - r.droppedUART0Bytes += (len(b) - n) - // Rate-limited note - if r.droppedUART0Bytes == (len(b)-n) || (r.droppedUART0Bytes%1024) == 0 { - log.Println("[uart0] dropped bytes =", r.droppedUART0Bytes) - } - } - return n -} - // ----------------------------------------------------------------------------- // Printing helpers (via Logger) // ----------------------------------------------------------------------------- diff --git a/x/fmtx/fmtx_host.go b/x/fmtx/fmtx_host.go index 848722a..7640d3e 100644 --- a/x/fmtx/fmtx_host.go +++ b/x/fmtx/fmtx_host.go @@ -5,12 +5,30 @@ package fmtx import ( "fmt" "io" + "os" + "strings" ) +// DefaultOutput matches the MCU API surface so tests and callers can redirect +// Print/Printf without depending on the host's process stdout directly. +var DefaultOutput io.Writer = os.Stdout + func Sprintf(format string, a ...any) string { return fmt.Sprintf(format, a...) } -func Printf(format string, a ...any) (int, error) { return fmt.Printf(format, a...) } +func Printf(format string, a ...any) (int, error) { return Fprintf(DefaultOutput, format, a...) } func Fprintf(w io.Writer, format string, a ...any) (int, error) { return fmt.Fprintf(w, format, a...) } func Errorf(format string, a ...any) error { return fmt.Errorf(format, a...) } -func Sprint(a ...any) string { return fmt.Sprint(a...) } -func Fprint(w io.Writer, a ...any) (int, error) { return fmt.Fprint(w, a...) } -func Print(a ...any) (int, error) { return fmt.Print(a...) } + +// Keep host behavior aligned with the MCU formatter, which always separates +// Sprint/Fprint operands with spaces. 
+func Sprint(a ...any) string { + var b strings.Builder + for i, v := range a { + if i > 0 { + b.WriteByte(' ') + } + b.WriteString(fmt.Sprint(v)) + } + return b.String() +} +func Fprint(w io.Writer, a ...any) (int, error) { return io.WriteString(w, Sprint(a...)) } +func Print(a ...any) (int, error) { return Fprint(DefaultOutput, a...) } diff --git a/x/strconvx/strconvx_host.go b/x/strconvx/strconvx_host.go index bf918c6..398ace4 100644 --- a/x/strconvx/strconvx_host.go +++ b/x/strconvx/strconvx_host.go @@ -4,18 +4,55 @@ package strconvx import "strconv" -// The goal is signature parity with strconv. -// Delegate straight through. +// Mirror the MCU helpers on host builds so tests exercise the same parsing +// rules, including base-0 prefix handling for 0b/0o/0x inputs. -func Itoa(i int) string { return strconv.Itoa(i) } -func Atoi(s string) (int, error) { return strconv.Atoi(s) } -func FormatInt(i int64, base int) string { return strconv.FormatInt(i, base) } -func FormatUint(u uint64, base int) string { return strconv.FormatUint(u, base) } -func ParseInt(s string, base, bitSize int) (int64, error) { return strconv.ParseInt(s, base, bitSize) } +func Itoa(i int) string { return strconv.Itoa(i) } +func Itoa64(i int64) string { return strconv.FormatInt(i, 10) } +func Utoa64(u uint64) string { return strconv.FormatUint(u, 10) } +func Atoi(s string) (int, error) { return strconv.Atoi(s) } +func FormatInt(i int64, base int) string { return strconv.FormatInt(i, base) } +func FormatUint(u uint64, base int) string { return strconv.FormatUint(u, base) } +func ParseInt(s string, base, bitSize int) (int64, error) { + if base != 0 { + return strconv.ParseInt(s, base, bitSize) + } + neg := false + if len(s) > 0 && (s[0] == '+' || s[0] == '-') { + neg = s[0] == '-' + s = s[1:] + } + base = detectBase(&s) + if neg { + s = "-" + s + } + return strconv.ParseInt(s, base, bitSize) +} func ParseUint(s string, base, bitSize int) (uint64, error) { + if base == 0 { + base = detectBase(&s) 
+ } return strconv.ParseUint(s, base, bitSize) } func FormatFloat(f float64, fmt byte, prec, bitSize int) string { return strconv.FormatFloat(f, fmt, prec, bitSize) } func ParseFloat(s string, bitSize int) (float64, error) { return strconv.ParseFloat(s, bitSize) } + +func detectBase(ps *string) int { + s := *ps + if len(s) >= 2 && s[0] == '0' { + switch s[1] { + case 'x', 'X': + *ps = s[2:] + return 16 + case 'b', 'B': + *ps = s[2:] + return 2 + case 'o', 'O': + *ps = s[2:] + return 8 + } + } + return 10 +} diff --git a/x/xxhash/xxhash.go b/x/xxhash/xxhash.go new file mode 100644 index 0000000..9e319c1 --- /dev/null +++ b/x/xxhash/xxhash.go @@ -0,0 +1,145 @@ +// Package xxhash implements the xxHash32 algorithm — a fast, non-cryptographic +// 32-bit hash. +// +// This package mirrors devicecode-lua/src/shared/hash/xxhash32.lua at +// update-migration tip (commit 2c88090). It is used for fabric wire-protocol +// integrity (xfer_begin / xfer_commit checksum field) and for HAL artefact +// hashing. It is not a security primitive. +package xxhash + +import "math/bits" + +// xxHash32 round constants. These match the canonical xxHash32 spec and the +// Lua reference at src/shared/hash/xxhash32.lua. +const ( + prime32_1 uint32 = 0x9E3779B1 + prime32_2 uint32 = 0x85EBCA77 + prime32_3 uint32 = 0xC2B2AE3D + prime32_4 uint32 = 0x27D4EB2F + prime32_5 uint32 = 0x165667B1 +) + +// Hasher is a streaming xxHash32 state. +type Hasher struct { + seed uint32 + totalLen uint32 + v1, v2, v3, v4 uint32 + mem [16]byte + memN uint8 // 0..15 + large bool // true once a 16-byte block has been absorbed +} + +// New returns a streaming xxHash32 hasher seeded with seed. 
+func New(seed uint32) *Hasher { + h := &Hasher{} + h.reset(seed) + return h +} + +func (h *Hasher) reset(seed uint32) { + h.seed = seed + h.totalLen = 0 + h.v1 = seed + prime32_1 + prime32_2 + h.v2 = seed + prime32_2 + h.v3 = seed + h.v4 = seed - prime32_1 + h.memN = 0 + h.large = false +} + +// Write absorbs p into the running hash. Always returns (len(p), nil). +func (h *Hasher) Write(p []byte) (int, error) { + n := len(p) + if n == 0 { + return 0, nil + } + h.totalLen += uint32(n) + + // Top up the partial-block buffer if it has any bytes. + if h.memN > 0 { + need := 16 - int(h.memN) + if n < need { + copy(h.mem[h.memN:], p) + h.memN += uint8(n) + return n, nil + } + copy(h.mem[h.memN:], p[:need]) + h.absorbBlock(h.mem[:]) + p = p[need:] + h.memN = 0 + } + + // Absorb aligned 16-byte blocks directly from p. + for len(p) >= 16 { + h.absorbBlock(p[:16]) + p = p[16:] + } + + // Stash the trailing remainder (0..15 bytes). + if len(p) > 0 { + copy(h.mem[:], p) + h.memN = uint8(len(p)) + } + return n, nil +} + +func (h *Hasher) absorbBlock(b []byte) { + h.large = true + h.v1 = round(h.v1, leU32(b[0:4])) + h.v2 = round(h.v2, leU32(b[4:8])) + h.v3 = round(h.v3, leU32(b[8:12])) + h.v4 = round(h.v4, leU32(b[12:16])) +} + +// Sum32 returns the xxHash32 of all bytes absorbed so far. It does not modify +// the hasher state; calling Write afterwards continues the hash. 
+func (h *Hasher) Sum32() uint32 { + var x uint32 + if h.large { + x = bits.RotateLeft32(h.v1, 1) + + bits.RotateLeft32(h.v2, 7) + + bits.RotateLeft32(h.v3, 12) + + bits.RotateLeft32(h.v4, 18) + } else { + x = h.seed + prime32_5 + } + + x += h.totalLen + + rem := h.mem[:h.memN] + for len(rem) >= 4 { + x += leU32(rem) * prime32_3 + x = bits.RotateLeft32(x, 17) * prime32_4 + rem = rem[4:] + } + for _, b := range rem { + x += uint32(b) * prime32_5 + x = bits.RotateLeft32(x, 11) * prime32_1 + } + + x ^= x >> 15 + x *= prime32_2 + x ^= x >> 13 + x *= prime32_3 + x ^= x >> 16 + return x +} + +// Sum32 computes the xxHash32 of p with the given seed in one pass. +func Sum32(p []byte, seed uint32) uint32 { + var h Hasher + h.reset(seed) + _, _ = h.Write(p) + return h.Sum32() +} + +func round(acc, lane uint32) uint32 { + acc += lane * prime32_2 + acc = bits.RotateLeft32(acc, 13) + acc *= prime32_1 + return acc +} + +func leU32(b []byte) uint32 { + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} diff --git a/x/xxhash/xxhash_test.go b/x/xxhash/xxhash_test.go new file mode 100644 index 0000000..a005b59 --- /dev/null +++ b/x/xxhash/xxhash_test.go @@ -0,0 +1,133 @@ +package xxhash + +import ( + "bytes" + "testing" +) + +// Reference vectors validated against +// devicecode-lua/src/shared/hash/xxhash32.lua at update-migration tip +// (commit 2c88090) using `print(M.digest_hex(input))` with seed 0. 
+var refVectors = []struct { + name string + input string + hex string +}{ + {"empty", "", "02cc5d05"}, + {"a", "a", "550d7456"}, + {"abc", "abc", "32d153ff"}, + {"123456789", "123456789", "937bad67"}, +} + +func TestSum32_KnownAnswer(t *testing.T) { + for _, v := range refVectors { + got := testHex8(Sum32([]byte(v.input), 0)) + if got != v.hex { + t.Errorf("Sum32(%q, 0): got %s, want %s", v.input, got, v.hex) + } + } +} + +func TestStreaming_ByteByByte(t *testing.T) { + for _, v := range refVectors { + h := New(0) + for _, b := range []byte(v.input) { + h.Write([]byte{b}) + } + got := testHex8(h.Sum32()) + if got != v.hex { + t.Errorf("byte-stream %q: got %s, want %s", v.input, got, v.hex) + } + } +} + +func TestStreaming_OddSplits(t *testing.T) { + // A 32-byte input spans two 16-byte blocks, so splits at 1, 7, 15, 16, + // 17, and 31 exercise mem-buffer top-up, exact block boundary, and tail + // bytes. + in := []byte("0123456789abcdef0123456789abcdef") + want := testHex8(Sum32(in, 0)) + + for _, split := range []int{0, 1, 7, 15, 16, 17, 31, 32} { + h := New(0) + h.Write(in[:split]) + h.Write(in[split:]) + got := testHex8(h.Sum32()) + if got != want { + t.Errorf("split=%d: got %s, want %s", split, got, want) + } + } +} + +func TestStreaming_EmptyWritesNoOp(t *testing.T) { + h := New(0) + h.Write(nil) + h.Write([]byte{}) + h.Write([]byte("abc")) + h.Write([]byte{}) + if got := testHex8(h.Sum32()); got != "32d153ff" { + t.Errorf("with empty writes interleaved: got %s, want 32d153ff", got) + } +} + +func TestSeedNonZero(t *testing.T) { + in := []byte("the quick brown fox jumps over the lazy dog") + if Sum32(in, 0) == Sum32(in, 1) { + t.Fatalf("seeds 0 and 1 produced same hash") + } + h := New(42) + h.Write(in) + if h.Sum32() != Sum32(in, 42) { + t.Fatalf("streaming with seed=42 != one-shot") + } +} + +func TestSum32Idempotent(t *testing.T) { + // Sum32 should not mutate state; calling it twice must give the same result. 
+ h := New(0) + h.Write([]byte("abc")) + a := h.Sum32() + b := h.Sum32() + if a != b { + t.Errorf("Sum32 not idempotent: %x != %x", a, b) + } +} + +func TestSum32ContinuesAfter(t *testing.T) { + // Calling Sum32, then Write, then Sum32 again must reflect the new bytes. + h := New(0) + h.Write([]byte("a")) + h.Sum32() + h.Write([]byte("bc")) + got := testHex8(h.Sum32()) + if got != "32d153ff" { + t.Errorf("post-Sum32 continuation: got %s, want 32d153ff", got) + } +} + +const hexdigits = "0123456789abcdef" + +func testHex8(v uint32) string { + var buf [8]byte + for i := 7; i >= 0; i-- { + buf[i] = hexdigits[v&0xf] + v >>= 4 + } + return string(buf[:]) +} + +func TestLargeBuffer(t *testing.T) { + // Confirm one-shot and streaming agree on a buffer comfortably larger + // than the 16-byte block size; this exercises the hot loop in Write. + in := bytes.Repeat([]byte("0123456789abcdef"), 64) // 1024 bytes + want := Sum32(in, 0) + + h := New(0) + for i := 0; i < len(in); i += 7 { + end := min(i+7, len(in)) + h.Write(in[i:end]) + } + if got := h.Sum32(); got != want { + t.Errorf("1024-byte streaming vs one-shot: got %x, want %x", got, want) + } +}