Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ All notable changes to this project will be documented in this file.
- Add optional result destination to `GeolocationUser` so LocationOffsets can be sent to an alternate endpoint instead of the target IP; supports both IP and domain destinations (e.g., `185.199.108.1:9000` or `results.example.com:9000`); includes `SetResultDestination` onchain instruction, CLI `user set-result-destination` command, and Go SDK deserialization (backwards-compatible with existing accounts)
- CLI
- Add `--owner` flag to `multicast group update`, accepting a pubkey or `me` ([#3527](https://github.com/malbeclabs/doublezero/pull/3527))
    - Polish terminal output of `connect` and `disconnect`: fix emoji semantics, normalize message phrasing across IBRL and multicast code paths, resolve tenant to human-readable code on connect (errors if tenant not found), and fix progress bar not clearing before output in `disconnect` ([#3529](https://github.com/malbeclabs/doublezero/pull/3529))
- Client
  - Increase default probing interval to 5m from 30s since DZDs don't generally move.

## [v0.17.0](https://github.com/malbeclabs/doublezero/compare/client/v0.16.0...client/v0.17.0) - 2026-04-10

Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion client/doublezerod/cmd/doublezerod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ var (
env = flag.String("env", config.EnvTestnet, "environment to use")
programId = flag.String("program-id", "", "override smartcontract program id to monitor")
rpcEndpoint = flag.String("solana-rpc-endpoint", "", "override solana rpc endpoint url")
probeInterval = flag.Int("probe-interval", 30, "latency probe interval in seconds")
probeInterval = flag.Int("probe-interval", 300, "latency probe interval in seconds")
cacheUpdateInterval = flag.Int("cache-update-interval", 30, "latency cache update interval in seconds")
enableVerboseLogging = flag.Bool("v", false, "enables verbose logging")
enableLatencyMetrics = flag.Bool("enable-latency-metrics", false, "enables latency metrics")
Expand Down
25 changes: 24 additions & 1 deletion client/doublezerod/internal/latency/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

const (
serviceabilityProgramDataFetchTimeout = 20 * time.Second
maxInitialProbeInterval = 30 * time.Second
)

// DeviceInfo contains the minimal device information needed for latency probing and reporting.
Expand Down Expand Up @@ -424,13 +425,35 @@ func (l *LatencyManager) Start(ctx context.Context) error {
probe()
l.probeReady.Store(true)

ticker := time.NewTicker(l.probeInterval)
hasReachable := func() bool {
l.ResultsCache.Lock.RLock()
defer l.ResultsCache.Lock.RUnlock()
for _, r := range l.ResultsCache.Results {
if r.Reachable {
return true
}
}
return false
}

// If no device was reachable on the first probe, use a fast interval
// until one responds, then switch to the steady-state interval.
converged := hasReachable()
interval := l.probeInterval
if !converged {
interval = min(l.probeInterval, maxInitialProbeInterval)
}
ticker := time.NewTicker(interval)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
probe()
if !converged && hasReachable() {
converged = true
ticker.Reset(l.probeInterval)
}
}
}
}()
Expand Down
79 changes: 79 additions & 0 deletions client/doublezerod/internal/latency/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"
"runtime"
"sync"
"sync/atomic"
"testing"
"time"

Expand Down Expand Up @@ -1574,6 +1575,84 @@ func TestLatencyManager_ProbeWaitsForDeviceFetch(t *testing.T) {
}
}

// TestLatencyManager_FastRetryWhenUnreachable verifies that when the first probe
// finds no reachable devices, the manager retries at a fast interval (<=30s)
// rather than the configured steady-state interval. probeReady is set after the
// first probe regardless, so the CLI can proceed with unreachable results.
func TestLatencyManager_FastRetryWhenUnreachable(t *testing.T) {
	var probeCount atomic.Int32

	// Serve a single fake device so the manager has exactly one probe target.
	mockSmartContractFunc := func(ctx context.Context) (*latency.ContractData, error) {
		return &latency.ContractData{
			Devices: []serviceability.Device{
				{
					AccountType: serviceability.DeviceType,
					PublicIp:    [4]uint8{192, 0, 2, 1},
					PubKey:      [32]byte{1},
					Code:        "dev01",
				},
			},
		}, nil
	}

	// First probe reports the device unreachable; every subsequent probe
	// reports it reachable, which should flip the manager back to the
	// steady-state interval.
	mockProber := func(ctx context.Context, target latency.ProbeTarget) latency.LatencyResult {
		n := probeCount.Add(1)
		return latency.LatencyResult{
			Device:    target.Device,
			IP:        target.IP,
			Reachable: n >= 2,
		}
	}

	manager := latency.NewLatencyManager(
		latency.WithSmartContractFunc(mockSmartContractFunc),
		latency.WithProberFunc(mockProber),
		latency.WithProbeInterval(time.Hour),      // large so only the fast retry fires
		latency.WithCacheUpdateInterval(time.Hour), // don't refetch devices
	)

	// The context must outlive the longest polling deadline below (90s);
	// otherwise the manager is cancelled mid-wait and the remaining poll
	// time is guaranteed-futile before the t.Fatal.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	go func() {
		_ = manager.Start(ctx)
	}()

	// Wait for the first probe to complete.
	deadline := time.Now().Add(2 * time.Second)
	for time.Now().Before(deadline) {
		if probeCount.Load() >= 1 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}
	if probeCount.Load() < 1 {
		t.Fatal("timed out waiting for first probe")
	}
	// Give the manager a moment to flip probeReady after the probe returns.
	time.Sleep(50 * time.Millisecond)

	// probeReady should be true even though nothing was reachable — the daemon
	// has completed a probe pass and the CLI needs to know it can read results.
	if !manager.IsProbeReady() {
		t.Fatal("expected probeReady=true after first probe")
	}

	// The manager should retry quickly (<=30s) rather than waiting the full
	// probe interval (1h). Wait for the second probe with a generous timeout
	// that is still well below the steady-state interval.
	deadline = time.Now().Add(90 * time.Second)
	for time.Now().Before(deadline) {
		if probeCount.Load() >= 2 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}
	if probeCount.Load() < 2 {
		t.Fatal("timed out waiting for second probe — fast retry interval may not be working")
	}
}

func TestUdpPing_ErrorCases(t *testing.T) {
tests := []struct {
name string
Expand Down
Loading