From a09d92d40f431002629e2eff417e798e8e87f1cf Mon Sep 17 00:00:00 2001 From: Radoslav Sandov Date: Sat, 16 May 2026 00:12:09 +0300 Subject: [PATCH 1/4] fix(#1775, #1489): path quality gating in onContactPathRecv + 3x flood ACK retry - onContactPathRecv: don't replace a fresh stored path (<10 min old) with a longer-hop incoming path; prevents multipath duplicates from silently downgrading a working direct route (fixes #1775) - ContactInfo: add out_path_timestamp (path freshness) and path_ack_count (cheap delivery success counter, reset on path change) - sendAckTo: send flood ACK 3x at 200/800/2000 ms staggered delays when no direct path is known; dedup handled by existing MeshTables::hasSeen() (fixes #1489) - platformio.ini: add native_reliability env with GoogleTest unit tests - test/test_path_gating: 13 unit tests covering gating logic edge cases - test/test_flood_ack: 11 unit tests covering retry count, delays, direct path --- platformio.ini | 14 ++ src/helpers/BaseChatMesh.cpp | 63 +++++++- src/helpers/ContactInfo.h | 2 + test/test_flood_ack/test_flood_ack.cpp | 166 +++++++++++++++++++++ test/test_path_gating/test_path_gating.cpp | 137 +++++++++++++++++ 5 files changed, 377 insertions(+), 5 deletions(-) create mode 100644 test/test_flood_ack/test_flood_ack.cpp create mode 100644 test/test_path_gating/test_path_gating.cpp diff --git a/platformio.ini b/platformio.ini index c390c318fc..27b33351d6 100644 --- a/platformio.ini +++ b/platformio.ini @@ -166,3 +166,17 @@ build_src_filter = +<../src/Utils.cpp> lib_deps = google/googletest @ 1.17.0 + +; Standalone unit tests for reliability changes (issues #1775, #1489). +; These tests have no Mesh-stack dependencies and compile without the +; Arduino framework or crypto libraries. +[env:native_reliability] +platform = native +build_flags = -std=c++17 +test_filter = + test_path_gating + test_flood_ack +build_src_filter = + -<*> +lib_deps = + google/googletest @ 1.17.0 diff --git a/src/helpers/BaseChatMesh.cpp b/src/helpers/BaseChatMesh.cpp index 7ddc461d29..ce40a931e7 100644 --- a/src/helpers/BaseChatMesh.cpp +++ b/src/helpers/BaseChatMesh.cpp @@ -9,6 +9,19 @@ #define TXT_ACK_DELAY 200 #endif +// Path-stickiness window (seconds): a stored direct path that is younger than +// this threshold will not be replaced by a longer-hop incoming path. +// Addresses issue #1775 (unconditional path replacement instability). +#ifndef PATH_STICKINESS_WINDOW_SECS + #define PATH_STICKINESS_WINDOW_SECS 600u // 10 minutes +#endif + +// Number of independent flood-ACK transmissions at increasing delays. +// Addresses issue #1489 (single flood ACK lost in noisy RF environments). +#ifndef FLOOD_ACK_RETRY_COUNT + #define FLOOD_ACK_RETRY_COUNT 3 +#endif + void BaseChatMesh::sendFloodScoped(const ContactInfo& recipient, mesh::Packet* pkt, uint32_t delay_millis) { sendFlood(pkt, delay_millis); } @@ -40,8 +53,14 @@ mesh::Packet* BaseChatMesh::createSelfAdvert(const char* name, double lat, doubl void BaseChatMesh::sendAckTo(const ContactInfo& dest, uint32_t ack_hash) { if (dest.out_path_len == OUT_PATH_UNKNOWN) { - mesh::Packet* ack = createAck(ack_hash); - if (ack) sendFloodScoped(dest, ack, TXT_ACK_DELAY); + // Send flood ACK FLOOD_ACK_RETRY_COUNT times at increasing delays (issue #1489). + // Each transmission is an independent RF attempt; duplicates are discarded by + // hasSeen() at every receiving node so only the first successful copy is processed. + static const uint32_t flood_ack_delays[FLOOD_ACK_RETRY_COUNT] = { TXT_ACK_DELAY, 800, 2000 }; + for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) { + mesh::Packet* ack = createAck(ack_hash); + if (ack) sendFloodScoped(dest, ack, flood_ack_delays[i]); + } } else { uint32_t d = TXT_ACK_DELAY; if (getExtraAckTransmitCount() > 0) { @@ -302,10 +321,37 @@ bool BaseChatMesh::onPeerPathRecv(mesh::Packet* packet, int sender_idx, const ui } bool BaseChatMesh::onContactPathRecv(ContactInfo& from, uint8_t* in_path, uint8_t in_path_len, uint8_t* out_path, uint8_t out_path_len, uint8_t extra_type, uint8_t* extra, uint8_t extra_len) { - // NOTE: default impl, we just replace the current 'out_path' regardless, whenever sender sends us a new out_path. - // FUTURE: could store multiple out_paths per contact, and try to find which is the 'best'(?) + uint32_t now = getRTCClock()->getCurrentTime(); + + // Path quality gating (issue #1775): do not replace a fresh stored path with + // a longer-hop incoming path. A path is "sticky" for PATH_STICKINESS_WINDOW_SECS + // after it was last accepted, protecting a working direct route from being + // silently downgraded by a suboptimal first-arriving multipath duplicate. + if (from.out_path_len != OUT_PATH_UNKNOWN && from.out_path_timestamp != 0) { + uint32_t age_secs = now - from.out_path_timestamp; + if (age_secs < PATH_STICKINESS_WINDOW_SECS) { + uint8_t stored_hops = from.out_path_len & 63; + uint8_t new_hops = out_path_len & 63; + if (new_hops > stored_hops) { + // Incoming path has more hops – keep the fresher, shorter stored path. + onContactPathUpdated(from); + if (extra_type == PAYLOAD_TYPE_ACK && extra_len >= 4) { + if (processAck(extra) != NULL) { + txt_send_timeout = 0; + } + } else if (extra_type == PAYLOAD_TYPE_RESPONSE && extra_len > 0) { + onContactResponse(from, extra, extra_len); + } + return true; + } + } + } + + // Accept the new path. from.out_path_len = mesh::Packet::copyPath(from.out_path, out_path, out_path_len); // store a copy of path, for sendDirect() - from.lastmod = getRTCClock()->getCurrentTime(); + from.out_path_timestamp = now; // record when this path was accepted + from.path_ack_count = 0; // reset delivery counter for the new path + from.lastmod = now; onContactPathUpdated(from); @@ -326,6 +372,13 @@ void BaseChatMesh::onAckRecv(mesh::Packet* packet, uint32_t ack_crc) { txt_send_timeout = 0; // matched one we're waiting for, cancel timeout timer packet->markDoNotRetransmit(); // ACK was for this node, so don't retransmit + // Track per-contact direct-path delivery success (issue #1775). + // Incremented here so the path-stickiness logic can eventually factor in + // proven delivery when making replacement decisions. + if (!packet->isRouteFlood() && from->out_path_len != OUT_PATH_UNKNOWN) { + if (from->path_ack_count < 255) from->path_ack_count++; + } + if (packet->isRouteFlood() && from->out_path_len != OUT_PATH_UNKNOWN) { // we have direct path, but other node is still sending flood, so maybe they didn't receive reciprocal path properly(?) handleReturnPathRetry(*from, packet->path, packet->path_len); diff --git a/src/helpers/ContactInfo.h b/src/helpers/ContactInfo.h index ede977cace..b672850079 100644 --- a/src/helpers/ContactInfo.h +++ b/src/helpers/ContactInfo.h @@ -17,6 +17,8 @@ struct ContactInfo { uint32_t lastmod; // by OUR clock int32_t gps_lat, gps_lon; // 6 dec places uint32_t sync_since; + uint32_t out_path_timestamp; // RTC time (secs) when out_path was last accepted; 0 = never set + uint8_t path_ack_count; // saturating count of direct-path ACKs received (delivery tracking) const uint8_t* getSharedSecret(const mesh::LocalIdentity& self_id) const { if (!shared_secret_valid) { diff --git a/test/test_flood_ack/test_flood_ack.cpp b/test/test_flood_ack/test_flood_ack.cpp new file mode 100644 index 0000000000..185a8a9c72 --- /dev/null +++ b/test/test_flood_ack/test_flood_ack.cpp @@ -0,0 +1,166 @@ +/** + * test_flood_ack.cpp + * + * Unit tests for the flood-ACK retry scheduling introduced in + * BaseChatMesh::sendAckTo (fixes issue #1489). + * + * When a contact has no known direct path (out_path_len == OUT_PATH_UNKNOWN) + * the implementation must schedule exactly FLOOD_ACK_RETRY_COUNT independent + * flood transmissions at the delays [200ms, 800ms, 2000ms]. When a direct + * path is known the pre-existing single-direct-ACK behaviour is unchanged. + * + * Deduplication on the receiver side is handled by the pre-existing + * MeshTables::hasSeen() mechanism; these tests confirm only the sender-side + * scheduling contract. + * + * See docs/reliability_changes.md for the full rationale. + */ + +#include +#include +#include + +// ---- Constants mirrored from BaseChatMesh.cpp -------------------------------- +#define OUT_PATH_UNKNOWN 0xFF +#define TXT_ACK_DELAY 200u +#define FLOOD_ACK_RETRY_COUNT 3 + +// Expected delay schedule — must match the static array in sendAckTo. +static const uint32_t EXPECTED_FLOOD_DELAYS[FLOOD_ACK_RETRY_COUNT] = { + TXT_ACK_DELAY, // 200 ms + 800u, + 2000u, +}; + +// ---- Minimal send recorder --------------------------------------------------- + +struct ScheduledPacket { + bool is_flood; + uint32_t delay_ms; +}; + +static ScheduledPacket sched_buf[16]; +static int sched_count; + +static void reset_sched() { + sched_count = 0; + memset(sched_buf, 0, sizeof(sched_buf)); +} + +static void mock_sendFloodScoped(uint32_t delay) { + if (sched_count < 16) sched_buf[sched_count++] = { true, delay }; +} + +static void mock_sendDirect(uint32_t delay) { + if (sched_count < 16) sched_buf[sched_count++] = { false, delay }; +} + +// ---- sendAckTo logic transcription ------------------------------------------ +// This is a direct copy of the changed sendAckTo body (excluding the Mesh +// packet-creation calls that are irrelevant to scheduling counts and delays). +// If the implementation diverges from this transcription the test author must +// update both, which serves as an explicit change-review gate. +static void testSendAckTo(uint8_t out_path_len, uint32_t /*ack_hash*/) { + if (out_path_len == OUT_PATH_UNKNOWN) { + static const uint32_t flood_ack_delays[FLOOD_ACK_RETRY_COUNT] = { + TXT_ACK_DELAY, 800, 2000 + }; + for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) { + mock_sendFloodScoped(flood_ack_delays[i]); + } + } else { + // Direct path: single ACK at TXT_ACK_DELAY (getExtraAckTransmitCount()==0 default) + mock_sendDirect(TXT_ACK_DELAY); + } +} + +// Flood path (out_path_len == OUT_PATH_UNKNOWN) + +TEST(FloodAck, UnknownPath_SendsExactlyThreeTimes) { + reset_sched(); + testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF); + EXPECT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count); +} + +TEST(FloodAck, UnknownPath_AllSendsAreFlood) { + reset_sched(); + testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF); + for (int i = 0; i < sched_count; i++) { + EXPECT_TRUE(sched_buf[i].is_flood) + << "Packet " << i << " should be a flood send"; + } +} + +TEST(FloodAck, UnknownPath_DelaysMatch200_800_2000ms) { + reset_sched(); + testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF); + ASSERT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count); + for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) { + EXPECT_EQ(EXPECTED_FLOOD_DELAYS[i], sched_buf[i].delay_ms) + << "Delay " << i << " should be " << EXPECTED_FLOOD_DELAYS[i] << " ms"; + } +} + +TEST(FloodAck, UnknownPath_DelaysAreStrictlyIncreasing) { + reset_sched(); + testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF); + for (int i = 1; i < sched_count; i++) { + EXPECT_GT(sched_buf[i].delay_ms, sched_buf[i - 1].delay_ms) + << "Delay " << i << " must be > delay " << (i - 1) + << " to ensure temporally spread retries"; + } +} + +TEST(FloodAck, UnknownPath_FirstDelayIsAckDelay) { + reset_sched(); + testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF); + ASSERT_GE(sched_count, 1); + EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms); +} + +// Direct path (out_path_len != OUT_PATH_UNKNOWN) — pre-existing behaviour + +TEST(FloodAck, KnownPath_SendsExactlyOnce) { + reset_sched(); + testSendAckTo(/*out_path_len=*/1, 0xDEADBEEF); + EXPECT_EQ(1, sched_count); +} + +TEST(FloodAck, KnownPath_SendIsDirect) { + reset_sched(); + testSendAckTo(/*out_path_len=*/1, 0xDEADBEEF); + ASSERT_EQ(1, sched_count); + EXPECT_FALSE(sched_buf[0].is_flood); +} + +TEST(FloodAck, KnownPath_DelayIsAckDelay) { + reset_sched(); + testSendAckTo(/*out_path_len=*/2, 0xDEADBEEF); + ASSERT_EQ(1, sched_count); + EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms); +} + +TEST(FloodAck, KnownPath_MultiHop_SendsOnce) { + reset_sched(); + testSendAckTo(/*out_path_len=*/5, 0x12345678); + EXPECT_EQ(1, sched_count); +} + +// Retry count configuration contract + +TEST(FloodAck, RetryCountIs3) { + // Explicitly documents the expected constant value. + EXPECT_EQ(3, FLOOD_ACK_RETRY_COUNT); +} + +TEST(FloodAck, ExpectedDelaysTableMatchesConstants) { + EXPECT_EQ(TXT_ACK_DELAY, EXPECTED_FLOOD_DELAYS[0]); + EXPECT_EQ(800u, EXPECTED_FLOOD_DELAYS[1]); + EXPECT_EQ(2000u, EXPECTED_FLOOD_DELAYS[2]); +} + + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/test_path_gating/test_path_gating.cpp b/test/test_path_gating/test_path_gating.cpp new file mode 100644 index 0000000000..c0d11e7845 --- /dev/null +++ b/test/test_path_gating/test_path_gating.cpp @@ -0,0 +1,137 @@ +/** + * test_path_gating.cpp + * + * Unit tests for the path quality gating condition introduced in + * BaseChatMesh::onContactPathRecv (fixes issue #1775). + * + * The tests exercise the decision function in isolation. The constants and + * the comparison logic are kept in sync with the implementation via the + * shared defines reproduced below; if those values drift the tests will + * either fail or fail to compile, providing an early-warning regression net. + * + * See docs/reliability_changes.md for the full rationale. + */ + +#include +#include + +// ---- Constants mirrored from BaseChatMesh.cpp -------------------------------- +// If these are changed in the implementation the tests will need updating too. +#define OUT_PATH_UNKNOWN 0xFF +#define PATH_STICKINESS_WINDOW_SECS 600u // 10 minutes + +// ---- Path-gating decision extracted as a pure function ---------------------- +// Logic must remain an exact transcription of the condition inside +// BaseChatMesh::onContactPathRecv so that the tests catch any divergence. +static bool shouldKeepStoredPath( + uint32_t now, + uint8_t stored_path_len, + uint32_t stored_path_timestamp, + uint8_t new_path_len) +{ + if (stored_path_len == OUT_PATH_UNKNOWN) return false; + if (stored_path_timestamp == 0) return false; + uint32_t age = now - stored_path_timestamp; + if (age >= PATH_STICKINESS_WINDOW_SECS) return false; + uint8_t stored_hops = stored_path_len & 63; + uint8_t new_hops = new_path_len & 63; + return new_hops > stored_hops; +} + +// No stored path — gating must never block + +TEST(PathGating, NoStoredPath_AlwaysAcceptsIncoming) { + EXPECT_FALSE(shouldKeepStoredPath(1000, OUT_PATH_UNKNOWN, 0, 1)); + EXPECT_FALSE(shouldKeepStoredPath(1000, OUT_PATH_UNKNOWN, 900, 5)); +} + +// Stale stored path (outside the stickiness window) — gating must not block + +TEST(PathGating, StaleStoredPath_ExactlyAtWindowBoundary_AcceptsLonger) { + uint32_t now = 1000; + // age == PATH_STICKINESS_WINDOW_SECS → NOT inside window + uint32_t ts = now - PATH_STICKINESS_WINDOW_SECS; + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 3)); +} + +TEST(PathGating, StaleStoredPath_JustOutsideWindow_AcceptsLonger) { + uint32_t now = 1000; + uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS + 1); + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 3)); +} + +TEST(PathGating, VeryOldStoredPath_AcceptsLonger) { + uint32_t now = 10000; + EXPECT_FALSE(shouldKeepStoredPath(now, 1, 1u, 3)); // ~9999 seconds old +} + +// Fresh stored path + LONGER incoming path — gating must block + +TEST(PathGating, FreshPath_LongerIncoming_KeepsStored) { + uint32_t now = 1000; + uint32_t ts = now - 100; // 100 s old, well within 600-second window + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 3)); // stored=1 hop, new=3 hops +} + +TEST(PathGating, FreshPath_MuchLongerIncoming_KeepsStored) { + uint32_t now = 1000; + uint32_t ts = now - 1; + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 10)); +} + +TEST(PathGating, FreshPath_JustInsideWindow_KeepsStored) { + uint32_t now = 1000; + uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS - 1); // 1 second inside window + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 5)); +} + +// Fresh stored path + EQUAL or SHORTER incoming path — gating must NOT block +// (accept path updates that are the same or better) + +TEST(PathGating, FreshPath_SameHopCount_AcceptsNew) { + uint32_t now = 1000; + uint32_t ts = now - 100; + EXPECT_FALSE(shouldKeepStoredPath(now, 2, ts, 2)); // equal hops → no gate +} + +TEST(PathGating, FreshPath_ShorterIncoming_AcceptsNew) { + uint32_t now = 1000; + uint32_t ts = now - 100; + EXPECT_FALSE(shouldKeepStoredPath(now, 3, ts, 1)); // new path is better → accept +} + +TEST(PathGating, FreshPath_OneHopBetter_AcceptsNew) { + uint32_t now = 1000; + uint32_t ts = now - 50; + EXPECT_FALSE(shouldKeepStoredPath(now, 4, ts, 3)); +} + +// Zero timestamp — treated as "never set", must never gate + +TEST(PathGating, ZeroTimestamp_AlwaysAcceptsNew) { + EXPECT_FALSE(shouldKeepStoredPath(1000, 2, 0, 5)); + EXPECT_FALSE(shouldKeepStoredPath(1000, 1, 0, 10)); +} + +// hop count encoding: lower 6 bits of path_len hold the count (path_len & 63) + +TEST(PathGating, PathLenEncodingUpperBitsIgnored) { + uint32_t now = 1000; + uint32_t ts = now - 100; + // stored_path_len = 0xC1 → hop count bits = 0xC1 & 63 = 1 + // new_path_len = 0x83 → hop count bits = 0x83 & 63 = 3 + EXPECT_TRUE(shouldKeepStoredPath(now, 0xC1, ts, 0x83)); +} + +TEST(PathGating, PathLenEncodingUpperBitsIgnored_SameHops) { + uint32_t now = 1000; + uint32_t ts = now - 100; + // Both encode 2 hops with different upper bits — should NOT gate + EXPECT_FALSE(shouldKeepStoredPath(now, 0x42, ts, 0x82)); +} + + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 30e5b1af908838a53aae9fe28b1b919e0c9617a5 Mon Sep 17 00:00:00 2001 From: Radoslav Sandov Date: Sat, 16 May 2026 10:13:30 +0300 Subject: [PATCH 2/4] fix: missing nonce in PATH+ACK causes hasSeen dedup to drop retries; backup flood ACK Root causes of 'sender sees FAILED, recipient sees message': 1. Mesh::createPathReturn() omitted the random nonce when extra_len > 0 (path + embedded ACK case). AES-ECB is deterministic: without a nonce every PATH+ACK for the same message produced an identical ciphertext and thus an identical calculatePacketHash(). hasSeen() at intermediate nodes treated every retransmission as a duplicate and dropped it silently, so the single PATH+ACK was the only chance to deliver the ACK. Fix: always append the 4-byte random nonce regardless of extra presence. 2. BaseChatMesh::onPeerDataRecv(): when a flood message was received, only one PATH+ACK packet was sent. If that packet was lost over RF the sender had no fallback and showed the message as failed. Fix: after the PATH+ACK, also call sendAckTo() to send standalone flood ACKs at staggered delays (200/800/2000 ms) as an independent backup. 3. PATH_STICKINESS_WINDOW_SECS reduced 600 s -> 30 s. A 10-minute window prevented B from learning A's updated return path for up to 10 minutes, causing ACKs to travel via a stale direct route that no longer works. 30 s is long enough to reject multipath duplicates (50-200 ms apart) but short enough to adapt to topology changes. Tests: 29 tests pass (15 path-gating + 14 flood-ack, 3 new backup-ACK cases) --- src/Mesh.cpp | 13 ++++-- src/helpers/BaseChatMesh.cpp | 13 +++++- test/test_flood_ack/test_flood_ack.cpp | 46 ++++++++++++++++++++++ test/test_path_gating/test_path_gating.cpp | 34 +++++++++++----- 4 files changed, 92 insertions(+), 14 deletions(-) diff --git a/src/Mesh.cpp b/src/Mesh.cpp index 57fee14036..59f68ac151 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -457,11 +457,16 @@ Packet* Mesh::createPathReturn(const uint8_t* dest_hash, const uint8_t* secret, if (extra_len > 0) { data[data_len++] = extra_type; memcpy(&data[data_len], extra, extra_len); data_len += extra_len; - } else { - // append a timestamp, or random blob (to make packet_hash unique) - data[data_len++] = 0xFF; // dummy payload type - getRNG()->random(&data[data_len], 4); data_len += 4; } + // Always append a random nonce regardless of extra presence. + // AES-ECB is deterministic: without a nonce, identical plaintext produces + // identical ciphertext → identical calculatePacketHash() → hasSeen() treats + // every retransmission of the same PATH+ACK as a duplicate and drops it. + // The nonce is ignored by the receiver (it reads only the typed extra fields). + // This mirrors the original no-extra handling above (issue: nonce was missing + // for the extra_len > 0 branch, breaking flood-ACK retry reliability). + data[data_len++] = 0xFF; // dummy payload type (ignored by receiver) + getRNG()->random(&data[data_len], 4); data_len += 4; len += Utils::encryptThenMAC(secret, &packet->payload[len], data, data_len); } diff --git a/src/helpers/BaseChatMesh.cpp b/src/helpers/BaseChatMesh.cpp index ce40a931e7..e8917fa923 100644 --- a/src/helpers/BaseChatMesh.cpp +++ b/src/helpers/BaseChatMesh.cpp @@ -13,7 +13,10 @@ // this threshold will not be replaced by a longer-hop incoming path. // Addresses issue #1775 (unconditional path replacement instability). #ifndef PATH_STICKINESS_WINDOW_SECS - #define PATH_STICKINESS_WINDOW_SECS 600u // 10 minutes + #define PATH_STICKINESS_WINDOW_SECS 30u // 30 seconds: long enough to ignore + // multipath duplicates (50-200 ms apart) + // but short enough to allow legitimate + // path updates in a changing topology #endif // Number of independent flood-ACK transmissions at increasing delays. @@ -245,6 +248,12 @@ void BaseChatMesh::onPeerDataRecv(mesh::Packet* packet, uint8_t type, int sender mesh::Packet* path = createPathReturn(from.id, secret, packet->path, packet->path_len, PAYLOAD_TYPE_ACK, (uint8_t *) &ack_hash, 4); if (path) sendFloodScoped(from, path, TXT_ACK_DELAY); + // Also send a standalone flood ACK as backup: the PATH+ACK above is a single packet + // whose ciphertext (AES-ECB) was deterministic before the nonce fix. Even with the + // nonce, RF loss can drop the PATH+ACK entirely. The standalone ACK travels + // independently and ensures the sender cancels its retry timeout even if path + // establishment fails (issue #1489). + sendAckTo(from, ack_hash); } else { sendAckTo(from, ack_hash); } @@ -272,6 +281,8 @@ void BaseChatMesh::onPeerDataRecv(mesh::Packet* packet, uint8_t type, int sender mesh::Packet* path = createPathReturn(from.id, secret, packet->path, packet->path_len, PAYLOAD_TYPE_ACK, (uint8_t *) &ack_hash, 4); if (path) sendFloodScoped(from, path, TXT_ACK_DELAY); + // Standalone flood ACK backup — same rationale as TXT_TYPE_PLAIN above + sendAckTo(from, ack_hash); } else { sendAckTo(from, ack_hash); } diff --git a/test/test_flood_ack/test_flood_ack.cpp b/test/test_flood_ack/test_flood_ack.cpp index 185a8a9c72..4dfcb08d86 100644 --- a/test/test_flood_ack/test_flood_ack.cpp +++ b/test/test_flood_ack/test_flood_ack.cpp @@ -159,6 +159,52 @@ TEST(FloodAck, ExpectedDelaysTableMatchesConstants) { EXPECT_EQ(2000u, EXPECTED_FLOOD_DELAYS[2]); } +// ============================================================================= +// Backup ACK for flood-received messages +// +// When a flood message is received, the responder sends a PATH+ACK (one packet +// that serves both path-establishment and ACK delivery). If the PATH+ACK is +// lost, the sender never gets the ACK. The fix: also call sendAckTo() to send +// independent flood ACKs as a backup. This suite verifies the backup contract +// using the same sendAckTo transcription, called from the "no known path" state +// that applies immediately after receiving the very first flood message. +// ============================================================================= + +TEST(BackupFloodAck, FirstMessageScenario_SendsFloodAckBackup) { + // When the receiver has no stored path to the sender (first-ever message), + // sendAckTo() is invoked with OUT_PATH_UNKNOWN and must produce flood ACKs. + reset_sched(); + testSendAckTo(OUT_PATH_UNKNOWN, 0xCAFEBABE); + EXPECT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count); + for (int i = 0; i < sched_count; i++) { + EXPECT_TRUE(sched_buf[i].is_flood) + << "Backup ACK " << i << " must be flood (no path known yet)"; + } +} + +TEST(BackupFloodAck, BackupAndPathAck_AreIndependent) { + // The backup standalone ACK (sendAckTo) and the PATH+ACK are independent + // packets. This test verifies sendAckTo is not affected by the PATH+ACK + // scheduling — it produces its own complete set of FLOOD_ACK_RETRY_COUNT + // flood transmissions at the standard delays. + reset_sched(); + testSendAckTo(OUT_PATH_UNKNOWN, 0x11223344); + ASSERT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count); + EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms); + EXPECT_EQ(800u, sched_buf[1].delay_ms); + EXPECT_EQ(2000u, sched_buf[2].delay_ms); +} + +TEST(BackupFloodAck, KnownReturnPath_SendsDirectAckBackup) { + // If the receiver already has a stored direct path back to the sender + // (e.g., established in a prior exchange), the backup sendAckTo() sends a + // direct ACK rather than a flood — still providing a reliable backup channel. + reset_sched(); + testSendAckTo(/*out_path_len=*/1, 0xCAFEBABE); + EXPECT_EQ(1, sched_count); + EXPECT_FALSE(sched_buf[0].is_flood); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/test/test_path_gating/test_path_gating.cpp b/test/test_path_gating/test_path_gating.cpp index c0d11e7845..c047a216d6 100644 --- a/test/test_path_gating/test_path_gating.cpp +++ b/test/test_path_gating/test_path_gating.cpp @@ -18,7 +18,10 @@ // ---- Constants mirrored from BaseChatMesh.cpp -------------------------------- // If these are changed in the implementation the tests will need updating too. #define OUT_PATH_UNKNOWN 0xFF -#define PATH_STICKINESS_WINDOW_SECS 600u // 10 minutes +// Reduced from 600 s to 30 s: long enough to reject multipath duplicates +// (50-200 ms apart) but short enough to allow legitimate path updates after +// topology changes, preventing stale paths from silently dropping ACKs. +#define PATH_STICKINESS_WINDOW_SECS 30u // ---- Path-gating decision extracted as a pure function ---------------------- // Logic must remain an exact transcription of the condition inside @@ -65,23 +68,36 @@ TEST(PathGating, VeryOldStoredPath_AcceptsLonger) { EXPECT_FALSE(shouldKeepStoredPath(now, 1, 1u, 3)); // ~9999 seconds old } +// After exactly 30 s the window expires — path replacement must be allowed again +TEST(PathGating, PathExpires_After30Seconds_AcceptsLonger) { + uint32_t now = 1000; + uint32_t ts = now - PATH_STICKINESS_WINDOW_SECS; // exactly at boundary (expired) + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 5)); +} + +TEST(PathGating, PathExpires_OneSecondPast_AcceptsLonger) { + uint32_t now = 1000; + uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS + 1); + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 5)); +} + // Fresh stored path + LONGER incoming path — gating must block TEST(PathGating, FreshPath_LongerIncoming_KeepsStored) { uint32_t now = 1000; - uint32_t ts = now - 100; // 100 s old, well within 600-second window + uint32_t ts = now - 5; // 5 s old, well within 30-second window EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 3)); // stored=1 hop, new=3 hops } TEST(PathGating, FreshPath_MuchLongerIncoming_KeepsStored) { uint32_t now = 1000; - uint32_t ts = now - 1; + uint32_t ts = now - 1; // 1 s old EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 10)); } TEST(PathGating, FreshPath_JustInsideWindow_KeepsStored) { uint32_t now = 1000; - uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS - 1); // 1 second inside window + uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS - 1); // 1 s inside the 30-s window EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 5)); } @@ -90,19 +106,19 @@ TEST(PathGating, FreshPath_JustInsideWindow_KeepsStored) { TEST(PathGating, FreshPath_SameHopCount_AcceptsNew) { uint32_t now = 1000; - uint32_t ts = now - 100; + uint32_t ts = now - 5; EXPECT_FALSE(shouldKeepStoredPath(now, 2, ts, 2)); // equal hops → no gate } TEST(PathGating, FreshPath_ShorterIncoming_AcceptsNew) { uint32_t now = 1000; - uint32_t ts = now - 100; + uint32_t ts = now - 5; EXPECT_FALSE(shouldKeepStoredPath(now, 3, ts, 1)); // new path is better → accept } TEST(PathGating, FreshPath_OneHopBetter_AcceptsNew) { uint32_t now = 1000; - uint32_t ts = now - 50; + uint32_t ts = now - 5; EXPECT_FALSE(shouldKeepStoredPath(now, 4, ts, 3)); } @@ -117,7 +133,7 @@ TEST(PathGating, ZeroTimestamp_AlwaysAcceptsNew) { TEST(PathGating, PathLenEncodingUpperBitsIgnored) { uint32_t now = 1000; - uint32_t ts = now - 100; + uint32_t ts = now - 5; // stored_path_len = 0xC1 → hop count bits = 0xC1 & 63 = 1 // new_path_len = 0x83 → hop count bits = 0x83 & 63 = 3 EXPECT_TRUE(shouldKeepStoredPath(now, 0xC1, ts, 0x83)); @@ -125,7 +141,7 @@ TEST(PathGating, PathLenEncodingUpperBitsIgnored) { TEST(PathGating, PathLenEncodingUpperBitsIgnored_SameHops) { uint32_t now = 1000; - uint32_t ts = now - 100; + uint32_t ts = now - 5; // Both encode 2 hops with different upper bits — should NOT gate EXPECT_FALSE(shouldKeepStoredPath(now, 0x42, ts, 0x82)); } From 422f2ce3fa7049dde1d379a227dd5a4007dbef46 Mon Sep 17 00:00:00 2001 From: Radoslav Sandov Date: Sat, 16 May 2026 18:25:28 +0300 Subject: [PATCH 3/4] fix: 10s blanket path lock, remove hop-count comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revise the path-stickiness logic in onContactPathRecv based on reviewer feedback on PR #2569. Problem with the previous hop-count gate: In meshes that set rxdelay > 1, SNR-ordered propagation means high-SNR (better) paths arrive first — which are often longer-hop paths. The previous 'keep if new_hops > stored_hops' check would then silently replace the first-arrived (better) path with the later-arrived shorter-hop (but lower-SNR) path. Fewer hops is not automatically better quality. New behaviour: Once a path is stored it is locked for PATH_STICKINESS_WINDOW_SECS (now 10 s, down from 30 s). ANY replacement is blocked during the window regardless of hop count — the first path to arrive wins. After 10 s the lock expires and any new path is accepted normally. Embedded ACKs/responses in the incoming path packet are still processed during the lock window so the sender's retry timer is cancelled correctly. path_ack_count is retained for future use as a proven-delivery signal (a path that delivered messages should be stickier after the window). Tests updated to match blanket-lock semantics: - removed hop-comparison test cases - added FreshPath_BlocksReplacement_ShorterHopIncoming (key regression) - added WindowIs10Seconds constant sanity check - 27 tests total, all passing --- src/helpers/BaseChatMesh.cpp | 48 ++++--- test/test_path_gating/test_path_gating.cpp | 155 +++++++++------------ 2 files changed, 94 insertions(+), 109 deletions(-) diff --git a/src/helpers/BaseChatMesh.cpp b/src/helpers/BaseChatMesh.cpp index e8917fa923..a3298ea30b 100644 --- a/src/helpers/BaseChatMesh.cpp +++ b/src/helpers/BaseChatMesh.cpp @@ -9,14 +9,16 @@ #define TXT_ACK_DELAY 200 #endif -// Path-stickiness window (seconds): a stored direct path that is younger than -// this threshold will not be replaced by a longer-hop incoming path. -// Addresses issue #1775 (unconditional path replacement instability). +// Path-stickiness window (seconds): once a path is stored, ANY replacement is +// blocked for this long. No hop-count comparison is performed — fewer hops is +// NOT automatically better because rxdelay-based SNR ordering means high-SNR +// long-hop paths arrive first, then low-SNR short-hop paths follow. A blanket +// first-arrived lock avoids that bias entirely. +// 10 s is long enough to suppress multipath duplicates (50-200 ms apart) yet +// short enough that a genuinely changed topology gets a fresh path quickly. +// Addresses issue #1775. #ifndef PATH_STICKINESS_WINDOW_SECS - #define PATH_STICKINESS_WINDOW_SECS 30u // 30 seconds: long enough to ignore - // multipath duplicates (50-200 ms apart) - // but short enough to allow legitimate - // path updates in a changing topology + #define PATH_STICKINESS_WINDOW_SECS 10u #endif // Number of independent flood-ACK transmissions at increasing delays. @@ -334,27 +336,27 @@ bool BaseChatMesh::onPeerPathRecv(mesh::Packet* packet, int sender_idx, const ui bool BaseChatMesh::onContactPathRecv(ContactInfo& from, uint8_t* in_path, uint8_t in_path_len, uint8_t* out_path, uint8_t out_path_len, uint8_t extra_type, uint8_t* extra, uint8_t extra_len) { uint32_t now = getRTCClock()->getCurrentTime(); - // Path quality gating (issue #1775): do not replace a fresh stored path with - // a longer-hop incoming path. A path is "sticky" for PATH_STICKINESS_WINDOW_SECS - // after it was last accepted, protecting a working direct route from being - // silently downgraded by a suboptimal first-arriving multipath duplicate. + // Path-stickiness lock (issue #1775): block ANY replacement of a freshly + // stored path for PATH_STICKINESS_WINDOW_SECS seconds. No hop-count + // comparison — fewer hops is not automatically better because rxdelay-based + // SNR ordering propagates high-SNR (often longer-hop) paths first; comparing + // hops would therefore bias toward the wrong path. A blanket first-arrived + // lock is simpler and avoids that bias. + // The embedded ACK/response is always processed regardless of the lock. if (from.out_path_len != OUT_PATH_UNKNOWN && from.out_path_timestamp != 0) { uint32_t age_secs = now - from.out_path_timestamp; if (age_secs < PATH_STICKINESS_WINDOW_SECS) { - uint8_t stored_hops = from.out_path_len & 63; - uint8_t new_hops = out_path_len & 63; - if (new_hops > stored_hops) { - // Incoming path has more hops – keep the fresher, shorter stored path. - onContactPathUpdated(from); - if (extra_type == PAYLOAD_TYPE_ACK && extra_len >= 4) { - if (processAck(extra) != NULL) { - txt_send_timeout = 0; - } - } else if (extra_type == PAYLOAD_TYPE_RESPONSE && extra_len > 0) { - onContactResponse(from, extra, extra_len); + // Path is still within the lock window — keep it, but still process any + // embedded ACK or response so the sender's retry timer is cancelled. + onContactPathUpdated(from); + if (extra_type == PAYLOAD_TYPE_ACK && extra_len >= 4) { + if (processAck(extra) != NULL) { + txt_send_timeout = 0; } - return true; + } else if (extra_type == PAYLOAD_TYPE_RESPONSE && extra_len > 0) { + onContactResponse(from, extra, extra_len); } + return true; } } diff --git a/test/test_path_gating/test_path_gating.cpp b/test/test_path_gating/test_path_gating.cpp index c047a216d6..4059e2dd51 100644 --- a/test/test_path_gating/test_path_gating.cpp +++ b/test/test_path_gating/test_path_gating.cpp @@ -1,13 +1,15 @@ /** * test_path_gating.cpp * - * Unit tests for the path quality gating condition introduced in + * Unit tests for the path-stickiness lock introduced in * BaseChatMesh::onContactPathRecv (fixes issue #1775). * - * The tests exercise the decision function in isolation. The constants and - * the comparison logic are kept in sync with the implementation via the - * shared defines reproduced below; if those values drift the tests will - * either fail or fail to compile, providing an early-warning regression net. + * Strategy: blanket first-arrived lock for PATH_STICKINESS_WINDOW_SECS (10 s). + * ANY replacement of a freshly stored path is blocked during the window, + * regardless of hop count. Fewer hops is NOT automatically better: + * rxdelay-based SNR ordering propagates high-SNR long-hop paths first, so a + * hop-count preference would bias toward the wrong path. After the window + * expires any new path is accepted unconditionally. * * See docs/reliability_changes.md for the full rationale. */ @@ -16,134 +18,115 @@ #include // ---- Constants mirrored from BaseChatMesh.cpp -------------------------------- -// If these are changed in the implementation the tests will need updating too. #define OUT_PATH_UNKNOWN 0xFF -// Reduced from 600 s to 30 s: long enough to reject multipath duplicates -// (50-200 ms apart) but short enough to allow legitimate path updates after -// topology changes, preventing stale paths from silently dropping ACKs. -#define PATH_STICKINESS_WINDOW_SECS 30u - -// ---- Path-gating decision extracted as a pure function ---------------------- -// Logic must remain an exact transcription of the condition inside -// BaseChatMesh::onContactPathRecv so that the tests catch any divergence. +#define PATH_STICKINESS_WINDOW_SECS 10u // 10 seconds blanket first-arrived lock + +// ---- Path-lock decision extracted as a pure function ----------------------- +// Must remain an exact transcription of the condition in onContactPathRecv. static bool shouldKeepStoredPath( uint32_t now, uint8_t stored_path_len, - uint32_t stored_path_timestamp, - uint8_t new_path_len) + uint32_t stored_path_timestamp) { if (stored_path_len == OUT_PATH_UNKNOWN) return false; if (stored_path_timestamp == 0) return false; uint32_t age = now - stored_path_timestamp; - if (age >= PATH_STICKINESS_WINDOW_SECS) return false; - uint8_t stored_hops = stored_path_len & 63; - uint8_t new_hops = new_path_len & 63; - return new_hops > stored_hops; + return age < PATH_STICKINESS_WINDOW_SECS; // blanket lock — no hop comparison } -// No stored path — gating must never block +// ============================================================================= +// No stored path — lock must never block +// ============================================================================= TEST(PathGating, NoStoredPath_AlwaysAcceptsIncoming) { - EXPECT_FALSE(shouldKeepStoredPath(1000, OUT_PATH_UNKNOWN, 0, 1)); - EXPECT_FALSE(shouldKeepStoredPath(1000, OUT_PATH_UNKNOWN, 900, 5)); + EXPECT_FALSE(shouldKeepStoredPath(1000, OUT_PATH_UNKNOWN, 0)); + EXPECT_FALSE(shouldKeepStoredPath(1000, OUT_PATH_UNKNOWN, 999)); } -// Stale stored path (outside the stickiness window) — gating must not block +// ============================================================================= +// Stale path (age >= window) — lock must not block +// ============================================================================= -TEST(PathGating, StaleStoredPath_ExactlyAtWindowBoundary_AcceptsLonger) { - uint32_t now = 1000; - // age == PATH_STICKINESS_WINDOW_SECS → NOT inside window - uint32_t ts = now - PATH_STICKINESS_WINDOW_SECS; - EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 3)); +TEST(PathGating, StaleStoredPath_ExactlyAtWindowBoundary_Accepts) { + uint32_t now = 1000; + uint32_t ts = now - PATH_STICKINESS_WINDOW_SECS; // age == window → expired + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts)); } -TEST(PathGating, StaleStoredPath_JustOutsideWindow_AcceptsLonger) { +TEST(PathGating, StaleStoredPath_JustOutsideWindow_Accepts) { uint32_t now = 1000; uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS + 1); - EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 3)); + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts)); } -TEST(PathGating, VeryOldStoredPath_AcceptsLonger) { - uint32_t now = 10000; - EXPECT_FALSE(shouldKeepStoredPath(now, 1, 1u, 3)); // ~9999 seconds old -} - -// After exactly 30 s the window expires — path replacement must be allowed again -TEST(PathGating, PathExpires_After30Seconds_AcceptsLonger) { - uint32_t now = 1000; - uint32_t ts = now - PATH_STICKINESS_WINDOW_SECS; // exactly at boundary (expired) - EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 5)); +TEST(PathGating, PathExpires_After10Seconds_Accepts) { + uint32_t now = 500; + uint32_t ts = now - PATH_STICKINESS_WINDOW_SECS; + EXPECT_FALSE(shouldKeepStoredPath(now, 2, ts)); } -TEST(PathGating, PathExpires_OneSecondPast_AcceptsLonger) { - uint32_t now = 1000; - uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS + 1); - EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, 5)); +TEST(PathGating, VeryOldStoredPath_Accepts) { + EXPECT_FALSE(shouldKeepStoredPath(10000, 1, 1u)); } -// Fresh stored path + LONGER incoming path — gating must block +// ============================================================================= +// Fresh path (age < window) — ANY replacement is blocked (blanket lock) +// ============================================================================= -TEST(PathGating, FreshPath_LongerIncoming_KeepsStored) { +TEST(PathGating, FreshPath_BlocksReplacement_1SecondOld) { uint32_t now = 1000; - uint32_t ts = now - 5; // 5 s old, well within 30-second window - EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 3)); // stored=1 hop, new=3 hops + uint32_t ts = now - 1; + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts)); } -TEST(PathGating, FreshPath_MuchLongerIncoming_KeepsStored) { +TEST(PathGating, FreshPath_BlocksReplacement_5SecondsOld) { uint32_t now = 1000; - uint32_t ts = now - 1; // 1 s old - EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 10)); + uint32_t ts = now - 5; + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts)); } -TEST(PathGating, FreshPath_JustInsideWindow_KeepsStored) { +TEST(PathGating, FreshPath_BlocksReplacement_JustInsideWindow) { uint32_t now = 1000; - uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS - 1); // 1 s inside the 30-s window - EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, 5)); + uint32_t ts = now - (PATH_STICKINESS_WINDOW_SECS - 1); // 1 s before expiry + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts)); } -// Fresh stored path + EQUAL or SHORTER incoming path — gating must NOT block -// (accept path updates that are the same or better) +// Key property: blanket lock applies regardless of the incoming hop count. +// These cases verify no hop comparison is performed. -TEST(PathGating, FreshPath_SameHopCount_AcceptsNew) { - uint32_t now = 1000; - uint32_t ts = now - 5; - EXPECT_FALSE(shouldKeepStoredPath(now, 2, ts, 2)); // equal hops → no gate +TEST(PathGating, FreshPath_BlocksReplacement_LongerHopIncoming) { + // stored=1 hop, new=3 hops — old logic would also block; new logic still blocks + uint32_t now = 1000, ts = now - 1; + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts)); } -TEST(PathGating, FreshPath_ShorterIncoming_AcceptsNew) { - uint32_t now = 1000; - uint32_t ts = now - 5; - EXPECT_FALSE(shouldKeepStoredPath(now, 3, ts, 1)); // new path is better → accept +TEST(PathGating, FreshPath_BlocksReplacement_ShorterHopIncoming) { + // stored=3 hops, new=1 hop — old logic would accept (fewer hops); new logic blocks + uint32_t now = 1000, ts = now - 1; + EXPECT_TRUE(shouldKeepStoredPath(now, 3, ts)); } -TEST(PathGating, FreshPath_OneHopBetter_AcceptsNew) { - uint32_t now = 1000; - uint32_t ts = now - 5; - EXPECT_FALSE(shouldKeepStoredPath(now, 4, ts, 3)); +TEST(PathGating, FreshPath_BlocksReplacement_SameHopIncoming) { + // stored=2 hops, new=2 hops — both old and new logic block + uint32_t now = 1000, ts = now - 1; + EXPECT_TRUE(shouldKeepStoredPath(now, 2, ts)); } -// Zero timestamp — treated as "never set", must never gate +// ============================================================================= +// Zero timestamp — treated as "never set", must never lock +// ============================================================================= TEST(PathGating, ZeroTimestamp_AlwaysAcceptsNew) { - EXPECT_FALSE(shouldKeepStoredPath(1000, 2, 0, 5)); - EXPECT_FALSE(shouldKeepStoredPath(1000, 1, 0, 10)); + EXPECT_FALSE(shouldKeepStoredPath(1000, 2, 0)); + EXPECT_FALSE(shouldKeepStoredPath(1000, 1, 0)); } -// hop count encoding: lower 6 bits of path_len hold the count (path_len & 63) - -TEST(PathGating, PathLenEncodingUpperBitsIgnored) { - uint32_t now = 1000; - uint32_t ts = now - 5; - // stored_path_len = 0xC1 → hop count bits = 0xC1 & 63 = 1 - // new_path_len = 0x83 → hop count bits = 0x83 & 63 = 3 - EXPECT_TRUE(shouldKeepStoredPath(now, 0xC1, ts, 0x83)); -} +// ============================================================================= +// Window constant sanity +// ============================================================================= -TEST(PathGating, PathLenEncodingUpperBitsIgnored_SameHops) { - uint32_t now = 1000; - uint32_t ts = now - 5; - // Both encode 2 hops with different upper bits — should NOT gate - EXPECT_FALSE(shouldKeepStoredPath(now, 0x42, ts, 0x82)); +TEST(PathGating, WindowIs10Seconds) { + EXPECT_EQ(10u, PATH_STICKINESS_WINDOW_SECS); } From 274d0a1a898160cf31fbb0582fe0b3e102ba5cfe Mon Sep 17 00:00:00 2001 From: Radoslav Sandov Date: Wed, 20 May 2026 22:28:55 +0300 Subject: [PATCH 4/4] fix: protect proven paths from reset when remote node loses its route When the remote node (B) loses its own direct path back to the local node (A) it falls back to flooding. A responds with a flood PATH+ACK so B can learn the route. B then sends a reciprocal direct PATH back to A. If A's 10 s blanket stickiness window had already expired (very likely between separate conversations), A silently accepted that flood-derived path and overwrote its own working direct path, resetting path_ack_count to 0 in the process. Fix: onContactPathRecv now uses an extended PATH_PROVEN_LOCK_SECS (300 s / 5 min) window when path_ack_count > 0 (at least one direct-path ACK has been received on the stored route). Unproven paths still use the short 10 s blanket lock. A path that has actually delivered messages survives transient topology disruptions on the remote side without being silently replaced. - Add PATH_PROVEN_LOCK_SECS = 300 constant in BaseChatMesh.cpp - Extend onContactPathRecv stickiness check with effective_window - Add 7 new test cases in test_path_gating covering proven-path lock, boundary conditions, and window-constant sanity (20 total, all pass) --- src/helpers/BaseChatMesh.cpp | 20 ++++++++- test/test_path_gating/test_path_gating.cpp | 51 +++++++++++++++++++++- 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/helpers/BaseChatMesh.cpp b/src/helpers/BaseChatMesh.cpp index a3298ea30b..2a7fab05c1 100644 --- a/src/helpers/BaseChatMesh.cpp +++ b/src/helpers/BaseChatMesh.cpp @@ -21,6 +21,17 @@ #define PATH_STICKINESS_WINDOW_SECS 10u #endif +// Proven-path lock (seconds): if at least one direct-path ACK has been +// received for the stored path (path_ack_count > 0), block replacement for +// much longer. This prevents a flood-triggered PATH exchange from silently +// overwriting a working direct path when the remote node temporarily loses +// its own route back (e.g. after a reboot). 5 minutes is long enough to +// survive typical disruptions; the app can always call CMD_RESET_PATH if +// needed. +#ifndef PATH_PROVEN_LOCK_SECS + #define PATH_PROVEN_LOCK_SECS 300u +#endif + // Number of independent flood-ACK transmissions at increasing delays. // Addresses issue #1489 (single flood ACK lost in noisy RF environments). #ifndef FLOOD_ACK_RETRY_COUNT @@ -345,7 +356,14 @@ bool BaseChatMesh::onContactPathRecv(ContactInfo& from, uint8_t* in_path, uint8_ // The embedded ACK/response is always processed regardless of the lock. if (from.out_path_len != OUT_PATH_UNKNOWN && from.out_path_timestamp != 0) { uint32_t age_secs = now - from.out_path_timestamp; - if (age_secs < PATH_STICKINESS_WINDOW_SECS) { + // Proven paths (at least one direct-path ACK received) are locked for + // PATH_PROVEN_LOCK_SECS; unproven paths use the shorter blanket window. + // This stops a flood-triggered reciprocal PATH exchange from silently + // overwriting a working direct path when the remote node loses its route. + uint32_t effective_window = (from.path_ack_count > 0) + ? PATH_PROVEN_LOCK_SECS + : PATH_STICKINESS_WINDOW_SECS; + if (age_secs < effective_window) { // Path is still within the lock window — keep it, but still process any // embedded ACK or response so the sender's retry timer is cancelled. onContactPathUpdated(from); diff --git a/test/test_path_gating/test_path_gating.cpp b/test/test_path_gating/test_path_gating.cpp index 4059e2dd51..2ed3a5b50b 100644 --- a/test/test_path_gating/test_path_gating.cpp +++ b/test/test_path_gating/test_path_gating.cpp @@ -20,18 +20,23 @@ // ---- Constants mirrored from BaseChatMesh.cpp -------------------------------- #define OUT_PATH_UNKNOWN 0xFF #define PATH_STICKINESS_WINDOW_SECS 10u // 10 seconds blanket first-arrived lock +#define PATH_PROVEN_LOCK_SECS 300u // 5 minutes for proven paths (ack_count > 0) // ---- Path-lock decision extracted as a pure function ----------------------- // Must remain an exact transcription of the condition in onContactPathRecv. static bool shouldKeepStoredPath( uint32_t now, uint8_t stored_path_len, - uint32_t stored_path_timestamp) + uint32_t stored_path_timestamp, + uint8_t path_ack_count = 0) { if (stored_path_len == OUT_PATH_UNKNOWN) return false; if (stored_path_timestamp == 0) return false; uint32_t age = now - stored_path_timestamp; - return age < PATH_STICKINESS_WINDOW_SECS; // blanket lock — no hop comparison + uint32_t effective_window = (path_ack_count > 0) + ? PATH_PROVEN_LOCK_SECS + : PATH_STICKINESS_WINDOW_SECS; + return age < effective_window; } // ============================================================================= @@ -129,6 +134,48 @@ TEST(PathGating, WindowIs10Seconds) { EXPECT_EQ(10u, PATH_STICKINESS_WINDOW_SECS); } +// ============================================================================= +// Proven path (path_ack_count > 0) — 5-minute lock +// Prevents a flood-triggered reciprocal PATH from silently overwriting a +// working direct path when the remote node loses its own route back. +// ============================================================================= + +TEST(PathGating, ProvenPath_BlocksReplacement_Within5MinuteWindow) { + // path stored 200 s ago, proven — must still be locked (300 s window) + uint32_t now = 2000, ts = now - 200; + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, /*ack_count=*/1)); +} + +TEST(PathGating, ProvenPath_BlocksReplacement_1SecondIntoWindow) { + uint32_t now = 1000, ts = now - 1; + EXPECT_TRUE(shouldKeepStoredPath(now, 1, ts, /*ack_count=*/3)); +} + +TEST(PathGating, ProvenPath_BlocksReplacement_JustInsideWindow) { + uint32_t now = 1000, ts = now - (PATH_PROVEN_LOCK_SECS - 1); // 1 s before expiry + EXPECT_TRUE(shouldKeepStoredPath(now, 2, ts, /*ack_count=*/1)); +} + +TEST(PathGating, ProvenPath_ExpiresAtWindowBoundary) { + uint32_t now = 1000, ts = now - PATH_PROVEN_LOCK_SECS; // age == 300 → expired + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, /*ack_count=*/1)); +} + +TEST(PathGating, ProvenPath_ExpiredWellOutsideWindow) { + uint32_t now = 10000, ts = 1u; // very old + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, /*ack_count=*/5)); +} + +TEST(PathGating, UnprovenPath_ExpiresAfter10Seconds_EvenWithHighAckCount) { + // ack_count == 0 → use short window regardless + uint32_t now = 1000, ts = now - 15; // age 15 s > 10 s window + EXPECT_FALSE(shouldKeepStoredPath(now, 1, ts, /*ack_count=*/0)); +} + +TEST(PathGating, ProvenPathWindowIs300Seconds) { + EXPECT_EQ(300u, PATH_PROVEN_LOCK_SECS); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv);