Skip to content

Commit f245139

Browse files
Prefer legacy forward maps on manager read
We are working on removing the requirement of regularly persisting the ChannelManager, and as a result began reconstructing the manager's forwards maps from Channel data on startup in a recent PR, see cb398f6 and parent commits. At the time, we implemented ChannelManager::read to prefer to use the newly reconstructed maps, partly to ensure we have test coverage of the new maps' usage. This resulted in a lot of code that would deduplicate HTLCs that were present in the old maps to avoid redundant HTLC handling/duplicate forwards, adding extra complexity. Instead, always use the old maps in prod, but randomly use the newly reconstructed maps in testing, to exercise the new codepaths (see reconstruct_manager_from_monitors in ChannelManager::read).
1 parent 31b8d54 commit f245139

2 files changed

Lines changed: 113 additions & 174 deletions

File tree

CONTRIBUTING.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,12 @@ welcomed.
192192

193193
* `LDK_TEST_DETERMINISTIC_HASHES` - When set to `1`, uses deterministic hash map iteration order in tests. This ensures consistent test output across runs, useful for comparing logs before and after changes.
194194

195+
* `LDK_TEST_REBUILD_MGR_FROM_MONITORS` - If set to `1`, on test node reload the `ChannelManager`'s
196+
HTLC set will be reconstructed from `Channel{Monitor}` persisted data. If `0`, test nodes will be
197+
reloaded from persisted `ChannelManager` data using legacy code paths. This ensures consistent
198+
test output across runs, useful for comparing logs before and after changes, since otherwise the
199+
selection of which codepaths are used on reload is made randomly.
200+
195201
C/C++ Bindings
196202
--------------
197203

lightning/src/ln/channelmanager.rs

Lines changed: 107 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -11747,6 +11747,10 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1174711747

1174811748
if !new_intercept_events.is_empty() {
1174911749
let mut events = self.pending_events.lock().unwrap();
11750+
// It's possible we processed this intercept forward, generated an event, then re-processed
11751+
// it here after restart, in which case the intercept event should not be pushed
11752+
// redundantly.
11753+
new_intercept_events.retain(|ev| !events.contains(ev));
1175011754
events.append(&mut new_intercept_events);
1175111755
}
1175211756
}
@@ -17484,9 +17488,9 @@ where
1748417488

1748517489
const MAX_ALLOC_SIZE: usize = 1024 * 64;
1748617490
let forward_htlcs_count: u64 = Readable::read(reader)?;
17487-
// This map is read but may no longer be used because we'll attempt to rebuild the set of HTLC
17488-
// forwards from the `Channel{Monitor}`s instead, as a step towards removing the requirement of
17489-
// regularly persisting the `ChannelManager`.
17491+
// Marked `_legacy` because in versions > 0.2 we are taking steps to remove the requirement of
17492+
// regularly persisting the `ChannelManager` and instead rebuild the set of HTLC forwards from
17493+
// `Channel{Monitor}` data. See `reconstruct_manager_from_monitors` usage below.
1749017494
let mut forward_htlcs_legacy: HashMap<u64, Vec<HTLCForwardInfo>> =
1749117495
hash_map_with_capacity(cmp::min(forward_htlcs_count as usize, 128));
1749217496
for _ in 0..forward_htlcs_count {
@@ -17587,9 +17591,9 @@ where
1758717591
};
1758817592
}
1758917593

17590-
// Some maps are read but may no longer be used because we attempt to rebuild the pending HTLC
17591-
// set from the `Channel{Monitor}`s instead, as a step towards removing the requirement of
17592-
// regularly persisting the `ChannelManager`.
17594+
// Marked `_legacy` because in versions > 0.2 we are taking steps to remove the requirement of
17595+
// regularly persisting the `ChannelManager` and instead rebuild the set of HTLC forwards from
17596+
// `Channel{Monitor}` data. See `reconstruct_manager_from_monitors` below.
1759317597
let mut pending_intercepted_htlcs_legacy: Option<HashMap<InterceptId, PendingAddHTLCInfo>> =
1759417598
None;
1759517599
let mut decode_update_add_htlcs_legacy: Option<HashMap<u64, Vec<msgs::UpdateAddHTLC>>> =
@@ -17930,6 +17934,36 @@ where
1793017934
pending_background_events.push(new_event);
1793117935
}
1793217936

17937+
// In LDK 0.2 and below, the `ChannelManager` would track all payments and HTLCs internally and
17938+
// persist that state, relying on it being up-to-date on restart. Newer versions are moving
17939+
// towards reducing this reliance on regular persistence of the `ChannelManager`, and instead
17940+
// reconstruct HTLC/payment state based on `Channel{Monitor}` data if
17941+
// `reconstruct_manager_from_monitors` is set below. Currently it is only set in tests, randomly
17942+
so that both the new and the legacy codepaths have test coverage.
17943+
#[cfg(not(test))]
17944+
let reconstruct_manager_from_monitors = false;
17945+
#[cfg(test)]
17946+
let reconstruct_manager_from_monitors = {
17947+
use core::hash::{BuildHasher, Hasher};
17948+
17949+
match std::env::var("LDK_TEST_REBUILD_MGR_FROM_MONITORS") {
17950+
Ok(val) => match val.as_str() {
17951+
"1" => true,
17952+
"0" => false,
17953+
_ => panic!("LDK_TEST_REBUILD_MGR_FROM_MONITORS must be 0 or 1, got: {}", val),
17954+
},
17955+
Err(_) => {
17956+
let rand_val =
17957+
std::collections::hash_map::RandomState::new().build_hasher().finish();
17958+
if rand_val % 2 == 0 {
17959+
true
17960+
} else {
17961+
false
17962+
}
17963+
},
17964+
}
17965+
};
17966+
1793317967
// If there's any preimages for forwarded HTLCs hanging around in ChannelMonitors we
1793417968
// should ensure we try them again on the inbound edge. We put them here and do so after we
1793517969
// have a fully-constructed `ChannelManager` at the end.
@@ -17954,18 +17988,20 @@ where
1795417988
let mut peer_state_lock = peer_state_mtx.lock().unwrap();
1795517989
let peer_state = &mut *peer_state_lock;
1795617990
is_channel_closed = !peer_state.channel_by_id.contains_key(channel_id);
17957-
if let Some(chan) = peer_state.channel_by_id.get(channel_id) {
17958-
if let Some(funded_chan) = chan.as_funded() {
17959-
let inbound_committed_update_adds =
17960-
funded_chan.get_inbound_committed_update_adds();
17961-
if !inbound_committed_update_adds.is_empty() {
17962-
// Reconstruct `ChannelManager::decode_update_add_htlcs` from the serialized
17963-
// `Channel`, as part of removing the requirement to regularly persist the
17964-
// `ChannelManager`.
17965-
decode_update_add_htlcs.insert(
17966-
funded_chan.context.outbound_scid_alias(),
17967-
inbound_committed_update_adds,
17968-
);
17991+
if reconstruct_manager_from_monitors {
17992+
if let Some(chan) = peer_state.channel_by_id.get(channel_id) {
17993+
if let Some(funded_chan) = chan.as_funded() {
17994+
let inbound_committed_update_adds =
17995+
funded_chan.get_inbound_committed_update_adds();
17996+
if !inbound_committed_update_adds.is_empty() {
17997+
// Reconstruct `ChannelManager::decode_update_add_htlcs` from the serialized
17998+
// `Channel`, as part of removing the requirement to regularly persist the
17999+
// `ChannelManager`.
18000+
decode_update_add_htlcs.insert(
18001+
funded_chan.context.outbound_scid_alias(),
18002+
inbound_committed_update_adds,
18003+
);
18004+
}
1796918005
}
1797018006
}
1797118007
}
@@ -18020,17 +18056,20 @@ where
1802018056
info.prev_funding_outpoint == prev_hop_data.outpoint
1802118057
&& info.prev_htlc_id == prev_hop_data.htlc_id
1802218058
};
18023-
// We always add all inbound committed HTLCs to `decode_update_add_htlcs` in the above
18024-
// loop, but we need to prune from those added HTLCs if they were already forwarded to
18025-
// the outbound edge. Otherwise, we'll double-forward.
18026-
dedup_decode_update_add_htlcs(
18027-
&mut decode_update_add_htlcs,
18028-
&prev_hop_data,
18029-
"HTLC was forwarded to the closed channel",
18030-
&args.logger,
18031-
);
18059+
// If `reconstruct_manager_from_monitors` is set, we always add all inbound committed
18060+
// HTLCs to `decode_update_add_htlcs` in the above loop, but we need to prune from
18061+
// those added HTLCs if they were already forwarded to the outbound edge. Otherwise,
18062+
// we'll double-forward.
18063+
if reconstruct_manager_from_monitors {
18064+
dedup_decode_update_add_htlcs(
18065+
&mut decode_update_add_htlcs,
18066+
&prev_hop_data,
18067+
"HTLC was forwarded to the closed channel",
18068+
&args.logger,
18069+
);
18070+
}
1803218071

18033-
if !is_channel_closed {
18072+
if !is_channel_closed || reconstruct_manager_from_monitors {
1803418073
continue;
1803518074
}
1803618075
// The ChannelMonitor is now responsible for this HTLC's
@@ -18539,99 +18578,55 @@ where
1853918578
}
1854018579
}
1854118580

18542-
// De-duplicate HTLCs that are present in both `failed_htlcs` and `decode_update_add_htlcs`.
18543-
// Omitting this de-duplication could lead to redundant HTLC processing and/or bugs.
18544-
for (src, _, _, _, _, _) in failed_htlcs.iter() {
18545-
if let HTLCSource::PreviousHopData(prev_hop_data) = src {
18546-
dedup_decode_update_add_htlcs(
18547-
&mut decode_update_add_htlcs,
18548-
prev_hop_data,
18549-
"HTLC was failed backwards during manager read",
18550-
&args.logger,
18551-
);
18552-
}
18553-
}
18554-
18555-
// See above comment on `failed_htlcs`.
18556-
for htlcs in claimable_payments.values().map(|pmt| &pmt.htlcs) {
18557-
for prev_hop_data in htlcs.iter().map(|h| &h.prev_hop) {
18558-
dedup_decode_update_add_htlcs(
18559-
&mut decode_update_add_htlcs,
18560-
prev_hop_data,
18561-
"HTLC was already decoded and marked as a claimable payment",
18562-
&args.logger,
18563-
);
18564-
}
18565-
}
18566-
18567-
// Remove HTLCs from `forward_htlcs` if they are also present in `decode_update_add_htlcs`.
18568-
//
18569-
// In the future, the full set of pending HTLCs will be pulled from `Channel{Monitor}` data and
18570-
// placed in `ChannelManager::decode_update_add_htlcs` on read, to be handled on the next call
18571-
// to `process_pending_htlc_forwards`. This is part of a larger effort to remove the requirement
18572-
// of regularly persisting the `ChannelManager`. The new pipeline is supported for HTLC forwards
18573-
// received on LDK 0.3+ but not <= 0.2, so prune non-legacy HTLCs from `forward_htlcs`.
18574-
forward_htlcs_legacy.retain(|scid, pending_fwds| {
18575-
for fwd in pending_fwds {
18576-
let (prev_scid, prev_htlc_id) = match fwd {
18577-
HTLCForwardInfo::AddHTLC(htlc) => {
18578-
(htlc.prev_outbound_scid_alias, htlc.prev_htlc_id)
18579-
},
18580-
HTLCForwardInfo::FailHTLC { htlc_id, .. }
18581-
| HTLCForwardInfo::FailMalformedHTLC { htlc_id, .. } => (*scid, *htlc_id),
18582-
};
18583-
if let Some(pending_update_adds) = decode_update_add_htlcs.get_mut(&prev_scid) {
18584-
if pending_update_adds
18585-
.iter()
18586-
.any(|update_add| update_add.htlc_id == prev_htlc_id)
18587-
{
18588-
return false;
18589-
}
18581+
if reconstruct_manager_from_monitors {
18582+
// De-duplicate HTLCs that are present in both `failed_htlcs` and `decode_update_add_htlcs`.
18583+
// Omitting this de-duplication could lead to redundant HTLC processing and/or bugs.
18584+
for (src, _, _, _, _, _) in failed_htlcs.iter() {
18585+
if let HTLCSource::PreviousHopData(prev_hop_data) = src {
18586+
dedup_decode_update_add_htlcs(
18587+
&mut decode_update_add_htlcs,
18588+
prev_hop_data,
18589+
"HTLC was failed backwards during manager read",
18590+
&args.logger,
18591+
);
1859018592
}
1859118593
}
18592-
true
18593-
});
18594-
// Remove intercepted HTLC forwards if they are also present in `decode_update_add_htlcs`. See
18595-
// the above comment.
18596-
pending_intercepted_htlcs_legacy.retain(|id, fwd| {
18597-
let prev_scid = fwd.prev_outbound_scid_alias;
18598-
if let Some(pending_update_adds) = decode_update_add_htlcs.get_mut(&prev_scid) {
18599-
if pending_update_adds
18600-
.iter()
18601-
.any(|update_add| update_add.htlc_id == fwd.prev_htlc_id)
18602-
{
18603-
pending_events_read.retain(
18604-
|(ev, _)| !matches!(ev, Event::HTLCIntercepted { intercept_id, .. } if intercept_id == id),
18594+
18595+
// See above comment on `failed_htlcs`.
18596+
for htlcs in claimable_payments.values().map(|pmt| &pmt.htlcs) {
18597+
for prev_hop_data in htlcs.iter().map(|h| &h.prev_hop) {
18598+
dedup_decode_update_add_htlcs(
18599+
&mut decode_update_add_htlcs,
18600+
prev_hop_data,
18601+
"HTLC was already decoded and marked as a claimable payment",
18602+
&args.logger,
1860518603
);
18606-
return false;
1860718604
}
1860818605
}
18606+
}
18607+
18608+
let (decode_update_add_htlcs, forward_htlcs, pending_intercepted_htlcs) =
18609+
if reconstruct_manager_from_monitors {
18610+
(decode_update_add_htlcs, new_hash_map(), new_hash_map())
18611+
} else {
18612+
(
18613+
decode_update_add_htlcs_legacy,
18614+
forward_htlcs_legacy,
18615+
pending_intercepted_htlcs_legacy,
18616+
)
18617+
};
18618+
18619+
// If we have a pending intercept HTLC present but no corresponding event, add that now rather
18620+
// than relying on the user having persisted the event prior to shutdown.
18621+
for (id, fwd) in pending_intercepted_htlcs.iter() {
1860918622
if !pending_events_read.iter().any(
1861018623
|(ev, _)| matches!(ev, Event::HTLCIntercepted { intercept_id, .. } if intercept_id == id),
1861118624
) {
18612-
match create_htlc_intercepted_event(*id, &fwd) {
18625+
match create_htlc_intercepted_event(*id, fwd) {
1861318626
Ok(ev) => pending_events_read.push_back((ev, None)),
1861418627
Err(()) => debug_assert!(false),
1861518628
}
1861618629
}
18617-
true
18618-
});
18619-
// Add legacy update_adds that were received on LDK <= 0.2 that are not present in the
18620-
// `decode_update_add_htlcs` map that was rebuilt from `Channel{Monitor}` data, see above
18621-
// comment.
18622-
for (scid, legacy_update_adds) in decode_update_add_htlcs_legacy.drain() {
18623-
match decode_update_add_htlcs.entry(scid) {
18624-
hash_map::Entry::Occupied(mut update_adds) => {
18625-
for legacy_update_add in legacy_update_adds {
18626-
if !update_adds.get().contains(&legacy_update_add) {
18627-
update_adds.get_mut().push(legacy_update_add);
18628-
}
18629-
}
18630-
},
18631-
hash_map::Entry::Vacant(entry) => {
18632-
entry.insert(legacy_update_adds);
18633-
},
18634-
}
1863518630
}
1863618631

1863718632
let best_block = BestBlock::new(best_block_hash, best_block_height);
@@ -18660,9 +18655,9 @@ where
1866018655

1866118656
inbound_payment_key: expanded_inbound_key,
1866218657
pending_outbound_payments: pending_outbounds,
18663-
pending_intercepted_htlcs: Mutex::new(pending_intercepted_htlcs_legacy),
18658+
pending_intercepted_htlcs: Mutex::new(pending_intercepted_htlcs),
1866418659

18665-
forward_htlcs: Mutex::new(forward_htlcs_legacy),
18660+
forward_htlcs: Mutex::new(forward_htlcs),
1866618661
decode_update_add_htlcs: Mutex::new(decode_update_add_htlcs),
1866718662
claimable_payments: Mutex::new(ClaimablePayments {
1866818663
claimable_payments,
@@ -18998,12 +18993,11 @@ where
1899818993
mod tests {
1899918994
use crate::events::{ClosureReason, Event, HTLCHandlingFailureType};
1900018995
use crate::ln::channelmanager::{
19001-
create_recv_pending_htlc_info, inbound_payment, HTLCForwardInfo, InterceptId, PaymentId,
18996+
create_recv_pending_htlc_info, inbound_payment, InterceptId, PaymentId,
1900218997
RecipientOnionFields,
1900318998
};
1900418999
use crate::ln::functional_test_utils::*;
1900519000
use crate::ln::msgs::{self, BaseMessageHandler, ChannelMessageHandler, MessageSendEvent};
19006-
use crate::ln::onion_utils::AttributionData;
1900719001
use crate::ln::onion_utils::{self, LocalHTLCFailureReason};
1900819002
use crate::ln::outbound_payment::Retry;
1900919003
use crate::ln::types::ChannelId;
@@ -19013,7 +19007,6 @@ mod tests {
1901319007
use crate::types::payment::{PaymentHash, PaymentPreimage, PaymentSecret};
1901419008
use crate::util::config::{ChannelConfig, ChannelConfigUpdate};
1901519009
use crate::util::errors::APIError;
19016-
use crate::util::ser::Writeable;
1901719010
use crate::util::test_utils;
1901819011
use bitcoin::secp256k1::ecdh::SharedSecret;
1901919012
use bitcoin::secp256k1::{PublicKey, Secp256k1, SecretKey};
@@ -20071,66 +20064,6 @@ mod tests {
2007120064
check_spends!(txn[0], funding_tx);
2007220065
}
2007320066
}
20074-
20075-
#[test]
20076-
#[rustfmt::skip]
20077-
fn test_malformed_forward_htlcs_ser() {
20078-
// Ensure that `HTLCForwardInfo::FailMalformedHTLC`s are (de)serialized properly.
20079-
let chanmon_cfg = create_chanmon_cfgs(1);
20080-
let node_cfg = create_node_cfgs(1, &chanmon_cfg);
20081-
let persister;
20082-
let chain_monitor;
20083-
let chanmgrs = create_node_chanmgrs(1, &node_cfg, &[None]);
20084-
let deserialized_chanmgr;
20085-
let mut nodes = create_network(1, &node_cfg, &chanmgrs);
20086-
20087-
let dummy_failed_htlc = |htlc_id| {
20088-
HTLCForwardInfo::FailHTLC { htlc_id, err_packet: msgs::OnionErrorPacket { data: vec![42], attribution_data: Some(AttributionData::new()) } }
20089-
};
20090-
let dummy_malformed_htlc = |htlc_id| {
20091-
HTLCForwardInfo::FailMalformedHTLC {
20092-
htlc_id,
20093-
failure_code: LocalHTLCFailureReason::InvalidOnionPayload.failure_code(),
20094-
sha256_of_onion: [0; 32],
20095-
}
20096-
};
20097-
20098-
let dummy_htlcs_1: Vec<HTLCForwardInfo> = (1..10).map(|htlc_id| {
20099-
if htlc_id % 2 == 0 {
20100-
dummy_failed_htlc(htlc_id)
20101-
} else {
20102-
dummy_malformed_htlc(htlc_id)
20103-
}
20104-
}).collect();
20105-
20106-
let dummy_htlcs_2: Vec<HTLCForwardInfo> = (1..10).map(|htlc_id| {
20107-
if htlc_id % 2 == 1 {
20108-
dummy_failed_htlc(htlc_id)
20109-
} else {
20110-
dummy_malformed_htlc(htlc_id)
20111-
}
20112-
}).collect();
20113-
20114-
20115-
let (scid_1, scid_2) = (42, 43);
20116-
let mut forward_htlcs = new_hash_map();
20117-
forward_htlcs.insert(scid_1, dummy_htlcs_1.clone());
20118-
forward_htlcs.insert(scid_2, dummy_htlcs_2.clone());
20119-
20120-
let mut chanmgr_fwd_htlcs = nodes[0].node.forward_htlcs.lock().unwrap();
20121-
*chanmgr_fwd_htlcs = forward_htlcs.clone();
20122-
core::mem::drop(chanmgr_fwd_htlcs);
20123-
20124-
reload_node!(nodes[0], nodes[0].node.encode(), &[], persister, chain_monitor, deserialized_chanmgr);
20125-
20126-
let mut deserialized_fwd_htlcs = nodes[0].node.forward_htlcs.lock().unwrap();
20127-
for scid in [scid_1, scid_2].iter() {
20128-
let deserialized_htlcs = deserialized_fwd_htlcs.remove(scid).unwrap();
20129-
assert_eq!(forward_htlcs.remove(scid).unwrap(), deserialized_htlcs);
20130-
}
20131-
assert!(deserialized_fwd_htlcs.is_empty());
20132-
core::mem::drop(deserialized_fwd_htlcs);
20133-
}
2013420067
}
2013520068

2013620069
#[cfg(ldk_bench)]

0 commit comments

Comments
 (0)