Skip to content

Commit 7aac803

Browse files
Check pruned HTLCs were resolved on startup
In a recent commit, we added support for pruning an inbound HTLC's persisted onion once the HTLC has been irrevocably forwarded to the outbound edge. Here, we add a check on startup that those inbound HTLCs were actually handled. Specifically, we check that each such inbound HTLC is either (a) currently present in the outbound edge or (b) was removed from the outbound edge via a claim. If neither is true, we infer that the HTLC was removed from the outbound edge via a fail, and we fail the inbound HTLC backwards.
1 parent cb32bd2 commit 7aac803

4 files changed

Lines changed: 373 additions & 21 deletions

File tree

lightning/src/ln/channel.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,8 @@ impl InboundHTLCState {
314314
/// `ChannelManager` persist.
315315
///
316316
/// Useful for reconstructing the pending HTLC set on startup.
317-
#[derive(Debug)]
318-
enum InboundUpdateAdd {
317+
#[derive(Debug, Clone)]
318+
pub(super) enum InboundUpdateAdd {
319319
/// The inbound committed HTLC's update_add_htlc message.
320320
WithOnion { update_add_htlc: msgs::UpdateAddHTLC },
321321
/// This inbound HTLC is a forward that was irrevocably committed to the outbound edge, allowing
@@ -7885,7 +7885,9 @@ where
78857885
}
78867886

78877887
/// Useful for reconstructing the set of pending HTLCs when deserializing the `ChannelManager`.
7888-
pub(super) fn inbound_committed_unresolved_htlcs(&self) -> Vec<msgs::UpdateAddHTLC> {
7888+
pub(super) fn inbound_committed_unresolved_htlcs(
7889+
&self,
7890+
) -> Vec<(PaymentHash, InboundUpdateAdd)> {
78897891
// We don't want to return an HTLC as needing processing if it already has a resolution that's
78907892
// pending in the holding cell.
78917893
let htlc_resolution_in_holding_cell = |id: u64| -> bool {
@@ -7903,13 +7905,11 @@ where
79037905
.pending_inbound_htlcs
79047906
.iter()
79057907
.filter_map(|htlc| match &htlc.state {
7906-
InboundHTLCState::Committed {
7907-
update_add_htlc: InboundUpdateAdd::WithOnion { update_add_htlc },
7908-
} => {
7908+
InboundHTLCState::Committed { update_add_htlc } => {
79097909
if htlc_resolution_in_holding_cell(htlc.htlc_id) {
79107910
return None;
79117911
}
7912-
Some(update_add_htlc.clone())
7912+
Some((htlc.payment_hash, update_add_htlc.clone()))
79137913
},
79147914
_ => None,
79157915
})
@@ -7967,6 +7967,12 @@ where
79677967
debug_assert!(false, "If we go to prune an inbound HTLC it should be present")
79687968
}
79697969

7970+
/// Useful for testing crash scenarios where the holding cell is not persisted.
7971+
#[cfg(test)]
7972+
pub(super) fn test_clear_holding_cell(&mut self) {
7973+
self.context.holding_cell_htlc_updates.clear()
7974+
}
7975+
79707976
/// Marks an outbound HTLC which we have received update_fail/fulfill/malformed
79717977
#[inline]
79727978
fn mark_outbound_htlc_removed(

lightning/src/ln/channelmanager.rs

Lines changed: 121 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ use crate::ln::chan_utils::selected_commitment_sat_per_1000_weight;
5959
use crate::ln::channel::QuiescentAction;
6060
use crate::ln::channel::{
6161
self, hold_time_since, Channel, ChannelError, ChannelUpdateStatus, DisconnectResult,
62-
FundedChannel, FundingTxSigned, InboundV1Channel, OutboundV1Channel, PendingV2Channel,
63-
ReconnectionMsg, ShutdownResult, SpliceFundingFailed, StfuResponse, UpdateFulfillCommitFetch,
64-
WithChannelContext,
62+
FundedChannel, FundingTxSigned, InboundUpdateAdd, InboundV1Channel, OutboundV1Channel,
63+
PendingV2Channel, ReconnectionMsg, ShutdownResult, SpliceFundingFailed, StfuResponse,
64+
UpdateFulfillCommitFetch, WithChannelContext,
6565
};
6666
use crate::ln::channel_state::ChannelDetails;
6767
use crate::ln::funding::SpliceContribution;
@@ -10183,7 +10183,20 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1018310183
let per_peer_state = self.per_peer_state.read().unwrap();
1018410184
let peer_state = per_peer_state.get(&cp_id).map(|state| state.lock().unwrap()).unwrap();
1018510185
let chan = peer_state.channel_by_id.get(&chan_id).and_then(|c| c.as_funded()).unwrap();
10186-
chan.inbound_committed_unresolved_htlcs().len()
10186+
chan.inbound_committed_unresolved_htlcs()
10187+
.iter()
10188+
.filter(|(_, htlc)| matches!(htlc, InboundUpdateAdd::WithOnion { .. }))
10189+
.count()
10190+
}
10191+
10192+
#[cfg(test)]
10193+
/// Useful for testing crash scenarios where the holding cell of a channel is not persisted.
10194+
pub(crate) fn test_clear_channel_holding_cell(&self, cp_id: PublicKey, chan_id: ChannelId) {
10195+
let per_peer_state = self.per_peer_state.read().unwrap();
10196+
let mut peer_state = per_peer_state.get(&cp_id).map(|state| state.lock().unwrap()).unwrap();
10197+
let chan =
10198+
peer_state.channel_by_id.get_mut(&chan_id).and_then(|c| c.as_funded_mut()).unwrap();
10199+
chan.test_clear_holding_cell();
1018710200
}
1018810201

1018910202
/// Completes channel resumption after locks have been released.
@@ -18293,7 +18306,7 @@ impl<
1829318306
}
1829418307

1829518308
// Post-deserialization processing
18296-
let mut decode_update_add_htlcs = new_hash_map();
18309+
let mut decode_update_add_htlcs: HashMap<u64, Vec<msgs::UpdateAddHTLC>> = new_hash_map();
1829718310
if fake_scid_rand_bytes.is_none() {
1829818311
fake_scid_rand_bytes = Some(args.entropy_source.get_secure_random_bytes());
1829918312
}
@@ -18594,6 +18607,22 @@ impl<
1859418607
// have a fully-constructed `ChannelManager` at the end.
1859518608
let mut pending_claims_to_replay = Vec::new();
1859618609

18610+
// If we find an inbound HTLC that claims to already be forwarded to the outbound edge, we
18611+
// store an identifier for it here and verify that it is either (a) present in the outbound
18612+
// edge or (b) removed from the outbound edge via claim. If it's in neither of these states, we
18613+
// infer that it was removed from the outbound edge via fail, and fail it backwards to ensure
18614+
// that it is handled.
18615+
let mut already_forwarded_htlcs = Vec::new();
18616+
let prune_forwarded_htlc =
18617+
|already_forwarded_htlcs: &mut Vec<(PaymentHash, HTLCPreviousHopData, u64)>,
18618+
prev_hop: &HTLCPreviousHopData| {
18619+
if let Some(idx) = already_forwarded_htlcs.iter().position(|(_, htlc, _)| {
18620+
prev_hop.htlc_id == htlc.htlc_id
18621+
&& prev_hop.prev_outbound_scid_alias == htlc.prev_outbound_scid_alias
18622+
}) {
18623+
already_forwarded_htlcs.swap_remove(idx);
18624+
}
18625+
};
1859718626
{
1859818627
// If we're tracking pending payments, ensure we haven't lost any by looking at the
1859918628
// ChannelMonitor data for any channels for which we do not have authorative state
@@ -18616,16 +18645,38 @@ impl<
1861618645
if reconstruct_manager_from_monitors {
1861718646
if let Some(chan) = peer_state.channel_by_id.get(channel_id) {
1861818647
if let Some(funded_chan) = chan.as_funded() {
18648+
let scid_alias = funded_chan.context.outbound_scid_alias();
1861918649
let inbound_committed_update_adds =
1862018650
funded_chan.inbound_committed_unresolved_htlcs();
18621-
if !inbound_committed_update_adds.is_empty() {
18622-
// Reconstruct `ChannelManager::decode_update_add_htlcs` from the serialized
18623-
// `Channel`, as part of removing the requirement to regularly persist the
18624-
// `ChannelManager`.
18625-
decode_update_add_htlcs.insert(
18626-
funded_chan.context.outbound_scid_alias(),
18627-
inbound_committed_update_adds,
18628-
);
18651+
for (payment_hash, htlc) in inbound_committed_update_adds {
18652+
match htlc {
18653+
InboundUpdateAdd::WithOnion { update_add_htlc } => {
18654+
// Reconstruct `ChannelManager::decode_update_add_htlcs` from the serialized
18655+
// `Channel` as part of removing the requirement to regularly persist the
18656+
// `ChannelManager`.
18657+
match decode_update_add_htlcs.entry(scid_alias) {
18658+
hash_map::Entry::Occupied(mut entry) => {
18659+
entry.get_mut().push(update_add_htlc);
18660+
},
18661+
hash_map::Entry::Vacant(entry) => {
18662+
entry.insert(vec![update_add_htlc]);
18663+
},
18664+
}
18665+
},
18666+
InboundUpdateAdd::Forwarded {
18667+
hop_data,
18668+
outbound_amt_msat,
18669+
} => {
18670+
already_forwarded_htlcs.push((
18671+
payment_hash,
18672+
hop_data,
18673+
outbound_amt_msat,
18674+
));
18675+
},
18676+
InboundUpdateAdd::Legacy => {
18677+
return Err(DecodeError::InvalidValue)
18678+
},
18679+
}
1862918680
}
1863018681
}
1863118682
}
@@ -18679,6 +18730,7 @@ impl<
1867918730
"HTLC already forwarded to the outbound edge",
1868018731
&args.logger,
1868118732
);
18733+
prune_forwarded_htlc(&mut already_forwarded_htlcs, &prev_hop);
1868218734
}
1868318735
}
1868418736
}
@@ -18713,6 +18765,10 @@ impl<
1871318765
"HTLC already forwarded to the outbound edge",
1871418766
&&logger,
1871518767
);
18768+
prune_forwarded_htlc(
18769+
&mut already_forwarded_htlcs,
18770+
&prev_hop_data,
18771+
);
1871618772
}
1871718773

1871818774
// The ChannelMonitor is now responsible for this HTLC's
@@ -19168,6 +19224,7 @@ impl<
1916819224
"HTLC was failed backwards during manager read",
1916919225
&args.logger,
1917019226
);
19227+
prune_forwarded_htlc(&mut already_forwarded_htlcs, prev_hop_data);
1917119228
}
1917219229
}
1917319230

@@ -19313,9 +19370,47 @@ impl<
1931319370
};
1931419371

1931519372
let mut processed_claims: HashSet<Vec<MPPClaimHTLCSource>> = new_hash_set();
19316-
for (_, monitor) in args.channel_monitors.iter() {
19373+
for (channel_id, monitor) in args.channel_monitors.iter() {
1931719374
for (payment_hash, (payment_preimage, payment_claims)) in monitor.get_stored_preimages()
1931819375
{
19376+
// If we have unresolved inbound committed HTLCs that were already forwarded to the
19377+
// outbound edge and removed via claim, we need to make sure to claim them backwards via
19378+
// adding them to `pending_claims_to_replay`.
19379+
for (hash, hop_data, outbound_amt_msat) in
19380+
mem::take(&mut already_forwarded_htlcs).drain(..)
19381+
{
19382+
if hash != payment_hash {
19383+
already_forwarded_htlcs.push((hash, hop_data, outbound_amt_msat));
19384+
continue;
19385+
}
19386+
let new_pending_claim = !pending_claims_to_replay.iter().any(|(src, _, _, _, _, _, _)| {
19387+
matches!(src, HTLCSource::PreviousHopData(hop) if hop.htlc_id == hop_data.htlc_id && hop.prev_outbound_scid_alias == hop_data.prev_outbound_scid_alias)
19388+
});
19389+
if new_pending_claim {
19390+
let counterparty_node_id = monitor.get_counterparty_node_id();
19391+
let is_channel_closed = channel_manager
19392+
.per_peer_state
19393+
.read()
19394+
.unwrap()
19395+
.get(&counterparty_node_id)
19396+
.map_or(true, |peer_state_mtx| {
19397+
!peer_state_mtx
19398+
.lock()
19399+
.unwrap()
19400+
.channel_by_id
19401+
.contains_key(channel_id)
19402+
});
19403+
pending_claims_to_replay.push((
19404+
HTLCSource::PreviousHopData(hop_data),
19405+
payment_preimage,
19406+
outbound_amt_msat,
19407+
is_channel_closed,
19408+
counterparty_node_id,
19409+
monitor.get_funding_txo(),
19410+
*channel_id,
19411+
));
19412+
}
19413+
}
1931919414
if !payment_claims.is_empty() {
1932019415
for payment_claim in payment_claims {
1932119416
if processed_claims.contains(&payment_claim.mpp_parts) {
@@ -19557,6 +19652,18 @@ impl<
1955719652
channel_manager
1955819653
.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, ev_action);
1955919654
}
19655+
for (hash, htlc, _) in already_forwarded_htlcs {
19656+
let channel_id = htlc.channel_id;
19657+
let node_id = htlc.counterparty_node_id;
19658+
let source = HTLCSource::PreviousHopData(htlc);
19659+
let failure_reason = LocalHTLCFailureReason::TemporaryChannelFailure;
19660+
let failure_data = channel_manager.get_htlc_inbound_temp_fail_data(failure_reason);
19661+
let reason = HTLCFailReason::reason(failure_reason, failure_data);
19662+
let receiver = HTLCHandlingFailureType::Forward { node_id, channel_id };
19663+
// The event completion action is only relevant for HTLCs that originate from our node, not
19664+
// forwarded HTLCs.
19665+
channel_manager.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, None);
19666+
}
1956019667

1956119668
for (
1956219669
source,

lightning/src/ln/functional_test_utils.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,6 +1425,23 @@ macro_rules! reload_node {
14251425
None
14261426
);
14271427
};
1428+
// Reload the node and have the `ChannelManager` use new codepaths that reconstruct its set of
1429+
// pending HTLCs from `Channel{Monitor}` data.
1430+
($node: expr, $chanman_encoded: expr, $monitors_encoded: expr, $persister:
1431+
ident, $new_chain_monitor: ident, $new_channelmanager: ident, $reconstruct_pending_htlcs: expr
1432+
) => {
1433+
let config = $node.node.get_current_config();
1434+
_reload_node_inner!(
1435+
$node,
1436+
config,
1437+
$chanman_encoded,
1438+
$monitors_encoded,
1439+
$persister,
1440+
$new_chain_monitor,
1441+
$new_channelmanager,
1442+
$reconstruct_pending_htlcs
1443+
);
1444+
};
14281445
}
14291446

14301447
pub fn create_funding_transaction<'a, 'b, 'c>(

0 commit comments

Comments (0)