Skip to content

Commit e50791a

Browse files
ben-kaufman and claude committed
fix: sync chain tip before healing keysend to prevent HTLC timeout force-close
The healing keysend creates an HTLC with cltv_expiry based on the ChannelManager's best block height. For users offline >24h (all affected users), the height is stale — the HTLC is already expired by the time chain sync catches up, causing LDK to force-close the channel (HTLCsTimedOut). This defeats the entire recovery.

Fix: call sync_lightning_wallet() before sending healing payments. This updates the ChannelManager's chain tip to the current height so the HTLC gets a valid CLTV expiry. If sync fails, skip the keysend entirely to avoid the stale-CLTV force-close — the monitor will heal naturally on the first real user payment after continuous chain sync starts.

The stale monitor processes blocks during this sync (accepted risk: Blocktank is trusted, and the monitor's force-synced update_id makes it "valid" from LDK's perspective — the only concern is counterparty force-close during the offline period, which Blocktank wouldn't do).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 153ecbe commit e50791a

1 file changed

Lines changed: 52 additions & 19 deletions

File tree

src/lib.rs

Lines changed: 52 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -671,6 +671,8 @@ impl Node {
671671
let channel_manager = Arc::clone(&self.channel_manager);
672672
let chain_monitor = Arc::clone(&self.chain_monitor);
673673
let keys_manager = Arc::clone(&self.keys_manager);
674+
let chain_source = Arc::clone(&self.chain_source);
675+
let sync_sweeper = Arc::clone(&self.output_sweeper);
674676
let heal_logger = Arc::clone(&self.logger);
675677
let mut stop_healing = self.stop_sender.subscribe();
676678
self.runtime.block_on(async move {
@@ -706,10 +708,39 @@ impl Node {
706708
_ = tokio::time::sleep(Duration::from_secs(5)) => {}
707709
}
708710

711+
// Sync chain tip before sending healing payments. This updates the
712+
// ChannelManager's best block height so the keysend HTLC gets a current
713+
// CLTV expiry. Without this, users offline >24h would get an already-expired
714+
// HTLC that triggers a force-close when chain sync later catches up.
715+
log_info!(heal_logger, "Stale monitor recovery: syncing chain tip...");
716+
let chain_synced = match chain_source
717+
.sync_lightning_wallet(
718+
Arc::clone(&channel_manager),
719+
Arc::clone(&chain_monitor),
720+
Arc::clone(&sync_sweeper),
721+
)
722+
.await
723+
{
724+
Ok(()) => {
725+
log_info!(heal_logger, "Stale monitor recovery: chain tip synced.");
726+
true
727+
},
728+
Err(e) => {
729+
log_error!(
730+
heal_logger,
731+
"Stale monitor recovery: chain sync failed: {}. \
732+
Skipping healing payments to avoid stale CLTV.",
733+
e
734+
);
735+
false
736+
},
737+
};
738+
709739
// Send 1-sat keysend payments to trigger commitment round-trips.
710740
// We use real payments (not probes) because LDK rejects single-hop probes.
711741
// The HTLC add/fail cycle triggers commitment_signed exchanges that heal
712742
// the monitor. Cost: 1 sat per counterparty if keysend succeeds.
743+
// Only send if chain sync succeeded — stale CLTV would force-close.
713744
let send_heal_payment = |node_id: bitcoin::secp256k1::PublicKey| {
714745
let payment_id = PaymentId(keys_manager.get_secure_random_bytes());
715746
let mut route_params = RouteParameters::from_payment_params_and_value(
@@ -728,29 +759,31 @@ impl Node {
728759
)
729760
};
730761

731-
// Send one healing payment per unhealed channel. Note: for multiple
732-
// channels with the same peer, the router may pick the same channel for
733-
// both payments. The retry loop gives multiple chances for the router to
734-
// select different channels as scores and capacity shift between attempts.
735-
for (_, counterparty_node_id, _) in &initial_update_ids {
736-
match send_heal_payment(*counterparty_node_id) {
737-
Ok(_) => {
738-
log_info!(
739-
heal_logger,
740-
"Stale monitor recovery: sent healing payment to {}",
741-
counterparty_node_id
742-
);
743-
},
744-
Err(e) => {
745-
log_error!(
762+
if chain_synced {
763+
// Send one healing payment per unhealed channel. Note: for multiple
764+
// channels with the same peer, the router may pick the same channel for
765+
// both payments. The retry loop gives multiple chances for the router to
766+
// select different channels as scores and capacity shift between attempts.
767+
for (_, counterparty_node_id, _) in &initial_update_ids {
768+
match send_heal_payment(*counterparty_node_id) {
769+
Ok(_) => {
770+
log_info!(
771+
heal_logger,
772+
"Stale monitor recovery: sent healing payment to {}",
773+
counterparty_node_id
774+
);
775+
},
776+
Err(e) => {
777+
log_error!(
746778
heal_logger,
747779
"Stale monitor recovery: failed to send healing payment to {}: {:?}",
748780
counterparty_node_id,
749781
e
750782
);
751-
},
783+
},
784+
}
752785
}
753-
}
786+
} // chain_synced
754787

755788
// Poll monitor update_ids until all have advanced (healed) or timeout.
756789
// Retry payments every 10s for channels that haven't healed yet (peer
@@ -804,8 +837,8 @@ impl Node {
804837
break;
805838
}
806839

807-
// Retry healing payments for each unhealed channel.
808-
if last_retry_time.elapsed() >= retry_interval {
840+
// Retry healing payments for each unhealed channel (only if chain synced).
841+
if chain_synced && last_retry_time.elapsed() >= retry_interval {
809842
last_retry_time = tokio::time::Instant::now();
810843
for (ch_id, counterparty_node_id, initial_id) in &initial_update_ids {
811844
let healed = chain_monitor

0 commit comments

Comments
 (0)