Skip to content

Commit 226b86c

Browse files
committed
Introduce telemetry for observability
This introduces the foundational telemetry infrastructure to improve the observability of LDK Server. It adds a new `/metrics` endpoint exposed on the REST service address, which serves Prometheus-compatible metrics. This endpoint is public and does not require HMAC authentication, allowing for easy integration with monitoring systems. - Added a `Metrics` utility struct to hold all the metrics we need to expose. This is the first step in a larger effort to provide comprehensive telemetry. Future updates will expand this to include other detailed metrics for channels, balances, payments, etc.
1 parent 39d199b commit 226b86c

6 files changed

Lines changed: 356 additions & 10 deletions

File tree

ldk-server-client/src/client.rs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ use ldk_server_protos::api::{
2929
use ldk_server_protos::endpoints::{
3030
BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
3131
CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
32-
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
33-
LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH,
34-
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
35-
SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
32+
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
33+
GET_PAYMENT_DETAILS_PATH, LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH,
34+
ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH,
35+
SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
3636
};
3737
use ldk_server_protos::error::{ErrorCode, ErrorResponse};
3838
use prost::Message;
@@ -103,6 +103,28 @@ impl LdkServerClient {
103103
self.post_request(&request, &url).await
104104
}
105105

106+
/// Retrieve the node metrics in Prometheus format.
107+
pub async fn get_metrics(&self) -> Result<String, LdkServerError> {
108+
let url = format!("https://{}/{GET_METRICS_PATH}", self.base_url);
109+
let response = self.client.get(&url).send().await.map_err(|e| {
110+
LdkServerError::new(InternalError, format!("HTTP request failed: {}", e))
111+
})?;
112+
113+
let status = response.status();
114+
let text = response.text().await.map_err(|e| {
115+
LdkServerError::new(InternalError, format!("Failed to read response body: {}", e))
116+
})?;
117+
118+
if status.is_success() {
119+
Ok(text)
120+
} else {
121+
Err(LdkServerError::new(
122+
InternalError,
123+
format!("Request failed with status {}: {}", status, text),
124+
))
125+
}
126+
}
127+
106128
/// Retrieves an overview of all known balances.
107129
/// For API contract/usage, refer to docs for [`GetBalancesRequest`] and [`GetBalancesResponse`].
108130
pub async fn get_balances(

ldk-server-protos/src/endpoints.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ pub const SPONTANEOUS_SEND_PATH: &str = "SpontaneousSend";
3131
pub const SIGN_MESSAGE_PATH: &str = "SignMessage";
3232
pub const VERIFY_SIGNATURE_PATH: &str = "VerifySignature";
3333
pub const EXPORT_PATHFINDING_SCORES_PATH: &str = "ExportPathfindingScores";
34+
pub const GET_METRICS_PATH: &str = "metrics";

ldk-server/src/main.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ use crate::io::persist::{
5050
use crate::service::NodeService;
5151
use crate::util::config::{load_config, ArgsConfig, ChainSource};
5252
use crate::util::logger::ServerLogger;
53+
use crate::util::metrics::{Metrics, BUILD_METRICS_INTERVAL};
5354
use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto};
5455
use crate::util::tls::get_or_generate_tls_config;
5556

@@ -256,6 +257,20 @@ fn main() {
256257
}
257258
};
258259
let event_node = Arc::clone(&node);
260+
261+
let metrics_node = Arc::clone(&node);
262+
let mut interval = tokio::time::interval(BUILD_METRICS_INTERVAL);
263+
let metrics = Arc::new(Metrics::new());
264+
let metrics_bg = Arc::clone(&metrics);
265+
let event_metrics = Arc::clone(&metrics);
266+
267+
runtime.spawn(async move {
268+
loop {
269+
interval.tick().await;
270+
metrics_bg.update_all_pollable_metrics(&metrics_node);
271+
}
272+
});
273+
259274
let rest_svc_listener = TcpListener::bind(config_file.rest_service_addr)
260275
.await
261276
.expect("Failed to bind listening port");
@@ -320,6 +335,8 @@ fn main() {
320335
&event_node,
321336
Arc::clone(&event_publisher),
322337
Arc::clone(&paginated_store)).await;
338+
339+
event_metrics.update_total_successful_payments_count(&event_node);
323340
},
324341
Event::PaymentFailed {payment_id, ..} => {
325342
let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1");
@@ -331,6 +348,8 @@ fn main() {
331348
&event_node,
332349
Arc::clone(&event_publisher),
333350
Arc::clone(&paginated_store)).await;
351+
352+
event_metrics.update_total_failed_payments_count(&event_node);
334353
},
335354
Event::PaymentClaimable {payment_id, ..} => {
336355
if let Some(payment_details) = event_node.payment(&payment_id) {
@@ -415,7 +434,7 @@ fn main() {
415434
res = rest_svc_listener.accept() => {
416435
match res {
417436
Ok((stream, _)) => {
418-
let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone());
437+
let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone(), Arc::clone(&metrics));
419438
let acceptor = tls_acceptor.clone();
420439
runtime.spawn(async move {
421440
match acceptor.accept(stream).await {

ldk-server/src/service.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ use ldk_node::Node;
2121
use ldk_server_protos::endpoints::{
2222
BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
2323
CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
24-
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
25-
LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH,
26-
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
27-
SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
24+
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
25+
GET_PAYMENT_DETAILS_PATH, LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH,
26+
ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH,
27+
SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
2828
};
2929
use prost::Message;
3030

@@ -53,6 +53,7 @@ use crate::api::spontaneous_send::handle_spontaneous_send_request;
5353
use crate::api::update_channel_config::handle_update_channel_config_request;
5454
use crate::api::verify_signature::handle_verify_signature_request;
5555
use crate::io::persist::paginated_kv_store::PaginatedKVStore;
56+
use crate::util::metrics::Metrics;
5657
use crate::util::proto_adapter::to_error_response;
5758

5859
// Maximum request body size: 10 MB
@@ -64,13 +65,15 @@ pub struct NodeService {
6465
node: Arc<Node>,
6566
paginated_kv_store: Arc<dyn PaginatedKVStore>,
6667
api_key: String,
68+
metrics: Arc<Metrics>,
6769
}
6870

6971
impl NodeService {
7072
pub(crate) fn new(
7173
node: Arc<Node>, paginated_kv_store: Arc<dyn PaginatedKVStore>, api_key: String,
74+
metrics: Arc<Metrics>,
7275
) -> Self {
73-
Self { node, paginated_kv_store, api_key }
76+
Self { node, paginated_kv_store, api_key, metrics }
7477
}
7578
}
7679

@@ -154,6 +157,17 @@ impl Service<Request<Incoming>> for NodeService {
154157
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
155158

156159
fn call(&self, req: Request<Incoming>) -> Self::Future {
160+
// Handle metrics endpoint separately to bypass auth and return plain text
161+
if req.uri().path().len() > 1 && &req.uri().path()[1..] == GET_METRICS_PATH {
162+
let metrics = Arc::clone(&self.metrics);
163+
return Box::pin(async move {
164+
Ok(Response::builder()
165+
.header("Content-Type", "text/plain")
166+
.body(Full::new(Bytes::from(metrics.gather_metrics())))
167+
.unwrap())
168+
});
169+
}
170+
157171
// Extract auth params from headers (validation happens after body is read)
158172
let auth_params = match extract_auth_params(&req) {
159173
Ok(params) => params,

0 commit comments

Comments
 (0)