Skip to content

Commit 432780c

Browse files
committed
Introduce telemetry for observability
This introduces the foundational telemetry infrastructure to improve the observability of LDK Server. It adds a new `/metrics` endpoint exposed on the REST service address, which serves Prometheus-compatible metrics. This endpoint is public and does not require HMAC authentication, allowing for easy integration with monitoring systems. - Added a `Metrics` utility struct to hold all the metrics we need to expose. This is the first step in a larger effort to provide comprehensive telemetry. Future updates will expand this to include other detailed metrics for channels, balances, payments, etc.
1 parent 9b33ccd commit 432780c

6 files changed

Lines changed: 389 additions & 30 deletions

File tree

ldk-server-client/src/client.rs

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,15 @@ use ldk_server_protos::api::{
3131
use ldk_server_protos::endpoints::{
3232
BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
3333
CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
34-
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
35-
GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH,
36-
LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH,
37-
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
38-
SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
34+
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
35+
GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH,
36+
GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH,
37+
LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH,
38+
OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH,
39+
UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
3940
};
4041
use ldk_server_protos::error::{ErrorCode, ErrorResponse};
42+
use prost::bytes::Bytes;
4143
use prost::Message;
4244
use reqwest::header::CONTENT_TYPE;
4345
use reqwest::{Certificate, Client};
@@ -61,6 +63,11 @@ pub struct LdkServerClient {
6163
api_key: String,
6264
}
6365

66+
enum RequestType {
67+
Get,
68+
Post,
69+
}
70+
6471
impl LdkServerClient {
6572
/// Constructs a [`LdkServerClient`] using `base_url` as the ldk-server endpoint.
6673
///
@@ -106,6 +113,18 @@ impl LdkServerClient {
106113
self.post_request(&request, &url).await
107114
}
108115

116+
/// Retrieve the node metrics in Prometheus format.
117+
pub async fn get_metrics(&self) -> Result<String, LdkServerError> {
118+
let url = format!("https://{}/{GET_METRICS_PATH}", self.base_url);
119+
let payload = self.make_request(&url, RequestType::Get, None, false).await?;
120+
String::from_utf8(payload.to_vec()).map_err(|e| {
121+
LdkServerError::new(
122+
InternalError,
123+
format!("Failed to decode metrics response as string: {}", e),
124+
)
125+
})
126+
}
127+
109128
/// Retrieves an overview of all known balances.
110129
/// For API contract/usage, refer to docs for [`GetBalancesRequest`] and [`GetBalancesResponse`].
111130
pub async fn get_balances(
@@ -353,31 +372,46 @@ impl LdkServerClient {
353372
&self, request: &Rq, url: &str,
354373
) -> Result<Rs, LdkServerError> {
355374
let request_body = request.encode_to_vec();
356-
let auth_header = self.compute_auth_header(&request_body);
357-
let response_raw = self
358-
.client
359-
.post(url)
360-
.header(CONTENT_TYPE, APPLICATION_OCTET_STREAM)
361-
.header("X-Auth", auth_header)
362-
.body(request_body)
363-
.send()
364-
.await
365-
.map_err(|e| {
366-
LdkServerError::new(InternalError, format!("HTTP request failed: {}", e))
367-
})?;
375+
let payload = self.make_request(url, RequestType::Post, Some(request_body), true).await?;
376+
Rs::decode(&payload[..]).map_err(|e| {
377+
LdkServerError::new(InternalError, format!("Failed to decode success response: {}", e))
378+
})
379+
}
380+
381+
async fn make_request(
382+
&self, url: &str, request_type: RequestType, body: Option<Vec<u8>>, authenticated: bool,
383+
) -> Result<Bytes, LdkServerError> {
384+
let builder = match request_type {
385+
RequestType::Get => self.client.get(url),
386+
RequestType::Post => self.client.post(url),
387+
};
388+
389+
let body_for_auth = body.as_deref().unwrap_or(&[]);
390+
391+
let builder = if authenticated {
392+
let auth_header = self.compute_auth_header(body_for_auth);
393+
builder.header("X-Auth", auth_header)
394+
} else {
395+
builder
396+
};
397+
398+
let builder = if let Some(body_content) = body {
399+
builder.header(CONTENT_TYPE, APPLICATION_OCTET_STREAM).body(body_content)
400+
} else {
401+
builder
402+
};
403+
404+
let response_raw = builder.send().await.map_err(|e| {
405+
LdkServerError::new(InternalError, format!("HTTP request failed: {}", e))
406+
})?;
368407

369408
let status = response_raw.status();
370409
let payload = response_raw.bytes().await.map_err(|e| {
371410
LdkServerError::new(InternalError, format!("Failed to read response body: {}", e))
372411
})?;
373412

374413
if status.is_success() {
375-
Ok(Rs::decode(&payload[..]).map_err(|e| {
376-
LdkServerError::new(
377-
InternalError,
378-
format!("Failed to decode success response: {}", e),
379-
)
380-
})?)
414+
Ok(payload)
381415
} else {
382416
let error_response = ErrorResponse::decode(&payload[..]).map_err(|e| {
383417
LdkServerError::new(

ldk-server-protos/src/endpoints.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,4 @@ pub const GRAPH_LIST_CHANNELS_PATH: &str = "GraphListChannels";
3535
pub const GRAPH_GET_CHANNEL_PATH: &str = "GraphGetChannel";
3636
pub const GRAPH_LIST_NODES_PATH: &str = "GraphListNodes";
3737
pub const GRAPH_GET_NODE_PATH: &str = "GraphGetNode";
38+
pub const GET_METRICS_PATH: &str = "metrics";

ldk-server/src/main.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ use crate::io::persist::{
5050
use crate::service::NodeService;
5151
use crate::util::config::{load_config, ArgsConfig, ChainSource};
5252
use crate::util::logger::ServerLogger;
53+
use crate::util::metrics::{Metrics, BUILD_METRICS_INTERVAL};
5354
use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto};
5455
use crate::util::tls::get_or_generate_tls_config;
5556

@@ -256,6 +257,20 @@ fn main() {
256257
}
257258
};
258259
let event_node = Arc::clone(&node);
260+
261+
let metrics_node = Arc::clone(&node);
262+
let mut interval = tokio::time::interval(BUILD_METRICS_INTERVAL);
263+
let metrics = Arc::new(Metrics::new());
264+
let metrics_bg = Arc::clone(&metrics);
265+
let event_metrics = Arc::clone(&metrics);
266+
267+
runtime.spawn(async move {
268+
loop {
269+
interval.tick().await;
270+
metrics_bg.update_all_pollable_metrics(&metrics_node);
271+
}
272+
});
273+
259274
let rest_svc_listener = TcpListener::bind(config_file.rest_service_addr)
260275
.await
261276
.expect("Failed to bind listening port");
@@ -320,6 +335,8 @@ fn main() {
320335
&event_node,
321336
Arc::clone(&event_publisher),
322337
Arc::clone(&paginated_store)).await;
338+
339+
event_metrics.update_total_successful_payments_count(&event_node);
323340
},
324341
Event::PaymentFailed {payment_id, ..} => {
325342
let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1");
@@ -331,6 +348,8 @@ fn main() {
331348
&event_node,
332349
Arc::clone(&event_publisher),
333350
Arc::clone(&paginated_store)).await;
351+
352+
event_metrics.update_total_failed_payments_count(&event_node);
334353
},
335354
Event::PaymentClaimable {payment_id, ..} => {
336355
if let Some(payment_details) = event_node.payment(&payment_id) {
@@ -415,7 +434,7 @@ fn main() {
415434
res = rest_svc_listener.accept() => {
416435
match res {
417436
Ok((stream, _)) => {
418-
let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone());
437+
let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone(), Arc::clone(&metrics));
419438
let acceptor = tls_acceptor.clone();
420439
runtime.spawn(async move {
421440
match acceptor.accept(stream).await {

ldk-server/src/service.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@ use ldk_node::Node;
2121
use ldk_server_protos::endpoints::{
2222
BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
2323
CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
24-
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
25-
GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH,
26-
LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH,
27-
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
28-
SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
24+
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
25+
GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH,
26+
GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH,
27+
LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH,
28+
OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH,
29+
UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
2930
};
3031
use prost::Message;
3132

@@ -58,6 +59,7 @@ use crate::api::spontaneous_send::handle_spontaneous_send_request;
5859
use crate::api::update_channel_config::handle_update_channel_config_request;
5960
use crate::api::verify_signature::handle_verify_signature_request;
6061
use crate::io::persist::paginated_kv_store::PaginatedKVStore;
62+
use crate::util::metrics::Metrics;
6163
use crate::util::proto_adapter::to_error_response;
6264

6365
// Maximum request body size: 10 MB
@@ -69,13 +71,15 @@ pub struct NodeService {
6971
node: Arc<Node>,
7072
paginated_kv_store: Arc<dyn PaginatedKVStore>,
7173
api_key: String,
74+
metrics: Arc<Metrics>,
7275
}
7376

7477
impl NodeService {
7578
pub(crate) fn new(
7679
node: Arc<Node>, paginated_kv_store: Arc<dyn PaginatedKVStore>, api_key: String,
80+
metrics: Arc<Metrics>,
7781
) -> Self {
78-
Self { node, paginated_kv_store, api_key }
82+
Self { node, paginated_kv_store, api_key, metrics }
7983
}
8084
}
8185

@@ -159,6 +163,17 @@ impl Service<Request<Incoming>> for NodeService {
159163
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
160164

161165
fn call(&self, req: Request<Incoming>) -> Self::Future {
166+
// Handle metrics endpoint separately to bypass auth and return plain text
167+
if req.uri().path().len() > 1 && &req.uri().path()[1..] == GET_METRICS_PATH {
168+
let metrics = Arc::clone(&self.metrics);
169+
return Box::pin(async move {
170+
Ok(Response::builder()
171+
.header("Content-Type", "text/plain")
172+
.body(Full::new(Bytes::from(metrics.gather_metrics())))
173+
.unwrap())
174+
});
175+
}
176+
162177
// Extract auth params from headers (validation happens after body is read)
163178
let auth_params = match extract_auth_params(&req) {
164179
Ok(params) => params,

0 commit comments

Comments
 (0)