Skip to content

Commit c954a8b

Browse files
hyperpolymath and claude committed
Phase 1.3: Graceful shutdown with WAL checkpoint flush
Add graceful_shutdown() method to InMemoryOctadStore that writes a final WAL checkpoint and logs entity count before exit.

Wire into both serve() and serve_tls():
- tokio::signal handles Ctrl+C (SIGINT) and SIGTERM (Unix)
- axum::serve with_graceful_shutdown stops accepting new connections
- octad_store.graceful_shutdown() writes final WAL checkpoint
- TLS server uses axum_server::Handle for coordinated shutdown

This ensures clean WAL state on normal exit, reducing WAL replay work on next startup and preventing false "uncommitted operation" warnings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9b78d6f commit c954a8b

2 files changed

Lines changed: 89 additions & 3 deletions

File tree

verisimdb/rust-core/verisim-api/src/lib.rs

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,23 +1701,68 @@ async fn proof_generate_with_circuit_handler(
17011701
}
17021702
}
17031703

1704-
/// Start the API server (plain HTTP)
1704+
/// Start the API server (plain HTTP) with graceful shutdown.
1705+
///
1706+
/// On SIGINT (Ctrl+C) or SIGTERM, the server:
1707+
/// 1. Stops accepting new connections
1708+
/// 2. Writes a final WAL checkpoint
1709+
/// 3. Logs shutdown metrics
1710+
/// 4. Exits cleanly
17051711
pub async fn serve(config: ApiConfig) -> Result<(), std::io::Error> {
17061712
let state = AppState::new_async(config.clone())
17071713
.await
17081714
.map_err(|e| std::io::Error::other(e.to_string()))?;
1715+
1716+
// Keep a reference to the octad store for shutdown
1717+
let octad_store = state.octad_store.clone();
1718+
17091719
let app = build_router(state);
17101720

17111721
let addr = format!("{}:{}", config.host, config.port);
17121722
info!("Starting VeriSimDB API server on {}", addr);
17131723

17141724
let listener = TcpListener::bind(&addr).await?;
1715-
axum::serve(listener, app).await?;
1725+
1726+
// Serve with graceful shutdown on Ctrl+C / SIGTERM
1727+
axum::serve(listener, app)
1728+
.with_graceful_shutdown(shutdown_signal())
1729+
.await?;
1730+
1731+
// After server stops accepting connections, perform clean shutdown
1732+
info!("VeriSimDB: server stopped, flushing WAL...");
1733+
if let Err(e) = octad_store.graceful_shutdown().await {
1734+
tracing::warn!("Graceful shutdown error (non-fatal): {e}");
1735+
}
17161736

17171737
Ok(())
17181738
}
17191739

1720-
/// Start the API server with TLS (HTTPS)
1740+
/// Wait for a shutdown signal (Ctrl+C or SIGTERM).
1741+
async fn shutdown_signal() {
1742+
let ctrl_c = async {
1743+
tokio::signal::ctrl_c()
1744+
.await
1745+
.expect("Failed to install Ctrl+C handler");
1746+
};
1747+
1748+
#[cfg(unix)]
1749+
let terminate = async {
1750+
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
1751+
.expect("Failed to install SIGTERM handler")
1752+
.recv()
1753+
.await;
1754+
};
1755+
1756+
#[cfg(not(unix))]
1757+
let terminate = std::future::pending::<()>();
1758+
1759+
tokio::select! {
1760+
_ = ctrl_c => info!("Received Ctrl+C, initiating graceful shutdown"),
1761+
_ = terminate => info!("Received SIGTERM, initiating graceful shutdown"),
1762+
}
1763+
}
1764+
1765+
/// Start the API server with TLS (HTTPS) and graceful shutdown.
17211766
pub async fn serve_tls(
17221767
config: ApiConfig,
17231768
cert_path: &str,
@@ -1728,6 +1773,8 @@ pub async fn serve_tls(
17281773
let state = AppState::new_async(config.clone())
17291774
.await
17301775
.map_err(|e| std::io::Error::other(e.to_string()))?;
1776+
1777+
let octad_store = state.octad_store.clone();
17311778
let app = build_router(state);
17321779

17331780
let addr = format!("{}:{}", config.host, config.port);
@@ -1741,10 +1788,26 @@ pub async fn serve_tls(
17411788
.parse()
17421789
.map_err(|e: std::net::AddrParseError| std::io::Error::other(e.to_string()))?;
17431790

1791+
let handle = axum_server::Handle::new();
1792+
let shutdown_handle = handle.clone();
1793+
1794+
// Spawn shutdown listener
1795+
tokio::spawn(async move {
1796+
shutdown_signal().await;
1797+
shutdown_handle.graceful_shutdown(Some(std::time::Duration::from_secs(10)));
1798+
});
1799+
17441800
axum_server::bind_rustls(addr, tls_config)
1801+
.handle(handle)
17451802
.serve(app.into_make_service())
17461803
.await?;
17471804

1805+
// After server stops, perform clean shutdown
1806+
info!("VeriSimDB TLS: server stopped, flushing WAL...");
1807+
if let Err(e) = octad_store.graceful_shutdown().await {
1808+
tracing::warn!("Graceful shutdown error (non-fatal): {e}");
1809+
}
1810+
17481811
Ok(())
17491812
}
17501813

verisimdb/rust-core/verisim-octad/src/store.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,29 @@ where
346346
Ok(recovered)
347347
}
348348

349+
/// Perform a graceful shutdown: write a final WAL checkpoint and log metrics.
350+
///
351+
/// Call this before process exit to ensure all in-flight operations are
352+
/// checkpointed. Persistent modality stores (redb) flush automatically on
353+
/// drop, but the WAL needs an explicit final checkpoint to mark the clean
354+
/// shutdown boundary.
355+
pub async fn graceful_shutdown(&self) -> Result<(), OctadError> {
356+
info!("VeriSimDB: graceful shutdown initiated");
357+
358+
// Write final WAL checkpoint
359+
self.wal_checkpoint().await?;
360+
361+
// Log final state
362+
let octads = self.octads.read().await;
363+
info!(
364+
entity_count = octads.len(),
365+
"VeriSimDB: shutdown complete — {} entities checkpointed",
366+
octads.len()
367+
);
368+
369+
Ok(())
370+
}
371+
349372
/// Access the provenance store for direct queries.
350373
pub fn provenance_store(&self) -> &Arc<P> {
351374
&self.provenance

0 commit comments

Comments
 (0)