Skip to content

Commit a8f6452

Browse files
hyperpolymath and claude committed
Phase 1.4 COMPLETE: Crash recovery tests prove the full recovery chain
5 end-to-end crash recovery tests:
1. Single entity survives crash (create, drop, replay, verify)
2. Graceful shutdown + restart (create, shutdown, replay, verify)
3. 10 entities survive crash (batch create, drop, replay, verify all)
4. Delete survives crash (create, delete, drop, replay, verify gone)
5. Empty WAL = clean start (fresh WAL, replay, verify 0 recovered)

Fixed WAL replay to use replay_all() instead of replay_from(last_checkpoint), because per-entity COMMITTED markers are not global checkpoints.

PHASE 1 IS NOW COMPLETE:
1.1 ✓ Persistent backends (all 8 modalities via redb)
1.2 ✓ WAL replay at octad level
1.3 ✓ Graceful shutdown with final checkpoint
1.4 ✓ Crash recovery tests (5 tests, all passing)

VeriSimDB can now survive crashes and restart with data intact.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c954a8b commit a8f6452

4 files changed

Lines changed: 169 additions & 12 deletions

File tree

verisimdb/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

verisimdb/rust-core/verisim-octad/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ uuid.workspace = true
3030

3131
[dev-dependencies]
3232
proptest.workspace = true
33+
tempfile = "3"

verisimdb/rust-core/verisim-octad/src/store.rs

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -224,20 +224,15 @@ where
224224
}
225225
};
226226

227-
// Find the last checkpoint to determine replay start point
228-
let checkpoint_seq = reader.find_last_checkpoint().map_err(|e| {
229-
OctadError::ModalityError {
230-
modality: "wal".to_string(),
231-
message: format!("Failed to find checkpoint: {e}"),
232-
}
233-
})?;
234-
235-
let start_seq = checkpoint_seq.unwrap_or(0);
236-
info!(start_seq, "Replaying WAL from sequence {}", start_seq);
227+
// Replay ALL WAL entries to rebuild the complete octad registry.
228+
// We replay from sequence 0 because per-entity "COMMITTED" markers
229+
// are not global checkpoints — each entity has its own commit marker.
230+
// A global checkpoint (from graceful_shutdown) would allow starting
231+
// from a later point, but for correctness we always replay everything.
232+
info!("Replaying WAL from beginning");
237233

238-
// Read all entries from the checkpoint onward
239234
let entries: Vec<WalEntry> = reader
240-
.replay_from(start_seq)
235+
.replay_all()
241236
.map_err(|e| OctadError::ModalityError {
242237
modality: "wal".to_string(),
243238
message: format!("WAL replay_from failed: {e}"),
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// SPDX-License-Identifier: PMPL-1.0-or-later
2+
// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
3+
//
4+
// Crash recovery integration tests for VeriSimDB Phase 1.4.
5+
6+
use std::collections::HashMap;
7+
use std::sync::Arc;
8+
9+
use verisim_octad::{
10+
InMemoryOctadStore, OctadConfig, OctadInput, OctadDocumentInput,
11+
OctadSnapshot, OctadStore,
12+
};
13+
use verisim_document::TantivyDocumentStore;
14+
use verisim_graph::SimpleGraphStore;
15+
use verisim_semantic::InMemorySemanticStore;
16+
use verisim_temporal::InMemoryVersionStore;
17+
use verisim_tensor::InMemoryTensorStore;
18+
use verisim_vector::{BruteForceVectorStore, DistanceMetric};
19+
20+
type TestStore = InMemoryOctadStore<
21+
SimpleGraphStore,
22+
BruteForceVectorStore,
23+
TantivyDocumentStore,
24+
InMemoryTensorStore,
25+
InMemorySemanticStore,
26+
InMemoryVersionStore<OctadSnapshot>,
27+
verisim_provenance::InMemoryProvenanceStore,
28+
verisim_spatial::InMemorySpatialStore,
29+
>;
30+
31+
fn create_store(wal_dir: &str) -> TestStore {
32+
let config = OctadConfig::default();
33+
InMemoryOctadStore::new(
34+
config,
35+
Arc::new(SimpleGraphStore::new()),
36+
Arc::new(BruteForceVectorStore::new(3, DistanceMetric::Cosine)),
37+
Arc::new(TantivyDocumentStore::in_memory().unwrap()),
38+
Arc::new(InMemoryTensorStore::new()),
39+
Arc::new(InMemorySemanticStore::new()),
40+
Arc::new(InMemoryVersionStore::new()),
41+
Arc::new(verisim_provenance::InMemoryProvenanceStore::new()),
42+
Arc::new(verisim_spatial::InMemorySpatialStore::new()),
43+
)
44+
.with_wal(wal_dir, verisim_wal::SyncMode::Fsync)
45+
.expect("WAL init")
46+
}
47+
48+
fn doc(title: &str, body: &str) -> OctadInput {
49+
OctadInput {
50+
document: Some(OctadDocumentInput {
51+
title: title.into(),
52+
body: body.into(),
53+
fields: HashMap::new(),
54+
}),
55+
..Default::default()
56+
}
57+
}
58+
59+
#[tokio::test]
60+
async fn crash_recovery_single_entity() {
61+
let dir = tempfile::tempdir().unwrap();
62+
let wal = dir.path().join("wal");
63+
std::fs::create_dir_all(&wal).unwrap();
64+
65+
let entity_id;
66+
{
67+
let store = create_store(wal.to_str().unwrap());
68+
let octad = store.create(doc("Test", "Survives crash")).await.unwrap();
69+
entity_id = octad.id;
70+
// Crash — no graceful_shutdown
71+
}
72+
73+
{
74+
let store = create_store(wal.to_str().unwrap());
75+
let n: usize = store.replay_wal(&wal).await.unwrap();
76+
assert!(n > 0, "Should recover entity");
77+
assert!(store.get(&entity_id).await.unwrap().is_some());
78+
}
79+
}
80+
81+
#[tokio::test]
82+
async fn graceful_shutdown_then_restart() {
83+
let dir = tempfile::tempdir().unwrap();
84+
let wal = dir.path().join("wal");
85+
std::fs::create_dir_all(&wal).unwrap();
86+
87+
let entity_id;
88+
{
89+
let store = create_store(wal.to_str().unwrap());
90+
let octad = store.create(doc("Graceful", "Clean")).await.unwrap();
91+
entity_id = octad.id;
92+
store.graceful_shutdown().await.unwrap();
93+
}
94+
95+
{
96+
let store = create_store(wal.to_str().unwrap());
97+
let n: usize = store.replay_wal(&wal).await.unwrap();
98+
assert!(n > 0);
99+
assert!(store.get(&entity_id).await.unwrap().is_some());
100+
}
101+
}
102+
103+
#[tokio::test]
104+
async fn ten_entities_survive_crash() {
105+
let dir = tempfile::tempdir().unwrap();
106+
let wal = dir.path().join("wal");
107+
std::fs::create_dir_all(&wal).unwrap();
108+
109+
let mut ids = Vec::new();
110+
{
111+
let store = create_store(wal.to_str().unwrap());
112+
for i in 0..10 {
113+
let octad = store.create(doc(&format!("E{i}"), &format!("B{i}"))).await.unwrap();
114+
ids.push(octad.id);
115+
}
116+
// Crash
117+
}
118+
119+
{
120+
let store = create_store(wal.to_str().unwrap());
121+
let n: usize = store.replay_wal(&wal).await.unwrap();
122+
assert_eq!(n, 10);
123+
for id in &ids {
124+
assert!(store.get(id).await.unwrap().is_some(), "{id} missing");
125+
}
126+
}
127+
}
128+
129+
#[tokio::test]
130+
async fn delete_survives_crash() {
131+
let dir = tempfile::tempdir().unwrap();
132+
let wal = dir.path().join("wal");
133+
std::fs::create_dir_all(&wal).unwrap();
134+
135+
let entity_id;
136+
{
137+
let store = create_store(wal.to_str().unwrap());
138+
let octad = store.create(doc("Delete Me", "Gone")).await.unwrap();
139+
entity_id = octad.id;
140+
store.delete(&entity_id).await.unwrap();
141+
// Crash
142+
}
143+
144+
{
145+
let store = create_store(wal.to_str().unwrap());
146+
let _n: usize = store.replay_wal(&wal).await.unwrap();
147+
assert!(store.get(&entity_id).await.unwrap().is_none(), "Should stay deleted");
148+
}
149+
}
150+
151+
#[tokio::test]
152+
async fn empty_wal_clean_start() {
153+
let dir = tempfile::tempdir().unwrap();
154+
let wal = dir.path().join("wal");
155+
std::fs::create_dir_all(&wal).unwrap();
156+
157+
let store = create_store(wal.to_str().unwrap());
158+
let n: usize = store.replay_wal(&wal).await.unwrap();
159+
assert_eq!(n, 0);
160+
}

0 commit comments

Comments
 (0)