Skip to content

Commit 051643a

Browse files
committed
faster caching
1 parent 2a14276 commit 051643a

5 files changed

Lines changed: 291 additions & 285 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ tokio = { version = "1", features = ["full"] }
2929
serde = { version = "1", features = ["derive"] }
3030
serde_json = "1"
3131
bincode = "1"
32+
zstd = "0.13"
3233

3334
[build-dependencies]
3435
lalrpop = "0.22"

src/build/cache.rs

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ use std::collections::{HashMap, HashSet, VecDeque};
77
use std::hash::{Hash, Hasher};
88
use std::io;
99
use std::path::Path;
10+
use std::sync::Arc;
11+
12+
use rayon::prelude::*;
1013

1114
use crate::typechecker::registry::ModuleExports;
1215

13-
use super::portable::{PModuleExports, PortableCacheFile, PortableCachedModule};
16+
use super::portable::{PModuleExports, PortableCacheFile, PortableCachedModule, StringTableBuilder, StringTableReader};
1417

1518
// ===== Module Cache =====
1619

@@ -27,6 +30,8 @@ pub struct ModuleCache {
2730
entries: HashMap<String, CachedModule>,
2831
/// Reverse dependency graph: module → modules that import it
2932
dependents: HashMap<String, Vec<String>>,
33+
/// Whether the cache has been modified since it was loaded. Saving does not clear this flag: `save_to_disk` takes `&self`.
34+
dirty: bool,
3035
}
3136

3237
impl ModuleCache {
@@ -83,6 +88,7 @@ impl ModuleCache {
8388
exports,
8489
imports,
8590
});
91+
self.dirty = true;
8692
}
8793

8894
/// Build the reverse dependency graph from cached import data.
@@ -119,47 +125,74 @@ impl ModuleCache {
119125

120126
/// Remove modules that are no longer in the source set.
121127
pub fn retain_modules(&mut self, module_names: &HashSet<String>) {
128+
let before = self.entries.len();
122129
self.entries.retain(|k, _| module_names.contains(k));
130+
if self.entries.len() != before {
131+
self.dirty = true;
132+
}
133+
}
134+
135+
/// Returns true if the cache has been modified since load.
136+
pub fn is_dirty(&self) -> bool {
137+
self.dirty
123138
}
124139

125-
/// Save cache to disk using bincode serialization.
140+
/// Save cache to disk as zstd-compressed bincode with a shared string table. Does nothing if the cache is not dirty.
126141
pub fn save_to_disk(&self, path: &Path) -> io::Result<()> {
142+
if !self.dirty {
143+
log::debug!("Cache unchanged, skipping save");
144+
return Ok(());
145+
}
146+
let mut st = StringTableBuilder::new();
147+
148+
let modules = self.entries.iter().map(|(name, cached)| {
149+
(name.clone(), PortableCachedModule {
150+
content_hash: cached.content_hash,
151+
exports: PModuleExports::from_exports(&cached.exports, &mut st),
152+
imports: cached.imports.clone(),
153+
})
154+
}).collect();
155+
127156
let portable = PortableCacheFile {
128-
modules: self.entries.iter().map(|(name, cached)| {
129-
(name.clone(), PortableCachedModule {
130-
content_hash: cached.content_hash,
131-
exports: PModuleExports::from(&cached.exports),
132-
imports: cached.imports.clone(),
133-
})
134-
}).collect(),
157+
string_table: st.into_table(),
158+
modules,
135159
};
136160

137-
let encoded = bincode::serialize(&portable)
138-
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode serialize: {e}")))?;
139-
140161
if let Some(parent) = path.parent() {
141162
std::fs::create_dir_all(parent)?;
142163
}
143-
std::fs::write(path, encoded)
164+
let file = std::fs::File::create(path)?;
165+
let mut encoder = zstd::Encoder::new(file, 1)
166+
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd encoder: {e}")))?;
167+
bincode::serialize_into(&mut encoder, &portable)
168+
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode serialize: {e}")))?;
169+
encoder.finish()
170+
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd finish: {e}")))?;
171+
Ok(())
144172
}
145173

146174
/// Load cache from disk (zstd-compressed bincode) and rebuild the reverse dependency graph.
147175
pub fn load_from_disk(path: &Path) -> io::Result<Self> {
148-
let data = std::fs::read(path)?;
149-
let portable: PortableCacheFile = bincode::deserialize(&data)
176+
let file = std::fs::File::open(path)?;
177+
let decoder = io::BufReader::new(zstd::Decoder::new(file)
178+
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd decoder: {e}")))?);
179+
let portable: PortableCacheFile = bincode::deserialize_from(decoder)
150180
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode deserialize: {e}")))?;
151181

152-
let entries = portable.modules.into_iter().map(|(name, cached)| {
182+
let st = Arc::new(StringTableReader::new(portable.string_table));
183+
184+
let entries: HashMap<String, CachedModule> = portable.modules.into_par_iter().map(|(name, cached)| {
153185
(name, CachedModule {
154186
content_hash: cached.content_hash,
155-
exports: ModuleExports::from(cached.exports),
187+
exports: cached.exports.to_exports(&st),
156188
imports: cached.imports,
157189
})
158190
}).collect();
159191

160192
let mut cache = ModuleCache {
161193
entries,
162194
dependents: HashMap::new(),
195+
dirty: false,
163196
};
164197
cache.build_reverse_deps();
165198
Ok(cache)

0 commit comments

Comments
 (0)