Skip to content

Commit 2477c0b

Browse files
committed
Fix #43 by invalidating the cache on a new connection
1 parent 29d9e11 commit 2477c0b

4 files changed

Lines changed: 39 additions & 16 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ edition = "2024"
55
license = "LGPL-2.0-or-later"
66
name = "sqlite-zstd"
77
repository = "https://github.com/phiresky/sqlite-zstd"
8-
version = "0.3.2"
8+
version = "0.3.3"
99
readme = "README.md"
1010
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1111

src/dict_management.rs

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,50 @@
11
use anyhow::Context as AContext;
2+
use lru_time_cache::LruCache;
3+
use rusqlite::Connection;
24
use rusqlite::{functions::Context, params};
3-
use std::sync::{Arc, RwLock};
5+
use std::sync::LazyLock;
6+
use std::sync::{Arc, Mutex};
47
use std::time::Duration;
58

69
use zstd::dict::{DecoderDictionary, EncoderDictionary};
710

11+
// we cache the instantiated encoder dictionaries keyed by (DbConnection, dict_id, compression_level)
12+
// DbConnection would ideally be db.path() because it's the same for multiple connections to the same db, but that would be less robust (e.g. in-memory databases)
13+
// we use a Mutex and not a RwLock because even the .get() methods on LruCache need to write (to update expiry and least recently used time)
14+
static ENCODER_DICTS: LazyLock<
15+
Mutex<LruCache<(usize, i32, i32), Arc<EncoderDictionary<'static>>>>,
16+
> = LazyLock::new(|| Mutex::new(LruCache::with_expiry_duration(Duration::from_secs(10))));
17+
18+
static DECODER_DICTS: LazyLock<Mutex<LruCache<(usize, i32), Arc<DecoderDictionary<'static>>>>> =
19+
LazyLock::new(|| Mutex::new(LruCache::with_expiry_duration(Duration::from_secs(10))));
20+
21+
/// when we open a new connection, it may reuse the same pointer location as an old connection, so we need to invalidate parts of the dict cache
22+
pub(crate) fn invalidate_caches(_db: &Connection) {
23+
// (theoretically we only need to clear caches with key db_handle_pointer but it likely doesn't matter much,
24+
// how often are you going to open a new connection?)
25+
// let db_handle_pointer = unsafe { db.handle() } as usize;
26+
log::debug!("Invalidating dict caches");
27+
{
28+
let mut cache = ENCODER_DICTS.lock().unwrap();
29+
cache.clear();
30+
}
31+
{
32+
let mut cache = DECODER_DICTS.lock().unwrap();
33+
cache.clear();
34+
}
35+
}
836
// TODO: the rust interface currently requires a level when preparing a dictionary, but the zstd interface (ZSTD_CCtx_loadDictionary) does not.
937
// TODO: Using LruCache here isn't very smart
1038
pub fn encoder_dict_from_ctx(
1139
ctx: &Context,
1240
arg_index: usize,
1341
level: i32,
1442
) -> anyhow::Result<Arc<EncoderDictionary<'static>>> {
15-
use lru_time_cache::LruCache;
16-
// we cache the instantiated encoder dictionaries keyed by (DbConnection, dict_id, compression_level)
17-
// DbConnection would ideally be db.path() because it's the same for multiple connections to the same db, but that would be less robust (e.g. in-memory databases)
18-
lazy_static::lazy_static! {
19-
static ref DICTS: RwLock<LruCache<(usize, i32, i32), Arc<EncoderDictionary<'static>>>> = RwLock::new(LruCache::with_expiry_duration(Duration::from_secs(10)));
20-
}
2143
let id: i32 = ctx.get(arg_index)?;
2244
let db = unsafe { ctx.get_connection()? }; // SAFETY: This might be unsafe depending on how the connection is used. See https://github.com/rusqlite/rusqlite/issues/643#issuecomment-640181213
2345
let db_handle_pointer = unsafe { db.handle() } as usize; // SAFETY: We're only getting the pointer as an int, not using the raw connection
2446

25-
let mut dicts_write = DICTS.write().unwrap();
47+
let mut dicts_write = ENCODER_DICTS.lock().unwrap();
2648
let entry = dicts_write.entry((db_handle_pointer, id, level));
2749
let res = match entry {
2850
lru_time_cache::Entry::Vacant(e) => e.insert({
@@ -52,17 +74,17 @@ pub fn decoder_dict_from_ctx(
5274
ctx: &Context,
5375
arg_index: usize,
5476
) -> anyhow::Result<Arc<DecoderDictionary<'static>>> {
55-
use lru_time_cache::LruCache;
5677
// we cache the instantiated decoder dictionaries keyed by (DbConnection, dict_id)
5778
// DbConnection would ideally be db.path() because it's the same for multiple connections to the same db, but that would be less robust (e.g. in-memory databases)
58-
lazy_static::lazy_static! {
59-
static ref DICTS: RwLock<LruCache<(usize, i32), Arc<DecoderDictionary<'static>>>> = RwLock::new(LruCache::with_expiry_duration(Duration::from_secs(10)));
60-
}
6179
let id: i32 = ctx.get(arg_index)?;
6280
let db = unsafe { ctx.get_connection()? }; // SAFETY: This might be unsafe depending on how the connection is used. See https://github.com/rusqlite/rusqlite/issues/643#issuecomment-640181213
6381
let db_handle_pointer = unsafe { db.handle() } as usize; // SAFETY: We're only getting the pointer as an int, not using the raw connection
64-
let mut dicts_write = DICTS.write().unwrap();
65-
let entry = dicts_write.entry((db_handle_pointer, id));
82+
log::trace!("Using DB Handle pointer {db_handle_pointer} as cache key");
83+
let cache_key = (db_handle_pointer, id);
84+
// since the get() function on lru cache also writes (updates last used time and expiry),
85+
// we can not use DICTS.read() (RwLock) for perf
86+
let mut dicts_write = DECODER_DICTS.lock().unwrap();
87+
let entry = dicts_write.entry(cache_key);
6688
let res = match entry {
6789
lru_time_cache::Entry::Vacant(e) => e.insert({
6890
log::debug!(

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,6 @@ pub fn load_with_loglevel(
2626
default_log_level: LogLevel,
2727
) -> anyhow::Result<()> {
2828
init_logging(default_log_level);
29+
crate::dict_management::invalidate_caches(connection);
2930
crate::add_functions::add_functions(connection)
3031
}

0 commit comments

Comments
 (0)