Merge pull request #4 from 1Cor125/add_global_db_registry

jjhafer · web-flow · commit f595d9d9bb21 · 2025-11-18T13:32:10.000-05:00
Add global db registry
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -41,10 +41,20 @@ jobs:
            run: npm ci
 
          - name: Run standards checks
-           run: npm run standards
-           env:
-              # For PRs: lint from base branch. For pushes: lint from previous commit
-              COMMITLINT_FROM: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }}
+           run: |
+             # Determine commitlint range
+             if [ "${{ github.event_name }}" = "pull_request" ]; then
+                export COMMITLINT_FROM="${{ github.event.pull_request.base.sha }}"
+             else
+                # For pushes, verify the before commit exists
+                if git cat-file -e "${{ github.event.before }}" 2>/dev/null; then
+                   export COMMITLINT_FROM="${{ github.event.before }}"
+                else
+                   # Fallback: check only HEAD commit on force push or first push
+                   export COMMITLINT_FROM="HEAD~1"
+                fi
+             fi
+             npm run standards
 
          - name: Run cargo check
            run: cargo check --workspace --all-targets
diff --git a/crates/sqlx-sqlite-conn-mgr/README.md b/crates/sqlx-sqlite-conn-mgr/README.md
@@ -12,12 +12,12 @@ management.
      Prevents violation of access policies and/or a glut of open file handles and
      (mostly) idle threads
    * **Connection pooling**:
-      * Read-only pool for concurrent reads (up to 6 connections)
+      * Read-only pool for concurrent reads (default: 6 connections, configurable)
    * **Lazy write pool**: Single write connection pool (max_connections=1) initialized on
      first use
    * **Exclusive write access**: WriteGuard ensures serialized writes
-   * **WAL mode**: Automatically enabled on first `acquire_writer()` call
-     (idempotent)
+   * **WAL mode**: Automatically enabled on first `acquire_writer()` call (setting
+     journal mode to WAL is safe and idempotent)
       * See [WAL documentation](https://www.sqlite.org/wal.html) for details
    * **30-second idle timeout**: Both read and write connections close after
      30 seconds of inactivity
@@ -44,18 +44,19 @@ use std::sync::Arc;
 #[tokio::main]
 async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> {
     // Connect to database (creates if missing, returns Arc<SqliteDatabase>)
-    let db = SqliteDatabase::connect("example.db").await?;
+    // (See below for how to customize the configuration)
+    let db = SqliteDatabase::connect("example.db", None).await?;
 
     // Multiple connects to the same path return the same instance
-    let db2 = SqliteDatabase::connect("example.db").await?;
+    let db2 = SqliteDatabase::connect("example.db", None).await?;
     assert!(Arc::ptr_eq(&db, &db2));
 
-    // Use read_pool() for SELECT queries (supports concurrent reads)
+    // Use read_pool() for read queries (supports concurrent reads)
     let rows = query("SELECT * FROM users")
         .fetch_all(db.read_pool()?)
         .await?;
 
-    // Optionally acquire writer for INSERT/UPDATE/DELETE (exclusive access)
+    // Optionally acquire writer for write queries (exclusive access)
     // WAL mode is enabled automatically on first call
     let mut writer = db.acquire_writer().await?;
     query("INSERT INTO users (name) VALUES (?)")
@@ -70,12 +71,39 @@ async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> {
 }
 ```
 
+### Custom Configuration
+
+Only customize the configuration when the defaults don't meet your requirements:
+
+```rust
+use sqlx_sqlite_conn_mgr::{SqliteDatabase, SqliteDatabaseConfig};
+use std::time::Duration;
+
+#[tokio::main]
+async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> {
+    // Only create custom configuration when defaults aren't suitable
+    let custom_config = SqliteDatabaseConfig {
+        max_read_connections: 10,
+        idle_timeout: Duration::from_secs(60),
+    };
+
+    // Pass custom configuration to connect()
+    let db = SqliteDatabase::connect("example.db", Some(custom_config)).await?;
+
+    // Use the database as normal...
+    db.close().await?;
+    Ok(())
+}
+```
+
 ## API Overview
 
 ### `SqliteDatabase`
 
-   * `connect(path)` - Connect to a database (creates if missing, returns cached
-     `Arc<SqliteDatabase>` if already open)
+   * `connect(path, custom_config)` - Connect to a database (creates if missing,
+     returns cached `Arc<SqliteDatabase>` if already open). Pass `None` for
+     `custom_config` to use defaults (recommended for most use cases), or
+     `Some(SqliteDatabaseConfig)` when you need to customize the configuration
    * `read_pool()` - Get reference to the read-only connection pool for read
      operations (returns `Result`)
    * `acquire_writer()` - Acquire exclusive write access (returns
@@ -85,6 +113,15 @@ async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> {
    * `close_and_remove()` - Close and delete all database files (.db, .db-wal,
      .db-shm)
 
+### `SqliteDatabaseConfig`
+
+Configuration for connection pool behavior:
+
+   * `max_read_connections: u32` - Maximum number of concurrent read connections
+     (default: 6)
+   * `idle_timeout: Duration` - How long idle connections remain open before
+     being closed (default: 30 seconds)
+
 ### `WriteGuard`
 
 RAII guard that provides exclusive write access. Automatically returns the
@@ -111,12 +148,11 @@ queries.
    is released via `WriteGuard` drop.
 
 5. **Connection Management**:
-   * Read pool: 6 concurrent connections by default, 0 cached
-      * Can be configured via `SqliteDatabaseConfig`
-   * Write pool: max 1 connection, 0 cached
-   * Idle timeout: 30 seconds for both pools
-      * Can be configured via `SqliteDatabaseConfig`
-   * No perpetual caching to minimize idle thread overhead
+   * Read pool: 6 concurrent connections by default (configurable via `custom_config`)
+   * Write pool: max 1 connection
+   * Minimum connections: 0 (no perpetual caching)
+   * Idle timeout: 30 seconds by default (configurable via `custom_config`)
+   * Only customize `SqliteDatabaseConfig` when defaults don't meet your needs
 
 ## Error Handling
 
diff --git a/crates/sqlx-sqlite-conn-mgr/src/error.rs b/crates/sqlx-sqlite-conn-mgr/src/error.rs
@@ -18,6 +18,3 @@ pub enum Error {
    #[error("Database has been closed")]
    DatabaseClosed,
 }
-
-/// A type alias for Results with our Error type
-pub type Result<T> = std::result::Result<T, Error>;
diff --git a/crates/sqlx-sqlite-conn-mgr/src/lib.rs b/crates/sqlx-sqlite-conn-mgr/src/lib.rs
@@ -12,22 +12,68 @@
 //!
 //! ## Architecture
 //!
-//! - **Dual pools**: Separate read-only pool (max 6 connections) and write pool (max 1 connection)
+//! - **Connection pooling**: Separate read-only pool and write pool with a max of 1 connection
 //! - **Lazy WAL mode**: Write-Ahead Logging enabled automatically on first write
 //! - **Exclusive writes**: Single-connection write pool enforces serialized write access
 //! - **Concurrent reads**: Multiple readers can query simultaneously via the read pool
-
-// TODO: Remove these allows once implementation is complete
-#![allow(dead_code)]
+//!
+//! ## Usage
+//!
+//! // TODO: Remove this ignore once implementation is complete
+//! ```ignore
+//! use sqlx_sqlite_conn_mgr::SqliteDatabase;
+//! use std::sync::Arc;
+//!
+//! #[tokio::main]
+//! async fn main() -> sqlx_sqlite_conn_mgr::Result<()> {
+//!     // Connect returns Arc<SqliteDatabase>
+//!     let db = SqliteDatabase::connect("example.db", None).await?;
+//!
+//!     // Multiple connects to the same path return the same instance
+//!     let db2 = SqliteDatabase::connect("example.db", None).await?;
+//!     assert!(Arc::ptr_eq(&db, &db2));
+//!
+//!     // Use read_pool() for read queries (concurrent reads)
+//!     let rows = sqlx::query("SELECT * FROM users")
+//!         .fetch_all(db.read_pool()?)
+//!         .await?;
+//!
+//!     // Optionally acquire writer for write queries (exclusive)
+//!     // WAL mode is enabled automatically on first call
+//!     let mut writer = db.acquire_writer().await?;
+//!     sqlx::query("INSERT INTO users (name) VALUES (?)")
+//!         .bind("Alice")
+//!         .execute(&mut *writer)
+//!         .await?;
+//!
+//!     // Close when done
+//!     db.close().await?;
+//!     Ok(())
+//! }
+//! ```
+//!
+//! ## Design Principles
+//!
+//! - Uses sqlx's `SqlitePoolOptions` for all pool configuration
+//! - Uses sqlx's `SqliteConnectOptions` for connection flags and configuration
+//! - Minimal custom logic - delegates to sqlx wherever possible
+//! - Global registry caches new database instances (with their pools) and returns existing ones
+//! - WAL mode is enabled lazily only when writes are needed
+//!
+// TODO: Remove this allow once implementation is complete
 #![allow(unused)]
 
 mod config;
 mod database;
 mod error;
+mod registry;
 mod write_guard;
 
 // Re-export public types
 pub use config::SqliteDatabaseConfig;
 pub use database::SqliteDatabase;
-pub use error::{Error, Result};
+pub use error::Error;
 pub use write_guard::WriteGuard;
+
+/// A type alias for Results with our custom Error type
+pub type Result<T> = std::result::Result<T, Error>;
diff --git a/crates/sqlx-sqlite-conn-mgr/src/registry.rs b/crates/sqlx-sqlite-conn-mgr/src/registry.rs
@@ -0,0 +1,170 @@
+//! Global database registry to cache new database instances and return existing ones
+
+use crate::Result;
+use crate::database::SqliteDatabase;
+use std::collections::HashMap;
+use std::future::Future;
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, OnceLock, Weak};
+use tokio::sync::RwLock;
+
+/// Global registry for SQLite databases
+static DATABASE_REGISTRY: OnceLock<RwLock<HashMap<PathBuf, Weak<SqliteDatabase>>>> =
+   OnceLock::new();
+
+fn registry() -> &'static RwLock<HashMap<PathBuf, Weak<SqliteDatabase>>> {
+   DATABASE_REGISTRY.get_or_init(|| RwLock::new(HashMap::new()))
+}
+
+/// Check if a path represents an in-memory SQLite database
+///
+/// Returns true for `:memory:` and `file::memory:*` URIs
+pub fn is_memory_database(path: &Path) -> bool {
+   let path_str = path.to_str().unwrap_or("");
+   path_str == ":memory:"
+      || path_str.starts_with("file::memory:")
+      || path_str.contains("mode=memory")
+}
+
+/// Get or open a SQLite database connection
+///
+/// If a database is already connected, returns the cached instance.
+/// Otherwise, calls the provided factory function to create a new connection.
+///
+/// Special case: `:memory:` databases should not be cached (each is unique)
+pub async fn get_or_open_database<F, Fut>(path: &Path, factory: F) -> Result<Arc<SqliteDatabase>>
+where
+   F: FnOnce() -> Fut,
+   Fut: Future<Output = Result<SqliteDatabase>>,
+{
+   // Skip registry for in-memory databases - always create new
+   if is_memory_database(path) {
+      let db = factory().await?;
+      return Ok(Arc::new(db));
+   }
+
+   // Canonicalize the path for consistent lookups
+   let canonical_path = canonicalize_path(path)?;
+
+   // Try to get existing database with read lock (allows concurrent reads)
+   {
+      let registry = registry().read().await;
+
+      if let Some(weak) = registry.get(&canonical_path) {
+         if let Some(db) = weak.upgrade() {
+            return Ok(db);
+         }
+         // Weak reference exists but dead - will be cleaned up in write phase
+      }
+   }
+
+   // Phase 2: Database not found, acquire write lock
+   let mut registry = registry().write().await;
+
+   // Double-check: another thread might have created it while we waited for write lock
+   if let Some(weak) = registry.get(&canonical_path) {
+      if let Some(db) = weak.upgrade() {
+         return Ok(db);
+      }
+   }
+
+   // Clean up dead weak references while we have the write lock
+   registry.retain(|_, weak| weak.strong_count() > 0);
+
+   // Now we're sure the database doesn't exist - create it while holding the lock
+   // This prevents race conditions
+   let db = factory().await?;
+   let arc_db = Arc::new(db);
+
+   // Cache the new database
+   registry.insert(canonical_path, Arc::downgrade(&arc_db));
+
+   Ok(arc_db)
+}
+
+/// Helper to canonicalize a database path
+///
+/// This function attempts to resolve paths to their canonical form to ensure
+/// consistent cache lookups. It handles:
+/// - Absolute path resolution
+/// - Symlink resolution (when file exists)
+/// - Parent directory canonicalization (when file doesn't exist yet)
+///
+/// Known limitations when file doesn't exist:
+/// - Case sensitivity: On case-insensitive filesystems (macOS, Windows), paths
+///   differing only in case will be treated as different until the file is created.
+///   This could lead to multiple connection pools for the same logical database, at
+///   least until the file is created and can be canonicalized properly.
+/// - Symlinks in filename: If the filename itself will be a symlink (rare for SQLite),
+///   different symlink names won't be resolved until the file exists.
+fn canonicalize_path(path: &Path) -> std::io::Result<PathBuf> {
+   match path.canonicalize() {
+      Ok(p) => Ok(p),
+      Err(_) => {
+         // If path doesn't exist, try to canonicalize parent + filename
+         let parent = path.parent().unwrap_or_else(|| Path::new("."));
+         let filename = path
+            .file_name()
+            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid path"))?;
+         let canonical_parent = parent.canonicalize()?;
+
+         // Note: We preserve the filename case as provided. On case-insensitive
+         // filesystems, this means "MyDB.db" and "mydb.db" will create separate
+         // cache entries until the file exists and can be canonicalized properly.
+         // This is a known limitation but acceptable since:
+         // 1. Most apps use consistent casing
+         // 2. After first connection creates the file, subsequent connects will
+         //    use the canonical (on-disk) case
+         Ok(canonical_parent.join(filename))
+      }
+   }
+}
+
+/// Remove a database from the cache
+///
+/// Special case: `:memory:` databases are never in the registry
+///
+/// Returns an error if the path cannot be canonicalized
+pub async fn uncache_database(path: &Path) -> std::io::Result<()> {
+   // Skip registry for in-memory databases
+   if is_memory_database(path) {
+      return Ok(());
+   }
+
+   // Canonicalize path
+   let canonical_path = canonicalize_path(path)?;
+
+   let mut registry = registry().write().await;
+   registry.remove(&canonical_path);
+   Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+   use super::*;
+
+   #[test]
+   fn test_canonicalize_path() {
+      let temp_dir = std::env::temp_dir();
+      let test_path = temp_dir.join("test.db");
+
+      // Test that path is canonicalized to absolute path
+      let canonical = canonicalize_path(&test_path).unwrap();
+      assert!(canonical.is_absolute());
+
+      // Test relative path
+      let relative_path = Path::new("./test_relative.db");
+      let canonical_relative = canonicalize_path(relative_path).unwrap();
+      assert!(canonical_relative.is_absolute());
+   }
+
+   #[test]
+   fn test_canonicalize_nonexistent_path() {
+      let temp_dir = std::env::temp_dir();
+      let nonexistent = temp_dir.join("nonexistent_dir").join("test.db");
+
+      // Should fail if parent directory doesn't exist
+      let result = canonicalize_path(&nonexistent);
+      assert!(result.is_err());
+   }
+}
diff --git a/crates/sqlx-sqlite-conn-mgr/src/write_guard.rs b/crates/sqlx-sqlite-conn-mgr/src/write_guard.rs

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,3 @@ pub enum Error {`
`18`	`18`	`#[error("Database has been closed")]`
`19`	`19`	`DatabaseClosed,`
`20`	`20`	`}`
`21`		`-`
`22`		`-/// A type alias for Results with our Error type`
`23`		`-pub type Result<T> = std::result::Result<T, Error>;`