diff --git a/Cargo.lock b/Cargo.lock
index 89d20bdf647..aa077526a4b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4808,6 +4808,7 @@ dependencies = [
  "lance-encoding",
  "lance-file",
  "lance-geo",
+ "lance-index-core",
  "lance-io",
  "lance-linalg",
  "lance-select",
@@ -4840,6 +4841,33 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "lance-index-core"
+version = "8.1.0-beta.0"
+dependencies = [
+ "arrow-array",
+ "arrow-schema",
+ "arrow-select",
+ "async-recursion",
+ "async-trait",
+ "bytes",
+ "datafusion",
+ "datafusion-common",
+ "datafusion-expr",
+ "futures",
+ "lance-core",
+ "lance-derive",
+ "lance-io",
+ "lance-select",
+ "prost",
+ "prost-types",
+ "roaring",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tracing",
+]
+
 [[package]]
 name = "lance-io"
 version = "8.1.0-beta.0"
@@ -5038,6 +5066,7 @@ dependencies = [
  "lance-core",
  "lance-datagen",
  "lance-file",
+ "lance-index-core",
  "lance-io",
  "lance-select",
  "lance-testing",
diff --git a/Cargo.toml b/Cargo.toml
index f902f10496b..00c19cdf7b8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ members = [
     "rust/lance-file",
     "rust/lance-geo",
     "rust/lance-index",
+    "rust/lance-index-core",
     "rust/lance-io",
     "rust/lance-linalg",
     "rust/lance-namespace",
@@ -67,6 +68,7 @@ lance-encoding = { version = "=8.1.0-beta.0", path = "./rust/lance-encoding" }
 lance-file = { version = "=8.1.0-beta.0", path = "./rust/lance-file" }
 lance-geo = { version = "=8.1.0-beta.0", path = "./rust/lance-geo" }
 lance-index = { version = "=8.1.0-beta.0", path = "./rust/lance-index" }
+lance-index-core = { version = "=8.1.0-beta.0", path = "./rust/lance-index-core" }
 lance-io = { version = "=8.1.0-beta.0", path = "./rust/lance-io", default-features = false }
 lance-linalg = { version = "=8.1.0-beta.0", path = "./rust/lance-linalg" }
 lance-namespace = { version = "=8.1.0-beta.0", path = "./rust/lance-namespace" }
diff --git a/rust/lance-index-core/Cargo.toml b/rust/lance-index-core/Cargo.toml
new file mode 100644
index 00000000000..6d87c63e293
--- /dev/null
+++ b/rust/lance-index-core/Cargo.toml
@@ -0,0 +1,38 @@
+[package]
+name = "lance-index-core"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+repository.workspace = true
+readme = "README.md"
+description = "Lance index core traits and abstract types"
+keywords.workspace = true
+categories.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+async-recursion.workspace = true
+async-trait.workspace = true
+arrow-array.workspace = true
+arrow-schema.workspace = true
+arrow-select.workspace = true
+bytes.workspace = true
+datafusion.workspace = true
+datafusion-common.workspace = true
+datafusion-expr.workspace = true
+futures.workspace = true
+lance-core.workspace = true
+lance-derive.workspace = true
+lance-io = { workspace = true }
+lance-select.workspace = true
+prost.workspace = true
+prost-types.workspace = true
+roaring.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+tokio.workspace = true
+tracing.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/lance-index-core/src/index.rs b/rust/lance-index-core/src/index.rs
new file mode 100644
index 00000000000..ebae4bd90ea
--- /dev/null
+++ b/rust/lance-index-core/src/index.rs
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::{any::Any, sync::Arc};
+
+use async_trait::async_trait;
+use lance_core::Result;
+use lance_core::deepsize::DeepSizeOf;
+use roaring::RoaringBitmap;
+use serde::{Deserialize, Serialize};
+
+/// Generic methods common to all types of secondary indices.
+#[async_trait]
+pub trait Index: Send + Sync + DeepSizeOf {
+    /// Cast to [`Any`] so callers can downcast to the concrete index type.
+    fn as_any(&self) -> &dyn Any;
+
+    /// Convert an `Arc` of the concrete index into an `Arc<dyn Index>` trait object.
+    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
+
+    /// Retrieve index statistics as a JSON value.
+    fn statistics(&self) -> Result<serde_json::Value>;
+
+    /// Prewarm the index.
+    ///
+    /// This will load the index into memory and cache it.
+    async fn prewarm(&self) -> Result<()>;
+
+    /// Get the type of the index.
+    fn index_type(&self) -> IndexType;
+
+    /// Read through the index and determine which fragment ids it covers.
+    ///
+    /// This is a relatively slow operation; prefer the `fragment_bitmap` when possible.
+    /// This only exists for cases where the `fragment_bitmap` is corrupted or missing.
+    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
+}
+
+/// The kind of a secondary index; every variant has an explicit numeric code.
+#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
+pub enum IndexType {
+    // Reserve 0-99 for simple indices (100 is the first vector index code).
+    Scalar = 0, // Legacy scalar index, alias to BTree
+
+    BTree = 1, // BTree
+
+    Bitmap = 2, // Bitmap
+
+    LabelList = 3, // LabelList
+
+    Inverted = 4, // Inverted
+
+    NGram = 5, // NGram
+
+    FragmentReuse = 6, // System index, see `is_system`
+
+    MemWal = 7, // System index, see `is_system`
+
+    ZoneMap = 8, // ZoneMap
+
+    BloomFilter = 9, // Bloom filter
+
+    RTree = 10, // RTree
+
+    FMIndex = 11, // FM-Index
+
+    // 100 and up are for vector indices.
+    /// Legacy vector index type; `Display` renders it as "IVF_PQ", the same as `IvfPq`.
+    Vector = 100, // Legacy vector index, alias to IvfPq
+    IvfFlat = 101,
+    IvfSq = 102,
+    IvfPq = 103,
+    IvfHnswSq = 104,
+    IvfHnswPq = 105,
+    IvfHnswFlat = 106,
+    IvfRq = 107,
+}
+
+impl std::fmt::Display for IndexType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Scalar | Self::BTree => "BTree", // legacy `Scalar` shares BTree's name
+            Self::Bitmap => "Bitmap",
+            Self::LabelList => "LabelList",
+            Self::Inverted => "Inverted",
+            Self::NGram => "NGram",
+            Self::FragmentReuse => "FragmentReuse",
+            Self::MemWal => "MemWal",
+            Self::ZoneMap => "ZoneMap",
+            Self::BloomFilter => "BloomFilter",
+            Self::RTree => "RTree",
+            Self::FMIndex => "FMIndex",
+            Self::Vector | Self::IvfPq => "IVF_PQ", // legacy `Vector` shares IvfPq's name
+            Self::IvfFlat => "IVF_FLAT",
+            Self::IvfSq => "IVF_SQ",
+            Self::IvfHnswSq => "IVF_HNSW_SQ",
+            Self::IvfHnswPq => "IVF_HNSW_PQ",
+            Self::IvfHnswFlat => "IVF_HNSW_FLAT",
+            Self::IvfRq => "IVF_RQ",
+        })
+    }
+}
+
+use lance_core::Error;
+
+impl TryFrom<i32> for IndexType {
+    type Error = Error;
+    /// Maps a numeric code to the variant with that discriminant; unknown codes are an error.
+    fn try_from(value: i32) -> Result<Self> {
+        match value {
+            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
+            v if v == Self::BTree as i32 => Ok(Self::BTree),
+            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
+            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
+            v if v == Self::NGram as i32 => Ok(Self::NGram),
+            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
+            v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
+            v if v == Self::MemWal as i32 => Ok(Self::MemWal),
+            v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
+            v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
+            v if v == Self::RTree as i32 => Ok(Self::RTree),
+            v if v == Self::FMIndex as i32 => Ok(Self::FMIndex),
+            v if v == Self::Vector as i32 => Ok(Self::Vector),
+            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
+            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
+            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
+            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
+            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
+            v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
+            v if v == Self::IvfRq as i32 => Ok(Self::IvfRq),
+            _ => Err(Error::invalid_input_source(
+                format!("the input value {} is not a valid IndexType", value).into(),
+            )),
+        }
+    }
+}
+
+impl TryFrom<&str> for IndexType {
+    type Error = Error;
+    /// Parses the names written by `Display` (plus some aliases); matching is case-sensitive.
+    fn try_from(value: &str) -> Result<Self> {
+        match value {
+            "BTree" | "BTREE" => Ok(Self::BTree),
+            "Bitmap" | "BITMAP" => Ok(Self::Bitmap),
+            "LabelList" | "LABELLIST" => Ok(Self::LabelList),
+            "Inverted" | "INVERTED" => Ok(Self::Inverted),
+            "NGram" | "NGRAM" => Ok(Self::NGram),
+            "ZoneMap" | "ZONEMAP" => Ok(Self::ZoneMap),
+            "BloomFilter" | "BLOOMFILTER" | "BLOOM_FILTER" => Ok(Self::BloomFilter),
+            "RTree" | "RTREE" | "R_TREE" => Ok(Self::RTree),
+            "FMIndex" | "FMINDEX" | "FM_INDEX" => Ok(Self::FMIndex),
+            "Vector" | "VECTOR" => Ok(Self::Vector),
+            "IVF_FLAT" => Ok(Self::IvfFlat),
+            "IVF_SQ" => Ok(Self::IvfSq),
+            "IVF_PQ" => Ok(Self::IvfPq),
+            "IVF_RQ" => Ok(Self::IvfRq),
+            "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
+            "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
+            "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
+            "FragmentReuse" => Ok(Self::FragmentReuse),
+            "MemWal" => Ok(Self::MemWal),
+            _ => Err(Error::invalid_input(format!(
+                "invalid index type: {}",
+                value
+            ))),
+        }
+    }
+}
+
+impl IndexType {
+    /// Whether this is one of the scalar index types.
+    ///
+    /// `FragmentReuse` and `MemWal` are not scalar; see [`Self::is_system`].
+    pub fn is_scalar(&self) -> bool {
+        matches!(
+            self,
+            Self::Scalar
+                | Self::BTree
+                | Self::Bitmap
+                | Self::LabelList
+                | Self::Inverted
+                | Self::NGram
+                | Self::ZoneMap
+                | Self::BloomFilter
+                | Self::RTree
+                | Self::FMIndex
+        )
+    }
+
+    /// Whether this is one of the vector index types (including legacy `Vector`).
+    pub fn is_vector(&self) -> bool {
+        matches!(
+            self,
+            Self::Vector
+                | Self::IvfFlat
+                | Self::IvfSq
+                | Self::IvfPq
+                | Self::IvfRq
+                | Self::IvfHnswFlat
+                | Self::IvfHnswSq
+                | Self::IvfHnswPq
+        )
+    }
+
+    /// Whether this is a system index type (`FragmentReuse` or `MemWal`).
+    pub fn is_system(&self) -> bool {
+        matches!(self, Self::FragmentReuse | Self::MemWal)
+    }
+
+    /// Returns the current format version of the index type.
+    ///
+    /// Indices with a higher version than this will be ignored for compatibility.
+    /// This can happen when creating an index in a newer version of Lance but
+    /// opening it in an older version.
+    pub fn version(&self) -> i32 {
+        match self {
+            // Scalar and system index types are all at format version 0.
+            Self::Scalar
+            | Self::BTree
+            | Self::Bitmap
+            | Self::LabelList
+            | Self::Inverted
+            | Self::NGram
+            | Self::FragmentReuse
+            | Self::MemWal
+            | Self::ZoneMap
+            | Self::BloomFilter
+            | Self::RTree
+            | Self::FMIndex => 0,
+
+            // IMPORTANT: if any vector index subtype needs a format bump that is
+            // not backward compatible, its new version must be set to
+            // (current max vector index version + 1), even if only one subtype
+            // changed. Compatibility filtering currently cannot distinguish vector
+            // subtypes from details-only metadata, so vector versions effectively
+            // share one global monotonic compatibility level.
+            Self::Vector
+            | Self::IvfFlat
+            | Self::IvfSq
+            | Self::IvfPq
+            | Self::IvfHnswSq
+            | Self::IvfHnswPq
+            | Self::IvfHnswFlat => VECTOR_INDEX_VERSION as i32,
+            Self::IvfRq => IVF_RQ_INDEX_VERSION as i32,
+        }
+    }
+
+    /// Returns the target partition size for the index type.
+    ///
+    /// Optimized for the best performance of vector indices.
+    pub fn target_partition_size(&self) -> usize {
+        match self {
+            Self::IvfFlat | Self::IvfRq => 4096,
+            Self::IvfHnswFlat | Self::IvfHnswSq | Self::IvfHnswPq => 1 << 20,
+            // `Vector`, `IvfSq`, `IvfPq` and every non-vector type use 8192.
+            _ => 8192,
+        }
+    }
+
+    /// Returns the highest supported vector index version in this Lance build.
+    pub fn max_vector_version() -> u32 {
+        [
+            Self::Vector,
+            Self::IvfFlat,
+            Self::IvfSq,
+            Self::IvfPq,
+            Self::IvfHnswSq,
+            Self::IvfHnswPq,
+            Self::IvfHnswFlat,
+            Self::IvfRq,
+        ]
+        .into_iter()
+        .map(|vector_type| vector_type.version() as u32)
+        .fold(VECTOR_INDEX_VERSION, u32::max)
+    }
+}
+
+/// Default version for vector index metadata.
+///
+/// Most vector indices should use this version unless they need to bump for a
+/// format change.
+pub const VECTOR_INDEX_VERSION: u32 = 1;
+/// Version for IVF_RQ indices.
+pub const IVF_RQ_INDEX_VERSION: u32 = 2;
+
+pub trait IndexParams: Send + Sync {
+    fn as_any(&self) -> &dyn Any;
+
+    fn index_name(&self) -> &str;
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct IndexMetadata {
+    #[serde(rename = "type")]
+    pub index_type: String,
+    pub distance_type: String,
+}
diff --git a/rust/lance-index-core/src/lib.rs b/rust/lance-index-core/src/lib.rs
new file mode 100644
index 00000000000..7c93aad5753
--- /dev/null
+++ b/rust/lance-index-core/src/lib.rs
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+pub mod index;
+pub mod metrics;
+pub mod progress;
+pub mod registry;
+pub mod row_id_remap;
+pub mod scalar;
+
+pub use index::{
+    IVF_RQ_INDEX_VERSION, Index, IndexMetadata, IndexParams, IndexType, VECTOR_INDEX_VERSION,
+};
+pub use metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector};
+pub use progress::{IndexBuildProgress, NoopIndexBuildProgress, noop_progress};
+pub use registry::{IndexPluginRegistry, PluginRegistry};
+pub use row_id_remap::RowIdRemapper;
+pub use scalar::ScalarIndex;
+pub use scalar::registry::ScalarIndexPlugin;
diff --git a/rust/lance-index-core/src/metrics.rs b/rust/lance-index-core/src/metrics.rs
new file mode 100644
index 00000000000..37e2c43d196
--- /dev/null
+++ b/rust/lance-index-core/src/metrics.rs
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+/// A trait used by the index to report metrics
+///
+/// Callers can implement this trait to collect metrics
+pub trait MetricsCollector: Send + Sync {
+    /// Record partition loads
+    ///
+    /// Many indices consist of partitions that may need to be loaded
+    /// into cache.  For example, an inverted index or ngram index has a
+    /// posting list for each token.
+    ///
+    /// In the ideal case, these partitions are in the cache and will not need
+    /// to be loaded from disk.  This method should not be called if the
+    /// partition is in the cache.
+    fn record_parts_loaded(&self, num_parts: usize);
+
+    /// Record a single partition load; shorthand for `record_parts_loaded(1)`
+    fn record_part_load(&self) {
+        self.record_parts_loaded(1);
+    }
+
+    /// Record index loads
+    ///
+    /// This should be called when a scalar index is loaded from storage.
+    /// It should not be called if the index is already in memory.
+    fn record_index_loads(&self, num_indexes: usize);
+
+    /// Record a single index load; shorthand for `record_index_loads(1)`
+    fn record_index_load(&self) {
+        self.record_index_loads(1);
+    }
+
+    /// Record the number of "comparisons" made by the index
+    ///
+    /// What exactly constitutes a comparison depends on the index type.
+    /// For example, a B-tree index may make comparisons while searching for a value.
+    /// On the other hand, a bitmap index makes comparisons when computing the intersection
+    /// of two bitmaps.
+    ///
+    /// The goal is to provide some visibility into the compute cost of the search
+    fn record_comparisons(&self, num_comparisons: usize);
+
+    /// Returns an optional sink for recording exact I/O statistics (bytes read,
+    /// IOPS, and requests) performed on behalf of this collector.
+    ///
+    /// Index implementations that read from a
+    /// [`lance_io::scheduler::ScanScheduler`] can attach the returned handle to
+    /// their file readers so the I/O performed for a single query is measured
+    /// and attributed here.  The default returns `None`, meaning the caller does
+    /// not want I/O measured (and index implementations should then take their
+    /// normal, uninstrumented read path).
+    fn io_stats(&self) -> Option<lance_io::scheduler::IoStats> {
+        None
+    }
+}
+
+/// A no-op metrics collector that does nothing
+pub struct NoOpMetricsCollector;
+
+impl MetricsCollector for NoOpMetricsCollector {
+    fn record_parts_loaded(&self, _num_parts: usize) {}
+    fn record_index_loads(&self, _num_indexes: usize) {}
+    fn record_comparisons(&self, _num_comparisons: usize) {}
+}
+
+#[derive(Default)]
+pub struct LocalMetricsCollector {
+    pub parts_loaded: AtomicUsize,
+    pub index_loads: AtomicUsize,
+    pub comparisons: AtomicUsize,
+}
+
+impl LocalMetricsCollector {
+    pub fn dump_into(self, other: &dyn MetricsCollector) {
+        other.record_parts_loaded(self.parts_loaded.into_inner());
+        other.record_index_loads(self.index_loads.into_inner());
+        other.record_comparisons(self.comparisons.into_inner());
+    }
+}
+
+impl MetricsCollector for LocalMetricsCollector {
+    fn record_parts_loaded(&self, num_parts: usize) {
+        self.parts_loaded.fetch_add(num_parts, Ordering::Relaxed);
+    }
+
+    fn record_index_loads(&self, num_indexes: usize) {
+        self.index_loads.fetch_add(num_indexes, Ordering::Relaxed);
+    }
+
+    fn record_comparisons(&self, num_comparisons: usize) {
+        self.comparisons
+            .fetch_add(num_comparisons, Ordering::Relaxed);
+    }
+}
diff --git a/rust/lance-index-core/src/progress.rs b/rust/lance-index-core/src/progress.rs
new file mode 100644
index 00000000000..b02ec4280ba
--- /dev/null
+++ b/rust/lance-index-core/src/progress.rs
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use async_trait::async_trait;
+use lance_core::Result;
+use std::sync::Arc;
+
+/// Progress callback for index building and distributed index finalization.
+///
+/// Called at stage boundaries during index construction. For a single logical stream, stages are
+/// sequential: `stage_complete` is always called before the next `stage_start`, so only one stage
+/// is active at a time. Callers that orchestrate independent sub-builds in parallel may prefix
+/// stage names (for example `segment_plan[0]/merge_partitions`) to represent separate logical
+/// streams. Stage names are index-type-specific (e.g. "train_ivf", "shuffle", "merge_partitions"
+/// for vector indices; "load_data", "build_pages" for scalar indices; merge/finalization stages
+/// for distributed index construction).
+///
+/// Methods take `&self` to allow concurrent calls from within a single stage. Implementations
+/// must be thread-safe.
+#[async_trait]
+pub trait IndexBuildProgress: std::fmt::Debug + Sync + Send {
+    /// A named stage has started.
+    ///
+    /// `total` is the number of work units if known, and `unit` describes
+    /// what is being counted (e.g. "partitions", "batches", "rows").
+    async fn stage_start(&self, stage: &str, total: Option<u64>, unit: &str) -> Result<()>;
+
+    /// Progress within the current stage.
+    async fn stage_progress(&self, stage: &str, completed: u64) -> Result<()>;
+
+    /// A named stage has completed.
+    async fn stage_complete(&self, stage: &str) -> Result<()>;
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct NoopIndexBuildProgress;
+
+#[async_trait]
+impl IndexBuildProgress for NoopIndexBuildProgress {
+    async fn stage_start(&self, _: &str, _: Option<u64>, _: &str) -> Result<()> {
+        Ok(())
+    }
+    async fn stage_progress(&self, _: &str, _: u64) -> Result<()> {
+        Ok(())
+    }
+    async fn stage_complete(&self, _: &str) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Helper to create a default noop progress instance.
+pub fn noop_progress() -> Arc<dyn IndexBuildProgress> {
+    Arc::new(NoopIndexBuildProgress)
+}
diff --git a/rust/lance-index-core/src/registry.rs b/rust/lance-index-core/src/registry.rs
new file mode 100644
index 00000000000..1744865e6e9
--- /dev/null
+++ b/rust/lance-index-core/src/registry.rs
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::collections::HashMap;
+
+use lance_core::{Error, Result};
+
+use crate::scalar::registry::ScalarIndexPlugin;
+
+/// Abstract interface for a registry of index plugins.
+///
+/// This trait is used by [`ScalarIndexPlugin::attach_registry`] to give plugins
+/// a handle to the registry so they can look up other plugins (e.g., to delegate
+/// loading to another plugin).
+pub trait PluginRegistry: Send + Sync {
+    fn get_plugin_by_name(&self, name: &str) -> Result<&dyn ScalarIndexPlugin>;
+    fn get_plugin_by_details(&self, details: &prost_types::Any) -> Result<&dyn ScalarIndexPlugin>;
+}
+
+/// A registry of index plugins
+pub struct IndexPluginRegistry {
+    plugins: HashMap<String, Box<dyn ScalarIndexPlugin>>,
+}
+
+impl IndexPluginRegistry {
+    /// Create an empty registry.
+    pub fn new() -> Self {
+        Self {
+            plugins: HashMap::new(),
+        }
+    }
+
+    fn normalize_plugin_name(name: &str) -> String {
+        name.to_lowercase()
+    }
+
+    fn get_plugin_name_from_details_name(&self, details_name: &str) -> String {
+        let normalized = Self::normalize_plugin_name(details_name);
+        // Strip only the trailing suffix; `str::replace` would also drop inner occurrences.
+        match normalized.strip_suffix("indexdetails") {
+            Some(stem) => stem.to_owned(),
+            None => normalized,
+        }
+    }
+
+    /// Adds a plugin to the registry, using the name of the details message to determine
+    /// the plugin name.
+    ///
+    /// The plugin name will be the lowercased name of the details message with any trailing
+    /// "indexdetails" removed.
+    ///
+    /// For example, if the details message is `BTreeIndexDetails`, the plugin name will be
+    /// `btree`.
+    pub fn add_plugin<
+        DetailsType: prost::Message + prost::Name,
+        PluginType: ScalarIndexPlugin + std::default::Default + 'static,
+    >(
+        &mut self,
+    ) {
+        let plugin_name = self.get_plugin_name_from_details_name(DetailsType::NAME);
+        self.plugins
+            .insert(plugin_name, Box::new(PluginType::default()));
+    }
+
+    /// Get the plugin registered under `name` (case-insensitive), or an error if there is none
+    pub fn get_plugin_by_name(&self, name: &str) -> Result<&dyn ScalarIndexPlugin> {
+        let plugin_name = Self::normalize_plugin_name(name);
+        self.plugins
+            .get(&plugin_name)
+            .map(|plugin| plugin.as_ref())
+            .ok_or_else(|| {
+                let hint = if plugin_name == "rtree" {
+                    ". The 'rtree' index requires the `geo` feature. \
+                     Rebuild with `--features geo` to enable geospatial support"
+                } else {
+                    ""
+                };
+                Error::invalid_input_source(
+                    format!("No scalar index plugin found for name '{name}'{hint}").into(),
+                )
+            })
+    }
+
+    pub fn get_plugin_by_details(
+        &self,
+        details: &prost_types::Any,
+    ) -> Result<&dyn ScalarIndexPlugin> {
+        let details_name = details.type_url.rsplit('.').next().unwrap_or_default();
+        let plugin_name = self.get_plugin_name_from_details_name(details_name);
+        self.get_plugin_by_name(&plugin_name)
+    }
+
+    /// Call a closure for each registered plugin
+    pub fn for_each_plugin(&self, mut f: impl FnMut(&dyn ScalarIndexPlugin)) {
+        for plugin in self.plugins.values() {
+            f(plugin.as_ref());
+        }
+    }
+}
+
+impl Default for IndexPluginRegistry {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl PluginRegistry for IndexPluginRegistry {
+    fn get_plugin_by_name(&self, name: &str) -> Result<&dyn ScalarIndexPlugin> {
+        Self::get_plugin_by_name(self, name)
+    }
+
+    fn get_plugin_by_details(&self, details: &prost_types::Any) -> Result<&dyn ScalarIndexPlugin> {
+        Self::get_plugin_by_details(self, details)
+    }
+}
diff --git a/rust/lance-index-core/src/row_id_remap.rs b/rust/lance-index-core/src/row_id_remap.rs
new file mode 100644
index 00000000000..1161fc6c0bb
--- /dev/null
+++ b/rust/lance-index-core/src/row_id_remap.rs
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use arrow_array::RecordBatch;
+use lance_core::Result;
+use lance_select::RowAddrTreeMap;
+use roaring::RoaringTreemap;
+
+/// Trait for remapping row IDs at index load time.
+///
+/// When fragments are compacted after an index is built, the row IDs stored
+/// in that index become stale. Implementors of this trait know how to map
+/// an old row ID to the current row ID (or `None` if the row was deleted).
+///
+/// This is injected into index loading so that indices can update their
+/// in-memory state without being rebuilt.
+pub trait RowIdRemapper: Send + Sync + std::fmt::Debug {
+    fn remap_row_id(&self, row_id: u64) -> Option<u64>;
+    fn remap_row_addrs_tree_map(&self, row_addrs: &RowAddrTreeMap) -> RowAddrTreeMap;
+    fn remap_row_ids_roaring_tree_map(&self, row_ids: &RoaringTreemap) -> RoaringTreemap;
+    fn remap_row_ids_record_batch(
+        &self,
+        batch: RecordBatch,
+        row_id_idx: usize,
+    ) -> Result<RecordBatch>;
+}
diff --git a/rust/lance-index-core/src/scalar/expression.rs b/rust/lance-index-core/src/scalar/expression.rs
new file mode 100644
index 00000000000..fea2c4c98d6
--- /dev/null
+++ b/rust/lance-index-core/src/scalar/expression.rs
@@ -0,0 +1,935 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::{ops::Bound, sync::Arc};
+
+use arrow_schema::{DataType, Field};
+use async_recursion::async_recursion;
+use async_trait::async_trait;
+use datafusion_common::ScalarValue;
+use datafusion_expr::{Expr, Operator, ReturnFieldArgs, ScalarUDF, expr::Like};
+use tokio::try_join;
+
+use lance_core::{Error, Result};
+use lance_select::{IndexExprResult, NullableIndexExprResult, NullableRowAddrMask};
+use roaring::RoaringBitmap;
+use tracing::instrument;
+
+use crate::metrics::MetricsCollector;
+use crate::scalar::{AnyQuery, ScalarIndex, SearchResult};
+
+const MAX_DEPTH: usize = 500;
+
+/// An indexed expression consists of a scalar index query with a post-scan filter
+///
+/// When a user wants to filter the data returned by a scan we may be able to use
+/// one or more scalar indices to reduce the amount of data we load from the disk.
+#[derive(Debug, PartialEq)]
+pub struct IndexedExpression {
+    /// The portion of the query that can be satisfied by scalar indices
+    pub scalar_query: Option<ScalarIndexExpr>,
+    /// The portion of the query that cannot be satisfied by scalar indices
+    pub refine_expr: Option<Expr>,
+}
+
+impl IndexedExpression {
+    /// Create an expression that only does refine
+    pub fn refine_only(refine_expr: Expr) -> Self {
+        Self {
+            scalar_query: None,
+            refine_expr: Some(refine_expr),
+        }
+    }
+
+    /// Create an expression that is only an index query
+    pub fn index_query(
+        column: String,
+        index_name: String,
+        index_type: String,
+        query: Arc<dyn AnyQuery>,
+    ) -> Self {
+        Self {
+            scalar_query: Some(ScalarIndexExpr::Query(ScalarIndexSearch {
+                column,
+                index_name,
+                index_type,
+                query,
+                needs_recheck: false,
+                fragment_bitmap: None,
+            })),
+            refine_expr: None,
+        }
+    }
+
+    /// Create an expression that is only an index query with explicit needs_recheck
+    pub fn index_query_with_recheck(
+        column: String,
+        index_name: String,
+        index_type: String,
+        query: Arc<dyn AnyQuery>,
+        needs_recheck: bool,
+    ) -> Self {
+        Self {
+            scalar_query: Some(ScalarIndexExpr::Query(ScalarIndexSearch {
+                column,
+                index_name,
+                index_type,
+                query,
+                needs_recheck,
+                fragment_bitmap: None,
+            })),
+            refine_expr: None,
+        }
+    }
+
+    /// Try and negate the expression
+    ///
+    /// If the expression contains both an index query and a refine expression then it
+    /// cannot be negated today and None will be returned (we give up trying to use indices)
+    pub fn maybe_not(self) -> Option<Self> {
+        match (self.scalar_query, self.refine_expr) {
+            (Some(_), Some(_)) => None,
+            (Some(scalar_query), None) => {
+                if scalar_query.needs_recheck() {
+                    return None;
+                }
+                Some(Self {
+                    scalar_query: Some(ScalarIndexExpr::Not(Box::new(scalar_query))),
+                    refine_expr: None,
+                })
+            }
+            (None, Some(refine_expr)) => Some(Self {
+                scalar_query: None,
+                refine_expr: Some(Expr::Not(Box::new(refine_expr))),
+            }),
+            (None, None) => panic!("Empty node should not occur"),
+        }
+    }
+
+    /// Perform a logical AND of two indexed expressions
+    pub fn and(self, other: Self) -> Self {
+        let scalar_query = match (self.scalar_query, other.scalar_query) {
+            (Some(scalar_query), Some(other_scalar_query)) => Some(ScalarIndexExpr::And(
+                Box::new(scalar_query),
+                Box::new(other_scalar_query),
+            )),
+            (Some(scalar_query), None) => Some(scalar_query),
+            (None, Some(scalar_query)) => Some(scalar_query),
+            (None, None) => None,
+        };
+        let refine_expr = match (self.refine_expr, other.refine_expr) {
+            (Some(refine_expr), Some(other_refine_expr)) => {
+                Some(refine_expr.and(other_refine_expr))
+            }
+            (Some(refine_expr), None) => Some(refine_expr),
+            (None, Some(refine_expr)) => Some(refine_expr),
+            (None, None) => None,
+        };
+        Self {
+            scalar_query,
+            refine_expr,
+        }
+    }
+
+    /// Try and perform a logical OR of two indexed expressions
+    pub fn maybe_or(self, other: Self) -> Option<Self> {
+        let scalar_query = self.scalar_query?;
+        let other_scalar_query = other.scalar_query?;
+        let scalar_query = Some(ScalarIndexExpr::Or(
+            Box::new(scalar_query),
+            Box::new(other_scalar_query),
+        ));
+
+        let refine_expr = match (self.refine_expr, other.refine_expr) {
+            (Some(_), Some(_)) | (Some(_), None) | (None, Some(_)) => {
+                return None;
+            }
+            (None, None) => None,
+        };
+        Some(Self {
+            scalar_query,
+            refine_expr,
+        })
+    }
+
+    pub fn refine(self, expr: Expr) -> Self {
+        match self.refine_expr {
+            Some(refine_expr) => Self {
+                scalar_query: self.scalar_query,
+                refine_expr: Some(refine_expr.and(expr)),
+            },
+            None => Self {
+                scalar_query: self.scalar_query,
+                refine_expr: Some(expr),
+            },
+        }
+    }
+}
+
+pub trait ScalarQueryParser: std::fmt::Debug + Send + Sync {
+    /// Visit a between expression
+    fn visit_between(
+        &self,
+        column: &str,
+        low: &Bound<ScalarValue>,
+        high: &Bound<ScalarValue>,
+    ) -> Option<IndexedExpression>;
+    /// Visit an in list expression
+    fn visit_in_list(&self, column: &str, in_list: &[ScalarValue]) -> Option<IndexedExpression>;
+    /// Visit an is bool expression
+    fn visit_is_bool(&self, column: &str, value: bool) -> Option<IndexedExpression>;
+    /// Visit an is null expression
+    fn visit_is_null(&self, column: &str) -> Option<IndexedExpression>;
+    /// Visit a comparison expression
+    fn visit_comparison(
+        &self,
+        column: &str,
+        value: &ScalarValue,
+        op: &Operator,
+    ) -> Option<IndexedExpression>;
+    /// Visit a scalar function expression
+    fn visit_scalar_function(
+        &self,
+        column: &str,
+        data_type: &DataType,
+        func: &ScalarUDF,
+        args: &[Expr],
+    ) -> Option<IndexedExpression>;
+
+    /// Visit a LIKE expression
+    fn visit_like(
+        &self,
+        _column: &str,
+        _like: &Like,
+        _pattern: &ScalarValue,
+    ) -> Option<IndexedExpression> {
+        None
+    }
+
+    /// Visits a potential reference to a column
+    fn is_valid_reference(&self, func: &Expr, data_type: &DataType) -> Option<DataType> {
+        match func {
+            Expr::Column(_) => Some(data_type.clone()),
+            _ => None,
+        }
+    }
+}
+
+/// A generic parser that wraps multiple scalar query parsers
+///
+/// It will search each parser in order and return the first non-None result
+#[derive(Debug)]
+pub struct MultiQueryParser {
+    parsers: Vec<Box<dyn ScalarQueryParser>>,
+}
+
+impl MultiQueryParser {
+    /// Create a new MultiQueryParser with a single parser
+    pub fn single(parser: Box<dyn ScalarQueryParser>) -> Self {
+        Self {
+            parsers: vec![parser],
+        }
+    }
+
+    /// Add a new parser to the MultiQueryParser
+    pub fn add(&mut self, other: Box<dyn ScalarQueryParser>) {
+        self.parsers.push(other);
+    }
+
+    /// Pick the first underlying parser whose `is_valid_reference` accepts `expr`.
+    pub fn select(
+        &self,
+        expr: &Expr,
+        data_type: &DataType,
+    ) -> Option<(&dyn ScalarQueryParser, DataType)> {
+        self.parsers.iter().find_map(|p| {
+            p.is_valid_reference(expr, data_type)
+                .map(|dt| (p.as_ref(), dt))
+        })
+    }
+}
+
+impl ScalarQueryParser for MultiQueryParser {
+    fn visit_between(
+        &self,
+        column: &str,
+        low: &Bound<ScalarValue>,
+        high: &Bound<ScalarValue>,
+    ) -> Option<IndexedExpression> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.visit_between(column, low, high))
+    }
+    fn visit_in_list(&self, column: &str, in_list: &[ScalarValue]) -> Option<IndexedExpression> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.visit_in_list(column, in_list))
+    }
+    fn visit_is_bool(&self, column: &str, value: bool) -> Option<IndexedExpression> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.visit_is_bool(column, value))
+    }
+    fn visit_is_null(&self, column: &str) -> Option<IndexedExpression> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.visit_is_null(column))
+    }
+    fn visit_comparison(
+        &self,
+        column: &str,
+        value: &ScalarValue,
+        op: &Operator,
+    ) -> Option<IndexedExpression> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.visit_comparison(column, value, op))
+    }
+    fn visit_scalar_function(
+        &self,
+        column: &str,
+        data_type: &DataType,
+        func: &ScalarUDF,
+        args: &[Expr],
+    ) -> Option<IndexedExpression> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.visit_scalar_function(column, data_type, func, args))
+    }
+    fn visit_like(
+        &self,
+        column: &str,
+        like: &Like,
+        pattern: &ScalarValue,
+    ) -> Option<IndexedExpression> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.visit_like(column, like, pattern))
+    }
+    fn is_valid_reference(&self, func: &Expr, data_type: &DataType) -> Option<DataType> {
+        self.parsers
+            .iter()
+            .find_map(|parser| parser.is_valid_reference(func, data_type))
+    }
+}
+
+/// A trait implemented by anything that can load indices by name
+///
+/// This is used during the evaluation of an index expression
+#[async_trait]
+pub trait ScalarIndexLoader: Send + Sync {
+    /// Load the index with the given name
+    async fn load_index(
+        &self,
+        column: &str,
+        index_name: &str,
+        metrics: &dyn MetricsCollector,
+    ) -> Result<Arc<dyn ScalarIndex>>;
+}
+
+/// This represents a search into a scalar index
+#[derive(Debug, Clone)]
+pub struct ScalarIndexSearch {
+    /// The column to search (redundant, used for debugging messages)
+    pub column: String,
+    /// The name of the index to search
+    pub index_name: String,
+    /// The type of the index being searched (e.g. "BTree", "Bitmap"), used for display purposes
+    pub index_type: String,
+    /// The query to search for
+    pub query: Arc<dyn AnyQuery>,
+    /// If true, the query results are inexact and will need a recheck
+    pub needs_recheck: bool,
+    /// The fragments the underlying index has entries for.
+    ///
+    /// `None` means coverage is unknown (e.g. constructed outside of scanner
+    /// planning, or from a legacy code path). Optimizer rules that need to
+    /// decide whether the index covers the dataset must treat `None` as
+    /// "refuse to use" — the bitmap is the only way to safely answer that
+    /// question synchronously without an async metadata load.
+    pub fragment_bitmap: Option<RoaringBitmap>,
+}
+
+impl PartialEq for ScalarIndexSearch {
+    fn eq(&self, other: &Self) -> bool {
+        // Equality is column + index name + query. `fragment_bitmap` is dataset-state
+        // metadata, not query identity, so it is deliberately ignored. NOTE(review):
+        // `index_type` and `needs_recheck` are not compared either; confirm intended.
+        self.column == other.column
+            && self.index_name == other.index_name
+            && self.query.as_ref().eq(other.query.as_ref())
+    }
+}
+
+/// This represents a lookup into one or more scalar indices
+///
+/// This is a tree of operations because we may need to logically combine or
+/// modify the results of scalar lookups
+#[derive(Debug, Clone)]
+pub enum ScalarIndexExpr {
+    Not(Box<Self>),
+    And(Box<Self>, Box<Self>),
+    Or(Box<Self>, Box<Self>),
+    Query(ScalarIndexSearch),
+}
+
+impl PartialEq for ScalarIndexExpr {
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (Self::Not(l0), Self::Not(r0)) => l0 == r0,
+            (Self::And(l0, l1), Self::And(r0, r1)) => l0 == r0 && l1 == r1,
+            (Self::Or(l0, l1), Self::Or(r0, r1)) => l0 == r0 && l1 == r1,
+            (Self::Query(l_search), Self::Query(r_search)) => l_search == r_search,
+            _ => false,
+        }
+    }
+}
+
+impl std::fmt::Display for ScalarIndexExpr {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Not(inner) => write!(f, "NOT({})", inner),
+            Self::And(lhs, rhs) => write!(f, "AND({},{})", lhs, rhs),
+            Self::Or(lhs, rhs) => write!(f, "OR({},{})", lhs, rhs),
+            Self::Query(search) => write!(
+                f,
+                "[{}]@{}({})",
+                search.query.format(&search.column),
+                search.index_name,
+                search.index_type
+            ),
+        }
+    }
+}
+
+impl From<SearchResult> for NullableIndexExprResult {
+    fn from(result: SearchResult) -> Self {
+        match result {
+            SearchResult::Exact(mask) => Self::exact(NullableRowAddrMask::AllowList(mask)),
+            SearchResult::AtMost(mask) => Self::at_most(NullableRowAddrMask::AllowList(mask)),
+            SearchResult::AtLeast(mask) => Self::at_least(NullableRowAddrMask::AllowList(mask)),
+        }
+    }
+}
+
+impl ScalarIndexExpr {
+    /// Evaluates the scalar index expression
+    #[async_recursion]
+    pub async fn evaluate_nullable(
+        &self,
+        index_loader: &dyn ScalarIndexLoader,
+        metrics: &dyn MetricsCollector,
+    ) -> Result<NullableIndexExprResult> {
+        match self {
+            Self::Not(inner) => {
+                let result = inner.evaluate_nullable(index_loader, metrics).await?;
+                Ok(!result)
+            }
+            Self::And(lhs, rhs) => {
+                let lhs_result = lhs.evaluate_nullable(index_loader, metrics);
+                let rhs_result = rhs.evaluate_nullable(index_loader, metrics);
+                let (lhs_result, rhs_result) = try_join!(lhs_result, rhs_result)?;
+                Ok(lhs_result & rhs_result)
+            }
+            Self::Or(lhs, rhs) => {
+                let lhs_result = lhs.evaluate_nullable(index_loader, metrics);
+                let rhs_result = rhs.evaluate_nullable(index_loader, metrics);
+                let (lhs_result, rhs_result) = try_join!(lhs_result, rhs_result)?;
+                Ok(lhs_result | rhs_result)
+            }
+            Self::Query(search) => {
+                let index = index_loader
+                    .load_index(&search.column, &search.index_name, metrics)
+                    .await?;
+                let search_result = index.search(search.query.as_ref(), metrics).await?;
+                Ok(search_result.into())
+            }
+        }
+    }
+
+    #[instrument(level = "debug", skip_all)]
+    pub async fn evaluate(
+        &self,
+        index_loader: &dyn ScalarIndexLoader,
+        metrics: &dyn MetricsCollector,
+    ) -> Result<IndexExprResult> {
+        Ok(self
+            .evaluate_nullable(index_loader, metrics)
+            .await?
+            .drop_nulls())
+    }
+
+    pub fn to_expr(&self) -> Expr {
+        match self {
+            Self::Not(inner) => Expr::Not(inner.to_expr().into()),
+            Self::And(lhs, rhs) => {
+                let lhs = lhs.to_expr();
+                let rhs = rhs.to_expr();
+                lhs.and(rhs)
+            }
+            Self::Or(lhs, rhs) => {
+                let lhs = lhs.to_expr();
+                let rhs = rhs.to_expr();
+                lhs.or(rhs)
+            }
+            Self::Query(search) => search.query.to_expr(search.column.clone()),
+        }
+    }
+
+    pub fn needs_recheck(&self) -> bool {
+        match self {
+            Self::Not(inner) => inner.needs_recheck(),
+            Self::And(lhs, rhs) | Self::Or(lhs, rhs) => lhs.needs_recheck() || rhs.needs_recheck(),
+            Self::Query(search) => search.needs_recheck,
+        }
+    }
+}
+
+/// A trait to be used in `apply_scalar_indices` to inform the function which columns are indexed
+pub trait IndexInformationProvider {
+    /// Check if an index exists for `col` and, if so, return the data type of col
+    /// as well as a query parser that can parse queries for that column
+    fn get_index(&self, col: &str) -> Option<(&DataType, &MultiQueryParser)>;
+
+    /// The set of fragments covered by `(column, index_name)`.
+    ///
+    /// Returns `None` when the provider doesn't know — callers must treat
+    /// that as "coverage unknown" rather than "covers everything". The
+    /// default implementation always returns `None`, so providers that
+    /// haven't been updated cannot accidentally claim full coverage.
+    fn fragment_bitmap(&self, _column: &str, _index_name: &str) -> Option<RoaringBitmap> {
+        None
+    }
+}
+
+/// Split a filter into a scalar index search and an optional post-search refine
+/// expression; when no index applies the whole filter becomes the refine expression.
+pub fn apply_scalar_indices(
+    expr: Expr,
+    index_info: &dyn IndexInformationProvider,
+) -> Result<IndexedExpression> {
+    let mut indexed =
+        visit_node(&expr, index_info, 0)?.unwrap_or_else(|| IndexedExpression::refine_only(expr));
+    if let Some(search_tree) = indexed.scalar_query.as_mut() {
+        populate_fragment_bitmaps(search_tree, index_info);
+    }
+    Ok(indexed)
+}
+
+fn populate_fragment_bitmaps(
+    expr: &mut ScalarIndexExpr,
+    index_info: &dyn IndexInformationProvider,
+) {
+    match expr {
+        ScalarIndexExpr::Not(inner) => populate_fragment_bitmaps(inner, index_info),
+        ScalarIndexExpr::And(lhs, rhs) | ScalarIndexExpr::Or(lhs, rhs) => {
+            populate_fragment_bitmaps(lhs, index_info);
+            populate_fragment_bitmaps(rhs, index_info);
+        }
+        ScalarIndexExpr::Query(search) => {
+            search.fragment_bitmap = index_info.fragment_bitmap(&search.column, &search.index_name);
+        }
+    }
+}
+
+fn visit_node(
+    expr: &Expr,
+    index_info: &dyn IndexInformationProvider,
+    depth: usize,
+) -> Result<Option<IndexedExpression>> {
+    if depth >= MAX_DEPTH {
+        return Err(Error::invalid_input(format!(
+            "the filter expression is too long, lance limit the max number of conditions to {}",
+            MAX_DEPTH
+        )));
+    } // every recursive descent below (aliases included) adds one level
+    match expr {
+        Expr::Between(between) => Ok(visit_between_expr(between, index_info)),
+        Expr::Alias(alias) => visit_node(alias.expr.as_ref(), index_info, depth + 1),
+        Expr::Column(_) => Ok(visit_column(expr, index_info)),
+        Expr::InList(in_list) => Ok(visit_in_list_expr(in_list, index_info)),
+        Expr::IsFalse(expr) => Ok(visit_is_bool(expr.as_ref(), index_info, false)),
+        Expr::IsTrue(expr) => Ok(visit_is_bool(expr.as_ref(), index_info, true)),
+        Expr::IsNull(expr) => Ok(visit_is_null(expr.as_ref(), index_info, false)),
+        Expr::IsNotNull(expr) => {
+            // `regexp_match(col, pat)` returns a list and is coerced to
+            // `IsNotNull(regexp_match(...))` before it reaches here. Unwrap that
+            // so the regex acceleration applies; everything else is a genuine
+            // IS NOT NULL check.
+            if let Expr::ScalarFunction(scalar_fn) = expr.as_ref()
+                && scalar_fn.func.name() == "regexp_match"
+            {
+                return Ok(visit_scalar_fn(scalar_fn, index_info));
+            }
+            Ok(visit_is_null(expr.as_ref(), index_info, true))
+        }
+        Expr::Not(expr) => visit_not(expr.as_ref(), index_info, depth),
+        Expr::BinaryExpr(binary_expr) => visit_binary_expr(binary_expr, index_info, depth),
+        Expr::ScalarFunction(scalar_fn) => Ok(visit_scalar_fn(scalar_fn, index_info)),
+        Expr::Like(like) => {
+            if like.negated {
+                Ok(None) // negated LIKE is never turned into an index query here
+            } else {
+                Ok(visit_like_expr(like, index_info))
+            }
+        }
+        _ => Ok(None), // any other node kind yields no index query
+    }
+}
+
+// Extract the full nested column path from a get_field expression chain.
+// e.g. get_field(get_field(m, "a"), "b") -> format_field_path(&["m", "a", "b"])
+fn extract_nested_column_path(expr: &Expr) -> Option<String> {
+    let mut current_expr = expr;
+    let mut parts = Vec::new();
+
+    loop {
+        match current_expr {
+            Expr::ScalarFunction(udf) if udf.name() == "get_field" => {
+                if udf.args.len() != 2 {
+                    return None;
+                }
+                if let Expr::Literal(ScalarValue::Utf8(Some(field_name)), _) = &udf.args[1] {
+                    parts.push(field_name.clone()); // outermost (leaf) field is seen first
+                } else {
+                    return None;
+                }
+                current_expr = &udf.args[0]; // descend towards the base column
+            }
+            Expr::Column(col) => {
+                parts.push(col.name.clone());
+                break;
+            }
+            _ => return None, // anything else is not a plain field-access chain
+        }
+    }
+
+    parts.reverse(); // collected leaf-to-root; the path is root-to-leaf
+    let field_refs: Vec<&str> = parts.iter().map(|s| s.as_str()).collect();
+    Some(lance_core::datatypes::format_field_path(&field_refs))
+}
+
+fn maybe_indexed_column<'b>(
+    expr: &Expr,
+    index_info: &'b dyn IndexInformationProvider,
+) -> Option<(String, DataType, &'b dyn ScalarQueryParser)> {
+    // First try to extract the full nested column path for get_field expressions
+    if let Some(nested_path) = extract_nested_column_path(expr)
+        && let Some((data_type, multi)) = index_info.get_index(&nested_path)
+        && let Some((parser, data_type)) = multi.select(expr, data_type)
+    {
+        return Some((nested_path, data_type, parser));
+    }
+
+    match expr {
+        Expr::Column(col) => {
+            let col = col.name.as_str();
+            let (data_type, multi) = index_info.get_index(col)?;
+            if let Some((parser, data_type)) = multi.select(expr, data_type) {
+                Some((col.to_string(), data_type, parser))
+            } else {
+                None
+            }
+        }
+        Expr::ScalarFunction(udf) => {
+            if udf.args.is_empty() {
+                return None;
+            }
+            let col = match &udf.args[0] {
+                Expr::Column(col) => col.name.as_str(),
+                _ => return None,
+            };
+            let (data_type, multi) = index_info.get_index(col)?;
+            if let Some((parser, data_type)) = multi.select(expr, data_type) {
+                Some((col.to_string(), data_type, parser))
+            } else {
+                None
+            }
+        }
+        _ => None,
+    }
+}
+
+/// Handle `expr [NOT] BETWEEN low AND high` as a range query with both ends included.
+fn visit_between_expr(
+    between: &datafusion_expr::Between,
+    index_info: &dyn IndexInformationProvider,
+) -> Option<IndexedExpression> {
+    let (column, col_type, query_parser) = maybe_indexed_column(&between.expr, index_info)?;
+    // Both endpoints must reduce to scalars; each is passed as an inclusive bound.
+    let lower = Bound::Included(maybe_scalar(&between.low, &col_type)?);
+    let upper = Bound::Included(maybe_scalar(&between.high, &col_type)?);
+
+    let range_query = query_parser.visit_between(&column, &lower, &upper)?;
+
+    if !between.negated {
+        return Some(range_query);
+    }
+    // NOT BETWEEN: negate the range query, or give up if it cannot be negated.
+    range_query.maybe_not()
+}
+
+/// Handle `expr [NOT] IN (v1, v2, ...)` where every list entry reduces to a scalar.
+fn visit_in_list_expr(
+    in_list: &datafusion_expr::expr::InList,
+    index_info: &dyn IndexInformationProvider,
+) -> Option<IndexedExpression> {
+    let (column, col_type, query_parser) = maybe_indexed_column(&in_list.expr, index_info)?;
+    // Bail out unless all list entries can be coerced to `col_type`.
+    let candidates = maybe_scalar_list(&in_list.list, &col_type)?;
+    let membership = query_parser.visit_in_list(&column, &candidates)?;
+
+    if !in_list.negated {
+        return Some(membership);
+    }
+    membership.maybe_not()
+}
+
+/// Handle `expr IS TRUE` / `expr IS FALSE`; only boolean columns qualify.
+fn visit_is_bool(
+    expr: &Expr,
+    index_info: &dyn IndexInformationProvider,
+    value: bool,
+) -> Option<IndexedExpression> {
+    let (column, col_type, query_parser) = maybe_indexed_column(expr, index_info)?;
+    if col_type == DataType::Boolean {
+        return query_parser.visit_is_bool(&column, value);
+    }
+    None
+}
+
+/// A bare boolean column used as a predicate is handled like `col IS TRUE`.
+fn visit_column(
+    col: &Expr,
+    index_info: &dyn IndexInformationProvider,
+) -> Option<IndexedExpression> {
+    let (column, col_type, query_parser) = maybe_indexed_column(col, index_info)?;
+    if col_type == DataType::Boolean {
+        return query_parser.visit_is_bool(&column, true);
+    }
+    None
+}
+
+/// Handle `expr IS NULL`, or `expr IS NOT NULL` when `negated` is set.
+fn visit_is_null(
+    expr: &Expr,
+    index_info: &dyn IndexInformationProvider,
+    negated: bool,
+) -> Option<IndexedExpression> {
+    let (column, _, query_parser) = maybe_indexed_column(expr, index_info)?;
+    let null_query = query_parser.visit_is_null(&column)?;
+    if !negated {
+        return Some(null_query);
+    }
+    null_query.maybe_not()
+}
+
+fn visit_not(
+    expr: &Expr,
+    index_info: &dyn IndexInformationProvider,
+    depth: usize,
+) -> Result<Option<IndexedExpression>> {
+    let node = visit_node(expr, index_info, depth + 1)?; // the operand sits one level deeper
+    Ok(node.and_then(|node| node.maybe_not())) // `None` if not indexed or not negatable
+}
+
+/// Handle `column <op> scalar`. Only the left operand is checked for an indexed
+/// column, and the right operand must reduce to a scalar of the reported type.
+fn visit_comparison(
+    expr: &datafusion_expr::BinaryExpr,
+    index_info: &dyn IndexInformationProvider,
+) -> Option<IndexedExpression> {
+    // `?` gives up (returns `None`) when the left side is not an indexed column.
+    let (column, col_type, query_parser) = maybe_indexed_column(&expr.left, index_info)?;
+    let scalar = maybe_scalar(&expr.right, &col_type)?;
+    // The operator is forwarded unchanged; the parser decides how to use it.
+    query_parser.visit_comparison(&column, &scalar, &expr.op)
+}
+
+fn maybe_range(
+    expr: &datafusion_expr::BinaryExpr,
+    index_info: &dyn IndexInformationProvider,
+) -> Option<IndexedExpression> {
+    let left_expr = match expr.left.as_ref() {
+        Expr::BinaryExpr(binary_expr) => Some(binary_expr),
+        _ => None,
+    }?;
+    let right_expr = match expr.right.as_ref() {
+        Expr::BinaryExpr(binary_expr) => Some(binary_expr),
+        _ => None,
+    }?;
+
+    let (left_col, dt, parser) = maybe_indexed_column(&left_expr.left, index_info)?;
+    let right_col = match &right_expr.left.as_ref() {
+        Expr::Column(col) => col.name.as_str().to_string(),
+        _ => return None,
+    };
+
+    if left_col != right_col {
+        return None;
+    }
+
+    let left_value = maybe_scalar(&left_expr.right, &dt)?;
+    let right_value = maybe_scalar(&right_expr.right, &dt)?;
+
+    let (low, high) = match (left_expr.op, right_expr.op) {
+        (Operator::GtEq, Operator::LtEq) => {
+            (Bound::Included(left_value), Bound::Included(right_value))
+        }
+        (Operator::GtEq, Operator::Lt) => {
+            (Bound::Included(left_value), Bound::Excluded(right_value))
+        }
+        (Operator::Gt, Operator::LtEq) => {
+            (Bound::Excluded(left_value), Bound::Included(right_value))
+        }
+        (Operator::Gt, Operator::Lt) => (Bound::Excluded(left_value), Bound::Excluded(right_value)),
+        (Operator::LtEq, Operator::GtEq) => {
+            (Bound::Included(right_value), Bound::Included(left_value))
+        }
+        (Operator::LtEq, Operator::Gt) => {
+            (Bound::Excluded(right_value), Bound::Included(left_value))
+        }
+        (Operator::Lt, Operator::GtEq) => {
+            (Bound::Included(right_value), Bound::Excluded(left_value))
+        }
+        (Operator::Lt, Operator::Gt) => (Bound::Excluded(right_value), Bound::Excluded(left_value)),
+        _ => return None,
+    };
+
+    parser.visit_between(&left_col, &low, &high)
+}
+
+/// Visit an AND: fuse it into a single range query when possible, else combine operands.
+fn visit_and(
+    expr: &datafusion_expr::BinaryExpr,
+    index_info: &dyn IndexInformationProvider,
+    depth: usize,
+) -> Result<Option<IndexedExpression>> {
+    if let fused @ Some(_) = maybe_range(expr, index_info) {
+        return Ok(fused);
+    }
+    let lhs = visit_node(&expr.left, index_info, depth + 1)?;
+    let rhs = visit_node(&expr.right, index_info, depth + 1)?;
+    Ok(match (lhs, rhs) {
+        (Some(lhs), Some(rhs)) => Some(lhs.and(rhs)),
+        (Some(lhs), None) => Some(lhs.refine(expr.right.as_ref().clone())),
+        (None, Some(rhs)) => Some(rhs.refine(expr.left.as_ref().clone())),
+        (None, None) => None,
+    })
+}
+
+/// Visit both operands of an OR; the result is indexed only if both sides are.
+fn visit_or(
+    expr: &datafusion_expr::BinaryExpr,
+    index_info: &dyn IndexInformationProvider,
+    depth: usize,
+) -> Result<Option<IndexedExpression>> {
+    let child_depth = depth + 1;
+    // Both operands are always visited, so an error on either side is reported.
+    let lhs = visit_node(&expr.left, index_info, child_depth)?;
+    let rhs = visit_node(&expr.right, index_info, child_depth)?;
+    // `zip` is `None` unless both sides produced an indexed expression.
+    Ok(lhs.zip(rhs).and_then(|(lhs, rhs)| lhs.maybe_or(rhs)))
+}
+
+fn visit_binary_expr(
+    expr: &datafusion_expr::BinaryExpr,
+    index_info: &dyn IndexInformationProvider,
+    depth: usize,
+) -> Result<Option<IndexedExpression>> {
+    match &expr.op {
+        Operator::Lt | Operator::LtEq | Operator::Gt | Operator::GtEq | Operator::Eq => {
+            Ok(visit_comparison(expr, index_info))
+        }
+        Operator::NotEq => Ok(visit_comparison(expr, index_info).and_then(|node| node.maybe_not())),
+        Operator::And => visit_and(expr, index_info, depth),
+        Operator::Or => visit_or(expr, index_info, depth),
+        _ => Ok(None),
+    }
+}
+
+/// Handle a scalar function call whose first argument refers to an indexed column.
+fn visit_scalar_fn(
+    scalar_fn: &datafusion_expr::expr::ScalarFunction,
+    index_info: &dyn IndexInformationProvider,
+) -> Option<IndexedExpression> {
+    // `first()` is `None` for a zero-argument call, which cannot name a column.
+    let target = scalar_fn.args.first()?;
+    let (col, data_type, query_parser) = maybe_indexed_column(target, index_info)?;
+    query_parser.visit_scalar_function(&col, &data_type, &scalar_fn.func, &scalar_fn.args)
+}
+
+/// Handle `expr LIKE pattern` where the pattern is a literal.
+fn visit_like_expr(
+    like: &Like,
+    index_info: &dyn IndexInformationProvider,
+) -> Option<IndexedExpression> {
+    let (column, _, query_parser) = maybe_indexed_column(&like.expr, index_info)?;
+
+    // Only a literal pattern can be handed to the parser.
+    let Expr::Literal(pattern, _) = like.pattern.as_ref() else {
+        return None;
+    };
+    query_parser.visit_like(&column, like, pattern)
+}
+
+fn maybe_scalar(expr: &Expr, expected_type: &DataType) -> Option<ScalarValue> {
+    match expr {
+        Expr::Literal(value, _) => coerce_scalar(value, expected_type),
+        Expr::Cast(cast) => match cast.expr.as_ref() {
+            Expr::Literal(value, _) => {
+                let casted = value.cast_to(&cast.data_type).ok()?;
+                coerce_scalar(&casted, expected_type)
+            }
+            _ => None,
+        },
+        // arrow_cast(value, 'type') is represented as a ScalarFunction, not a Cast.
+        // This commonly arises for types not expressible in SQL literals (e.g. fixed-size-binary).
+        Expr::ScalarFunction(scalar_function) => {
+            if scalar_function.name() == "arrow_cast" && scalar_function.args.len() == 2 {
+                match (&scalar_function.args[0], &scalar_function.args[1]) {
+                    (Expr::Literal(value, _), Expr::Literal(cast_type, _)) => {
+                        let target_field = scalar_function
+                            .func
+                            .return_field_from_args(ReturnFieldArgs {
+                                arg_fields: &[
+                                    Arc::new(Field::new("expression", value.data_type(), false)),
+                                    Arc::new(Field::new("datatype", cast_type.data_type(), false)),
+                                ],
+                                scalar_arguments: &[Some(value), Some(cast_type)],
+                            })
+                            .ok()?;
+                        let casted = value.cast_to(target_field.data_type()).ok()?;
+                        coerce_scalar(&casted, expected_type)
+                    }
+                    _ => None,
+                }
+            } else {
+                None
+            }
+        }
+        _ => None,
+    }
+}
+
+fn coerce_scalar(value: &ScalarValue, expected_type: &DataType) -> Option<ScalarValue> {
+    if value.data_type() == *expected_type {
+        return Some(value.clone());
+    }
+    value.cast_to(expected_type).ok()
+}
+
+/// Convert every expression in `exprs` to a scalar coerced to `expected_type`.
+///
+/// Each element goes through `maybe_scalar`. The result is `None` as soon as
+/// one element cannot be converted; otherwise it holds the scalars in input
+/// order.
+/// An empty `exprs` produces `Some` of an empty vector.
+fn maybe_scalar_list(exprs: &[Expr], expected_type: &DataType) -> Option<Vec<ScalarValue>> {
+    // Collecting into `Option<Vec<_>>` short-circuits: iteration stops at the
+    // first element that yields `None`, so later expressions are not examined.
+    exprs
+        .iter()
+        .map(|expr| maybe_scalar(expr, expected_type))
+        .collect()
+}
diff --git a/rust/lance-index-core/src/scalar/mod.rs b/rust/lance-index-core/src/scalar/mod.rs
new file mode 100644
index 00000000000..5561415205e
--- /dev/null
+++ b/rust/lance-index-core/src/scalar/mod.rs
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Abstract scalar index traits and types
+
+use arrow_array::{BooleanArray, RecordBatch, UInt64Array};
+use arrow_schema::Schema;
+use async_trait::async_trait;
+use bytes::Bytes;
+use datafusion::physical_plan::SendableRecordBatchStream;
+use lance_core::deepsize::DeepSizeOf;
+use lance_core::{Error, Result};
+use lance_io::stream::{RecordBatchStream, RecordBatchStreamAdapter};
+use lance_select::{NullableRowAddrSet, RowAddrTreeMap, RowSetOps};
+use roaring::RoaringBitmap;
+use serde::Serialize;
+use std::collections::HashMap;
+use std::pin::Pin;
+use std::{any::Any, sync::Arc};
+
+/// Metadata about a single file within an index.
+#[derive(Debug, Clone, PartialEq, DeepSizeOf)]
+pub struct IndexFile {
+    /// Path relative to the index directory
+    pub path: String,
+    /// Size of the file in bytes
+    pub size_bytes: u64,
+}
+
+use crate::index::{Index, IndexParams};
+use crate::metrics::MetricsCollector;
+use crate::scalar::registry::TrainingCriteria;
+
+pub mod expression;
+pub mod registry;
+
+pub const LANCE_SCALAR_INDEX: &str = "__lance_scalar_index";
+
+/// Summary of a completed index file write.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct IndexWriteSummary {
+    /// The final size of the index file in bytes.
+    pub size_bytes: u64,
+}
+
+/// Builtin index types supported by the Lance library
+///
+/// This is primarily for convenience to avoid a bunch of string
+/// constants and provide some auto-complete.  This type should not
+/// be used in the manifest as plugins cannot add new entries.
+#[derive(Debug, Clone, PartialEq, Eq, DeepSizeOf)]
+pub enum BuiltinIndexType {
+    BTree,
+    Bitmap,
+    LabelList,
+    NGram,
+    ZoneMap,
+    BloomFilter,
+    RTree,
+    Inverted,
+    FMIndex,
+}
+
+impl BuiltinIndexType {
+    pub fn as_str(&self) -> &str {
+        match self {
+            Self::BTree => "btree",
+            Self::Bitmap => "bitmap",
+            Self::LabelList => "labellist",
+            Self::NGram => "ngram",
+            Self::ZoneMap => "zonemap",
+            Self::Inverted => "inverted",
+            Self::BloomFilter => "bloomfilter",
+            Self::RTree => "rtree",
+            Self::FMIndex => "fmindex",
+        }
+    }
+}
+
+use crate::index::IndexType;
+
+impl TryFrom<IndexType> for BuiltinIndexType {
+    type Error = Error;
+
+    fn try_from(value: IndexType) -> Result<Self> {
+        match value {
+            IndexType::BTree => Ok(Self::BTree),
+            IndexType::Bitmap => Ok(Self::Bitmap),
+            IndexType::LabelList => Ok(Self::LabelList),
+            IndexType::NGram => Ok(Self::NGram),
+            IndexType::ZoneMap => Ok(Self::ZoneMap),
+            IndexType::Inverted => Ok(Self::Inverted),
+            IndexType::BloomFilter => Ok(Self::BloomFilter),
+            IndexType::RTree => Ok(Self::RTree),
+            IndexType::FMIndex => Ok(Self::FMIndex),
+            _ => Err(Error::index("Invalid index type".to_string())),
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct ScalarIndexParams {
+    /// The type of index to create
+    ///
+    /// Plugins may add additional index types.  Index type lookup is case-insensitive.
+    pub index_type: String,
+    /// The parameters to train the index
+    ///
+    /// This should be a JSON string.  The contents of the JSON string will be specific to the
+    /// index type.  If not set, then default parameters will be used for the index type.
+    pub params: Option<String>,
+}
+
+impl Default for ScalarIndexParams {
+    fn default() -> Self {
+        Self {
+            index_type: BuiltinIndexType::BTree.as_str().to_string(),
+            params: None,
+        }
+    }
+}
+
+impl ScalarIndexParams {
+    /// Creates a new ScalarIndexParams from one of the builtin index types
+    pub fn for_builtin(index_type: BuiltinIndexType) -> Self {
+        Self {
+            index_type: index_type.as_str().to_string(),
+            params: None,
+        }
+    }
+
+    /// Create a new ScalarIndexParams with the given index type
+    pub fn new(index_type: String) -> Self {
+        Self {
+            index_type,
+            params: None,
+        }
+    }
+
+    /// Set the parameters for the index
+    pub fn with_params<ParamsType: Serialize>(mut self, params: &ParamsType) -> Self {
+        self.params = Some(serde_json::to_string(params).unwrap());
+        self
+    }
+}
+
+impl IndexParams for ScalarIndexParams {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn index_name(&self) -> &str {
+        LANCE_SCALAR_INDEX
+    }
+}
+
+/// Trait for storing an index (or parts of an index) into storage
+#[async_trait]
+pub trait IndexWriter: Send {
+    /// Writes a record batch into the file, returning the 0-based index of the batch in the file
+    ///
+    /// E.g. if this is the third time this is called this method will return 2
+    async fn write_record_batch(&mut self, batch: RecordBatch) -> Result<u64>;
+    /// Adds a global buffer and returns its index.
+    async fn add_global_buffer(&mut self, _data: Bytes) -> Result<u32> {
+        Err(Error::not_supported(
+            "global buffers are not supported by this index writer",
+        ))
+    }
+    /// Finishes writing the file and closes the file
+    async fn finish(&mut self) -> Result<IndexFile>;
+    /// Finishes writing the file and closes the file with additional metadata
+    async fn finish_with_metadata(
+        &mut self,
+        metadata: HashMap<String, String>,
+    ) -> Result<IndexFile>;
+}
+
+/// Trait for reading an index (or parts of an index) from storage
+#[async_trait]
+pub trait IndexReader: Send + Sync {
+    /// Read the n-th record batch from the file
+    async fn read_record_batch(&self, n: u64, batch_size: u64) -> Result<RecordBatch>;
+    /// Reads a global buffer by index.
+    async fn read_global_buffer(&self, _index: u32) -> Result<Bytes> {
+        Err(Error::not_supported(
+            "global buffers are not supported by this index reader",
+        ))
+    }
+    /// Read the range of rows from the file.
+    /// If projection is Some, only return the columns in the projection,
+    /// nested columns like Some(&["x.y"]) are not supported.
+    /// If projection is None, return all columns.
+    async fn read_range(
+        &self,
+        range: std::ops::Range<usize>,
+        projection: Option<&[&str]>,
+    ) -> Result<RecordBatch>;
+    /// Read multiple ranges and concatenate into a single batch.
+    /// Default impl runs `read_range`s in parallel via `try_join_all`.
+    async fn read_ranges(
+        &self,
+        ranges: &[std::ops::Range<usize>],
+        projection: Option<&[&str]>,
+    ) -> Result<RecordBatch> {
+        if ranges.is_empty() {
+            return self.read_range(0..0, projection).await;
+        }
+        let futures = ranges
+            .iter()
+            .map(|r| self.read_range(r.clone(), projection));
+        let batches = futures::future::try_join_all(futures).await?;
+        let schema = batches[0].schema();
+        Ok(arrow_select::concat::concat_batches(&schema, &batches)?)
+    }
+    /// Read a range of rows as a stream of record batches.
+    ///
+    /// This allows the caller to process rows incrementally without loading the
+    /// entire range into memory at once.
+    ///
+    /// The default implementation falls back to [`Self::read_range`] and wraps
+    /// the result in a single-item stream.
+    async fn read_range_stream(
+        &self,
+        range: std::ops::Range<usize>,
+        projection: Option<&[&str]>,
+    ) -> Result<Pin<Box<dyn RecordBatchStream>>> {
+        let batch = self.read_range(range, projection).await?;
+        let schema = batch.schema();
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move { Ok(batch) }),
+        )))
+    }
+    /// Return the number of batches in the file
+    async fn num_batches(&self, batch_size: u64) -> u32;
+    /// Return the number of rows in the file
+    fn num_rows(&self) -> usize;
+    /// Return the schema of the file
+    fn schema(&self) -> &lance_core::datatypes::Schema;
+    /// Best-effort on-disk byte size of the file when the reader already knows it
+    /// without extra I/O, else `None`. Used to size prewarm chunks.
+    fn file_size_bytes(&self) -> Option<u64> {
+        None
+    }
+}
+
+/// Trait abstracting I/O away from index logic
+///
+/// Scalar indices are currently serialized as indexable arrow record batches stored in
+/// named "files".  The index store is responsible for serializing and deserializing
+/// these batches into file data (e.g. as .lance files or .parquet files, etc.)
+#[async_trait]
+pub trait IndexStore: std::fmt::Debug + Send + Sync + DeepSizeOf {
+    fn as_any(&self) -> &dyn Any;
+    fn clone_arc(&self) -> Arc<dyn IndexStore>;
+
+    /// Suggested I/O parallelism for the store
+    fn io_parallelism(&self) -> usize;
+
+    /// Create a new file and return a writer to store data in the file
+    async fn new_index_file(&self, name: &str, schema: Arc<Schema>)
+    -> Result<Box<dyn IndexWriter>>;
+
+    /// Open an existing file for retrieval
+    async fn open_index_file(&self, name: &str) -> Result<Arc<dyn IndexReader>>;
+
+    /// Copy an index file from this store to another, keeping the same name
+    ///
+    /// This is often useful when remapping or updating
+    async fn copy_index_file(&self, name: &str, dest_store: &dyn IndexStore) -> Result<IndexFile>;
+
+    /// Copy an index file from this store to a new name in another store, leaving the source intact
+    async fn copy_index_file_to(
+        &self,
+        name: &str,
+        new_name: &str,
+        dest_store: &dyn IndexStore,
+    ) -> Result<IndexFile> {
+        if name == new_name {
+            self.copy_index_file(name, dest_store).await
+        } else {
+            Err(Error::not_supported(format!(
+                "copying index file {name} to {new_name} is not supported by this index store"
+            )))
+        }
+    }
+
+    /// Rename an index file
+    async fn rename_index_file(&self, name: &str, new_name: &str) -> Result<IndexFile>;
+
+    /// Delete an index file (used in the tmp spill store to keep tmp size down)
+    async fn delete_index_file(&self, name: &str) -> Result<()>;
+
+    /// List all files in the index directory with their sizes.
+    ///
+    /// Returns one [`IndexFile`] (relative path and size in bytes) per file.
+    /// Used to capture file metadata after index creation/modification.
+    async fn list_files_with_sizes(&self) -> Result<Vec<IndexFile>>;
+}
+
+/// Different scalar indices may support different kinds of queries
+///
+/// For example, a btree index can support a wide range of queries (e.g. x > 7)
+/// while an index based on FTS only supports queries like "x LIKE 'foo'"
+///
+/// This trait is used when we need an object that can represent any kind of query
+///
+/// Note: if you are implementing this trait for a query type then you probably also
+/// need to implement the [crate::scalar::expression::ScalarQueryParser] trait to
+/// create instances of your query at parse time.
+pub trait AnyQuery: std::fmt::Debug + Any + Send + Sync {
+    /// Cast the query as Any to allow for downcasting
+    fn as_any(&self) -> &dyn Any;
+    /// Format the query as a string for display purposes
+    fn format(&self, col: &str) -> String;
+    /// Convert the query to a datafusion expression
+    fn to_expr(&self, col: String) -> datafusion_expr::Expr;
+    /// Compare this query to another query
+    fn dyn_eq(&self, other: &dyn AnyQuery) -> bool;
+}
+
+impl PartialEq for dyn AnyQuery {
+    fn eq(&self, other: &Self) -> bool {
+        self.dyn_eq(other)
+    }
+}
+
+/// The result of a search operation against a scalar index
+#[derive(Debug, PartialEq)]
+pub enum SearchResult {
+    /// The exact row ids that satisfy the query
+    Exact(NullableRowAddrSet),
+    /// Any row id satisfying the query will be in this set but not every
+    /// row id in this set will satisfy the query, a further recheck step
+    /// is needed
+    AtMost(NullableRowAddrSet),
+    /// All of the given row ids satisfy the query but there may be more
+    ///
+    /// No scalar index actually returns this today but it can arise from
+    /// boolean operations (e.g. NOT(AtMost(x)) == AtLeast(NOT(x)))
+    AtLeast(NullableRowAddrSet),
+}
+
+impl SearchResult {
+    pub fn exact(row_ids: impl Into<RowAddrTreeMap>) -> Self {
+        Self::Exact(NullableRowAddrSet::new(row_ids.into(), Default::default()))
+    }
+
+    pub fn at_most(row_ids: impl Into<RowAddrTreeMap>) -> Self {
+        Self::AtMost(NullableRowAddrSet::new(row_ids.into(), Default::default()))
+    }
+
+    pub fn at_least(row_ids: impl Into<RowAddrTreeMap>) -> Self {
+        Self::AtLeast(NullableRowAddrSet::new(row_ids.into(), Default::default()))
+    }
+
+    pub fn with_nulls(self, nulls: impl Into<RowAddrTreeMap>) -> Self {
+        match self {
+            Self::Exact(row_ids) => Self::Exact(row_ids.with_nulls(nulls.into())),
+            Self::AtMost(row_ids) => Self::AtMost(row_ids.with_nulls(nulls.into())),
+            Self::AtLeast(row_ids) => Self::AtLeast(row_ids.with_nulls(nulls.into())),
+        }
+    }
+
+    pub fn row_addrs(&self) -> &NullableRowAddrSet {
+        match self {
+            Self::Exact(row_addrs) => row_addrs,
+            Self::AtMost(row_addrs) => row_addrs,
+            Self::AtLeast(row_addrs) => row_addrs,
+        }
+    }
+
+    pub fn is_exact(&self) -> bool {
+        matches!(self, Self::Exact(_))
+    }
+}
+
+/// Brief information about an index that was created
+pub struct CreatedIndex {
+    /// The details of the index that was created
+    ///
+    /// These should be stored somewhere as they will be needed to
+    /// load the index later.
+    pub index_details: prost_types::Any,
+    /// The version of the index that was created
+    ///
+    /// This can be used to determine if a reader is able to load the index.
+    pub index_version: u32,
+    /// List of files and their sizes for this index
+    ///
+    /// This enables skipping HEAD calls when opening indices and provides
+    /// visibility into index storage size via describe_indices().
+    pub files: Option<Vec<IndexFile>>,
+}
+
+/// The criteria that specifies how to update an index
+pub struct UpdateCriteria {
+    /// If true, then we need to read the old data to update the index
+    ///
+    /// This should be avoided if possible but is left in for some legacy paths
+    pub requires_old_data: bool,
+    /// The criteria required for data (both old and new)
+    pub data_criteria: TrainingCriteria,
+}
+
+/// Filter used when merging existing scalar-index rows during update.
+///
+/// The caller must pick a filter mode that matches the row-id semantics of the
+/// dataset:
+/// - address-style row IDs: fragment filtering is valid
+/// - stable row IDs: use exact row-id membership instead
+#[derive(Debug, Clone)]
+pub enum OldIndexDataFilter {
+    /// Keeps track of which fragments are still valid and which are no longer valid.
+    ///
+    /// This is valid for address-style row IDs.
+    Fragments {
+        to_keep: RoaringBitmap,
+        to_remove: RoaringBitmap,
+    },
+    /// Keep old rows whose row IDs are in this exact allow-list.
+    ///
+    /// This is required for stable row IDs, where row IDs are opaque and
+    /// should not be interpreted as encoded row addresses.
+    RowIds(RowAddrTreeMap),
+}
+
+impl OldIndexDataFilter {
+    /// Build a boolean mask that keeps only row IDs selected by this filter.
+    pub fn filter_row_ids(&self, row_ids: &UInt64Array) -> BooleanArray {
+        match self {
+            Self::Fragments { to_keep, .. } => row_ids
+                .iter()
+                .map(|id| id.map(|id| to_keep.contains((id >> 32) as u32)))
+                .collect(),
+            Self::RowIds(valid_row_ids) => row_ids
+                .iter()
+                .map(|id| id.map(|id| valid_row_ids.contains(id)))
+                .collect(),
+        }
+    }
+}
+
+impl UpdateCriteria {
+    pub fn requires_old_data(data_criteria: TrainingCriteria) -> Self {
+        Self {
+            requires_old_data: true,
+            data_criteria,
+        }
+    }
+
+    pub fn only_new_data(data_criteria: TrainingCriteria) -> Self {
+        Self {
+            requires_old_data: false,
+            data_criteria,
+        }
+    }
+}
+
+/// Compute the lexicographically next prefix by incrementing the last character's code point.
+/// Returns None if no valid upper bound exists.
+///
+/// This is used for LIKE prefix queries to convert `LIKE 'foo%'` to range `[foo, fop)`.
+///
+/// # UTF-8 and Unicode Handling
+///
+/// This function operates on Unicode code points (characters), not bytes. Since UTF-8
+/// byte ordering is identical to Unicode code point ordering, incrementing a character's
+/// code point produces the correct lexicographic successor for byte-wise string comparison.
+///
+/// An increment that would land in the surrogate range (U+D800-U+DFFF) skips to U+E000.
+/// If a character is already U+10FFFF, it is dropped and the previous one is incremented.
+///
+/// Examples:
+/// - `"foo"` → `Some("fop")`
+/// - `"café"` → `Some("cafê")`  (é U+00E9 → ê U+00EA)
+/// - `"abc中"` → `Some("abc丮")` (中 U+4E2D → 丮 U+4E2E)
+/// - `"cafÿ"` → `Some("cafĀ")` (ÿ U+00FF → Ā U+0100)
+pub fn compute_next_prefix(prefix: &str) -> Option<String> {
+    if prefix.is_empty() {
+        return None;
+    }
+
+    let chars: Vec<char> = prefix.chars().collect();
+
+    // Try incrementing characters from right to left
+    for i in (0..chars.len()).rev() {
+        if let Some(next_char) = next_unicode_char(chars[i]) {
+            let mut result: String = chars[..i].iter().collect();
+            result.push(next_char);
+            return Some(result);
+        }
+        // This character cannot be incremented (e.g., U+10FFFF), try previous
+    }
+
+    // All characters were at maximum value
+    None
+}
+
+/// Get the next valid Unicode scalar value after the given character.
+/// Skips the surrogate range (U+D800-U+DFFF) which is not valid in UTF-8.
+fn next_unicode_char(c: char) -> Option<char> {
+    let cp = c as u32;
+    let next_cp = cp.checked_add(1)?;
+
+    // Skip surrogate range (U+D800-U+DFFF)
+    let next_cp = if (0xD800..=0xDFFF).contains(&next_cp) {
+        0xE000
+    } else {
+        next_cp
+    };
+
+    char::from_u32(next_cp)
+}
+
+/// A trait for a scalar index, a structure that can determine row ids that satisfy scalar queries
+#[async_trait]
+pub trait ScalarIndex: Send + Sync + std::fmt::Debug + Index + DeepSizeOf {
+    /// Search the scalar index
+    ///
+    /// Returns all row ids that satisfy the query, these row ids are not necessarily ordered
+    async fn search(
+        &self,
+        query: &dyn AnyQuery,
+        metrics: &dyn MetricsCollector,
+    ) -> Result<SearchResult>;
+
+    /// Returns true if the remap operation is supported
+    fn can_remap(&self) -> bool;
+
+    /// Remap the row ids, creating a new remapped version of this index in `dest_store`
+    async fn remap(
+        &self,
+        mapping: &HashMap<u64, Option<u64>>,
+        dest_store: &dyn IndexStore,
+    ) -> Result<CreatedIndex>;
+
+    /// Add the new data into the index, creating an updated version of the index in `dest_store`
+    ///
+    /// If `old_data_filter` is provided, old index data will be filtered before
+    /// merge according to the chosen filter mode.
+    async fn update(
+        &self,
+        new_data: SendableRecordBatchStream,
+        dest_store: &dyn IndexStore,
+        old_data_filter: Option<OldIndexDataFilter>,
+    ) -> Result<CreatedIndex>;
+
+    /// Returns the criteria that will be used to update the index
+    fn update_criteria(&self) -> UpdateCriteria;
+
+    /// Derive the index parameters from the current index
+    ///
+    /// This returns a ScalarIndexParams that can be used to recreate an index
+    /// with the same configuration on another dataset.
+    fn derive_index_params(&self) -> Result<ScalarIndexParams>;
+}
diff --git a/rust/lance-index-core/src/scalar/registry.rs b/rust/lance-index-core/src/scalar/registry.rs
new file mode 100644
index 00000000000..b10c1c16824
--- /dev/null
+++ b/rust/lance-index-core/src/scalar/registry.rs
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::borrow::Cow;
+use std::sync::Arc;
+
+use arrow_schema::Field;
+use async_trait::async_trait;
+use datafusion::execution::SendableRecordBatchStream;
+use lance_core::{
+    Result,
+    cache::{LanceCache, UnsizedCacheKey},
+};
+
+use crate::progress::IndexBuildProgress;
+use crate::registry::PluginRegistry;
+use crate::row_id_remap::RowIdRemapper;
+use crate::scalar::{CreatedIndex, IndexStore, ScalarIndex, expression::ScalarQueryParser};
+
+pub const VALUE_COLUMN_NAME: &str = "value";
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TrainingOrdering {
+    /// The input will arrive sorted by the value column in ascending order
+    Values,
+    /// The input will arrive sorted by the address column in ascending order
+    Addresses,
+    /// The input will arrive in an arbitrary order
+    None,
+}
+
+#[derive(Debug, Clone)]
+pub struct TrainingCriteria {
+    pub ordering: TrainingOrdering,
+    pub needs_row_ids: bool,
+    pub needs_row_addrs: bool,
+}
+
+impl TrainingCriteria {
+    pub fn new(ordering: TrainingOrdering) -> Self {
+        Self {
+            ordering,
+            needs_row_ids: false,
+            needs_row_addrs: false,
+        }
+    }
+
+    pub fn with_row_id(mut self) -> Self {
+        self.needs_row_ids = true;
+        self
+    }
+
+    pub fn with_row_addr(mut self) -> Self {
+        self.needs_row_addrs = true;
+        self
+    }
+}
+
+/// A trait that describes what criteria are needed to train an index
+///
+/// The training process has two steps.  First, the parameters are given to the
+/// plugin and it creates a TrainingRequest.  Then, the caller prepares the training
+/// data and calls train_index.
+///
+/// The call to train_index will include the training request.  This allows the plugin
+/// to stash any deserialized parameter info in the request and fetch it later during
+/// training by downcasting to the appropriate type.
+pub trait TrainingRequest: std::any::Any + Send + Sync {
+    fn as_any(&self) -> &dyn std::any::Any;
+    fn criteria(&self) -> &TrainingCriteria;
+}
+
+/// A default training request impl for indexes that don't need any parameters
+pub struct DefaultTrainingRequest {
+    criteria: TrainingCriteria,
+}
+
+impl DefaultTrainingRequest {
+    pub fn new(criteria: TrainingCriteria) -> Self {
+        Self { criteria }
+    }
+}
+
+impl TrainingRequest for DefaultTrainingRequest {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn criteria(&self) -> &TrainingCriteria {
+        &self.criteria
+    }
+}
+
+/// A trait for scalar index plugins
+#[async_trait]
+pub trait ScalarIndexPlugin: Send + Sync + std::fmt::Debug {
+    /// Creates a new training request from the given parameters
+    ///
+    /// This training request specifies the criteria that the data must satisfy to train the index.
+    /// For example, does the index require the input data to be sorted?
+    fn new_training_request(&self, params: &str, field: &Field)
+    -> Result<Box<dyn TrainingRequest>>;
+
+    /// Train a new index
+    ///
+    /// The provided data must fulfill the criteria of `request` ([`TrainingRequest::criteria`]).
+    /// It is the caller's responsibility to ensure this.
+    ///
+    /// Returns index details that describe the index.  These details can potentially be
+    /// useful for planning (although this will currently require inside information on
+    /// the index type) and they will need to be provided when loading the index.
+    ///
+    /// It is the caller's responsibility to store these details somewhere.
+    async fn train_index(
+        &self,
+        data: SendableRecordBatchStream,
+        index_store: &dyn IndexStore,
+        request: Box<dyn TrainingRequest>,
+        fragment_ids: Option<Vec<u32>>,
+        progress: Arc<dyn IndexBuildProgress>,
+    ) -> Result<CreatedIndex>;
+
+    /// A short name for the index
+    ///
+    /// This is a friendly name for display purposes and also can be used as an alias for
+    /// the index type URL.  If multiple plugins have the same name, then the first one
+    /// found will be used.
+    ///
+    /// By convention this is MixedCase with no spaces.  When used as an alias, it will be
+    /// compared case-insensitively.
+    fn name(&self) -> &str;
+
+    /// Returns true if the index returns an exact answer (e.g. not AtMost)
+    fn provides_exact_answer(&self) -> bool;
+
+    /// The version of the index plugin
+    ///
+    /// We assume that indexes are not forwards compatible.  If an index was written with a
+    /// newer version than this, it cannot be read
+    fn version(&self) -> u32;
+
+    /// Returns a new query parser for the index
+    ///
+    /// Can return None if this index cannot participate in query optimization
+    fn new_query_parser(
+        &self,
+        index_name: String,
+        index_details: &prost_types::Any,
+    ) -> Option<Box<dyn ScalarQueryParser>>;
+
+    /// Load an index from storage
+    ///
+    /// The index details should match the details that were returned when the index was
+    /// originally trained.
+    async fn load_index(
+        &self,
+        index_store: Arc<dyn IndexStore>,
+        index_details: &prost_types::Any,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
+        cache: &LanceCache,
+    ) -> Result<Arc<dyn ScalarIndex>>;
+
+    /// Look up a previously-opened index in the cache.
+    ///
+    /// `cache` is already per-index namespaced by the caller, so a plugin's key
+    /// only needs to disambiguate entries within a single index.
+    ///
+    /// The default implementation reads an in-memory `Arc<dyn ScalarIndex>` entry.
+    /// Plugins whose index has a serializable representation should override this
+    /// (together with [`put_in_cache`](Self::put_in_cache)) to store that
+    /// representation under a sized [`CacheKey`](lance_core::cache::CacheKey) with
+    /// a codec, and reconstruct the index here. `index_store` and
+    /// `frag_reuse_index` are provided so the override can rebuild the index
+    /// without re-reading metadata.
+    async fn get_from_cache(
+        &self,
+        _index_store: Arc<dyn IndexStore>,
+        _frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
+        cache: &LanceCache,
+    ) -> Result<Option<Arc<dyn ScalarIndex>>> {
+        Ok(cache.get_unsized_with_key(&ScalarIndexCacheKey).await)
+    }
+
+    /// Store a freshly-opened index in the cache.
+    ///
+    /// `cache` is already per-index namespaced; see
+    /// [`get_from_cache`](Self::get_from_cache).
+    ///
+    /// The default implementation stores the `Arc<dyn ScalarIndex>` in-memory.
+    async fn put_in_cache(&self, cache: &LanceCache, index: Arc<dyn ScalarIndex>) -> Result<()> {
+        cache
+            .insert_unsized_with_key(&ScalarIndexCacheKey, index)
+            .await;
+        Ok(())
+    }
+
+    /// Optional hook allowing a plugin to provide statistics without loading the index.
+    async fn load_statistics(
+        &self,
+        _index_store: Arc<dyn IndexStore>,
+        _index_details: &prost_types::Any,
+    ) -> Result<Option<serde_json::Value>> {
+        Ok(None)
+    }
+
+    /// Optional hook that plugins can use if they need to be aware of the registry
+    fn attach_registry(&self, _registry: Arc<dyn PluginRegistry>) {}
+
+    /// Returns a JSON string representation of the provided index details
+    ///
+    /// These details will be user-visible and should be considered part of the public
+    /// API.  As a result, efforts should be made to ensure the information is backwards
+    /// compatible and avoid breaking changes.
+    fn details_as_json(&self, _details: &prost_types::Any) -> Result<serde_json::Value> {
+        Ok(serde_json::json!({}))
+    }
+}
+
+/// In-memory cache key for a whole `Arc<dyn ScalarIndex>`.
+///
+/// Used by the default [`ScalarIndexPlugin::get_from_cache`] /
+/// [`ScalarIndexPlugin::put_in_cache`] implementations. The cache is already
+/// per-index namespaced by the caller, so a constant key suffices. Trait objects
+/// cannot be serialized, so this is an [`UnsizedCacheKey`] with no codec —
+/// plugins that want a persistable cache entry override those methods with a
+/// sized key.
+pub struct ScalarIndexCacheKey;
+
+impl UnsizedCacheKey for ScalarIndexCacheKey {
+    type ValueType = dyn ScalarIndex;
+
+    fn key(&self) -> Cow<'_, str> {
+        Cow::Borrowed("scalar_index")
+    }
+
+    fn type_name() -> &'static str {
+        "ScalarIndex"
+    }
+}
diff --git a/rust/lance-index/Cargo.toml b/rust/lance-index/Cargo.toml
index 85de43c0f9b..58aa5491c6f 100644
--- a/rust/lance-index/Cargo.toml
+++ b/rust/lance-index/Cargo.toml
@@ -39,6 +39,7 @@ jsonb.workspace = true
 lance-arrow.workspace = true
 lance-arrow-stats.workspace = true
 lance-core.workspace = true
+lance-index-core.workspace = true
 lance-datafusion.workspace = true
 lance-encoding.workspace = true
 lance-file.workspace = true
diff --git a/rust/lance-index/src/frag_reuse.rs b/rust/lance-index/src/frag_reuse.rs
index d09d8dc0684..b6342853735 100644
--- a/rust/lance-index/src/frag_reuse.rs
+++ b/rust/lance-index/src/frag_reuse.rs
@@ -7,60 +7,4 @@
 //! [`lance_table::system_index::frag_reuse`]; this module re-exports them and
 //! implements the local [`Index`] trait for [`FragReuseIndex`].
 
-use std::any::Any;
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use lance_core::{Error, Result};
-use roaring::RoaringBitmap;
-use serde::Serialize;
-
 pub use lance_table::system_index::frag_reuse::*;
-
-use crate::{Index, IndexType};
-
-#[derive(Serialize)]
-struct FragReuseStatistics {
-    num_versions: usize,
-}
-
-#[async_trait]
-impl Index for FragReuseIndex {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn as_index(self: Arc<Self>) -> Arc<dyn Index> {
-        self
-    }
-
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn crate::vector::VectorIndex>> {
-        Err(Error::not_supported_source(
-            "FragReuseIndex is not a vector index".into(),
-        ))
-    }
-
-    fn statistics(&self) -> Result<serde_json::Value> {
-        let stats = FragReuseStatistics {
-            num_versions: self.details.versions.len(),
-        };
-        serde_json::to_value(stats).map_err(|e| {
-            Error::internal(format!(
-                "failed to serialize fragment reuse index statistics: {}",
-                e
-            ))
-        })
-    }
-
-    async fn prewarm(&self) -> Result<()> {
-        Ok(())
-    }
-
-    fn index_type(&self) -> IndexType {
-        IndexType::FragmentReuse
-    }
-
-    async fn calculate_included_frags(&self) -> Result<RoaringBitmap> {
-        unimplemented!()
-    }
-}
diff --git a/rust/lance-index/src/lib.rs b/rust/lance-index/src/lib.rs
index 20e1c2692d9..2ff2119d9b2 100644
--- a/rust/lance-index/src/lib.rs
+++ b/rust/lance-index/src/lib.rs
@@ -9,16 +9,8 @@
 //! API stability is not guaranteed.
 //! </section>
 
-use std::{any::Any, sync::Arc};
-
 use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
 use crate::mem_wal::MEM_WAL_INDEX_NAME;
-use async_trait::async_trait;
-use lance_core::deepsize::DeepSizeOf;
-use lance_core::{Error, Result};
-use roaring::RoaringBitmap;
-use serde::{Deserialize, Serialize};
-use std::convert::TryFrom;
 
 pub mod frag_reuse;
 pub mod mem_wal;
@@ -33,6 +25,11 @@ pub mod vector;
 
 pub use crate::traits::*;
 
+pub use lance_index_core::RowIdRemapper;
+pub use lance_index_core::index::IndexType;
+pub use lance_index_core::index::{Index, IndexMetadata, IndexParams};
+pub use lance_index_core::row_id_remap;
+
 pub const INDEX_FILE_NAME: &str = "index.idx";
 /// The name of the auxiliary index file.
 ///
@@ -41,13 +38,7 @@ pub const INDEX_FILE_NAME: &str = "index.idx";
 pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
 pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
 
-/// Default version for vector index metadata.
-///
-/// Most vector indices should use this version unless they need to bump for a
-/// format change.
-pub const VECTOR_INDEX_VERSION: u32 = 1;
-/// Version for IVF_RQ indices.
-pub const IVF_RQ_INDEX_VERSION: u32 = 2;
+pub use lance_index_core::index::{IVF_RQ_INDEX_VERSION, VECTOR_INDEX_VERSION};
 
 /// The factor of threshold to trigger split / join for vector index.
 ///
@@ -75,289 +66,6 @@ pub mod cache_pb {
     include!(concat!(env!("OUT_DIR"), "/lance.index.cache.rs"));
 }
 
-/// Generic methods common across all types of secondary indices
-///
-#[async_trait]
-pub trait Index: Send + Sync + DeepSizeOf {
-    /// Cast to [Any].
-    fn as_any(&self) -> &dyn Any;
-
-    /// Cast to [Index]
-    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
-
-    /// Cast to [vector::VectorIndex]
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
-
-    /// Retrieve index statistics as a JSON Value
-    fn statistics(&self) -> Result<serde_json::Value>;
-
-    /// Prewarm the index.
-    ///
-    /// This will load the index into memory and cache it.
-    async fn prewarm(&self) -> Result<()>;
-
-    /// Get the type of the index
-    fn index_type(&self) -> IndexType;
-
-    /// Read through the index and determine which fragment ids are covered by the index
-    ///
-    /// This is a kind of slow operation.  It's better to use the fragment_bitmap.  This
-    /// only exists for cases where the fragment_bitmap has become corrupted or missing.
-    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
-}
-
-/// Index Type
-#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
-pub enum IndexType {
-    // Preserve 0-100 for simple indices.
-    Scalar = 0, // Legacy scalar index, alias to BTree
-
-    BTree = 1, // BTree
-
-    Bitmap = 2, // Bitmap
-
-    LabelList = 3, // LabelList
-
-    Inverted = 4, // Inverted
-
-    NGram = 5, // NGram
-
-    FragmentReuse = 6,
-
-    MemWal = 7,
-
-    ZoneMap = 8, // ZoneMap
-
-    BloomFilter = 9, // Bloom filter
-
-    RTree = 10, // RTree
-
-    Fm = 11, // FM-Index
-
-    // 100+ and up for vector index.
-    /// Flat vector index.
-    Vector = 100, // Legacy vector index, alias to IvfPq
-    IvfFlat = 101,
-    IvfSq = 102,
-    IvfPq = 103,
-    IvfHnswSq = 104,
-    IvfHnswPq = 105,
-    IvfHnswFlat = 106,
-    IvfRq = 107,
-}
-
-impl std::fmt::Display for IndexType {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        match self {
-            Self::Scalar | Self::BTree => write!(f, "BTree"),
-            Self::Bitmap => write!(f, "Bitmap"),
-            Self::LabelList => write!(f, "LabelList"),
-            Self::Inverted => write!(f, "Inverted"),
-            Self::NGram => write!(f, "NGram"),
-            Self::FragmentReuse => write!(f, "FragmentReuse"),
-            Self::MemWal => write!(f, "MemWal"),
-            Self::ZoneMap => write!(f, "ZoneMap"),
-            Self::BloomFilter => write!(f, "BloomFilter"),
-            Self::RTree => write!(f, "RTree"),
-            Self::Fm => write!(f, "Fm"),
-            Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
-            Self::IvfFlat => write!(f, "IVF_FLAT"),
-            Self::IvfSq => write!(f, "IVF_SQ"),
-            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
-            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
-            Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
-            Self::IvfRq => write!(f, "IVF_RQ"),
-        }
-    }
-}
-
-impl TryFrom<i32> for IndexType {
-    type Error = Error;
-
-    fn try_from(value: i32) -> Result<Self> {
-        match value {
-            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
-            v if v == Self::BTree as i32 => Ok(Self::BTree),
-            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
-            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
-            v if v == Self::NGram as i32 => Ok(Self::NGram),
-            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
-            v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
-            v if v == Self::MemWal as i32 => Ok(Self::MemWal),
-            v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
-            v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
-            v if v == Self::RTree as i32 => Ok(Self::RTree),
-            v if v == Self::Fm as i32 => Ok(Self::Fm),
-            v if v == Self::Vector as i32 => Ok(Self::Vector),
-            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
-            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
-            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
-            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
-            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
-            v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
-            v if v == Self::IvfRq as i32 => Ok(Self::IvfRq),
-            _ => Err(Error::invalid_input_source(
-                format!("the input value {} is not a valid IndexType", value).into(),
-            )),
-        }
-    }
-}
-
-impl TryFrom<&str> for IndexType {
-    type Error = Error;
-
-    fn try_from(value: &str) -> Result<Self> {
-        match value {
-            "BTree" | "BTREE" => Ok(Self::BTree),
-            "Bitmap" | "BITMAP" => Ok(Self::Bitmap),
-            "LabelList" | "LABELLIST" => Ok(Self::LabelList),
-            "Inverted" | "INVERTED" => Ok(Self::Inverted),
-            "NGram" | "NGRAM" => Ok(Self::NGram),
-            "ZoneMap" | "ZONEMAP" => Ok(Self::ZoneMap),
-            "BloomFilter" | "BLOOMFILTER" | "BLOOM_FILTER" => Ok(Self::BloomFilter),
-            "RTree" | "RTREE" | "R_TREE" => Ok(Self::RTree),
-            "Fm" | "FM" => Ok(Self::Fm),
-            "Vector" | "VECTOR" => Ok(Self::Vector),
-            "IVF_FLAT" => Ok(Self::IvfFlat),
-            "IVF_SQ" => Ok(Self::IvfSq),
-            "IVF_PQ" => Ok(Self::IvfPq),
-            "IVF_RQ" => Ok(Self::IvfRq),
-            "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
-            "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
-            "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
-            "FragmentReuse" => Ok(Self::FragmentReuse),
-            "MemWal" => Ok(Self::MemWal),
-            _ => Err(Error::invalid_input(format!(
-                "invalid index type: {}",
-                value
-            ))),
-        }
-    }
-}
-
-impl IndexType {
-    pub fn is_scalar(&self) -> bool {
-        matches!(
-            self,
-            Self::Scalar
-                | Self::BTree
-                | Self::Bitmap
-                | Self::LabelList
-                | Self::Inverted
-                | Self::NGram
-                | Self::ZoneMap
-                | Self::BloomFilter
-                | Self::RTree
-                | Self::Fm,
-        )
-    }
-
-    pub fn is_vector(&self) -> bool {
-        matches!(
-            self,
-            Self::Vector
-                | Self::IvfPq
-                | Self::IvfHnswSq
-                | Self::IvfHnswPq
-                | Self::IvfHnswFlat
-                | Self::IvfFlat
-                | Self::IvfSq
-                | Self::IvfRq
-        )
-    }
-
-    pub fn is_system(&self) -> bool {
-        matches!(self, Self::FragmentReuse | Self::MemWal)
-    }
-
-    /// Returns the current format version of the index type,
-    /// bump this when the index format changes.
-    /// Indices which higher version than these will be ignored for compatibility,
-    /// This would happen when creating index in a newer version of Lance,
-    /// but then opening the index in older version of Lance
-    pub fn version(&self) -> i32 {
-        match self {
-            Self::Scalar => 0,
-            Self::BTree => 0,
-            Self::Bitmap => 0,
-            Self::LabelList => 0,
-            Self::Inverted => 0,
-            Self::NGram => 0,
-            Self::FragmentReuse => 0,
-            Self::MemWal => 0,
-            Self::ZoneMap => 0,
-            Self::BloomFilter => 0,
-            Self::RTree => 0,
-            Self::Fm => 0,
-
-            // IMPORTANT: if any vector index subtype needs a format bump that is
-            // not backward compatible, its new version must be set to
-            // (current max vector index version + 1), even if only one subtype
-            // changed. Compatibility filtering currently cannot distinguish vector
-            // subtypes from details-only metadata, so vector versions effectively
-            // share one global monotonic compatibility level.
-            Self::Vector
-            | Self::IvfFlat
-            | Self::IvfSq
-            | Self::IvfPq
-            | Self::IvfHnswSq
-            | Self::IvfHnswPq
-            | Self::IvfHnswFlat => VECTOR_INDEX_VERSION as i32,
-            Self::IvfRq => IVF_RQ_INDEX_VERSION as i32,
-        }
-    }
-
-    /// Returns the target partition size for the index type.
-    ///
-    /// This is used to compute the number of partitions for the index.
-    /// The partition size is optimized for the best performance of the index.
-    ///
-    /// This is for vector indices only.
-    pub fn target_partition_size(&self) -> usize {
-        match self {
-            Self::Vector => 8192,
-            Self::IvfFlat => 4096,
-            Self::IvfSq => 8192,
-            Self::IvfPq => 8192,
-            Self::IvfRq => 4096,
-            Self::IvfHnswFlat => 1 << 20,
-            Self::IvfHnswSq => 1 << 20,
-            Self::IvfHnswPq => 1 << 20,
-            _ => 8192,
-        }
-    }
-
-    /// Returns the highest supported vector index version in this Lance build.
-    pub fn max_vector_version() -> u32 {
-        [
-            Self::Vector,
-            Self::IvfFlat,
-            Self::IvfSq,
-            Self::IvfPq,
-            Self::IvfHnswSq,
-            Self::IvfHnswPq,
-            Self::IvfHnswFlat,
-            Self::IvfRq,
-        ]
-        .into_iter()
-        .map(|index_type| index_type.version() as u32)
-        .max()
-        .unwrap_or(VECTOR_INDEX_VERSION)
-    }
-}
-
-pub trait IndexParams: Send + Sync {
-    fn as_any(&self) -> &dyn Any;
-
-    fn index_name(&self) -> &str;
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IndexMetadata {
-    #[serde(rename = "type")]
-    pub index_type: String,
-    pub distance_type: String,
-}
 
 pub fn is_system_index(index_meta: &lance_table::format::IndexMetadata) -> bool {
     index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
@@ -409,7 +117,7 @@ mod tests {
             IndexType::ZoneMap,
             IndexType::BloomFilter,
             IndexType::RTree,
-            IndexType::Fm,
+            IndexType::FMIndex,
             IndexType::Vector,
             IndexType::IvfFlat,
             IndexType::IvfSq,
@@ -451,8 +159,9 @@ mod tests {
             ("RTree", IndexType::RTree),
             ("RTREE", IndexType::RTree),
             ("R_TREE", IndexType::RTree),
-            ("Fm", IndexType::Fm),
-            ("FM", IndexType::Fm),
+            ("FMIndex", IndexType::FMIndex),
+            ("FMINDEX", IndexType::FMIndex),
+            ("FM_INDEX", IndexType::FMIndex),
             ("Vector", IndexType::Vector),
             ("VECTOR", IndexType::Vector),
             ("IVF_FLAT", IndexType::IvfFlat),
diff --git a/rust/lance-index/src/mem_wal.rs b/rust/lance-index/src/mem_wal.rs
index f8f42093894..08b76e0ea0e 100644
--- a/rust/lance-index/src/mem_wal.rs
+++ b/rust/lance-index/src/mem_wal.rs
@@ -7,68 +7,4 @@
 //! [`lance_table::system_index::mem_wal`]; this module re-exports them and
 //! implements the local [`Index`] trait for [`MemWalIndex`].
 
-use std::any::Any;
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use lance_core::Error;
-use roaring::RoaringBitmap;
-use serde::Serialize;
-
 pub use lance_table::system_index::mem_wal::*;
-
-use crate::{Index, IndexType};
-
-#[derive(Serialize)]
-struct MemWalStatistics {
-    num_shards: u32,
-    num_merged_generations: usize,
-    num_shard_specs: usize,
-    num_maintained_indexes: usize,
-    num_index_catchup_entries: usize,
-}
-
-#[async_trait]
-impl Index for MemWalIndex {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn as_index(self: Arc<Self>) -> Arc<dyn Index> {
-        self
-    }
-
-    fn as_vector_index(self: Arc<Self>) -> lance_core::Result<Arc<dyn crate::vector::VectorIndex>> {
-        Err(Error::not_supported_source(
-            "MemWalIndex is not a vector index".into(),
-        ))
-    }
-
-    fn statistics(&self) -> lance_core::Result<serde_json::Value> {
-        let stats = MemWalStatistics {
-            num_shards: self.details.num_shards,
-            num_merged_generations: self.details.merged_generations.len(),
-            num_shard_specs: self.details.sharding_specs.len(),
-            num_maintained_indexes: self.details.maintained_indexes.len(),
-            num_index_catchup_entries: self.details.index_catchup.len(),
-        };
-        serde_json::to_value(stats).map_err(|e| {
-            Error::internal(format!(
-                "failed to serialize MemWAL index statistics: {}",
-                e
-            ))
-        })
-    }
-
-    async fn prewarm(&self) -> lance_core::Result<()> {
-        Ok(())
-    }
-
-    fn index_type(&self) -> IndexType {
-        IndexType::MemWal
-    }
-
-    async fn calculate_included_frags(&self) -> lance_core::Result<RoaringBitmap> {
-        Ok(RoaringBitmap::new())
-    }
-}
diff --git a/rust/lance-index/src/metrics.rs b/rust/lance-index/src/metrics.rs
index 37e2c43d196..e2111d29f6b 100644
--- a/rust/lance-index/src/metrics.rs
+++ b/rust/lance-index/src/metrics.rs
@@ -1,98 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-/// A trait used by the index to report metrics
-///
-/// Callers can implement this trait to collect metrics
-pub trait MetricsCollector: Send + Sync {
-    /// Record partition loads
-    ///
-    /// Many indices consist of partitions that may need to be loaded
-    /// into cache.  For example, an inverted index or ngram index has a
-    /// posting list for each token.
-    ///
-    /// In the ideal case, these shards are in the cache and will not need
-    /// to be loaded from disk.  This method should not be called if the
-    /// shard is in the cache.
-    fn record_parts_loaded(&self, num_parts: usize);
-
-    /// Record a shard load
-    fn record_part_load(&self) {
-        self.record_parts_loaded(1);
-    }
-
-    /// Record an index load
-    ///
-    /// This should be called when a scalar index is loaded from storage.
-    /// It should not be called if the index is already in memory.
-    fn record_index_loads(&self, num_indexes: usize);
-
-    /// Record an index load
-    fn record_index_load(&self) {
-        self.record_index_loads(1);
-    }
-
-    /// Record the number of "comparisons" made by the index
-    ///
-    /// What exactly constitutes a comparison depends on the index type.
-    /// For example, a B-tree index may make comparisons while searching for a value.
-    /// On the other hand, a bitmap index makes comparisons when computing the intersection
-    /// of two bitmaps.
-    ///
-    /// The goal is to provide some visibility into the compute cost of the search
-    fn record_comparisons(&self, num_comparisons: usize);
-
-    /// Returns an optional sink for recording exact I/O statistics (bytes read,
-    /// IOPS, and requests) performed on behalf of this collector.
-    ///
-    /// Index implementations that read from a
-    /// [`lance_io::scheduler::ScanScheduler`] can attach the returned handle to
-    /// their file readers so the I/O performed for a single query is measured
-    /// and attributed here.  The default returns `None`, meaning the caller does
-    /// not want I/O measured (and index implementations should then take their
-    /// normal, uninstrumented read path).
-    fn io_stats(&self) -> Option<lance_io::scheduler::IoStats> {
-        None
-    }
-}
-
-/// A no-op metrics collector that does nothing
-pub struct NoOpMetricsCollector;
-
-impl MetricsCollector for NoOpMetricsCollector {
-    fn record_parts_loaded(&self, _num_parts: usize) {}
-    fn record_index_loads(&self, _num_indexes: usize) {}
-    fn record_comparisons(&self, _num_comparisons: usize) {}
-}
-
-#[derive(Default)]
-pub struct LocalMetricsCollector {
-    pub parts_loaded: AtomicUsize,
-    pub index_loads: AtomicUsize,
-    pub comparisons: AtomicUsize,
-}
-
-impl LocalMetricsCollector {
-    pub fn dump_into(self, other: &dyn MetricsCollector) {
-        other.record_parts_loaded(self.parts_loaded.load(Ordering::Relaxed));
-        other.record_index_loads(self.index_loads.load(Ordering::Relaxed));
-        other.record_comparisons(self.comparisons.load(Ordering::Relaxed));
-    }
-}
-
-impl MetricsCollector for LocalMetricsCollector {
-    fn record_parts_loaded(&self, num_parts: usize) {
-        self.parts_loaded.fetch_add(num_parts, Ordering::Relaxed);
-    }
-
-    fn record_index_loads(&self, num_indexes: usize) {
-        self.index_loads.fetch_add(num_indexes, Ordering::Relaxed);
-    }
-
-    fn record_comparisons(&self, num_comparisons: usize) {
-        self.comparisons
-            .fetch_add(num_comparisons, Ordering::Relaxed);
-    }
-}
+pub use lance_index_core::metrics::{
+    LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector,
+};
diff --git a/rust/lance-index/src/progress.rs b/rust/lance-index/src/progress.rs
index b02ec4280ba..5033f46f659 100644
--- a/rust/lance-index/src/progress.rs
+++ b/rust/lance-index/src/progress.rs
@@ -1,54 +1,4 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-use async_trait::async_trait;
-use lance_core::Result;
-use std::sync::Arc;
-
-/// Progress callback for index building and distributed index finalization.
-///
-/// Called at stage boundaries during index construction. For a single logical stream, stages are
-/// sequential: `stage_complete` is always called before the next `stage_start`, so only one stage
-/// is active at a time. Callers that orchestrate independent sub-builds in parallel may prefix
-/// stage names (for example `segment_plan[0]/merge_partitions`) to represent separate logical
-/// streams. Stage names are index-type-specific (e.g. "train_ivf", "shuffle", "merge_partitions"
-/// for vector indices; "load_data", "build_pages" for scalar indices; merge/finalization stages
-/// for distributed index construction).
-///
-/// Methods take `&self` to allow concurrent calls from within a single stage. Implementations
-/// must be thread-safe.
-#[async_trait]
-pub trait IndexBuildProgress: std::fmt::Debug + Sync + Send {
-    /// A named stage has started.
-    ///
-    /// `total` is the number of work units if known, and `unit` describes
-    /// what is being counted (e.g. "partitions", "batches", "rows").
-    async fn stage_start(&self, stage: &str, total: Option<u64>, unit: &str) -> Result<()>;
-
-    /// Progress within the current stage.
-    async fn stage_progress(&self, stage: &str, completed: u64) -> Result<()>;
-
-    /// A named stage has completed.
-    async fn stage_complete(&self, stage: &str) -> Result<()>;
-}
-
-#[derive(Debug, Clone, Default)]
-pub struct NoopIndexBuildProgress;
-
-#[async_trait]
-impl IndexBuildProgress for NoopIndexBuildProgress {
-    async fn stage_start(&self, _: &str, _: Option<u64>, _: &str) -> Result<()> {
-        Ok(())
-    }
-    async fn stage_progress(&self, _: &str, _: u64) -> Result<()> {
-        Ok(())
-    }
-    async fn stage_complete(&self, _: &str) -> Result<()> {
-        Ok(())
-    }
-}
-
-/// Helper to create a default noop progress instance.
-pub fn noop_progress() -> Arc<dyn IndexBuildProgress> {
-    Arc::new(NoopIndexBuildProgress)
-}
+pub use lance_index_core::progress::{IndexBuildProgress, NoopIndexBuildProgress, noop_progress};
diff --git a/rust/lance-index/src/registry.rs b/rust/lance-index/src/registry.rs
index 1abab781635..ceb445c3b3a 100644
--- a/rust/lance-index/src/registry.rs
+++ b/rust/lance-index/src/registry.rs
@@ -1,8 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
-use std::{collections::HashMap, sync::Arc};
 
-use lance_core::{Error, Result};
+use std::sync::Arc;
 
 #[cfg(feature = "geo")]
 use crate::scalar::rtree::RTreeIndexPlugin;
@@ -11,8 +10,7 @@ use crate::{
     scalar::{
         bitmap::BitmapIndexPlugin, bloomfilter::BloomFilterIndexPlugin, btree::BTreeIndexPlugin,
         fmindex::FMIndexPlugin, inverted::InvertedIndexPlugin, json::JsonIndexPlugin,
-        label_list::LabelListIndexPlugin, ngram::NGramIndexPlugin, registry::ScalarIndexPlugin,
-        zonemap::ZoneMapIndexPlugin,
+        label_list::LabelListIndexPlugin, ngram::NGramIndexPlugin, zonemap::ZoneMapIndexPlugin,
     },
 };
 
@@ -31,101 +29,28 @@ pub fn display_type_from_url(type_url: &str) -> &str {
         .unwrap_or(segment)
 }
 
-/// A registry of index plugins
-pub struct IndexPluginRegistry {
-    plugins: HashMap<String, Box<dyn ScalarIndexPlugin>>,
-}
-
-impl IndexPluginRegistry {
-    fn normalize_plugin_name(name: &str) -> String {
-        name.to_lowercase()
-    }
-
-    fn get_plugin_name_from_details_name(&self, details_name: &str) -> String {
-        let details_name = Self::normalize_plugin_name(details_name);
-        if details_name.ends_with("indexdetails") {
-            let plugin_name = details_name.replace("indexdetails", "");
-            if plugin_name == "fmindex" {
-                "fm".to_string()
-            } else {
-                plugin_name
-            }
-        } else {
-            details_name
-        }
-    }
-
-    /// Adds a plugin to the registry, using the name of the details message to determine
-    /// the plugin name.
-    ///
-    /// The plugin name will be the lowercased name of the details message with any trailing
-    /// "indexdetails" removed.
-    ///
-    /// For example, if the details message is `BTreeIndexDetails`, the plugin name will be
-    /// `btree`.
-    pub fn add_plugin<
-        DetailsType: prost::Message + prost::Name,
-        PluginType: ScalarIndexPlugin + std::default::Default + 'static,
-    >(
-        &mut self,
-    ) {
-        let plugin_name = self.get_plugin_name_from_details_name(DetailsType::NAME);
-        self.plugins
-            .insert(plugin_name, Box::new(PluginType::default()));
-    }
-
-    /// Create a registry with the default plugins
-    pub fn with_default_plugins() -> Arc<Self> {
-        let mut registry = Self {
-            plugins: HashMap::new(),
-        };
-        registry.add_plugin::<pbold::BTreeIndexDetails, BTreeIndexPlugin>();
-        registry.add_plugin::<pbold::BitmapIndexDetails, BitmapIndexPlugin>();
-        registry.add_plugin::<pbold::LabelListIndexDetails, LabelListIndexPlugin>();
-        registry.add_plugin::<pbold::NGramIndexDetails, NGramIndexPlugin>();
-        registry.add_plugin::<pbold::ZoneMapIndexDetails, ZoneMapIndexPlugin>();
-        registry.add_plugin::<pb::BloomFilterIndexDetails, BloomFilterIndexPlugin>();
-        registry.add_plugin::<pbold::InvertedIndexDetails, InvertedIndexPlugin>();
-        registry.add_plugin::<pb::JsonIndexDetails, JsonIndexPlugin>();
-        registry.add_plugin::<pb::FmIndexIndexDetails, FMIndexPlugin>();
-        #[cfg(feature = "geo")]
-        registry.add_plugin::<pb::RTreeIndexDetails, RTreeIndexPlugin>();
-
-        let registry = Arc::new(registry);
-        for plugin in registry.plugins.values() {
-            plugin.attach_registry(registry.clone());
-        }
-
-        registry
-    }
-
-    /// Get an index plugin suitable for training an index with the given parameters
-    pub fn get_plugin_by_name(&self, name: &str) -> Result<&dyn ScalarIndexPlugin> {
-        let plugin_name = Self::normalize_plugin_name(name);
-        self.plugins
-            .get(&plugin_name)
-            .map(|plugin| plugin.as_ref())
-            .ok_or_else(|| {
-                let hint = if plugin_name == "rtree" {
-                    ". The 'rtree' index requires the `geo` feature. \
-                     Rebuild with `--features geo` to enable geospatial support"
-                } else {
-                    ""
-                };
-                Error::invalid_input_source(
-                    format!("No scalar index plugin found for name '{name}'{hint}").into(),
-                )
-            })
-    }
-
-    pub fn get_plugin_by_details(
-        &self,
-        details: &prost_types::Any,
-    ) -> Result<&dyn ScalarIndexPlugin> {
-        let details_name = details.type_url.split('.').next_back().unwrap();
-        let plugin_name = self.get_plugin_name_from_details_name(details_name);
-        self.get_plugin_by_name(&plugin_name)
-    }
+pub use lance_index_core::registry::{IndexPluginRegistry, PluginRegistry};
+
+/// Create a registry populated with all built-in index plugins.
+pub fn with_default_plugins() -> Arc<IndexPluginRegistry> {
+    let mut registry = IndexPluginRegistry::new();
+    registry.add_plugin::<pbold::BTreeIndexDetails, BTreeIndexPlugin>();
+    registry.add_plugin::<pbold::BitmapIndexDetails, BitmapIndexPlugin>();
+    registry.add_plugin::<pbold::LabelListIndexDetails, LabelListIndexPlugin>();
+    registry.add_plugin::<pbold::NGramIndexDetails, NGramIndexPlugin>();
+    registry.add_plugin::<pbold::ZoneMapIndexDetails, ZoneMapIndexPlugin>();
+    registry.add_plugin::<pb::BloomFilterIndexDetails, BloomFilterIndexPlugin>();
+    registry.add_plugin::<pbold::InvertedIndexDetails, InvertedIndexPlugin>();
+    registry.add_plugin::<pb::JsonIndexDetails, JsonIndexPlugin>();
+    registry.add_plugin::<pb::FmIndexIndexDetails, FMIndexPlugin>();
+    #[cfg(feature = "geo")]
+    registry.add_plugin::<pb::RTreeIndexDetails, RTreeIndexPlugin>();
+
+    let registry = Arc::new(registry);
+    let registry_dyn: Arc<dyn PluginRegistry> = registry.clone();
+    registry.for_each_plugin(|p| p.attach_registry(registry_dyn.clone()));
+
+    registry
 }
 
 #[cfg(test)]
@@ -152,7 +77,7 @@ mod tests {
 
     #[test]
     fn test_get_plugin_by_name_accepts_case_insensitive_builtin_names() {
-        let registry = IndexPluginRegistry::with_default_plugins();
+        let registry = with_default_plugins();
 
         for (requested_name, expected_name) in [
             ("BTREE", "BTree"),
@@ -161,7 +86,7 @@ mod tests {
             ("NGRAM", "NGram"),
             ("ZONEMAP", "ZoneMap"),
             ("BLOOMFILTER", "BloomFilter"),
-            ("FM", "Fm"),
+            ("FMINDEX", "Fm"),
             ("JSON", "Json"),
         ] {
             let plugin = registry.get_plugin_by_name(requested_name).unwrap();
diff --git a/rust/lance-index/src/scalar.rs b/rust/lance-index/src/scalar.rs
index a287d277a81..b509c03f367 100644
--- a/rust/lance-index/src/scalar.rs
+++ b/rust/lance-index/src/scalar.rs
@@ -4,33 +4,22 @@
 //! Scalar indices for metadata search & filtering
 
 use arrow::buffer::{OffsetBuffer, ScalarBuffer};
-use arrow_array::{BooleanArray, ListArray, RecordBatch, UInt64Array};
-use arrow_schema::{Field, Schema};
-use async_trait::async_trait;
-use bytes::Bytes;
+use arrow_array::ListArray;
+use arrow_schema::Field;
 use datafusion::functions::regex::regexplike::RegexpLikeFunc;
 use datafusion::functions::string::contains::ContainsFunc;
 use datafusion::functions_nested::array_has;
-use datafusion::physical_plan::SendableRecordBatchStream;
 use datafusion_common::{Column, scalar::ScalarValue};
-use std::collections::{HashMap, HashSet};
-use std::fmt::Debug;
-use std::pin::Pin;
+use std::collections::HashSet;
 use std::{any::Any, ops::Bound, sync::Arc};
 
 use datafusion_expr::{Expr, expr::ScalarFunction};
 use inverted::query::{FtsQuery, FtsQueryNode, FtsSearchParams, MatchQuery, fill_fts_query_column};
-use lance_core::deepsize::DeepSizeOf;
-use lance_core::{Error, Result};
-use lance_io::stream::{RecordBatchStream, RecordBatchStreamAdapter};
-use lance_select::{NullableRowAddrSet, RowAddrTreeMap, RowSetOps};
-use roaring::RoaringBitmap;
-use serde::Serialize;
+use lance_core::Result;
 
+use crate::IndexParams;
 use crate::metrics::MetricsCollector;
-use crate::scalar::registry::TrainingCriteria;
-use crate::{Index, IndexParams, IndexType};
-pub use lance_table::format::IndexFile;
+pub use lance_index_core::scalar::IndexFile;
 
 pub mod bitmap;
 pub mod bloomfilter;
@@ -48,120 +37,14 @@ pub mod rtree;
 pub mod zoned;
 pub mod zonemap;
 
-use crate::frag_reuse::FragReuseIndex;
 pub use inverted::tokenizer::InvertedIndexParams;
 use lance_datafusion::udf::CONTAINS_TOKENS_UDF;
 
-pub const LANCE_SCALAR_INDEX: &str = "__lance_scalar_index";
-
-/// Builtin index types supported by the Lance library
-///
-/// This is primarily for convenience to avoid a bunch of string
-/// constants and provide some auto-complete.  This type should not
-/// be used in the manifest as plugins cannot add new entries.
-#[derive(Debug, Clone, PartialEq, Eq, DeepSizeOf)]
-pub enum BuiltinIndexType {
-    BTree,
-    Bitmap,
-    LabelList,
-    NGram,
-    ZoneMap,
-    BloomFilter,
-    RTree,
-    Inverted,
-    Fm,
-}
-
-impl BuiltinIndexType {
-    pub fn as_str(&self) -> &str {
-        match self {
-            Self::BTree => "btree",
-            Self::Bitmap => "bitmap",
-            Self::LabelList => "labellist",
-            Self::NGram => "ngram",
-            Self::ZoneMap => "zonemap",
-            Self::Inverted => "inverted",
-            Self::BloomFilter => "bloomfilter",
-            Self::RTree => "rtree",
-            Self::Fm => "fm",
-        }
-    }
-}
-
-impl TryFrom<IndexType> for BuiltinIndexType {
-    type Error = Error;
-
-    fn try_from(value: IndexType) -> Result<Self> {
-        match value {
-            IndexType::BTree => Ok(Self::BTree),
-            IndexType::Bitmap => Ok(Self::Bitmap),
-            IndexType::LabelList => Ok(Self::LabelList),
-            IndexType::NGram => Ok(Self::NGram),
-            IndexType::ZoneMap => Ok(Self::ZoneMap),
-            IndexType::Inverted => Ok(Self::Inverted),
-            IndexType::BloomFilter => Ok(Self::BloomFilter),
-            IndexType::RTree => Ok(Self::RTree),
-            IndexType::Fm => Ok(Self::Fm),
-            _ => Err(Error::index("Invalid index type".to_string())),
-        }
-    }
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct ScalarIndexParams {
-    /// The type of index to create
-    ///
-    /// Plugins may add additional index types.  Index type lookup is case-insensitive.
-    pub index_type: String,
-    /// The parameters to train the index
-    ///
-    /// This should be a JSON string.  The contents of the JSON string will be specific to the
-    /// index type.  If not set, then default parameters will be used for the index type.
-    pub params: Option<String>,
-}
-
-impl Default for ScalarIndexParams {
-    fn default() -> Self {
-        Self {
-            index_type: BuiltinIndexType::BTree.as_str().to_string(),
-            params: None,
-        }
-    }
-}
-
-impl ScalarIndexParams {
-    /// Creates a new ScalarIndexParams from one of the builtin index types
-    pub fn for_builtin(index_type: BuiltinIndexType) -> Self {
-        Self {
-            index_type: index_type.as_str().to_string(),
-            params: None,
-        }
-    }
-
-    /// Create a new ScalarIndexParams with the given index type
-    pub fn new(index_type: String) -> Self {
-        Self {
-            index_type,
-            params: None,
-        }
-    }
-
-    /// Set the parameters for the index
-    pub fn with_params<ParamsType: Serialize>(mut self, params: &ParamsType) -> Self {
-        self.params = Some(serde_json::to_string(params).unwrap());
-        self
-    }
-}
-
-impl IndexParams for ScalarIndexParams {
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn index_name(&self) -> &str {
-        LANCE_SCALAR_INDEX
-    }
-}
+pub use lance_index_core::scalar::{
+    AnyQuery, BuiltinIndexType, CreatedIndex, IndexReader, IndexStore, IndexWriteSummary,
+    IndexWriter, LANCE_SCALAR_INDEX, OldIndexDataFilter, ScalarIndex, ScalarIndexParams,
+    SearchResult, UpdateCriteria, compute_next_prefix,
+};
 
 impl IndexParams for InvertedIndexParams {
     fn as_any(&self) -> &dyn std::any::Any {
@@ -173,177 +56,6 @@ impl IndexParams for InvertedIndexParams {
     }
 }
 
-/// Trait for storing an index (or parts of an index) into storage
-#[async_trait]
-pub trait IndexWriter: Send {
-    /// Writes a record batch into the file, returning the 0-based index of the batch in the file
-    ///
-    /// E.g. if this is the third time this is called this method will return 2
-    async fn write_record_batch(&mut self, batch: RecordBatch) -> Result<u64>;
-    /// Adds a global buffer and returns its index.
-    async fn add_global_buffer(&mut self, _data: Bytes) -> Result<u32> {
-        Err(Error::not_supported(
-            "global buffers are not supported by this index writer",
-        ))
-    }
-    /// Finishes writing the file and closes the file
-    async fn finish(&mut self) -> Result<IndexFile>;
-    /// Finishes writing the file and closes the file with additional metadata
-    async fn finish_with_metadata(
-        &mut self,
-        metadata: HashMap<String, String>,
-    ) -> Result<IndexFile>;
-}
-
-/// Trait for reading an index (or parts of an index) from storage
-#[async_trait]
-pub trait IndexReader: Send + Sync {
-    /// Read the n-th record batch from the file
-    async fn read_record_batch(&self, n: u64, batch_size: u64) -> Result<RecordBatch>;
-    /// Reads a global buffer by index.
-    async fn read_global_buffer(&self, _index: u32) -> Result<Bytes> {
-        Err(Error::not_supported(
-            "global buffers are not supported by this index reader",
-        ))
-    }
-    /// Read the range of rows from the file.
-    /// If projection is Some, only return the columns in the projection,
-    /// nested columns like Some(&["x.y"]) are not supported.
-    /// If projection is None, return all columns.
-    async fn read_range(
-        &self,
-        range: std::ops::Range<usize>,
-        projection: Option<&[&str]>,
-    ) -> Result<RecordBatch>;
-    /// Read multiple ranges and concatenate into a single batch.
-    /// Default impl runs `read_range`s in parallel via `try_join_all`.
-    async fn read_ranges(
-        &self,
-        ranges: &[std::ops::Range<usize>],
-        projection: Option<&[&str]>,
-    ) -> Result<RecordBatch> {
-        if ranges.is_empty() {
-            return self.read_range(0..0, projection).await;
-        }
-        let futures = ranges
-            .iter()
-            .map(|r| self.read_range(r.clone(), projection));
-        let batches = futures::future::try_join_all(futures).await?;
-        let schema = batches[0].schema();
-        Ok(arrow_select::concat::concat_batches(&schema, &batches)?)
-    }
-    /// Read a range of rows as a stream of record batches.
-    ///
-    /// This allows the caller to process rows incrementally without loading the
-    /// entire range into memory at once.
-    ///
-    /// The default implementation falls back to [`Self::read_range`] and wraps
-    /// the result in a single-item stream.
-    async fn read_range_stream(
-        &self,
-        range: std::ops::Range<usize>,
-        projection: Option<&[&str]>,
-    ) -> Result<Pin<Box<dyn RecordBatchStream>>> {
-        let batch = self.read_range(range, projection).await?;
-        let schema = batch.schema();
-        Ok(Box::pin(RecordBatchStreamAdapter::new(
-            schema,
-            futures::stream::once(async move { Ok(batch) }),
-        )))
-    }
-    /// Return the number of batches in the file
-    async fn num_batches(&self, batch_size: u64) -> u32;
-    /// Return the number of rows in the file
-    fn num_rows(&self) -> usize;
-    /// Return the metadata of the file
-    fn schema(&self) -> &lance_core::datatypes::Schema;
-    /// Best-effort on-disk byte size of the file when the reader already knows it
-    /// without extra I/O, else `None`. Used to size prewarm chunks.
-    fn file_size_bytes(&self) -> Option<u64> {
-        None
-    }
-}
-
-/// Trait abstracting I/O away from index logic
-///
-/// Scalar indices are currently serialized as indexable arrow record batches stored in
-/// named "files".  The index store is responsible for serializing and deserializing
-/// these batches into file data (e.g. as .lance files or .parquet files, etc.)
-#[async_trait]
-pub trait IndexStore: std::fmt::Debug + Send + Sync + DeepSizeOf {
-    fn as_any(&self) -> &dyn Any;
-    fn clone_arc(&self) -> Arc<dyn IndexStore>;
-
-    /// Suggested I/O parallelism for the store
-    fn io_parallelism(&self) -> usize;
-
-    /// Create a new file and return a writer to store data in the file
-    async fn new_index_file(&self, name: &str, schema: Arc<Schema>)
-    -> Result<Box<dyn IndexWriter>>;
-
-    /// Open an existing file for retrieval
-    async fn open_index_file(&self, name: &str) -> Result<Arc<dyn IndexReader>>;
-
-    /// Copy a range of batches from an index file from this store to another
-    ///
-    /// This is often useful when remapping or updating
-    async fn copy_index_file(&self, name: &str, dest_store: &dyn IndexStore) -> Result<IndexFile>;
-
-    /// Copy an index file from this store to a new name in another store, leaving the source intact
-    async fn copy_index_file_to(
-        &self,
-        name: &str,
-        new_name: &str,
-        dest_store: &dyn IndexStore,
-    ) -> Result<IndexFile> {
-        if name == new_name {
-            self.copy_index_file(name, dest_store).await
-        } else {
-            Err(Error::not_supported(format!(
-                "copying index file {name} to {new_name} is not supported by this index store"
-            )))
-        }
-    }
-
-    /// Rename an index file
-    async fn rename_index_file(&self, name: &str, new_name: &str) -> Result<IndexFile>;
-
-    /// Delete an index file (used in the tmp spill store to keep tmp size down)
-    async fn delete_index_file(&self, name: &str) -> Result<()>;
-
-    /// List all files in the index directory with their sizes.
-    ///
-    /// Returns a list of (relative_path, size_bytes) tuples.
-    /// Used to capture file metadata after index creation/modification.
-    async fn list_files_with_sizes(&self) -> Result<Vec<IndexFile>>;
-}
-
-/// Different scalar indices may support different kinds of queries
-///
-/// For example, a btree index can support a wide range of queries (e.g. x > 7)
-/// while an index based on FTS only supports queries like "x LIKE 'foo'"
-///
-/// This trait is used when we need an object that can represent any kind of query
-///
-/// Note: if you are implementing this trait for a query type then you probably also
-/// need to implement the [crate::scalar::expression::ScalarQueryParser] trait to
-/// create instances of your query at parse time.
-pub trait AnyQuery: std::fmt::Debug + Any + Send + Sync {
-    /// Cast the query as Any to allow for downcasting
-    fn as_any(&self) -> &dyn Any;
-    /// Format the query as a string for display purposes
-    fn format(&self, col: &str) -> String;
-    /// Convert the query to a datafusion expression
-    fn to_expr(&self, col: String) -> Expr;
-    /// Compare this query to another query
-    fn dyn_eq(&self, other: &dyn AnyQuery) -> bool;
-}
-
-impl PartialEq for dyn AnyQuery {
-    fn eq(&self, other: &Self) -> bool {
-        self.dyn_eq(other)
-    }
-}
 /// A full text search query
 #[derive(Debug, Clone, PartialEq)]
 pub struct FullTextSearchQuery {
@@ -845,234 +557,3 @@ impl AnyQuery for GeoQuery {
         }
     }
 }
-
-/// The result of a search operation against a scalar index
-#[derive(Debug, PartialEq)]
-pub enum SearchResult {
-    /// The exact row ids that satisfy the query
-    Exact(NullableRowAddrSet),
-    /// Any row id satisfying the query will be in this set but not every
-    /// row id in this set will satisfy the query, a further recheck step
-    /// is needed
-    AtMost(NullableRowAddrSet),
-    /// All of the given row ids satisfy the query but there may be more
-    ///
-    /// No scalar index actually returns this today but it can arise from
-    /// boolean operations (e.g. NOT(AtMost(x)) == AtLeast(NOT(x)))
-    AtLeast(NullableRowAddrSet),
-}
-
-impl SearchResult {
-    pub fn exact(row_ids: impl Into<RowAddrTreeMap>) -> Self {
-        Self::Exact(NullableRowAddrSet::new(row_ids.into(), Default::default()))
-    }
-
-    pub fn at_most(row_ids: impl Into<RowAddrTreeMap>) -> Self {
-        Self::AtMost(NullableRowAddrSet::new(row_ids.into(), Default::default()))
-    }
-
-    pub fn at_least(row_ids: impl Into<RowAddrTreeMap>) -> Self {
-        Self::AtLeast(NullableRowAddrSet::new(row_ids.into(), Default::default()))
-    }
-
-    pub fn with_nulls(self, nulls: impl Into<RowAddrTreeMap>) -> Self {
-        match self {
-            Self::Exact(row_ids) => Self::Exact(row_ids.with_nulls(nulls.into())),
-            Self::AtMost(row_ids) => Self::AtMost(row_ids.with_nulls(nulls.into())),
-            Self::AtLeast(row_ids) => Self::AtLeast(row_ids.with_nulls(nulls.into())),
-        }
-    }
-
-    pub fn row_addrs(&self) -> &NullableRowAddrSet {
-        match self {
-            Self::Exact(row_addrs) => row_addrs,
-            Self::AtMost(row_addrs) => row_addrs,
-            Self::AtLeast(row_addrs) => row_addrs,
-        }
-    }
-
-    pub fn is_exact(&self) -> bool {
-        matches!(self, Self::Exact(_))
-    }
-}
-
-/// Brief information about an index that was created
-pub struct CreatedIndex {
-    /// The details of the index that was created
-    ///
-    /// These should be stored somewhere as they will be needed to
-    /// load the index later.
-    pub index_details: prost_types::Any,
-    /// The version of the index that was created
-    ///
-    /// This can be used to determine if a reader is able to load the index.
-    pub index_version: u32,
-    /// List of files and their sizes for this index
-    ///
-    /// This enables skipping HEAD calls when opening indices and provides
-    /// visibility into index storage size via describe_indices().
-    pub files: Vec<IndexFile>,
-}
-
-/// The criteria that specifies how to update an index
-pub struct UpdateCriteria {
-    /// If true, then we need to read the old data to update the index
-    ///
-    /// This should be avoided if possible but is left in for some legacy paths
-    pub requires_old_data: bool,
-    /// The criteria required for data (both old and new)
-    pub data_criteria: TrainingCriteria,
-}
-
-/// Filter used when merging existing scalar-index rows during update.
-///
-/// The caller must pick a filter mode that matches the row-id semantics of the
-/// dataset:
-/// - address-style row IDs: fragment filtering is valid
-/// - stable row IDs: use exact row-id membership instead
-#[derive(Debug, Clone)]
-pub enum OldIndexDataFilter {
-    /// Keeps track of which fragments are still valid and which are no longer valid.
-    ///
-    /// This is valid for address-style row IDs.
-    Fragments {
-        to_keep: RoaringBitmap,
-        to_remove: RoaringBitmap,
-    },
-    /// Keep old rows whose row IDs are in this exact allow-list.
-    ///
-    /// This is required for stable row IDs, where row IDs are opaque and
-    /// should not be interpreted as encoded row addresses.
-    RowIds(RowAddrTreeMap),
-}
-
-impl OldIndexDataFilter {
-    /// Build a boolean mask that keeps only row IDs selected by this filter.
-    pub fn filter_row_ids(&self, row_ids: &UInt64Array) -> BooleanArray {
-        match self {
-            Self::Fragments { to_keep, .. } => row_ids
-                .iter()
-                .map(|id| id.map(|id| to_keep.contains((id >> 32) as u32)))
-                .collect(),
-            Self::RowIds(valid_row_ids) => row_ids
-                .iter()
-                .map(|id| id.map(|id| valid_row_ids.contains(id)))
-                .collect(),
-        }
-    }
-}
-
-impl UpdateCriteria {
-    pub fn requires_old_data(data_criteria: TrainingCriteria) -> Self {
-        Self {
-            requires_old_data: true,
-            data_criteria,
-        }
-    }
-
-    pub fn only_new_data(data_criteria: TrainingCriteria) -> Self {
-        Self {
-            requires_old_data: false,
-            data_criteria,
-        }
-    }
-}
-
-/// Compute the lexicographically next prefix by incrementing the last character's code point.
-/// Returns None if no valid upper bound exists.
-///
-/// This is used for LIKE prefix queries to convert `LIKE 'foo%'` to range `[foo, fop)`.
-///
-/// # UTF-8 and Unicode Handling
-///
-/// This function operates on Unicode code points (characters), not bytes. Since UTF-8
-/// byte ordering is identical to Unicode code point ordering, incrementing a character's
-/// code point produces the correct lexicographic successor for byte-wise string comparison.
-///
-/// If incrementing the last character would overflow or land in the surrogate range
-/// (U+D800-U+DFFF), we try incrementing the previous character, and so on.
-///
-/// Examples:
-/// - `"foo"` → `Some("fop")`
-/// - `"café"` → `Some("cafê")`  (é U+00E9 → ê U+00EA)
-/// - `"abc中"` → `Some("abc丮")` (中 U+4E2D → 丮 U+4E2E)
-/// - `"cafÿ"` → `Some("cafĀ")` (ÿ U+00FF → Ā U+0100)
-pub fn compute_next_prefix(prefix: &str) -> Option<String> {
-    if prefix.is_empty() {
-        return None;
-    }
-
-    let chars: Vec<char> = prefix.chars().collect();
-
-    // Try incrementing characters from right to left
-    for i in (0..chars.len()).rev() {
-        if let Some(next_char) = next_unicode_char(chars[i]) {
-            let mut result: String = chars[..i].iter().collect();
-            result.push(next_char);
-            return Some(result);
-        }
-        // This character cannot be incremented (e.g., U+10FFFF), try previous
-    }
-
-    // All characters were at maximum value
-    None
-}
-
-/// Get the next valid Unicode scalar value after the given character.
-/// Skips the surrogate range (U+D800-U+DFFF) which is not valid in UTF-8.
-fn next_unicode_char(c: char) -> Option<char> {
-    let cp = c as u32;
-    let next_cp = cp.checked_add(1)?;
-
-    // Skip surrogate range (U+D800-U+DFFF)
-    let next_cp = if (0xD800..=0xDFFF).contains(&next_cp) {
-        0xE000
-    } else {
-        next_cp
-    };
-
-    char::from_u32(next_cp)
-}
-
-/// A trait for a scalar index, a structure that can determine row ids that satisfy scalar queries
-#[async_trait]
-pub trait ScalarIndex: Send + Sync + std::fmt::Debug + Index + DeepSizeOf {
-    /// Search the scalar index
-    ///
-    /// Returns all row ids that satisfy the query, these row ids are not necessarily ordered
-    async fn search(
-        &self,
-        query: &dyn AnyQuery,
-        metrics: &dyn MetricsCollector,
-    ) -> Result<SearchResult>;
-
-    /// Returns true if the remap operation is supported
-    fn can_remap(&self) -> bool;
-
-    /// Remap the row ids, creating a new remapped version of this index in `dest_store`
-    async fn remap(
-        &self,
-        mapping: &HashMap<u64, Option<u64>>,
-        dest_store: &dyn IndexStore,
-    ) -> Result<CreatedIndex>;
-
-    /// Add the new data into the index, creating an updated version of the index in `dest_store`
-    ///
-    /// If `old_data_filter` is provided, old index data will be filtered before
-    /// merge according to the chosen filter mode.
-    async fn update(
-        &self,
-        new_data: SendableRecordBatchStream,
-        dest_store: &dyn IndexStore,
-        old_data_filter: Option<OldIndexDataFilter>,
-    ) -> Result<CreatedIndex>;
-
-    /// Returns the criteria that will be used to update the index
-    fn update_criteria(&self) -> UpdateCriteria;
-
-    /// Derive the index parameters from the current index
-    ///
-    /// This returns a ScalarIndexParams that can be used to recreate an index
-    /// with the same configuration on another dataset.
-    fn derive_index_params(&self) -> Result<ScalarIndexParams>;
-}
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index c2a6e80e82b..fc9a5e831ad 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -42,7 +42,6 @@ use super::{
 use crate::pbold;
 use crate::{Index, IndexType, metrics::MetricsCollector};
 use crate::{
-    frag_reuse::FragReuseIndex,
     progress::IndexBuildProgress,
     scalar::{
         CreatedIndex, UpdateCriteria,
@@ -54,6 +53,7 @@ use crate::{
     },
 };
 use crate::{scalar::IndexReader, scalar::expression::ScalarQueryParser};
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 pub const BITMAP_LOOKUP_NAME: &str = "bitmap_page_lookup.lance";
 pub const INDEX_STATS_METADATA_KEY: &str = "lance:index_stats";
@@ -125,7 +125,7 @@ pub struct BitmapIndex {
 
     index_cache: WeakLanceCache,
 
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 
     lazy_reader: LazyIndexReader,
 }
@@ -200,7 +200,7 @@ impl BitmapIndexState {
         &self,
         store: Arc<dyn IndexStore>,
         index_cache: &LanceCache,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Arc<BitmapIndex>> {
         Ok(Arc::new(BitmapIndex::new(
             self.index_map.clone(),
@@ -335,7 +335,7 @@ impl BitmapIndex {
         value_type: DataType,
         store: Arc<dyn IndexStore>,
         index_cache: WeakLanceCache,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Self {
         let lazy_reader = LazyIndexReader::new(store.clone());
         Self {
@@ -351,7 +351,7 @@ impl BitmapIndex {
 
     pub(crate) async fn load(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>> {
         let page_lookup_file = store.open_index_file(BITMAP_LOOKUP_NAME).await?;
@@ -551,12 +551,6 @@ impl Index for BitmapIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn crate::vector::VectorIndex>> {
-        Err(Error::not_supported_source(
-            "BitmapIndex is not a vector index".into(),
-        ))
-    }
-
     async fn prewarm(&self) -> Result<()> {
         let page_lookup_file = self.lazy_reader.get().await?;
         let total_rows = page_lookup_file.num_rows();
@@ -803,7 +797,7 @@ impl ScalarIndex for BitmapIndex {
             index_details: prost_types::Any::from_msg(&pbold::BitmapIndexDetails::default())
                 .unwrap(),
             index_version: BITMAP_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -826,7 +820,7 @@ impl ScalarIndex for BitmapIndex {
             index_details: prost_types::Any::from_msg(&pbold::BitmapIndexDetails::default())
                 .unwrap(),
             index_version: BITMAP_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -1671,7 +1665,7 @@ pub async fn merge_bitmap_indices(
     Ok(CreatedIndex {
         index_details: prost_types::Any::from_msg(&pbold::BitmapIndexDetails::default()).unwrap(),
         index_version: BITMAP_INDEX_VERSION,
-        files: vec![file],
+        files: Some(vec![file]),
     })
 }
 
@@ -1757,7 +1751,7 @@ impl ScalarIndexPlugin for BitmapIndexPlugin {
             index_details: prost_types::Any::from_msg(&pbold::BitmapIndexDetails::default())
                 .unwrap(),
             index_version: BITMAP_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -1766,7 +1760,7 @@ impl ScalarIndexPlugin for BitmapIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(BitmapIndex::load(index_store, frag_reuse_index, cache).await? as Arc<dyn ScalarIndex>)
@@ -1775,7 +1769,7 @@ impl ScalarIndexPlugin for BitmapIndexPlugin {
     async fn get_from_cache(
         &self,
         index_store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Option<Arc<dyn ScalarIndex>>> {
         let Some(state) = cache.get_with_key(&BitmapIndexStateKey).await else {
diff --git a/rust/lance-index/src/scalar/bloomfilter.rs b/rust/lance-index/src/scalar/bloomfilter.rs
index 856f08af772..1b9edb89375 100644
--- a/rust/lance-index/src/scalar/bloomfilter.rs
+++ b/rust/lance-index/src/scalar/bloomfilter.rs
@@ -14,7 +14,6 @@ use crate::scalar::registry::{
 use crate::scalar::{
     BloomFilterQuery, BuiltinIndexType, CreatedIndex, IndexFile, ScalarIndexParams, UpdateCriteria,
 };
-use crate::{Any, pb};
 use arrow_array::{Array, UInt64Array};
 use arrow_schema::{DataType, Field};
 use lance_arrow_stats::StatisticsAccumulator;
@@ -27,17 +26,17 @@ use std::sync::LazyLock;
 use datafusion::execution::SendableRecordBatchStream;
 use std::{collections::HashMap, sync::Arc};
 
-use crate::scalar::FragReuseIndex;
 use crate::scalar::{AnyQuery, IndexStore, MetricsCollector, ScalarIndex, SearchResult};
-use crate::vector::VectorIndex;
-use crate::{Index, IndexType};
+use crate::{Index, IndexType, pb};
 use arrow_array::{ArrayRef, RecordBatch};
 use async_trait::async_trait;
 use lance_core::Error;
 use lance_core::Result;
 use lance_core::cache::LanceCache;
 use lance_core::deepsize::DeepSizeOf;
+use lance_index_core::row_id_remap::RowIdRemapper;
 use roaring::RoaringBitmap;
+use std::any::Any;
 
 use super::zoned::{ZoneBound, ZoneProcessor, ZoneTrainer, rebuild_zones, search_zones};
 
@@ -90,7 +89,7 @@ impl DeepSizeOf for BloomFilterIndex {
 impl BloomFilterIndex {
     async fn load(
         store: Arc<dyn IndexStore>,
-        _fri: Option<Arc<FragReuseIndex>>,
+        _fri: Option<Arc<dyn RowIdRemapper>>,
         _index_cache: &LanceCache,
     ) -> Result<Arc<Self>> {
         let index_file = store.open_index_file(BLOOMFILTER_FILENAME).await?;
@@ -377,12 +376,6 @@ impl Index for BloomFilterIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Err(Error::invalid_input_source(
-            "BloomFilter is not a vector index".into(),
-        ))
-    }
-
     async fn prewarm(&self) -> Result<()> {
         Ok(())
     }
@@ -464,7 +457,7 @@ impl ScalarIndex for BloomFilterIndex {
             index_details: prost_types::Any::from_msg(&pb::BloomFilterIndexDetails::default())
                 .unwrap(),
             index_version: BLOOMFILTER_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -1079,7 +1072,7 @@ impl ScalarIndexPlugin for BloomFilterIndexPlugin {
             index_details: prost_types::Any::from_msg(&pb::BloomFilterIndexDetails::default())
                 .unwrap(),
             index_version: BLOOMFILTER_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -1107,7 +1100,7 @@ impl ScalarIndexPlugin for BloomFilterIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(
diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs
index 85c42e9b048..d4fabff26ae 100644
--- a/rust/lance-index/src/scalar/btree.rs
+++ b/rust/lance-index/src/scalar/btree.rs
@@ -17,8 +17,9 @@ use super::{
 };
 use crate::cache_pb::{BTreeIndexHeader, RangeToFile};
 use crate::{Index, IndexType};
+use crate::{metrics::NoOpMetricsCollector, scalar::registry::TrainingCriteria};
+use crate::{pbold, scalar::btree::flat::FlatIndex};
 use crate::{
-    frag_reuse::FragReuseIndex,
     progress::{IndexBuildProgress, noop_progress},
     scalar::{
         CreatedIndex, UpdateCriteria,
@@ -26,8 +27,6 @@ use crate::{
         registry::{ScalarIndexPlugin, TrainingOrdering, TrainingRequest, VALUE_COLUMN_NAME},
     },
 };
-use crate::{metrics::NoOpMetricsCollector, scalar::registry::TrainingCriteria};
-use crate::{pbold, scalar::btree::flat::FlatIndex};
 use arrow_arith::numeric::add;
 use arrow_array::{
     Array, ArrayAccessor, ArrowNativeTypeOp, PrimitiveArray, RecordBatch, UInt32Array,
@@ -73,6 +72,7 @@ use lance_datafusion::{
     chunker::chunk_concat_stream,
     exec::{LanceExecutionOptions, OneShotExec, execute_plan},
 };
+use lance_index_core::row_id_remap::RowIdRemapper;
 use lance_select::{NullableRowAddrSet, RowSetOps};
 use log::{debug, warn};
 use object_store::Error as ObjectStoreError;
@@ -1393,7 +1393,7 @@ impl BTreeIndexState {
         &self,
         store: Arc<dyn IndexStore>,
         index_cache: &LanceCache,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Arc<dyn ScalarIndex>> {
         let index = BTreeIndex::try_from_serialized(
             self.lookup_batch.clone(),
@@ -1519,7 +1519,7 @@ pub struct BTreeIndex {
     /// - The local page_idx is calculated: `142 - 100 = 42`.
     /// - The system now knows to read page `42` from the file `part_2_page_file.lance`.
     ranges_to_files: Option<Arc<RangeInclusiveMap<u32, (String, u32)>>>,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 }
 
 impl DeepSizeOf for BTreeIndex {
@@ -1532,7 +1532,6 @@ impl DeepSizeOf for BTreeIndex {
 }
 
 impl BTreeIndex {
-    #[allow(clippy::too_many_arguments)]
     fn new(
         page_lookup: Arc<BTreeLookup>,
         store: Arc<dyn IndexStore>,
@@ -1540,7 +1539,7 @@ impl BTreeIndex {
         index_cache: WeakLanceCache,
         batch_size: u64,
         ranges_to_files: Option<Arc<RangeInclusiveMap<u32, (String, u32)>>>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Self {
         Self {
             page_lookup,
@@ -1696,7 +1695,7 @@ impl BTreeIndex {
         index_cache: &LanceCache,
         batch_size: u64,
         ranges_to_files: Option<Arc<RangeInclusiveMap<u32, (String, u32)>>>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let data_type = data.column(0).data_type().clone();
         let page_lookup = Arc::new(BTreeLookup::try_new(data)?);
@@ -1714,7 +1713,7 @@ impl BTreeIndex {
 
     async fn load(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>> {
         let (page_lookup_file, standalone_partition_page_file) =
@@ -1914,7 +1913,7 @@ impl BTreeIndex {
             index_details: prost_types::Any::from_msg(&pbold::BTreeIndexDetails::default())
                 .unwrap(),
             index_version: BTREE_INDEX_VERSION,
-            files,
+            files: Some(files),
         })
     }
 }
@@ -1950,7 +1949,7 @@ fn filter_keeps_nothing(filter: &Option<OldIndexDataFilter>) -> bool {
 
 fn remap_row_ids(
     stream: SendableRecordBatchStream,
-    frag_reuse_index: Arc<FragReuseIndex>,
+    frag_reuse_index: Arc<dyn RowIdRemapper>,
 ) -> SendableRecordBatchStream {
     let schema = stream.schema();
     let remapped = stream.map(move |batch_result| {
@@ -1998,12 +1997,6 @@ impl Index for BTreeIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn crate::vector::VectorIndex>> {
-        Err(Error::not_supported_source(
-            "BTreeIndex is not vector index".into(),
-        ))
-    }
-
     async fn prewarm(&self) -> Result<()> {
         let index_reader = LazyIndexReader::new(self.store.clone(), self.ranges_to_files.clone());
         let reader = index_reader.get().await?;
@@ -2304,7 +2297,7 @@ impl ScalarIndex for BTreeIndex {
             index_details: prost_types::Any::from_msg(&pbold::BTreeIndexDetails::default())
                 .unwrap(),
             index_version: BTREE_INDEX_VERSION,
-            files: remapped_files,
+            files: Some(remapped_files),
         })
     }
 
@@ -2554,9 +2547,7 @@ pub async fn train_btree_index(
     Ok(vec![pages_file, lookup_file])
 }
 
-fn find_single_partition_files(
-    files: &[lance_table::format::IndexFile],
-) -> Result<Option<(&str, &str)>> {
+fn find_single_partition_files(files: &[crate::scalar::IndexFile]) -> Result<Option<(&str, &str)>> {
     let lookup_files = files
         .iter()
         .filter_map(|file| {
@@ -3283,7 +3274,7 @@ impl ScalarIndexPlugin for BTreeIndexPlugin {
             index_details: prost_types::Any::from_msg(&pbold::BTreeIndexDetails::default())
                 .unwrap(),
             index_version: BTREE_INDEX_VERSION,
-            files,
+            files: Some(files),
         })
     }
 
@@ -3291,7 +3282,7 @@ impl ScalarIndexPlugin for BTreeIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(BTreeIndex::load(index_store, frag_reuse_index, cache).await? as Arc<dyn ScalarIndex>)
@@ -3300,7 +3291,7 @@ impl ScalarIndexPlugin for BTreeIndexPlugin {
     async fn get_from_cache(
         &self,
         index_store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Option<Arc<dyn ScalarIndex>>> {
         let Some(state) = cache.get_with_key(&BTreeIndexStateKey).await else {
@@ -6261,6 +6252,7 @@ mod tests {
     #[tokio::test]
     async fn test_btree_index_state_reconstruct_applies_frag_reuse_index() {
         use crate::frag_reuse::{FragReuseIndex, FragReuseIndexDetails};
+        use lance_index_core::row_id_remap::RowIdRemapper;
         use std::collections::HashMap;
         use uuid::Uuid;
 
@@ -6292,7 +6284,7 @@ mod tests {
         // Remap row 0 -> row 5000 (outside the original [0, 1000) range so no collision).
         // Querying for value == 0 should now return row 5000, confirming reconstruct threaded
         // the FragReuseIndex through to the rebuilt BTreeIndex.
-        let frag_reuse_index = Arc::new(FragReuseIndex::new(
+        let frag_reuse_index: Arc<dyn RowIdRemapper> = Arc::new(FragReuseIndex::new(
             Uuid::new_v4(),
             vec![HashMap::from([(0u64, Some(5000u64))])],
             FragReuseIndexDetails { versions: vec![] },
diff --git a/rust/lance-index/src/scalar/expression.rs b/rust/lance-index/src/scalar/expression.rs
index 053da5ae5e7..296d30b7e4f 100644
--- a/rust/lance-index/src/scalar/expression.rs
+++ b/rust/lance-index/src/scalar/expression.rs
@@ -4,264 +4,19 @@
 use std::{ops::Bound, sync::Arc};
 
 use arrow_schema::{DataType, Field};
-use async_recursion::async_recursion;
-use async_trait::async_trait;
 use datafusion_common::ScalarValue;
-use datafusion_expr::{
-    Between, BinaryExpr, Expr, Operator, ReturnFieldArgs, ScalarUDF,
-    expr::{InList, Like, ScalarFunction},
-};
-use tokio::try_join;
+use datafusion_expr::{Expr, Operator, ReturnFieldArgs, ScalarUDF, expr::Like};
 
-use super::{
-    AnyQuery, BloomFilterQuery, LabelListQuery, MetricsCollector, SargableQuery, ScalarIndex,
-    SearchResult, TextQuery, TokenQuery,
-};
+use super::{BloomFilterQuery, LabelListQuery, SargableQuery, TextQuery, TokenQuery};
 #[cfg(feature = "geo")]
 use super::{GeoQuery, RelationQuery};
-use lance_core::{Error, Result};
+use lance_core::Result;
 use lance_datafusion::{expr::safe_coerce_scalar, planner::Planner};
-use lance_select::{IndexExprResult, NullableIndexExprResult, NullableRowAddrMask};
-use roaring::RoaringBitmap;
-use tracing::instrument;
-
-const MAX_DEPTH: usize = 500;
-
-/// An indexed expression consists of a scalar index query with a post-scan filter
-///
-/// When a user wants to filter the data returned by a scan we may be able to use
-/// one or more scalar indices to reduce the amount of data we load from the disk.
-///
-/// For example, if a user provides the filter "x = 7", and we have a scalar index
-/// on x, then we can possibly identify the exact row that the user desires with our
-/// index.  A full-table scan can then turn into a take operation fetching the rows
-/// desired.  This would create an IndexedExpression with a scalar_query but no
-/// refine.
-///
-/// If the user asked for "type = 'dog' && z = 3" and we had a scalar index on the
-/// "type" column then we could convert this to an indexed scan for "type='dog'"
-/// followed by an in-memory filter for z=3.  This would create an IndexedExpression
-/// with both a scalar_query AND a refine.
-///
-/// Finally, if the user asked for "z = 3" and we do not have a scalar index on the
-/// "z" column then we must fallback to an IndexedExpression with no scalar_query and
-/// only a refine.
-///
-/// Two IndexedExpressions can be AND'd together.  Each part is AND'd together.
-/// Two IndexedExpressions cannot be OR'd together unless both are scalar_query only
-///   or both are refine only
-/// An IndexedExpression cannot be negated if it has both a refine and a scalar_query
-///
-/// When an operation cannot be performed we fallback to the original expression-only
-/// representation
-#[derive(Debug, PartialEq)]
-pub struct IndexedExpression {
-    /// The portion of the query that can be satisfied by scalar indices
-    pub scalar_query: Option<ScalarIndexExpr>,
-    /// The portion of the query that cannot be satisfied by scalar indices
-    pub refine_expr: Option<Expr>,
-}
-
-pub trait ScalarQueryParser: std::fmt::Debug + Send + Sync {
-    /// Visit a between expression
-    ///
-    /// Returns an IndexedExpression if the index can accelerate between expressions
-    fn visit_between(
-        &self,
-        column: &str,
-        low: &Bound<ScalarValue>,
-        high: &Bound<ScalarValue>,
-    ) -> Option<IndexedExpression>;
-    /// Visit an in list expression
-    ///
-    /// Returns an IndexedExpression if the index can accelerate in list expressions
-    fn visit_in_list(&self, column: &str, in_list: &[ScalarValue]) -> Option<IndexedExpression>;
-    /// Visit an is bool expression
-    ///
-    /// Returns an IndexedExpression if the index can accelerate is bool expressions
-    fn visit_is_bool(&self, column: &str, value: bool) -> Option<IndexedExpression>;
-    /// Visit an is null expression
-    ///
-    /// Returns an IndexedExpression if the index can accelerate is null expressions
-    fn visit_is_null(&self, column: &str) -> Option<IndexedExpression>;
-    /// Visit a comparison expression
-    ///
-    /// Returns an IndexedExpression if the index can accelerate comparison expressions
-    fn visit_comparison(
-        &self,
-        column: &str,
-        value: &ScalarValue,
-        op: &Operator,
-    ) -> Option<IndexedExpression>;
-    /// Visit a scalar function expression
-    ///
-    /// Returns an IndexedExpression if the index can accelerate the given scalar function.
-    /// For example, an ngram index can accelerate the contains function.
-    fn visit_scalar_function(
-        &self,
-        column: &str,
-        data_type: &DataType,
-        func: &ScalarUDF,
-        args: &[Expr],
-    ) -> Option<IndexedExpression>;
-
-    /// Visit a LIKE expression
-    ///
-    /// Returns an IndexedExpression if the index can accelerate LIKE expressions.
-    /// For prefix patterns (e.g., "foo%"):
-    /// - ZoneMaps prune zones based on min/max statistics
-    /// - BTrees use range query conversion `[prefix, next_prefix)`
-    ///
-    /// For patterns with wildcards in the middle (e.g., "foo%bar%"), the leading prefix
-    /// can still be used for pruning, with the full pattern as a refine expression.
-    ///
-    /// # Arguments
-    /// * `column` - The column name
-    /// * `like` - The full LIKE expression (for constructing refine_expr if needed)
-    /// * `pattern` - The LIKE pattern as ScalarValue (e.g., "foo%")
-    fn visit_like(
-        &self,
-        _column: &str,
-        _like: &Like,
-        _pattern: &ScalarValue,
-    ) -> Option<IndexedExpression> {
-        None
-    }
-
-    /// Visits a potential reference to a column
-    ///
-    /// This function is a little different from the other visitors.  It is used to test if a potential
-    /// column reference is a reference the index handles.
-    ///
-    /// Most indexes are designed to run on references to the indexed column.  For example, if a query
-    /// is "x = 7" and we have a scalar index on "x" then we apply the index to the "x" column reference.
-    ///
-    /// However, some indexes are designed to run on projections of the indexed column.  For example,
-    /// if a query is "json_extract(json, '$.name') = 'books'" and we have a JSON index on the "json" column
-    /// then we apply the index to the projection of the "json" column.
-    ///
-    /// This function is used to test if a potential column reference is a reference the index handles.
-    /// The default implementation matches column references but this can be overridden by indexes that
-    /// handle projections.
-    ///
-    /// The function is also passed in the data type of the column and should return the data type of the
-    /// reference.  Normally this is the same as the input for a direct column reference and possibly something
-    /// different for a projection.  E.g. a JSON column (LargeBinary) might be projected to a string or float
-    ///
-    /// Note: higher logic in the expression parser already limits references to either Expr::Column or Expr::ScalarFunction
-    /// where the first argument is an Expr::Column.  If your projection doesn't fit that mold then the
-    /// expression parser will need to be modified.
-    fn is_valid_reference(&self, func: &Expr, data_type: &DataType) -> Option<DataType> {
-        match func {
-            Expr::Column(_) => Some(data_type.clone()),
-            _ => None,
-        }
-    }
-}
-
-/// A generic parser that wraps multiple scalar query parsers
-///
-/// It will search each parser in order and return the first non-None result
-#[derive(Debug)]
-pub struct MultiQueryParser {
-    parsers: Vec<Box<dyn ScalarQueryParser>>,
-}
-
-impl MultiQueryParser {
-    /// Create a new MultiQueryParser with a single parser
-    pub fn single(parser: Box<dyn ScalarQueryParser>) -> Self {
-        Self {
-            parsers: vec![parser],
-        }
-    }
-
-    /// Add a new parser to the MultiQueryParser
-    pub fn add(&mut self, other: Box<dyn ScalarQueryParser>) {
-        self.parsers.push(other);
-    }
 
-    /// Pick the first underlying parser whose `is_valid_reference` accepts `expr`.
-    pub fn select(
-        &self,
-        expr: &Expr,
-        data_type: &DataType,
-    ) -> Option<(&dyn ScalarQueryParser, DataType)> {
-        self.parsers.iter().find_map(|p| {
-            p.is_valid_reference(expr, data_type)
-                .map(|dt| (p.as_ref(), dt))
-        })
-    }
-}
-
-impl ScalarQueryParser for MultiQueryParser {
-    fn visit_between(
-        &self,
-        column: &str,
-        low: &Bound<ScalarValue>,
-        high: &Bound<ScalarValue>,
-    ) -> Option<IndexedExpression> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.visit_between(column, low, high))
-    }
-    fn visit_in_list(&self, column: &str, in_list: &[ScalarValue]) -> Option<IndexedExpression> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.visit_in_list(column, in_list))
-    }
-    fn visit_is_bool(&self, column: &str, value: bool) -> Option<IndexedExpression> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.visit_is_bool(column, value))
-    }
-    fn visit_is_null(&self, column: &str) -> Option<IndexedExpression> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.visit_is_null(column))
-    }
-    fn visit_comparison(
-        &self,
-        column: &str,
-        value: &ScalarValue,
-        op: &Operator,
-    ) -> Option<IndexedExpression> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.visit_comparison(column, value, op))
-    }
-    fn visit_scalar_function(
-        &self,
-        column: &str,
-        data_type: &DataType,
-        func: &ScalarUDF,
-        args: &[Expr],
-    ) -> Option<IndexedExpression> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.visit_scalar_function(column, data_type, func, args))
-    }
-    fn visit_like(
-        &self,
-        column: &str,
-        like: &Like,
-        pattern: &ScalarValue,
-    ) -> Option<IndexedExpression> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.visit_like(column, like, pattern))
-    }
-    /// TODO(low-priority): This is maybe not quite right.  We should filter down the list of parsers based
-    /// on those that consider the reference valid.  Instead what we are doing is checking all parsers if any one
-    /// parser considers the reference valid.
-    ///
-    /// This will be a problem if the user creates two indexes (e.g. btree and json) on the same column and those two
-    /// indexes have different reference schemes.
-    fn is_valid_reference(&self, func: &Expr, data_type: &DataType) -> Option<DataType> {
-        self.parsers
-            .iter()
-            .find_map(|parser| parser.is_valid_reference(func, data_type))
-    }
-}
+pub use lance_index_core::scalar::expression::{
+    IndexInformationProvider, IndexedExpression, MultiQueryParser, ScalarIndexExpr,
+    ScalarIndexLoader, ScalarIndexSearch, ScalarQueryParser, apply_scalar_indices,
+};
 
 /// A parser for indices that handle SARGable queries
 #[derive(Debug)]
@@ -1199,431 +954,6 @@ impl ScalarQueryParser for GeoQueryParser {
     }
 }
 
-impl IndexedExpression {
-    /// Create an expression that only does refine
-    fn refine_only(refine_expr: Expr) -> Self {
-        Self {
-            scalar_query: None,
-            refine_expr: Some(refine_expr),
-        }
-    }
-
-    /// Create an expression that is only an index query
-    fn index_query(
-        column: String,
-        index_name: String,
-        index_type: String,
-        query: Arc<dyn AnyQuery>,
-    ) -> Self {
-        Self {
-            scalar_query: Some(ScalarIndexExpr::Query(ScalarIndexSearch {
-                column,
-                index_name,
-                index_type,
-                query,
-                needs_recheck: false,  // Default to false, will be set by parser
-                fragment_bitmap: None, // Filled in by `apply_scalar_indices`
-            })),
-            refine_expr: None,
-        }
-    }
-
-    /// Create an expression that is only an index query with explicit needs_recheck
-    fn index_query_with_recheck(
-        column: String,
-        index_name: String,
-        index_type: String,
-        query: Arc<dyn AnyQuery>,
-        needs_recheck: bool,
-    ) -> Self {
-        Self {
-            scalar_query: Some(ScalarIndexExpr::Query(ScalarIndexSearch {
-                column,
-                index_name,
-                index_type,
-                query,
-                needs_recheck,
-                fragment_bitmap: None, // Filled in by `apply_scalar_indices`
-            })),
-            refine_expr: None,
-        }
-    }
-
-    /// Try and negate the expression
-    ///
-    /// If the expression contains both an index query and a refine expression then it
-    /// cannot be negated today and None will be returned (we give up trying to use indices)
-    fn maybe_not(self) -> Option<Self> {
-        match (self.scalar_query, self.refine_expr) {
-            (Some(_), Some(_)) => None,
-            (Some(scalar_query), None) => {
-                if scalar_query.needs_recheck() {
-                    return None;
-                }
-                Some(Self {
-                    scalar_query: Some(ScalarIndexExpr::Not(Box::new(scalar_query))),
-                    refine_expr: None,
-                })
-            }
-            (None, Some(refine_expr)) => Some(Self {
-                scalar_query: None,
-                refine_expr: Some(Expr::Not(Box::new(refine_expr))),
-            }),
-            (None, None) => panic!("Empty node should not occur"),
-        }
-    }
-
-    /// Perform a logical AND of two indexed expressions
-    ///
-    /// This is straightforward because we can just AND the individual parts
-    /// because (A && B) && (C && D) == (A && C) && (B && D)
-    fn and(self, other: Self) -> Self {
-        let scalar_query = match (self.scalar_query, other.scalar_query) {
-            (Some(scalar_query), Some(other_scalar_query)) => Some(ScalarIndexExpr::And(
-                Box::new(scalar_query),
-                Box::new(other_scalar_query),
-            )),
-            (Some(scalar_query), None) => Some(scalar_query),
-            (None, Some(scalar_query)) => Some(scalar_query),
-            (None, None) => None,
-        };
-        let refine_expr = match (self.refine_expr, other.refine_expr) {
-            (Some(refine_expr), Some(other_refine_expr)) => {
-                Some(refine_expr.and(other_refine_expr))
-            }
-            (Some(refine_expr), None) => Some(refine_expr),
-            (None, Some(refine_expr)) => Some(refine_expr),
-            (None, None) => None,
-        };
-        Self {
-            scalar_query,
-            refine_expr,
-        }
-    }
-
-    /// Try and perform a logical OR of two indexed expressions
-    ///
-    /// This is a bit tricky because something like:
-    ///   (color == 'blue' AND size < 20) OR (color == 'green' AND size < 50)
-    /// is not equivalent to:
-    ///   (color == 'blue' OR color == 'green') AND (size < 20 OR size < 50)
-    fn maybe_or(self, other: Self) -> Option<Self> {
-        // If either expression is missing a scalar_query then we need to load all rows from
-        // the database and so we short-circuit and return None
-        let scalar_query = self.scalar_query?;
-        let other_scalar_query = other.scalar_query?;
-        let scalar_query = Some(ScalarIndexExpr::Or(
-            Box::new(scalar_query),
-            Box::new(other_scalar_query),
-        ));
-
-        let refine_expr = match (self.refine_expr, other.refine_expr) {
-            // TODO
-            //
-            // To handle these cases we need a way of going back from a scalar expression query to a logical DF expression (perhaps
-            // we can store the expression that led to the creation of the query)
-            //
-            // For example, imagine we have something like "(color == 'blue' AND size < 20) OR (color == 'green' AND size < 50)"
-            //
-            // We can do an indexed load of all rows matching "color == 'blue' OR color == 'green'" but then we need to
-            // refine that load with the full original expression which, at the moment, we no longer have.
-            (Some(_), Some(_)) => {
-                return None;
-            }
-            (Some(_), None) => {
-                return None;
-            }
-            (None, Some(_)) => {
-                return None;
-            }
-            (None, None) => None,
-        };
-        Some(Self {
-            scalar_query,
-            refine_expr,
-        })
-    }
-
-    fn refine(self, expr: Expr) -> Self {
-        match self.refine_expr {
-            Some(refine_expr) => Self {
-                scalar_query: self.scalar_query,
-                refine_expr: Some(refine_expr.and(expr)),
-            },
-            None => Self {
-                scalar_query: self.scalar_query,
-                refine_expr: Some(expr),
-            },
-        }
-    }
-}
-
-/// A trait implemented by anything that can load indices by name
-///
-/// This is used during the evaluation of an index expression
-#[async_trait]
-pub trait ScalarIndexLoader: Send + Sync {
-    /// Load the index with the given name
-    async fn load_index(
-        &self,
-        column: &str,
-        index_name: &str,
-        metrics: &dyn MetricsCollector,
-    ) -> Result<Arc<dyn ScalarIndex>>;
-}
-
-/// This represents a search into a scalar index
-#[derive(Debug, Clone)]
-pub struct ScalarIndexSearch {
-    /// The column to search (redundant, used for debugging messages)
-    pub column: String,
-    /// The name of the index to search
-    pub index_name: String,
-    /// The type of the index being searched (e.g. "BTree", "Bitmap"), used for display purposes
-    pub index_type: String,
-    /// The query to search for
-    pub query: Arc<dyn AnyQuery>,
-    /// If true, the query results are inexact and will need a recheck
-    pub needs_recheck: bool,
-    /// The fragments the underlying index has entries for.
-    ///
-    /// `None` means coverage is unknown (e.g. constructed outside of scanner
-    /// planning, or from a legacy code path). Optimizer rules that need to
-    /// decide whether the index covers the dataset must treat `None` as
-    /// "refuse to use" — the bitmap is the only way to safely answer that
-    /// question synchronously without an async metadata load.
-    pub fragment_bitmap: Option<RoaringBitmap>,
-}
-
-impl PartialEq for ScalarIndexSearch {
-    fn eq(&self, other: &Self) -> bool {
-        // `fragment_bitmap` is metadata derived from the dataset state, not
-        // part of the query identity, so it intentionally does not participate
-        // in equality.
-        self.column == other.column
-            && self.index_name == other.index_name
-            && self.query.as_ref().eq(other.query.as_ref())
-    }
-}
-
-/// This represents a lookup into one or more scalar indices
-///
-/// This is a tree of operations because we may need to logically combine or
-/// modify the results of scalar lookups
-#[derive(Debug, Clone)]
-pub enum ScalarIndexExpr {
-    Not(Box<Self>),
-    And(Box<Self>, Box<Self>),
-    Or(Box<Self>, Box<Self>),
-    Query(ScalarIndexSearch),
-}
-
-impl PartialEq for ScalarIndexExpr {
-    fn eq(&self, other: &Self) -> bool {
-        match (self, other) {
-            (Self::Not(l0), Self::Not(r0)) => l0 == r0,
-            (Self::And(l0, l1), Self::And(r0, r1)) => l0 == r0 && l1 == r1,
-            (Self::Or(l0, l1), Self::Or(r0, r1)) => l0 == r0 && l1 == r1,
-            (Self::Query(l_search), Self::Query(r_search)) => l_search == r_search,
-            _ => false,
-        }
-    }
-}
-
-impl std::fmt::Display for ScalarIndexExpr {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Not(inner) => write!(f, "NOT({})", inner),
-            Self::And(lhs, rhs) => write!(f, "AND({},{})", lhs, rhs),
-            Self::Or(lhs, rhs) => write!(f, "OR({},{})", lhs, rhs),
-            Self::Query(search) => write!(
-                f,
-                "[{}]@{}({})",
-                search.query.format(&search.column),
-                search.index_name,
-                search.index_type
-            ),
-        }
-    }
-}
-
-impl From<SearchResult> for NullableIndexExprResult {
-    fn from(result: SearchResult) -> Self {
-        match result {
-            SearchResult::Exact(mask) => Self::exact(NullableRowAddrMask::AllowList(mask)),
-            SearchResult::AtMost(mask) => Self::at_most(NullableRowAddrMask::AllowList(mask)),
-            SearchResult::AtLeast(mask) => Self::at_least(NullableRowAddrMask::AllowList(mask)),
-        }
-    }
-}
-
-impl ScalarIndexExpr {
-    /// Evaluates the scalar index expression
-    ///
-    /// This will result in loading one or more scalar indices and searching them
-    ///
-    /// TODO: We could potentially try and be smarter about reusing loaded indices for
-    /// any situations where the session cache has been disabled.
-    #[async_recursion]
-    pub async fn evaluate_nullable(
-        &self,
-        index_loader: &dyn ScalarIndexLoader,
-        metrics: &dyn MetricsCollector,
-    ) -> Result<NullableIndexExprResult> {
-        match self {
-            Self::Not(inner) => {
-                let result = inner.evaluate_nullable(index_loader, metrics).await?;
-                Ok(!result)
-            }
-            Self::And(lhs, rhs) => {
-                let lhs_result = lhs.evaluate_nullable(index_loader, metrics);
-                let rhs_result = rhs.evaluate_nullable(index_loader, metrics);
-                let (lhs_result, rhs_result) = try_join!(lhs_result, rhs_result)?;
-                Ok(lhs_result & rhs_result)
-            }
-            Self::Or(lhs, rhs) => {
-                let lhs_result = lhs.evaluate_nullable(index_loader, metrics);
-                let rhs_result = rhs.evaluate_nullable(index_loader, metrics);
-                let (lhs_result, rhs_result) = try_join!(lhs_result, rhs_result)?;
-                Ok(lhs_result | rhs_result)
-            }
-            Self::Query(search) => {
-                let index = index_loader
-                    .load_index(&search.column, &search.index_name, metrics)
-                    .await?;
-                let search_result = index.search(search.query.as_ref(), metrics).await?;
-                Ok(search_result.into())
-            }
-        }
-    }
-
-    #[instrument(level = "debug", skip_all)]
-    pub async fn evaluate(
-        &self,
-        index_loader: &dyn ScalarIndexLoader,
-        metrics: &dyn MetricsCollector,
-    ) -> Result<IndexExprResult> {
-        Ok(self
-            .evaluate_nullable(index_loader, metrics)
-            .await?
-            .drop_nulls())
-    }
-
-    pub fn to_expr(&self) -> Expr {
-        match self {
-            Self::Not(inner) => Expr::Not(inner.to_expr().into()),
-            Self::And(lhs, rhs) => {
-                let lhs = lhs.to_expr();
-                let rhs = rhs.to_expr();
-                lhs.and(rhs)
-            }
-            Self::Or(lhs, rhs) => {
-                let lhs = lhs.to_expr();
-                let rhs = rhs.to_expr();
-                lhs.or(rhs)
-            }
-            Self::Query(search) => search.query.to_expr(search.column.clone()),
-        }
-    }
-
-    pub fn needs_recheck(&self) -> bool {
-        match self {
-            Self::Not(inner) => inner.needs_recheck(),
-            Self::And(lhs, rhs) | Self::Or(lhs, rhs) => lhs.needs_recheck() || rhs.needs_recheck(),
-            Self::Query(search) => search.needs_recheck,
-        }
-    }
-}
-
-// Extract a column from the expression, if it is a column, or None
-fn maybe_column(expr: &Expr) -> Option<&str> {
-    match expr {
-        Expr::Column(col) => Some(&col.name),
-        _ => None,
-    }
-}
-
-// Extract the full nested column path from a get_field expression chain
-// For example: get_field(get_field(metadata, "status"), "code") -> "metadata.status.code"
-fn extract_nested_column_path(expr: &Expr) -> Option<String> {
-    let mut current_expr = expr;
-    let mut parts = Vec::new();
-
-    // Walk up the get_field chain
-    loop {
-        match current_expr {
-            Expr::ScalarFunction(udf) if udf.name() == "get_field" => {
-                if udf.args.len() != 2 {
-                    return None;
-                }
-                // Extract the field name from the second argument
-                // The Literal now has two fields: ScalarValue and Option<FieldMetadata>
-                if let Expr::Literal(ScalarValue::Utf8(Some(field_name)), _) = &udf.args[1] {
-                    parts.push(field_name.clone());
-                } else {
-                    return None;
-                }
-                // Move up to the parent expression
-                current_expr = &udf.args[0];
-            }
-            Expr::Column(col) => {
-                // We've reached the base column
-                parts.push(col.name.clone());
-                break;
-            }
-            _ => {
-                return None;
-            }
-        }
-    }
-
-    // Reverse to get the correct order (parent.child.grandchild)
-    parts.reverse();
-
-    // Format the path correctly
-    let field_refs: Vec<&str> = parts.iter().map(|s| s.as_str()).collect();
-    Some(lance_core::datatypes::format_field_path(&field_refs))
-}
-
-// Extract a column from the expression, if it is a column, and we have an index for that column, or None
-//
-// There's two ways to get a column.  First, the obvious way, is a
-// simple column reference (e.g. x = 7).  Second, a more complex way,
-// is some kind of projection into a column (e.g. json_extract(json, '$.name')).
-// Third way is nested field access (e.g. get_field(metadata, "status.code"))
-fn maybe_indexed_column<'b>(
-    expr: &Expr,
-    index_info: &'b dyn IndexInformationProvider,
-) -> Option<(String, DataType, &'b dyn ScalarQueryParser)> {
-    // First try to extract the full nested column path for get_field expressions
-    if let Some(nested_path) = extract_nested_column_path(expr)
-        && let Some((data_type, multi)) = index_info.get_index(&nested_path)
-        && let Some((parser, data_type)) = multi.select(expr, data_type)
-    {
-        return Some((nested_path, data_type, parser));
-    }
-
-    match expr {
-        Expr::Column(col) => {
-            let col = col.name.as_str();
-            let (data_type, multi) = index_info.get_index(col)?;
-            let (parser, data_type) = multi.select(expr, data_type)?;
-            Some((col.to_string(), data_type, parser))
-        }
-        Expr::ScalarFunction(udf) => {
-            if udf.args.is_empty() {
-                return None;
-            }
-            // For non-get_field functions, fall back to old behavior
-            let col = maybe_column(&udf.args[0])?;
-            let (data_type, multi) = index_info.get_index(col)?;
-            let (parser, data_type) = multi.select(expr, data_type)?;
-            Some((col.to_string(), data_type, parser))
-        }
-        _ => None,
-    }
-}
-
 // Extract a literal scalar value from an expression, if it is a literal, or None
 fn maybe_scalar(expr: &Expr, expected_type: &DataType) -> Option<ScalarValue> {
     match expr {
@@ -1672,363 +1002,6 @@ fn maybe_scalar(expr: &Expr, expected_type: &DataType) -> Option<ScalarValue> {
     }
 }
 
-// Extract a list of scalar values from an expression, if it is a list of scalar values, or None
-fn maybe_scalar_list(exprs: &Vec<Expr>, expected_type: &DataType) -> Option<Vec<ScalarValue>> {
-    let mut scalar_values = Vec::with_capacity(exprs.len());
-    for expr in exprs {
-        match maybe_scalar(expr, expected_type) {
-            Some(scalar_val) => {
-                scalar_values.push(scalar_val);
-            }
-            None => {
-                return None;
-            }
-        }
-    }
-    Some(scalar_values)
-}
-
-fn visit_between(
-    between: &Between,
-    index_info: &dyn IndexInformationProvider,
-) -> Option<IndexedExpression> {
-    let (column, col_type, query_parser) = maybe_indexed_column(&between.expr, index_info)?;
-    let low = maybe_scalar(&between.low, &col_type)?;
-    let high = maybe_scalar(&between.high, &col_type)?;
-
-    let indexed_expr =
-        query_parser.visit_between(&column, &Bound::Included(low), &Bound::Included(high))?;
-
-    if between.negated {
-        indexed_expr.maybe_not()
-    } else {
-        Some(indexed_expr)
-    }
-}
-
-fn visit_in_list(
-    in_list: &InList,
-    index_info: &dyn IndexInformationProvider,
-) -> Option<IndexedExpression> {
-    let (column, col_type, query_parser) = maybe_indexed_column(&in_list.expr, index_info)?;
-    let values = maybe_scalar_list(&in_list.list, &col_type)?;
-
-    let indexed_expr = query_parser.visit_in_list(&column, &values)?;
-
-    if in_list.negated {
-        indexed_expr.maybe_not()
-    } else {
-        Some(indexed_expr)
-    }
-}
-
-fn visit_is_bool(
-    expr: &Expr,
-    index_info: &dyn IndexInformationProvider,
-    value: bool,
-) -> Option<IndexedExpression> {
-    let (column, col_type, query_parser) = maybe_indexed_column(expr, index_info)?;
-    if col_type != DataType::Boolean {
-        None
-    } else {
-        query_parser.visit_is_bool(&column, value)
-    }
-}
-
-// A column can be a valid indexed expression if the column is boolean (e.g. 'WHERE on_sale')
-fn visit_column(
-    col: &Expr,
-    index_info: &dyn IndexInformationProvider,
-) -> Option<IndexedExpression> {
-    let (column, col_type, query_parser) = maybe_indexed_column(col, index_info)?;
-    if col_type != DataType::Boolean {
-        None
-    } else {
-        query_parser.visit_is_bool(&column, true)
-    }
-}
-
-fn visit_is_null(
-    expr: &Expr,
-    index_info: &dyn IndexInformationProvider,
-    negated: bool,
-) -> Option<IndexedExpression> {
-    let (column, _, query_parser) = maybe_indexed_column(expr, index_info)?;
-    let indexed_expr = query_parser.visit_is_null(&column)?;
-    if negated {
-        indexed_expr.maybe_not()
-    } else {
-        Some(indexed_expr)
-    }
-}
-
-fn visit_not(
-    expr: &Expr,
-    index_info: &dyn IndexInformationProvider,
-    depth: usize,
-) -> Result<Option<IndexedExpression>> {
-    let node = visit_node(expr, index_info, depth + 1)?;
-    Ok(node.and_then(|node| node.maybe_not()))
-}
-
-fn visit_comparison(
-    expr: &BinaryExpr,
-    index_info: &dyn IndexInformationProvider,
-) -> Option<IndexedExpression> {
-    let left_col = maybe_indexed_column(&expr.left, index_info);
-    if let Some((column, col_type, query_parser)) = left_col {
-        let scalar = maybe_scalar(&expr.right, &col_type)?;
-        query_parser.visit_comparison(&column, &scalar, &expr.op)
-    } else {
-        // Datafusion's query simplifier will canonicalize expressions and so we shouldn't reach this case.  If, for some reason, we
-        // do reach this case we can handle it in the future by inverting expr.op and swapping the left and right sides
-        None
-    }
-}
-
-fn maybe_range(
-    expr: &BinaryExpr,
-    index_info: &dyn IndexInformationProvider,
-) -> Option<IndexedExpression> {
-    let left_expr = match expr.left.as_ref() {
-        Expr::BinaryExpr(binary_expr) => Some(binary_expr),
-        _ => None,
-    }?;
-    let right_expr = match expr.right.as_ref() {
-        Expr::BinaryExpr(binary_expr) => Some(binary_expr),
-        _ => None,
-    }?;
-
-    let (left_col, dt, parser) = maybe_indexed_column(&left_expr.left, index_info)?;
-    let right_col = maybe_column(&right_expr.left)?;
-
-    if left_col != right_col {
-        return None;
-    }
-
-    let left_value = maybe_scalar(&left_expr.right, &dt)?;
-    let right_value = maybe_scalar(&right_expr.right, &dt)?;
-
-    let (low, high) = match (left_expr.op, right_expr.op) {
-        // x >= a && x <= b
-        (Operator::GtEq, Operator::LtEq) => {
-            (Bound::Included(left_value), Bound::Included(right_value))
-        }
-        // x >= a && x < b
-        (Operator::GtEq, Operator::Lt) => {
-            (Bound::Included(left_value), Bound::Excluded(right_value))
-        }
-        // x > a && x <= b
-        (Operator::Gt, Operator::LtEq) => {
-            (Bound::Excluded(left_value), Bound::Included(right_value))
-        }
-        // x > a && x < b
-        (Operator::Gt, Operator::Lt) => (Bound::Excluded(left_value), Bound::Excluded(right_value)),
-        // x <= a && x >= b
-        (Operator::LtEq, Operator::GtEq) => {
-            (Bound::Included(right_value), Bound::Included(left_value))
-        }
-        // x <= a && x > b
-        (Operator::LtEq, Operator::Gt) => {
-            (Bound::Excluded(right_value), Bound::Included(left_value))
-        }
-        // x < a && x >= b
-        (Operator::Lt, Operator::GtEq) => {
-            (Bound::Included(right_value), Bound::Excluded(left_value))
-        }
-        // x < a && x > b
-        (Operator::Lt, Operator::Gt) => (Bound::Excluded(right_value), Bound::Excluded(left_value)),
-        _ => return None,
-    };
-
-    parser.visit_between(&left_col, &low, &high)
-}
-
-fn visit_and(
-    expr: &BinaryExpr,
-    index_info: &dyn IndexInformationProvider,
-    depth: usize,
-) -> Result<Option<IndexedExpression>> {
-    // Many scalar indices can efficiently handle a BETWEEN query as a single search and this
-    // can be much more efficient than two separate range queries.  As an optimization we check
-    // to see if this is a between query and, if so, we handle it as a single query
-    //
-    // Note: We can't rely on users writing the SQL BETWEEN operator because:
-    //   * Some users won't realize it's an option or a good idea
-    //   * Datafusion's simplifier will rewrite the BETWEEN operator into two separate range queries
-    if let Some(range_expr) = maybe_range(expr, index_info) {
-        return Ok(Some(range_expr));
-    }
-
-    let left = visit_node(&expr.left, index_info, depth + 1)?;
-    let right = visit_node(&expr.right, index_info, depth + 1)?;
-    Ok(match (left, right) {
-        (Some(left), Some(right)) => Some(left.and(right)),
-        (Some(left), None) => Some(left.refine((*expr.right).clone())),
-        (None, Some(right)) => Some(right.refine((*expr.left).clone())),
-        (None, None) => None,
-    })
-}
-
-fn visit_or(
-    expr: &BinaryExpr,
-    index_info: &dyn IndexInformationProvider,
-    depth: usize,
-) -> Result<Option<IndexedExpression>> {
-    let left = visit_node(&expr.left, index_info, depth + 1)?;
-    let right = visit_node(&expr.right, index_info, depth + 1)?;
-    Ok(match (left, right) {
-        (Some(left), Some(right)) => left.maybe_or(right),
-        // If one side can use an index and the other side cannot then
-        // we must abandon the entire thing.  For example, consider the
-        // query "color == 'blue' or size > 10" where color is indexed but
-        // size is not.  It's entirely possible that size > 10 matches every
-        // row in our database.  There is nothing we can do except a full scan
-        (Some(_), None) => None,
-        (None, Some(_)) => None,
-        (None, None) => None,
-    })
-}
-
-fn visit_binary_expr(
-    expr: &BinaryExpr,
-    index_info: &dyn IndexInformationProvider,
-    depth: usize,
-) -> Result<Option<IndexedExpression>> {
-    match &expr.op {
-        Operator::Lt | Operator::LtEq | Operator::Gt | Operator::GtEq | Operator::Eq => {
-            Ok(visit_comparison(expr, index_info))
-        }
-        // visit_comparison will maybe create an Eq query which we negate
-        Operator::NotEq => Ok(visit_comparison(expr, index_info).and_then(|node| node.maybe_not())),
-        Operator::And => visit_and(expr, index_info, depth),
-        Operator::Or => visit_or(expr, index_info, depth),
-        _ => Ok(None),
-    }
-}
-
-fn visit_scalar_fn(
-    scalar_fn: &ScalarFunction,
-    index_info: &dyn IndexInformationProvider,
-) -> Option<IndexedExpression> {
-    if scalar_fn.args.is_empty() {
-        return None;
-    }
-    let (col, data_type, query_parser) = maybe_indexed_column(&scalar_fn.args[0], index_info)?;
-    query_parser.visit_scalar_function(&col, &data_type, &scalar_fn.func, &scalar_fn.args)
-}
-
-fn visit_like_expr(
-    like: &Like,
-    index_info: &dyn IndexInformationProvider,
-) -> Option<IndexedExpression> {
-    let (column, _, query_parser) = maybe_indexed_column(&like.expr, index_info)?;
-
-    // Extract the pattern as a ScalarValue
-    let pattern = match like.pattern.as_ref() {
-        Expr::Literal(scalar, _) => scalar.clone(),
-        _ => return None,
-    };
-
-    query_parser.visit_like(&column, like, &pattern)
-}
-
-fn visit_node(
-    expr: &Expr,
-    index_info: &dyn IndexInformationProvider,
-    depth: usize,
-) -> Result<Option<IndexedExpression>> {
-    if depth >= MAX_DEPTH {
-        return Err(Error::invalid_input(format!(
-            "the filter expression is too long, lance limit the max number of conditions to {}",
-            MAX_DEPTH
-        )));
-    }
-    match expr {
-        Expr::Between(between) => Ok(visit_between(between, index_info)),
-        Expr::Alias(alias) => visit_node(alias.expr.as_ref(), index_info, depth),
-        Expr::Column(_) => Ok(visit_column(expr, index_info)),
-        Expr::InList(in_list) => Ok(visit_in_list(in_list, index_info)),
-        Expr::IsFalse(expr) => Ok(visit_is_bool(expr.as_ref(), index_info, false)),
-        Expr::IsTrue(expr) => Ok(visit_is_bool(expr.as_ref(), index_info, true)),
-        Expr::IsNull(expr) => Ok(visit_is_null(expr.as_ref(), index_info, false)),
-        Expr::IsNotNull(expr) => {
-            // `regexp_match(col, pat)` returns a list and is coerced to
-            // `IsNotNull(regexp_match(...))` before it reaches here. Unwrap that
-            // so the regex acceleration applies; everything else is a genuine
-            // IS NOT NULL check.
-            if let Expr::ScalarFunction(scalar_fn) = expr.as_ref()
-                && scalar_fn.func.name() == "regexp_match"
-            {
-                return Ok(visit_scalar_fn(scalar_fn, index_info));
-            }
-            Ok(visit_is_null(expr.as_ref(), index_info, true))
-        }
-        Expr::Not(expr) => visit_not(expr.as_ref(), index_info, depth),
-        Expr::BinaryExpr(binary_expr) => visit_binary_expr(binary_expr, index_info, depth),
-        Expr::ScalarFunction(scalar_fn) => Ok(visit_scalar_fn(scalar_fn, index_info)),
-        Expr::Like(like) => {
-            if like.negated {
-                // NOT LIKE cannot be efficiently pruned with zone maps
-                Ok(None)
-            } else {
-                Ok(visit_like_expr(like, index_info))
-            }
-        }
-        _ => Ok(None),
-    }
-}
-
-/// A trait to be used in `apply_scalar_indices` to inform the function which columns are indexeds
-pub trait IndexInformationProvider {
-    /// Check if an index exists for `col` and, if so, return the data type of col
-    /// as well as a query parser that can parse queries for that column
-    fn get_index(&self, col: &str) -> Option<(&DataType, &MultiQueryParser)>;
-
-    /// The set of fragments covered by `(column, index_name)`.
-    ///
-    /// Returns `None` when the provider doesn't know — callers must treat
-    /// that as "coverage unknown" rather than "covers everything". The
-    /// default implementation always returns `None`, so providers that
-    /// haven't been updated cannot accidentally claim full coverage.
-    fn fragment_bitmap(&self, _column: &str, _index_name: &str) -> Option<RoaringBitmap> {
-        None
-    }
-}
-
-/// Attempt to split a filter expression into a search of scalar indexes and an
-///   optional post-search refinement query
-pub fn apply_scalar_indices(
-    expr: Expr,
-    index_info: &dyn IndexInformationProvider,
-) -> Result<IndexedExpression> {
-    let mut result =
-        visit_node(&expr, index_info, 0)?.unwrap_or(IndexedExpression::refine_only(expr));
-    if let Some(query) = result.scalar_query.as_mut() {
-        populate_fragment_bitmaps(query, index_info);
-    }
-    Ok(result)
-}
-
-/// Walk a [`ScalarIndexExpr`] and fill in `fragment_bitmap` on each leaf from
-/// the `index_info` provider. Leaves the bitmap as `None` if the provider
-/// can't answer.
-fn populate_fragment_bitmaps(
-    expr: &mut ScalarIndexExpr,
-    index_info: &dyn IndexInformationProvider,
-) {
-    match expr {
-        ScalarIndexExpr::Not(inner) => populate_fragment_bitmaps(inner, index_info),
-        ScalarIndexExpr::And(lhs, rhs) | ScalarIndexExpr::Or(lhs, rhs) => {
-            populate_fragment_bitmaps(lhs, index_info);
-            populate_fragment_bitmaps(rhs, index_info);
-        }
-        ScalarIndexExpr::Query(search) => {
-            search.fragment_bitmap = index_info.fragment_bitmap(&search.column, &search.index_name);
-        }
-    }
-}
-
 #[derive(Clone, Default, Debug)]
 pub struct FilterPlan {
     pub index_query: Option<ScalarIndexExpr>,
@@ -2157,9 +1130,13 @@ mod tests {
     use datafusion_expr::simplify::SimplifyContext;
     use lance_datafusion::exec::{LanceExecutionOptions, get_session_context};
     use lance_select::result::IndexExprResultWireFormat;
+    use lance_select::{IndexExprResult, NullableIndexExprResult, NullableRowAddrMask};
     use roaring::RoaringBitmap;
 
-    use crate::scalar::json::{JsonQuery, JsonQueryParser};
+    use crate::scalar::{
+        AnyQuery,
+        json::{JsonQuery, JsonQueryParser},
+    };
 
     use super::*;
 
@@ -2842,6 +1819,7 @@ mod tests {
         assert!(!negated.is_exact());
         assert!(!negated.is_at_most());
         assert!(!negated.is_at_least());
+
         // !{l, u} = {!u, !l}. AllowList → BlockList.
         assert!(matches!(negated.lower, NullableRowAddrMask::BlockList(_)));
         assert!(matches!(negated.upper, NullableRowAddrMask::BlockList(_)));
diff --git a/rust/lance-index/src/scalar/fmindex.rs b/rust/lance-index/src/scalar/fmindex.rs
index cdf19f0304c..f400f9dbff0 100644
--- a/rust/lance-index/src/scalar/fmindex.rs
+++ b/rust/lance-index/src/scalar/fmindex.rs
@@ -34,7 +34,6 @@ use lance_core::deepsize::DeepSizeOf;
 use lance_core::{Error, ROW_ADDR, Result};
 use roaring::RoaringBitmap;
 
-use crate::frag_reuse::FragReuseIndex;
 use crate::metrics::MetricsCollector;
 use crate::pb;
 use crate::scalar::expression::{ScalarQueryParser, TextQueryParser};
@@ -46,8 +45,8 @@ use crate::scalar::{
     AnyQuery, BuiltinIndexType, CreatedIndex, IndexFile, IndexStore, OldIndexDataFilter,
     ScalarIndex, ScalarIndexParams, SearchResult, TextQuery, UpdateCriteria,
 };
-use crate::vector::VectorIndex;
 use crate::{Index, IndexType};
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 const FMINDEX_INDEX_VERSION: u32 = 10;
 const BLOCK_WORDS: usize = 4096;
@@ -1258,7 +1257,7 @@ impl FMIndexScalarIndex {
 
     async fn load(
         store: Arc<dyn IndexStore>,
-        _fri: Option<Arc<FragReuseIndex>>,
+        _fri: Option<Arc<dyn RowIdRemapper>>,
         _cache: &LanceCache,
     ) -> Result<Arc<Self>> {
         let files = store.list_files_with_sizes().await?;
@@ -1295,11 +1294,6 @@ impl Index for FMIndexScalarIndex {
     fn as_index(self: Arc<Self>) -> Arc<dyn Index> {
         self
     }
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Err(Error::invalid_input_source(
-            "Fm is not a vector index".into(),
-        ))
-    }
     async fn prewarm(&self) -> Result<()> {
         Ok(())
     }
@@ -1312,7 +1306,7 @@ impl Index for FMIndexScalarIndex {
         }))
     }
     fn index_type(&self) -> IndexType {
-        IndexType::Fm
+        IndexType::FMIndex
     }
     async fn calculate_included_frags(&self) -> Result<RoaringBitmap> {
         let mut frags = RoaringBitmap::new();
@@ -1380,7 +1374,7 @@ impl ScalarIndex for FMIndexScalarIndex {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&pb::FmIndexIndexDetails {}).unwrap(),
             index_version: FMINDEX_INDEX_VERSION,
-            files,
+            files: Some(files),
         })
     }
     fn update_criteria(&self) -> UpdateCriteria {
@@ -1389,7 +1383,7 @@ impl ScalarIndex for FMIndexScalarIndex {
         )
     }
     fn derive_index_params(&self) -> Result<ScalarIndexParams> {
-        Ok(ScalarIndexParams::for_builtin(BuiltinIndexType::Fm))
+        Ok(ScalarIndexParams::for_builtin(BuiltinIndexType::FMIndex))
     }
 }
 
@@ -1710,7 +1704,7 @@ impl ScalarIndexPlugin for FMIndexPlugin {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&pb::FmIndexIndexDetails {}).unwrap(),
             index_version: FMINDEX_INDEX_VERSION,
-            files,
+            files: Some(files),
         })
     }
     fn provides_exact_answer(&self) -> bool {
@@ -1737,7 +1731,7 @@ impl ScalarIndexPlugin for FMIndexPlugin {
         &self,
         store: Arc<dyn IndexStore>,
         details: &prost_types::Any,
-        fri: Option<Arc<FragReuseIndex>>,
+        fri: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         let _ = details
@@ -2271,7 +2265,7 @@ mod tests {
             .await
             .unwrap();
 
-        assert_eq!(created.files.len(), 2);
+        assert_eq!(created.files.as_deref().unwrap_or_default().len(), 2);
 
         let index = FMIndexPlugin
             .load_index(store, &created.index_details, None, &LanceCache::no_cache())
diff --git a/rust/lance-index/src/scalar/inverted.rs b/rust/lance-index/src/scalar/inverted.rs
index d0bb0e40d3a..7871904041e 100644
--- a/rust/lance-index/src/scalar/inverted.rs
+++ b/rust/lance-index/src/scalar/inverted.rs
@@ -114,14 +114,12 @@ use lance_core::Error;
 
 use crate::pbold;
 use crate::progress::IndexBuildProgress;
-use crate::{
-    frag_reuse::FragReuseIndex,
-    scalar::{
-        CreatedIndex, ScalarIndex,
-        expression::{FtsQueryParser, ScalarQueryParser},
-        registry::{ScalarIndexPlugin, TrainingCriteria, TrainingOrdering, TrainingRequest},
-    },
+use crate::scalar::{
+    CreatedIndex, ScalarIndex,
+    expression::{FtsQueryParser, ScalarQueryParser},
+    registry::{ScalarIndexPlugin, TrainingCriteria, TrainingOrdering, TrainingRequest},
 };
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 use super::IndexStore;
 
@@ -155,7 +153,7 @@ impl InvertedIndexPlugin {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&details).unwrap(),
             index_version: current_fts_format_version().index_version(),
-            files,
+            files: Some(files),
         })
     }
 
@@ -289,7 +287,7 @@ impl ScalarIndexPlugin for InvertedIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(
diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs
index 41a18c3bd68..ef5ca4a98a7 100644
--- a/rust/lance-index/src/scalar/inverted/index.rs
+++ b/rust/lance-index/src/scalar/inverted/index.rs
@@ -69,7 +69,6 @@ use super::{
     builder::{InnerBuilder, PositionRecorder},
     iter::CompressedPostingListIterator,
 };
-use crate::frag_reuse::FragReuseIndex;
 use crate::pbold;
 use crate::progress::IndexBuildProgress;
 use crate::scalar::inverted::scorer::MemBM25Scorer;
@@ -80,6 +79,7 @@ use crate::scalar::{
 };
 use crate::{FtsPrewarmOptions, Index};
 use crate::{prefilter::PreFilter, scalar::inverted::iter::take_fst_keys};
+use lance_index_core::row_id_remap::RowIdRemapper;
 use std::str::FromStr;
 
 // Version 0: Arrow TokenSetFormat (legacy)
@@ -548,7 +548,7 @@ impl InvertedIndex {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&details).unwrap(),
             index_version: first.index_version(),
-            files,
+            files: Some(files),
         })
     }
 
@@ -821,7 +821,7 @@ impl InvertedIndex {
 
     async fn load_legacy_index(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>> {
         log::warn!("loading legacy FTS index");
@@ -888,7 +888,7 @@ impl InvertedIndex {
 
     pub async fn load(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>>
     where
@@ -986,12 +986,6 @@ impl Index for InvertedIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn crate::vector::VectorIndex>> {
-        Err(Error::invalid_input(
-            "inverted index cannot be cast to vector index",
-        ))
-    }
-
     fn statistics(&self) -> Result<serde_json::Value> {
         let num_tokens = self
             .partitions
@@ -1164,7 +1158,7 @@ impl ScalarIndex for InvertedIndex {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&details).unwrap(),
             index_version: self.index_version(),
-            files,
+            files: Some(files),
         })
     }
 
@@ -1184,7 +1178,7 @@ impl ScalarIndex for InvertedIndex {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&details).unwrap(),
             index_version: self.index_version(),
-            files,
+            files: Some(files),
         })
     }
 
@@ -1257,7 +1251,7 @@ impl InvertedPartition {
     pub async fn load(
         store: Arc<dyn IndexStore>,
         id: u64,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
         token_set_format: TokenSetFormat,
     ) -> Result<Self> {
@@ -4698,7 +4692,7 @@ impl DocSet {
     pub async fn load(
         reader: Arc<dyn IndexReader>,
         is_legacy: bool,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let batch = reader.read_range(0..reader.num_rows(), None).await?;
         let row_id_col = batch[ROW_ID].as_primitive::<datatypes::UInt64Type>();
@@ -4730,7 +4724,7 @@ impl DocSet {
         row_id_col: &UInt64Array,
         num_tokens_col: &arrow_array::UInt32Array,
         is_legacy: bool,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         // for legacy format, the row id is doc id; sorting keeps binary search viable
         if is_legacy {
diff --git a/rust/lance-index/src/scalar/inverted/lazy_docset.rs b/rust/lance-index/src/scalar/inverted/lazy_docset.rs
index 7a0ee41efd8..3068cd94c61 100644
--- a/rust/lance-index/src/scalar/inverted/lazy_docset.rs
+++ b/rust/lance-index/src/scalar/inverted/lazy_docset.rs
@@ -24,9 +24,9 @@ use lance_core::ROW_ID;
 use lance_core::Result;
 use tokio::sync::OnceCell;
 
-use crate::frag_reuse::FragReuseIndex;
 use crate::scalar::inverted::index::{DocSet, NUM_TOKEN_COL};
 use crate::scalar::{IndexReader, IndexStore};
+use lance_index_core::row_id_remap::RowIdRemapper;
 use lance_select::mask::RowAddrMask;
 
 /// Lazy view over an inverted-index partition's `DocSet`.
@@ -63,7 +63,7 @@ pub struct DeferredDocSet {
     store: Arc<dyn IndexStore>,
     docs_path: String,
     is_legacy: bool,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     /// Doc count cached at construction so `len()` stays sync + IO-free.
     num_rows: usize,
     /// `sum(num_tokens)` cached on first compute.
@@ -122,7 +122,7 @@ impl LazyDocSet {
         docs_path: String,
         num_rows: usize,
         is_legacy: bool,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Self {
         Self::Deferred(Box::new(DeferredDocSet {
             store,
diff --git a/rust/lance-index/src/scalar/json.rs b/rust/lance-index/src/scalar/json.rs
index 7adf055db61..066a9947048 100644
--- a/rust/lance-index/src/scalar/json.rs
+++ b/rust/lance-index/src/scalar/json.rs
@@ -33,15 +33,15 @@ use lance_core::{Error, ROW_ID, Result, cache::LanceCache, error::LanceOptionExt
 
 use crate::{
     Index, IndexType,
-    frag_reuse::FragReuseIndex,
     metrics::MetricsCollector,
-    registry::IndexPluginRegistry,
+    registry::PluginRegistry,
     scalar::{
         AnyQuery, CreatedIndex, IndexStore, ScalarIndex, SearchResult, UpdateCriteria,
         expression::{IndexedExpression, ScalarIndexExpr, ScalarIndexSearch, ScalarQueryParser},
         registry::{ScalarIndexPlugin, TrainingCriteria, TrainingRequest, VALUE_COLUMN_NAME},
     },
 };
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 const JSON_INDEX_VERSION: u32 = 0;
 
@@ -76,10 +76,6 @@ impl Index for JsonIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn crate::vector::VectorIndex>> {
-        unimplemented!()
-    }
-
     fn index_type(&self) -> IndexType {
         // TODO: This causes the index to appear as btree in list_indices call.  Need better logic
         // in list_indices to use details instead of index_type.
@@ -380,7 +376,7 @@ impl TrainingRequest for JsonTrainingRequest {
 /// Plugin implementation for a [`JsonIndex`]
 #[derive(Default)]
 pub struct JsonIndexPlugin {
-    registry: Mutex<Option<Arc<IndexPluginRegistry>>>,
+    registry: Mutex<Option<Arc<dyn PluginRegistry>>>,
 }
 
 impl std::fmt::Debug for JsonIndexPlugin {
@@ -390,7 +386,7 @@ impl std::fmt::Debug for JsonIndexPlugin {
 }
 
 impl JsonIndexPlugin {
-    fn registry(&self) -> Result<Arc<IndexPluginRegistry>> {
+    fn registry(&self) -> Result<Arc<dyn PluginRegistry>> {
         Ok(self.registry.lock().unwrap().as_ref().expect_ok()?.clone())
     }
 
@@ -705,7 +701,7 @@ impl ScalarIndexPlugin for JsonIndexPlugin {
         true
     }
 
-    fn attach_registry(&self, registry: Arc<IndexPluginRegistry>) {
+    fn attach_registry(&self, registry: Arc<dyn PluginRegistry>) {
         let mut reg_ref = self.registry.lock().unwrap();
         *reg_ref = Some(registry);
     }
@@ -793,7 +789,7 @@ impl ScalarIndexPlugin for JsonIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         let registry = self.registry().unwrap();
diff --git a/rust/lance-index/src/scalar/label_list.rs b/rust/lance-index/src/scalar/label_list.rs
index 8e07a607bff..e7615a40824 100644
--- a/rust/lance-index/src/scalar/label_list.rs
+++ b/rust/lance-index/src/scalar/label_list.rs
@@ -31,7 +31,6 @@ use tracing::instrument;
 use super::{AnyQuery, IndexFile, IndexStore, LabelListQuery, ScalarIndex, bitmap::BitmapIndex};
 use super::{BuiltinIndexType, SargableQuery, ScalarIndexParams};
 use super::{MetricsCollector, SearchResult};
-use crate::frag_reuse::FragReuseIndex;
 use crate::pbold;
 use crate::scalar::bitmap::{BitmapIndexPlugin, BitmapIndexState};
 use crate::scalar::expression::{LabelListQueryParser, ScalarQueryParser};
@@ -41,6 +40,7 @@ use crate::scalar::registry::{
 };
 use crate::scalar::{CreatedIndex, UpdateCriteria};
 use crate::{Index, IndexType};
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 pub const BITMAP_LOOKUP_NAME: &str = "bitmap_page_lookup.lance";
 pub const LABEL_LIST_NULLS_METADATA_KEY: &str = "lance:label_list_nulls";
@@ -90,7 +90,7 @@ impl LabelListIndex {
 
     async fn load(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>> {
         let values_index =
@@ -110,12 +110,6 @@ impl Index for LabelListIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn crate::vector::VectorIndex>> {
-        Err(Error::not_supported_source(
-            "LabeListIndex is not a vector index".into(),
-        ))
-    }
-
     async fn prewarm(&self) -> Result<()> {
         self.values_index.prewarm().await
     }
@@ -241,7 +235,7 @@ impl ScalarIndex for LabelListIndex {
             index_details: prost_types::Any::from_msg(&pbold::LabelListIndexDetails::default())
                 .unwrap(),
             index_version: LABEL_LIST_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -271,7 +265,7 @@ impl ScalarIndex for LabelListIndex {
             index_details: prost_types::Any::from_msg(&pbold::LabelListIndexDetails::default())
                 .unwrap(),
             index_version: LABEL_LIST_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -446,7 +440,7 @@ fn unnest_chunks(
 
 async fn read_list_nulls(
     store: Arc<dyn IndexStore>,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 ) -> Result<RowAddrTreeMap> {
     let reader = store.open_index_file(BITMAP_LOOKUP_NAME).await?;
     if let Some(buffer_idx_str) = reader.schema().metadata.get(LABEL_LIST_NULLS_METADATA_KEY) {
@@ -523,7 +517,7 @@ impl LabelListIndexState {
         self,
         store: Arc<dyn IndexStore>,
         index_cache: &LanceCache,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Arc<LabelListIndex>> {
         let bitmap = self
             .bitmap_state
@@ -684,7 +678,7 @@ impl ScalarIndexPlugin for LabelListIndexPlugin {
             index_details: prost_types::Any::from_msg(&pbold::LabelListIndexDetails::default())
                 .unwrap(),
             index_version: LABEL_LIST_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -693,7 +687,7 @@ impl ScalarIndexPlugin for LabelListIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(
@@ -705,7 +699,7 @@ impl ScalarIndexPlugin for LabelListIndexPlugin {
     async fn get_from_cache(
         &self,
         index_store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Option<Arc<dyn ScalarIndex>>> {
         let Some(state) = cache.get_with_key(&LabelListIndexStateKey).await else {
diff --git a/rust/lance-index/src/scalar/lance_format.rs b/rust/lance-index/src/scalar/lance_format.rs
index 2f82deb8403..53cc1201d38 100644
--- a/rust/lance-index/src/scalar/lance_format.rs
+++ b/rust/lance-index/src/scalar/lance_format.rs
@@ -13,10 +13,7 @@ use lance_core::deepsize::DeepSizeOf;
 use lance_core::{Error, Result, cache::LanceCache};
 use lance_encoding::decoder::{DecoderPlugins, FilterExpression};
 use lance_encoding::version::LanceFileVersion;
-use lance_file::previous::{
-    reader::FileReader as PreviousFileReader,
-    writer::{FileWriter as PreviousFileWriter, ManifestProvider as PreviousManifestProvider},
-};
+use lance_file::previous::reader::FileReader as PreviousFileReader;
 use lance_file::reader::{self as current_reader, FileReaderOptions, ReaderProjection};
 use lance_file::writer as current_writer;
 use lance_io::scheduler::{ScanScheduler, SchedulerConfig};
@@ -119,33 +116,8 @@ impl LanceIndexStore {
     }
 }
 
-#[async_trait]
-impl<M: PreviousManifestProvider + Send + Sync> IndexWriter for PreviousFileWriter<M> {
-    async fn write_record_batch(&mut self, batch: RecordBatch) -> Result<u64> {
-        let offset = self.tell().await?;
-        self.write(&[batch]).await?;
-        Ok(offset as u64)
-    }
-
-    async fn finish(&mut self) -> Result<IndexFile> {
-        Self::finish(self).await?;
-        Ok(IndexFile {
-            path: String::new(),
-            size_bytes: self.tell().await? as u64,
-        })
-    }
-
-    async fn finish_with_metadata(
-        &mut self,
-        metadata: HashMap<String, String>,
-    ) -> Result<IndexFile> {
-        Self::finish_with_metadata(self, &metadata).await?;
-        Ok(IndexFile {
-            path: String::new(),
-            size_bytes: self.tell().await? as u64,
-        })
-    }
-}
+struct LancePreviousReader(PreviousFileReader);
+struct LanceCurrentReader(current_reader::FileReader);
 
 struct LanceIndexWriter {
     path: String,
@@ -188,9 +160,10 @@ impl IndexWriter for LanceIndexWriter {
 }
 
 #[async_trait]
-impl IndexReader for PreviousFileReader {
+impl IndexReader for LancePreviousReader {
     async fn read_record_batch(&self, offset: u64, _batch_size: u64) -> Result<RecordBatch> {
-        self.read_batch(offset as i32, ReadBatchParams::RangeFull, self.schema())
+        self.0
+            .read_batch(offset as i32, ReadBatchParams::RangeFull, self.0.schema())
             .await
     }
 
@@ -200,36 +173,36 @@ impl IndexReader for PreviousFileReader {
         projection: Option<&[&str]>,
     ) -> Result<RecordBatch> {
         let projection = match projection {
-            Some(projection) => self.schema().project(projection)?,
-            None => self.schema().clone(),
+            Some(projection) => self.0.schema().project(projection)?,
+            None => self.0.schema().clone(),
         };
-        self.read_range(range, &projection).await
+        self.0.read_range(range, &projection).await
     }
 
     async fn num_batches(&self, _batch_size: u64) -> u32 {
-        self.num_batches() as u32
+        self.0.num_batches() as u32
     }
 
     fn num_rows(&self) -> usize {
-        self.len()
+        self.0.len()
     }
 
     fn schema(&self) -> &lance_core::datatypes::Schema {
-        Self::schema(self)
+        self.0.schema()
     }
 }
 
 #[async_trait]
-impl IndexReader for current_reader::FileReader {
+impl IndexReader for LanceCurrentReader {
     async fn read_record_batch(&self, offset: u64, batch_size: u64) -> Result<RecordBatch> {
         let start = offset * batch_size;
         let end = start + batch_size;
-        let end = end.min(self.num_rows());
+        let end = end.min(self.num_rows() as u64);
         self.read_range(start as usize..end as usize, None).await
     }
 
     async fn read_global_buffer(&self, n: u32) -> Result<Bytes> {
-        Self::read_global_buffer(self, n).await
+        self.0.read_global_buffer(n).await
     }
 
     async fn read_range(
@@ -239,19 +212,20 @@ impl IndexReader for current_reader::FileReader {
     ) -> Result<RecordBatch> {
         if range.is_empty() {
             return Ok(RecordBatch::new_empty(Arc::new(
-                self.schema().as_ref().into(),
+                self.0.schema().as_ref().into(),
             )));
         }
         let projection = if let Some(projection) = projection {
             ReaderProjection::from_column_names(
-                self.metadata().version(),
-                self.schema(),
+                self.0.metadata().version(),
+                self.0.schema(),
                 projection,
             )?
         } else {
-            ReaderProjection::from_whole_schema(self.schema(), self.metadata().version())
+            ReaderProjection::from_whole_schema(self.0.schema(), self.0.metadata().version())
         };
         let batches = self
+            .0
             .read_stream_projected(
                 ReadBatchParams::Range(range),
                 u32::MAX,
@@ -271,22 +245,19 @@ impl IndexReader for current_reader::FileReader {
         ranges: &[std::ops::Range<usize>],
         projection: Option<&[&str]>,
     ) -> Result<RecordBatch> {
-        let empty_batch = || {
-            Ok(RecordBatch::new_empty(Arc::new(
-                self.schema().as_ref().into(),
-            )))
-        };
+        let schema: Arc<arrow_schema::Schema> = Arc::new(self.0.schema().as_ref().into());
+        let empty_batch = || Ok(RecordBatch::new_empty(schema.clone()));
         if ranges.is_empty() {
             return empty_batch();
         }
         let projection = if let Some(projection) = projection {
             ReaderProjection::from_column_names(
-                self.metadata().version(),
-                self.schema(),
+                self.0.metadata().version(),
+                self.0.schema(),
                 projection,
             )?
         } else {
-            ReaderProjection::from_whole_schema(self.schema(), self.metadata().version())
+            ReaderProjection::from_whole_schema(self.0.schema(), self.0.metadata().version())
         };
         // `DecodeBatchScheduler::schedule_ranges` requires sorted,
         // non-overlapping ranges; sort internally and permute the
@@ -300,6 +271,7 @@ impl IndexReader for current_reader::FileReader {
             .collect();
         let total_rows: u64 = sorted_ranges.iter().map(|r| r.end - r.start).sum();
         let batches = self
+            .0
             .read_stream_projected(
                 ReadBatchParams::Ranges(sorted_ranges),
                 (total_rows as u32).max(1),
@@ -352,47 +324,48 @@ impl IndexReader for current_reader::FileReader {
     ) -> Result<Pin<Box<dyn lance_io::stream::RecordBatchStream>>> {
         if range.is_empty() {
             return Ok(Box::pin(lance_io::stream::RecordBatchStreamAdapter::new(
-                Arc::new(self.schema().as_ref().into()),
+                Arc::new(self.0.schema().as_ref().into()),
                 futures::stream::empty(),
             )));
         }
         let projection = if let Some(projection) = projection {
             ReaderProjection::from_column_names(
-                self.metadata().version(),
-                self.schema(),
+                self.0.metadata().version(),
+                self.0.schema(),
                 projection,
             )?
         } else {
-            ReaderProjection::from_whole_schema(self.schema(), self.metadata().version())
+            ReaderProjection::from_whole_schema(self.0.schema(), self.0.metadata().version())
         };
-        self.read_stream_projected(
-            ReadBatchParams::Range(range),
-            4096,
-            2,
-            projection,
-            FilterExpression::no_filter(),
-        )
-        .await
+        self.0
+            .read_stream_projected(
+                ReadBatchParams::Range(range),
+                4096,
+                2,
+                projection,
+                FilterExpression::no_filter(),
+            )
+            .await
     }
 
     // V2 format has removed the row group concept,
     // so here we assume each batch is with 4096 rows.
     async fn num_batches(&self, batch_size: u64) -> u32 {
-        Self::num_rows(self).div_ceil(batch_size) as u32
+        self.0.num_rows().div_ceil(batch_size) as u32
     }
 
     fn num_rows(&self) -> usize {
-        Self::num_rows(self) as usize
+        self.0.num_rows() as usize
     }
 
     fn schema(&self) -> &lance_core::datatypes::Schema {
-        Self::schema(self)
+        self.0.schema()
     }
 
     fn file_size_bytes(&self) -> Option<u64> {
         // The manifest records each index file's size and passes it to the reader
         // at open, so it's already in metadata here (no extra I/O).
-        Some(self.metadata().file_size())
+        Some(self.0.metadata().file_size())
     }
 }
 
@@ -450,7 +423,7 @@ impl IndexStore for LanceIndexStore {
         )
         .await
         {
-            Ok(reader) => Ok(Arc::new(reader)),
+            Ok(reader) => Ok(Arc::new(LanceCurrentReader(reader))),
             Err(e) => {
                 // If the error is a version conflict we can try to read the file with v1 reader
                 if let Error::VersionConflict { .. } = e {
@@ -461,7 +434,7 @@ impl IndexStore for LanceIndexStore {
                         Some(&self.metadata_cache),
                     )
                     .await?;
-                    Ok(Arc::new(file_reader))
+                    Ok(Arc::new(LancePreviousReader(file_reader)))
                 } else {
                     Err(e)
                 }
@@ -535,7 +508,14 @@ impl IndexStore for LanceIndexStore {
     }
 
     async fn list_files_with_sizes(&self) -> Result<Vec<IndexFile>> {
-        list_index_files_with_sizes(&self.object_store, &self.index_dir).await
+        let files = list_index_files_with_sizes(&self.object_store, &self.index_dir).await?;
+        Ok(files
+            .into_iter()
+            .map(|f| IndexFile {
+                path: f.path,
+                size_bytes: f.size_bytes,
+            })
+            .collect())
     }
 }
 
diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs
index b452ef78c85..0d10c9c0134 100644
--- a/rust/lance-index/src/scalar/ngram.rs
+++ b/rust/lance-index/src/scalar/ngram.rs
@@ -15,7 +15,6 @@ use super::{
     AnyQuery, BuiltinIndexType, IndexFile, IndexReader, IndexStore, IndexWriter, MetricsCollector,
     ScalarIndex, ScalarIndexParams, SearchResult, TextQuery,
 };
-use crate::frag_reuse::FragReuseIndex;
 use crate::metrics::NoOpMetricsCollector;
 use crate::pbold;
 use crate::scalar::expression::{ScalarQueryParser, TextQueryParser};
@@ -24,7 +23,6 @@ use crate::scalar::registry::{
     VALUE_COLUMN_NAME,
 };
 use crate::scalar::{CreatedIndex, UpdateCriteria};
-use crate::vector::VectorIndex;
 use crate::{Index, IndexType};
 use arrow::array::{AsArray, UInt32Builder};
 use arrow::datatypes::{UInt32Type, UInt64Type};
@@ -42,6 +40,7 @@ use lance_core::utils::tempfile::TempDir;
 use lance_core::utils::tokio::get_num_compute_intensive_cpus;
 use lance_core::utils::tracing::{IO_TYPE_LOAD_SCALAR_PART, TRACE_IO_EVENTS};
 use lance_core::{Error, ROW_ID, Result};
+use lance_index_core::row_id_remap::RowIdRemapper;
 use lance_io::object_store::ObjectStore;
 use lance_select::RowAddrTreeMap;
 use lance_tokenizer::{
@@ -187,7 +186,7 @@ impl CacheKey for NGramPostingListKey {
 impl NGramPostingList {
     fn try_from_batch(
         batch: RecordBatch,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let bitmap_bytes = batch.column(0).as_binary::<i32>().value(0);
         let mut bitmap = RoaringTreemap::deserialize_from(bitmap_bytes)
@@ -214,7 +213,7 @@ impl NGramPostingList {
 /// Reads on-demand ngram posting lists from storage (and stores them in a cache)
 struct NGramPostingListReader {
     reader: Arc<dyn IndexReader>,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     index_cache: WeakLanceCache,
 }
 
@@ -299,7 +298,7 @@ impl DeepSizeOf for NGramIndex {
 impl NGramIndex {
     async fn from_store(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Self> {
         let tokens = store.open_index_file(POSTINGS_FILENAME).await?;
@@ -375,7 +374,7 @@ impl NGramIndex {
 
     async fn load(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>>
     where
@@ -397,12 +396,6 @@ impl Index for NGramIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Err(Error::invalid_input_source(
-            "NGramIndex is not a vector index".into(),
-        ))
-    }
-
     fn statistics(&self) -> Result<serde_json::Value> {
         let ngram_stats = NGramStatistics {
             num_ngrams: self.tokens.len(),
@@ -555,7 +548,7 @@ impl ScalarIndex for NGramIndex {
             index_details: prost_types::Any::from_msg(&pbold::NGramIndexDetails::default())
                 .unwrap(),
             index_version: NGRAM_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -576,7 +569,7 @@ impl ScalarIndex for NGramIndex {
             index_details: prost_types::Any::from_msg(&pbold::NGramIndexDetails::default())
                 .unwrap(),
             index_version: NGRAM_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -1350,7 +1343,7 @@ impl ScalarIndexPlugin for NGramIndexPlugin {
             index_details: prost_types::Any::from_msg(&pbold::NGramIndexDetails::default())
                 .unwrap(),
             index_version: NGRAM_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -1358,7 +1351,7 @@ impl ScalarIndexPlugin for NGramIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(NGramIndex::load(index_store, frag_reuse_index, cache).await? as Arc<dyn ScalarIndex>)
diff --git a/rust/lance-index/src/scalar/registry.rs b/rust/lance-index/src/scalar/registry.rs
index 0add98d8ab3..acdbabb5311 100644
--- a/rust/lance-index/src/scalar/registry.rs
+++ b/rust/lance-index/src/scalar/registry.rs
@@ -1,242 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-use std::borrow::Cow;
-use std::sync::Arc;
-
-use arrow_schema::Field;
-use async_trait::async_trait;
-use datafusion::execution::SendableRecordBatchStream;
-use lance_core::{
-    Result,
-    cache::{LanceCache, UnsizedCacheKey},
-};
-
-use crate::progress::IndexBuildProgress;
-use crate::registry::IndexPluginRegistry;
-use crate::{
-    frag_reuse::FragReuseIndex,
-    scalar::{CreatedIndex, IndexStore, ScalarIndex, expression::ScalarQueryParser},
+pub use lance_index_core::scalar::registry::{
+    DefaultTrainingRequest, ScalarIndexCacheKey, ScalarIndexPlugin, TrainingCriteria,
+    TrainingOrdering, TrainingRequest, VALUE_COLUMN_NAME,
 };
-
-pub const VALUE_COLUMN_NAME: &str = "value";
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum TrainingOrdering {
-    /// The input will arrive sorted by the value column in ascending order
-    Values,
-    /// The input will arrive sorted by the address column in ascending order
-    Addresses,
-    /// The input will arrive in an arbitrary order
-    None,
-}
-
-#[derive(Debug, Clone)]
-pub struct TrainingCriteria {
-    pub ordering: TrainingOrdering,
-    pub needs_row_ids: bool,
-    pub needs_row_addrs: bool,
-}
-
-impl TrainingCriteria {
-    pub fn new(ordering: TrainingOrdering) -> Self {
-        Self {
-            ordering,
-            needs_row_ids: false,
-            needs_row_addrs: false,
-        }
-    }
-
-    pub fn with_row_id(mut self) -> Self {
-        self.needs_row_ids = true;
-        self
-    }
-
-    pub fn with_row_addr(mut self) -> Self {
-        self.needs_row_addrs = true;
-        self
-    }
-}
-
-/// A trait that describes what criteria is needed to train an index
-///
-/// The training process has two steps.  First, the parameters are given to the
-/// plugin and it creates a TrainingRequest.  Then, the caller prepares the training
-/// data and calls train_index.
-///
-/// The call to train_index will include the training request.  This allows the plugin
-/// to stash any deserialized parameter info in the request and fetch it later during
-/// training by downcasting to the appropriate type.
-pub trait TrainingRequest: std::any::Any + Send + Sync {
-    fn as_any(&self) -> &dyn std::any::Any;
-    fn criteria(&self) -> &TrainingCriteria;
-}
-
-/// A default training request impl for indexes that don't need any parameters
-pub(crate) struct DefaultTrainingRequest {
-    criteria: TrainingCriteria,
-}
-
-impl DefaultTrainingRequest {
-    pub fn new(criteria: TrainingCriteria) -> Self {
-        Self { criteria }
-    }
-}
-
-impl TrainingRequest for DefaultTrainingRequest {
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn criteria(&self) -> &TrainingCriteria {
-        &self.criteria
-    }
-}
-
-/// A trait for scalar index plugins
-#[async_trait]
-pub trait ScalarIndexPlugin: Send + Sync + std::fmt::Debug {
-    /// Creates a new training request from the given parameters
-    ///
-    /// This training request specifies the criteria that the data must satisfy to train the index.
-    /// For example, does the index require the input data to be sorted?
-    fn new_training_request(&self, params: &str, field: &Field)
-    -> Result<Box<dyn TrainingRequest>>;
-
-    /// Train a new index
-    ///
-    /// The provided data must fulfill all the criteria returned by `training_criteria`.
-    /// It is the caller's responsibility to ensure this.
-    ///
-    /// Returns index details that describe the index.  These details can potentially be
-    /// useful for planning (although this will currently require inside information on
-    /// the index type) and they will need to be provided when loading the index.
-    ///
-    /// It is the caller's responsibility to store these details somewhere.
-    async fn train_index(
-        &self,
-        data: SendableRecordBatchStream,
-        index_store: &dyn IndexStore,
-        request: Box<dyn TrainingRequest>,
-        fragment_ids: Option<Vec<u32>>,
-        progress: Arc<dyn IndexBuildProgress>,
-    ) -> Result<CreatedIndex>;
-
-    /// A short name for the index
-    ///
-    /// This is a friendly name for display purposes and also can be used as an alias for
-    /// the index type URL.  If multiple plugins have the same name, then the first one
-    /// found will be used.
-    ///
-    /// By convention this is MixedCase with no spaces.  When used as an alias, it will be
-    /// compared case-insensitively.
-    fn name(&self) -> &str;
-
-    /// Returns true if the index returns an exact answer (e.g. not AtMost)
-    fn provides_exact_answer(&self) -> bool;
-
-    /// The version of the index plugin
-    ///
-    /// We assume that indexes are not forwards compatible.  If an index was written with a
-    /// newer version than this, it cannot be read
-    fn version(&self) -> u32;
-
-    /// Returns a new query parser for the index
-    ///
-    /// Can return None if this index cannot participate in query optimization
-    fn new_query_parser(
-        &self,
-        index_name: String,
-        index_details: &prost_types::Any,
-    ) -> Option<Box<dyn ScalarQueryParser>>;
-
-    /// Load an index from storage
-    ///
-    /// The index details should match the details that were returned when the index was
-    /// originally trained.
-    async fn load_index(
-        &self,
-        index_store: Arc<dyn IndexStore>,
-        index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
-        cache: &LanceCache,
-    ) -> Result<Arc<dyn ScalarIndex>>;
-
-    /// Look up a previously-opened index in the cache.
-    ///
-    /// `cache` is already per-index namespaced by the caller, so a plugin's key
-    /// only needs to disambiguate entries within a single index.
-    ///
-    /// The default implementation reads an in-memory `Arc<dyn ScalarIndex>` entry.
-    /// Plugins whose index has a serializable representation should override this
-    /// (together with [`put_in_cache`](Self::put_in_cache)) to store that
-    /// representation under a sized [`CacheKey`](lance_core::cache::CacheKey) with
-    /// a codec, and reconstruct the index here. `index_store` and
-    /// `frag_reuse_index` are provided so the override can rebuild the index
-    /// without re-reading metadata.
-    async fn get_from_cache(
-        &self,
-        _index_store: Arc<dyn IndexStore>,
-        _frag_reuse_index: Option<Arc<FragReuseIndex>>,
-        cache: &LanceCache,
-    ) -> Result<Option<Arc<dyn ScalarIndex>>> {
-        Ok(cache.get_unsized_with_key(&ScalarIndexCacheKey).await)
-    }
-
-    /// Store a freshly-opened index in the cache.
-    ///
-    /// `cache` is already per-index namespaced; see
-    /// [`get_from_cache`](Self::get_from_cache).
-    ///
-    /// The default implementation stores the `Arc<dyn ScalarIndex>` in-memory.
-    async fn put_in_cache(&self, cache: &LanceCache, index: Arc<dyn ScalarIndex>) -> Result<()> {
-        cache
-            .insert_unsized_with_key(&ScalarIndexCacheKey, index)
-            .await;
-        Ok(())
-    }
-
-    /// Optional hook allowing a plugin to provide statistics without loading the index.
-    async fn load_statistics(
-        &self,
-        _index_store: Arc<dyn IndexStore>,
-        _index_details: &prost_types::Any,
-    ) -> Result<Option<serde_json::Value>> {
-        Ok(None)
-    }
-
-    /// Optional hook that plugins can use if they need to be aware of the registry
-    fn attach_registry(&self, _registry: Arc<IndexPluginRegistry>) {}
-
-    /// Returns a JSON string representation of the provided index details
-    ///
-    /// These details will be user-visible and should be considered part of the public
-    /// API.  As a result, efforts should be made to ensure the information is backwards
-    /// compatible and avoid breaking changes.
-    fn details_as_json(&self, _details: &prost_types::Any) -> Result<serde_json::Value> {
-        // Return an empty JSON object as the default implementation
-        Ok(serde_json::json!({}))
-    }
-}
-
-/// In-memory cache key for a whole `Arc<dyn ScalarIndex>`.
-///
-/// Used by the default [`ScalarIndexPlugin::get_from_cache`] /
-/// [`ScalarIndexPlugin::put_in_cache`] implementations. The cache is already
-/// per-index namespaced by the caller, so a constant key suffices. Trait objects
-/// cannot be serialized, so this is an [`UnsizedCacheKey`] with no codec —
-/// plugins that want a persistable cache entry override those methods with a
-/// sized key.
-pub struct ScalarIndexCacheKey;
-
-impl UnsizedCacheKey for ScalarIndexCacheKey {
-    type ValueType = dyn ScalarIndex;
-
-    fn key(&self) -> Cow<'_, str> {
-        Cow::Borrowed("scalar_index")
-    }
-
-    fn type_name() -> &'static str {
-        "ScalarIndex"
-    }
-}
diff --git a/rust/lance-index/src/scalar/rtree.rs b/rust/lance-index/src/scalar/rtree.rs
index 5d5ac2a3a92..8ff9b531d2a 100644
--- a/rust/lance-index/src/scalar/rtree.rs
+++ b/rust/lance-index/src/scalar/rtree.rs
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-use crate::frag_reuse::FragReuseIndex;
 use crate::metrics::{MetricsCollector, NoOpMetricsCollector};
 use crate::scalar::expression::{GeoQueryParser, ScalarQueryParser};
 use crate::scalar::lance_format::LanceIndexStore;
@@ -13,7 +12,6 @@ use crate::scalar::{
     AnyQuery, BuiltinIndexType, CreatedIndex, GeoQuery, IndexFile, IndexReader, IndexStore,
     IndexWriter, ScalarIndex, ScalarIndexParams, SearchResult, UpdateCriteria,
 };
-use crate::vector::VectorIndex;
 use crate::{Index, IndexType, pb};
 use arrow_array::UInt32Array;
 use arrow_array::cast::AsArray;
@@ -38,6 +36,7 @@ use lance_core::utils::tempfile::TempDir;
 use lance_core::{Error, ROW_ID, Result};
 use lance_datafusion::chunker::chunk_concat_stream;
 pub use lance_geo::bbox::{BoundingBox, bounding_box, total_bounds};
+use lance_index_core::row_id_remap::RowIdRemapper;
 use lance_io::object_store::ObjectStore;
 use lance_select::{NullableRowAddrSet, RowAddrTreeMap, RowSetOps};
 use roaring::RoaringBitmap;
@@ -259,7 +258,7 @@ impl CacheKey for RTreeCacheKey {
 pub struct RTreeIndex {
     pub(crate) metadata: Arc<RTreeMetadata>,
     store: Arc<dyn IndexStore>,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     index_cache: WeakLanceCache,
     pages_reader: Arc<dyn IndexReader>,
     nulls_reader: Arc<dyn IndexReader>,
@@ -277,7 +276,7 @@ impl std::fmt::Debug for RTreeIndex {
 impl RTreeIndex {
     pub async fn load(
         store: Arc<dyn IndexStore>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>> {
         let pages_reader = store.open_index_file(RTREE_PAGES_NAME).await?;
@@ -449,12 +448,6 @@ impl Index for RTreeIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Err(Error::not_supported_source(
-            "RTreeIndex is not vector index".into(),
-        ))
-    }
-
     fn statistics(&self) -> Result<serde_json::Value> {
         serde_json::to_value(self.metadata.clone())
             .map_err(|e| Error::internal(format!("Error serializing statistics: {}", e)))
@@ -604,7 +597,7 @@ impl ScalarIndex for RTreeIndex {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&pb::RTreeIndexDetails::default())?,
             index_version: RTREE_INDEX_VERSION,
-            files,
+            files: Some(files),
         })
     }
 
@@ -970,7 +963,7 @@ impl ScalarIndexPlugin for RTreeIndexPlugin {
         Ok(CreatedIndex {
             index_details: prost_types::Any::from_msg(&pb::RTreeIndexDetails::default())?,
             index_version: RTREE_INDEX_VERSION,
-            files,
+            files: Some(files),
         })
     }
 
@@ -997,7 +990,7 @@ impl ScalarIndexPlugin for RTreeIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(RTreeIndex::load(index_store, frag_reuse_index, cache).await? as Arc<dyn ScalarIndex>)
diff --git a/rust/lance-index/src/scalar/rtree/sort/hilbert_sort.rs b/rust/lance-index/src/scalar/rtree/sort/hilbert_sort.rs
index e6c10a20575..a8256c659c2 100644
--- a/rust/lance-index/src/scalar/rtree/sort/hilbert_sort.rs
+++ b/rust/lance-index/src/scalar/rtree/sort/hilbert_sort.rs
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-use crate::Result;
 use crate::scalar::rtree::sort::Sorter;
 use arrow_array::{ArrayRef, UInt32Array};
 use arrow_schema::{ArrowError, DataType as ArrowDataType, Field as ArrowField, Field};
@@ -19,6 +18,7 @@ use datafusion_physical_expr::expressions::Column as DFColumn;
 use datafusion_physical_expr::{PhysicalExpr, ScalarFunctionExpr};
 use geoarrow_array::array::from_arrow_array;
 use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor};
+use lance_core::Result;
 use lance_datafusion::exec::{LanceExecutionOptions, OneShotExec, execute_plan};
 use lance_geo::bbox::{BoundingBox, bounding_box};
 use std::any::Any;
diff --git a/rust/lance-index/src/scalar/zonemap.rs b/rust/lance-index/src/scalar/zonemap.rs
index 8e7e20c211a..0b185eea397 100644
--- a/rust/lance-index/src/scalar/zonemap.rs
+++ b/rust/lance-index/src/scalar/zonemap.rs
@@ -12,7 +12,6 @@
 //! false positives that require rechecking.
 //!
 //!
-use crate::Any;
 use crate::pbold;
 use crate::scalar::expression::{SargableQueryParser, ScalarQueryParser};
 use crate::scalar::registry::{
@@ -25,6 +24,7 @@ use crate::scalar::{
 use lance_arrow_stats::StatisticsAccumulator;
 use lance_core::cache::{LanceCache, WeakLanceCache};
 use serde::{Deserialize, Serialize};
+use std::any::Any;
 use std::sync::LazyLock;
 
 use arrow_array::{
@@ -36,13 +36,12 @@ use datafusion_common::ScalarValue;
 use std::{collections::HashMap, sync::Arc};
 
 use super::{AnyQuery, IndexStore, MetricsCollector, ScalarIndex, SearchResult};
-use crate::scalar::FragReuseIndex;
-use crate::vector::VectorIndex;
 use crate::{Index, IndexType};
 use async_trait::async_trait;
 use lance_core::Error;
 use lance_core::Result;
 use lance_core::deepsize::DeepSizeOf;
+use lance_index_core::row_id_remap::RowIdRemapper;
 use roaring::RoaringBitmap;
 
 use super::zoned::{ZoneBound, ZoneProcessor, ZoneTrainer, rebuild_zones, search_zones};
@@ -108,7 +107,7 @@ pub struct ZoneMapIndex {
     // The maximum rows per zone provided by user
     rows_per_zone: u64,
     store: Arc<dyn IndexStore>,
-    fri: Option<Arc<FragReuseIndex>>,
+    fri: Option<Arc<dyn RowIdRemapper>>,
     index_cache: WeakLanceCache,
 }
 
@@ -410,7 +409,7 @@ impl ZoneMapIndex {
     /// Load the scalar index from storage
     async fn load(
         store: Arc<dyn IndexStore>,
-        fri: Option<Arc<FragReuseIndex>>,
+        fri: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
     ) -> Result<Arc<Self>>
     where
@@ -439,7 +438,7 @@ impl ZoneMapIndex {
     fn try_from_serialized(
         data: RecordBatch,
         store: Arc<dyn IndexStore>,
-        fri: Option<Arc<FragReuseIndex>>,
+        fri: Option<Arc<dyn RowIdRemapper>>,
         index_cache: &LanceCache,
         rows_per_zone: u64,
     ) -> Result<Self> {
@@ -548,12 +547,6 @@ impl Index for ZoneMapIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Err(Error::invalid_input_source(
-            "ZoneMapIndex is not a vector index".into(),
-        ))
-    }
-
     async fn prewarm(&self) -> Result<()> {
         // Not much to prewarm
         Ok(())
@@ -636,7 +629,7 @@ impl ScalarIndex for ZoneMapIndex {
             index_details: prost_types::Any::from_msg(&pbold::ZoneMapIndexDetails::default())
                 .unwrap(),
             index_version: ZONEMAP_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -699,7 +692,7 @@ pub async fn merge_zonemap_indices(
     Ok(CreatedIndex {
         index_details: prost_types::Any::from_msg(&pbold::ZoneMapIndexDetails::default()).unwrap(),
         index_version: ZONEMAP_INDEX_VERSION,
-        files: dest_store.list_files_with_sizes().await?,
+        files: Some(dest_store.list_files_with_sizes().await?),
     })
 }
 
@@ -1029,7 +1022,7 @@ impl ScalarIndexPlugin for ZoneMapIndexPlugin {
             index_details: prost_types::Any::from_msg(&pbold::ZoneMapIndexDetails::default())
                 .unwrap(),
             index_version: ZONEMAP_INDEX_VERSION,
-            files: vec![file],
+            files: Some(vec![file]),
         })
     }
 
@@ -1037,7 +1030,7 @@ impl ScalarIndexPlugin for ZoneMapIndexPlugin {
         &self,
         index_store: Arc<dyn IndexStore>,
         _index_details: &prost_types::Any,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         cache: &LanceCache,
     ) -> Result<Arc<dyn ScalarIndex>> {
         Ok(ZoneMapIndex::load(index_store, frag_reuse_index, cache).await? as Arc<dyn ScalarIndex>)
diff --git a/rust/lance-index/src/vector/bq/storage.rs b/rust/lance-index/src/vector/bq/storage.rs
index 2f4fe69792a..12048cd1885 100644
--- a/rust/lance-index/src/vector/bq/storage.rs
+++ b/rust/lance-index/src/vector/bq/storage.rs
@@ -38,7 +38,6 @@ use num_traits::AsPrimitive;
 use prost::Message;
 use serde::{Deserialize, Serialize};
 
-use crate::frag_reuse::FragReuseIndex;
 use crate::pb;
 use crate::vector::ApproxMode;
 use crate::vector::bq::dist_table_quant::{
@@ -64,6 +63,7 @@ use crate::vector::quantizer::{QuantizerMetadata, QuantizerStorage};
 use crate::vector::storage::{
     DistCalculator, DistanceCalculatorOptions, QueryResidual, RabitRawQueryContext, VectorStore,
 };
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 pub const RABIT_METADATA_KEY: &str = "lance:rabit";
 pub const RABIT_CODE_COLUMN: &str = "_rabit_codes";
@@ -2391,13 +2391,10 @@ pub fn unpack_codes(codes: &FixedSizeListArray) -> FixedSizeListArray {
 /// to `Some(new_id)` for surviving rows or `None` for rows whose covering
 /// fragment was compacted away, suitable for `RabitQuantizationStorage::remap`.
 fn build_frag_reuse_mapping(
-    fri: Option<&FragReuseIndex>,
+    fri: Option<&dyn RowIdRemapper>,
     row_ids: &UInt64Array,
 ) -> Option<HashMap<u64, Option<u64>>> {
     let fri = fri?;
-    if fri.row_id_maps.is_empty() {
-        return None;
-    }
     let mut mapping: HashMap<u64, Option<u64>> = HashMap::new();
     for row_id in row_ids.values().iter() {
         match fri.remap_row_id(*row_id) {
@@ -2422,7 +2419,7 @@ impl QuantizerStorage for RabitQuantizationStorage {
         batch: RecordBatch,
         metadata: &Self::Metadata,
         distance_type: DistanceType,
-        fri: Option<Arc<FragReuseIndex>>,
+        fri: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let distance_type = match (metadata.query_estimator, distance_type) {
             (RabitQueryEstimator::RawQuery, DistanceType::Cosine) => DistanceType::L2,
@@ -2548,7 +2545,7 @@ impl QuantizerStorage for RabitQuantizationStorage {
         range: std::ops::Range<usize>,
         distance_type: DistanceType,
         metadata: &Self::Metadata,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let schema = reader.schema();
         let batch = reader.read_range(range, schema).await?;
diff --git a/rust/lance-index/src/vector/flat/storage.rs b/rust/lance-index/src/vector/flat/storage.rs
index c3ec30d5086..e5e477fb155 100644
--- a/rust/lance-index/src/vector/flat/storage.rs
+++ b/rust/lance-index/src/vector/flat/storage.rs
@@ -4,7 +4,6 @@
 use std::{borrow::Cow, sync::Arc};
 
 use super::index::FlatMetadata;
-use crate::frag_reuse::FragReuseIndex;
 use crate::vector::quantizer::QuantizerStorage;
 use crate::vector::storage::{DistCalculator, VectorStore};
 use crate::vector::utils::do_prefetch;
@@ -20,6 +19,7 @@ use arrow_schema::{DataType, SchemaRef};
 use lance_core::deepsize::DeepSizeOf;
 use lance_core::{Error, ROW_ID, Result};
 use lance_file::previous::reader::FileReader as PreviousFileReader;
+use lance_index_core::row_id_remap::RowIdRemapper;
 use lance_linalg::distance::hamming::hamming;
 use lance_linalg::distance::{Cosine, DistanceType, Dot, L2};
 
@@ -51,7 +51,7 @@ impl QuantizerStorage for FlatFloatStorage {
         batch: RecordBatch,
         metadata: &Self::Metadata,
         distance_type: DistanceType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let batch = if let Some(frag_reuse_index_ref) = frag_reuse_index.as_ref() {
             frag_reuse_index_ref.remap_row_ids_record_batch(batch, 0)?
@@ -91,7 +91,7 @@ impl QuantizerStorage for FlatFloatStorage {
         _: std::ops::Range<usize>,
         _: DistanceType,
         _: &Self::Metadata,
-        _: Option<Arc<FragReuseIndex>>,
+        _: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         unimplemented!("Flat will be used in new index builder which doesn't require this")
     }
@@ -213,7 +213,7 @@ impl QuantizerStorage for FlatBinStorage {
         batch: RecordBatch,
         metadata: &Self::Metadata,
         distance_type: DistanceType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let batch = if let Some(frag_reuse_index_ref) = frag_reuse_index.as_ref() {
             frag_reuse_index_ref.remap_row_ids_record_batch(batch, 0)?
@@ -253,7 +253,7 @@ impl QuantizerStorage for FlatBinStorage {
         _: std::ops::Range<usize>,
         _: DistanceType,
         _: &Self::Metadata,
-        _: Option<Arc<FragReuseIndex>>,
+        _: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         unimplemented!("Flat will be used in new index builder which doesn't require this")
     }
diff --git a/rust/lance-index/src/vector/flat/transform.rs b/rust/lance-index/src/vector/flat/transform.rs
index 75a465ce262..f9fdca0819c 100644
--- a/rust/lance-index/src/vector/flat/transform.rs
+++ b/rust/lance-index/src/vector/flat/transform.rs
@@ -26,7 +26,7 @@ impl FlatTransformer {
 
 impl Transformer for FlatTransformer {
     #[instrument(name = "FlatTransformer::transform", level = "debug", skip_all)]
-    fn transform(&self, batch: &RecordBatch) -> crate::Result<RecordBatch> {
+    fn transform(&self, batch: &RecordBatch) -> lance_core::Result<RecordBatch> {
         let input_arr = batch
             .column_by_name(&self.input_column)
             .ok_or(Error::index(format!(
diff --git a/rust/lance-index/src/vector/hnsw/builder.rs b/rust/lance-index/src/vector/hnsw/builder.rs
index 214750dfafa..62990d351bf 100644
--- a/rust/lance-index/src/vector/hnsw/builder.rs
+++ b/rust/lance-index/src/vector/hnsw/builder.rs
@@ -1323,7 +1323,6 @@ mod tests {
     use rstest::rstest;
 
     use super::HnswGraph;
-    use crate::scalar::IndexWriter;
     use crate::vector::storage::{DistCalculator, VectorStore};
     use crate::vector::v3::subindex::IvfSubIndex;
     use crate::vector::{
@@ -1368,7 +1367,7 @@ mod tests {
         .unwrap();
         let batch = builder.to_batch().unwrap();
         let metadata = batch.schema_ref().metadata().clone();
-        writer.write_record_batch(batch).await.unwrap();
+        writer.write(&[batch]).await.unwrap();
         writer.finish_with_metadata(&metadata).await.unwrap();
 
         let reader = PreviousFileReader::try_new_self_described(&object_store, &path, None)
@@ -1429,7 +1428,7 @@ mod tests {
         .unwrap();
         let batch = builder.to_batch().unwrap();
         let metadata = batch.schema_ref().metadata().clone();
-        writer.write_record_batch(batch).await.unwrap();
+        writer.write(&[batch]).await.unwrap();
         writer.finish_with_metadata(&metadata).await.unwrap();
 
         let reader = PreviousFileReader::try_new_self_described(&object_store, &path, None)
diff --git a/rust/lance-index/src/vector/hnsw/index.rs b/rust/lance-index/src/vector/hnsw/index.rs
index 0ae42f59414..c8c9e5164fe 100644
--- a/rust/lance-index/src/vector/hnsw/index.rs
+++ b/rust/lance-index/src/vector/hnsw/index.rs
@@ -119,11 +119,6 @@ impl<Q: Quantization + Send + Sync + 'static> Index for HNSWIndex<Q> {
         self
     }
 
-    /// Cast to [VectorIndex]
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Ok(self)
-    }
-
     /// Retrieve index statistics as a JSON Value
     fn statistics(&self) -> Result<serde_json::Value> {
         Ok(json!({
diff --git a/rust/lance-index/src/vector/kmeans.rs b/rust/lance-index/src/vector/kmeans.rs
index b11fb70bed0..07dc067b263 100644
--- a/rust/lance-index/src/vector/kmeans.rs
+++ b/rust/lance-index/src/vector/kmeans.rs
@@ -45,7 +45,7 @@ use {
 };
 
 use crate::vector::utils::SimpleIndex;
-use crate::{Error, Result};
+use lance_core::{Error, Result};
 
 /// KMean initialization method.
 #[derive(Debug, PartialEq)]
diff --git a/rust/lance-index/src/vector/pq/storage.rs b/rust/lance-index/src/vector/pq/storage.rs
index de5a7ac28bd..47cf2630671 100644
--- a/rust/lance-index/src/vector/pq/storage.rs
+++ b/rust/lance-index/src/vector/pq/storage.rs
@@ -37,7 +37,6 @@ use serde::{Deserialize, Serialize};
 
 use super::ProductQuantizer;
 use super::distance::{build_distance_table_dot, build_distance_table_l2, compute_pq_distance};
-use crate::frag_reuse::FragReuseIndex;
 use crate::vector::graph::{OrderedFloat, OrderedNode};
 use crate::{
     INDEX_METADATA_SCHEMA_KEY, IndexMetadata, pb,
@@ -49,6 +48,7 @@ use crate::{
         transform::Transformer,
     },
 };
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 pub const PQ_METADATA_KEY: &str = "lance:pq";
 
@@ -202,7 +202,7 @@ impl ProductQuantizationStorage {
         dimension: usize,
         distance_type: DistanceType,
         transposed: bool,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         if batch.num_columns() != 2 {
             log::warn!(
@@ -341,7 +341,7 @@ impl ProductQuantizationStorage {
         quantizer: ProductQuantizer,
         batch: &RecordBatch,
         vector_col: &str,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let codebook = quantizer.codebook.clone();
         let num_bits = quantizer.num_bits;
@@ -384,7 +384,7 @@ impl ProductQuantizationStorage {
     pub async fn load(
         object_store: &ObjectStore,
         path: &Path,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let reader = PreviousFileReader::try_new_self_described(object_store, path, None).await?;
         let schema = reader.schema();
@@ -511,7 +511,7 @@ impl QuantizerStorage for ProductQuantizationStorage {
         batch: RecordBatch,
         metadata: &Self::Metadata,
         distance_type: DistanceType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self>
     where
         Self: Sized,
@@ -619,7 +619,7 @@ impl QuantizerStorage for ProductQuantizationStorage {
         range: std::ops::Range<usize>,
         distance_type: DistanceType,
         metadata: &Self::Metadata,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         // Hard coded to float32 for now
         let codebook = metadata
diff --git a/rust/lance-index/src/vector/quantizer.rs b/rust/lance-index/src/vector/quantizer.rs
index 8ee64669f32..47f432a8d1a 100644
--- a/rust/lance-index/src/vector/quantizer.rs
+++ b/rust/lance-index/src/vector/quantizer.rs
@@ -23,9 +23,9 @@ use serde::{Deserialize, Serialize};
 use super::flat::index::{FlatBinQuantizer, FlatQuantizer};
 use super::pq::ProductQuantizer;
 use super::{ivf::storage::IvfModel, sq::ScalarQuantizer, storage::VectorStore};
-use crate::frag_reuse::FragReuseIndex;
 use crate::vector::bq::builder::RabitQuantizer;
 use crate::{INDEX_METADATA_SCHEMA_KEY, IndexMetadata};
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 pub trait Quantization:
     Send
@@ -235,7 +235,7 @@ pub trait QuantizerStorage: Clone + Sized + DeepSizeOf + VectorStore {
         batch: RecordBatch,
         metadata: &Self::Metadata,
         distance_type: DistanceType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self>;
 
     fn metadata(&self) -> &Self::Metadata;
@@ -280,7 +280,7 @@ pub trait QuantizerStorage: Clone + Sized + DeepSizeOf + VectorStore {
         range: std::ops::Range<usize>,
         distance_type: DistanceType,
         metadata: &Self::Metadata,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self>;
 }
 
diff --git a/rust/lance-index/src/vector/sq/storage.rs b/rust/lance-index/src/vector/sq/storage.rs
index 1e5eebda0d9..af0b088fe51 100644
--- a/rust/lance-index/src/vector/sq/storage.rs
+++ b/rust/lance-index/src/vector/sq/storage.rs
@@ -23,7 +23,6 @@ use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 
 use super::{ScalarQuantizer, scale_to_u8};
-use crate::frag_reuse::FragReuseIndex;
 use crate::{
     INDEX_METADATA_SCHEMA_KEY, IndexMetadata,
     vector::{
@@ -33,6 +32,7 @@ use crate::{
         transform::Transformer,
     },
 };
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 pub const SQ_METADATA_KEY: &str = "lance:sq";
 
@@ -171,7 +171,7 @@ impl ScalarQuantizationStorage {
         distance_type: DistanceType,
         bounds: Range<f64>,
         batches: impl IntoIterator<Item = RecordBatch>,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let mut chunks = Vec::with_capacity(SQ_CHUNK_CAPACITY);
         let mut offsets = Vec::with_capacity(SQ_CHUNK_CAPACITY + 1);
@@ -211,7 +211,7 @@ impl ScalarQuantizationStorage {
     pub async fn load(
         object_store: &ObjectStore,
         path: &Path,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let reader = PreviousFileReader::try_new_self_described(object_store, path, None).await?;
         let schema = reader.schema();
@@ -263,7 +263,7 @@ impl QuantizerStorage for ScalarQuantizationStorage {
         batch: RecordBatch,
         metadata: &Self::Metadata,
         distance_type: DistanceType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self>
     where
         Self: Sized,
@@ -294,7 +294,7 @@ impl QuantizerStorage for ScalarQuantizationStorage {
         range: std::ops::Range<usize>,
         distance_type: DistanceType,
         metadata: &Self::Metadata,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let schema = reader.schema();
         let batch = reader.read_range(range, schema).await?;
diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs
index a14308197ed..7de143c041b 100644
--- a/rust/lance-index/src/vector/storage.rs
+++ b/rust/lance-index/src/vector/storage.rs
@@ -28,7 +28,6 @@ use std::{
 
 use crossbeam_queue::ArrayQueue;
 
-use crate::frag_reuse::FragReuseIndex;
 use crate::{
     pb,
     vector::{
@@ -36,6 +35,7 @@ use crate::{
         quantizer::Quantization,
     },
 };
+use lance_index_core::row_id_remap::RowIdRemapper;
 
 use super::graph::OrderedFloat;
 use super::graph::OrderedNode;
@@ -448,7 +448,7 @@ pub struct StorageBuilder<Q: Quantization> {
     distance_type: DistanceType,
     quantizer: Q,
 
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 }
 
 impl<Q: Quantization> StorageBuilder<Q> {
@@ -456,7 +456,7 @@ impl<Q: Quantization> StorageBuilder<Q> {
         vector_column: String,
         distance_type: DistanceType,
         quantizer: Q,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         Ok(Self {
             vector_column,
@@ -504,7 +504,7 @@ pub struct IvfQuantizationStorage<Q: Quantization> {
     metadata: Q::Metadata,
 
     ivf: IvfModel,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 }
 
 impl<Q: Quantization> DeepSizeOf for IvfQuantizationStorage<Q> {
@@ -519,7 +519,7 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
     ///
     pub async fn try_new(
         reader: FileReader,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let schema = reader.schema();
 
@@ -576,7 +576,7 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
         ivf: IvfModel,
         metadata: Q::Metadata,
         distance_type: DistanceType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Self {
         Self {
             reader,
diff --git a/rust/lance-namespace-impls/src/dir/manifest.rs b/rust/lance-namespace-impls/src/dir/manifest.rs
index aae924378da..1da78806027 100644
--- a/rust/lance-namespace-impls/src/dir/manifest.rs
+++ b/rust/lance-namespace-impls/src/dir/manifest.rs
@@ -33,7 +33,7 @@ use lance_core::Error as LanceError;
 use lance_core::datatypes::LANCE_UNENFORCED_PRIMARY_KEY_POSITION;
 use lance_core::{Error, ROW_ID, Result};
 use lance_index::progress::noop_progress;
-use lance_index::registry::IndexPluginRegistry;
+use lance_index::registry::{IndexPluginRegistry, with_default_plugins};
 use lance_index::scalar::lance_format::LanceIndexStore;
 use lance_index::scalar::registry::VALUE_COLUMN_NAME;
 use lance_index::scalar::{BuiltinIndexType, CreatedIndex, ScalarIndexParams};
@@ -1213,7 +1213,15 @@ impl ManifestNamespace {
             index_version: trained_index.created_index.index_version as i32,
             created_at: None,
             base_id: None,
-            files: Some(trained_index.created_index.files),
+            files: trained_index.created_index.files.map(|files| {
+                files
+                    .into_iter()
+                    .map(|f| lance_table::format::IndexFile {
+                        path: f.path,
+                        size_bytes: f.size_bytes,
+                    })
+                    .collect()
+            }),
         })
     }
 
@@ -1267,7 +1275,7 @@ impl ManifestNamespace {
             ..
         } = index_data;
         let [object_id_uuid, object_type_uuid, base_objects_uuid] = index_uuids;
-        let registry = IndexPluginRegistry::with_default_plugins();
+        let registry = with_default_plugins();
 
         let dataset_version = manifest.version;
         let object_id_index_fut = Self::build_manifest_index(
diff --git a/rust/lance-table/Cargo.toml b/rust/lance-table/Cargo.toml
index 042ae92c618..b83fba89477 100644
--- a/rust/lance-table/Cargo.toml
+++ b/rust/lance-table/Cargo.toml
@@ -15,6 +15,7 @@ rust-version.workspace = true
 lance-arrow.workspace = true
 lance-core.workspace = true
 lance-file.workspace = true
+lance-index-core.workspace = true
 lance-select.workspace = true
 lance-io.workspace = true
 arrow.workspace = true
diff --git a/rust/lance-table/src/system_index/frag_reuse.rs b/rust/lance-table/src/system_index/frag_reuse.rs
index 40bbc4f58b6..8d758209129 100644
--- a/rust/lance-table/src/system_index/frag_reuse.rs
+++ b/rust/lance-table/src/system_index/frag_reuse.rs
@@ -348,6 +348,67 @@ impl FragReuseIndex {
     }
 }
 
+/// Adapts [`FragReuseIndex`] to `RowIdRemapper` so it can be used as `Arc<dyn RowIdRemapper>`.
+/// Every method forwards its arguments untouched to the `FragReuseIndex` method of the same
+/// name. This relies on each one having an inherent counterpart, which resolution prefers
+/// over the trait method of the same name.
+impl lance_index_core::row_id_remap::RowIdRemapper for FragReuseIndex {
+    fn remap_row_id(&self, row_id: u64) -> Option<u64> {
+        FragReuseIndex::remap_row_id(self, row_id)
+    }
+
+    fn remap_row_addrs_tree_map(&self, row_addrs: &RowAddrTreeMap) -> RowAddrTreeMap {
+        FragReuseIndex::remap_row_addrs_tree_map(self, row_addrs)
+    }
+
+    fn remap_row_ids_roaring_tree_map(&self, row_ids: &RoaringTreemap) -> RoaringTreemap {
+        FragReuseIndex::remap_row_ids_roaring_tree_map(self, row_ids)
+    }
+
+    fn remap_row_ids_record_batch(&self, batch: RecordBatch, idx: usize) -> Result<RecordBatch> {
+        FragReuseIndex::remap_row_ids_record_batch(self, batch, idx)
+    }
+}
+
+/// Implements `lance_index_core::index::Index` for [`FragReuseIndex`], letting the fragment
+/// reuse index be handled as an `Arc<dyn Index>` (see `as_index`).
+#[async_trait::async_trait]
+impl lance_index_core::index::Index for FragReuseIndex {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn as_index(self: Arc<Self>) -> Arc<dyn lance_index_core::index::Index> {
+        self
+    }
+
+    /// Returns `{"num_versions": N}`, where `N` is `self.details.versions.len()`.
+    fn statistics(&self) -> lance_core::Result<serde_json::Value> {
+        // `json!` builds the value directly, so there is no serialization error to map.
+        Ok(serde_json::json!({
+            "num_versions": self.details.versions.len(),
+        }))
+    }
+
+    /// No-op: always returns `Ok(())`.
+    async fn prewarm(&self) -> lance_core::Result<()> {
+        Ok(())
+    }
+
+    /// Always `IndexType::FragmentReuse`.
+    fn index_type(&self) -> lance_index_core::index::IndexType {
+        lance_index_core::index::IndexType::FragmentReuse
+    }
+
+    /// Returns an empty bitmap rather than panicking, so code that walks indices through
+    /// `dyn Index` can call this safely. Mirrors the `MemWalIndex` implementation.
+    ///
+    /// NOTE(review): assumes this index is not tied to particular fragments — confirm.
+    async fn calculate_included_frags(&self) -> lance_core::Result<RoaringBitmap> {
+        Ok(RoaringBitmap::new())
+    }
+}
+
 #[cfg(test)]
 mod tests {
 
diff --git a/rust/lance-table/src/system_index/mem_wal.rs b/rust/lance-table/src/system_index/mem_wal.rs
index 3bf279df062..3e36091a561 100644
--- a/rust/lance-table/src/system_index/mem_wal.rs
+++ b/rust/lance-table/src/system_index/mem_wal.rs
@@ -398,3 +398,50 @@ impl MemWalIndex {
         caught_up_gen.is_none_or(|generation| generation >= merged_gen)
     }
 }
+
+/// Implements `lance_index_core::index::Index` for [`MemWalIndex`], so the MemWAL index can
+/// be handled through the same `dyn Index` interface as other index types.
+#[async_trait::async_trait]
+impl lance_index_core::index::Index for MemWalIndex {
+    /// Returns `self` as `&dyn Any` for downcasting to the concrete type.
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    /// Upcasts an `Arc<Self>` to `Arc<dyn Index>`.
+    fn as_index(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn lance_index_core::index::Index> {
+        self
+    }
+
+    /// Summarizes the index as a JSON object of counters read from `self.details`:
+    /// `num_shards` is copied as-is, while `num_merged_generations`, `num_shard_specs`,
+    /// `num_maintained_indexes` and `num_index_catchup_entries` are the lengths of
+    /// `merged_generations`, `sharding_specs`, `maintained_indexes` and `index_catchup`.
+    fn statistics(&self) -> lance_core::Result<serde_json::Value> {
+        let details = &self.details;
+        Ok(serde_json::json!({
+            "num_shards": details.num_shards,
+            "num_merged_generations": details.merged_generations.len(),
+            "num_shard_specs": details.sharding_specs.len(),
+            "num_maintained_indexes": details.maintained_indexes.len(),
+            "num_index_catchup_entries": details.index_catchup.len(),
+        }))
+    }
+
+    /// No-op: always returns `Ok(())`.
+    async fn prewarm(&self) -> lance_core::Result<()> {
+        Ok(())
+    }
+
+    /// Always `IndexType::MemWal`.
+    fn index_type(&self) -> lance_index_core::index::IndexType {
+        lance_index_core::index::IndexType::MemWal
+    }
+
+    /// Always returns an empty bitmap.
+    ///
+    /// NOTE(review): presumably because this index is not built over data fragments — confirm.
+    async fn calculate_included_frags(&self) -> lance_core::Result<roaring::RoaringBitmap> {
+        Ok(roaring::RoaringBitmap::new())
+    }
+}
diff --git a/rust/lance/src/dataset/mem_wal/memtable/flush.rs b/rust/lance/src/dataset/mem_wal/memtable/flush.rs
index ebcc06cab44..ceba2252b70 100644
--- a/rust/lance/src/dataset/mem_wal/memtable/flush.rs
+++ b/rust/lance/src/dataset/mem_wal/memtable/flush.rs
@@ -1402,7 +1402,7 @@ mod tests {
     async fn flushed_pk_index_sidecar_is_probeable() {
         use lance_core::cache::LanceCache;
         use lance_index::metrics::NoOpMetricsCollector;
-        use lance_index::registry::IndexPluginRegistry;
+        use lance_index::registry::with_default_plugins;
         use lance_index::scalar::lance_format::LanceIndexStore;
         use lance_index::scalar::{SargableQuery, SearchResult};
 
@@ -1462,7 +1462,7 @@ mod tests {
             pk_index_path(&gen_path),
             Arc::new(LanceCache::no_cache()),
         ));
-        let registry = IndexPluginRegistry::with_default_plugins();
+        let registry = with_default_plugins();
         let plugin = registry.get_plugin_by_name("BTree").unwrap();
         let details =
             prost_types::Any::from_msg(&lance_index::pbold::BTreeIndexDetails::default()).unwrap();
@@ -1503,7 +1503,7 @@ mod tests {
     async fn plain_flush_writes_pk_sidecar() {
         use lance_core::cache::LanceCache;
         use lance_index::metrics::NoOpMetricsCollector;
-        use lance_index::registry::IndexPluginRegistry;
+        use lance_index::registry::with_default_plugins;
         use lance_index::scalar::lance_format::LanceIndexStore;
         use lance_index::scalar::{SargableQuery, SearchResult};
 
@@ -1559,7 +1559,7 @@ mod tests {
             pk_index_path(&gen_path),
             Arc::new(LanceCache::no_cache()),
         ));
-        let registry = IndexPluginRegistry::with_default_plugins();
+        let registry = with_default_plugins();
         let plugin = registry.get_plugin_by_name("BTree").unwrap();
         let details =
             prost_types::Any::from_msg(&lance_index::pbold::BTreeIndexDetails::default()).unwrap();
diff --git a/rust/lance/src/dataset/mem_wal/scanner/block_list.rs b/rust/lance/src/dataset/mem_wal/scanner/block_list.rs
index 69d16930888..ff320784426 100644
--- a/rust/lance/src/dataset/mem_wal/scanner/block_list.rs
+++ b/rust/lance/src/dataset/mem_wal/scanner/block_list.rs
@@ -23,7 +23,7 @@ use datafusion::common::ScalarValue;
 use lance_core::{Error, Result};
 
 use lance_index::metrics::NoOpMetricsCollector;
-use lance_index::registry::IndexPluginRegistry;
+use lance_index::registry::{IndexPluginRegistry, with_default_plugins};
 use lance_index::scalar::btree::BTreeIndex;
 use lance_index::scalar::lance_format::LanceIndexStore;
 use lance_index::scalar::{
@@ -41,7 +41,7 @@ use crate::session::Session;
 /// Default-plugin registry, used only to load the standalone PK BTree by its
 /// `BTreeIndexDetails` type. Built once.
 static PK_BTREE_REGISTRY: LazyLock<Arc<IndexPluginRegistry>> =
-    LazyLock::new(IndexPluginRegistry::with_default_plugins);
+    LazyLock::new(with_default_plugins);
 
 /// One newer generation's PK membership, used to decide whether it shadows an
 /// older source's row.
diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index 1a3a3aa54ec..e27c7646707 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -96,6 +96,24 @@ use crate::{Error, Result, dataset::Dataset};
 pub use create::CreateIndexBuilder;
 pub use lance_index::IndexDescription;
 
+/// Converts a `lance_index::scalar::IndexFile` into a `lance_table::format::IndexFile`,
+/// moving `path` and `size_bytes` across unchanged.
+pub(crate) fn index_file_to_table(
+    f: lance_index::scalar::IndexFile,
+) -> lance_table::format::IndexFile {
+    let lance_index::scalar::IndexFile { path, size_bytes } = f;
+    lance_table::format::IndexFile { path, size_bytes }
+}
+
+/// Converts a `lance_table::format::IndexFile` into a `lance_index::scalar::IndexFile`,
+/// moving `path` and `size_bytes` across unchanged.
+pub(crate) fn index_file_from_table(
+    f: lance_table::format::IndexFile,
+) -> lance_index::scalar::IndexFile {
+    let lance_table::format::IndexFile { path, size_bytes } = f;
+    lance_index::scalar::IndexFile { path, size_bytes }
+}
+
 fn validate_segment_metadata(index_name: &str, segments: &[IndexMetadata]) -> Result<()> {
     if segments.is_empty() {
         return Err(Error::invalid_input(
@@ -458,7 +476,7 @@ fn legacy_type_name(index_uri: &str, index_type_hint: Option<&str>) -> String {
         "BloomFilter" => IndexType::BloomFilter.to_string(),
         "RTree" => IndexType::RTree.to_string(),
         "Inverted" => IndexType::Inverted.to_string(),
-        "FMIndex" => IndexType::Fm.to_string(),
+        "FMIndex" => IndexType::FMIndex.to_string(),
         "Json" => IndexType::Scalar.to_string(),
         "Flat" | "Vector" => IndexType::Vector.to_string(),
         other if other.contains("Vector") => IndexType::Vector.to_string(),
@@ -604,7 +622,7 @@ pub(crate) async fn remap_index(
                 )
                 .unwrap(),
                 index_version,
-                files,
+                files: Some(files.into_iter().map(index_file_from_table).collect()),
             }
         }
         _ => {
@@ -620,7 +638,9 @@ pub(crate) async fn remap_index(
         new_id,
         index_details: created_index.index_details,
         index_version: created_index.index_version,
-        files: Some(created_index.files),
+        files: created_index
+            .files
+            .map(|files| files.into_iter().map(index_file_to_table).collect()),
     }))
 }
 
@@ -1916,6 +1936,7 @@ impl DatasetIndexInternalExt for Dataset {
         }
 
         let frag_reuse_index = self.open_frag_reuse_index(metrics).await?;
+        let frag_reuse_index = frag_reuse_index.map(|f| f as Arc<dyn lance_index::RowIdRemapper>);
         let index_dir = self.indice_files_dir(&index_meta)?;
         let index_file = index_dir
             .clone()
diff --git a/rust/lance/src/index/append.rs b/rust/lance/src/index/append.rs
index 99ff7bebe43..53f7f9d0749 100644
--- a/rust/lance/src/index/append.rs
+++ b/rust/lance/src/index/append.rs
@@ -509,7 +509,12 @@ pub async fn merge_indices_with_unindexed_frags<'a>(
                 CreatedIndex {
                     index_details: vector_index_details_default(),
                     index_version: lance_index::IndexType::Vector.version() as u32,
-                    files,
+                    files: Some(
+                        files
+                            .into_iter()
+                            .map(crate::index::index_file_from_table)
+                            .collect(),
+                    ),
                 },
             ))
         } else {
@@ -572,7 +577,12 @@ pub async fn merge_indices_with_unindexed_frags<'a>(
                     // index_version <= our max supported version, so we can safely
                     // write the current library's version for this index type.
                     index_version: lance_index::IndexType::Vector.version() as u32,
-                    files,
+                    files: Some(
+                        files
+                            .into_iter()
+                            .map(crate::index::index_file_from_table)
+                            .collect(),
+                    ),
                 },
             ))
         }
@@ -656,7 +666,12 @@ pub async fn merge_indices_with_unindexed_frags<'a>(
                         new_fragment_bitmap: dataset.fragment_bitmap.as_ref().clone(),
                         new_index_version: created_index.index_version as i32,
                         new_index_details: created_index.index_details,
-                        files: created_index.files,
+                        files: created_index
+                            .files
+                            .unwrap_or_default()
+                            .into_iter()
+                            .map(crate::index::index_file_to_table)
+                            .collect(),
                     }));
                 }
 
@@ -772,7 +787,12 @@ pub async fn merge_indices_with_unindexed_frags<'a>(
         new_fragment_bitmap,
         new_index_version: created_index.index_version as i32,
         new_index_details: created_index.index_details,
-        files: created_index.files,
+        files: created_index
+            .files
+            .unwrap_or_default()
+            .into_iter()
+            .map(crate::index::index_file_to_table)
+            .collect(),
     }))
 }
 
@@ -1532,11 +1552,11 @@ mod tests {
         .await
         .unwrap();
 
-        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::Fm);
+        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::FMIndex);
         dataset
             .create_index(
                 &["text"],
-                IndexType::Fm,
+                IndexType::FMIndex,
                 Some("text_fmindex".to_string()),
                 &params,
                 true,
diff --git a/rust/lance/src/index/create.rs b/rust/lance/src/index/create.rs
index bbb055463dc..da15a1bc466 100644
--- a/rust/lance/src/index/create.rs
+++ b/rust/lance/src/index/create.rs
@@ -165,7 +165,8 @@ impl<'a> CreateIndexBuilder<'a> {
         let fri = self
             .dataset
             .open_frag_reuse_index(&NoOpMetricsCollector)
-            .await?;
+            .await?
+            .map(|f| f as Arc<dyn lance_index::RowIdRemapper>);
         let index_name = if let Some(name) = self.name.take() {
             name
         } else {
@@ -218,7 +219,7 @@ impl<'a> CreateIndexBuilder<'a> {
                 | IndexType::BTree
                 | IndexType::Inverted
                 | IndexType::NGram
-                | IndexType::Fm
+                | IndexType::FMIndex
                 | IndexType::ZoneMap
                 | IndexType::BloomFilter
                 | IndexType::LabelList
@@ -407,7 +408,12 @@ impl<'a> CreateIndexBuilder<'a> {
                 CreatedIndex {
                     index_details: vector_index_details(vec_params),
                     index_version,
-                    files,
+                    files: Some(
+                        files
+                            .into_iter()
+                            .map(crate::index::index_file_from_table)
+                            .collect(),
+                    ),
                 }
             }
             // Can't use if let Some(...) here because it's not stable yet.
@@ -446,7 +452,12 @@ impl<'a> CreateIndexBuilder<'a> {
                 CreatedIndex {
                     index_details: vector_index_details_default(),
                     index_version: self.index_type.version() as u32,
-                    files,
+                    files: Some(
+                        files
+                            .into_iter()
+                            .map(crate::index::index_file_from_table)
+                            .collect(),
+                    ),
                 }
             }
             (IndexType::FragmentReuse, _) => {
@@ -479,7 +490,12 @@ impl<'a> CreateIndexBuilder<'a> {
             index_version: created_index.index_version as i32,
             created_at: Some(chrono::Utc::now()),
             base_id: None,
-            files: Some(created_index.files),
+            files: created_index.files.map(|files| {
+                files
+                    .into_iter()
+                    .map(crate::index::index_file_to_table)
+                    .collect()
+            }),
         })
     }
 
@@ -549,7 +565,7 @@ impl<'a> CreateIndexBuilder<'a> {
     }
     /// Extract `num_segments` from FM-Index params if this is an FM-Index build.
     fn fmindex_num_segments(&self) -> Option<u32> {
-        if self.index_type != IndexType::Fm {
+        if self.index_type != IndexType::FMIndex {
             return None;
         }
         let scalar_params = self.params.as_any().downcast_ref::<ScalarIndexParams>()?;
@@ -627,7 +643,7 @@ impl<'a> CreateIndexBuilder<'a> {
                 self.dataset,
                 &column,
                 segment_uuid,
-                &ScalarIndexParams::for_builtin(lance_index::scalar::BuiltinIndexType::Fm),
+                &ScalarIndexParams::for_builtin(lance_index::scalar::BuiltinIndexType::FMIndex),
                 false,
                 None,
                 None,
@@ -644,7 +660,12 @@ impl<'a> CreateIndexBuilder<'a> {
                 index_version: created_index.index_version as i32,
                 created_at: Some(chrono::Utc::now()),
                 base_id: None,
-                files: Some(created_index.files),
+                files: created_index.files.map(|files| {
+                    files
+                        .into_iter()
+                        .map(crate::index::index_file_to_table)
+                        .collect()
+                }),
             };
             let segments = vec![metadata.into_index_segment()?];
             let new_indices =
@@ -695,7 +716,7 @@ impl<'a> CreateIndexBuilder<'a> {
                 self.dataset,
                 &column,
                 segment_uuid,
-                &ScalarIndexParams::for_builtin(lance_index::scalar::BuiltinIndexType::Fm),
+                &ScalarIndexParams::for_builtin(lance_index::scalar::BuiltinIndexType::FMIndex),
                 true,
                 Some(fragment_ids.clone()),
                 None,
@@ -713,7 +734,12 @@ impl<'a> CreateIndexBuilder<'a> {
                 index_version: created_index.index_version as i32,
                 created_at: Some(chrono::Utc::now()),
                 base_id: None,
-                files: Some(created_index.files),
+                files: created_index.files.map(|files| {
+                    files
+                        .into_iter()
+                        .map(crate::index::index_file_to_table)
+                        .collect()
+                }),
             });
         }
 
diff --git a/rust/lance/src/index/scalar.rs b/rust/lance/src/index/scalar.rs
index ae2478589fb..975ea5a858e 100644
--- a/rust/lance/src/index/scalar.rs
+++ b/rust/lance/src/index/scalar.rs
@@ -39,7 +39,7 @@ use lance_index::pbold::{
     BTreeIndexDetails, BitmapIndexDetails, InvertedIndexDetails, LabelListIndexDetails,
 };
 use lance_index::progress::IndexBuildProgress;
-use lance_index::registry::IndexPluginRegistry;
+use lance_index::registry::{IndexPluginRegistry, with_default_plugins};
 use lance_index::scalar::IndexStore;
 use lance_index::scalar::inverted::METADATA_FILE;
 use lance_index::scalar::label_list::{
@@ -236,7 +236,7 @@ pub(crate) async fn load_training_data(
 
 // TODO: Allow users to register their own plugins
 static SCALAR_INDEX_PLUGIN_REGISTRY: LazyLock<Arc<IndexPluginRegistry>> =
-    LazyLock::new(IndexPluginRegistry::with_default_plugins);
+    LazyLock::new(with_default_plugins);
 
 pub struct IndexDetails(pub Arc<prost_types::Any>);
 
@@ -448,8 +448,12 @@ pub async fn open_scalar_index(
         .index_cache
         .for_index(&index.uuid, frag_reuse_index.as_ref().map(|f| &f.uuid));
 
+    let remapper = frag_reuse_index
+        .clone()
+        .map(|f| f as Arc<dyn lance_index::RowIdRemapper>);
+
     if let Some(index) = plugin
-        .get_from_cache(index_store.clone(), frag_reuse_index.clone(), &index_cache)
+        .get_from_cache(index_store.clone(), remapper.clone(), &index_cache)
         .await?
     {
         // Compatibility check is only needed on first load; a cache hit means
@@ -463,7 +467,7 @@ pub async fn open_scalar_index(
     }
 
     let index = plugin
-        .load_index(index_store, &index_details, frag_reuse_index, &index_cache)
+        .load_index(index_store, &index_details, remapper, &index_cache)
         .await?;
 
     tracing::info!(target: TRACE_IO_EVENTS, index_uuid = %index_uuid, r#type = IO_TYPE_OPEN_SCALAR, index_type = index.index_type().to_string());
diff --git a/rust/lance/src/index/scalar/bitmap.rs b/rust/lance/src/index/scalar/bitmap.rs
index 2eb5702ee28..da1a6d9d8fe 100644
--- a/rust/lance/src/index/scalar/bitmap.rs
+++ b/rust/lance/src/index/scalar/bitmap.rs
@@ -70,7 +70,12 @@ pub(in crate::index) async fn merge_segments(
         index_version: created_index.index_version as i32,
         created_at: Some(chrono::Utc::now()),
         base_id: None,
-        files: Some(created_index.files),
+        files: created_index.files.map(|files| {
+            files
+                .into_iter()
+                .map(crate::index::index_file_to_table)
+                .collect()
+        }),
         ..segments[0].clone()
     })
 }
diff --git a/rust/lance/src/index/scalar/btree.rs b/rust/lance/src/index/scalar/btree.rs
index 4339b8c183b..625b3853ff1 100644
--- a/rust/lance/src/index/scalar/btree.rs
+++ b/rust/lance/src/index/scalar/btree.rs
@@ -161,6 +161,11 @@ pub(crate) async fn merge_segments(
         index_version: created_index.index_version as i32,
         created_at: Some(chrono::Utc::now()),
         base_id: None,
-        files: Some(created_index.files),
+        files: created_index.files.map(|files| {
+            files
+                .into_iter()
+                .map(crate::index::index_file_to_table)
+                .collect()
+        }),
     })
 }
diff --git a/rust/lance/src/index/scalar/fmindex.rs b/rust/lance/src/index/scalar/fmindex.rs
index 6c33498d929..784b74208fe 100644
--- a/rust/lance/src/index/scalar/fmindex.rs
+++ b/rust/lance/src/index/scalar/fmindex.rs
@@ -52,7 +52,7 @@ pub(in crate::index) async fn merge_segments(
             &column,
             new_uuid,
             &lance_index::scalar::ScalarIndexParams::for_builtin(
-                lance_index::scalar::BuiltinIndexType::Fm,
+                lance_index::scalar::BuiltinIndexType::FMIndex,
             ),
             false,
             None,
@@ -70,7 +70,12 @@ pub(in crate::index) async fn merge_segments(
             index_version: created_index.index_version as i32,
             created_at: Some(chrono::Utc::now()),
             base_id: None,
-            files: Some(created_index.files),
+            files: created_index.files.map(|files| {
+                files
+                    .into_iter()
+                    .map(crate::index::index_file_to_table)
+                    .collect()
+            }),
             ..segments[0].clone()
         });
     }
@@ -83,7 +88,7 @@ pub(in crate::index) async fn merge_segments(
         &column,
         new_uuid,
         &lance_index::scalar::ScalarIndexParams::for_builtin(
-            lance_index::scalar::BuiltinIndexType::Fm,
+            lance_index::scalar::BuiltinIndexType::FMIndex,
         ),
         true,
         Some(fragment_ids),
@@ -101,7 +106,12 @@ pub(in crate::index) async fn merge_segments(
         index_version: created_index.index_version as i32,
         created_at: Some(chrono::Utc::now()),
         base_id: None,
-        files: Some(created_index.files),
+        files: created_index.files.map(|files| {
+            files
+                .into_iter()
+                .map(crate::index::index_file_to_table)
+                .collect()
+        }),
         ..segments[0].clone()
     })
 }
diff --git a/rust/lance/src/index/scalar/inverted.rs b/rust/lance/src/index/scalar/inverted.rs
index 000d2c3139c..dab76b1781c 100644
--- a/rust/lance/src/index/scalar/inverted.rs
+++ b/rust/lance/src/index/scalar/inverted.rs
@@ -137,7 +137,12 @@ pub(crate) async fn merge_segments(
         index_version: created_index.index_version as i32,
         created_at: Some(chrono::Utc::now()),
         base_id: None,
-        files: Some(created_index.files),
+        files: created_index.files.map(|files| {
+            files
+                .into_iter()
+                .map(crate::index::index_file_to_table)
+                .collect()
+        }),
         ..segments[0].clone()
     })
 }
diff --git a/rust/lance/src/index/scalar/zonemap.rs b/rust/lance/src/index/scalar/zonemap.rs
index 0cbd98f2c40..b4095bddabe 100644
--- a/rust/lance/src/index/scalar/zonemap.rs
+++ b/rust/lance/src/index/scalar/zonemap.rs
@@ -80,7 +80,12 @@ pub(in crate::index) async fn merge_segments(
         index_version: created_index.index_version as i32,
         created_at: Some(chrono::Utc::now()),
         base_id: None,
-        files: Some(created_index.files),
+        files: created_index.files.map(|files| {
+            files
+                .into_iter()
+                .map(crate::index::index_file_to_table)
+                .collect()
+        }),
         ..segments[0].clone()
     })
 }
diff --git a/rust/lance/src/index/scalar_logical.rs b/rust/lance/src/index/scalar_logical.rs
index f3a7b637202..1e5b0d8f9f2 100644
--- a/rust/lance/src/index/scalar_logical.rs
+++ b/rust/lance/src/index/scalar_logical.rs
@@ -86,13 +86,6 @@ impl Index for LogicalScalarIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn lance_index::vector::VectorIndex>> {
-        Err(Error::invalid_input(format!(
-            "LogicalScalarIndex '{}' is not a vector index",
-            self.name
-        )))
-    }
-
     fn statistics(&self) -> Result<serde_json::Value> {
         Ok(json!({
             "index_name": self.name,
@@ -1146,15 +1139,16 @@ mod tests {
         let fragments = dataset.get_fragments();
         assert_eq!(fragments.len(), 3);
 
-        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::Fm);
+        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::FMIndex);
         let mut segments = Vec::new();
         for fragment in &fragments {
-            let segment = CreateIndexBuilder::new(&mut dataset, &["text"], IndexType::Fm, &params)
-                .name("text_fmindex".to_string())
-                .fragments(vec![fragment.id() as u32])
-                .execute_uncommitted()
-                .await
-                .unwrap();
+            let segment =
+                CreateIndexBuilder::new(&mut dataset, &["text"], IndexType::FMIndex, &params)
+                    .name("text_fmindex".to_string())
+                    .fragments(vec![fragment.id() as u32])
+                    .execute_uncommitted()
+                    .await
+                    .unwrap();
 
             assert_eq!(
                 segment
@@ -1180,7 +1174,7 @@ mod tests {
             open_named_scalar_index(&dataset, "text", "text_fmindex", &NoOpMetricsCollector)
                 .await
                 .unwrap();
-        assert_eq!(logical.index_type(), IndexType::Fm);
+        assert_eq!(logical.index_type(), IndexType::FMIndex);
 
         let query = lance_index::scalar::TextQuery::StringContains("quick".to_string());
         let result = logical.search(&query, &NoOpMetricsCollector).await.unwrap();
@@ -1246,15 +1240,16 @@ mod tests {
         let fragments = dataset.get_fragments();
         assert_eq!(fragments.len(), 2);
 
-        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::Fm);
+        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::FMIndex);
         let mut staged = Vec::new();
         for fragment in &fragments {
-            let segment = CreateIndexBuilder::new(&mut dataset, &["text"], IndexType::Fm, &params)
-                .name("text_fmindex_merge".to_string())
-                .fragments(vec![fragment.id() as u32])
-                .execute_uncommitted()
-                .await
-                .unwrap();
+            let segment =
+                CreateIndexBuilder::new(&mut dataset, &["text"], IndexType::FMIndex, &params)
+                    .name("text_fmindex_merge".to_string())
+                    .fragments(vec![fragment.id() as u32])
+                    .execute_uncommitted()
+                    .await
+                    .unwrap();
             staged.push(segment);
         }
         assert_eq!(staged.len(), 2);
@@ -1292,7 +1287,7 @@ mod tests {
         )
         .await
         .unwrap();
-        assert_eq!(logical.index_type(), IndexType::Fm);
+        assert_eq!(logical.index_type(), IndexType::FMIndex);
 
         let query = lance_index::scalar::TextQuery::StringContains("delta".to_string());
         let result = logical.search(&query, &NoOpMetricsCollector).await.unwrap();
@@ -1354,15 +1349,16 @@ mod tests {
         assert_eq!(fragments.len(), 2);
 
         // Build per-fragment FM-Index segments and commit
-        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::Fm);
+        let params = ScalarIndexParams::for_builtin(BuiltinIndexType::FMIndex);
         let mut staged = Vec::new();
         for fragment in &fragments {
-            let segment = CreateIndexBuilder::new(&mut dataset, &["text"], IndexType::Fm, &params)
-                .name("text_fmindex_compact".to_string())
-                .fragments(vec![fragment.id() as u32])
-                .execute_uncommitted()
-                .await
-                .unwrap();
+            let segment =
+                CreateIndexBuilder::new(&mut dataset, &["text"], IndexType::FMIndex, &params)
+                    .name("text_fmindex_compact".to_string())
+                    .fragments(vec![fragment.id() as u32])
+                    .execute_uncommitted()
+                    .await
+                    .unwrap();
             staged.push(segment);
         }
         dataset
diff --git a/rust/lance/src/index/vector.rs b/rust/lance/src/index/vector.rs
index 0eb66ea2ede..e9297e638f1 100644
--- a/rust/lance/src/index/vector.rs
+++ b/rust/lance/src/index/vector.rs
@@ -24,7 +24,7 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
 use futures::stream;
 use lance_core::utils::tempfile::TempStdDir;
 use lance_file::previous::reader::FileReader as PreviousFileReader;
-use lance_index::frag_reuse::FragReuseIndex;
+use lance_index::RowIdRemapper;
 use lance_index::metrics::NoOpMetricsCollector;
 use lance_index::optimize::OptimizeOptions;
 use lance_index::progress::{IndexBuildProgress, noop_progress};
@@ -589,7 +589,7 @@ pub(crate) async fn build_distributed_vector_index(
     _name: &str,
     uuid: Uuid,
     params: &VectorIndexParams,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     fragment_ids: &[u32],
     progress: Arc<dyn IndexBuildProgress>,
 ) -> Result<(Uuid, Vec<IndexFile>)> {
@@ -941,7 +941,7 @@ pub(crate) async fn build_vector_index(
     name: &str,
     uuid: Uuid,
     params: &VectorIndexParams,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     progress: Arc<dyn IndexBuildProgress>,
 ) -> Result<Vec<IndexFile>> {
     let (element_type, index_type, ivf_params, shuffler) = prepare_vector_segment_build(
@@ -1215,7 +1215,7 @@ pub(crate) async fn build_vector_index_incremental(
     uuid: Uuid,
     params: &VectorIndexParams,
     existing_index: Arc<dyn VectorIndex>,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     progress: Arc<dyn IndexBuildProgress>,
 ) -> Result<VectorIndexBuildSummary> {
     let stages = &params.stages;
@@ -1537,7 +1537,7 @@ pub(crate) async fn open_vector_index(
     uuid: &Uuid,
     vec_idx: &lance_index::pb::VectorIndex,
     reader: Arc<dyn Reader>,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 ) -> Result<Arc<dyn VectorIndex>> {
     let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type)?.into();
 
@@ -1632,7 +1632,7 @@ pub(crate) async fn open_vector_index_v2(
     column: &str,
     uuid: &Uuid,
     reader: PreviousFileReader,
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 ) -> Result<Arc<dyn VectorIndex>> {
     let index_metadata = reader
         .schema()
@@ -1841,7 +1841,8 @@ pub async fn initialize_vector_index(
     let new_uuid = Uuid::new_v4();
     let frag_reuse_index = target_dataset
         .open_frag_reuse_index(&NoOpMetricsCollector)
-        .await?;
+        .await?
+        .map(|f| f as Arc<dyn RowIdRemapper>);
 
     let summary = build_vector_index_incremental(
         target_dataset,
diff --git a/rust/lance/src/index/vector/builder.rs b/rust/lance/src/index/vector/builder.rs
index 1e4fec8c762..49cdfdbf515 100644
--- a/rust/lance/src/index/vector/builder.rs
+++ b/rust/lance/src/index/vector/builder.rs
@@ -28,7 +28,7 @@ use lance_core::utils::tokio::{get_num_compute_intensive_cpus, spawn_cpu};
 use lance_core::{Error, ROW_ID_FIELD, Result};
 use lance_encoding::version::LanceFileVersion;
 use lance_file::writer::{FileWriter, FileWriterOptions};
-use lance_index::frag_reuse::FragReuseIndex;
+use lance_index::RowIdRemapper;
 use lance_index::metrics::NoOpMetricsCollector;
 use lance_index::optimize::OptimizeOptions;
 use lance_index::progress::{IndexBuildProgress, NoopIndexBuildProgress};
@@ -149,7 +149,7 @@ pub struct IvfIndexBuilder<S: IvfSubIndex, Q: Quantization> {
     // fields for merging indices / remapping
     existing_indices: Vec<Arc<dyn VectorIndex>>,
 
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 
     // fragments for distributed indexing
     fragment_filter: Option<Vec<u32>>,
@@ -188,7 +188,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> IvfIndexBuilder<S, Q>
         ivf_params: Option<IvfBuildParams>,
         quantizer_params: Option<Q::BuildParams>,
         sub_index_params: S::BuildParams,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<Self> {
         let temp_dir = TempStdDir::default();
         let temp_dir_path = Path::from_filesystem_path(&temp_dir)?;
@@ -229,7 +229,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> IvfIndexBuilder<S, Q>
         distance_type: DistanceType,
         shuffler: Box<dyn Shuffler>,
         sub_index_params: S::BuildParams,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         optimize_options: OptimizeOptions,
     ) -> Result<Self> {
         let mut builder = Self::new(
@@ -1017,7 +1017,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> IvfIndexBuilder<S, Q>
         sub_index_params: S::BuildParams,
         batches: Vec<RecordBatch>,
         column: String,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Result<(Q::Storage, S)> {
         let storage = StorageBuilder::new(column, distance_type, quantizer, frag_reuse_index)?
             .build(batches)?;
diff --git a/rust/lance/src/index/vector/fixture_test.rs b/rust/lance/src/index/vector/fixture_test.rs
index 91d5c434dd1..1b82a7f6941 100644
--- a/rust/lance/src/index/vector/fixture_test.rs
+++ b/rust/lance/src/index/vector/fixture_test.rs
@@ -71,10 +71,6 @@ mod test {
             self
         }
 
-        fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-            Ok(self)
-        }
-
         async fn prewarm(&self) -> Result<()> {
             Ok(())
         }
diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs
index fb01339ead9..328125262a6 100644
--- a/rust/lance/src/index/vector/ivf.rs
+++ b/rust/lance/src/index/vector/ivf.rs
@@ -500,7 +500,10 @@ pub(crate) async fn optimize_vector_indices_v2(
     let distance_type = existing_indices[0].metric_type();
     let num_partitions = ivf_model.num_partitions();
     let index_type = existing_indices[0].sub_index_type();
-    let frag_reuse_index = dataset.open_frag_reuse_index(&NoOpMetricsCollector).await?;
+    let frag_reuse_index = dataset
+        .open_frag_reuse_index(&NoOpMetricsCollector)
+        .await?
+        .map(|f| f as Arc<dyn lance_index::RowIdRemapper>);
 
     let format_version = dataset_format_version(dataset);
 
@@ -1088,10 +1091,6 @@ impl Index for IVFIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Ok(self)
-    }
-
     fn index_type(&self) -> IndexType {
         if self.sub_index.as_any().downcast_ref::<PQIndex>().is_some() {
             IndexType::IvfPq
diff --git a/rust/lance/src/index/vector/ivf/io.rs b/rust/lance/src/index/vector/ivf/io.rs
index 56d220aeed2..9bef7648dd0 100644
--- a/rust/lance/src/index/vector/ivf/io.rs
+++ b/rust/lance/src/index/vector/ivf/io.rs
@@ -25,7 +25,6 @@ use lance_core::utils::tokio::{get_num_compute_intensive_cpus, spawn_cpu};
 use lance_file::previous::reader::FileReader as PreviousFileReader;
 use lance_file::previous::writer::FileWriter as PreviousFileWriter;
 use lance_index::metrics::NoOpMetricsCollector;
-use lance_index::scalar::IndexWriter;
 use lance_index::vector::hnsw::HNSW;
 use lance_index::vector::hnsw::{HnswMetadata, builder::HnswBuildParams};
 use lance_index::vector::ivf::storage::IvfModel;
@@ -508,7 +507,7 @@ async fn build_and_write_hnsw(
 ) -> Result<usize> {
     let batch = params.build(vectors, distance_type).await?.to_batch()?;
     let metadata = batch.schema_ref().metadata().clone();
-    writer.write_record_batch(batch).await?;
+    writer.write(&[batch]).await?;
     writer.finish_with_metadata(&metadata).await
 }
 
@@ -521,7 +520,7 @@ async fn build_and_write_pq_storage(
 ) -> Result<()> {
     let storage = spawn_cpu(move || build_pq_storage(metric_type, row_ids, code_array, pq)).await?;
 
-    writer.write_record_batch(storage.batch().clone()).await?;
+    writer.write(&[storage.batch().clone()]).await?;
     writer.finish().await?;
     Ok(())
 }
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index 5b29752f7c1..75a24805fb5 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -36,6 +36,7 @@ use lance_core::{Error, ROW_ID, Result};
 use lance_encoding::decoder::{DecoderPlugins, FilterExpression};
 use lance_file::LanceEncodingsIo;
 use lance_file::reader::{CachedFileMetadata, FileReader, FileReaderOptions};
+use lance_index::RowIdRemapper;
 use lance_index::cache_pb::IvfStateHeader;
 use lance_index::frag_reuse::FragReuseIndex;
 use lance_index::metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector};
@@ -969,7 +970,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
         object_store: Arc<ObjectStore>,
         index_dir: Path,
         uuid: Uuid,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
         file_metadata_cache: &LanceCache,
         index_cache: LanceCache,
         file_sizes: HashMap<String, u64>,
@@ -1288,10 +1289,6 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> Index for IVFIndex<S,
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Ok(self)
-    }
-
     async fn prewarm(&self) -> Result<()> {
         futures::stream::iter(0..self.ivf.num_partitions())
             .map(Ok)
@@ -1887,12 +1884,13 @@ async fn reconstruct_typed<S: IvfSubIndex + 'static, Q: Quantization + 'static>(
             (index_reader, aux_reader)
         };
 
+    let fri: Option<Arc<dyn RowIdRemapper>> = frag_reuse_index.map(|f| f as Arc<dyn RowIdRemapper>);
     let storage = IvfQuantizationStorage::from_cached(
         aux_reader,
         state.aux_ivf.clone(),
         state.metadata.clone(),
         state.distance_type,
-        frag_reuse_index,
+        fri,
     );
     let rq_search_cache = IVFIndex::<S, Q>::rq_search_cache_from_state(state, &storage)?;
 
@@ -1952,13 +1950,17 @@ mod tests {
         dataset::optimize::{CompactionOptions, compact_files},
         index::vector::IndexFileVersion,
     };
+    use arrow::compute::concat_batches;
+    use futures::TryStreamExt;
     use lance_core::cache::LanceCache;
     use lance_core::utils::tempfile::TempStrDir;
     use lance_core::{ROW_ID, Result};
     use lance_encoding::decoder::DecoderPlugins;
+    use lance_encoding::decoder::FilterExpression;
     use lance_file::reader::{FileReader, FileReaderOptions};
     use lance_file::writer::FileWriter;
     use lance_index::IndexType;
+    use lance_index::optimize::OptimizeOptions;
     use lance_index::progress::IndexBuildProgress;
     use lance_index::vector::DIST_COL;
     use lance_index::vector::hnsw::builder::HnswBuildParams;
@@ -1973,8 +1975,8 @@ mod tests {
         storage::STORAGE_METADATA_KEY,
     };
     use lance_index::{INDEX_AUXILIARY_FILE_NAME, metrics::NoOpMetricsCollector};
-    use lance_index::{optimize::OptimizeOptions, scalar::IndexReader};
     use lance_io::{
+        ReadBatchParams,
         object_store::ObjectStore,
         scheduler::{ScanScheduler, SchedulerConfig},
         utils::CachedFileSize,
@@ -5148,9 +5150,18 @@ mod tests {
 
         // Rewrite auxiliary file with PQ codebook inlined into schema metadata.
         let mut metadata = reader.schema().metadata.clone();
-        let batch = reader
-            .read_range(0..reader.num_rows() as usize, None)
+        let reader_schema: Arc<arrow_schema::Schema> = Arc::new(reader.schema().as_ref().into());
+        let batches = reader
+            .read_stream(
+                ReadBatchParams::RangeFull,
+                u32::MAX,
+                1,
+                FilterExpression::no_filter(),
+            )
+            .await?
+            .try_collect::<Vec<_>>()
             .await?;
+        let batch = concat_batches(&reader_schema, batches.iter())?;
         let new_aux_path = new_dir.clone().join(INDEX_AUXILIARY_FILE_NAME);
         let mut writer = FileWriter::try_new(
             obj_store.create(&new_aux_path).await?,
diff --git a/rust/lance/src/index/vector/pq.rs b/rust/lance/src/index/vector/pq.rs
index a661a314b4d..e5c7e350085 100644
--- a/rust/lance/src/index/vector/pq.rs
+++ b/rust/lance/src/index/vector/pq.rs
@@ -22,7 +22,7 @@ use lance_core::deepsize::DeepSizeOf;
 use lance_core::utils::address::RowAddress;
 use lance_core::utils::tokio::spawn_cpu;
 use lance_core::{ROW_ID, ROW_ID_FIELD};
-use lance_index::frag_reuse::FragReuseIndex;
+use lance_index::RowIdRemapper;
 use lance_index::metrics::MetricsCollector;
 use lance_index::vector::ivf::storage::IvfModel;
 use lance_index::vector::pq::storage::{ProductQuantizationStorage, transpose};
@@ -67,7 +67,7 @@ pub struct PQIndex {
     /// Metric type.
     metric_type: MetricType,
 
-    frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
 }
 
 impl DeepSizeOf for PQIndex {
@@ -115,7 +115,7 @@ impl PQIndex {
     pub(crate) fn new(
         pq: ProductQuantizer,
         metric_type: MetricType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+        frag_reuse_index: Option<Arc<dyn RowIdRemapper>>,
     ) -> Self {
         Self {
             code: None,
@@ -180,10 +180,6 @@ impl Index for PQIndex {
         self
     }
 
-    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-        Ok(self)
-    }
-
     fn index_type(&self) -> IndexType {
         IndexType::Vector
     }
diff --git a/rust/lance/src/io/exec/knn.rs b/rust/lance/src/io/exec/knn.rs
index 0ceddf7c5ee..05bf83a2196 100644
--- a/rust/lance/src/io/exec/knn.rs
+++ b/rust/lance/src/io/exec/knn.rs
@@ -2020,10 +2020,6 @@ mod tests {
             self
         }
 
-        fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-            Ok(self)
-        }
-
         fn statistics(&self) -> Result<serde_json::Value> {
             Ok(serde_json::json!({}))
         }
@@ -2144,10 +2140,6 @@ mod tests {
             self
         }
 
-        fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-            Ok(self)
-        }
-
         fn statistics(&self) -> Result<serde_json::Value> {
             Ok(serde_json::json!({}))
         }
diff --git a/rust/lance/src/session/index_extension.rs b/rust/lance/src/session/index_extension.rs
index de9e61b5f8f..301213c6f06 100644
--- a/rust/lance/src/session/index_extension.rs
+++ b/rust/lance/src/session/index_extension.rs
@@ -111,10 +111,6 @@ mod test {
             self
         }
 
-        fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn VectorIndex>> {
-            Ok(self)
-        }
-
         async fn prewarm(&self) -> Result<()> {
             Ok(())
         }