From e76f66548bfd826e16fd2fa00f63f46cd6e93db1 Mon Sep 17 00:00:00 2001 From: Beinan Wang Date: Sat, 6 Jun 2026 21:37:38 -0700 Subject: [PATCH 1/4] feat: add configurable V1 blob encoding for large payload columns Enable Lance V1 blob encoding via `blob_columns` option in ContextStoreOptions. Supports `text_payload` and `binary_payload` columns. Blob-encoded columns store data in out-of-line buffers for efficient storage of large/unpredictable content. Co-Authored-By: Beinan Wang --- crates/lance-context-core/src/store.rs | 325 +++++++++++++++++++++++-- python/src/lib.rs | 8 +- 2 files changed, 313 insertions(+), 20 deletions(-) diff --git a/crates/lance-context-core/src/store.rs b/crates/lance-context-core/src/store.rs index 60c733d..4bc4199 100644 --- a/crates/lance-context-core/src/store.rs +++ b/crates/lance-context-core/src/store.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::Duration; @@ -96,12 +96,16 @@ struct CompactionState { total_compactions: u64, } +/// Valid column names that may use blob encoding. +const VALID_BLOB_COLUMNS: &[&str] = &["text_payload", "binary_payload"]; + /// Persistent Lance-backed context store. #[derive(Clone)] pub struct ContextStore { dataset: Dataset, compaction_state: Arc>, pub compaction_config: CompactionConfig, + blob_columns: HashSet, } /// Additional configuration when opening a [`ContextStore`]. @@ -109,6 +113,9 @@ pub struct ContextStore { pub struct ContextStoreOptions { pub storage_options: Option>, pub compaction: CompactionConfig, + /// Column names that should use Lance V1 blob encoding. + /// Valid values: `"text_payload"`, `"binary_payload"`. + pub blob_columns: HashSet, } impl ContextStoreOptions { @@ -126,11 +133,24 @@ impl ContextStore { /// Open a dataset with explicit object store configuration (e.g. S3 credentials). pub async fn open_with_options(uri: &str, options: ContextStoreOptions) -> LanceResult { + // Validate blob_columns + for col in &options.blob_columns { + if !VALID_BLOB_COLUMNS.contains(&col.as_str()) { + return Err(LanceError::from(ArrowError::InvalidArgumentError( + format!( + "invalid blob column '{}': valid columns are {:?}", + col, VALID_BLOB_COLUMNS + ), + ))); + } + } + let storage_options = options.storage_options(); + let blob_columns = options.blob_columns.clone(); let dataset = match Self::load_with_options(uri, storage_options.clone()).await { Ok(dataset) => dataset, Err(LanceError::DatasetNotFound { .. }) => { - Self::create_with_options(uri, storage_options).await? + Self::create_with_options(uri, storage_options, &blob_columns).await? } Err(err) => return Err(err), }; @@ -145,6 +165,7 @@ impl ContextStore { total_compactions: 0, })), compaction_config: options.compaction, + blob_columns, }; // Start background compaction if enabled @@ -186,7 +207,7 @@ impl ContextStore { for ((bot_id, session_id), group_entries) in groups { let region_id = Self::derive_region_id(&bot_id, &session_id); - let batch = Self::records_to_batch(&group_entries)?; + let batch = self.records_to_batch(&group_entries)?; let config = ShardWriterConfig { shard_id: region_id, ..Default::default() @@ -449,13 +470,33 @@ impl ContextStore { } /// Lance schema for the context store. - pub fn schema() -> Schema { + /// + /// When `blob_columns` contains a column name, that column is stored using + /// Lance V1 blob encoding (out-of-line binary buffers). For `text_payload`, + /// this also changes the Arrow type from `LargeUtf8` to `LargeBinary`. + pub fn schema(blob_columns: &HashSet) -> Schema { let mut id_metadata = HashMap::new(); id_metadata.insert( "lance-schema:unenforced-primary-key".to_string(), "true".to_string(), ); + let text_field = if blob_columns.contains("text_payload") { + let mut metadata = HashMap::new(); + metadata.insert("lance-encoding:blob".to_string(), "true".to_string()); + Field::new("text_payload", DataType::LargeBinary, true).with_metadata(metadata) + } else { + Field::new("text_payload", DataType::LargeUtf8, true) + }; + + let binary_field = if blob_columns.contains("binary_payload") { + let mut metadata = HashMap::new(); + metadata.insert("lance-encoding:blob".to_string(), "true".to_string()); + Field::new("binary_payload", DataType::LargeBinary, true).with_metadata(metadata) + } else { + Field::new("binary_payload", DataType::LargeBinary, true) + }; + Schema::new(vec![ Field::new("id", DataType::Utf8, false).with_metadata(id_metadata), Field::new("run_id", DataType::Utf8, false), @@ -485,8 +526,8 @@ impl ContextStore { true, ), Field::new("content_type", DataType::Utf8, false), - Field::new("text_payload", DataType::LargeUtf8, true), - Field::new("binary_payload", DataType::LargeBinary, true), + text_field, + binary_field, Field::new( "embedding", DataType::FixedSizeList( @@ -515,8 +556,9 @@ impl ContextStore { async fn create_with_options( uri: &str, storage_options: Option>, + blob_columns: &HashSet, ) -> LanceResult { - let schema = Arc::new(Self::schema()); + let schema = Arc::new(Self::schema(blob_columns)); let empty_batch = RecordBatch::new_empty(schema.clone()); let batches = RecordBatchIterator::new( vec![Ok::(empty_batch)].into_iter(), @@ -541,7 +583,7 @@ impl ContextStore { Dataset::write(batches, uri, Some(params)).await } - fn records_to_batch(entries: &[ContextRecord]) -> LanceResult { + fn records_to_batch(&self, entries: &[ContextRecord]) -> LanceResult { let mut id_builder = StringBuilder::new(); let mut run_id_builder = StringBuilder::new(); let mut bot_id_builder = StringBuilder::new(); @@ -549,9 +591,20 @@ impl ContextStore { let mut created_at_builder = TimestampMicrosecondBuilder::with_capacity(entries.len()); let mut role_builder = StringDictionaryBuilder::::new(); let mut content_type_builder = StringBuilder::new(); - let mut text_builder = LargeStringBuilder::new(); let mut binary_builder = LargeBinaryBuilder::new(); + let text_is_blob = self.blob_columns.contains("text_payload"); + let mut text_string_builder = if !text_is_blob { + Some(LargeStringBuilder::new()) + } else { + None + }; + let mut text_binary_builder = if text_is_blob { + Some(LargeBinaryBuilder::new()) + } else { + None + }; + let state_fields: Vec = vec![ Arc::new(Field::new("step", DataType::Int32, true)), Arc::new(Field::new("active_plan_id", DataType::Utf8, true)), @@ -580,9 +633,19 @@ impl ContextStore { role_builder.append(&entry.role)?; content_type_builder.append_value(&entry.content_type); - match &entry.text_payload { - Some(value) => text_builder.append_value(value), - None => text_builder.append_null(), + if text_is_blob { + match &entry.text_payload { + Some(value) => text_binary_builder + .as_mut() + .unwrap() + .append_value(value.as_bytes()), + None => text_binary_builder.as_mut().unwrap().append_null(), + } + } else { + match &entry.text_payload { + Some(value) => text_string_builder.as_mut().unwrap().append_value(value), + None => text_string_builder.as_mut().unwrap().append_null(), + } } match &entry.binary_payload { @@ -661,12 +724,16 @@ impl ContextStore { let created_at_array: ArrayRef = Arc::new(created_at_builder.finish()); let role_array: ArrayRef = Arc::new(role_builder.finish()); let content_type_array: ArrayRef = Arc::new(content_type_builder.finish()); - let text_array: ArrayRef = Arc::new(text_builder.finish()); + let text_array: ArrayRef = if text_is_blob { + Arc::new(text_binary_builder.unwrap().finish()) + } else { + Arc::new(text_string_builder.unwrap().finish()) + }; let binary_array: ArrayRef = Arc::new(binary_builder.finish()); let state_array: ArrayRef = Arc::new(state_builder.finish()); let embedding_array: ArrayRef = Arc::new(embedding_builder.finish()); - let schema = Arc::new(Self::schema()); + let schema = Arc::new(Self::schema(&self.blob_columns)); let batch = RecordBatch::try_new( schema, vec![ @@ -737,10 +804,26 @@ fn batch_to_records(batch: &RecordBatch) -> LanceResult> { let role_array = column_as::>(batch, "role")?; let state_array = column_as::(batch, "state_metadata")?; let content_type_array = column_as::(batch, "content_type")?; - let text_array = column_as::(batch, "text_payload")?; let binary_array = column_as::(batch, "binary_payload")?; let embedding_array = column_as::(batch, "embedding")?; + // Auto-detect whether text_payload is LargeBinary (blob) or LargeUtf8 (default) + let text_is_binary = batch + .schema() + .field_with_name("text_payload") + .map_or(false, |f| f.data_type() == &DataType::LargeBinary); + + let text_string_array = if !text_is_binary { + Some(column_as::(batch, "text_payload")?) + } else { + None + }; + let text_binary_array = if text_is_binary { + Some(column_as::(batch, "text_payload")?) + } else { + None + }; + let step_array = state_array .column(0) .as_ref() @@ -819,10 +902,20 @@ fn batch_to_records(batch: &RecordBatch) -> LanceResult> { }) }; - let text_payload = if text_array.is_null(row) { - None + let text_payload = if text_is_binary { + let arr = text_binary_array.unwrap(); + if arr.is_null(row) { + None + } else { + Some(String::from_utf8_lossy(arr.value(row)).to_string()) + } } else { - Some(text_array.value(row).to_string()) + let arr = text_string_array.unwrap(); + if arr.is_null(row) { + None + } else { + Some(arr.value(row).to_string()) + } }; let binary_payload = if binary_array.is_null(row) { @@ -1076,4 +1169,200 @@ mod tests { // assert!(ids.contains(&"r2".to_string())); }); } + + #[test] + fn test_blob_binary_payload() { + let dir = TempDir::new().unwrap(); + let uri = dir.path().to_string_lossy().to_string(); + let runtime = tokio::runtime::Runtime::new().unwrap(); + + runtime.block_on(async { + let options = ContextStoreOptions { + blob_columns: HashSet::from(["binary_payload".to_string()]), + ..Default::default() + }; + let mut store = ContextStore::open_with_options(&uri, options).await.unwrap(); + + let mut record = text_record("blob-bin-1", 0.0); + record.binary_payload = Some(vec![0xDE, 0xAD, 0xBE, 0xEF]); + store.add(&[record.clone()]).await.unwrap(); + + // Verify schema has blob metadata on binary_payload + let schema = ContextStore::schema(&store.blob_columns); + let field = schema.field_with_name("binary_payload").unwrap(); + assert_eq!( + field.metadata().get("lance-encoding:blob"), + Some(&"true".to_string()), + ); + // text_payload should remain LargeUtf8 without blob metadata + let text_field = schema.field_with_name("text_payload").unwrap(); + assert_eq!(text_field.data_type(), &DataType::LargeUtf8); + assert!(text_field.metadata().get("lance-encoding:blob").is_none()); + }); + } + + #[test] + fn test_blob_text_payload() { + let dir = TempDir::new().unwrap(); + let uri = dir.path().to_string_lossy().to_string(); + let runtime = tokio::runtime::Runtime::new().unwrap(); + + runtime.block_on(async { + let options = ContextStoreOptions { + blob_columns: HashSet::from(["text_payload".to_string()]), + ..Default::default() + }; + let mut store = ContextStore::open_with_options(&uri, options).await.unwrap(); + + let record = text_record("blob-txt-1", 0.0); + store.add(&[record.clone()]).await.unwrap(); + + // Roundtrip: records_to_batch -> batch_to_records + let batch = store.records_to_batch(&[record.clone()]).unwrap(); + let batch_schema = batch.schema(); + let text_field = batch_schema.field_with_name("text_payload").unwrap(); + assert_eq!( + text_field.data_type(), + &DataType::LargeBinary, + "text_payload should be LargeBinary when blob-encoded" + ); + + let roundtripped = batch_to_records(&batch).unwrap(); + assert_eq!(roundtripped.len(), 1); + assert_eq!( + roundtripped[0].text_payload, + record.text_payload, + "text payload should survive blob roundtrip" + ); + }); + } + + #[test] + fn test_blob_both_columns() { + let dir = TempDir::new().unwrap(); + let uri = dir.path().to_string_lossy().to_string(); + let runtime = tokio::runtime::Runtime::new().unwrap(); + + runtime.block_on(async { + let options = ContextStoreOptions { + blob_columns: HashSet::from([ + "text_payload".to_string(), + "binary_payload".to_string(), + ]), + ..Default::default() + }; + let mut store = ContextStore::open_with_options(&uri, options).await.unwrap(); + + let mut record = text_record("blob-both-1", 0.0); + record.binary_payload = Some(b"hello binary".to_vec()); + store.add(&[record.clone()]).await.unwrap(); + + // Both columns should have blob metadata + let schema = ContextStore::schema(&store.blob_columns); + let text_field = schema.field_with_name("text_payload").unwrap(); + let bin_field = schema.field_with_name("binary_payload").unwrap(); + assert_eq!( + text_field.metadata().get("lance-encoding:blob"), + Some(&"true".to_string()), + ); + assert_eq!( + bin_field.metadata().get("lance-encoding:blob"), + Some(&"true".to_string()), + ); + + // Roundtrip via batch + let batch = store.records_to_batch(&[record.clone()]).unwrap(); + let roundtripped = batch_to_records(&batch).unwrap(); + assert_eq!(roundtripped.len(), 1); + assert_eq!(roundtripped[0].text_payload, record.text_payload); + assert_eq!(roundtripped[0].binary_payload, record.binary_payload); + }); + } + + #[test] + fn test_no_blob_default() { + // Default options should produce no blob metadata + let schema = ContextStore::schema(&HashSet::new()); + let text_field = schema.field_with_name("text_payload").unwrap(); + let bin_field = schema.field_with_name("binary_payload").unwrap(); + + assert_eq!(text_field.data_type(), &DataType::LargeUtf8); + assert!(text_field.metadata().get("lance-encoding:blob").is_none()); + assert_eq!(bin_field.data_type(), &DataType::LargeBinary); + assert!(bin_field.metadata().get("lance-encoding:blob").is_none()); + } + + #[test] + fn test_blob_schema_metadata() { + let blob_columns = HashSet::from(["text_payload".to_string(), "binary_payload".to_string()]); + let schema = ContextStore::schema(&blob_columns); + + let text_field = schema.field_with_name("text_payload").unwrap(); + assert_eq!(text_field.data_type(), &DataType::LargeBinary); + assert_eq!( + text_field.metadata().get("lance-encoding:blob"), + Some(&"true".to_string()), + ); + + let bin_field = schema.field_with_name("binary_payload").unwrap(); + assert_eq!(bin_field.data_type(), &DataType::LargeBinary); + assert_eq!( + bin_field.metadata().get("lance-encoding:blob"), + Some(&"true".to_string()), + ); + + // Non-blob fields should have no blob metadata + let id_field = schema.field_with_name("id").unwrap(); + assert!(id_field.metadata().get("lance-encoding:blob").is_none()); + } + + #[test] + fn test_blob_invalid_column_name() { + let dir = TempDir::new().unwrap(); + let uri = dir.path().to_string_lossy().to_string(); + let runtime = tokio::runtime::Runtime::new().unwrap(); + + runtime.block_on(async { + let options = ContextStoreOptions { + blob_columns: HashSet::from(["nonexistent_column".to_string()]), + ..Default::default() + }; + let result = ContextStore::open_with_options(&uri, options).await; + assert!(result.is_err(), "should reject invalid blob column names"); + let err_msg = result.err().unwrap().to_string(); + assert!( + err_msg.contains("invalid blob column"), + "error should mention invalid blob column: {err_msg}" + ); + }); + } + + #[test] + fn test_batch_to_records_autodetects_text_type() { + // Verify that batch_to_records works on both LargeUtf8 and LargeBinary + // text_payload without needing configuration. + let runtime = tokio::runtime::Runtime::new().unwrap(); + runtime.block_on(async { + // Build a batch with text_payload as LargeUtf8 (default) + let dir1 = TempDir::new().unwrap(); + let uri1 = dir1.path().to_string_lossy().to_string(); + let store_default = ContextStore::open(&uri1).await.unwrap(); + let record = text_record("auto-1", 0.0); + let batch_utf8 = store_default.records_to_batch(&[record.clone()]).unwrap(); + let results_utf8 = batch_to_records(&batch_utf8).unwrap(); + assert_eq!(results_utf8[0].text_payload, record.text_payload); + + // Build a batch with text_payload as LargeBinary (blob) + let dir2 = TempDir::new().unwrap(); + let uri2 = dir2.path().to_string_lossy().to_string(); + let options = ContextStoreOptions { + blob_columns: HashSet::from(["text_payload".to_string()]), + ..Default::default() + }; + let store_blob = ContextStore::open_with_options(&uri2, options).await.unwrap(); + let batch_binary = store_blob.records_to_batch(&[record.clone()]).unwrap(); + let results_binary = batch_to_records(&batch_binary).unwrap(); + assert_eq!(results_binary[0].text_payload, record.text_payload); + }); + } } diff --git a/python/src/lib.rs b/python/src/lib.rs index 94dcb07..460abe8 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use chrono::{SecondsFormat, Utc}; @@ -110,19 +110,23 @@ fn compaction_config_from_dict<'py>( #[pymethods] impl Context { #[classmethod] - #[pyo3(signature = (uri, *, storage_options=None, compaction_config=None))] + #[pyo3(signature = (uri, *, storage_options=None, compaction_config=None, blob_columns=None))] fn create( _cls: &Bound<'_, PyType>, py: Python<'_>, uri: &str, storage_options: Option<&Bound<'_, PyDict>>, compaction_config: Option<&Bound<'_, PyDict>>, + blob_columns: Option>, ) -> PyResult { let runtime = Arc::new(Runtime::new().map_err(to_py_err)?); + let blob_set: HashSet = blob_columns.unwrap_or_default().into_iter().collect(); + let options = ContextStoreOptions { storage_options: storage_options_from_dict(storage_options)?, compaction: compaction_config_from_dict(compaction_config)?, + blob_columns: blob_set, }; let store_res = From 75c7d00fb970dad652909c2c5d4ff54b4ce4e0eb Mon Sep 17 00:00:00 2001 From: Beinan Wang Date: Sat, 6 Jun 2026 22:05:59 -0700 Subject: [PATCH 2/4] style: fix formatting Co-Authored-By: Beinan Wang --- crates/lance-context-core/src/store.rs | 32 +++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/crates/lance-context-core/src/store.rs b/crates/lance-context-core/src/store.rs index 4bc4199..1c9567e 100644 --- a/crates/lance-context-core/src/store.rs +++ b/crates/lance-context-core/src/store.rs @@ -136,12 +136,10 @@ impl ContextStore { // Validate blob_columns for col in &options.blob_columns { if !VALID_BLOB_COLUMNS.contains(&col.as_str()) { - return Err(LanceError::from(ArrowError::InvalidArgumentError( - format!( - "invalid blob column '{}': valid columns are {:?}", - col, VALID_BLOB_COLUMNS - ), - ))); + return Err(LanceError::from(ArrowError::InvalidArgumentError(format!( + "invalid blob column '{}': valid columns are {:?}", + col, VALID_BLOB_COLUMNS + )))); } } @@ -1181,7 +1179,9 @@ mod tests { blob_columns: HashSet::from(["binary_payload".to_string()]), ..Default::default() }; - let mut store = ContextStore::open_with_options(&uri, options).await.unwrap(); + let mut store = ContextStore::open_with_options(&uri, options) + .await + .unwrap(); let mut record = text_record("blob-bin-1", 0.0); record.binary_payload = Some(vec![0xDE, 0xAD, 0xBE, 0xEF]); @@ -1212,7 +1212,9 @@ mod tests { blob_columns: HashSet::from(["text_payload".to_string()]), ..Default::default() }; - let mut store = ContextStore::open_with_options(&uri, options).await.unwrap(); + let mut store = ContextStore::open_with_options(&uri, options) + .await + .unwrap(); let record = text_record("blob-txt-1", 0.0); store.add(&[record.clone()]).await.unwrap(); @@ -1230,8 +1232,7 @@ mod tests { let roundtripped = batch_to_records(&batch).unwrap(); assert_eq!(roundtripped.len(), 1); assert_eq!( - roundtripped[0].text_payload, - record.text_payload, + roundtripped[0].text_payload, record.text_payload, "text payload should survive blob roundtrip" ); }); @@ -1251,7 +1252,9 @@ mod tests { ]), ..Default::default() }; - let mut store = ContextStore::open_with_options(&uri, options).await.unwrap(); + let mut store = ContextStore::open_with_options(&uri, options) + .await + .unwrap(); let mut record = text_record("blob-both-1", 0.0); record.binary_payload = Some(b"hello binary".to_vec()); @@ -1294,7 +1297,8 @@ mod tests { #[test] fn test_blob_schema_metadata() { - let blob_columns = HashSet::from(["text_payload".to_string(), "binary_payload".to_string()]); + let blob_columns = + HashSet::from(["text_payload".to_string(), "binary_payload".to_string()]); let schema = ContextStore::schema(&blob_columns); let text_field = schema.field_with_name("text_payload").unwrap(); @@ -1359,7 +1363,9 @@ mod tests { blob_columns: HashSet::from(["text_payload".to_string()]), ..Default::default() }; - let store_blob = ContextStore::open_with_options(&uri2, options).await.unwrap(); + let store_blob = ContextStore::open_with_options(&uri2, options) + .await + .unwrap(); let batch_binary = store_blob.records_to_batch(&[record.clone()]).unwrap(); let results_binary = batch_to_records(&batch_binary).unwrap(); assert_eq!(results_binary[0].text_payload, record.text_payload); From aa999bbd79775225dfac6f009537f0bb3072fa29 Mon Sep 17 00:00:00 2001 From: Beinan Wang Date: Sat, 6 Jun 2026 22:15:04 -0700 Subject: [PATCH 3/4] fix: replace &[record.clone()] with std::slice::from_ref Co-Authored-By: Beinan Wang --- crates/lance-context-core/src/store.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/crates/lance-context-core/src/store.rs b/crates/lance-context-core/src/store.rs index 1c9567e..b2a3f17 100644 --- a/crates/lance-context-core/src/store.rs +++ b/crates/lance-context-core/src/store.rs @@ -1185,7 +1185,7 @@ mod tests { let mut record = text_record("blob-bin-1", 0.0); record.binary_payload = Some(vec![0xDE, 0xAD, 0xBE, 0xEF]); - store.add(&[record.clone()]).await.unwrap(); + store.add(std::slice::from_ref(&record)).await.unwrap(); // Verify schema has blob metadata on binary_payload let schema = ContextStore::schema(&store.blob_columns); @@ -1217,10 +1217,12 @@ mod tests { .unwrap(); let record = text_record("blob-txt-1", 0.0); - store.add(&[record.clone()]).await.unwrap(); + store.add(std::slice::from_ref(&record)).await.unwrap(); // Roundtrip: records_to_batch -> batch_to_records - let batch = store.records_to_batch(&[record.clone()]).unwrap(); + let batch = store + .records_to_batch(std::slice::from_ref(&record)) + .unwrap(); let batch_schema = batch.schema(); let text_field = batch_schema.field_with_name("text_payload").unwrap(); assert_eq!( @@ -1258,7 +1260,7 @@ mod tests { let mut record = text_record("blob-both-1", 0.0); record.binary_payload = Some(b"hello binary".to_vec()); - store.add(&[record.clone()]).await.unwrap(); + store.add(std::slice::from_ref(&record)).await.unwrap(); // Both columns should have blob metadata let schema = ContextStore::schema(&store.blob_columns); @@ -1274,7 +1276,9 @@ mod tests { ); // Roundtrip via batch - let batch = store.records_to_batch(&[record.clone()]).unwrap(); + let batch = store + .records_to_batch(std::slice::from_ref(&record)) + .unwrap(); let roundtripped = batch_to_records(&batch).unwrap(); assert_eq!(roundtripped.len(), 1); assert_eq!(roundtripped[0].text_payload, record.text_payload); @@ -1352,7 +1356,9 @@ mod tests { let uri1 = dir1.path().to_string_lossy().to_string(); let store_default = ContextStore::open(&uri1).await.unwrap(); let record = text_record("auto-1", 0.0); - let batch_utf8 = store_default.records_to_batch(&[record.clone()]).unwrap(); + let batch_utf8 = store_default + .records_to_batch(std::slice::from_ref(&record)) + .unwrap(); let results_utf8 = batch_to_records(&batch_utf8).unwrap(); assert_eq!(results_utf8[0].text_payload, record.text_payload); @@ -1366,7 +1372,9 @@ mod tests { let store_blob = ContextStore::open_with_options(&uri2, options) .await .unwrap(); - let batch_binary = store_blob.records_to_batch(&[record.clone()]).unwrap(); + let batch_binary = store_blob + .records_to_batch(std::slice::from_ref(&record)) + .unwrap(); let results_binary = batch_to_records(&batch_binary).unwrap(); assert_eq!(results_binary[0].text_payload, record.text_payload); }); From 036714888c0d0fd8545b2dbae73d8110785600dc Mon Sep 17 00:00:00 2001 From: Beinan Wang Date: Sat, 6 Jun 2026 23:10:01 -0700 Subject: [PATCH 4/4] fix: use is_ok_and instead of map_or for clippy Co-Authored-By: Beinan Wang --- crates/lance-context-core/src/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/lance-context-core/src/store.rs b/crates/lance-context-core/src/store.rs index b2a3f17..0deb9c2 100644 --- a/crates/lance-context-core/src/store.rs +++ b/crates/lance-context-core/src/store.rs @@ -809,7 +809,7 @@ fn batch_to_records(batch: &RecordBatch) -> LanceResult> { let text_is_binary = batch .schema() .field_with_name("text_payload") - .map_or(false, |f| f.data_type() == &DataType::LargeBinary); + .is_ok_and(|f| f.data_type() == &DataType::LargeBinary); let text_string_array = if !text_is_binary { Some(column_as::(batch, "text_payload")?)