-
Notifications
You must be signed in to change notification settings - Fork 729
fix(python): added cleanup_frag_reuse_index #7221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d90747b
d5143cc
cae6ac0
8e65b01
089d237
a26d17c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,27 +38,25 @@ pub async fn cleanup_frag_reuse_index(dataset: &mut Dataset) -> lance_core::Resu | |
| return Ok(()); | ||
| }; | ||
|
|
||
| let frag_reuse_details = load_frag_reuse_index_details(dataset, frag_reuse_index_meta) | ||
| .await | ||
| .unwrap(); | ||
| // Surface corrupt/stale metadata as a normal error instead of panicking on unwrap. | ||
| let frag_reuse_details = load_frag_reuse_index_details(dataset, frag_reuse_index_meta).await?; | ||
|
|
||
| let mut retained_versions = Vec::new(); | ||
| let mut fragment_bitmaps = RoaringBitmap::new(); | ||
| for version in frag_reuse_details.versions.iter() { | ||
| let check_results = indices | ||
| .iter() | ||
| .map(|idx| is_index_remap_caught_up(version, idx)) | ||
| .collect::<Vec<_>>(); | ||
|
|
||
| if check_results | ||
| .iter() | ||
| .any(|r| matches!(r, Err(Error::InvalidInput { .. }))) | ||
| { | ||
| // If the check fails, the reuse version is likely corrupted, do not retain it. | ||
| continue; | ||
| 'versions: for version in frag_reuse_details.versions.iter() { | ||
| let mut all_caught_up = true; | ||
| for idx in indices.iter() { | ||
| match is_index_remap_caught_up(version, idx) { | ||
| Ok(true) => {} | ||
| Ok(false) => all_caught_up = false, | ||
| // An InvalidInput error means the reuse version is likely corrupted; drop it. | ||
| Err(Error::InvalidInput { .. }) => continue 'versions, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cleanup path treats a partially indexed rewrite group as corrupt and drops the reuse generation, while the load path treats the same straddling state as recoverable. Calling the new Python cleanup API on an older dataset can remove the generation that keeps affected indexes safe. |
||
| // Any other error is unexpected; surface it instead of panicking. | ||
| Err(e) => return Err(e), | ||
| } | ||
| } | ||
|
|
||
| if !check_results.into_iter().all(|r| r.unwrap()) { | ||
| if !all_caught_up { | ||
| fragment_bitmaps.extend(version.new_frag_bitmap()); | ||
| retained_versions.push(version.clone()); | ||
| } | ||
|
|
@@ -212,6 +210,22 @@ mod tests { | |
| is_index_remap_caught_up(&frag_reuse_details.versions[0], scalar_index).unwrap() | ||
| ); | ||
|
|
||
| // Cleanup must not prune a reuse version while an index has not caught up to it. | ||
| cleanup_frag_reuse_index(&mut dataset).await.unwrap(); | ||
| let frag_reuse_index_meta = dataset | ||
| .load_index_by_name(FRAG_REUSE_INDEX_NAME) | ||
| .await | ||
| .unwrap() | ||
| .expect("Fragment reuse index must be available"); | ||
| let frag_reuse_details = load_frag_reuse_index_details(&dataset, &frag_reuse_index_meta) | ||
| .await | ||
| .unwrap(); | ||
| assert_eq!( | ||
| frag_reuse_details.versions.len(), | ||
| 1, | ||
| "reuse version must be retained while an index is not caught up" | ||
| ); | ||
|
|
||
| // Remap and check index is caught up | ||
| remapping::remap_column_index(&mut dataset, &["i"], index_name.clone()) | ||
| .await | ||
|
|
@@ -244,6 +258,71 @@ mod tests { | |
| )); | ||
| } | ||
|
|
||
| /// Corrupt or stale fragment-reuse metadata must surface as a normal error, | ||
| /// not a panic. Regression for the `load_frag_reuse_index_details().unwrap()` | ||
| /// that used to crash the (Python) caller on unloadable metadata. | ||
| #[tokio::test] | ||
| async fn test_cleanup_frag_reuse_index_corrupt_metadata_errors() { | ||
| let mut dataset = lance_datagen::gen_batch() | ||
| .col("i", lance_datagen::array::step::<Int32Type>()) | ||
| .into_ram_dataset(FragmentCount::from(6), FragmentRowCount::from(1000)) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| dataset | ||
| .create_index( | ||
| &["i"], | ||
| IndexType::Scalar, | ||
| Some("scalar".into()), | ||
| &ScalarIndexParams::default(), | ||
| false, | ||
| ) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| compact_files( | ||
| &mut dataset, | ||
| CompactionOptions { | ||
| target_rows_per_fragment: 2_000, | ||
| defer_index_remap: true, | ||
| ..Default::default() | ||
| }, | ||
| None, | ||
| ) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| let frag_reuse_index_meta = dataset | ||
| .load_index_by_name(FRAG_REUSE_INDEX_NAME) | ||
| .await | ||
| .unwrap() | ||
| .expect("Fragment reuse index must be available"); | ||
|
|
||
| // Replace the reuse index with one whose details cannot be loaded. | ||
| let corrupt_meta = IndexMetadata { | ||
| uuid: uuid::Uuid::new_v4(), | ||
| index_details: None, | ||
| ..frag_reuse_index_meta.clone() | ||
| }; | ||
| let transaction = Transaction::new( | ||
| dataset.manifest.version, | ||
| Operation::CreateIndex { | ||
| new_indices: vec![corrupt_meta], | ||
| removed_indices: vec![frag_reuse_index_meta], | ||
| }, | ||
| None, | ||
| ); | ||
| dataset | ||
| .apply_commit(transaction, &Default::default(), &Default::default()) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| assert!( | ||
| cleanup_frag_reuse_index(&mut dataset).await.is_err(), | ||
| "corrupt frag-reuse metadata must surface as an error, not a panic" | ||
| ); | ||
| } | ||
|
|
||
| /// With more than one index on the table, remapping every index must catch | ||
| /// all of them up so the reuse index can be trimmed. | ||
| /// | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The cleanup path still reads external fragment-reuse details with unchecked offset/size arithmetic. Malformed metadata can still panic or produce an invalid object-store range instead of returning a Python exception.