Skip to content

Commit 2b28d2f

Browse files
committed
enhance: pickle the iterator if something failed
Signed-off-by: yangxuan <xuan.yang@zilliz.com>
1 parent d60de74 commit 2b28d2f

2 files changed

Lines changed: 18 additions & 3 deletions

File tree

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,4 +11,8 @@ venv/
11 11
.venv/
12 12
.idea/
13 13
results/
14-
logs/
14+
logs/
15+
16+
# AI rules
17+
CLAUDE.md
18+
AGENTS.md

vectordb_bench/backend/dataset.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@
6 6

7 7
import logging
8 8
import pathlib
9-
import typing
10 9
from enum import Enum
10+
from typing import Any, NamedTuple
11 11

12 12
import pandas as pd
13 13
import polars as pl
@@ -25,7 +25,7 @@
25 25
log = logging.getLogger(__name__)
26 26

27 27

28-
class SizeLabel(typing.NamedTuple):
28+
class SizeLabel(NamedTuple):
29 29
size: int
30 30
label: str
31 31
file_count: int
@@ -405,6 +405,17 @@ def __init__(self, dataset: DatasetManager):
405 405
self._cur = None
406 406
self._sub_idx = [0 for i in range(len(self._ds.train_files))] # iter num for each file
407 407

408+
def __getstate__(self):
409+
"""Custom pickle support to handle unpicklable generator."""
410+
state = self.__dict__.copy()
411+
# Remove the unpicklable generator from ParquetFile.iter_batches()
412+
state["_cur"] = None
413+
return state
414+
415+
def __setstate__(self, state: Any):
416+
"""Restore state after unpickling."""
417+
self.__dict__.update(state)
418+
408 419
def __iter__(self):
409 420
return self
410 421

0 commit comments

Comments
 (0)