From c880256bfedccb037ea9780824eca79dd887640b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Pavela?=
Date: Mon, 18 May 2026 09:53:57 +0200
Subject: [PATCH] Cap batchSize by row count

---
 parquet/pqarrow/file_reader.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/parquet/pqarrow/file_reader.go b/parquet/pqarrow/file_reader.go
index 34992f8a..500dbc2c 100644
--- a/parquet/pqarrow/file_reader.go
+++ b/parquet/pqarrow/file_reader.go
@@ -28,6 +28,7 @@ import (
 	"github.com/apache/arrow-go/v18/arrow"
 	"github.com/apache/arrow-go/v18/arrow/array"
 	"github.com/apache/arrow-go/v18/arrow/arrio"
+	"github.com/apache/arrow-go/v18/arrow/bitutil"
 	"github.com/apache/arrow-go/v18/arrow/memory"
 	"github.com/apache/arrow-go/v18/internal/utils"
 	"github.com/apache/arrow-go/v18/parquet"
@@ -519,6 +520,8 @@ func (fr *FileReader) GetRecordReader(ctx context.Context, colIndices, rowGroups
 	batchSize := fr.Props.BatchSize
 	if fr.Props.BatchSize <= 0 {
 		batchSize = nrows
+	} else {
+		batchSize = min(fr.Props.BatchSize, int64(bitutil.NextPowerOf2(int(nrows))))
 	}
 	rr := &recordReader{
 		numRows: nrows,