diff --git a/rust/perspective-js/test/js/constructors/arrow.spec.ts b/rust/perspective-js/test/js/constructors/arrow.spec.ts index 66eef26391..fe4f0b2165 100644 --- a/rust/perspective-js/test/js/constructors/arrow.spec.ts +++ b/rust/perspective-js/test/js/constructors/arrow.spec.ts @@ -75,6 +75,48 @@ test.describe("Arrow", function () { }); test.describe("regressions", () => { + // https://github.com/perspective-dev/perspective/issues/3169 + test("null values are preserved across multi-batch Arrow IPC streams", async function () { + function row( + identifier: string, + value: number | null, + date: Date | null, + ) { + return arrow.tableFromArrays({ + Identifier: arrow.vectorFromArray( + [identifier], + new arrow.Utf8(), + ), + Value: arrow.vectorFromArray([value], new arrow.Float64()), + Date: arrow.vectorFromArray([date], new arrow.DateDay()), + }); + } + + const t1 = row("A", null, null); + const t2 = row("B", 5, null); + const t3 = row("C", null, new Date(Date.UTC(2025, 5, 15))); + + const multiBatchTable = new arrow.Table([ + ...t1.batches, + ...t2.batches, + ...t3.batches, + ]); + expect(multiBatchTable.batches.length).toEqual(3); + + const ipc = arrow.tableToIPC(multiBatchTable, "stream"); + const table = await perspective.table(ipc.buffer as ArrayBuffer); + const view = await table.view(); + const json = await view.to_json(); + await view.delete(); + await table.delete(); + + expect(json).toStrictEqual([ + { Identifier: "A", Value: null, Date: null }, + { Identifier: "B", Value: 5, Date: null }, + { Identifier: "C", Value: null, Date: 1749945600000 }, + ]); + }); + test("null equality works correctly in updates", async function () { async function write_to_json( buffer: ArrayBuffer, diff --git a/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp b/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp index 3b3e2257e5..a6226d923b 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp @@ -907,19 +907,30 @@ ArrowLoader::fill_column( copy_array(col, array, offset, len); } - // Fill validity bitmap + // Fill validity bitmap. Operate only on the current chunk's + // range [offset, offset+len); a whole-column fill here would + // clobber validity bits set by other chunks in a multi-batch + // ChunkedArray. std::int64_t null_count = array->null_count(); if (null_count == 0) { - col->valid_raw_fill(); + for (uint32_t i = 0; i < len; ++i) { + col->set_valid(offset + i, true); + } } else { const uint8_t* null_bitmap = array->null_bitmap_data(); // If the arrow column is of null type, the null - // bitmap is a nullptr - so just mark everything as - // invalid and move on. + // bitmap is a nullptr - so just mark this chunk's rows + // as invalid and move on. if (null_bitmap == nullptr) { - col->invalid_raw_fill(); + for (uint32_t i = 0; i < len; ++i) { + if (is_update) { + col->unset(offset + i); + } else { + col->clear(offset + i); + } + } } else { // Read the null bitmap and set the correct rows // as valid