This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-js.git
The following commit(s) were added to refs/heads/main by this push:
new 5ebbe36 fix: Write zero-filled values buffer for fully-null Bool
columns in IPC (#392)
5ebbe36 is described below
commit 5ebbe36a75c94304e2451f96cf7d6899c9c06af5
Author: Alon Spivack <[email protected]>
AuthorDate: Tue Mar 3 02:43:24 2026 +0200
fix: Write zero-filled values buffer for fully-null Bool columns in IPC
(#392)
## Summary
When a Bool column is fully null (`nullCount >= length`),
`assembleBoolVector` in `VectorAssembler` previously returned early after
adding only a zero-length placeholder values buffer (`new Uint8Array(0)`),
producing an IPC stream with 0 bytes for buffer `#1`.
This violates the Arrow IPC specification, which requires a data buffer of
`ceil(length / 8)` bytes for Bool arrays regardless of null count. Other
implementations (PyArrow, arrow-rs) reject these streams with:
> Buffer `#1` too small in array of type Bool. Expected at least 1 byte(s), got 0
## Fix
When all values are null, write a zero-filled `Uint8Array` of the correct
byte length, `(data.length + 7) >> 3` (i.e. `ceil(length / 8)`), instead of
a zero-length placeholder buffer.
## Tests
Added 4 round-trip tests for fully-null Bool columns through
`tableToIPC` / `tableFromIPC`:
- Single-row fully-null Bool
- 2-row fully-null Bool (file format)
- 10-row fully-null Bool (crosses byte boundary)
- Mixed table with normal Int32 + fully-null Bool columns
Closes #68
Co-authored-by: Alon Spivack <[email protected]>
---
src/visitor/vectorassembler.ts | 4 +--
test/unit/ipc/serialization-tests.ts | 51 ++++++++++++++++++++++++++++++++++--
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/src/visitor/vectorassembler.ts b/src/visitor/vectorassembler.ts
index 2ac6f8f..ae4b712 100644
--- a/src/visitor/vectorassembler.ts
+++ b/src/visitor/vectorassembler.ts
@@ -186,8 +186,8 @@ function assembleBoolVector<T extends Bool>(this:
VectorAssembler, data: Data<T>
// Bool vector is a special case of FlatVector, as its data buffer needs
to stay packed
let values: Uint8Array;
if (data.nullCount >= data.length) {
- // If all values are null, just insert a placeholder empty data buffer
(fastest path)
- return addBuffer.call(this, new Uint8Array(0));
+ // If all values are null, write a zero-filled data buffer of the
correct byte length
+ return addBuffer.call(this, new Uint8Array((data.length + 7) >> 3));
} else if ((values = data.values) instanceof Uint8Array) {
// If values is already a Uint8Array, slice the bitmap (fast path)
return addBuffer.call(this, truncateBitmap(data.offset, data.length,
values));
diff --git a/test/unit/ipc/serialization-tests.ts
b/test/unit/ipc/serialization-tests.ts
index f31a242..300e02c 100644
--- a/test/unit/ipc/serialization-tests.ts
+++ b/test/unit/ipc/serialization-tests.ts
@@ -19,8 +19,8 @@ import '../../jest-extensions.js';
import * as generate from '../../generate-test-data.js';
import {
- Table, Schema, Field, DataType, TypeMap, Dictionary, Int32, Float32,
Uint8, Utf8, Null,
- makeVector,
+ Table, Schema, Field, DataType, TypeMap, Dictionary, Int32, Float32,
Uint8, Utf8, Null, Bool,
+ makeVector, vectorFromArray,
tableFromIPC, tableToIPC, RecordBatchReader, RecordBatchStreamWriter
} from 'apache-arrow';
@@ -121,6 +121,53 @@ describe('tableToIPC()', () => {
expect(result).toEqualTable(source);
});
+ test(`single-row fully-null Bool column round-trips`, () => {
+ const source = new Table({
+ a: vectorFromArray([null], new Bool()),
+ });
+ const buffer = tableToIPC(source);
+ const result = tableFromIPC(buffer);
+ expect(result.numRows).toBe(1);
+ expect(result.getChild('a')!.get(0)).toBeNull();
+ });
+
+ test(`fully-null Bool column round-trips through IPC file format`, () => {
+ const source = new Table({
+ a: vectorFromArray([null, null], new Bool()),
+ });
+ const buffer = tableToIPC(source, 'file');
+ const result = tableFromIPC(buffer);
+ expect(result.numRows).toBe(2);
+ expect(result.getChild('a')!.get(0)).toBeNull();
+ expect(result.getChild('a')!.get(1)).toBeNull();
+ });
+
+ test(`fully-null Bool column with length > 8 round-trips through
serialization`, () => {
+ const source = new Table({
+ a: vectorFromArray(new Array(10).fill(null), new Bool()),
+ });
+ const buffer = tableToIPC(source);
+ const result = tableFromIPC(buffer);
+ expect(result.numRows).toBe(10);
+ for (let i = 0; i < 10; i++) {
+ expect(result.getChild('a')!.get(i)).toBeNull();
+ }
+ });
+
+ test(`mixed table with normal column and fully-null Bool column
round-trips through serialization`, () => {
+ const source = new Table({
+ a: makeVector(new Int32Array([1, 2, 3])),
+ b: vectorFromArray([null, null, null], new Bool()),
+ });
+ const buffer = tableToIPC(source);
+ const result = tableFromIPC(buffer);
+ expect(result.numRows).toBe(3);
+ expect(result.getChild('a')!.toArray()).toEqual(new Int32Array([1, 2,
3]));
+ for (let i = 0; i < 3; i++) {
+ expect(result.getChild('b')!.get(i)).toBeNull();
+ }
+ });
+
const chunkLengths = [] as number[];
const table = <T extends TypeMap = any>(schema: Schema<T>) =>
createTable(schema, chunkLengths);
for (let i = -1; ++i < 3;) {