This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-js.git
The following commit(s) were added to refs/heads/main by this push:
new c23916c fix: Preserve numRows for zero-column RecordBatch in IPC (#402)
c23916c is described below
commit c23916c22eb961d671b91bce5a9ef2842fe7bdbe
Author: Rusty Conover <[email protected]>
AuthorDate: Thu Mar 5 20:28:05 2026 -0500
fix: Preserve numRows for zero-column RecordBatch in IPC (#402)
## Summary
When a zero-column RecordBatch is deserialized from IPC, `ensureSameLengthData`
in the `RecordBatch` constructor recomputes length from children via
`chunks.reduce((max, col) => Math.max(max, col.length), 0)`. With zero children,
this always returns 0 — discarding the original length from the IPC message header.
Other Arrow implementations (PyArrow, Arrow Go, arrow-rs) correctly preserve
numRows for zero-column batches.
## Fix
Pass `this.data.length` to `ensureSameLengthData` as the explicit `maxLength`
parameter, which the function already accepts as an optional third argument.
For batches with columns, `this.data.length` already matches the max column
length, so there is no behavior change.
## Tests
- Read a PyArrow-generated zero-column IPC stream (100 rows) and verify numRows
- JS round-trip: write + read zero-column batch, verify numRows preserved
- Direct constructor: verify zero-column RecordBatch preserves length
Closes #401
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
src/recordbatch.ts | 2 +-
test/unit/ipc/reader/zero-column-batch-tests.ts | 75 +++++++++++++++++++++++++
2 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/src/recordbatch.ts b/src/recordbatch.ts
index 7cdc8b3..7c876dc 100644
--- a/src/recordbatch.ts
+++ b/src/recordbatch.ts
@@ -81,7 +81,7 @@ export class RecordBatch<T extends TypeMap = any> {
if (!(this.data instanceof Data)) {
throw new TypeError('RecordBatch constructor expects a
[Schema, Data] pair.');
}
- [this.schema, this.data] =
ensureSameLengthData<T>(this.schema, this.data.children as Data<T[keyof T]>[]);
+ [this.schema, this.data] =
ensureSameLengthData<T>(this.schema, this.data.children as Data<T[keyof T]>[],
this.data.length);
break;
}
case 1: {
diff --git a/test/unit/ipc/reader/zero-column-batch-tests.ts b/test/unit/ipc/reader/zero-column-batch-tests.ts
new file mode 100644
index 0000000..7e7e344
--- /dev/null
+++ b/test/unit/ipc/reader/zero-column-batch-tests.ts
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../../../jest-extensions.js';
+import {
+ makeData,
+ RecordBatch,
+ RecordBatchStreamWriter,
+ Schema,
+ Struct,
+ tableFromIPC,
+} from 'apache-arrow';
+
+/** Helper to create a zero-column IPC stream buffer with the given number of rows. */
+function createZeroColumnIPCBuffer(numRows: number): Uint8Array {
+ const schema = new Schema([]);
+ const data = makeData({
+ type: new Struct([]),
+ length: numRows,
+ nullCount: 0,
+ children: [],
+ });
+ const batch = new RecordBatch(schema, data);
+ const writer = new RecordBatchStreamWriter();
+ writer.write(batch);
+ writer.finish();
+ return writer.toUint8Array(true);
+}
+
+describe('Zero-column RecordBatch numRows preservation', () => {
+
+ describe('IPC round-trip', () => {
+
+ test('should read zero-column stream and preserve numRows', () => {
+ const buffer = createZeroColumnIPCBuffer(100);
+ const table = tableFromIPC(buffer);
+
+ expect(table.numRows).toBe(100);
+ expect(table.numCols).toBe(0);
+ expect(table.batches).toHaveLength(1);
+ expect(table.batches[0].numRows).toBe(100);
+ });
+ });
+
+ describe('Direct constructor', () => {
+
+ test('RecordBatch constructor preserves length for zero-column data',
() => {
+ const schema = new Schema([]);
+ const data = makeData({
+ type: new Struct([]),
+ length: 100,
+ nullCount: 0,
+ children: [],
+ });
+ const batch = new RecordBatch(schema, data);
+
+ expect(batch.numRows).toBe(100);
+ expect(batch.numCols).toBe(0);
+ });
+ });
+});