Djjanks commented on code in PR #14:
URL: https://github.com/apache/arrow-js/pull/14#discussion_r2100731400


##########
src/ipc/reader.ts:
##########
@@ -369,9 +389,51 @@ abstract class RecordBatchReaderImpl<T extends TypeMap = 
any> implements RecordB
             new Vector(data)) :
             new Vector(data)).memoize() as Vector;
     }
-    protected _loadVectors(header: metadata.RecordBatch, body: any, types: 
(Field | DataType)[]) {
+    protected _loadVectors(header: metadata.RecordBatch, body: Uint8Array, 
types: (Field | DataType)[]) {
         return new VectorLoader(body, header.nodes, header.buffers, 
this.dictionaries, this.schema.metadataVersion).visitMany(types);
     }
+
+    private _decompressBuffers(header: metadata.RecordBatch, body: Uint8Array, 
codec: Codec): { decommpressedBody: Uint8Array; buffers: 
metadata.BufferRegion[] } {
+        const decompressedBuffers: Uint8Array[] = [];
+        const newBufferRegions: metadata.BufferRegion[] = [];
+
+        let currentOffset = 0;
+        for (const { offset, length } of header.buffers) {
+            if (length === 0) {
+                decompressedBuffers.push(new Uint8Array(0));
+                newBufferRegions.push(new metadata.BufferRegion(currentOffset, 
0));
+                continue;
+            }
+            const byteBuf = new flatbuffers.ByteBuffer(body.subarray(offset, 
offset + length));
+            const uncompressedLenth = bigIntToNumber(byteBuf.readInt64(0));
+
+
+            const bytes = byteBuf.bytes().subarray(LENGTH_OF_PREFIX_DATA);
+
+            const decompressed = (uncompressedLenth === 
LENGTH_NO_COMPRESSED_DATA)
+                ? bytes
+                : codec.decode!(bytes);
+
+            decompressedBuffers.push(decompressed);
+
+            const padding = (DEFAULT_ALIGNMENT - (currentOffset % 
DEFAULT_ALIGNMENT)) % DEFAULT_ALIGNMENT;
+            currentOffset += padding;
+            newBufferRegions.push(new metadata.BufferRegion(currentOffset, 
decompressed.length));
+            currentOffset += decompressed.length;
+        }
+
+        const totalSize = currentOffset;
+        const combined = new Uint8Array(totalSize);
+
+        for (const [i, decompressedBuffer] of decompressedBuffers.entries()) {
+            combined.set(decompressedBuffer, newBufferRegions[i].offset);

Review Comment:
   I ended up solving this issue without implementing a `VirtualUint8Array`. 
Instead, I modified the body parameter signature in `_loadVectors` and the 
`VectorLoader` constructor to accept `Uint8Array | Uint8Array[]`.
   
   It worked out nicely because the class already has a `buffersIndex` parameter 
that points to the correct buffer, and in my case, the decompression order 
matches the `BufferRegion[]` sequence. This approach required minimal changes, 
and thanks to the type signature, TypeScript will prevent errors in future 
modifications to `VectorLoader`.
   
   Your suggested approach (with `CompressedVectorLoader`) is also 
interesting—it would help isolate the logic for compressed buffers. If you 
think it’s the better solution, I can refactor the code to use it instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to