trxcllnt commented on code in PR #14: URL: https://github.com/apache/arrow-js/pull/14#discussion_r2103290165
########## src/ipc/reader.ts: ########## @@ -369,9 +389,51 @@ abstract class RecordBatchReaderImpl<T extends TypeMap = any> implements RecordB new Vector(data)) : new Vector(data)).memoize() as Vector; } - protected _loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) { + protected _loadVectors(header: metadata.RecordBatch, body: Uint8Array, types: (Field | DataType)[]) { return new VectorLoader(body, header.nodes, header.buffers, this.dictionaries, this.schema.metadataVersion).visitMany(types); } + + private _decompressBuffers(header: metadata.RecordBatch, body: Uint8Array, codec: Codec): { decommpressedBody: Uint8Array; buffers: metadata.BufferRegion[] } { + const decompressedBuffers: Uint8Array[] = []; + const newBufferRegions: metadata.BufferRegion[] = []; + + let currentOffset = 0; + for (const { offset, length } of header.buffers) { + if (length === 0) { + decompressedBuffers.push(new Uint8Array(0)); + newBufferRegions.push(new metadata.BufferRegion(currentOffset, 0)); + continue; + } + const byteBuf = new flatbuffers.ByteBuffer(body.subarray(offset, offset + length)); + const uncompressedLenth = bigIntToNumber(byteBuf.readInt64(0)); + + + const bytes = byteBuf.bytes().subarray(LENGTH_OF_PREFIX_DATA); + + const decompressed = (uncompressedLenth === LENGTH_NO_COMPRESSED_DATA) + ? bytes + : codec.decode!(bytes); + + decompressedBuffers.push(decompressed); + + const padding = (DEFAULT_ALIGNMENT - (currentOffset % DEFAULT_ALIGNMENT)) % DEFAULT_ALIGNMENT; + currentOffset += padding; + newBufferRegions.push(new metadata.BufferRegion(currentOffset, decompressed.length)); + currentOffset += decompressed.length; + } + + const totalSize = currentOffset; + const combined = new Uint8Array(totalSize); + + for (const [i, decompressedBuffer] of decompressedBuffers.entries()) { + combined.set(decompressedBuffer, newBufferRegions[i].offset); Review Comment: I can take a look at adding it to the writer. I wouldn't want to merge this PR without at least a limited set of tests, and verifying we can read what we write is the easiest way to do that. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org