wgtmac commented on code in PR #1278: URL: https://github.com/apache/parquet-mr/pull/1278#discussion_r1500221059
########## parquet-common/src/main/java/org/apache/parquet/bytes/ReusingByteBufferAllocator.java: ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.bytes; + +import java.nio.ByteBuffer; + +/** + * A special {@link ByteBufferAllocator} implementation that keeps one {@link ByteBuffer} object and reuse it at the Review Comment: ```suggestion * A special {@link ByteBufferAllocator} implementation that keeps one {@link ByteBuffer} object and reuses it at the ``` ########## parquet-common/src/main/java/org/apache/parquet/bytes/BytesInput.java: ########## @@ -207,10 +211,18 @@ public static BytesInput copy(BytesInput bytesInput) throws IOException { */ public abstract void writeAllTo(OutputStream out) throws IOException; + /** + * For internal use only. It is expected that the buffer is large enough to fit the content of this {@link BytesInput} Review Comment: Should we add a comment for what to expect if the content does not fit into the ByteBuffer? ########## parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java: ########## @@ -1450,10 +1449,11 @@ private static void verifyFooterIntegrity( AesGcmEncryptor footerSigner = fileDecryptor.createSignedFooterEncryptor(); - byte[] footerAndSignature = ((ByteBufferInputStream) from).slice(0).array(); int footerSignatureLength = AesCipher.NONCE_LENGTH + AesCipher.GCM_TAG_LENGTH; byte[] serializedFooter = new byte[combinedFooterLength - footerSignatureLength]; - System.arraycopy(footerAndSignature, 0, serializedFooter, 0, serializedFooter.length); + // Resetting to the beginning of the footer + from.reset(); Review Comment: Should we check `from.markSupported()` before calling reset() and mark()? ########## parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java: ########## @@ -422,18 +417,15 @@ void addColumn(ColumnDescriptor path, ColumnChunkPageReader reader) { } } - void setBuffersToRelease(ByteBufferAllocator allocator, List<ByteBuffer> toRelease) { - this.allocator = allocator; - this.toRelease = toRelease; + void setReleaser(ByteBufferReleaser releaser) { + this.releaser = releaser; Review Comment: Should we check if the passed releaser is null? ########## parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java: ########## @@ -381,6 +386,33 @@ public ParquetFileWriter( null); } + public ParquetFileWriter( + OutputFile file, + MessageType schema, + Mode mode, + long rowGroupSize, + int maxPaddingSize, + int columnIndexTruncateLength, + int statisticsTruncateLength, + boolean pageWriteChecksumEnabled, + FileEncryptionProperties encryptionProperties, + ByteBufferAllocator allocator) + throws IOException { + this( + file, + schema, + mode, + rowGroupSize, + maxPaddingSize, + columnIndexTruncateLength, + statisticsTruncateLength, + pageWriteChecksumEnabled, + encryptionProperties, + null, + allocator); + } + + @Deprecated Review Comment: The argument list grows longer now. Should we use an options class instead to avoid frequent deprecation? ########## parquet-common/src/main/java/org/apache/parquet/bytes/ConcatenatingByteBufferCollector.java: ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.bytes; + +import static java.lang.String.format; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; +import java.util.List; + +/** + * Alternative to {@link ConcatenatingByteArrayCollector} but using {@link java.nio.ByteBuffer}s allocated by its + * {@link ByteBufferAllocator}. + */ +public class ConcatenatingByteBufferCollector extends BytesInput implements AutoCloseable { + + private final ByteBufferAllocator allocator; + private final List<ByteBuffer> slabs = new ArrayList<>(); + private long size = 0; + + /** + * Constructs a new {@link ConcatenatingByteBufferCollector} instance with the specified allocator. + * + * @param allocator to be used for allocating the required {@link ByteBuffer} instances + */ + public ConcatenatingByteBufferCollector(ByteBufferAllocator allocator) { + this.allocator = allocator; + } + + /** + * Collects the content of the specified input. It allocates a new {@link ByteBuffer} instance that can contain all + * the content. + * + * @param bytesInput the input which content is to be collected + */ + public void collect(BytesInput bytesInput) { + int inputSize = Math.toIntExact(bytesInput.size()); + ByteBuffer slab = allocator.allocate(inputSize); + bytesInput.writeInto(slab); + slab.flip(); + slabs.add(slab); + size += inputSize; + } + + @Override + public void close() { + for (ByteBuffer slab : slabs) { + allocator.release(slab); + } + slabs.clear(); + } + + @Override + public void writeAllTo(OutputStream out) throws IOException { + WritableByteChannel channel = Channels.newChannel(out); + for (ByteBuffer buffer : slabs) { + channel.write(buffer.duplicate()); + } + } + + @Override + public void writeInto(ByteBuffer buffer) { + for (ByteBuffer slab : slabs) { + buffer.put(slab.duplicate()); + } + } + + @Override + ByteBuffer getInternalByteBuffer() { + return slabs.size() == 1 ? slabs.get(0).duplicate() : null; + } + + @Override + public long size() { + return size; + } + + /** + * @param prefix a prefix to be used for every new line in the string + * @return a text representation of the memory usage of this structure + */ + public String memUsageString(String prefix) { + return format("%s %s %d slabs, %,d bytes", prefix, getClass().getSimpleName(), slabs.size(), size); Review Comment: ```suggestion return format("%s %s %d slabs, %d bytes", prefix, getClass().getSimpleName(), slabs.size(), size); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
