This is an automated email from the ASF dual-hosted git repository.
shangxinli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 44dc3a4ae Performance optimization to ByteBitPackingValuesReader (#962)
44dc3a4ae is described below
commit 44dc3a4aef8e7746408381a7b11ff7ab8e888c3f
Author: Timothy N. Miller <[email protected]>
AuthorDate: Sun Oct 9 14:59:00 2022 -0400
Performance optimization to ByteBitPackingValuesReader (#962)
Remove object creation out of critical path
Move less-used code into separate function to encourage JIT to inline
more frequently used code.
---
.../bitpacking/ByteBitPackingValuesReader.java | 38 +++++++++++++---------
1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java
index 1fa62d4b5..0294b6c13 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java
@@ -38,31 +38,39 @@ public class ByteBitPackingValuesReader extends ValuesReader {
private final int[] decoded = new int[VALUES_AT_A_TIME];
private int decodedPosition = VALUES_AT_A_TIME - 1;
private ByteBufferInputStream in;
+ private final byte[] tempEncode;
public ByteBitPackingValuesReader(int bound, Packer packer) {
this.bitWidth = BytesUtils.getWidthFromMaxInt(bound);
this.packer = packer.newBytePacker(bitWidth);
+ // Create and retain byte array to avoid object creation in the critical path
+ this.tempEncode = new byte[this.bitWidth];
+ }
+
+ private void readMore() {
+ try {
+ int avail = in.available();
+ if (avail < bitWidth) {
+ in.read(tempEncode, 0, avail);
+ // Clear the portion of the array we didn't read into
+ for (int i=avail; i<bitWidth; i++) tempEncode[i] = 0;
+ } else {
+ in.read(tempEncode, 0, bitWidth);
+ }
+
+ // The "deprecated" unpacker is faster than using the one that takes ByteBuffer
+ packer.unpack8Values(tempEncode, 0, decoded, 0);
+ } catch (IOException e) {
+ throw new ParquetDecodingException("Failed to read packed values", e);
+ }
+ decodedPosition = 0;
}
@Override
public int readInteger() {
++ decodedPosition;
if (decodedPosition == decoded.length) {
- try {
- if (in.available() < bitWidth) {
- // unpack8Values needs at least bitWidth bytes to read from,
- // We have to fill in 0 byte at the end of encoded bytes.
- byte[] tempEncode = new byte[bitWidth];
- in.read(tempEncode, 0, in.available());
- packer.unpack8Values(tempEncode, 0, decoded, 0);
- } else {
- ByteBuffer encoded = in.slice(bitWidth);
- packer.unpack8Values(encoded, encoded.position(), decoded, 0);
- }
- } catch (IOException e) {
- throw new ParquetDecodingException("Failed to read packed values", e);
- }
- decodedPosition = 0;
+ readMore();
}
return decoded[decodedPosition];
}