This is an automated email from the ASF dual-hosted git repository.
zivanfi pushed a commit to branch parquet-1.8.x
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/parquet-1.8.x by this push:
new 3db8547 Revert "PARQUET-852: Slowly ramp up sizes of byte[] in
ByteBasedBitPackingEncoder"
3db8547 is described below
commit 3db8547dc21a9d68e166f55dc877af4b2455bd83
Author: Gabor Szadovszky <[email protected]>
AuthorDate: Mon Apr 23 13:31:56 2018 +0200
Revert "PARQUET-852: Slowly ramp up sizes of byte[] in
ByteBasedBitPackingEncoder"
Reverting this change as it is a minor improvement and shall not be part of
a maintenance release.
This reverts commit d59b32a9120ad40e2a9f6651b680e84dae1747a6.
---
.../bitpacking/ByteBasedBitPackingEncoder.java | 30 ++++++----------------
.../bitpacking/TestByteBasedBitPackingEncoder.java | 18 +++++--------
2 files changed, 14 insertions(+), 34 deletions(-)
diff --git
a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
index 0bc8b30..cc23e8f 100644
---
a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
+++
b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -39,14 +39,11 @@ public class ByteBasedBitPackingEncoder {
private static final Logger LOG =
LoggerFactory.getLogger(ByteBasedBitPackingEncoder.class);
private static final int VALUES_WRITTEN_AT_A_TIME = 8;
- private static final int MAX_SLAB_SIZE_MULT = 64 * 1024;
- private static final int INITIAL_SLAB_SIZE_MULT = 1024;
private final int bitWidth;
private final BytePacker packer;
private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
- private int slabSize;
- private long totalFullSlabSize;
+ private final int slabSize;
private int inputSize;
private byte[] packed;
private int packedPosition;
@@ -59,9 +56,8 @@ public class ByteBasedBitPackingEncoder {
public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
this.bitWidth = bitWidth;
this.inputSize = 0;
- this.totalFullSlabSize = 0;
// must be a multiple of bitWidth
- this.slabSize = (bitWidth == 0) ? 1 : (bitWidth * INITIAL_SLAB_SIZE_MULT);
+ this.slabSize = bitWidth * 64 * 1024;
initPackedSlab();
this.packer = packer.newBytePacker(bitWidth);
}
@@ -79,10 +75,6 @@ public class ByteBasedBitPackingEncoder {
pack();
if (packedPosition == slabSize) {
slabs.add(BytesInput.from(packed));
- totalFullSlabSize += slabSize;
- if (slabSize < bitWidth * MAX_SLAB_SIZE_MULT) {
- slabSize *= 2;
- }
initPackedSlab();
}
}
@@ -107,7 +99,7 @@ public class ByteBasedBitPackingEncoder {
public BytesInput toBytes() throws IOException {
int packedByteLength = packedPosition +
BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
- LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
+ LOG.debug("writing {} bytes", (slabs.size() * slabSize +
packedByteLength));
if (inputSize > 0) {
for (int i = inputSize; i < input.length; i++) {
input[i] = 0;
@@ -121,24 +113,18 @@ public class ByteBasedBitPackingEncoder {
* @return size of the data as it would be written
*/
public long getBufferSize() {
- return BytesUtils.paddedByteCountFromBits((totalValues + inputSize) *
bitWidth);
+ return BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
}
/**
* @return total memory allocated
*/
public long getAllocatedSize() {
- return totalFullSlabSize + packed.length + input.length * 4;
+ return (slabs.size() * slabSize) + packed.length + input.length * 4;
}
public String memUsageString(String prefix) {
return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix,
slabs.size(), getAllocatedSize());
}
- /**
- * @return number of full slabs along with the current slab (debug aid)
- */
- int getNumSlabs() {
- return slabs.size() + 1;
- }
}
diff --git
a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
index b49595b..293b961 100644
---
a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
+++
b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,28 +18,22 @@
*/
package org.apache.parquet.column.values.bitpacking;
-import org.apache.parquet.bytes.BytesUtils;
import org.junit.Test;
-import static org.junit.Assert.assertEquals;
-
public class TestByteBasedBitPackingEncoder {
@Test
public void testSlabBoundary() {
- for (int i = 0; i <= 32; i++) {
+ for (int i = 0; i < 32; i++) {
final ByteBasedBitPackingEncoder encoder = new
ByteBasedBitPackingEncoder(i, Packer.BIG_ENDIAN);
- // make sure to write through the progression of slabs
- final int totalValues = 191 * 1024 * 8 + 10;
- for (int j = 0; j < totalValues; j++) {
+ // make sure to write more than a slab
+ for (int j = 0; j < 64 * 1024 * 32 + 10; j++) {
try {
encoder.writeInt(j);
} catch (Exception e) {
throw new RuntimeException(i + ": error writing " + j, e);
}
}
- assertEquals(BytesUtils.paddedByteCountFromBits(totalValues * i),
encoder.getBufferSize());
- assertEquals(i == 0 ? 1 : 9, encoder.getNumSlabs());
}
}
--
To stop receiving notification emails like this one, please contact
[email protected].