This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch storage-branch-2.8
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/storage-branch-2.8 by this
push:
new 02017b0 HIVE-25400: Move the offset updating in BytesColumnVector to
setValPreallocated
02017b0 is described below
commit 02017b04737de040b56ab40dacd120325ca6cc6f
Author: Owen O'Malley <[email protected]>
AuthorDate: Wed Jul 28 10:41:04 2021 -0700
HIVE-25400: Move the offset updating in BytesColumnVector to
setValPreallocated
This change moves the update to the sharedBufferOffset to
setValPreallocated. It
also means the internal code also needs to call setValPreallocated rather
than
use the direct access to the values.
Fixes #2543
Signed-off-by: Owen O'Malley <[email protected]>
---
.../hive/ql/exec/vector/BytesColumnVector.java | 34 +++++++++-----------
.../hive/ql/exec/vector/TestBytesColumnVector.java | 37 ++++++++++++++++++++++
2 files changed, 52 insertions(+), 19 deletions(-)
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index a8c58ac..3b26ac7 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -187,9 +187,7 @@ public class BytesColumnVector extends ColumnVector {
if (length > 0) {
System.arraycopy(sourceBuf, start, currentValue, currentOffset, length);
}
- vector[elementNum] = currentValue;
- this.start[elementNum] = currentOffset;
- this.length[elementNum] = length;
+ setValPreallocated(elementNum, length);
}
/**
@@ -213,18 +211,18 @@ public class BytesColumnVector extends ColumnVector {
* Ensures that we have space allocated for the next value, which has size
* length bytes.
*
- * Updates currentValue, currentOffset, and sharedBufferOffset for this
value.
+ * Updates currentValue and currentOffset for this value.
*
- * Always use before getValPreallocatedBytes, getValPreallocatedStart,
- * and setValPreallocated.
+ * Always use before getValPreallocatedBytes, getValPreallocatedStart.
+ * setValPreallocated must be called to actually reserve the bytes.
*/
public void ensureValPreallocated(int length) {
if ((sharedBufferOffset + length) > sharedBuffer.length) {
- currentValue = allocateBuffer(length);
+ // sets currentValue and currentOffset
+ allocateBuffer(length);
} else {
currentValue = sharedBuffer;
currentOffset = sharedBufferOffset;
- sharedBufferOffset += length;
}
}
@@ -246,6 +244,10 @@ public class BytesColumnVector extends ColumnVector {
vector[elementNum] = currentValue;
this.start[elementNum] = currentOffset;
this.length[elementNum] = length;
+ // If the current value is the shared buffer, move the next offset forward.
+ if (currentValue == sharedBuffer) {
+ sharedBufferOffset += length;
+ }
}
/**
@@ -264,9 +266,7 @@ public class BytesColumnVector extends ColumnVector {
byte[] rightSourceBuf, int rightStart, int rightLen) {
int newLen = leftLen + rightLen;
ensureValPreallocated(newLen);
- vector[elementNum] = currentValue;
- this.start[elementNum] = currentOffset;
- this.length[elementNum] = newLen;
+ setValPreallocated(elementNum, newLen);
System.arraycopy(leftSourceBuf, leftStart, currentValue, currentOffset,
leftLen);
System.arraycopy(rightSourceBuf, rightStart, currentValue,
@@ -275,9 +275,7 @@ public class BytesColumnVector extends ColumnVector {
/**
* Allocate/reuse enough buffer space to accommodate next element.
- * currentOffset is set to the first available byte in the returned array.
- * If sharedBuffer is used, sharedBufferOffset is updated to point after the
- * current record.
+ * Sets currentValue and currentOffset to the correct buffer and offset.
*
* This uses an exponential increase mechanism to rapidly
* increase buffer size to enough to hold all data.
@@ -285,9 +283,8 @@ public class BytesColumnVector extends ColumnVector {
* stabilize.
*
* @param nextElemLength size of next element to be added
- * @return the buffer to use for the next element
*/
- private byte[] allocateBuffer(int nextElemLength) {
+ private void allocateBuffer(int nextElemLength) {
// If this is a large value or shared buffer is maxed out, allocate a
// single use buffer. Assumes that sharedBuffer length and
// MAX_SIZE_FOR_SHARED_BUFFER are powers of 2.
@@ -295,8 +292,8 @@ public class BytesColumnVector extends ColumnVector {
sharedBufferOffset + nextElemLength >= MAX_SIZE_FOR_SHARED_BUFFER) {
// allocate a value for the next value
++bufferAllocationCount;
+ currentValue = new byte[nextElemLength];
currentOffset = 0;
- return new byte[nextElemLength];
} else {
// sharedBuffer might still be out of space
@@ -309,9 +306,8 @@ public class BytesColumnVector extends ColumnVector {
++bufferAllocationCount;
sharedBufferOffset = 0;
}
+ currentValue = sharedBuffer;
currentOffset = sharedBufferOffset;
- sharedBufferOffset += nextElemLength;
- return sharedBuffer;
}
}
diff --git
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
index 3b42d17..be4ff70 100644
---
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
+++
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
@@ -174,6 +174,43 @@ public class TestBytesColumnVector {
}
}
+ /**
+ * Make sure that ORC's redact mask won't get into trouble by increasing
+ * the size of the allocation before calling setValPreallocated.
+ */
+ @Test
+ public void testMultipleReallocations() {
+ BytesColumnVector vector = new BytesColumnVector();
+ vector.reset();
+
+ // add a value
+ vector.ensureValPreallocated(1000);
+ byte[] bytes = vector.getValPreallocatedBytes();
+ assertEquals(16 * 1024, bytes.length);
+ assertEquals(0, vector.getValPreallocatedStart());
+ // now ask for more memory
+ vector.ensureValPreallocated(5000);
+ // It shouldn't have needed to resize
+ assertSame(bytes, vector.getValPreallocatedBytes());
+ assertEquals(0, vector.getValPreallocatedStart());
+ // actually use 2000 of the bytes
+ vector.setValPreallocated(0, 2000);
+
+ // add another value
+ vector.ensureValPreallocated(10000);
+ assertSame(bytes, vector.getValPreallocatedBytes());
+ assertEquals(2000, vector.getValPreallocatedStart());
+ vector.setValPreallocated(1, 3000);
+
+ // check to make sure the values are allocated correctly
+ assertSame(bytes, vector.vector[0]);
+ assertEquals(0, vector.start[0]);
+ assertEquals(2000, vector.length[0]);
+ assertSame(bytes, vector.vector[1]);
+ assertEquals(2000, vector.start[1]);
+ assertEquals(3000, vector.length[1]);
+ }
+
// Write a value to the column vector, and return back the byte buffer used.
private static byte[] writeToBytesColumnVector(int rowIdx, BytesColumnVector
col, int writeSize, int val) {
col.ensureValPreallocated(writeSize);