This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 61b2926 ARROW-4532: [Java] fix bug causing very large varchar value
buffers
61b2926 is described below
commit 61b2926fdbd07dae3bfefae6aabdb2dc4a18f48c
Author: Pindikura Ravindra <[email protected]>
AuthorDate: Tue Feb 12 15:35:56 2019 +0100
ARROW-4532: [Java] fix bug causing very large varchar value buffers
The varchar/varbinary vectors have a setSafe variant that takes an arrow
buf and a start/end offsets. The method needs to expand the buffer to make
space for 'end - start' bytes. but, due to a bug, it was expanding it to
accommodate 'end' bytes, thereby making the value buffer much larger than
required.
Author: Pindikura Ravindra <[email protected]>
Closes #3613 from pravindra/varchar and squashes the following commits:
1b4f2243 <Pindikura Ravindra> ARROW-4532: fix review comments
8f88e3aa <Pindikura Ravindra> ARROW-4532: fix a size compute bug
---
.../arrow/vector/BaseVariableWidthVector.java | 2 +-
.../org/apache/arrow/vector/TestValueVector.java | 45 ++++++++++++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index ac148a2..a5b550a 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -1100,7 +1100,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
assert index >= 0;
final int dataLength = end - start;
fillEmpties(index);
- handleSafe(index, end);
+ handleSafe(index, dataLength);
BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
final int startOffset = offsetBuffer.getInt(index * OFFSET_WIDTH);
offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index 30fe23c..f0cc4c4 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -1395,6 +1395,51 @@ public class TestValueVector {
}
@Test
+ public void testSetSafeWithArrowBufNoExcessAllocs() {
+ final int numValues = BaseFixedWidthVector.INITIAL_VALUE_ALLOCATION * 2;
+ final byte[] valueBytes = "hello world".getBytes();
+ final int valueBytesLength = valueBytes.length;
+ final int isSet = 1;
+
+ try (
+ final VarCharVector fromVector = newVector(VarCharVector.class,
EMPTY_SCHEMA_PATH,
+ MinorType.VARCHAR, allocator);
+ final VarCharVector toVector = newVector(VarCharVector.class,
EMPTY_SCHEMA_PATH,
+ MinorType.VARCHAR, allocator)) {
+ /*
+ * Populate the from vector with 'numValues' with byte-arrays, each of
size 'valueBytesLength'.
+ */
+ fromVector.setInitialCapacity(numValues);
+ fromVector.allocateNew();
+ for (int i = 0; i < numValues; ++i) {
+ fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength);
+ }
+ fromVector.setValueCount(numValues);
+ ArrowBuf fromDataBuffer = fromVector.getDataBuffer();
+ assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity());
+
+ /*
+ * Copy the entries one-by-one from 'fromVector' to 'toVector', but use
the setSafe with
+ * ArrowBuf API (instead of setSafe with byte-array).
+ */
+ toVector.setInitialCapacity(numValues);
+ toVector.allocateNew();
+ for (int i = 0; i < numValues; i++) {
+ int start = fromVector.getstartOffset(i);
+ int end = fromVector.getstartOffset(i + 1);
+ toVector.setSafe(i, isSet, start, end, fromDataBuffer);
+ }
+
+ /*
+ * Since the 'fromVector' and 'toVector' have the same initial capacity,
and were populated
+ * with the same varchar elements, the allocations and hence, the final
capacity should be
+ * the same.
+ */
+ assertEquals(fromDataBuffer.capacity(),
toVector.getDataBuffer().capacity());
+ }
+ }
+
+ @Test
public void testCopyFromWithNulls() {
try (final VarCharVector vector = newVector(VarCharVector.class,
EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
final VarCharVector vector2 =