This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new bc923bd2cf GH-40937: [Java] Implement Holder-based functions for
ViewVarCharVector & ViewVarBinaryVector (#44187)
bc923bd2cf is described below
commit bc923bd2cf6a0323caa60860edb18c9e7530e34d
Author: ViggoC <[email protected]>
AuthorDate: Thu Sep 26 14:09:11 2024 +0800
GH-40937: [Java] Implement Holder-based functions for ViewVarCharVector &
ViewVarBinaryVector (#44187)
* GitHub Issue: #40936
* GitHub Issue: #40937
Authored-by: chenweiguo.vc <[email protected]>
Signed-off-by: David Li <[email protected]>
---
.../apache/arrow/vector/ViewVarBinaryVector.java | 54 ++++++--
.../org/apache/arrow/vector/ViewVarCharVector.java | 57 +++++++--
...ector.java => TestVariableWidthViewVector.java} | 136 ++++++++++++++++++++-
3 files changed, 223 insertions(+), 24 deletions(-)
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
index 9481831fbd..80d6952e00 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
@@ -132,8 +132,31 @@ public final class ViewVarBinaryVector extends
BaseVariableWidthViewVector
* @param holder data holder to be populated by this function
*/
public void get(int index, NullableViewVarBinaryHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40936
- throw new UnsupportedOperationException("Unsupported operation");
+ final int dataLength = getValueLength(index);
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ if (dataLength > INLINE_SIZE) {
+ // data is in the data buffer
+ // get buffer index
+ final int bufferIndex =
+ viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH +
PREFIX_WIDTH);
+ // get data offset
+ final int dataOffset =
+ viewBuffer.getInt(
+ ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH +
BUF_INDEX_WIDTH);
+ holder.buffer = dataBuffers.get(bufferIndex);
+ holder.start = dataOffset;
+ holder.end = dataOffset + dataLength;
+ } else {
+ final long dataOffset = ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH;
+ // data is stored inline in the view buffer
+ holder.buffer = viewBuffer;
+ holder.start = (int) dataOffset;
+ holder.end = (int) dataOffset + dataLength;
+ }
}
/*----------------------------------------------------------------*
@@ -150,8 +173,10 @@ public final class ViewVarBinaryVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void set(int index, ViewVarBinaryHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40936
- throw new UnsupportedOperationException("Unsupported operation");
+ int start = holder.start;
+ int length = holder.end - start;
+ setBytes(index, holder.buffer, start, length);
+ lastSet = index;
}
/**
@@ -162,8 +187,9 @@ public final class ViewVarBinaryVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, ViewVarBinaryHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40936
- throw new UnsupportedOperationException("Unsupported operation");
+ int length = holder.end - holder.start;
+ handleSafe(index, length);
+ set(index, holder);
}
/**
@@ -174,8 +200,15 @@ public final class ViewVarBinaryVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void set(int index, NullableViewVarBinaryHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40936
- throw new UnsupportedOperationException("Unsupported operation");
+ if (holder.isSet == 0) {
+ setNull(index);
+ } else {
+ BitVectorHelper.setBit(validityBuffer, index);
+ int start = holder.start;
+ int length = holder.end - start;
+ setBytes(index, holder.buffer, start, length);
+ }
+ lastSet = index;
}
/**
@@ -186,8 +219,9 @@ public final class ViewVarBinaryVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, NullableViewVarBinaryHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40936
- throw new UnsupportedOperationException("Unsupported operation");
+ int length = holder.end - holder.start;
+ handleSafe(index, length);
+ set(index, holder);
}
/*----------------------------------------------------------------*
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java
index 900e0493fe..dc474b68e3 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java
@@ -143,9 +143,31 @@ public final class ViewVarCharVector extends
BaseVariableWidthViewVector
* @param holder data holder to be populated by this function
*/
public void get(int index, NullableViewVarCharHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40937
- throw new UnsupportedOperationException(
- "NullableViewVarCharHolder get operation not supported");
+ final int dataLength = getValueLength(index);
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ if (dataLength > INLINE_SIZE) {
+ // data is in the data buffer
+ // get buffer index
+ final int bufferIndex =
+ viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH +
PREFIX_WIDTH);
+ // get data offset
+ final int dataOffset =
+ viewBuffer.getInt(
+ ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH +
BUF_INDEX_WIDTH);
+ holder.buffer = dataBuffers.get(bufferIndex);
+ holder.start = dataOffset;
+ holder.end = dataOffset + dataLength;
+ } else {
+ final long dataOffset = ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH;
+ // data is stored inline in the view buffer
+ holder.buffer = viewBuffer;
+ holder.start = (int) dataOffset;
+ holder.end = (int) dataOffset + dataLength;
+ }
}
/*----------------------------------------------------------------*
@@ -162,8 +184,10 @@ public final class ViewVarCharVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void set(int index, ViewVarCharHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40937
- throw new UnsupportedOperationException("ViewVarCharHolder set operation
not supported");
+ int start = holder.start;
+ int length = holder.end - start;
+ setBytes(index, holder.buffer, start, length);
+ lastSet = index;
}
/**
@@ -174,8 +198,9 @@ public final class ViewVarCharVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, ViewVarCharHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40937
- throw new UnsupportedOperationException("ViewVarCharHolder setSafe
operation not supported");
+ int length = holder.end - holder.start;
+ handleSafe(index, length);
+ set(index, holder);
}
/**
@@ -186,9 +211,15 @@ public final class ViewVarCharVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void set(int index, NullableViewVarCharHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40937
- throw new UnsupportedOperationException(
- "NullableViewVarCharHolder set operation not supported");
+ if (holder.isSet == 0) {
+ setNull(index);
+ } else {
+ BitVectorHelper.setBit(validityBuffer, index);
+ int start = holder.start;
+ int length = holder.end - start;
+ setBytes(index, holder.buffer, start, length);
+ }
+ lastSet = index;
}
/**
@@ -199,9 +230,9 @@ public final class ViewVarCharVector extends
BaseVariableWidthViewVector
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, NullableViewVarCharHolder holder) {
- // TODO: https://github.com/apache/arrow/issues/40937
- throw new UnsupportedOperationException(
- "NullableViewVarCharHolder setSafe operation not supported");
+ int length = holder.end - holder.start;
+ handleSafe(index, length);
+ set(index, holder);
}
/**
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
similarity index 95%
rename from
java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
rename to
java/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
index 232eec9ef1..a4533dba3b 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
@@ -48,6 +48,10 @@ import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.holders.NullableViewVarBinaryHolder;
+import org.apache.arrow.vector.holders.NullableViewVarCharHolder;
+import org.apache.arrow.vector.holders.ValueHolder;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.types.Types;
@@ -63,7 +67,7 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
-public class TestVarCharViewVector {
+public class TestVariableWidthViewVector {
// short string (length <= 12)
private static final byte[] STR0 =
"0123456".getBytes(StandardCharsets.UTF_8);
@@ -371,6 +375,136 @@ public class TestVarCharViewVector {
}
}
+ @Test
+ public void testSetNullableViewVarCharHolder() {
+ try (final ViewVarCharVector viewVarCharVector = new
ViewVarCharVector("myvector", allocator)) {
+ viewVarCharVector.allocateNew(0, 0);
+ final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5);
+
+ NullableViewVarCharHolder stringHolder = new NullableViewVarCharHolder();
+
+ // set not null
+ int size = strings.size();
+ for (int i = 0; i < size; i++) {
+ setAndCheck(viewVarCharVector, i, strings.get(i), stringHolder);
+ }
+
+ // set null
+ setAndCheck(viewVarCharVector, 6, null, stringHolder);
+
+ // copy by holder
+ // len < 12
+ copyAndCheck(viewVarCharVector, stringHolder, 0, 7);
+ // len > 12
+ copyAndCheck(viewVarCharVector, stringHolder, 2, 8);
+ // null
+ copyAndCheck(viewVarCharVector, stringHolder, 6, 9);
+
+ // test overwrite
+ for (int i = 0; i < size; i++) {
+ setAndCheck(viewVarCharVector, i, strings.get(size - i - 1),
stringHolder);
+ }
+
+ String longString = generateRandomString(128);
+ setAndCheck(viewVarCharVector, 6, longString.getBytes(), stringHolder);
+ }
+ }
+
+ @Test
+ public void testSetNullableViewVarBinaryHolder() {
+ try (final ViewVarBinaryVector viewVarBinaryVector =
+ new ViewVarBinaryVector("myvector", allocator)) {
+ viewVarBinaryVector.allocateNew(0, 0);
+ final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5);
+
+ NullableViewVarBinaryHolder holder = new NullableViewVarBinaryHolder();
+
+ // set not null
+ int size = strings.size();
+ for (int i = 0; i < size; i++) {
+ setAndCheck(viewVarBinaryVector, i, strings.get(i), holder);
+ }
+
+ // set null
+ setAndCheck(viewVarBinaryVector, 6, null, holder);
+
+ // copy by holder
+ // len < 12
+ copyAndCheck(viewVarBinaryVector, holder, 0, 7);
+ // len > 12
+ copyAndCheck(viewVarBinaryVector, holder, 2, 8);
+ // null
+ copyAndCheck(viewVarBinaryVector, holder, 6, 9);
+
+ // test overwrite
+ for (int i = 0; i < size; i++) {
+ setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder);
+ }
+
+ String longString = generateRandomString(128);
+ setAndCheck(viewVarBinaryVector, 6, longString.getBytes(), holder);
+ }
+ }
+
+ private static void copyAndCheck(
+ BaseVariableWidthViewVector vector, ValueHolder holder, int fromIndex,
int toIndex) {
+ if (vector instanceof ViewVarCharVector) {
+ ViewVarCharVector viewVarCharVector = (ViewVarCharVector) vector;
+ NullableViewVarCharHolder stringHolder = (NullableViewVarCharHolder)
holder;
+ viewVarCharVector.get(fromIndex, stringHolder);
+ viewVarCharVector.setSafe(toIndex, stringHolder);
+ }
+
+ if (vector instanceof ViewVarBinaryVector) {
+ ViewVarBinaryVector viewVarBinaryVector = (ViewVarBinaryVector) vector;
+ NullableViewVarBinaryHolder binaryHolder = (NullableViewVarBinaryHolder)
holder;
+ viewVarBinaryVector.get(fromIndex, binaryHolder);
+ viewVarBinaryVector.setSafe(toIndex, binaryHolder);
+ }
+
+ assertArrayEquals(vector.get(fromIndex), vector.get(toIndex));
+ }
+
+ private void setAndCheck(
+ ViewVarCharVector vector, int index, byte[] str,
NullableViewVarCharHolder stringHolder) {
+ ArrowBuf buf = null;
+ if (null == str) {
+ stringHolder.isSet = 0;
+ } else {
+ buf = allocator.buffer(str.length);
+ buf.setBytes(0, str);
+ stringHolder.isSet = 1;
+ stringHolder.start = 0;
+ stringHolder.end = str.length;
+ stringHolder.buffer = buf;
+ }
+ vector.setSafe(index, stringHolder);
+
+ // verify results
+ assertArrayEquals(str, vector.get(index));
+ AutoCloseables.closeNoChecked(buf);
+ }
+
+ private void setAndCheck(
+ ViewVarBinaryVector vector, int index, byte[] str,
NullableViewVarBinaryHolder binaryHolder) {
+ ArrowBuf buf = null;
+ if (null == str) {
+ binaryHolder.isSet = 0;
+ } else {
+ buf = allocator.buffer(str.length);
+ buf.setBytes(0, str);
+ binaryHolder.isSet = 1;
+ binaryHolder.start = 0;
+ binaryHolder.end = str.length;
+ binaryHolder.buffer = buf;
+ }
+ vector.setSafe(index, binaryHolder);
+
+ // verify results
+ assertArrayEquals(str, vector.get(index));
+ AutoCloseables.closeNoChecked(buf);
+ }
+
@Test
public void testAllocationIndexOutOfBounds() {
assertThrows(