This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-java.git
The following commit(s) were added to refs/heads/main by this push:
new c4d3c9e27 GH-109: Implement Vector Validators for StringView (#886)
c4d3c9e27 is described below
commit c4d3c9e2777f5d7b55c3f8fdb5e5155b71bbd87a
Author: ViggoC <[email protected]>
AuthorDate: Wed Oct 29 20:42:45 2025 +0800
GH-109: Implement Vector Validators for StringView (#886)
## What's Changed
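Implement vector validators for the StringView (Utf8View/BinaryView) vector types: the buffer, data, type, and general validation visitors now validate view vectors instead of throwing UnsupportedOperationException.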
Closes #109.
---
.../validate/ValidateVectorBufferVisitor.java | 29 ++++++++++++++++------
.../vector/validate/ValidateVectorDataVisitor.java | 3 ++-
.../vector/validate/ValidateVectorTypeVisitor.java | 9 ++++++-
.../vector/validate/ValidateVectorVisitor.java | 9 +++++--
.../arrow/vector/TestVariableWidthViewVector.java | 17 ++++++++++++-
5 files changed, 54 insertions(+), 13 deletions(-)
diff --git
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
index 5c7215437..5cfe64b14 100644
---
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
+++
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
@@ -52,14 +52,22 @@ public class ValidateVectorBufferVisitor implements
VectorVisitor<Void, Void> {
if (vector instanceof FieldVector) {
FieldVector fieldVector = (FieldVector) vector;
- // TODO: https://github.com/apache/arrow/issues/41734
int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType);
- validateOrThrow(
- fieldVector.getFieldBuffers().size() == typeBufferCount,
- "Expected %s buffers in vector of type %s, got %s.",
- typeBufferCount,
- vector.getField().getType().toString(),
- fieldVector.getFieldBuffers().size());
+ if (TypeLayout.getTypeLayout(arrowType).isFixedBufferCount()) {
+ validateOrThrow(
+ fieldVector.getFieldBuffers().size() == typeBufferCount,
+ "Expected %s buffers in vector of type %s, got %s.",
+ typeBufferCount,
+ vector.getField().getType().toString(),
+ fieldVector.getFieldBuffers().size());
+ } else {
+ validateOrThrow(
+ fieldVector.getFieldBuffers().size() >= typeBufferCount,
+ "Expected at least %s buffers in vector of type %s, got %s.",
+ typeBufferCount,
+ vector.getField().getType().toString(),
+ fieldVector.getFieldBuffers().size());
+ }
}
}
@@ -158,7 +166,12 @@ public class ValidateVectorBufferVisitor implements
VectorVisitor<Void, Void> {
@Override
public Void visit(BaseVariableWidthViewVector vector, Void value) {
- throw new UnsupportedOperationException("View vectors are not supported.");
+ final int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateOrThrow(vector.getFieldBuffers().size() >= 2, "Expected at least 2
buffers.");
+ validateValidityBuffer(vector, valueCount);
+ validateDataBuffer(vector, (long) valueCount *
BaseVariableWidthViewVector.ELEMENT_SIZE);
+ return null;
}
@Override
diff --git
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
index c62bff79f..9da8cc813 100644
---
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
+++
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
@@ -121,7 +121,8 @@ public class ValidateVectorDataVisitor implements
VectorVisitor<Void, Void> {
@Override
public Void visit(BaseVariableWidthViewVector vector, Void value) {
- throw new UnsupportedOperationException("View vectors are not supported.");
+ vector.validateScalars();
+ return null;
}
@Override
diff --git
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
index daad41dbd..395852ef7 100644
---
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
+++
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
@@ -61,6 +61,8 @@ import org.apache.arrow.vector.UInt8Vector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarBinaryVector;
+import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.compare.VectorVisitor;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
@@ -380,7 +382,12 @@ public class ValidateVectorTypeVisitor implements
VectorVisitor<Void, Void> {
@Override
public Void visit(BaseVariableWidthViewVector vector, Void value) {
- throw new UnsupportedOperationException("View vectors are not supported.");
+ if (vector instanceof ViewVarCharVector) {
+ validateVectorCommon(vector, ArrowType.Utf8View.class);
+ } else if (vector instanceof ViewVarBinaryVector) {
+ validateVectorCommon(vector, ArrowType.BinaryView.class);
+ }
+ return null;
}
@Override
diff --git
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
index 5004ba488..211141001 100644
---
a/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
+++
b/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
@@ -107,8 +107,13 @@ public class ValidateVectorVisitor implements
VectorVisitor<Void, Void> {
}
@Override
- public Void visit(BaseVariableWidthViewVector left, Void value) {
- throw new UnsupportedOperationException("View vectors are not supported.");
+ public Void visit(BaseVariableWidthViewVector vector, Void value) {
+ if (vector.getValueCount() > 0) {
+ if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity()
== 0) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is
0");
+ }
+ }
+ return null;
}
@Override
diff --git
a/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
b/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
index f7c66a00b..baf5e672c 100644
---
a/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
+++
b/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
@@ -61,6 +61,7 @@ import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.ReusableByteArray;
import org.apache.arrow.vector.util.Text;
import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.validate.ValidateUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -2445,7 +2446,7 @@ public class TestVariableWidthViewVector {
final ViewVarBinaryVector sourceVector =
newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) {
testSplitAndTransferOnValiditySplitHelper(
- targetVector, sourceVector, startIndex, length, data);
+ targetVector, sourceVector, startIndex, length, binaryData);
}
}
@@ -2852,4 +2853,18 @@ public class TestVariableWidthViewVector {
}
}
}
+
+ @Test
+ public void testValidate() {
+ try (final ViewVarCharVector vector = new ViewVarCharVector("v",
allocator)) {
+ vector.validateFull();
+ setVector(vector, STR1, STR2, STR3);
+ vector.validateFull();
+
+ vector.getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e =
+ assertThrows(ValidateUtil.ValidateException.class, () ->
vector.validate());
+ assertTrue(e.getMessage().contains("Not enough capacity for data
buffer"));
+ }
+ }
}