This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e61c105c73 GH-41584: [Java] ListView Implementation for C Data
Interface (#43686)
e61c105c73 is described below
commit e61c105c73dfabb51d5afc972ff21cc5326b3d93
Author: Vibhatha Lakmal Abeykoon <[email protected]>
AuthorDate: Sat Aug 24 07:07:09 2024 +0530
GH-41584: [Java] ListView Implementation for C Data Interface (#43686)
### Rationale for this change
The C Data Interface does not yet support `ListView` and `LargeListView`,
even though the core functionality for these types was recently merged.
Also closes:
- [x] https://github.com/apache/arrow/issues/41585
### What changes are included in this PR?
This PR adds C Data Interface support for `ListView` and `LargeListView`,
along with the corresponding test cases.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* GitHub Issue: #41584
Authored-by: Vibhatha Abeykoon <[email protected]>
Signed-off-by: David Li <[email protected]>
---
dev/archery/archery/integration/datagen.py | 1 -
.../apache/arrow/c/BufferImportTypeVisitor.java | 14 ++-
.../c/src/main/java/org/apache/arrow/c/Format.java | 8 ++
.../java/org/apache/arrow/c/RoundtripTest.java | 42 +++++++
java/c/src/test/python/integration_tests.py | 47 ++++++++
.../complex/BaseLargeRepeatedValueViewVector.java | 29 +++--
.../complex/BaseRepeatedValueViewVector.java | 30 +++--
.../arrow/vector/complex/LargeListViewVector.java | 10 +-
.../arrow/vector/complex/ListViewVector.java | 6 +-
.../arrow/vector/TestLargeListViewVector.java | 134 +++++++++++++++++++++
.../apache/arrow/vector/TestListViewVector.java | 132 ++++++++++++++++++++
.../vector/testing/ValueVectorDataPopulator.java | 34 ++++++
12 files changed, 451 insertions(+), 36 deletions(-)
diff --git a/dev/archery/archery/integration/datagen.py
b/dev/archery/archery/integration/datagen.py
index 47310c905a..d395d26cb7 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1936,7 +1936,6 @@ def get_generated_json_files(tempdir=None):
generate_list_view_case()
.skip_tester('C#') # Doesn't support large list views
- .skip_tester('Java')
.skip_tester('JS')
.skip_tester('nanoarrow')
.skip_tester('Rust'),
diff --git
a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
index 633ecd43bd..93fef6d7ca 100644
--- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
+++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
@@ -47,7 +47,9 @@ import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.LargeListViewVector;
import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
@@ -400,13 +402,17 @@ class BufferImportTypeVisitor implements
ArrowType.ArrowTypeVisitor<List<ArrowBu
@Override
public List<ArrowBuf> visit(ArrowType.ListView type) {
- throw new UnsupportedOperationException(
- "Importing buffers for view type: " + type + " not supported");
+ return Arrays.asList(
+ maybeImportBitmap(type),
+ importFixedBytes(type, 1, ListViewVector.OFFSET_WIDTH),
+ importFixedBytes(type, 2, ListViewVector.SIZE_WIDTH));
}
@Override
public List<ArrowBuf> visit(ArrowType.LargeListView type) {
- throw new UnsupportedOperationException(
- "Importing buffers for view type: " + type + " not supported");
+ return Arrays.asList(
+ maybeImportBitmap(type),
+ importFixedBytes(type, 1, LargeListViewVector.OFFSET_WIDTH),
+ importFixedBytes(type, 2, LargeListViewVector.SIZE_WIDTH));
}
}
diff --git a/java/c/src/main/java/org/apache/arrow/c/Format.java
b/java/c/src/main/java/org/apache/arrow/c/Format.java
index aff51e7b73..f77a555d18 100644
--- a/java/c/src/main/java/org/apache/arrow/c/Format.java
+++ b/java/c/src/main/java/org/apache/arrow/c/Format.java
@@ -229,6 +229,10 @@ final class Format {
return "vu";
case BinaryView:
return "vz";
+ case ListView:
+ return "+vl";
+ case LargeListView:
+ return "+vL";
case NONE:
throw new IllegalArgumentException("Arrow type ID is NONE");
default:
@@ -313,6 +317,10 @@ final class Format {
return new ArrowType.Utf8View();
case "vz":
return new ArrowType.BinaryView();
+ case "+vl":
+ return new ArrowType.ListView();
+ case "+vL":
+ return new ArrowType.LargeListView();
default:
String[] parts = format.split(":", 2);
if (parts.length == 2) {
diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
index 6591d1f730..18b2e94add 100644
--- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
@@ -84,7 +84,9 @@ import org.apache.arrow.vector.ZeroVector;
import org.apache.arrow.vector.compare.VectorEqualsVisitor;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.LargeListViewVector;
import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;
@@ -683,6 +685,46 @@ public class RoundtripTest {
}
}
+ @Test
+ public void testListViewVector() {
+ try (final ListViewVector vector = ListViewVector.empty("v", allocator)) {
+ setVector(
+ vector,
+ Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()),
+ Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()),
+ new ArrayList<Integer>());
+ assertTrue(roundtrip(vector, ListViewVector.class));
+ }
+ }
+
+ @Test
+ public void testEmptyListViewVector() {
+ try (final ListViewVector vector = ListViewVector.empty("v", allocator)) {
+ setVector(vector, new ArrayList<Integer>());
+ assertTrue(roundtrip(vector, ListViewVector.class));
+ }
+ }
+
+ @Test
+ public void testLargeListViewVector() {
+ try (final LargeListViewVector vector = LargeListViewVector.empty("v",
allocator)) {
+ setVector(
+ vector,
+ Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()),
+ Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()),
+ new ArrayList<Integer>());
+ assertTrue(roundtrip(vector, LargeListViewVector.class));
+ }
+ }
+
+ @Test
+ public void testEmptyLargeListViewVector() {
+ try (final LargeListViewVector vector = LargeListViewVector.empty("v",
allocator)) {
+ setVector(vector, new ArrayList<Integer>());
+ assertTrue(roundtrip(vector, LargeListViewVector.class));
+ }
+ }
+
@Test
public void testMapVector() {
int count = 5;
diff --git a/java/c/src/test/python/integration_tests.py
b/java/c/src/test/python/integration_tests.py
index ab2ee1742f..b0a86e9c66 100644
--- a/java/c/src/test/python/integration_tests.py
+++ b/java/c/src/test/python/integration_tests.py
@@ -352,6 +352,53 @@ class TestPythonIntegration(unittest.TestCase):
]
self.round_trip_reader(schema, data)
+ def test_listview_array(self):
+ self.round_trip_array(lambda: pa.array(
+ [[], [0], [1, 2], [4, 5, 6]], pa.list_view(pa.int64())
+ # disabled check_metadata since in Java API the listview
+ # internal field name ("item") is not preserved
+ # during round trips (it becomes "$data$").
+ ), check_metadata=False)
+
+ def test_empty_listview_array(self):
+ with pa.BufferOutputStream() as bos:
+ schema = pa.schema([pa.field("f0", pa.list_view(pa.int32()),
True)])
+ with ipc.new_stream(bos, schema) as writer:
+ src = pa.RecordBatch.from_arrays(
+ [pa.array([[]], pa.list_view(pa.int32()))], schema=schema)
+ writer.write(src)
+ data_bytes = bos.getvalue()
+
+ def recreate_batch():
+ with pa.input_stream(data_bytes) as ios:
+ with ipc.open_stream(ios) as reader:
+ return reader.read_next_batch()
+
+ self.round_trip_record_batch(recreate_batch)
+
+ def test_largelistview_array(self):
+ self.round_trip_array(lambda: pa.array(
+ [[], [0], [1, 2], [4, 5, 6]], pa.large_list_view(pa.int64())
+ # disabled check_metadata since in Java API the listview
+ # internal field name ("item") is not preserved
+ # during round trips (it becomes "$data$").
+ ), check_metadata=False)
+
+ def test_empty_largelistview_array(self):
+ with pa.BufferOutputStream() as bos:
+ schema = pa.schema([pa.field("f0", pa.large_list_view(pa.int32()),
True)])
+ with ipc.new_stream(bos, schema) as writer:
+ src = pa.RecordBatch.from_arrays(
+ [pa.array([[]], pa.large_list_view(pa.int32()))],
schema=schema)
+ writer.write(src)
+ data_bytes = bos.getvalue()
+
+ def recreate_batch():
+ with pa.input_stream(data_bytes) as ios:
+ with ipc.open_stream(ios) as reader:
+ return reader.read_next_batch()
+
+ self.round_trip_record_batch(recreate_batch)
if __name__ == '__main__':
unittest.main(verbosity=2)
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java
index f643306cfd..12edd6557b 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java
@@ -305,38 +305,43 @@ public abstract class BaseLargeRepeatedValueViewVector
extends BaseValueVector
while (valueCount > getOffsetBufferValueCapacity()) {
reallocateBuffers();
}
- final int childValueCount = valueCount == 0 ? 0 : getLengthOfChildVector();
+ final int childValueCount = valueCount == 0 ? 0 :
getMaxViewEndChildVector();
vector.setValueCount(childValueCount);
}
- protected int getLengthOfChildVector() {
+ /**
+ * Get the end of the child vector, i.e., the largest view end. This method
deduces the end by
+ * computing max_i(offsets[i] + sizes[i]) over all views.
+ *
+ * @return the end of the child vector.
+ */
+ protected int getMaxViewEndChildVector() {
int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
- int minOffset = offsetBuffer.getInt(0);
for (int i = 0; i < valueCount; i++) {
int currentOffset = offsetBuffer.getInt((long) i * OFFSET_WIDTH);
int currentSize = sizeBuffer.getInt((long) i * SIZE_WIDTH);
int currentSum = currentOffset + currentSize;
-
maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
- minOffset = Math.min(minOffset, currentOffset);
}
- return maxOffsetSizeSum - minOffset;
+ return maxOffsetSizeSum;
}
- protected int getLengthOfChildVectorByIndex(int index) {
+ /**
+ * Get the end of the child vector via the maximum view length of the child
vector by index.
+ *
+ * @return the end of the child vector by index
+ */
+ protected int getMaxViewEndChildVectorByIndex(int index) {
int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
- int minOffset = offsetBuffer.getInt(0);
for (int i = 0; i < index; i++) {
int currentOffset = offsetBuffer.getInt((long) i * OFFSET_WIDTH);
int currentSize = sizeBuffer.getInt((long) i * SIZE_WIDTH);
int currentSum = currentOffset + currentSize;
-
maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
- minOffset = Math.min(minOffset, currentOffset);
}
- return maxOffsetSizeSum - minOffset;
+ return maxOffsetSizeSum;
}
/**
@@ -390,7 +395,7 @@ public abstract class BaseLargeRepeatedValueViewVector
extends BaseValueVector
}
if (index > 0) {
- final int prevOffset = getLengthOfChildVectorByIndex(index);
+ final int prevOffset = getMaxViewEndChildVectorByIndex(index);
offsetBuffer.setInt((long) index * OFFSET_WIDTH, prevOffset);
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java
index 031cc8037b..e6213316b5 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java
@@ -304,38 +304,44 @@ public abstract class BaseRepeatedValueViewVector extends
BaseValueVector
while (valueCount > getOffsetBufferValueCapacity()) {
reallocateBuffers();
}
- final int childValueCount = valueCount == 0 ? 0 : getLengthOfChildVector();
+ final int childValueCount = valueCount == 0 ? 0 :
getMaxViewEndChildVector();
vector.setValueCount(childValueCount);
}
- protected int getLengthOfChildVector() {
+ /**
+ * Get the end of the child vector, i.e., the largest view end. This method
deduces the end by
+ * computing max_i(offsets[i] + sizes[i]) over all views.
+ *
+ * @return the end of the child vector.
+ */
+ protected int getMaxViewEndChildVector() {
int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
- int minOffset = offsetBuffer.getInt(0);
for (int i = 0; i < valueCount; i++) {
int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH);
int currentSum = currentOffset + currentSize;
-
maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
- minOffset = Math.min(minOffset, currentOffset);
}
- return maxOffsetSizeSum - minOffset;
+ return maxOffsetSizeSum;
}
- protected int getLengthOfChildVectorByIndex(int index) {
+ /**
+ * Get the end of the child vector via the maximum view length of the child
vector by index.
+ *
+ * @return the end of the child vector by index
+ */
+ protected int getMaxViewEndChildVectorByIndex(int index) {
int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
- int minOffset = offsetBuffer.getInt(0);
+ // int minOffset = offsetBuffer.getInt(0);
for (int i = 0; i < index; i++) {
int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH);
int currentSum = currentOffset + currentSize;
-
maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
- minOffset = Math.min(minOffset, currentOffset);
}
- return maxOffsetSizeSum - minOffset;
+ return maxOffsetSizeSum;
}
/**
@@ -389,7 +395,7 @@ public abstract class BaseRepeatedValueViewVector extends
BaseValueVector
}
if (index > 0) {
- final int prevOffset = getLengthOfChildVectorByIndex(index);
+ final int prevOffset = getMaxViewEndChildVectorByIndex(index);
offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java
index 2c61f799a4..84c6f03edb 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java
@@ -250,7 +250,9 @@ public class LargeListViewVector extends
BaseLargeRepeatedValueViewVector
*/
@Override
public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr,
long nullValue) {
- throw new UnsupportedOperationException("exportCDataBuffers Not
implemented yet");
+ exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true);
+ exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true);
+ exportBuffer(sizeBuffer, buffers, buffersPtr, nullValue, true);
}
@Override
@@ -851,7 +853,7 @@ public class LargeListViewVector extends
BaseLargeRepeatedValueViewVector
}
if (index > 0) {
- final int prevOffset = getLengthOfChildVectorByIndex(index);
+ final int prevOffset = getMaxViewEndChildVectorByIndex(index);
offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
}
@@ -943,7 +945,7 @@ public class LargeListViewVector extends
BaseLargeRepeatedValueViewVector
}
}
/* valueCount for the data vector is the current end offset */
- final long childValueCount = (valueCount == 0) ? 0 :
getLengthOfChildVector();
+ final long childValueCount = (valueCount == 0) ? 0 :
getMaxViewEndChildVector();
/* set the value count of data vector and this will take care of
* checking whether data buffer needs to be reallocated.
* TODO: revisit when 64-bit vectors are supported
@@ -1001,7 +1003,7 @@ public class LargeListViewVector extends
BaseLargeRepeatedValueViewVector
if (valueCount == 0) {
return 0.0D;
}
- final double totalListSize = getLengthOfChildVector();
+ final double totalListSize = getMaxViewEndChildVector();
return totalListSize / valueCount;
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
index 7f6d92f3be..9b4e6b4c0c 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
@@ -858,7 +858,7 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
}
if (index > 0) {
- final int prevOffset = getLengthOfChildVectorByIndex(index);
+ final int prevOffset = getMaxViewEndChildVectorByIndex(index);
offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
}
@@ -942,7 +942,7 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
}
}
/* valueCount for the data vector is the current end offset */
- final int childValueCount = (valueCount == 0) ? 0 :
getLengthOfChildVector();
+ final int childValueCount = (valueCount == 0) ? 0 :
getMaxViewEndChildVector();
/* set the value count of data vector and this will take care of
* checking whether data buffer needs to be reallocated.
*/
@@ -1005,7 +1005,7 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
if (valueCount == 0) {
return 0.0D;
}
- final double totalListSize = getLengthOfChildVector();
+ final double totalListSize = getMaxViewEndChildVector();
return totalListSize / valueCount;
}
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java
index 2ed8d4d700..26e7bb4a0d 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java
@@ -2095,6 +2095,140 @@ public class TestLargeListViewVector {
}
}
+ @Test
+ public void testRangeChildVector1() {
+ /*
+ * Overlapping ranges (the second view lies within the first)
+ * offsets: [0, 2]
+ * sizes: [4, 1]
+ * values: [0, 1, 2, 3]
+ *
+ * vector: [[0, 1, 2, 3], [2]]
+ * */
+ try (LargeListViewVector largeListViewVector =
+ LargeListViewVector.empty("largelistview", allocator)) {
+ // Allocate buffers in listViewVector by calling `allocateNew` method.
+ largeListViewVector.allocateNew();
+
+ // Initialize the child vector using `initializeChildrenFromFields`
method.
+
+ FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true),
null, null);
+ Field field = new Field("child-vector", fieldType, null);
+
largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+ // Set values in the child vector.
+ FieldVector fieldVector = largeListViewVector.getDataVector();
+ fieldVector.clear();
+
+ IntVector childVector = (IntVector) fieldVector;
+
+ childVector.allocateNew(8);
+
+ childVector.set(0, 0);
+ childVector.set(1, 1);
+ childVector.set(2, 2);
+ childVector.set(3, 3);
+ childVector.set(4, 4);
+ childVector.set(5, 5);
+ childVector.set(6, 6);
+ childVector.set(7, 7);
+
+ childVector.setValueCount(8);
+
+ // Set validity, offset and size buffers using `setValidity`,
+ // `setOffset` and `setSize` methods.
+ largeListViewVector.setValidity(0, 1);
+ largeListViewVector.setValidity(1, 1);
+
+ largeListViewVector.setOffset(0, 0);
+ largeListViewVector.setOffset(1, 2);
+
+ largeListViewVector.setSize(0, 4);
+ largeListViewVector.setSize(1, 1);
+
+ assertEquals(8, largeListViewVector.getDataVector().getValueCount());
+
+ largeListViewVector.setValueCount(2);
+ assertEquals(4, largeListViewVector.getDataVector().getValueCount());
+
+ IntVector childVector1 = (IntVector) largeListViewVector.getDataVector();
+ final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+ final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+ // yet the underneath buffer contains the original buffer
+ for (int i = 0; i < validityBuffer.capacity(); i++) {
+ assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+ }
+ }
+ }
+
+ @Test
+ public void testRangeChildVector2() {
+ /*
+ * Overlapping ranges
+ * offsets: [1, 2]
+ * sizes: [3, 1]
+ * values: [0, 1, 2, 3]
+ *
+ * vector: [[1, 2, 3], [2]]
+ * */
+ try (LargeListViewVector largeListViewVector =
+ LargeListViewVector.empty("largelistview", allocator)) {
+ // Allocate buffers in listViewVector by calling `allocateNew` method.
+ largeListViewVector.allocateNew();
+
+ // Initialize the child vector using `initializeChildrenFromFields`
method.
+
+ FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true),
null, null);
+ Field field = new Field("child-vector", fieldType, null);
+
largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+ // Set values in the child vector.
+ FieldVector fieldVector = largeListViewVector.getDataVector();
+ fieldVector.clear();
+
+ IntVector childVector = (IntVector) fieldVector;
+
+ childVector.allocateNew(8);
+
+ childVector.set(0, 0);
+ childVector.set(1, 1);
+ childVector.set(2, 2);
+ childVector.set(3, 3);
+ childVector.set(4, 4);
+ childVector.set(5, 5);
+ childVector.set(6, 6);
+ childVector.set(7, 7);
+
+ childVector.setValueCount(8);
+
+ // Set validity, offset and size buffers using `setValidity`,
+ // `setOffset` and `setSize` methods.
+ largeListViewVector.setValidity(0, 1);
+ largeListViewVector.setValidity(1, 1);
+
+ largeListViewVector.setOffset(0, 1);
+ largeListViewVector.setOffset(1, 2);
+
+ largeListViewVector.setSize(0, 3);
+ largeListViewVector.setSize(1, 1);
+
+ assertEquals(8, largeListViewVector.getDataVector().getValueCount());
+
+ largeListViewVector.setValueCount(2);
+ assertEquals(4, largeListViewVector.getDataVector().getValueCount());
+
+ IntVector childVector1 = (IntVector) largeListViewVector.getDataVector();
+ final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+ final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+ // yet the underneath buffer contains the original buffer
+ for (int i = 0; i < validityBuffer.capacity(); i++) {
+ assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+ }
+ }
+ }
+
private void writeIntValues(UnionLargeListViewWriter writer, int[] values) {
writer.startListView();
for (int v : values) {
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
index 4fa808c18a..639585fc48 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
@@ -2084,6 +2084,138 @@ public class TestListViewVector {
}
}
+ @Test
+ public void testRangeChildVector1() {
+ /*
+ * Overlapping ranges (the second view lies within the first)
+ * offsets: [0, 2]
+ * sizes: [4, 1]
+ * values: [0, 1, 2, 3]
+ *
+ * vector: [[0, 1, 2, 3], [2]]
+ * */
+ try (ListViewVector listViewVector = ListViewVector.empty("listview",
allocator)) {
+ // Allocate buffers in listViewVector by calling `allocateNew` method.
+ listViewVector.allocateNew();
+
+ // Initialize the child vector using `initializeChildrenFromFields`
method.
+
+ FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true),
null, null);
+ Field field = new Field("child-vector", fieldType, null);
+
listViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+ // Set values in the child vector.
+ FieldVector fieldVector = listViewVector.getDataVector();
+ fieldVector.clear();
+
+ IntVector childVector = (IntVector) fieldVector;
+
+ childVector.allocateNew(8);
+
+ childVector.set(0, 0);
+ childVector.set(1, 1);
+ childVector.set(2, 2);
+ childVector.set(3, 3);
+ childVector.set(4, 4);
+ childVector.set(5, 5);
+ childVector.set(6, 6);
+ childVector.set(7, 7);
+
+ childVector.setValueCount(8);
+
+ // Set validity, offset and size buffers using `setValidity`,
+ // `setOffset` and `setSize` methods.
+ listViewVector.setValidity(0, 1);
+ listViewVector.setValidity(1, 1);
+
+ listViewVector.setOffset(0, 0);
+ listViewVector.setOffset(1, 2);
+
+ listViewVector.setSize(0, 4);
+ listViewVector.setSize(1, 1);
+
+ assertEquals(8, listViewVector.getDataVector().getValueCount());
+
+ listViewVector.setValueCount(2);
+ assertEquals(4, listViewVector.getDataVector().getValueCount());
+
+ IntVector childVector1 = (IntVector) listViewVector.getDataVector();
+ final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+ final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+ // yet the underneath buffer contains the original buffer
+ for (int i = 0; i < validityBuffer.capacity(); i++) {
+ assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+ }
+ }
+ }
+
+ @Test
+ public void testRangeChildVector2() {
+ /*
+ * Overlapping ranges
+ * offsets: [1, 2]
+ * sizes: [3, 1]
+ * values: [0, 1, 2, 3]
+ *
+ * vector: [[1, 2, 3], [2]]
+ * */
+ try (ListViewVector listViewVector = ListViewVector.empty("listview",
allocator)) {
+ // Allocate buffers in listViewVector by calling `allocateNew` method.
+ listViewVector.allocateNew();
+
+ // Initialize the child vector using `initializeChildrenFromFields`
method.
+
+ FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true),
null, null);
+ Field field = new Field("child-vector", fieldType, null);
+
listViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+ // Set values in the child vector.
+ FieldVector fieldVector = listViewVector.getDataVector();
+ fieldVector.clear();
+
+ IntVector childVector = (IntVector) fieldVector;
+
+ childVector.allocateNew(8);
+
+ childVector.set(0, 0);
+ childVector.set(1, 1);
+ childVector.set(2, 2);
+ childVector.set(3, 3);
+ childVector.set(4, 4);
+ childVector.set(5, 5);
+ childVector.set(6, 6);
+ childVector.set(7, 7);
+
+ childVector.setValueCount(8);
+
+ // Set validity, offset and size buffers using `setValidity`,
+ // `setOffset` and `setSize` methods.
+ listViewVector.setValidity(0, 1);
+ listViewVector.setValidity(1, 1);
+
+ listViewVector.setOffset(0, 1);
+ listViewVector.setOffset(1, 2);
+
+ listViewVector.setSize(0, 3);
+ listViewVector.setSize(1, 1);
+
+ assertEquals(8, listViewVector.getDataVector().getValueCount());
+
+ listViewVector.setValueCount(2);
+ assertEquals(4, listViewVector.getDataVector().getValueCount());
+
+ IntVector childVector1 = (IntVector) listViewVector.getDataVector();
+ final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+ final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+ // yet the underneath buffer contains the original buffer
+ for (int i = 0; i < validityBuffer.capacity(); i++) {
+ assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+ }
+ }
+ }
+
private void writeIntValues(UnionListViewWriter writer, int[] values) {
writer.startListView();
for (int v : values) {
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
index 69e16dc470..afbc30f019 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -60,10 +60,12 @@ import org.apache.arrow.vector.UInt8Vector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VariableWidthFieldVector;
+import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.LargeListViewVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.StructVector;
@@ -760,4 +762,36 @@ public class ValueVectorDataPopulator {
dataVector.setValueCount(curPos);
vector.setValueCount(values.length);
}
+
+ /** Populate values for {@link LargeListViewVector}. */
+ public static void setVector(LargeListViewVector vector, List<Integer>...
values) {
+ vector.allocateNewSafe();
+ Types.MinorType type = Types.MinorType.INT;
+ vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ dataVector.allocateNew();
+
+ // set underlying vectors
+ int curPos = 0;
+ for (int i = 0; i < values.length; i++) {
+ vector
+ .getOffsetBuffer()
+ .setInt((long) i * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH,
curPos);
+ if (values[i] == null) {
+ BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+ } else {
+ BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+ for (int value : values[i]) {
+ dataVector.setSafe(curPos, value);
+ curPos += 1;
+ }
+ }
+ vector
+ .getSizeBuffer()
+ .setInt((long) i * BaseRepeatedValueViewVector.SIZE_WIDTH,
values[i].size());
+ }
+ dataVector.setValueCount(curPos);
+ vector.setValueCount(values.length);
+ }
}