This is an automated email from the ASF dual-hosted git repository.
jbonofre pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-java.git
The following commit(s) were added to refs/heads/main by this push:
new 9fd4973d1 GH-470: [Vector] Fix
ListViewVector.getElementEndIndex(index) method (#1019)
9fd4973d1 is described below
commit 9fd4973d12abbd9e9d14098c2acd41d97a6c4407
Author: Aleksei Starikov <[email protected]>
AuthorDate: Tue Feb 17 14:15:00 2026 +0100
GH-470: [Vector] Fix ListViewVector.getElementEndIndex(index) method (#1019)
## What's Changed
[First
commit](https://github.com/apache/arrow-java/commit/a758cadb17c3d50c08a139a3e2ddced71215ccf5)
changes logic:
- The PR fixes a bug in the `ListViewVector.getElementEndIndex(index)`
method .
Before:
```
public int getElementEndIndex(int index) {
return sizeBuffer.getInt(index * OFFSET_WIDTH);
}
```
After:
```
public int getElementEndIndex(int index) {
return offsetBuffer.getInt(index * OFFSET_WIDTH) +
sizeBuffer.getInt(index * SIZE_WIDTH);
}
```
[Second
commit](https://github.com/apache/arrow-java/commit/aec29750202ff1aac73e7fdc9c2020b3dcf72696)
doesn't change logic:
- Fixes a bug of usage `sizeBuffer` with `OFFSET_WIDTH` (`hashCode`
method) and `offsetBuffer` with `SIZE_WIDTH` (`setSize` method). It
doesn't introduce real changes in the logic as `OFFSET_WIDTH` ==
`SIZE_WIDTH` == 4
- Plus small refactoring of ListViewVector to avoid code duplication and
similar issues in the future.
---
It's a bug fix.
---
Closes #470.
---
.../arrow/vector/complex/ListViewVector.java | 68 ++++---
.../apache/arrow/vector/TestListViewVector.java | 215 ++++++++++++++++-----
2 files changed, 204 insertions(+), 79 deletions(-)
diff --git
a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
index 8711db5e0..d41f61e29 100644
--- a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
+++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
@@ -226,8 +226,8 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
sizeBuffer.writerIndex(0);
} else {
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
- offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH);
- sizeBuffer.writerIndex(valueCount * SIZE_WIDTH);
+ offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH);
+ sizeBuffer.writerIndex((long) valueCount * SIZE_WIDTH);
}
}
@@ -445,14 +445,22 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
return ArrowBufPointer.NULL_HASH_CODE;
}
int hash = 0;
- final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
- final int end = sizeBuffer.getInt(index * OFFSET_WIDTH);
+ final int start = getElementStartIndex(index);
+ final int end = getElementEndIndex(index);
for (int i = start; i < end; i++) {
hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher));
}
return hash;
}
+ private void setElementOffsetBuffer(int index, int value) {
+ offsetBuffer.setInt((long) index * OFFSET_WIDTH, value);
+ }
+
+ private void setElementSizeBuffer(int index, int value) {
+ sizeBuffer.setInt((long) index * SIZE_WIDTH, value);
+ }
+
private class TransferImpl implements TransferPair {
ListViewVector to;
@@ -498,7 +506,6 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
valueCount);
to.clear();
if (length > 0) {
- final int startPoint = offsetBuffer.getInt((long) startIndex *
OFFSET_WIDTH);
// we have to scan by index since there are out-of-order offsets
to.offsetBuffer = to.allocateBuffers((long) length * OFFSET_WIDTH);
to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH);
@@ -507,9 +514,9 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
int maxOffsetAndSizeSum = -1;
int minOffsetValue = -1;
for (int i = 0; i < length; i++) {
- final int offsetValue = offsetBuffer.getInt((long) (startIndex + i)
* OFFSET_WIDTH);
- final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) *
SIZE_WIDTH);
- to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue);
+ final int offsetValue = getElementStartIndex(startIndex + i);
+ final int sizeValue = getElementSize(startIndex + i);
+ to.setElementSizeBuffer(i, sizeValue);
if (maxOffsetAndSizeSum < offsetValue + sizeValue) {
maxOffsetAndSizeSum = offsetValue + sizeValue;
}
@@ -520,9 +527,9 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
/* splitAndTransfer the offset buffer */
for (int i = 0; i < length; i++) {
- final int offsetValue = offsetBuffer.getInt((long) (startIndex + i)
* OFFSET_WIDTH);
+ final int offsetValue = getElementStartIndex(startIndex + i);
final int relativeOffset = offsetValue - minOffsetValue;
- to.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeOffset);
+ to.setElementOffsetBuffer(i, relativeOffset);
}
/* splitAndTransfer the validity buffer */
@@ -678,8 +685,8 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
if (isSet(index) == 0) {
return null;
}
- final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
- final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH);
+ final int start = getElementStartIndex(index);
+ final int end = getElementEndIndex(index);
final ValueVector vv = getDataVector();
final List<Object> vals = new JsonStringArrayList<>(end - start);
for (int i = start; i < end; i++) {
@@ -711,7 +718,7 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
if (isNull(index)) {
return true;
} else {
- return sizeBuffer.getInt(index * SIZE_WIDTH) == 0;
+ return getElementSize(index) == 0;
}
}
@@ -722,10 +729,7 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
* @return 1 if element at given index is not null, 0 otherwise
*/
public int isSet(int index) {
- final int byteIndex = index >> 3;
- final byte b = validityBuffer.getByte(byteIndex);
- final int bitIndex = index & 7;
- return (b >> bitIndex) & 0x01;
+ return BitVectorHelper.get(validityBuffer, index);
}
/**
@@ -775,8 +779,8 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
reallocValidityAndSizeAndOffsetBuffers();
}
- offsetBuffer.setInt(index * OFFSET_WIDTH, 0);
- sizeBuffer.setInt(index * SIZE_WIDTH, 0);
+ setElementOffsetBuffer(index, 0);
+ setElementSizeBuffer(index, 0);
BitVectorHelper.unsetBit(validityBuffer, index);
}
@@ -794,11 +798,11 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
if (index > 0) {
final int prevOffset = getMaxViewEndChildVectorByIndex(index);
- offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
+ setElementOffsetBuffer(index, prevOffset);
}
BitVectorHelper.setBit(validityBuffer, index);
- return offsetBuffer.getInt(index * OFFSET_WIDTH);
+ return getElementStartIndex(index);
}
/**
@@ -836,9 +840,9 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
* @param value value to set
*/
public void setOffset(int index, int value) {
- validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH));
+ validateInvariants(value, getElementSize(index));
- offsetBuffer.setInt(index * OFFSET_WIDTH, value);
+ setElementOffsetBuffer(index, value);
}
/**
@@ -848,9 +852,9 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
* @param value value to set
*/
public void setSize(int index, int value) {
- validateInvariants(offsetBuffer.getInt(index * SIZE_WIDTH), value);
+ validateInvariants(getElementStartIndex(index), value);
- sizeBuffer.setInt(index * SIZE_WIDTH, value);
+ setElementSizeBuffer(index, value);
}
/**
@@ -886,12 +890,16 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
@Override
public int getElementStartIndex(int index) {
- return offsetBuffer.getInt(index * OFFSET_WIDTH);
+ return offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ }
+
+ private int getElementSize(int index) {
+ return sizeBuffer.getInt((long) index * SIZE_WIDTH);
}
@Override
public int getElementEndIndex(int index) {
- return sizeBuffer.getInt(index * OFFSET_WIDTH);
+ return getElementStartIndex(index) + getElementSize(index);
}
@Override
@@ -948,8 +956,8 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
@Override
public void validate() {
for (int i = 0; i < valueCount; i++) {
- final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH);
- final int size = sizeBuffer.getInt(i * SIZE_WIDTH);
+ final int offset = getElementStartIndex(i);
+ final int size = getElementSize(i);
validateInvariants(offset, size);
}
}
@@ -961,6 +969,6 @@ public class ListViewVector extends
BaseRepeatedValueViewVector
* @param size number of elements in the list that was written
*/
public void endValue(int index, int size) {
- sizeBuffer.setInt(index * SIZE_WIDTH, size);
+ setElementSizeBuffer(index, size);
}
}
diff --git
a/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
b/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
index 2f282e198..8ab0edb14 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
@@ -1550,55 +1550,7 @@ public class TestListViewVector {
public void testOutOfOrderOffset1() {
// [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]]
try (ListViewVector listViewVector = ListViewVector.empty("listview",
allocator)) {
- // Allocate buffers in listViewVector by calling `allocateNew` method.
- listViewVector.allocateNew();
-
- // Initialize the child vector using `initializeChildrenFromFields`
method.
-
- FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true),
null, null);
- Field field = new Field("child-vector", fieldType, null);
-
listViewVector.initializeChildrenFromFields(Collections.singletonList(field));
-
- // Set values in the child vector.
- FieldVector fieldVector = listViewVector.getDataVector();
- fieldVector.clear();
-
- SmallIntVector childVector = (SmallIntVector) fieldVector;
-
- childVector.allocateNew(7);
-
- childVector.set(0, 0);
- childVector.set(1, -127);
- childVector.set(2, 127);
- childVector.set(3, 50);
- childVector.set(4, 12);
- childVector.set(5, -7);
- childVector.set(6, 25);
-
- childVector.setValueCount(7);
-
- // Set validity, offset and size buffers using `setValidity`,
- // `setOffset` and `setSize` methods.
- listViewVector.setValidity(0, 1);
- listViewVector.setValidity(1, 0);
- listViewVector.setValidity(2, 1);
- listViewVector.setValidity(3, 1);
- listViewVector.setValidity(4, 1);
-
- listViewVector.setOffset(0, 4);
- listViewVector.setOffset(1, 7);
- listViewVector.setOffset(2, 0);
- listViewVector.setOffset(3, 0);
- listViewVector.setOffset(4, 3);
-
- listViewVector.setSize(0, 3);
- listViewVector.setSize(1, 0);
- listViewVector.setSize(2, 4);
- listViewVector.setSize(3, 0);
- listViewVector.setSize(4, 2);
-
- // Set value count using `setValueCount` method.
- listViewVector.setValueCount(5);
+ initializeListViewVectorAsInSpecification(listViewVector);
final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer();
final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer();
@@ -2217,6 +2169,105 @@ public class TestListViewVector {
}
}
+ @Test
+ public void
testGetElementStartIndexAndEndIndexOrderedOffsetsNoIntersection() {
+ /*
+ values = [10, 20, 30, 40, 50]
+ offsets = [0, 3]
+ sizes = [3, 2]
+ vector: [[10, 20, 30], [40, 50]]
+ */
+ try (ListViewVector listViewVector = ListViewVector.empty("sourceVector",
allocator)) {
+ initializeListViewVector(
+ listViewVector, List.of(10, 20, 30, 40, 50), List.of(1, 1),
List.of(0, 3), List.of(3, 2));
+
+ assertEquals(0, listViewVector.getElementStartIndex(0));
+ assertEquals(3, listViewVector.getElementEndIndex(0));
+ assertEquals(3, listViewVector.getElementStartIndex(1));
+ assertEquals(5, listViewVector.getElementEndIndex(1));
+
+ final FieldVector dataVec = listViewVector.getDataVector();
+ int elemIndex = 0;
+ int start = listViewVector.getElementStartIndex(elemIndex);
+ int end = listViewVector.getElementEndIndex(elemIndex);
+ List<?> list = listViewVector.getObject(elemIndex);
+ assertEquals(end - start, list.size());
+ for (int j = 0; j < list.size(); j++) {
+ assertEquals(((SmallIntVector) dataVec).get(start + j), list.get(j));
+ }
+ }
+ }
+
+ @Test
+ public void
testGetElementStartIndexAndEndIndexNotOrderedOffsetsNoIntersection() {
+ /*
+ values = [1, 2, 3, 4, 5, 6]
+ validity = [1, 1, 1]
+ offsets = [4, 2, 0]
+ sizes = [2, 2, 2]
+ vector: [[5, 6], [3, 4], [1, 2]]
+ */
+ try (ListViewVector listViewVector = ListViewVector.empty("sourceVector",
allocator)) {
+ initializeListViewVector(
+ listViewVector,
+ List.of(1, 2, 3, 4, 5, 6),
+ List.of(1, 1, 1),
+ List.of(4, 2, 0),
+ List.of(2, 2, 2));
+
+ assertEquals(4, listViewVector.getElementStartIndex(0));
+ assertEquals(6, listViewVector.getElementEndIndex(0));
+ assertEquals(2, listViewVector.getElementStartIndex(1));
+ assertEquals(4, listViewVector.getElementEndIndex(1));
+ assertEquals(0, listViewVector.getElementStartIndex(2));
+ assertEquals(2, listViewVector.getElementEndIndex(2));
+ }
+ }
+
+ @Test
+ public void
testGetElementStartIndexAndEndIndexOrderedOffsetsWithIntersection() {
+ /*
+ values = [1, 2, 3, 4, 5]
+ validity = [1, 1, 1]
+ offsets = [0, 1, 4]
+ sizes = [2, 3, 1]
+ vector: [[1, 2], [2, 3, 4], [5]]
+ */
+ try (ListViewVector listViewVector = ListViewVector.empty("sourceVector",
allocator)) {
+ initializeListViewVector(
+ listViewVector,
+ List.of(1, 2, 3, 4, 5),
+ List.of(1, 1, 1),
+ List.of(0, 1, 4),
+ List.of(2, 3, 1));
+
+ assertEquals(0, listViewVector.getElementStartIndex(0));
+ assertEquals(2, listViewVector.getElementEndIndex(0));
+ assertEquals(1, listViewVector.getElementStartIndex(1));
+ assertEquals(4, listViewVector.getElementEndIndex(1));
+ assertEquals(4, listViewVector.getElementStartIndex(2));
+ assertEquals(5, listViewVector.getElementEndIndex(2));
+ }
+ }
+
+ @Test
+ public void
testGetElementStartIndexAndEndIndexOrderedOffsetsAsInSpecification() {
+ try (ListViewVector listViewVector = ListViewVector.empty("sourceVector",
allocator)) {
+ initializeListViewVectorAsInSpecification(listViewVector);
+
+ assertEquals(4, listViewVector.getElementStartIndex(0));
+ assertEquals(7, listViewVector.getElementEndIndex(0));
+ assertEquals(7, listViewVector.getElementStartIndex(1));
+ assertEquals(7, listViewVector.getElementEndIndex(1));
+ assertEquals(0, listViewVector.getElementStartIndex(2));
+ assertEquals(4, listViewVector.getElementEndIndex(2));
+ assertEquals(0, listViewVector.getElementStartIndex(3));
+ assertEquals(0, listViewVector.getElementEndIndex(3));
+ assertEquals(3, listViewVector.getElementStartIndex(4));
+ assertEquals(5, listViewVector.getElementEndIndex(4));
+ }
+ }
+
private void writeIntValues(UnionListViewWriter writer, int[] values) {
writer.startListView();
for (int v : values) {
@@ -2224,4 +2275,70 @@ public class TestListViewVector {
}
writer.endListView();
}
+
+ /**
+ * ListViewVector from the <a
+ *
href="https://arrow.apache.org/docs/format/Intro.html#list-view">specification</a>.
+ */
+ private void initializeListViewVectorAsInSpecification(ListViewVector
listViewVector) {
+ /*
+ values = [0, -127, 127, 50, 12, -7, 25]
+ validity = [1, 1, 1, 0, 1] (reversed)
+ offsets = [4, 7, 0, 0, 3]
+ sizes = [3, 0, 4, 0, 2]
+ vector: [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]]
+ */
+ initializeListViewVector(
+ listViewVector,
+ List.of(0, -127, 127, 50, 12, -7, 25),
+ List.of(1, 1, 1, 0, 1),
+ List.of(4, 7, 0, 0, 3),
+ List.of(3, 0, 4, 0, 2));
+ }
+
+ private void initializeListViewVector(
+ ListViewVector listViewVector,
+ List<Integer> values,
+ List<Integer> validity,
+ List<Integer> offsets,
+ List<Integer> sizes) {
+ // Allocate buffers in listViewVector by calling `allocateNew` method.
+ assert offsets.size() == sizes.size();
+ listViewVector.allocateNew();
+
+ // Initialize the child vector using `initializeChildrenFromFields` method.
+ FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true),
null, null);
+ Field field = new Field("child-vector", fieldType, null);
+
listViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+ // Set values in the child vector.
+ FieldVector fieldVector = listViewVector.getDataVector();
+ fieldVector.clear();
+
+ SmallIntVector childVector = (SmallIntVector) fieldVector;
+ childVector.allocateNew(values.size());
+ for (int i = 0; i < values.size(); i++) {
+ childVector.set(i, values.get(i));
+ }
+ childVector.setValueCount(values.size());
+
+ // Set validity, offset and size buffers using `setValidity`,
+ // `setOffset` and `setSize` methods.
+ List<Integer> reversedValidity = new ArrayList<>(validity);
+ Collections.reverse(reversedValidity);
+ for (int i = 0; i < reversedValidity.size(); i++) {
+ listViewVector.setValidity(i, reversedValidity.get(i));
+ }
+
+ for (int i = 0; i < offsets.size(); i++) {
+ listViewVector.setOffset(i, offsets.get(i));
+ }
+
+ for (int i = 0; i < sizes.size(); i++) {
+ listViewVector.setSize(i, sizes.get(i));
+ }
+
+ // Set value count using `setValueCount` method.
+ listViewVector.setValueCount(offsets.size());
+ }
}