This is an automated email from the ASF dual-hosted git repository.
kfaraz pushed a commit to branch 24.0.1
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/24.0.1 by this push:
new 7b9dd1c75e fix nested column range index range computation (#13297)
(#13300)
7b9dd1c75e is described below
commit 7b9dd1c75e44554588dc4b2dea9576b6b9d1096d
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Nov 3 01:50:29 2022 -0700
fix nested column range index range computation (#13297) (#13300)
* fix nested column range index range computation
* simplify, add missing bounds check for FixedIndexed
---
.../apache/druid/segment/data/FixedIndexed.java | 7 ++
.../NestedFieldLiteralColumnIndexSupplier.java | 36 +++---
.../druid/segment/data/FixedIndexedTest.java | 3 +
.../NestedFieldLiteralColumnIndexSupplierTest.java | 130 ++++++++++++++++++++-
4 files changed, 160 insertions(+), 16 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java
b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java
index 10a29dff91..5275f19d0c 100644
--- a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java
+++ b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java
@@ -21,6 +21,7 @@ package org.apache.druid.segment.data;
import com.google.common.base.Preconditions;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.TypeStrategy;
@@ -112,6 +113,12 @@ public class FixedIndexed<T> implements Indexed<T>
@Override
public T get(int index)
{
+ if (index < 0) {
+ throw new IAE("Index[%s] < 0", index);
+ }
+ if (index >= size) {
+ throw new IAE("Index[%d] >= size[%d]", index, size);
+ }
if (hasNull) {
if (index == 0) {
return null;
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
index d3030c66f2..df2d064f6d 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
@@ -186,6 +186,7 @@ public class NestedFieldLiteralColumnIndexSupplier
implements ColumnIndexSupplie
{
int globalStartIndex, globalEndIndex;
int localStartIndex, localEndIndex;
+ // start with standard range finding in global value dictionary
if (startValue == null) {
globalStartIndex = adjust == 0 ? 1 : adjust; // global index 0 is always
the null value
} else {
@@ -196,15 +197,6 @@ public class NestedFieldLiteralColumnIndexSupplier
implements ColumnIndexSupplie
globalStartIndex = adjust + (-(found + 1));
}
}
- // with starting global index settled, now lets find starting local index
- int localFound = dictionary.indexOf(globalStartIndex);
- if (localFound < 0) {
- // the first valid global index is not within the local dictionary, so
the insertion point is where we begin
- localStartIndex = -(localFound + 1);
- } else {
- // valid global index in local dictionary, start here
- localStartIndex = localFound;
- }
if (endValue == null) {
globalEndIndex = globalDictionary.size() + adjust;
@@ -217,16 +209,32 @@ public class NestedFieldLiteralColumnIndexSupplier
implements ColumnIndexSupplie
}
}
globalEndIndex = Math.max(globalStartIndex, globalEndIndex);
- // end index is not inclusive, so we find the last value in the local
dictionary that falls within the range
- int localEndFound = dictionary.indexOf(globalEndIndex - 1);
+
+ if (globalStartIndex == globalEndIndex) {
+ return new IntIntImmutablePair(0, 0);
+ }
+
+ // with global dictionary id range settled, now lets map that onto a local
dictionary id range
+ int localFound = dictionary.indexOf(globalStartIndex);
+ if (localFound < 0) {
+ // the first valid global index is not within the local dictionary, so
the insertion point is where we begin
+ localStartIndex = -(localFound + 1);
+ } else {
+ // valid global index in local dictionary, start here
+ localStartIndex = localFound;
+ }
+ // global end index is exclusive already, so we don't adjust local end
index even for missing values
+ int localEndFound = dictionary.indexOf(globalEndIndex);
if (localEndFound < 0) {
localEndIndex = -localEndFound;
} else {
- // add 1 because the last valid global end value is in the local
dictionary, and end index is exclusive
- localEndIndex = localEndFound + 1;
+ localEndIndex = localEndFound;
}
- return new IntIntImmutablePair(localStartIndex,
Math.min(dictionary.size(), localEndIndex));
+ localStartIndex = Math.min(localStartIndex, dictionary.size());
+ localEndIndex = Math.max(localStartIndex, Math.min(dictionary.size(),
localEndIndex));
+
+ return new IntIntImmutablePair(localStartIndex, localEndIndex);
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java
b/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java
index dcfc5c058e..d175c7cf7d 100644
---
a/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java
@@ -74,6 +74,9 @@ public class FixedIndexedTest extends
InitializedNullHandlingTest
Assert.assertEquals(LONGS[i], fixedIndexed.get(i));
Assert.assertEquals(i, fixedIndexed.indexOf(LONGS[i]));
}
+
+ Assert.assertThrows(IllegalArgumentException.class, () ->
fixedIndexed.get(-1));
+ Assert.assertThrows(IllegalArgumentException.class, () ->
fixedIndexed.get(LONGS.length));
}
@Test
diff --git
a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
index 850baf4412..8931746102 100644
---
a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
@@ -22,6 +22,7 @@ package org.apache.druid.segment.nested;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
+import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.filter.DruidPredicateFactory;
@@ -34,6 +35,7 @@ import org.apache.druid.segment.column.DruidPredicateIndex;
import org.apache.druid.segment.column.LexicographicalRangeIndex;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.NumericRangeIndex;
+import org.apache.druid.segment.column.SpatialIndex;
import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.segment.column.TypeStrategies;
import org.apache.druid.segment.data.BitmapSerdeFactory;
@@ -136,6 +138,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class);
Assert.assertNotNull(nullIndex);
+ // sanity check to make sure we don't return indexes we don't support
+ Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
// 10 rows
// local: [b, foo, fooo, z]
// column: [foo, b, fooo, b, z, fooo, z, b, b, foo]
@@ -497,6 +502,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
StringValueSetIndex valueSetIndex =
indexSupplier.as(StringValueSetIndex.class);
Assert.assertNotNull(valueSetIndex);
+ // sanity check to make sure we don't return indexes we don't support
+ Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
// 10 rows
// local: [1, 3, 100, 300]
// column: [100, 1, 300, 1, 3, 3, 100, 300, 300, 1]
@@ -611,6 +619,25 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
Assert.assertEquals(0.5, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 3, 4, 7, 9);
+
+ // set index with null
+ TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
+ treeSet.add(null);
+ treeSet.add("1");
+ treeSet.add("3");
+ treeSet.add("300");
+ columnIndex = valueSetIndex.forSortedValues(treeSet);
+ Assert.assertNotNull(columnIndex);
+ Assert.assertEquals(0.8, columnIndex.estimateSelectivity(10), 0.0);
+ bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap, 1, 2, 3, 4, 5, 7, 8, 9);
+
+ // null value should really use NullValueIndex, but this works for classic
reasons
+ columnIndex = valueSetIndex.forValue(null);
+ Assert.assertNotNull(columnIndex);
+ Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0);
+ bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap, 2, 5, 8);
}
@Test
@@ -674,6 +701,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
StringValueSetIndex valueSetIndex =
indexSupplier.as(StringValueSetIndex.class);
Assert.assertNotNull(valueSetIndex);
+ // sanity check to make sure we don't return indexes we don't support
+ Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
// 10 rows
// local: [1.1, 1.2, 3.3, 6.6]
// column: [1.1, 1.1, 1.2, 3.3, 1.2, 6.6, 3.3, 1.2, 1.1, 3.3]
@@ -720,10 +750,10 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
forRange = rangeIndex.forRange(1.1, true, 3.3, true);
Assert.assertNotNull(forRange);
- Assert.assertEquals(0.6, forRange.estimateSelectivity(10), 0.0);
+ Assert.assertEquals(0.3, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
- checkBitmap(bitmap, 2, 3, 4, 6, 7, 9);
+ checkBitmap(bitmap, 2, 4, 7);
forRange = rangeIndex.forRange(null, true, null, true);
Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0);
@@ -734,6 +764,55 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+
+ forRange = rangeIndex.forRange(1.111, true, 1.19, true);
+ Assert.assertNotNull(forRange);
+ Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+ bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap);
+
+ forRange = rangeIndex.forRange(1.01, true, 1.09, true);
+ Assert.assertNotNull(forRange);
+ Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+ bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap);
+
+ forRange = rangeIndex.forRange(0.05, true, 0.98, true);
+ Assert.assertNotNull(forRange);
+ Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+ bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap);
+
+ forRange = rangeIndex.forRange(0.05, true, 1.1, true);
+ Assert.assertNotNull(forRange);
+ Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+ bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap);
+
+ forRange = rangeIndex.forRange(8.99, true, 10.10, true);
+ Assert.assertNotNull(forRange);
+ Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+ bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap);
+
+ forRange = rangeIndex.forRange(8.99, true, 10.10, true);
+ Assert.assertNotNull(forRange);
+ Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+ bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap);
+
+ forRange = rangeIndex.forRange(10.00, true, 10.10, true);
+ Assert.assertNotNull(forRange);
+ Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+ bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap);
}
@Test
@@ -802,6 +881,25 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 2, 4, 7, 9);
+
+ // set index with null
+ TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
+ treeSet.add(null);
+ treeSet.add("1.2");
+ treeSet.add("3.3");
+ treeSet.add("7.7");
+ columnIndex = valueSetIndex.forSortedValues(treeSet);
+ Assert.assertNotNull(columnIndex);
+ Assert.assertEquals(0.7, columnIndex.estimateSelectivity(10), 0.0);
+ bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap, 1, 2, 3, 4, 6, 7, 9);
+
+ // null value should really use NullValueIndex, but this works for classic
reasons
+ columnIndex = valueSetIndex.forValue(null);
+ Assert.assertNotNull(columnIndex);
+ Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0);
+ bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap, 1, 3, 6);
}
@Test
@@ -865,6 +963,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class);
Assert.assertNotNull(nullIndex);
+ // sanity check to make sure we don't return indexes we don't support
+ Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
// 10 rows
// local: [null, b, z, 1, 300, 1.1, 9.9]
// column: [1, b, null, 9.9, 300, 1, z, null, 1.1, b]
@@ -912,6 +1013,26 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 3, 4, 9);
+
+ // set index with null
+ TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
+ treeSet.add(null);
+ treeSet.add("b");
+ treeSet.add("300");
+ treeSet.add("9.9");
+ treeSet.add("1.6");
+ columnIndex = valueSetIndex.forSortedValues(treeSet);
+ Assert.assertNotNull(columnIndex);
+ Assert.assertEquals(0.6, columnIndex.estimateSelectivity(10), 0.0);
+ bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap, 1, 2, 3, 4, 7, 9);
+
+ // null value should really use NullValueIndex, but this works for classic
reasons
+ columnIndex = valueSetIndex.forValue(null);
+ Assert.assertNotNull(columnIndex);
+ Assert.assertEquals(0.2, columnIndex.estimateSelectivity(10), 0.0);
+ bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+ checkBitmap(bitmap, 2, 7);
}
@Test
@@ -969,6 +1090,11 @@ public class NestedFieldLiteralColumnIndexSupplierTest
extends InitializedNullHa
Assert.assertEquals("300", lowLevelIndex.getValue(4));
Assert.assertEquals("1.1", lowLevelIndex.getValue(5));
Assert.assertEquals("9.9", lowLevelIndex.getValue(6));
+
+ Assert.assertEquals(7, lowLevelIndex.getCardinality());
+ checkBitmap(lowLevelIndex.getBitmap(0), 2, 7);
+ checkBitmap(lowLevelIndex.getBitmap(1), 1, 9);
+ checkBitmap(lowLevelIndex.getBitmap(-1));
}
private NestedFieldLiteralColumnIndexSupplier makeSingleTypeStringSupplier()
throws IOException
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]