This is an automated email from the ASF dual-hosted git repository.

kfaraz pushed a commit to branch 24.0.1
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/24.0.1 by this push:
     new 7b9dd1c75e fix nested column range index range computation (#13297) 
(#13300)
7b9dd1c75e is described below

commit 7b9dd1c75e44554588dc4b2dea9576b6b9d1096d
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Nov 3 01:50:29 2022 -0700

    fix nested column range index range computation (#13297) (#13300)
    
    * fix nested column range index range computation
    
    * simplify, add missing bounds check for FixedIndexed
---
 .../apache/druid/segment/data/FixedIndexed.java    |   7 ++
 .../NestedFieldLiteralColumnIndexSupplier.java     |  36 +++---
 .../druid/segment/data/FixedIndexedTest.java       |   3 +
 .../NestedFieldLiteralColumnIndexSupplierTest.java | 130 ++++++++++++++++++++-
 4 files changed, 160 insertions(+), 16 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java 
b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java
index 10a29dff91..5275f19d0c 100644
--- a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java
+++ b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java
@@ -21,6 +21,7 @@ package org.apache.druid.segment.data;
 
 import com.google.common.base.Preconditions;
 import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.column.TypeStrategy;
 
@@ -112,6 +113,12 @@ public class FixedIndexed<T> implements Indexed<T>
   @Override
   public T get(int index)
   {
+    if (index < 0) {
+      throw new IAE("Index[%s] < 0", index);
+    }
+    if (index >= size) {
+      throw new IAE("Index[%d] >= size[%d]", index, size);
+    }
     if (hasNull) {
       if (index == 0) {
         return null;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
index d3030c66f2..df2d064f6d 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java
@@ -186,6 +186,7 @@ public class NestedFieldLiteralColumnIndexSupplier 
implements ColumnIndexSupplie
   {
     int globalStartIndex, globalEndIndex;
     int localStartIndex, localEndIndex;
+    // start with standard range finding in global value dictionary
     if (startValue == null) {
       globalStartIndex = adjust == 0 ? 1 : adjust; // global index 0 is always 
the null value
     } else {
@@ -196,15 +197,6 @@ public class NestedFieldLiteralColumnIndexSupplier 
implements ColumnIndexSupplie
         globalStartIndex = adjust + (-(found + 1));
       }
     }
-    // with starting global index settled, now lets find starting local index
-    int localFound = dictionary.indexOf(globalStartIndex);
-    if (localFound < 0) {
-      // the first valid global index is not within the local dictionary, so 
the insertion point is where we begin
-      localStartIndex = -(localFound + 1);
-    } else {
-      // valid global index in local dictionary, start here
-      localStartIndex = localFound;
-    }
 
     if (endValue == null) {
       globalEndIndex = globalDictionary.size() + adjust;
@@ -217,16 +209,32 @@ public class NestedFieldLiteralColumnIndexSupplier 
implements ColumnIndexSupplie
       }
     }
     globalEndIndex = Math.max(globalStartIndex, globalEndIndex);
-    // end index is not inclusive, so we find the last value in the local 
dictionary that falls within the range
-    int localEndFound = dictionary.indexOf(globalEndIndex - 1);
+
+    if (globalStartIndex == globalEndIndex) {
+      return new IntIntImmutablePair(0, 0);
+    }
+
+    // with global dictionary id range settled, now lets map that onto a local 
dictionary id range
+    int localFound = dictionary.indexOf(globalStartIndex);
+    if (localFound < 0) {
+      // the first valid global index is not within the local dictionary, so 
the insertion point is where we begin
+      localStartIndex = -(localFound + 1);
+    } else {
+      // valid global index in local dictionary, start here
+      localStartIndex = localFound;
+    }
+    // global end index is exclusive already, so we don't adjust local end 
index even for missing values
+    int localEndFound = dictionary.indexOf(globalEndIndex);
     if (localEndFound < 0) {
       localEndIndex = -localEndFound;
     } else {
-      // add 1 because the last valid global end value is in the local 
dictionary, and end index is exclusive
-      localEndIndex = localEndFound + 1;
+      localEndIndex = localEndFound;
     }
 
-    return new IntIntImmutablePair(localStartIndex, 
Math.min(dictionary.size(), localEndIndex));
+    localStartIndex = Math.min(localStartIndex, dictionary.size());
+    localEndIndex = Math.max(localStartIndex, Math.min(dictionary.size(), 
localEndIndex));
+
+    return new IntIntImmutablePair(localStartIndex, localEndIndex);
   }
 
 
diff --git 
a/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java 
b/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java
index dcfc5c058e..d175c7cf7d 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java
@@ -74,6 +74,9 @@ public class FixedIndexedTest extends 
InitializedNullHandlingTest
       Assert.assertEquals(LONGS[i], fixedIndexed.get(i));
       Assert.assertEquals(i, fixedIndexed.indexOf(LONGS[i]));
     }
+
+    Assert.assertThrows(IllegalArgumentException.class, () -> 
fixedIndexed.get(-1));
+    Assert.assertThrows(IllegalArgumentException.class, () -> 
fixedIndexed.get(LONGS.length));
   }
 
   @Test
diff --git 
a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
 
b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
index 850baf4412..8931746102 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
@@ -22,6 +22,7 @@ package org.apache.druid.segment.nested;
 import com.google.common.collect.ImmutableSet;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
 import org.apache.druid.collections.bitmap.MutableBitmap;
+import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.query.BitmapResultFactory;
 import org.apache.druid.query.DefaultBitmapResultFactory;
 import org.apache.druid.query.filter.DruidPredicateFactory;
@@ -34,6 +35,7 @@ import org.apache.druid.segment.column.DruidPredicateIndex;
 import org.apache.druid.segment.column.LexicographicalRangeIndex;
 import org.apache.druid.segment.column.NullValueIndex;
 import org.apache.druid.segment.column.NumericRangeIndex;
+import org.apache.druid.segment.column.SpatialIndex;
 import org.apache.druid.segment.column.StringValueSetIndex;
 import org.apache.druid.segment.column.TypeStrategies;
 import org.apache.druid.segment.data.BitmapSerdeFactory;
@@ -136,6 +138,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class);
     Assert.assertNotNull(nullIndex);
 
+    // sanity check to make sure we don't return indexes we don't support
+    Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
     // 10 rows
     // local: [b, foo, fooo, z]
     // column: [foo, b, fooo, b, z, fooo, z, b, b, foo]
@@ -497,6 +502,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     StringValueSetIndex valueSetIndex = 
indexSupplier.as(StringValueSetIndex.class);
     Assert.assertNotNull(valueSetIndex);
 
+    // sanity check to make sure we don't return indexes we don't support
+    Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
     // 10 rows
     // local: [1, 3, 100, 300]
     // column: [100, 1, 300, 1, 3, 3, 100, 300, 300, 1]
@@ -611,6 +619,25 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     Assert.assertEquals(0.5, columnIndex.estimateSelectivity(10), 0.0);
     bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
     checkBitmap(bitmap, 1, 3, 4, 7, 9);
+
+    // set index with null
+    TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
+    treeSet.add(null);
+    treeSet.add("1");
+    treeSet.add("3");
+    treeSet.add("300");
+    columnIndex = valueSetIndex.forSortedValues(treeSet);
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.8, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 1, 2, 3, 4, 5, 7, 8, 9);
+
+    // null value should really use NullValueIndex, but this works for classic 
reasons
+    columnIndex = valueSetIndex.forValue(null);
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 2, 5, 8);
   }
 
   @Test
@@ -674,6 +701,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     StringValueSetIndex valueSetIndex = 
indexSupplier.as(StringValueSetIndex.class);
     Assert.assertNotNull(valueSetIndex);
 
+    // sanity check to make sure we don't return indexes we don't support
+    Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
     // 10 rows
     // local: [1.1, 1.2, 3.3, 6.6]
     // column: [1.1, 1.1, 1.2, 3.3, 1.2, 6.6, 3.3, 1.2, 1.1, 3.3]
@@ -720,10 +750,10 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
 
     forRange = rangeIndex.forRange(1.1, true, 3.3, true);
     Assert.assertNotNull(forRange);
-    Assert.assertEquals(0.6, forRange.estimateSelectivity(10), 0.0);
+    Assert.assertEquals(0.3, forRange.estimateSelectivity(10), 0.0);
 
     bitmap = forRange.computeBitmapResult(bitmapResultFactory);
-    checkBitmap(bitmap, 2, 3, 4, 6, 7, 9);
+    checkBitmap(bitmap, 2, 4, 7);
 
     forRange = rangeIndex.forRange(null, true, null, true);
     Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0);
@@ -734,6 +764,55 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0);
     bitmap = forRange.computeBitmapResult(bitmapResultFactory);
     checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+
+    forRange = rangeIndex.forRange(1.111, true, 1.19, true);
+    Assert.assertNotNull(forRange);
+    Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+    bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    forRange = rangeIndex.forRange(1.01, true, 1.09, true);
+    Assert.assertNotNull(forRange);
+    Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+    bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    forRange = rangeIndex.forRange(0.05, true, 0.98, true);
+    Assert.assertNotNull(forRange);
+    Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+    bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    forRange = rangeIndex.forRange(0.05, true, 1.1, true);
+    Assert.assertNotNull(forRange);
+    Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+    bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    forRange = rangeIndex.forRange(8.99, true, 10.10, true);
+    Assert.assertNotNull(forRange);
+    Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+    bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    forRange = rangeIndex.forRange(8.99, true, 10.10, true);
+    Assert.assertNotNull(forRange);
+    Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+    bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    forRange = rangeIndex.forRange(10.00, true, 10.10, true);
+    Assert.assertNotNull(forRange);
+    Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
+
+    bitmap = forRange.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
   }
 
   @Test
@@ -802,6 +881,25 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0);
     bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
     checkBitmap(bitmap, 2, 4, 7, 9);
+
+    // set index with null
+    TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
+    treeSet.add(null);
+    treeSet.add("1.2");
+    treeSet.add("3.3");
+    treeSet.add("7.7");
+    columnIndex = valueSetIndex.forSortedValues(treeSet);
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.7, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 1, 2, 3, 4, 6, 7, 9);
+
+    // null value should really use NullValueIndex, but this works for classic 
reasons
+    columnIndex = valueSetIndex.forValue(null);
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 1, 3, 6);
   }
 
   @Test
@@ -865,6 +963,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class);
     Assert.assertNotNull(nullIndex);
 
+    // sanity check to make sure we don't return indexes we don't support
+    Assert.assertNull(indexSupplier.as(SpatialIndex.class));
+
     // 10 rows
     // local: [null, b, z, 1, 300, 1.1, 9.9]
     // column: [1, b, null, 9.9, 300, 1, z, null, 1.1, b]
@@ -912,6 +1013,26 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0);
     bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
     checkBitmap(bitmap, 1, 3, 4, 9);
+
+    // set index with null
+    TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
+    treeSet.add(null);
+    treeSet.add("b");
+    treeSet.add("300");
+    treeSet.add("9.9");
+    treeSet.add("1.6");
+    columnIndex = valueSetIndex.forSortedValues(treeSet);
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.6, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 1, 2, 3, 4, 7, 9);
+
+    // null value should really use NullValueIndex, but this works for classic 
reasons
+    columnIndex = valueSetIndex.forValue(null);
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.2, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 2, 7);
   }
 
   @Test
@@ -969,6 +1090,11 @@ public class NestedFieldLiteralColumnIndexSupplierTest 
extends InitializedNullHa
     Assert.assertEquals("300", lowLevelIndex.getValue(4));
     Assert.assertEquals("1.1", lowLevelIndex.getValue(5));
     Assert.assertEquals("9.9", lowLevelIndex.getValue(6));
+
+    Assert.assertEquals(7, lowLevelIndex.getCardinality());
+    checkBitmap(lowLevelIndex.getBitmap(0), 2, 7);
+    checkBitmap(lowLevelIndex.getBitmap(1), 1, 9);
+    checkBitmap(lowLevelIndex.getBitmap(-1));
   }
 
   private NestedFieldLiteralColumnIndexSupplier makeSingleTypeStringSupplier() 
throws IOException


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to