This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 9bb25e6de92 fix OR filter partial index value matcher on cursor reset 
(#18029)
9bb25e6de92 is described below

commit 9bb25e6de92a0597ad3616c784f12bc25a583337
Author: Clint Wylie <[email protected]>
AuthorDate: Thu May 22 12:36:55 2025 -0700

    fix OR filter partial index value matcher on cursor reset (#18029)
    
    * fix OR filter partial index value matcher on cursor reset
    
    * oops, missed a filter tuning
    
    * fix reset condition check for partial or index matchers, fix 
RowBasedColumnValueSelector by removing ignored marking since it could 
incorrectly eliminate columns that had some mvd rows and some number rows 
(uncommon, but im not sure it is technically illegal)
    
    * fixes
    
    * better variable name
    
    * more tests
---
 .../druid/segment/filter/ExpressionFilter.java     | 32 ++++------
 .../org/apache/druid/segment/filter/OrFilter.java  | 71 ++++++++++++++++------
 .../RowBasedExpressionColumnValueSelector.java     | 24 ++------
 .../druid/segment/filter/BaseFilterTest.java       | 25 ++++++++
 .../druid/segment/filter/ExpressionFilterTest.java | 17 ++++++
 .../apache/druid/segment/filter/OrFilterTest.java  | 40 ++++++++++++
 6 files changed, 150 insertions(+), 59 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
 
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
index 7d1424d76da..d4f52fa8ead 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
@@ -154,39 +154,31 @@ public class ExpressionFilter implements Filter
         }
 
         if (eval.type().isArray()) {
+          final Object[] result = eval.asArray();
+          if (result == null) {
+            // if value was null and includeUnknown was true we would have 
already returned before getting here so
+            // is safe to return false
+            return false;
+          }
           switch (eval.elementType().getType()) {
             case LONG:
-              final Object[] lResult = eval.asArray();
-              if (lResult == null) {
-                return false;
-              }
               if (includeUnknown) {
-                return Arrays.stream(lResult).anyMatch(o -> o == null || 
Evals.asBoolean((long) o));
+                return Arrays.stream(result).anyMatch(o -> o == null || 
Evals.asBoolean((long) o));
               }
 
-              return 
Arrays.stream(lResult).filter(Objects::nonNull).anyMatch(o -> 
Evals.asBoolean((long) o));
+              return Arrays.stream(result).filter(Objects::nonNull).anyMatch(o 
-> Evals.asBoolean((long) o));
             case STRING:
-              final Object[] sResult = eval.asArray();
-              if (sResult == null) {
-                return false;
-              }
-
               if (includeUnknown) {
-                return Arrays.stream(sResult).anyMatch(o -> o == null || 
Evals.asBoolean((String) o));
+                return Arrays.stream(result).anyMatch(o -> o == null || 
Evals.asBoolean((String) o));
               }
 
-              return Arrays.stream(sResult).anyMatch(o -> 
Evals.asBoolean((String) o));
+              return Arrays.stream(result).anyMatch(o -> 
Evals.asBoolean((String) o));
             case DOUBLE:
-              final Object[] dResult = eval.asArray();
-              if (dResult == null) {
-                return false;
-              }
-
               if (includeUnknown) {
-                return Arrays.stream(dResult).anyMatch(o -> o == null || 
Evals.asBoolean((double) o));
+                return Arrays.stream(result).anyMatch(o -> o == null || 
Evals.asBoolean((double) o));
               }
 
-              return 
Arrays.stream(dResult).filter(Objects::nonNull).anyMatch(o -> 
Evals.asBoolean((double) o));
+              return Arrays.stream(result).filter(Objects::nonNull).anyMatch(o 
-> Evals.asBoolean((double) o));
           }
         }
         return eval.asBoolean();
diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java 
b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
index 8363fd29c9f..ebf60288c9e 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
@@ -296,21 +296,29 @@ public class OrFilter implements BooleanFilter
 
     if (descending) {
 
-      final IntIterator iter = 
BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap);
+      final IntIterator initialIterator = 
BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap);
 
-      if (!iter.hasNext()) {
+      if (!initialIterator.hasNext()) {
         return ValueMatchers.allFalse();
       }
       return new ValueMatcher()
       {
         int iterOffset = Integer.MAX_VALUE;
+        int previousOffset = Integer.MAX_VALUE;
+        IntIterator iterator = initialIterator;
 
         @Override
         public boolean matches(boolean includeUnknown)
         {
           int currentOffset = offset.getOffset();
-          while (iterOffset > currentOffset && iter.hasNext()) {
-            iterOffset = iter.next();
+          // check if the cursor was reset, and reset iterator if so
+          if (currentOffset >= previousOffset) {
+            iterOffset = Integer.MAX_VALUE;
+            iterator = BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap);
+          }
+          previousOffset = currentOffset;
+          while (iterOffset > currentOffset && iterator.hasNext()) {
+            iterOffset = iterator.next();
           }
 
           return iterOffset == currentOffset;
@@ -320,26 +328,34 @@ public class OrFilter implements BooleanFilter
         public void inspectRuntimeShape(RuntimeShapeInspector inspector)
         {
           inspector.visit("offset", offset);
-          inspector.visit("iter", iter);
+          inspector.visit("iter", iterator);
         }
       };
     } else {
-      final PeekableIntIterator peekableIterator = 
rowBitmap.peekableIterator();
+      final PeekableIntIterator initialIterator = rowBitmap.peekableIterator();
 
-      if (!peekableIterator.hasNext()) {
+      if (!initialIterator.hasNext()) {
         return ValueMatchers.allFalse();
       }
       return new ValueMatcher()
       {
         int iterOffset = -1;
+        int previousOffset = -1;
+        PeekableIntIterator iterator = initialIterator;
 
         @Override
         public boolean matches(boolean includeUnknown)
         {
           int currentOffset = offset.getOffset();
-          peekableIterator.advanceIfNeeded(currentOffset);
-          if (peekableIterator.hasNext()) {
-            iterOffset = peekableIterator.peekNext();
+          // check if the cursor was reset, and reset iterator if so
+          if (currentOffset <= previousOffset) {
+            iterOffset = -1;
+            iterator = rowBitmap.peekableIterator();
+          }
+          previousOffset = currentOffset;
+          iterator.advanceIfNeeded(currentOffset);
+          if (iterator.hasNext()) {
+            iterOffset = iterator.peekNext();
           }
 
           return iterOffset == currentOffset;
@@ -349,7 +365,7 @@ public class OrFilter implements BooleanFilter
         public void inspectRuntimeShape(RuntimeShapeInspector inspector)
         {
           inspector.visit("offset", offset);
-          inspector.visit("peekableIterator", peekableIterator);
+          inspector.visit("peekableIterator", iterator);
         }
       };
     }
@@ -360,8 +376,8 @@ public class OrFilter implements BooleanFilter
       final ImmutableBitmap bitmap
   )
   {
-    final PeekableIntIterator peekableIntIterator = bitmap.peekableIterator();
-    if (!peekableIntIterator.hasNext()) {
+    final PeekableIntIterator initialIterator = bitmap.peekableIterator();
+    if (!initialIterator.hasNext()) {
       return BooleanVectorValueMatcher.of(vectorOffset, 
ConstantMatcherType.ALL_FALSE);
     }
 
@@ -369,19 +385,29 @@ public class OrFilter implements BooleanFilter
     {
       final VectorMatch match = VectorMatch.wrap(new 
int[vectorOffset.getMaxVectorSize()]);
       int iterOffset = -1;
+      int previousStartOffset = -1;
+      PeekableIntIterator iterator = initialIterator;
 
       @Override
       public ReadableVectorMatch match(ReadableVectorMatch mask, boolean 
includeUnknown)
       {
         final int[] selection = match.getSelection();
+
         if (vectorOffset.isContiguous()) {
+          final int startOffset = vectorOffset.getStartOffset();
+          // check if the cursor was reset, and reset iterator if so
+          if (startOffset <= previousStartOffset) {
+            iterOffset = -1;
+            iterator = bitmap.peekableIterator();
+          }
+          previousStartOffset = startOffset;
           int numRows = 0;
           for (int i = 0; i < mask.getSelectionSize(); i++) {
             final int maskNum = mask.getSelection()[i];
-            final int rowNum = vectorOffset.getStartOffset() + maskNum;
-            peekableIntIterator.advanceIfNeeded(rowNum);
-            if (peekableIntIterator.hasNext()) {
-              iterOffset = peekableIntIterator.peekNext();
+            final int rowNum = startOffset + maskNum;
+            iterator.advanceIfNeeded(rowNum);
+            if (iterator.hasNext()) {
+              iterOffset = iterator.peekNext();
               if (iterOffset == rowNum) {
                 selection[numRows++] = maskNum;
               }
@@ -391,13 +417,18 @@ public class OrFilter implements BooleanFilter
           return match;
         } else {
           final int[] currentOffsets = vectorOffset.getOffsets();
+          if (getCurrentVectorSize() > 0 && currentOffsets[0] <= 
previousStartOffset) {
+            iterOffset = -1;
+            iterator = bitmap.peekableIterator();
+          }
+          previousStartOffset = currentOffsets[0];
           int numRows = 0;
           for (int i = 0; i < mask.getSelectionSize(); i++) {
             final int maskNum = mask.getSelection()[i];
             final int rowNum = currentOffsets[mask.getSelection()[i]];
-            peekableIntIterator.advanceIfNeeded(rowNum);
-            if (peekableIntIterator.hasNext()) {
-              iterOffset = peekableIntIterator.peekNext();
+            iterator.advanceIfNeeded(rowNum);
+            if (iterator.hasNext()) {
+              iterOffset = iterator.peekNext();
               if (iterOffset == rowNum) {
                 selection[numRows++] = maskNum;
               }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
 
b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
index 86e4aedb988..085510aaccf 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.segment.virtual;
 
+import com.google.common.collect.Lists;
 import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
 import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
 import org.apache.druid.math.expr.Expr;
@@ -27,10 +28,7 @@ import org.apache.druid.math.expr.Parser;
 import org.apache.druid.segment.RowIdSupplier;
 
 import javax.annotation.Nullable;
-import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 import java.util.stream.Collectors;
 
 /**
@@ -46,7 +44,6 @@ public class RowBasedExpressionColumnValueSelector extends 
BaseExpressionColumnV
   private final Expr expression;
   private final List<String> unknownColumns;
   private final Expr.BindingAnalysis baseBindingAnalysis;
-  private final Set<String> ignoredColumns;
   private final Int2ObjectMap<Expr> transformedCache;
 
   public RowBasedExpressionColumnValueSelector(
@@ -63,7 +60,6 @@ public class RowBasedExpressionColumnValueSelector extends 
BaseExpressionColumnV
                               .filter(x -> 
!plan.getAnalysis().getArrayBindings().contains(x))
                               .collect(Collectors.toList());
     this.baseBindingAnalysis = plan.getAnalysis();
-    this.ignoredColumns = new HashSet<>();
     this.transformedCache = new Int2ObjectArrayMap<>(unknownColumns.size());
   }
 
@@ -71,7 +67,7 @@ public class RowBasedExpressionColumnValueSelector extends 
BaseExpressionColumnV
   protected ExprEval<?> eval()
   {
     // check to find any arrays for this row
-    List<String> arrayBindings = new ArrayList<>();
+    final List<String> arrayBindings = 
Lists.newArrayListWithCapacity(unknownColumns.size());
 
     for (String unknownColumn : unknownColumns) {
       if (isBindingArray(unknownColumn)) {
@@ -79,20 +75,14 @@ public class RowBasedExpressionColumnValueSelector extends 
BaseExpressionColumnV
       }
     }
 
-    // eliminate anything that will never be an array
-    if (ignoredColumns.size() > 0) {
-      unknownColumns.removeAll(ignoredColumns);
-      ignoredColumns.clear();
-    }
-
     // if there are arrays, we need to transform the expression to one that 
applies each value of the array to the
     // base expression, we keep a cache of transformed expressions to minimize 
extra work
-    if (arrayBindings.size() > 0) {
+    if (!arrayBindings.isEmpty()) {
       final int key = arrayBindings.hashCode();
       if (transformedCache.containsKey(key)) {
         return transformedCache.get(key).eval(bindings);
       }
-      Expr transformed = Parser.applyUnappliedBindings(expression, 
baseBindingAnalysis, arrayBindings);
+      final Expr transformed = Parser.applyUnappliedBindings(expression, 
baseBindingAnalysis, arrayBindings);
       transformedCache.put(key, transformed);
       return transformed.eval(bindings);
     }
@@ -108,11 +98,7 @@ public class RowBasedExpressionColumnValueSelector extends 
BaseExpressionColumnV
   {
     Object binding = bindings.get(x);
     if (binding != null) {
-      if (binding instanceof Object[] && ((Object[]) binding).length > 0) {
-        return true;
-      } else if (binding instanceof Number) {
-        ignoredColumns.add(x);
-      }
+      return binding instanceof Object[] && ((Object[]) binding).length > 0;
     }
     return false;
   }
diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java 
b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
index fa73bf7e983..7cbc9cdc29a 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
@@ -732,6 +732,11 @@ public abstract class BaseFilterTest extends 
InitializedNullHandlingTest
     return false;
   }
 
+  protected boolean hasTypeInformation()
+  {
+    return !testName.contains("rowBasedWithoutTypeSignature");
+  }
+
   protected boolean canTestArrayColumns()
   {
     if (testName.contains("frame (columnar)") || 
testName.contains("rowBasedWithoutTypeSignature")) {
@@ -805,6 +810,12 @@ public abstract class BaseFilterTest extends 
InitializedNullHandlingTest
 
       final List<String> values = new ArrayList<>();
 
+      while (!cursor.isDone()) {
+        IndexedInts row = selector.getRow();
+        Preconditions.checkState(row.size() == 1);
+        cursor.advance();
+      }
+      cursor.reset();
       while (!cursor.isDone()) {
         IndexedInts row = selector.getRow();
         Preconditions.checkState(row.size() == 1);
@@ -916,6 +927,12 @@ public abstract class BaseFilterTest extends 
InitializedNullHandlingTest
 
       final List<String> values = new ArrayList<>();
 
+      while (!cursor.isDone()) {
+        IndexedInts row = selector.getRow();
+        Preconditions.checkState(row.size() == 1);
+        cursor.advance();
+      }
+      cursor.reset();
       while (!cursor.isDone()) {
         IndexedInts row = selector.getRow();
         Preconditions.checkState(row.size() == 1);
@@ -976,6 +993,10 @@ public abstract class BaseFilterTest extends 
InitializedNullHandlingTest
 
       final List<String> values = new ArrayList<>();
 
+      while (!cursor.isDone()) {
+        cursor.advance();
+      }
+      cursor.reset();
       while (!cursor.isDone()) {
         final int[] rowVector = selector.getRowVector();
         for (int i = 0; i < cursor.getCurrentVectorSize(); i++) {
@@ -1001,6 +1022,10 @@ public abstract class BaseFilterTest extends 
InitializedNullHandlingTest
 
       final List<String> values = new ArrayList<>();
 
+      while (!cursor.isDone()) {
+        cursor.advance();
+      }
+      cursor.reset();
       while (!cursor.isDone()) {
         final int[] rowVector = selector.getRowVector();
         for (int i = 0; i < cursor.getCurrentVectorSize(); i++) {
diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
 
b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
index 5509af29694..8e32a2d8fbb 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
@@ -176,6 +176,23 @@ public class ExpressionFilterTest extends BaseFilterTest
     assertFilterMatchesSkipVectorize(edf("dim4 == ''"), ImmutableList.of("2"));
     // AS per SQL standard null == null returns false.
     assertFilterMatchesSkipVectorize(edf("dim4 == null"), ImmutableList.of());
+    if (hasTypeInformation()) {
+      assertFilterMatchesSkipVectorize(edf("isnull(dim4)"), 
ImmutableList.of("1", "6", "7", "8"));
+      assertFilterMatchesSkipVectorize(
+          NotDimFilter.of(edf("isnull(dim4)")),
+          ImmutableList.of("0", "2", "3", "4", "5", "9")
+      );
+      assertFilterMatchesSkipVectorize(edf("cast(dim4, 'LONG')"), 
ImmutableList.of("0", "3", "4", "5", "9"));
+      assertFilterMatchesSkipVectorize(NotDimFilter.of(edf("cast(dim4, 
'LONG')")), ImmutableList.of());
+      assertFilterMatchesSkipVectorize(edf("cast(dim4, 'STRING')"), 
ImmutableList.of());
+      assertFilterMatchesSkipVectorize(
+          NotDimFilter.of(edf("cast(dim4, 'STRING')")),
+          ImmutableList.of("0", "2", "3", "4", "5", "9")
+      );
+      assertFilterMatchesSkipVectorize(edf("cast(dim4, 'DOUBLE')"), 
ImmutableList.of("0", "3", "4", "5", "9"));
+      assertFilterMatchesSkipVectorize(NotDimFilter.of(edf("cast(dim4, 
'DOUBLE')")), ImmutableList.of());
+    }
+
     assertFilterMatchesSkipVectorize(edf("dim4 == '1'"), 
ImmutableList.of("0"));
     assertFilterMatchesSkipVectorize(edf("dim4 == '3'"), 
ImmutableList.of("3"));
     assertFilterMatchesSkipVectorize(edf("dim4 == '4'"), ImmutableList.of("4", 
"5"));
diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java 
b/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java
index 60da4f0c12a..dfc3d1f2a1f 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/OrFilterTest.java
@@ -32,6 +32,7 @@ import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.Pair;
 import org.apache.druid.query.filter.AndDimFilter;
+import org.apache.druid.query.filter.FilterTuning;
 import org.apache.druid.query.filter.InDimFilter;
 import org.apache.druid.query.filter.NotDimFilter;
 import org.apache.druid.query.filter.OrDimFilter;
@@ -238,6 +239,31 @@ public class OrFilterTest extends BaseFilterTest
     );
   }
 
+  @Test
+  public void testTwoFilterFirstMatchesSomeSecondMatchesNonePartialIndex()
+  {
+    assertFilterMatches(
+        new OrDimFilter(
+            ImmutableList.of(
+                new InDimFilter("dim0", ImmutableSet.of("1", "2", "3"), null),
+                new SelectorDimFilter("dim1", "7", null, new 
FilterTuning(false, null, null))
+            )
+        ),
+        ImmutableList.of("1", "2", "3")
+    );
+    assertFilterMatches(
+        NotDimFilter.of(
+            new OrDimFilter(
+                ImmutableList.of(
+                    new InDimFilter("dim0", ImmutableSet.of("1", "2", "3"), 
null),
+                    new SelectorDimFilter("dim1", "7", null, new 
FilterTuning(false, null, null))
+                )
+            )
+        ),
+        ImmutableList.of("0", "4", "5")
+    );
+  }
+
   @Test
   public void testTwoFilterFirstMatchesNoneSecondMatchesSome()
   {
@@ -252,6 +278,20 @@ public class OrFilterTest extends BaseFilterTest
     );
   }
 
+  @Test
+  public void testTwoFilterFirstMatchesNoneSecondMatchesSomePartialIndex()
+  {
+    assertFilterMatches(
+        new OrDimFilter(
+            ImmutableList.of(
+                new SelectorDimFilter("dim1", "7", null, new 
FilterTuning(false, null, null)),
+                new InDimFilter("dim0", ImmutableSet.of("1", "2", "3"), null)
+            )
+        ),
+        ImmutableList.of("1", "2", "3")
+    );
+  }
+
   @Test
   public void testTwoFilterFirstMatchesNoneSecondMatchesNone()
   {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to