This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new ccc1ffb0323 Additional short circuiting knowledge in filter bundles. (#16292)
ccc1ffb0323 is described below

commit ccc1ffb03238c2318a622a95e808b83952abd586
Author: Gian Merlino <[email protected]>
AuthorDate: Tue Apr 16 22:42:28 2024 -0700

    Additional short circuiting knowledge in filter bundles. (#16292)
    
    * Additional short circuiting knowledge in filter bundles.
    
    Three updates:
    
    1) The parameter "selectionRowCount" on "makeFilterBundle" is renamed
       "applyRowCount", and redefined as an upper bound on rows remaining
       after short-circuiting (rather than number of rows selected so far).
       This definition works better for OR filters, which pass through the
       FALSE set rather than the TRUE set to the next subfilter.
    
    2) AndFilter uses min(applyRowCount, indexIntersectionSize) rather
       than using selectionRowCount for the first subfilter and
       indexIntersectionSize for each filter thereafter. This improves
       accuracy when the incoming applyRowCount is smaller than the row
       count from the first few indexes.
    
    3) OrFilter uses min(applyRowCount, totalRowCount - indexUnionSize) rather
       than applyRowCount for subfilters. This allows an OR filter to pass
       information about short-circuiting to its subfilters.
    
    To help write tests for this, the patch also moves the sampled
    wikiticker data file from sql to processing.
    
    * Forbidden APIs.
    
    * Forbidden APIs.
    
    * Better comments.
    
    * Fix inspection.
    
    * Adjustments to tests.
---
 .../java/org/apache/druid/query/filter/Filter.java |  10 +-
 .../apache/druid/query/filter/FilterBundle.java    |  74 ++++-
 .../org/apache/druid/segment/filter/AndFilter.java |  14 +-
 .../druid/segment/filter/IsBooleanFilter.java      |   6 +-
 .../org/apache/druid/segment/filter/NotFilter.java |   4 +-
 .../org/apache/druid/segment/filter/OrFilter.java  |  15 +-
 .../druid/segment/index/BitmapColumnIndex.java     |   8 +-
 .../index/DictionaryRangeScanningBitmapIndex.java  |   4 +-
 .../index/DictionaryScanningBitmapIndex.java       |   4 +-
 .../java/org/apache/druid/segment/TestIndex.java   |  67 +++++
 .../druid/segment/filter/FilterBundleTest.java     | 328 +++++++++++++++++++++
 .../wikiticker-2015-09-12-sampled.json.gz          | Bin
 .../druid/sql/calcite/util/TestDataBuilder.java    |  60 ++--
 13 files changed, 523 insertions(+), 71 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java 
b/processing/src/main/java/org/apache/druid/query/filter/Filter.java
index 4a83b4c1b8a..9fae7d51d20 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java
@@ -52,7 +52,11 @@ public interface Filter
    *                              them
    * @param bitmapResultFactory - wrapper for {@link ImmutableBitmap} 
operations to tie into
    *                              {@link org.apache.druid.query.QueryMetrics} 
and build the output indexes
-   * @param selectionRowCount   - number of rows selected so far by any 
previous bundle computations
+   * @param applyRowCount       - upper bound on number of rows this filter 
would be applied to, after removing rows
+   *                              short-circuited by prior bundle operations. 
For example, given "x AND y", if "x" is
+   *                              resolved using an index, then "y" will 
receive the number of rows that matched
+   *                              the filter "x". As another example, given "x 
OR y", if "x" is resolved using an
+   *                              index, then "y" will receive the number of 
rows that did *not* match the filter "x".
    * @param totalRowCount       - total number of rows to be scanned if no 
indexes are applied
    * @param includeUnknown      - mapping for Druid native two state logic 
system into SQL three-state logic system. If
    *                              set to true, bitmaps returned by this method 
should include true bits for any rows
@@ -65,7 +69,7 @@ public interface Filter
   default <T> FilterBundle makeFilterBundle(
       ColumnIndexSelector columnIndexSelector,
       BitmapResultFactory<T> bitmapResultFactory,
-      int selectionRowCount,
+      int applyRowCount,
       int totalRowCount,
       boolean includeUnknown
   )
@@ -77,7 +81,7 @@ public interface Filter
       final long bitmapConstructionStartNs = System.nanoTime();
       final T result = columnIndex.computeBitmapResult(
           bitmapResultFactory,
-          selectionRowCount,
+          applyRowCount,
           totalRowCount,
           includeUnknown
       );
diff --git 
a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java 
b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
index 78d666c61cb..695eb26c00b 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
@@ -38,6 +38,7 @@ import java.util.List;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Function;
 import java.util.function.Supplier;
+import java.util.regex.Pattern;
 
 /**
  * FilterBundle is a container for all the goodies used for producing filtered 
cursors, a {@link ImmutableBitmap} if
@@ -120,7 +121,9 @@ public class FilterBundle
   public interface IndexBundle
   {
     IndexBundleInfo getIndexInfo();
+
     ImmutableBitmap getBitmap();
+
     ColumnIndexCapabilities getIndexCapabilities();
   }
 
@@ -135,7 +138,9 @@ public class FilterBundle
   public interface MatcherBundle
   {
     MatcherBundleInfo getMatcherInfo();
+
     ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset 
baseOffset, boolean descending);
+
     VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory 
selectorFactory, ReadableVectorOffset baseOffset);
   }
 
@@ -205,7 +210,10 @@ public class FilterBundle
     }
 
     @Override
-    public VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory 
selectorFactory, ReadableVectorOffset baseOffset)
+    public VectorValueMatcher vectorMatcher(
+        VectorColumnSelectorFactory selectorFactory,
+        ReadableVectorOffset baseOffset
+    )
     {
       return vectorMatcherFn.apply(selectorFactory);
     }
@@ -240,6 +248,21 @@ public class FilterBundle
       return matcher;
     }
 
+    /**
+     * Return a multiline description string, suitable for comparisons in 
tests.
+     */
+    public String describe()
+    {
+      final StringBuilder sb = new StringBuilder();
+      if (index != null) {
+        sb.append(index.describe());
+      }
+      if (matcher != null) {
+        sb.append(matcher.describe());
+      }
+      return sb.toString();
+    }
+
     @Override
     public String toString()
     {
@@ -249,6 +272,8 @@ public class FilterBundle
 
   public static class IndexBundleInfo
   {
+    private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");
+
     private final Supplier<String> filter;
     private final List<IndexBundleInfo> indexes;
     private final int selectionSize;
@@ -292,6 +317,27 @@ public class FilterBundle
       return indexes;
     }
 
+    /**
+     * Return a multiline description string, suitable for comparisons in 
tests.
+     */
+    public String describe()
+    {
+      final StringBuilder sb = new StringBuilder()
+          .append("index: ")
+          .append(filter.get())
+          .append(" (selectionSize = ")
+          .append(selectionSize)
+          .append(")\n");
+
+      if (indexes != null) {
+        for (final IndexBundleInfo info : indexes) {
+          sb.append(PATTERN_LINE_START.matcher(info.describe()).replaceAll("  
"));
+        }
+      }
+
+      return sb.toString();
+    }
+
     @Override
     public String toString()
     {
@@ -306,6 +352,8 @@ public class FilterBundle
 
   public static class MatcherBundleInfo
   {
+    private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");
+
     private final Supplier<String> filter;
     @Nullable
     final List<MatcherBundleInfo> matchers;
@@ -345,6 +393,30 @@ public class FilterBundle
       return matchers;
     }
 
+    /**
+     * Return a multiline description string, suitable for comparisons in 
tests.
+     */
+    public String describe()
+    {
+      final StringBuilder sb = new StringBuilder()
+          .append("matcher: ")
+          .append(filter.get())
+          .append("\n");
+
+      if (partialIndex != null) {
+        sb.append("  with partial ")
+          
.append(PATTERN_LINE_START.matcher(partialIndex.describe()).replaceAll("  
").substring(2));
+      }
+
+      if (matchers != null) {
+        for (MatcherBundleInfo info : matchers) {
+          sb.append(PATTERN_LINE_START.matcher(info.describe()).replaceAll("  
"));
+        }
+      }
+
+      return sb.toString();
+    }
+
     @Override
     public String toString()
     {
diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java 
b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java
index f9001689d4a..036c6a8250a 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java
@@ -76,7 +76,7 @@ public class AndFilter implements BooleanFilter
   public <T> FilterBundle makeFilterBundle(
       ColumnIndexSelector columnIndexSelector,
       BitmapResultFactory<T> bitmapResultFactory,
-      int selectionRowCount,
+      int applyRowCount,
       int totalRowCount,
       boolean includeUnknown
   )
@@ -85,7 +85,7 @@ public class AndFilter implements BooleanFilter
     final List<FilterBundle.MatcherBundle> matcherBundles = new ArrayList<>();
     final List<FilterBundle.MatcherBundleInfo> matcherBundleInfos = new 
ArrayList<>();
 
-    int selectionCount = selectionRowCount;
+    int indexIntersectionSize = totalRowCount;
     ImmutableBitmap index = null;
     ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, 
true);
     // AND filter can be partitioned into a bundle that has both indexes and 
value matchers. The filters which support
@@ -101,7 +101,7 @@ public class AndFilter implements BooleanFilter
       final FilterBundle subBundle = subfilter.makeFilterBundle(
           columnIndexSelector,
           bitmapResultFactory,
-          selectionCount,
+          Math.min(applyRowCount, indexIntersectionSize),
           totalRowCount,
           includeUnknown
       );
@@ -120,7 +120,7 @@ public class AndFilter implements BooleanFilter
         } else {
           index = index.intersection(subBundle.getIndex().getBitmap());
         }
-        selectionCount = index.size();
+        indexIntersectionSize = index.size();
       }
       if (subBundle.getMatcherBundle() != null) {
         matcherBundles.add(subBundle.getMatcherBundle());
@@ -140,7 +140,7 @@ public class AndFilter implements BooleanFilter
         indexBundle = new FilterBundle.SimpleIndexBundle(
             new FilterBundle.IndexBundleInfo(
                 () -> "AND",
-                selectionCount,
+                indexIntersectionSize,
                 System.nanoTime() - bitmapConstructionStartNs,
                 indexBundleInfos
             ),
@@ -247,7 +247,7 @@ public class AndFilter implements BooleanFilter
       @Override
       public <T> T computeBitmapResult(
           BitmapResultFactory<T> bitmapResultFactory,
-          int selectionRowCount,
+          int applyRowCount,
           int totalRowCount,
           boolean includeUnknown
       )
@@ -256,7 +256,7 @@ public class AndFilter implements BooleanFilter
         for (final BitmapColumnIndex index : bitmapColumnIndices) {
           final T bitmapResult = index.computeBitmapResult(
               bitmapResultFactory,
-              selectionRowCount,
+              applyRowCount,
               totalRowCount,
               includeUnknown
           );
diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java 
b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
index 39a4f90c263..75eaebb27c8 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
@@ -94,7 +94,7 @@ public class IsBooleanFilter implements Filter
         @Override
         public <T> T computeBitmapResult(
             BitmapResultFactory<T> bitmapResultFactory,
-            int selectionRowCount,
+            int applyRowCount,
             int totalRowCount,
             boolean includeUnknown
         )
@@ -102,7 +102,7 @@ public class IsBooleanFilter implements Filter
           if (isTrue) {
             return baseIndex.computeBitmapResult(
                 bitmapResultFactory,
-                selectionRowCount,
+                applyRowCount,
                 totalRowCount,
                 false
             );
@@ -110,7 +110,7 @@ public class IsBooleanFilter implements Filter
 
           final T result = baseIndex.computeBitmapResult(
               bitmapResultFactory,
-              selectionRowCount,
+              applyRowCount,
               totalRowCount,
               useThreeValueLogic
           );
diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java 
b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
index b4fb793deb1..58dd90c8af1 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
@@ -93,14 +93,14 @@ public class NotFilter implements Filter
         @Override
         public <T> T computeBitmapResult(
             BitmapResultFactory<T> bitmapResultFactory,
-            int selectionRowCount,
+            int applyRowCount,
             int totalRowCount,
             boolean includeUnknown
         )
         {
           final T result = baseIndex.computeBitmapResult(
               bitmapResultFactory,
-              selectionRowCount,
+              applyRowCount,
               totalRowCount,
               !includeUnknown && useThreeValueLogic
           );
diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java 
b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
index 9373ea142e9..784507caf03 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
@@ -81,7 +81,7 @@ public class OrFilter implements BooleanFilter
   public <T> FilterBundle makeFilterBundle(
       ColumnIndexSelector columnIndexSelector,
       BitmapResultFactory<T> bitmapResultFactory,
-      int selectionRowCount,
+      int applyRowCount,
       int totalRowCount,
       boolean includeUnknown
   )
@@ -99,6 +99,8 @@ public class OrFilter implements BooleanFilter
     final List<FilterBundle.IndexBundleInfo> indexOnlyBundlesInfo = new 
ArrayList<>();
     final List<FilterBundle.MatcherBundle> partialIndexBundles = new 
ArrayList<>();
     final List<FilterBundle.MatcherBundle> matcherOnlyBundles = new 
ArrayList<>();
+
+    int indexUnionSize = 0;
     ImmutableBitmap index = null;
     ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, 
true);
     int emptyCount = 0;
@@ -109,7 +111,7 @@ public class OrFilter implements BooleanFilter
       final FilterBundle bundle = subfilter.makeFilterBundle(
           columnIndexSelector,
           bitmapResultFactory,
-          selectionRowCount,
+          Math.min(applyRowCount, totalRowCount - indexUnionSize),
           totalRowCount,
           includeUnknown
       );
@@ -138,6 +140,7 @@ public class OrFilter implements BooleanFilter
             } else {
               index = index.union(bundle.getIndex().getBitmap());
             }
+            indexUnionSize = index.size();
           }
         }
       } else {
@@ -165,7 +168,7 @@ public class OrFilter implements BooleanFilter
           new FilterBundle.SimpleIndexBundle(
               new FilterBundle.IndexBundleInfo(
                   () -> "OR",
-                  selectionRowCount,
+                  applyRowCount,
                   totalBitmapConstructTimeNs,
                   indexOnlyBundlesInfo
               ),
@@ -185,7 +188,7 @@ public class OrFilter implements BooleanFilter
     if (!indexOnlyBundles.isEmpty()) {
       // translate the indexOnly bundles into a single matcher
       final FilterBundle.MatcherBundle matcherBundle = 
convertIndexToMatcherBundle(
-          selectionRowCount,
+          applyRowCount,
           indexOnlyBundles,
           indexOnlyBundlesInfo,
           totalBitmapConstructTimeNs,
@@ -284,14 +287,14 @@ public class OrFilter implements BooleanFilter
       @Override
       public <T> T computeBitmapResult(
           BitmapResultFactory<T> bitmapResultFactory,
-          int selectionRowCount,
+          int applyRowCount,
           int totalRowCount,
           boolean includeUnknown
       )
       {
         List<T> results = 
Lists.newArrayListWithCapacity(bitmapColumnIndices.size());
         for (BitmapColumnIndex index : bitmapColumnIndices) {
-          final T r = index.computeBitmapResult(bitmapResultFactory, 
selectionRowCount, totalRowCount, includeUnknown);
+          final T r = index.computeBitmapResult(bitmapResultFactory, 
applyRowCount, totalRowCount, includeUnknown);
           if (r == null) {
             // all or nothing
             return null;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
 
b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
index 154dd66e206..04a5bb8b6b5 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
@@ -59,7 +59,11 @@ public interface BitmapColumnIndex
    *
    * @param bitmapResultFactory helper to format the {@link 
org.apache.druid.collections.bitmap.ImmutableBitmap} in a
    *                            form ready for consumption by callers
-   * @param selectionRowCount   number of rows selected so far by any previous 
index computations
+   * @param applyRowCount       upper bound on number of rows this filter 
would be applied to, after removing rows
+   *                            short-circuited by prior bundle operations. 
For example, given "x AND y", if "x" is
+   *                            resolved using an index, then "y" will receive 
the number of rows that matched
+   *                            the filter "x". As another example, given "x 
OR y", if "x" is resolved using an
+   *                            index, then "y" will receive the number of 
rows that did *not* match the filter "x".
    * @param totalRowCount       total number of rows to be scanned if no 
indexes are used
    * @param includeUnknown      mapping for Druid native two state logic 
system into SQL three-state logic system. If
    *                            set to true, bitmaps returned by this method 
should include true bits for any rows where
@@ -71,7 +75,7 @@ public interface BitmapColumnIndex
   @Nullable
   default <T> T computeBitmapResult(
       BitmapResultFactory<T> bitmapResultFactory,
-      int selectionRowCount,
+      int applyRowCount,
       int totalRowCount,
       boolean includeUnknown
   )
diff --git 
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
 
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
index 6b175297181..37e84bb8b5d 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
@@ -50,12 +50,12 @@ public abstract class DictionaryRangeScanningBitmapIndex 
extends SimpleImmutable
   @Override
   public final <T> T computeBitmapResult(
       BitmapResultFactory<T> bitmapResultFactory,
-      int selectionRowCount,
+      int applyRowCount,
       int totalRowCount,
       boolean includeUnknown
   )
   {
-    final int scale = (int) Math.ceil(sizeScale * selectionRowCount);
+    final int scale = (int) Math.ceil(sizeScale * applyRowCount);
     if (rangeSize > scale) {
       return null;
     }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
 
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
index 2bde27dc52c..1ae00a846ba 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
@@ -53,12 +53,12 @@ public abstract class DictionaryScanningBitmapIndex extends 
SimpleImmutableBitma
   @Override
   public final <T> T computeBitmapResult(
       BitmapResultFactory<T> bitmapResultFactory,
-      int selectionRowCount,
+      int applyRowCount,
       int totalRowCount,
       boolean includeUnknown
   )
   {
-    if (selectionRowCount != totalRowCount && selectionRowCount < 
(dictionarySize * scaleThreshold)) {
+    if (applyRowCount != totalRowCount && applyRowCount < (dictionarySize * 
scaleThreshold)) {
       return null;
     }
     return 
bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(includeUnknown));
diff --git a/processing/src/test/java/org/apache/druid/segment/TestIndex.java 
b/processing/src/test/java/org/apache/druid/segment/TestIndex.java
index 374cf6c7e01..4aceafa5f3a 100644
--- a/processing/src/test/java/org/apache/druid/segment/TestIndex.java
+++ b/processing/src/test/java/org/apache/druid/segment/TestIndex.java
@@ -24,6 +24,7 @@ import com.google.common.base.Suppliers;
 import com.google.common.io.CharSource;
 import com.google.common.io.LineProcessor;
 import com.google.common.io.Resources;
+import org.apache.druid.data.input.ResourceInputSource;
 import org.apache.druid.data.input.impl.DelimitedParseSpec;
 import org.apache.druid.data.input.impl.DimensionSchema;
 import org.apache.druid.data.input.impl.DimensionsSpec;
@@ -37,6 +38,7 @@ import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.query.NestedDataTestUtils;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory;
 import org.apache.druid.query.aggregation.DoubleMinAggregatorFactory;
@@ -68,6 +70,7 @@ import java.util.List;
 import java.util.concurrent.atomic.AtomicLong;
 
 /**
+ *
  */
 public class TestIndex
 {
@@ -222,6 +225,9 @@ public class TestIndex
                    .build()
       )
   );
+  private static Supplier<QueryableIndex> wikipediaMMappedIndex = 
Suppliers.memoize(
+      () -> persistRealtimeAndLoadMMapped(makeWikipediaIncrementalIndex())
+  );
 
   public static IncrementalIndex getIncrementalTestIndex()
   {
@@ -243,6 +249,11 @@ public class TestIndex
     return mmappedIndex.get();
   }
 
+  public static QueryableIndex getMMappedWikipediaIndex()
+  {
+    return wikipediaMMappedIndex.get();
+  }
+
   public static QueryableIndex getNoRollupMMappedTestIndex()
   {
     return noRollupMmappedIndex.get();
@@ -322,6 +333,62 @@ public class TestIndex
     }
   }
 
+  public static IncrementalIndex makeWikipediaIncrementalIndex()
+  {
+    final List<DimensionSchema> dimensions = Arrays.asList(
+        new StringDimensionSchema("channel"),
+        new StringDimensionSchema("cityName"),
+        new StringDimensionSchema("comment"),
+        new StringDimensionSchema("countryIsoCode"),
+        new StringDimensionSchema("countryName"),
+        new StringDimensionSchema("isAnonymous"),
+        new StringDimensionSchema("isMinor"),
+        new StringDimensionSchema("isNew"),
+        new StringDimensionSchema("isRobot"),
+        new StringDimensionSchema("isUnpatrolled"),
+        new StringDimensionSchema("metroCode"),
+        new StringDimensionSchema("namespace"),
+        new StringDimensionSchema("page"),
+        new StringDimensionSchema("regionIsoCode"),
+        new StringDimensionSchema("regionName"),
+        new StringDimensionSchema("user"),
+        new LongDimensionSchema("delta"),
+        new LongDimensionSchema("added"),
+        new LongDimensionSchema("deleted")
+    );
+
+    final File tmpDir;
+    try {
+      tmpDir = FileUtils.createTempDir("test-index-input-source");
+      try {
+        return IndexBuilder
+            .create()
+            
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
+            .schema(new IncrementalIndexSchema.Builder()
+                        .withRollup(false)
+                        .withTimestampSpec(new TimestampSpec("time", null, 
null))
+                        .withDimensionsSpec(new DimensionsSpec(dimensions))
+                        .build()
+            )
+            .inputSource(
+                ResourceInputSource.of(
+                    TestIndex.class.getClassLoader(),
+                    "wikipedia/wikiticker-2015-09-12-sampled.json.gz"
+                )
+            )
+            .inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT)
+            .inputTmpDir(new File(tmpDir, "tmpWikipedia1"))
+            .buildIncrementalIndex();
+      }
+      finally {
+        FileUtils.deleteDirectory(tmpDir);
+      }
+    }
+    catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   public static IncrementalIndex loadIncrementalIndex(
       final IncrementalIndex retVal,
       final CharSource source
diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
 
b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
new file mode 100644
index 00000000000..aa764535444
--- /dev/null
+++ 
b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
@@ -0,0 +1,328 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.filter;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.query.DefaultBitmapResultFactory;
+import org.apache.druid.query.filter.ColumnIndexSelector;
+import org.apache.druid.query.filter.EqualityFilter;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.filter.FilterBundle;
+import org.apache.druid.query.filter.LikeDimFilter;
+import org.apache.druid.query.filter.NullFilter;
+import org.apache.druid.query.filter.TypedInFilter;
+import org.apache.druid.segment.ColumnCache;
+import org.apache.druid.segment.ColumnSelectorColumnIndexSelector;
+import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.TestIndex;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class FilterBundleTest extends InitializedNullHandlingTest
+{
+  private Closer closer;
+  protected BitmapFactory bitmapFactory;
+  protected ColumnIndexSelector indexSelector;
+
+  @Rule
+  public TemporaryFolder tmpDir = new TemporaryFolder();
+
+  @Before
+  public void setUp()
+  {
+    final QueryableIndex index = TestIndex.getMMappedWikipediaIndex();
+    closer = Closer.create();
+    bitmapFactory = index.getBitmapFactoryForDimensions();
+    indexSelector = new ColumnSelectorColumnIndexSelector(
+        bitmapFactory,
+        VirtualColumns.EMPTY,
+        new ColumnCache(index, closer)
+    );
+  }
+
+  @After
+  public void tearDown() throws Exception
+  {
+    closer.close();
+    indexSelector = null;
+  }
+
+  @Test
+  public void test_or_country_isRobot()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new OrFilter(
+            ImmutableList.of(
+                new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
+                new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "index: OR (selectionSize = 39244)\n"
+        + "  index: countryName = United States (selectionSize = 528)\n"
+        + "  index: isRobot = true (selectionSize = 15420)\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_and_country_isRobot()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new AndFilter(
+            ImmutableList.of(
+                new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
+                new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "index: AND (selectionSize = 0)\n"
+        + "  index: countryName = United States (selectionSize = 528)\n"
+        + "  index: isRobot = true (selectionSize = 15420)\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_or_countryIsNull_pageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new OrFilter(
+            ImmutableList.of(
+                new NullFilter("countryName", null),
+                new LikeDimFilter("page", "%u%", null, null).toFilter()
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "matcher: OR\n"
+        + "  matcher: countryName IS NULL\n"
+        + "    with partial index: countryName IS NULL (selectionSize = 35445)\n"
+        + "  matcher: page LIKE '%u%'\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_and_countryIsNull_pageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new AndFilter(
+            ImmutableList.of(
+                new NullFilter("countryName", null),
+                new LikeDimFilter("page", "%u%", null, null).toFilter()
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "index: AND (selectionSize = 14165)\n"
+        + "  index: countryName IS NULL (selectionSize = 35445)\n"
+        + "  index: page LIKE '%u%' (selectionSize = 15328)\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_and_country_pageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new AndFilter(
+            ImmutableList.of(
+                new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
+                new LikeDimFilter("page", "%u%", null, null).toFilter()
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "index: countryName = United States (selectionSize = 528)\n"
+        + "matcher: page LIKE '%u%'\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_or_countryNotNull_pageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new OrFilter(
+            ImmutableList.of(
+                new NotFilter(new NullFilter("countryName", null)),
+                new LikeDimFilter("page", "%u%", null, null).toFilter()
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "index: OR (selectionSize = 39244)\n"
+        + "  index: ~(countryName IS NULL) (selectionSize = 3799)\n"
+        + "  index: page LIKE '%u%' (selectionSize = 15328)\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_and_countryNotNull_pageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new AndFilter(
+            ImmutableList.of(
+                new NotFilter(new NullFilter("countryName", null)),
+                new LikeDimFilter("page", "%u%", null, null).toFilter()
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "index: ~(countryName IS NULL) (selectionSize = 3799)\n"
+        + "matcher: page LIKE '%u%'\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_or_countryIsAndPageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new OrFilter(
+            ImmutableList.of(
+                new AndFilter(
+                    ImmutableList.of(
+                        new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
+                        new LikeDimFilter("page", "%a%", null, null).toFilter()
+                    )
+                ),
+                new AndFilter(
+                    ImmutableList.of(
+                        new EqualityFilter("countryName", ColumnType.STRING, "United Kingdom", null),
+                        new LikeDimFilter("page", "%b%", null, null).toFilter()
+                    )
+                ),
+                new AndFilter(
+                    ImmutableList.of(
+                        new NullFilter("countryName", null),
+                        new LikeDimFilter("page", "%c%", null, null).toFilter()
+                    )
+                )
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "matcher: OR\n"
+        + "  matcher: AND\n"
+        + "    with partial index: AND (selectionSize = 11851)\n"
+        + "      index: countryName IS NULL (selectionSize = 35445)\n"
+        + "      index: page LIKE '%c%' (selectionSize = 12864)\n"
+        + "  matcher: AND\n"
+        + "    with partial index: countryName = United States (selectionSize = 528)\n"
+        + "    matcher: page LIKE '%a%'\n"
+        + "  matcher: AND\n"
+        + "    with partial index: countryName = United Kingdom (selectionSize = 234)\n"
+        + "    matcher: page LIKE '%b%'\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_or_countryIsNull_and_country_pageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new OrFilter(
+            ImmutableList.of(
+                new NullFilter("countryName", null),
+                new AndFilter(
+                    ImmutableList.of(
+                        new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
+                        new LikeDimFilter("page", "%a%", null, null).toFilter()
+                    )
+                )
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "matcher: OR\n"
+        + "  matcher: countryName IS NULL\n"
+        + "    with partial index: countryName IS NULL (selectionSize = 35445)\n"
+        + "  matcher: AND\n"
+        + "    with partial index: countryName = United States (selectionSize = 528)\n"
+        + "    matcher: page LIKE '%a%'\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  @Test
+  public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike()
+  {
+    final FilterBundle filterBundle = makeFilterBundle(
+        new OrFilter(
+            ImmutableList.of(
+                new NullFilter("countryName", null),
+                new AndFilter(
+                    ImmutableList.of(
+                        // isRobot IN (false, true) matches all rows; so this test is equivalent logically to
+                        // test_or_countryIsNull_pageLike. It's effectively testing that the AndFilter carries through
+                        // the short-circuiting done by the OrFilter when it applies the NullFilter.
+                        new TypedInFilter("isRobot", ColumnType.STRING, ImmutableList.of("false", "true"), null, null),
+                        new LikeDimFilter("page", "%u%", null, null).toFilter()
+                    )
+                )
+            )
+        )
+    );
+
+    Assert.assertEquals(
+        "matcher: OR\n"
+        + "  matcher: countryName IS NULL\n"
+        + "    with partial index: countryName IS NULL (selectionSize = 35445)\n"
+        + "  matcher: AND\n"
+        + "    with partial index: isRobot IN (false, true) (STRING) (selectionSize = 39244)\n"
+        + "    matcher: page LIKE '%u%'\n",
+        filterBundle.getInfo().describe()
+    );
+  }
+
+  protected FilterBundle makeFilterBundle(final Filter filter)
+  {
+    return filter.makeFilterBundle(
+        indexSelector,
+        new DefaultBitmapResultFactory(bitmapFactory),
+        indexSelector.getNumRows(),
+        indexSelector.getNumRows(),
+        false
+    );
+  }
+}
diff --git a/sql/src/test/resources/calcite/tests/wikiticker-2015-09-12-sampled.json.gz b/processing/src/test/resources/wikipedia/wikiticker-2015-09-12-sampled.json.gz
similarity index 100%
rename from sql/src/test/resources/calcite/tests/wikiticker-2015-09-12-sampled.json.gz
rename to processing/src/test/resources/wikipedia/wikiticker-2015-09-12-sampled.json.gz
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java
index 2303d51e02a..4148353e603 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java
@@ -38,6 +38,7 @@ import org.apache.druid.data.input.impl.StringDimensionSchema;
 import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.parsers.JSONPathSpec;
 import org.apache.druid.query.DataSource;
 import org.apache.druid.query.GlobalTableDataSource;
@@ -56,10 +57,13 @@ import org.apache.druid.query.aggregation.last.FloatLastAggregatorFactory;
 import org.apache.druid.query.aggregation.last.LongLastAggregatorFactory;
 import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider;
 import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.QueryableIndex;
 import org.apache.druid.segment.SegmentWrangler;
+import org.apache.druid.segment.TestIndex;
 import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.join.JoinConditionAnalysis;
 import org.apache.druid.segment.join.Joinable;
@@ -78,11 +82,13 @@ import org.joda.time.DateTime;
 import org.joda.time.chrono.ISOChronology;
 
 import java.io.File;
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
+import java.util.UUID;
 import java.util.stream.Collectors;
 
 /**
@@ -602,47 +608,15 @@ public class TestDataBuilder
 
   public static QueryableIndex makeWikipediaIndex(File tmpDir)
   {
-    final List<DimensionSchema> dimensions = Arrays.asList(
-        new StringDimensionSchema("channel"),
-        new StringDimensionSchema("cityName"),
-        new StringDimensionSchema("comment"),
-        new StringDimensionSchema("countryIsoCode"),
-        new StringDimensionSchema("countryName"),
-        new StringDimensionSchema("isAnonymous"),
-        new StringDimensionSchema("isMinor"),
-        new StringDimensionSchema("isNew"),
-        new StringDimensionSchema("isRobot"),
-        new StringDimensionSchema("isUnpatrolled"),
-        new StringDimensionSchema("metroCode"),
-        new StringDimensionSchema("namespace"),
-        new StringDimensionSchema("page"),
-        new StringDimensionSchema("regionIsoCode"),
-        new StringDimensionSchema("regionName"),
-        new StringDimensionSchema("user"),
-        new LongDimensionSchema("delta"),
-        new LongDimensionSchema("added"),
-        new LongDimensionSchema("deleted")
-    );
-
-    return IndexBuilder
-        .create()
-        .tmpDir(new File(tmpDir, "wikipedia1"))
-        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
-        .schema(new IncrementalIndexSchema.Builder()
-                    .withRollup(false)
-                    .withTimestampSpec(new TimestampSpec("time", null, null))
-                    .withDimensionsSpec(new DimensionsSpec(dimensions))
-                    .build()
-        )
-        .inputSource(
-            ResourceInputSource.of(
-                TestDataBuilder.class.getClassLoader(),
-                "calcite/tests/wikiticker-2015-09-12-sampled.json.gz"
-            )
-        )
-        .inputFormat(DEFAULT_JSON_INPUT_FORMAT)
-        .inputTmpDir(new File(tmpDir, "tmpWikipedia1"))
-        .buildMMappedIndex();
+    try {
+      final File directory = new File(tmpDir, StringUtils.format("wikipedia-index-%s", UUID.randomUUID()));
+      final IncrementalIndex index = TestIndex.makeWikipediaIncrementalIndex();
+      TestIndex.INDEX_MERGER.persist(index, directory, IndexSpec.DEFAULT, null);
+      return TestIndex.INDEX_IO.loadIndex(directory);
+    }
+    catch (IOException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   public static QueryableIndex makeWikipediaIndexWithAggregation(File tmpDir)
@@ -687,8 +661,8 @@ public class TestDataBuilder
         )
         .inputSource(
             ResourceInputSource.of(
-                TestDataBuilder.class.getClassLoader(),
-                "calcite/tests/wikiticker-2015-09-12-sampled.json.gz"
+                TestIndex.class.getClassLoader(),
+                "wikipedia/wikiticker-2015-09-12-sampled.json.gz"
             )
         )
         .inputFormat(DEFAULT_JSON_INPUT_FORMAT)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to