This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new ccc1ffb0323 Additional short circuiting knowledge in filter bundles.
(#16292)
ccc1ffb0323 is described below
commit ccc1ffb03238c2318a622a95e808b83952abd586
Author: Gian Merlino <[email protected]>
AuthorDate: Tue Apr 16 22:42:28 2024 -0700
Additional short circuiting knowledge in filter bundles. (#16292)
* Additional short circuiting knowledge in filter bundles.
Three updates:
1) The parameter "selectionRowCount" on "makeFilterBundle" is renamed to
"applyRowCount", and redefined as an upper bound on the number of rows
remaining after short-circuiting (rather than the number of rows selected
so far). This definition works better for OR filters, which pass through
the FALSE set rather than the TRUE set to the next subfilter.
2) AndFilter uses min(applyRowCount, indexIntersectionSize) for every
subfilter, rather than using selectionRowCount for the first subfilter and
indexIntersectionSize for each subfilter thereafter. This improves accuracy
when the incoming applyRowCount is smaller than the row count from the
first few indexes.
3) OrFilter uses min(applyRowCount, totalRowCount - indexUnionSize) rather
than applyRowCount for subfilters. This allows an OR filter to pass
information about short-circuiting to its subfilters.
To help write tests for this, the patch also moves the sampled
wikiticker data file from sql to processing.
* Forbidden APIs.
* Forbidden APIs.
* Better comments.
* Fix inspection.
* Adjustments to tests.
---
.../java/org/apache/druid/query/filter/Filter.java | 10 +-
.../apache/druid/query/filter/FilterBundle.java | 74 ++++-
.../org/apache/druid/segment/filter/AndFilter.java | 14 +-
.../druid/segment/filter/IsBooleanFilter.java | 6 +-
.../org/apache/druid/segment/filter/NotFilter.java | 4 +-
.../org/apache/druid/segment/filter/OrFilter.java | 15 +-
.../druid/segment/index/BitmapColumnIndex.java | 8 +-
.../index/DictionaryRangeScanningBitmapIndex.java | 4 +-
.../index/DictionaryScanningBitmapIndex.java | 4 +-
.../java/org/apache/druid/segment/TestIndex.java | 67 +++++
.../druid/segment/filter/FilterBundleTest.java | 328 +++++++++++++++++++++
.../wikiticker-2015-09-12-sampled.json.gz | Bin
.../druid/sql/calcite/util/TestDataBuilder.java | 60 ++--
13 files changed, 523 insertions(+), 71 deletions(-)
diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java
b/processing/src/main/java/org/apache/druid/query/filter/Filter.java
index 4a83b4c1b8a..9fae7d51d20 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java
@@ -52,7 +52,11 @@ public interface Filter
* them
* @param bitmapResultFactory - wrapper for {@link ImmutableBitmap}
operations to tie into
* {@link org.apache.druid.query.QueryMetrics}
and build the output indexes
- * @param selectionRowCount - number of rows selected so far by any
previous bundle computations
+ * @param applyRowCount - upper bound on number of rows this filter
would be applied to, after removing rows
+ * short-circuited by prior bundle operations.
For example, given "x AND y", if "x" is
+ * resolved using an index, then "y" will
receive the number of rows that matched
+ * the filter "x". As another example, given "x
OR y", if "x" is resolved using an
+ * index, then "y" will receive the number of
rows that did *not* match the filter "x".
* @param totalRowCount - total number of rows to be scanned if no
indexes are applied
* @param includeUnknown - mapping for Druid native two state logic
system into SQL three-state logic system. If
* set to true, bitmaps returned by this method
should include true bits for any rows
@@ -65,7 +69,7 @@ public interface Filter
default <T> FilterBundle makeFilterBundle(
ColumnIndexSelector columnIndexSelector,
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
@@ -77,7 +81,7 @@ public interface Filter
final long bitmapConstructionStartNs = System.nanoTime();
final T result = columnIndex.computeBitmapResult(
bitmapResultFactory,
- selectionRowCount,
+ applyRowCount,
totalRowCount,
includeUnknown
);
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
index 78d666c61cb..695eb26c00b 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
@@ -38,6 +38,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.function.Supplier;
+import java.util.regex.Pattern;
/**
* FilterBundle is a container for all the goodies used for producing filtered
cursors, a {@link ImmutableBitmap} if
@@ -120,7 +121,9 @@ public class FilterBundle
public interface IndexBundle
{
IndexBundleInfo getIndexInfo();
+
ImmutableBitmap getBitmap();
+
ColumnIndexCapabilities getIndexCapabilities();
}
@@ -135,7 +138,9 @@ public class FilterBundle
public interface MatcherBundle
{
MatcherBundleInfo getMatcherInfo();
+
ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset
baseOffset, boolean descending);
+
VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory
selectorFactory, ReadableVectorOffset baseOffset);
}
@@ -205,7 +210,10 @@ public class FilterBundle
}
@Override
- public VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory
selectorFactory, ReadableVectorOffset baseOffset)
+ public VectorValueMatcher vectorMatcher(
+ VectorColumnSelectorFactory selectorFactory,
+ ReadableVectorOffset baseOffset
+ )
{
return vectorMatcherFn.apply(selectorFactory);
}
@@ -240,6 +248,21 @@ public class FilterBundle
return matcher;
}
+ /**
+ * Return a multiline description string, suitable for comparisons in
tests.
+ */
+ public String describe()
+ {
+ final StringBuilder sb = new StringBuilder();
+ if (index != null) {
+ sb.append(index.describe());
+ }
+ if (matcher != null) {
+ sb.append(matcher.describe());
+ }
+ return sb.toString();
+ }
+
@Override
public String toString()
{
@@ -249,6 +272,8 @@ public class FilterBundle
public static class IndexBundleInfo
{
+ private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");
+
private final Supplier<String> filter;
private final List<IndexBundleInfo> indexes;
private final int selectionSize;
@@ -292,6 +317,27 @@ public class FilterBundle
return indexes;
}
+ /**
+ * Return a multiline description string, suitable for comparisons in
tests.
+ */
+ public String describe()
+ {
+ final StringBuilder sb = new StringBuilder()
+ .append("index: ")
+ .append(filter.get())
+ .append(" (selectionSize = ")
+ .append(selectionSize)
+ .append(")\n");
+
+ if (indexes != null) {
+ for (final IndexBundleInfo info : indexes) {
+ sb.append(PATTERN_LINE_START.matcher(info.describe()).replaceAll("
"));
+ }
+ }
+
+ return sb.toString();
+ }
+
@Override
public String toString()
{
@@ -306,6 +352,8 @@ public class FilterBundle
public static class MatcherBundleInfo
{
+ private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");
+
private final Supplier<String> filter;
@Nullable
final List<MatcherBundleInfo> matchers;
@@ -345,6 +393,30 @@ public class FilterBundle
return matchers;
}
+ /**
+ * Return a multiline description string, suitable for comparisons in
tests.
+ */
+ public String describe()
+ {
+ final StringBuilder sb = new StringBuilder()
+ .append("matcher: ")
+ .append(filter.get())
+ .append("\n");
+
+ if (partialIndex != null) {
+ sb.append(" with partial ")
+
.append(PATTERN_LINE_START.matcher(partialIndex.describe()).replaceAll("
").substring(2));
+ }
+
+ if (matchers != null) {
+ for (MatcherBundleInfo info : matchers) {
+ sb.append(PATTERN_LINE_START.matcher(info.describe()).replaceAll("
"));
+ }
+ }
+
+ return sb.toString();
+ }
+
@Override
public String toString()
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java
index f9001689d4a..036c6a8250a 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java
@@ -76,7 +76,7 @@ public class AndFilter implements BooleanFilter
public <T> FilterBundle makeFilterBundle(
ColumnIndexSelector columnIndexSelector,
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
@@ -85,7 +85,7 @@ public class AndFilter implements BooleanFilter
final List<FilterBundle.MatcherBundle> matcherBundles = new ArrayList<>();
final List<FilterBundle.MatcherBundleInfo> matcherBundleInfos = new
ArrayList<>();
- int selectionCount = selectionRowCount;
+ int indexIntersectionSize = totalRowCount;
ImmutableBitmap index = null;
ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true,
true);
// AND filter can be partitioned into a bundle that has both indexes and
value matchers. The filters which support
@@ -101,7 +101,7 @@ public class AndFilter implements BooleanFilter
final FilterBundle subBundle = subfilter.makeFilterBundle(
columnIndexSelector,
bitmapResultFactory,
- selectionCount,
+ Math.min(applyRowCount, indexIntersectionSize),
totalRowCount,
includeUnknown
);
@@ -120,7 +120,7 @@ public class AndFilter implements BooleanFilter
} else {
index = index.intersection(subBundle.getIndex().getBitmap());
}
- selectionCount = index.size();
+ indexIntersectionSize = index.size();
}
if (subBundle.getMatcherBundle() != null) {
matcherBundles.add(subBundle.getMatcherBundle());
@@ -140,7 +140,7 @@ public class AndFilter implements BooleanFilter
indexBundle = new FilterBundle.SimpleIndexBundle(
new FilterBundle.IndexBundleInfo(
() -> "AND",
- selectionCount,
+ indexIntersectionSize,
System.nanoTime() - bitmapConstructionStartNs,
indexBundleInfos
),
@@ -247,7 +247,7 @@ public class AndFilter implements BooleanFilter
@Override
public <T> T computeBitmapResult(
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
@@ -256,7 +256,7 @@ public class AndFilter implements BooleanFilter
for (final BitmapColumnIndex index : bitmapColumnIndices) {
final T bitmapResult = index.computeBitmapResult(
bitmapResultFactory,
- selectionRowCount,
+ applyRowCount,
totalRowCount,
includeUnknown
);
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
index 39a4f90c263..75eaebb27c8 100644
---
a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
+++
b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
@@ -94,7 +94,7 @@ public class IsBooleanFilter implements Filter
@Override
public <T> T computeBitmapResult(
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
@@ -102,7 +102,7 @@ public class IsBooleanFilter implements Filter
if (isTrue) {
return baseIndex.computeBitmapResult(
bitmapResultFactory,
- selectionRowCount,
+ applyRowCount,
totalRowCount,
false
);
@@ -110,7 +110,7 @@ public class IsBooleanFilter implements Filter
final T result = baseIndex.computeBitmapResult(
bitmapResultFactory,
- selectionRowCount,
+ applyRowCount,
totalRowCount,
useThreeValueLogic
);
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
index b4fb793deb1..58dd90c8af1 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
@@ -93,14 +93,14 @@ public class NotFilter implements Filter
@Override
public <T> T computeBitmapResult(
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
{
final T result = baseIndex.computeBitmapResult(
bitmapResultFactory,
- selectionRowCount,
+ applyRowCount,
totalRowCount,
!includeUnknown && useThreeValueLogic
);
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
index 9373ea142e9..784507caf03 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java
@@ -81,7 +81,7 @@ public class OrFilter implements BooleanFilter
public <T> FilterBundle makeFilterBundle(
ColumnIndexSelector columnIndexSelector,
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
@@ -99,6 +99,8 @@ public class OrFilter implements BooleanFilter
final List<FilterBundle.IndexBundleInfo> indexOnlyBundlesInfo = new
ArrayList<>();
final List<FilterBundle.MatcherBundle> partialIndexBundles = new
ArrayList<>();
final List<FilterBundle.MatcherBundle> matcherOnlyBundles = new
ArrayList<>();
+
+ int indexUnionSize = 0;
ImmutableBitmap index = null;
ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true,
true);
int emptyCount = 0;
@@ -109,7 +111,7 @@ public class OrFilter implements BooleanFilter
final FilterBundle bundle = subfilter.makeFilterBundle(
columnIndexSelector,
bitmapResultFactory,
- selectionRowCount,
+ Math.min(applyRowCount, totalRowCount - indexUnionSize),
totalRowCount,
includeUnknown
);
@@ -138,6 +140,7 @@ public class OrFilter implements BooleanFilter
} else {
index = index.union(bundle.getIndex().getBitmap());
}
+ indexUnionSize = index.size();
}
}
} else {
@@ -165,7 +168,7 @@ public class OrFilter implements BooleanFilter
new FilterBundle.SimpleIndexBundle(
new FilterBundle.IndexBundleInfo(
() -> "OR",
- selectionRowCount,
+ applyRowCount,
totalBitmapConstructTimeNs,
indexOnlyBundlesInfo
),
@@ -185,7 +188,7 @@ public class OrFilter implements BooleanFilter
if (!indexOnlyBundles.isEmpty()) {
// translate the indexOnly bundles into a single matcher
final FilterBundle.MatcherBundle matcherBundle =
convertIndexToMatcherBundle(
- selectionRowCount,
+ applyRowCount,
indexOnlyBundles,
indexOnlyBundlesInfo,
totalBitmapConstructTimeNs,
@@ -284,14 +287,14 @@ public class OrFilter implements BooleanFilter
@Override
public <T> T computeBitmapResult(
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
{
List<T> results =
Lists.newArrayListWithCapacity(bitmapColumnIndices.size());
for (BitmapColumnIndex index : bitmapColumnIndices) {
- final T r = index.computeBitmapResult(bitmapResultFactory,
selectionRowCount, totalRowCount, includeUnknown);
+ final T r = index.computeBitmapResult(bitmapResultFactory,
applyRowCount, totalRowCount, includeUnknown);
if (r == null) {
// all or nothing
return null;
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
index 154dd66e206..04a5bb8b6b5 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
@@ -59,7 +59,11 @@ public interface BitmapColumnIndex
*
* @param bitmapResultFactory helper to format the {@link
org.apache.druid.collections.bitmap.ImmutableBitmap} in a
* form ready for consumption by callers
- * @param selectionRowCount number of rows selected so far by any previous
index computations
+ * @param applyRowCount upper bound on number of rows this filter
would be applied to, after removing rows
+ * short-circuited by prior bundle operations.
For example, given "x AND y", if "x" is
+ * resolved using an index, then "y" will receive
the number of rows that matched
+ * the filter "x". As another example, given "x
OR y", if "x" is resolved using an
+ * index, then "y" will receive the number of
rows that did *not* match the filter "x".
* @param totalRowCount total number of rows to be scanned if no
indexes are used
* @param includeUnknown mapping for Druid native two state logic
system into SQL three-state logic system. If
* set to true, bitmaps returned by this method
should include true bits for any rows where
@@ -71,7 +75,7 @@ public interface BitmapColumnIndex
@Nullable
default <T> T computeBitmapResult(
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
index 6b175297181..37e84bb8b5d 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
@@ -50,12 +50,12 @@ public abstract class DictionaryRangeScanningBitmapIndex
extends SimpleImmutable
@Override
public final <T> T computeBitmapResult(
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
{
- final int scale = (int) Math.ceil(sizeScale * selectionRowCount);
+ final int scale = (int) Math.ceil(sizeScale * applyRowCount);
if (rangeSize > scale) {
return null;
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
index 2bde27dc52c..1ae00a846ba 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
@@ -53,12 +53,12 @@ public abstract class DictionaryScanningBitmapIndex extends
SimpleImmutableBitma
@Override
public final <T> T computeBitmapResult(
BitmapResultFactory<T> bitmapResultFactory,
- int selectionRowCount,
+ int applyRowCount,
int totalRowCount,
boolean includeUnknown
)
{
- if (selectionRowCount != totalRowCount && selectionRowCount <
(dictionarySize * scaleThreshold)) {
+ if (applyRowCount != totalRowCount && applyRowCount < (dictionarySize *
scaleThreshold)) {
return null;
}
return
bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(includeUnknown));
diff --git a/processing/src/test/java/org/apache/druid/segment/TestIndex.java
b/processing/src/test/java/org/apache/druid/segment/TestIndex.java
index 374cf6c7e01..4aceafa5f3a 100644
--- a/processing/src/test/java/org/apache/druid/segment/TestIndex.java
+++ b/processing/src/test/java/org/apache/druid/segment/TestIndex.java
@@ -24,6 +24,7 @@ import com.google.common.base.Suppliers;
import com.google.common.io.CharSource;
import com.google.common.io.LineProcessor;
import com.google.common.io.Resources;
+import org.apache.druid.data.input.ResourceInputSource;
import org.apache.druid.data.input.impl.DelimitedParseSpec;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
@@ -37,6 +38,7 @@ import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.query.NestedDataTestUtils;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory;
import org.apache.druid.query.aggregation.DoubleMinAggregatorFactory;
@@ -68,6 +70,7 @@ import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
/**
+ *
*/
public class TestIndex
{
@@ -222,6 +225,9 @@ public class TestIndex
.build()
)
);
+ private static Supplier<QueryableIndex> wikipediaMMappedIndex =
Suppliers.memoize(
+ () -> persistRealtimeAndLoadMMapped(makeWikipediaIncrementalIndex())
+ );
public static IncrementalIndex getIncrementalTestIndex()
{
@@ -243,6 +249,11 @@ public class TestIndex
return mmappedIndex.get();
}
+ public static QueryableIndex getMMappedWikipediaIndex()
+ {
+ return wikipediaMMappedIndex.get();
+ }
+
public static QueryableIndex getNoRollupMMappedTestIndex()
{
return noRollupMmappedIndex.get();
@@ -322,6 +333,62 @@ public class TestIndex
}
}
+ public static IncrementalIndex makeWikipediaIncrementalIndex()
+ {
+ final List<DimensionSchema> dimensions = Arrays.asList(
+ new StringDimensionSchema("channel"),
+ new StringDimensionSchema("cityName"),
+ new StringDimensionSchema("comment"),
+ new StringDimensionSchema("countryIsoCode"),
+ new StringDimensionSchema("countryName"),
+ new StringDimensionSchema("isAnonymous"),
+ new StringDimensionSchema("isMinor"),
+ new StringDimensionSchema("isNew"),
+ new StringDimensionSchema("isRobot"),
+ new StringDimensionSchema("isUnpatrolled"),
+ new StringDimensionSchema("metroCode"),
+ new StringDimensionSchema("namespace"),
+ new StringDimensionSchema("page"),
+ new StringDimensionSchema("regionIsoCode"),
+ new StringDimensionSchema("regionName"),
+ new StringDimensionSchema("user"),
+ new LongDimensionSchema("delta"),
+ new LongDimensionSchema("added"),
+ new LongDimensionSchema("deleted")
+ );
+
+ final File tmpDir;
+ try {
+ tmpDir = FileUtils.createTempDir("test-index-input-source");
+ try {
+ return IndexBuilder
+ .create()
+
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
+ .schema(new IncrementalIndexSchema.Builder()
+ .withRollup(false)
+ .withTimestampSpec(new TimestampSpec("time", null,
null))
+ .withDimensionsSpec(new DimensionsSpec(dimensions))
+ .build()
+ )
+ .inputSource(
+ ResourceInputSource.of(
+ TestIndex.class.getClassLoader(),
+ "wikipedia/wikiticker-2015-09-12-sampled.json.gz"
+ )
+ )
+ .inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT)
+ .inputTmpDir(new File(tmpDir, "tmpWikipedia1"))
+ .buildIncrementalIndex();
+ }
+ finally {
+ FileUtils.deleteDirectory(tmpDir);
+ }
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
public static IncrementalIndex loadIncrementalIndex(
final IncrementalIndex retVal,
final CharSource source
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
new file mode 100644
index 00000000000..aa764535444
--- /dev/null
+++
b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
@@ -0,0 +1,328 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.filter;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.query.DefaultBitmapResultFactory;
+import org.apache.druid.query.filter.ColumnIndexSelector;
+import org.apache.druid.query.filter.EqualityFilter;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.filter.FilterBundle;
+import org.apache.druid.query.filter.LikeDimFilter;
+import org.apache.druid.query.filter.NullFilter;
+import org.apache.druid.query.filter.TypedInFilter;
+import org.apache.druid.segment.ColumnCache;
+import org.apache.druid.segment.ColumnSelectorColumnIndexSelector;
+import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.TestIndex;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class FilterBundleTest extends InitializedNullHandlingTest
+{
+ private Closer closer;
+ protected BitmapFactory bitmapFactory;
+ protected ColumnIndexSelector indexSelector;
+
+ @Rule
+ public TemporaryFolder tmpDir = new TemporaryFolder();
+
+ @Before
+ public void setUp()
+ {
+ final QueryableIndex index = TestIndex.getMMappedWikipediaIndex();
+ closer = Closer.create();
+ bitmapFactory = index.getBitmapFactoryForDimensions();
+ indexSelector = new ColumnSelectorColumnIndexSelector(
+ bitmapFactory,
+ VirtualColumns.EMPTY,
+ new ColumnCache(index, closer)
+ );
+ }
+
+ @After
+ public void tearDown() throws Exception
+ {
+ closer.close();
+ indexSelector = null;
+ }
+
+ @Test
+ public void test_or_country_isRobot()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING, "United
States", null),
+ new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "index: OR (selectionSize = 39244)\n"
+ + " index: countryName = United States (selectionSize = 528)\n"
+ + " index: isRobot = true (selectionSize = 15420)\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_and_country_isRobot()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new AndFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING, "United
States", null),
+ new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "index: AND (selectionSize = 0)\n"
+ + " index: countryName = United States (selectionSize = 528)\n"
+ + " index: isRobot = true (selectionSize = 15420)\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_or_countryIsNull_pageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new NullFilter("countryName", null),
+ new LikeDimFilter("page", "%u%", null, null).toFilter()
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "matcher: OR\n"
+ + " matcher: countryName IS NULL\n"
+ + " with partial index: countryName IS NULL (selectionSize =
35445)\n"
+ + " matcher: page LIKE '%u%'\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_and_countryIsNull_pageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new AndFilter(
+ ImmutableList.of(
+ new NullFilter("countryName", null),
+ new LikeDimFilter("page", "%u%", null, null).toFilter()
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "index: AND (selectionSize = 14165)\n"
+ + " index: countryName IS NULL (selectionSize = 35445)\n"
+ + " index: page LIKE '%u%' (selectionSize = 15328)\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_and_country_pageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new AndFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING, "United
States", null),
+ new LikeDimFilter("page", "%u%", null, null).toFilter()
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "index: countryName = United States (selectionSize = 528)\n"
+ + "matcher: page LIKE '%u%'\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_or_countryNotNull_pageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new NotFilter(new NullFilter("countryName", null)),
+ new LikeDimFilter("page", "%u%", null, null).toFilter()
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "index: OR (selectionSize = 39244)\n"
+ + " index: ~(countryName IS NULL) (selectionSize = 3799)\n"
+ + " index: page LIKE '%u%' (selectionSize = 15328)\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_and_countryNotNull_pageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new AndFilter(
+ ImmutableList.of(
+ new NotFilter(new NullFilter("countryName", null)),
+ new LikeDimFilter("page", "%u%", null, null).toFilter()
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "index: ~(countryName IS NULL) (selectionSize = 3799)\n"
+ + "matcher: page LIKE '%u%'\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_or_countryIsAndPageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new AndFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING,
"United States", null),
+ new LikeDimFilter("page", "%a%", null, null).toFilter()
+ )
+ ),
+ new AndFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING,
"United Kingdom", null),
+ new LikeDimFilter("page", "%b%", null, null).toFilter()
+ )
+ ),
+ new AndFilter(
+ ImmutableList.of(
+ new NullFilter("countryName", null),
+ new LikeDimFilter("page", "%c%", null, null).toFilter()
+ )
+ )
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "matcher: OR\n"
+ + " matcher: AND\n"
+ + " with partial index: AND (selectionSize = 11851)\n"
+ + " index: countryName IS NULL (selectionSize = 35445)\n"
+ + " index: page LIKE '%c%' (selectionSize = 12864)\n"
+ + " matcher: AND\n"
+ + " with partial index: countryName = United States (selectionSize
= 528)\n"
+ + " matcher: page LIKE '%a%'\n"
+ + " matcher: AND\n"
+ + " with partial index: countryName = United Kingdom (selectionSize
= 234)\n"
+ + " matcher: page LIKE '%b%'\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_or_countryIsNull_and_country_pageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new NullFilter("countryName", null),
+ new AndFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING,
"United States", null),
+ new LikeDimFilter("page", "%a%", null, null).toFilter()
+ )
+ )
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "matcher: OR\n"
+ + " matcher: countryName IS NULL\n"
+ + " with partial index: countryName IS NULL (selectionSize =
35445)\n"
+ + " matcher: AND\n"
+ + " with partial index: countryName = United States (selectionSize
= 528)\n"
+ + " matcher: page LIKE '%a%'\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike()
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new NullFilter("countryName", null),
+ new AndFilter(
+ ImmutableList.of(
+ // isRobot IN (false, true) matches all rows; so this
test is equivalent logically to
+ // test_or_countryIsNull_pageLike. It's effectively
testing that the AndFilter carries through
+ // the short-circuiting done by the OrFilter when it
applies the NullFilter.
+ new TypedInFilter("isRobot", ColumnType.STRING,
ImmutableList.of("false", "true"), null, null),
+ new LikeDimFilter("page", "%u%", null, null).toFilter()
+ )
+ )
+ )
+ )
+ );
+
+ Assert.assertEquals(
+ "matcher: OR\n"
+ + " matcher: countryName IS NULL\n"
+ + " with partial index: countryName IS NULL (selectionSize =
35445)\n"
+ + " matcher: AND\n"
+ + " with partial index: isRobot IN (false, true) (STRING)
(selectionSize = 39244)\n"
+ + " matcher: page LIKE '%u%'\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ protected FilterBundle makeFilterBundle(final Filter filter)
+ {
+ return filter.makeFilterBundle(
+ indexSelector,
+ new DefaultBitmapResultFactory(bitmapFactory),
+ indexSelector.getNumRows(),
+ indexSelector.getNumRows(),
+ false
+ );
+ }
+}
diff --git
a/sql/src/test/resources/calcite/tests/wikiticker-2015-09-12-sampled.json.gz
b/processing/src/test/resources/wikipedia/wikiticker-2015-09-12-sampled.json.gz
similarity index 100%
rename from
sql/src/test/resources/calcite/tests/wikiticker-2015-09-12-sampled.json.gz
rename to
processing/src/test/resources/wikipedia/wikiticker-2015-09-12-sampled.json.gz
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java
b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java
index 2303d51e02a..4148353e603 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java
@@ -38,6 +38,7 @@ import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.GlobalTableDataSource;
@@ -56,10 +57,13 @@ import org.apache.druid.query.aggregation.last.FloatLastAggregatorFactory;
import org.apache.druid.query.aggregation.last.LongLastAggregatorFactory;
import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider;
import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.SegmentWrangler;
+import org.apache.druid.segment.TestIndex;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.Joinable;
@@ -78,11 +82,13 @@ import org.joda.time.DateTime;
import org.joda.time.chrono.ISOChronology;
import java.io.File;
+import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
+import java.util.UUID;
import java.util.stream.Collectors;
/**
@@ -602,47 +608,15 @@ public class TestDataBuilder
public static QueryableIndex makeWikipediaIndex(File tmpDir)
{
- final List<DimensionSchema> dimensions = Arrays.asList(
- new StringDimensionSchema("channel"),
- new StringDimensionSchema("cityName"),
- new StringDimensionSchema("comment"),
- new StringDimensionSchema("countryIsoCode"),
- new StringDimensionSchema("countryName"),
- new StringDimensionSchema("isAnonymous"),
- new StringDimensionSchema("isMinor"),
- new StringDimensionSchema("isNew"),
- new StringDimensionSchema("isRobot"),
- new StringDimensionSchema("isUnpatrolled"),
- new StringDimensionSchema("metroCode"),
- new StringDimensionSchema("namespace"),
- new StringDimensionSchema("page"),
- new StringDimensionSchema("regionIsoCode"),
- new StringDimensionSchema("regionName"),
- new StringDimensionSchema("user"),
- new LongDimensionSchema("delta"),
- new LongDimensionSchema("added"),
- new LongDimensionSchema("deleted")
- );
-
- return IndexBuilder
- .create()
- .tmpDir(new File(tmpDir, "wikipedia1"))
- .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
- .schema(new IncrementalIndexSchema.Builder()
- .withRollup(false)
- .withTimestampSpec(new TimestampSpec("time", null, null))
- .withDimensionsSpec(new DimensionsSpec(dimensions))
- .build()
- )
- .inputSource(
- ResourceInputSource.of(
- TestDataBuilder.class.getClassLoader(),
- "calcite/tests/wikiticker-2015-09-12-sampled.json.gz"
- )
- )
- .inputFormat(DEFAULT_JSON_INPUT_FORMAT)
- .inputTmpDir(new File(tmpDir, "tmpWikipedia1"))
- .buildMMappedIndex();
+ try {
+ final File directory = new File(tmpDir, StringUtils.format("wikipedia-index-%s", UUID.randomUUID()));
+ final IncrementalIndex index = TestIndex.makeWikipediaIncrementalIndex();
+ TestIndex.INDEX_MERGER.persist(index, directory, IndexSpec.DEFAULT, null);
+ return TestIndex.INDEX_IO.loadIndex(directory);
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
}
public static QueryableIndex makeWikipediaIndexWithAggregation(File tmpDir)
@@ -687,8 +661,8 @@ public class TestDataBuilder
)
.inputSource(
ResourceInputSource.of(
- TestDataBuilder.class.getClassLoader(),
- "calcite/tests/wikiticker-2015-09-12-sampled.json.gz"
+ TestIndex.class.getClassLoader(),
+ "wikipedia/wikiticker-2015-09-12-sampled.json.gz"
)
)
.inputFormat(DEFAULT_JSON_INPUT_FORMAT)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]