This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new a2b011cdcdf Incorporate `estimatedComputeCost` into all
`BitmapColumnIndex` classes. (#17125)
a2b011cdcdf is described below
commit a2b011cdcdf0f302feb6e1ee59b0aa3a682da58b
Author: Cece Mei <[email protected]>
AuthorDate: Wed Sep 25 23:11:26 2024 -0700
Incorporate `estimatedComputeCost` into all `BitmapColumnIndex` classes.
(#17125)
changes:
* filter index processing is now automatically ordered based on estimated
'cost', which is approximated based on how many expected bitmap operations are
required to construct the bitmap used for the 'offset'
* cursorAutoArrangeFilters context flag now defaults to true, but can be
set to false to disable cost based filter index sorting
---
.../benchmark/query/SqlExpressionBenchmark.java | 28 ++-
.../benchmark/query/SqlNestedDataBenchmark.java | 2 +-
.../druid/segment/QueryableIndexCursorHolder.java | 2 +-
.../apache/druid/segment/filter/BoundFilter.java | 8 +-
.../druid/segment/filter/IsBooleanFilter.java | 6 +
.../org/apache/druid/segment/filter/NotFilter.java | 6 +
.../apache/druid/segment/filter/SpatialFilter.java | 6 +
.../segment/index/AllFalseBitmapColumnIndex.java | 6 +
.../segment/index/AllTrueBitmapColumnIndex.java | 6 +
.../segment/index/AllUnknownBitmapColumnIndex.java | 6 +
.../druid/segment/index/BitmapColumnIndex.java | 10 +-
.../index/DictionaryRangeScanningBitmapIndex.java | 6 +
.../index/DictionaryScanningBitmapIndex.java | 6 +
.../segment/index/IndexedUtf8ValueIndexes.java | 15 +-
.../segment/index/SimpleImmutableBitmapIndex.java | 6 +
.../segment/index/semantic/ValueSetIndexes.java | 35 +++-
.../nested/NestedFieldColumnIndexSupplier.java | 88 ++++++++-
.../nested/ScalarDoubleColumnAndIndexSupplier.java | 21 ++
.../nested/ScalarLongColumnAndIndexSupplier.java | 20 ++
.../nested/VariantColumnAndIndexSupplier.java | 10 +
.../segment/virtual/ListFilteredVirtualColumn.java | 133 ++++++++-----
.../druid/segment/filter/FilterBundleTest.java | 214 +++++++++++++++++----
22 files changed, 532 insertions(+), 108 deletions(-)
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index d351b40f2db..cc6dd636df0 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -118,6 +118,7 @@ public class SqlExpressionBenchmark
{
return 1;
}
+
@Override
public String getFormatString()
{
@@ -220,8 +221,11 @@ public class SqlExpressionBenchmark
"SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM foo",
"SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22,
30, 33, 40, 44, 50, 55, 100]) FROM foo",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM foo",
- "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30,
33, 40, 44, 50, 55, 100]) FROM foo"
- );
+ "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30,
33, 40, 44, 50, 55, 100]) FROM foo",
+ // 46: filters with random orders
+ "SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND
string1 = '1000' GROUP BY 1 ORDER BY 2",
+ "SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND
(string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26,
46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR
REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2,
'^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2"
+ );
@Param({"5000000"})
private int rowsPerSegment;
@@ -294,7 +298,9 @@ public class SqlExpressionBenchmark
"42",
"43",
"44",
- "45"
+ "45",
+ "46",
+ "47"
})
private String query;
@@ -319,7 +325,12 @@ public class SqlExpressionBenchmark
final PlannerConfig plannerConfig = new PlannerConfig();
final SegmentGenerator segmentGenerator = closer.register(new
SegmentGenerator());
- log.info("Starting benchmark setup using cacheDir[%s], rows[%,d],
schema[%s].", segmentGenerator.getCacheDir(), rowsPerSegment, schema);
+ log.info(
+ "Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].",
+ segmentGenerator.getCacheDir(),
+ rowsPerSegment,
+ schema
+ );
final QueryableIndex index;
if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
@@ -383,7 +394,14 @@ public class SqlExpressionBenchmark
final String sql = QUERIES.get(Integer.parseInt(query));
- try (final DruidPlanner planner =
plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql,
ImmutableMap.of("useNativeQueryExplain", true))) {
+ try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(
+ engine,
+ "EXPLAIN PLAN FOR " + sql,
+ ImmutableMap.of(
+ "useNativeQueryExplain",
+ true
+ )
+ )) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence =
plannerResult.run().getResults();
final Object[] planResult = resultSequence.toList().get(0);
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
index b896f8df52f..fa5942dc2c9 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
@@ -199,7 +199,7 @@ public class SqlNestedDataBenchmark
// 42, 43 big cardinality like predicate filter
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo
WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'",
- // 44, 45 big cardinality like filter + selector filter
+ // 44, 45 big cardinality like filter + selector filter with different
ordering
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 =
'1000'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo
WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND
JSON_VALUE(nested, '$.nesteder.string1') = '1000'",
"SELECT SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE
'%1%'",
diff --git
a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java
b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java
index 5aa4dbed8cc..4b295931f0a 100644
---
a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java
+++
b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java
@@ -113,7 +113,7 @@ public class QueryableIndexCursorHolder implements
CursorHolder
Cursors.getTimeOrdering(ordering),
interval,
filter,
-
cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS,
false),
+
cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS,
true),
metrics
)
);
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java
index 4fa1a480248..ca6f66b8fda 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java
@@ -149,8 +149,6 @@ public class BoundFilter implements Filter
BitmapColumnIndex rangeIndex
)
{
-
-
final BitmapColumnIndex nullBitmap;
final NullValueIndex nulls = indexSupplier.as(NullValueIndex.class);
if (nulls == null) {
@@ -166,6 +164,12 @@ public class BoundFilter implements Filter
return
rangeIndex.getIndexCapabilities().merge(nullBitmap.getIndexCapabilities());
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return rangeIndex.estimatedComputeCost() + 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
index 75eaebb27c8..d39afdecb7f 100644
---
a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
+++
b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java
@@ -78,6 +78,12 @@ public class IsBooleanFilter implements Filter
return baseIndex.getIndexCapabilities();
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return baseIndex.estimatedComputeCost();
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
index 58dd90c8af1..3e89148e2cc 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java
@@ -80,6 +80,12 @@ public class NotFilter implements Filter
return baseIndex.getIndexCapabilities();
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return baseIndex.estimatedComputeCost();
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java
b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java
index a79d4d2ac22..311f32667dd 100644
---
a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java
+++
b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java
@@ -94,6 +94,12 @@ public class SpatialFilter implements Filter
return new SimpleColumnIndexCapabilities(true, true);
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return Integer.MAX_VALUE;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java
index d0469dffe56..907ce55cb2a 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java
@@ -50,6 +50,12 @@ public class AllFalseBitmapColumnIndex implements
BitmapColumnIndex
return SimpleColumnIndexCapabilities.getConstant();
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 0;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory,
boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java
index 02ff2c5bcca..5bd48d31802 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java
@@ -39,6 +39,12 @@ public class AllTrueBitmapColumnIndex implements
BitmapColumnIndex
return SimpleColumnIndexCapabilities.getConstant();
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 0;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory,
boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java
index a33ae663d53..793773ede11 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java
@@ -44,6 +44,12 @@ public class AllUnknownBitmapColumnIndex implements
BitmapColumnIndex
return SimpleColumnIndexCapabilities.getConstant();
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 0;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory,
boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
index f28429969d3..97d24ff0bab 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java
@@ -36,12 +36,12 @@ public interface BitmapColumnIndex
ColumnIndexCapabilities getIndexCapabilities();
/**
- * Returns an estimated cost for computing the bitmap result.
+ * Returns an estimated cost for computing the bitmap result. Generally this
is equivalent to number of bitmap union
+ * or intersection operations need to be performed. E.x. null value index
bitmap has a cost of 0, non-null value index
+ * bitmap union with null bitmap has a cost of 1, range (size of 10)
scanning index bitmap union with null bitmap has
+ * a cost of 10.
*/
- default int estimatedComputeCost()
- {
- return Integer.MAX_VALUE;
- }
+ int estimatedComputeCost();
/**
* Compute a bitmap result wrapped with the {@link BitmapResultFactory}
representing the rows matched by this index.
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
index 37e84bb8b5d..ee0a2dc67ad 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java
@@ -46,6 +46,12 @@ public abstract class DictionaryRangeScanningBitmapIndex
extends SimpleImmutable
this.rangeSize = rangeSize;
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return this.rangeSize;
+ }
+
@Nullable
@Override
public final <T> T computeBitmapResult(
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
index 1ae00a846ba..118d310d656 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java
@@ -49,6 +49,12 @@ public abstract class DictionaryScanningBitmapIndex extends
SimpleImmutableBitma
this.scaleThreshold = scaleThreshold;
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return this.dictionarySize;
+ }
+
@Nullable
@Override
public final <T> T computeBitmapResult(
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java
b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java
index 6015088d558..a9819fd75cb 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java
@@ -83,6 +83,11 @@ public final class IndexedUtf8ValueIndexes<TDictionary
extends Indexed<ByteBuffe
final ByteBuffer utf8 = StringUtils.toUtf8ByteBuffer(value);
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
@@ -122,10 +127,7 @@ public final class IndexedUtf8ValueIndexes<TDictionary
extends Indexed<ByteBuffe
public BitmapColumnIndex forSortedValues(SortedSet<String> values)
{
return getBitmapColumnIndexForSortedIterableUtf8(
- Iterables.transform(
- values,
- StringUtils::toUtf8ByteBuffer
- ),
+ Iterables.transform(values, StringUtils::toUtf8ByteBuffer),
values.size(),
values.contains(null)
);
@@ -181,6 +183,7 @@ public final class IndexedUtf8ValueIndexes<TDictionary
extends Indexed<ByteBuffe
bitmapFactory,
COMPARATOR,
valuesUtf8,
+ size,
dictionary,
bitmaps,
() -> {
@@ -197,6 +200,7 @@ public final class IndexedUtf8ValueIndexes<TDictionary
extends Indexed<ByteBuffe
return
ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch(
bitmapFactory,
valuesUtf8,
+ size,
dictionary,
bitmaps,
() -> {
@@ -242,6 +246,7 @@ public final class IndexedUtf8ValueIndexes<TDictionary
extends Indexed<ByteBuffe
bitmapFactory,
ByteBufferUtils.utf8Comparator(),
Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer),
+ tailSet.size(),
dictionary,
bitmaps,
unknownsIndex
@@ -251,6 +256,7 @@ public final class IndexedUtf8ValueIndexes<TDictionary
extends Indexed<ByteBuffe
return
ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch(
bitmapFactory,
Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer),
+ tailSet.size(),
dictionary,
bitmaps,
unknownsIndex
@@ -262,6 +268,7 @@ public final class IndexedUtf8ValueIndexes<TDictionary
extends Indexed<ByteBuffe
sortedValues,
x ->
StringUtils.toUtf8ByteBuffer(DimensionHandlerUtils.convertObjectToString(x))
),
+ sortedValues.size(),
dictionary,
bitmaps,
unknownsIndex
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java
b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java
index 7c6f6023ed0..6c4177dc53a 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java
@@ -35,6 +35,12 @@ public final class SimpleImmutableBitmapIndex extends
SimpleBitmapColumnIndex
this.bitmap = bitmap;
}
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 0;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory,
boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java
b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java
index 56d6da9a19a..f31f0a7bfed 100644
---
a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java
+++
b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java
@@ -63,8 +63,8 @@ public interface ValueSetIndexes
* @param sortedValues values to match, sorted in matchValueType order
* @param matchValueType type of the value to match, used to assist
conversion from the match value type to the column
* value type
- * @return {@link ImmutableBitmap} corresponding to the rows
which match the values, or null if an index
- * connot be computed for the supplied value type
+ * @return {@link ImmutableBitmap} corresponding to the rows which match the
values, or null if an index
+ * connot be computed for the supplied value type
*/
@Nullable
BitmapColumnIndex forSortedValues(@Nonnull List<?> sortedValues,
TypeSignature<ValueType> matchValueType);
@@ -78,10 +78,10 @@ public interface ValueSetIndexes
* {@link Indexed<T>} value dictionary. Uses a strategy that does zipping
similar to the merge step of a sort-merge,
* where we step forward on both the iterator and the dictionary to find
matches to build a
* {@link Iterable<ImmutableBitmap>}.
- * <p>
+ * <p>
* If sorted match value iterator size is greater than (dictionary size *
{@link #SORTED_SCAN_RATIO_THRESHOLD}),
* consider using this method instead of {@link
#buildBitmapColumnIndexFromSortedIteratorBinarySearch}.
- * <p>
+ * <p>
* If the values in the iterator are NOT sorted the same as the dictionary,
do NOT use this method, use
* {@link #buildBitmapColumnIndexFromIteratorBinarySearch} instead.
*/
@@ -89,6 +89,7 @@ public interface ValueSetIndexes
BitmapFactory bitmapFactory,
Comparator<T> comparator,
Iterable<T> values,
+ int size,
Indexed<T> dictionary,
Indexed<ImmutableBitmap> bitmaps,
Supplier<ImmutableBitmap> unknownsBitmap
@@ -96,6 +97,13 @@ public interface ValueSetIndexes
{
return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps,
unknownsBitmap)
{
+
+ @Override
+ public int estimatedComputeCost()
+ {
+ return Integer.max(size, dictionary.size());
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -163,13 +171,14 @@ public interface ValueSetIndexes
* <p>
* If sorted match value iterator size is less than (dictionary size *
{@link #SORTED_SCAN_RATIO_THRESHOLD}),
* consider using this method instead of {@link
#buildBitmapColumnIndexFromSortedIteratorScan}.
- * <p>
+ * <p>
* If the values in the iterator are not sorted the same as the dictionary,
do not use this method, use
* {@link #buildBitmapColumnIndexFromIteratorBinarySearch} instead.
*/
static <T> BitmapColumnIndex
buildBitmapColumnIndexFromSortedIteratorBinarySearch(
BitmapFactory bitmapFactory,
Iterable<T> values,
+ int size,
Indexed<T> dictionary,
Indexed<ImmutableBitmap> bitmaps,
Supplier<ImmutableBitmap> getUnknownsIndex
@@ -177,6 +186,13 @@ public interface ValueSetIndexes
{
return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps,
getUnknownsIndex)
{
+
+ @Override
+ public int estimatedComputeCost()
+ {
+ return size;
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -236,7 +252,7 @@ public interface ValueSetIndexes
* {@link Indexed<T>} value dictionary. This algorithm iterates the values
to match and does a binary search for
* matching values using {@link Indexed#indexOf(Object)} to build a {@link
Iterable<ImmutableBitmap>} until the match
* values iterator is exhausted.
- * <p>
+ * <p>
* If values of the iterator are sorted the same as the dictionary, use
* {@link #buildBitmapColumnIndexFromSortedIteratorScan} or
* {@link #buildBitmapColumnIndexFromSortedIteratorBinarySearch} instead.
@@ -244,6 +260,7 @@ public interface ValueSetIndexes
static <T> BitmapColumnIndex buildBitmapColumnIndexFromIteratorBinarySearch(
BitmapFactory bitmapFactory,
Iterable<T> values,
+ int size,
Indexed<T> dictionary,
Indexed<ImmutableBitmap> bitmaps,
Supplier<ImmutableBitmap> getUnknownsIndex
@@ -251,6 +268,12 @@ public interface ValueSetIndexes
{
return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps,
getUnknownsIndex)
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return size;
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
index c784eaa8d13..d53596964f8 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java
@@ -158,7 +158,8 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
nullIndex = new
SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap());
}
return (T) (NullValueIndex) () -> nullIndex;
- } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) ||
clazz.equals(DictionaryEncodedValueIndex.class)) {
+ } else if (clazz.equals(DictionaryEncodedStringValueIndex.class)
+ || clazz.equals(DictionaryEncodedValueIndex.class)) {
return (T) new NestedFieldDictionaryEncodedStringValueIndex();
}
@@ -402,6 +403,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
final FixedIndexed<Integer> localDictionary =
localDictionarySupplier.get();
final Indexed<ByteBuffer> stringDictionary =
globalStringDictionarySupplier.get();
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -430,6 +437,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return values.size();
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -548,7 +561,9 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private int findNext()
{
- while (currIndex < end &&
!matcher.apply(StringUtils.fromUtf8Nullable(stringDictionary.get(localDictionary.get(currIndex)))).matches(false))
{
+ while (currIndex < end
+ &&
!matcher.apply(StringUtils.fromUtf8Nullable(stringDictionary.get(localDictionary.get(currIndex))))
+ .matches(false)) {
currIndex++;
}
@@ -674,6 +689,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
final FixedIndexed<Integer> localDictionary =
localDictionarySupplier.get();
final FixedIndexed<Long> globalDictionary =
globalLongDictionarySupplier.get();
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -715,6 +736,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
final FixedIndexed<Integer> localDictionary =
localDictionarySupplier.get();
final FixedIndexed<Long> longDictionary =
globalLongDictionarySupplier.get();
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -750,6 +777,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return values.size();
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -922,7 +955,8 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
if (nextValue == 0) {
nextSet = longPredicate.applyNull().matches(includeUnknown);
} else {
- nextSet =
longPredicate.applyLong(longDictionary.get(nextValue -
adjustLongId)).matches(includeUnknown);
+ nextSet =
longPredicate.applyLong(longDictionary.get(nextValue - adjustLongId))
+ .matches(includeUnknown);
}
}
}
@@ -953,6 +987,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
final FixedIndexed<Integer> localDictionary =
localDictionarySupplier.get();
final FixedIndexed<Double> globalDictionary =
globalDoubleDictionarySupplier.get();
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -993,6 +1033,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
final FixedIndexed<Integer> localDictionary =
localDictionarySupplier.get();
final FixedIndexed<Double> doubleDictionary =
globalDoubleDictionarySupplier.get();
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -1028,6 +1074,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return values.size();
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -1204,6 +1256,9 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
final FrontCodedIntArrayIndexed arrayDictionary =
globalArrayDictionarySupplier == null
? null
:
globalArrayDictionarySupplier.get();
+ // For every single String value, we need to look up indexes from
stringDictionary, longDictionary and
+ // doubleDictionary. Hence, the compute cost for one value is 3.
+ static final int INDEX_COMPUTE_SCALE = 3;
IntList getIndexes(@Nullable String value)
{
@@ -1256,6 +1311,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return NestedVariantIndexes.INDEX_COMPUTE_SCALE;
+ }
+
@Override
protected Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -1293,6 +1354,15 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ if (values.size() >= Integer.MAX_VALUE /
NestedVariantIndexes.INDEX_COMPUTE_SCALE) {
+ return Integer.MAX_VALUE;
+ }
+ return values.size() * NestedVariantIndexes.INDEX_COMPUTE_SCALE;
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -1504,6 +1574,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -1574,6 +1650,12 @@ public class
NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
index e535d9cb54b..5199c98d93f 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java
@@ -241,10 +241,17 @@ public class ScalarDoubleColumnAndIndexSupplier
implements Supplier<NestedCommon
return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap);
}
final double doubleValue = castForComparison.asDouble();
+
return new SimpleBitmapColumnIndex()
{
final FixedIndexed<Double> dictionary = doubleDictionarySupplier.get();
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -307,6 +314,7 @@ public class ScalarDoubleColumnAndIndexSupplier implements
Supplier<NestedCommon
bitmapFactory,
ColumnType.DOUBLE.getNullableStrategy(),
tailSet,
+ tailSet.size(),
dictionary,
valueIndexes,
unknownsIndex
@@ -316,6 +324,7 @@ public class ScalarDoubleColumnAndIndexSupplier implements
Supplier<NestedCommon
return
ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch(
bitmapFactory,
tailSet,
+ tailSet.size(),
dictionary,
valueIndexes,
unknownsIndex
@@ -325,6 +334,7 @@ public class ScalarDoubleColumnAndIndexSupplier implements
Supplier<NestedCommon
return ValueSetIndexes.buildBitmapColumnIndexFromIteratorBinarySearch(
bitmapFactory,
Iterables.transform(sortedValues,
DimensionHandlerUtils::convertObjectToDouble),
+ sortedValues.size(),
dictionary,
valueIndexes,
unknownsIndex
@@ -345,6 +355,11 @@ public class ScalarDoubleColumnAndIndexSupplier implements
Supplier<NestedCommon
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
@@ -390,6 +405,12 @@ public class ScalarDoubleColumnAndIndexSupplier implements
Supplier<NestedCommon
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return values.size();
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
index a9932f7ae3c..66527530336 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java
@@ -246,6 +246,12 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
{
final FixedIndexed<Long> dictionary = longDictionarySupplier.get();
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
+
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
{
@@ -305,6 +311,7 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
bitmapFactory,
ColumnType.LONG.getNullableStrategy(),
tailSet,
+ tailSet.size(),
dictionary,
valueIndexes,
unknownsIndex
@@ -314,6 +321,7 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
return
ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch(
bitmapFactory,
tailSet,
+ tailSet.size(),
dictionary,
valueIndexes,
unknownsIndex
@@ -326,6 +334,7 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
sortedValues,
DimensionHandlerUtils::convertObjectToLong
),
+ sortedValues.size(),
dictionary,
valueIndexes,
unknownsIndex
@@ -346,6 +355,11 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
final Long longValue = GuavaUtils.tryParseLong(value);
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
@@ -390,6 +404,12 @@ public class ScalarLongColumnAndIndexSupplier implements
Supplier<NestedCommonFo
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return values.size();
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
index aa1dd143e4e..3cd9e4f7308 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
@@ -363,6 +363,11 @@ public class VariantColumnAndIndexSupplier implements
Supplier<NestedCommonForma
final FrontCodedIntArrayIndexed dictionary =
arrayDictionarySupplier.get();
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
@@ -428,6 +433,11 @@ public class VariantColumnAndIndexSupplier implements
Supplier<NestedCommonForma
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
index 9262e8a1730..f8294951234 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
@@ -26,6 +26,8 @@ import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
+import it.unimi.dsi.fastutil.ints.IntIntImmutablePair;
+import it.unimi.dsi.fastutil.ints.IntIntPair;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
@@ -49,6 +51,8 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.index.BitmapColumnIndex;
+import org.apache.druid.segment.index.DictionaryRangeScanningBitmapIndex;
+import org.apache.druid.segment.index.DictionaryScanningBitmapIndex;
import org.apache.druid.segment.index.SimpleBitmapColumnIndex;
import
org.apache.druid.segment.index.SimpleImmutableBitmapDelegatingIterableIndex;
import
org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
@@ -139,9 +143,17 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
)
{
if (allowList) {
- return ListFilteredDimensionSpec.filterAllowList(values,
factory.makeDimensionSelector(delegate), delegate.getExtractionFn() != null);
+ return ListFilteredDimensionSpec.filterAllowList(
+ values,
+ factory.makeDimensionSelector(delegate),
+ delegate.getExtractionFn() != null
+ );
} else {
- return ListFilteredDimensionSpec.filterDenyList(values,
factory.makeDimensionSelector(delegate), delegate.getExtractionFn() != null);
+ return ListFilteredDimensionSpec.filterDenyList(
+ values,
+ factory.makeDimensionSelector(delegate),
+ delegate.getExtractionFn() != null
+ );
}
}
@@ -243,7 +255,8 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
return (T) new ListFilteredDruidPredicateIndexes(underlyingIndex,
idMapping, nullValueBitmapSupplier);
} else if (clazz.equals(LexicographicalRangeIndexes.class)) {
return (T) new
ListFilteredLexicographicalRangeIndexes(underlyingIndex, idMapping,
nullValueBitmapSupplier);
- } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) ||
clazz.equals(DictionaryEncodedValueIndex.class)) {
+ } else if (clazz.equals(DictionaryEncodedStringValueIndex.class)
+ || clazz.equals(DictionaryEncodedValueIndex.class)) {
return (T) new
ListFilteredDictionaryEncodedStringValueIndex(underlyingIndex, idMapping);
}
return null;
@@ -320,7 +333,8 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
private int findNext()
{
- while (currIndex < end &&
!matcher.apply(delegate.getValue(idMapping.getReverseId(currIndex))).matches(includeUnknown))
{
+ while (currIndex < end &&
!matcher.apply(delegate.getValue(idMapping.getReverseId(currIndex)))
+ .matches(includeUnknown)) {
currIndex++;
}
@@ -400,7 +414,12 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
return -(minIndex + 1);
}
- Iterable<ImmutableBitmap> getBitmapsInRange(DruidObjectPredicate<String>
matcher, int start, int end, boolean includeUnknown)
+ Iterable<ImmutableBitmap> getBitmapsInRange(
+ DruidObjectPredicate<String> matcher,
+ int start,
+ int end,
+ boolean includeUnknown
+ )
{
return ListFilteredVirtualColumn.getBitmapsInRange(delegate, idMapping,
matcher, start, end, includeUnknown);
}
@@ -420,6 +439,11 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
{
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 0;
+ }
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknowns)
@@ -450,6 +474,11 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
{
return new SimpleBitmapColumnIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return 1;
+ }
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
@@ -478,6 +507,12 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
{
return new SimpleImmutableBitmapDelegatingIterableIndex()
{
+ @Override
+ public int estimatedComputeCost()
+ {
+ return values.size();
+ }
+
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
@@ -548,22 +583,17 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
- return new SimpleBitmapColumnIndex()
+ return new DictionaryScanningBitmapIndex(getCardinality())
{
@Override
- public <T> T computeBitmapResult(BitmapResultFactory<T>
bitmapResultFactory, boolean includeUnknown)
+ protected Iterable<ImmutableBitmap> getBitmapIterable(boolean
includeUnknown)
{
- final int start = 0, end = getCardinality();
- Iterable<ImmutableBitmap> bitmaps;
- if (includeUnknown) {
- bitmaps = Iterables.concat(
- getBitmapsInRange(matcherFactory.makeStringPredicate(), start,
end, includeUnknown),
- Collections.singletonList(nullValueBitmapSupplier.get())
- );
- } else {
- bitmaps = getBitmapsInRange(matcherFactory.makeStringPredicate(),
start, end, includeUnknown);
- }
- return bitmapResultFactory.unionDimensionValueBitmaps(bitmaps);
+ return Iterables.concat(getBitmapsInRange(
+ matcherFactory.makeStringPredicate(),
+ 0,
+ getCardinality(),
+ includeUnknown
+ ), includeUnknown ?
Collections.singletonList(nullValueBitmapSupplier.get()) :
Collections.emptyList());
}
};
}
@@ -606,37 +636,13 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
DruidObjectPredicate<String> matcher
)
{
- return new SimpleImmutableBitmapDelegatingIterableIndex()
+ final IntIntPair range = getRange(startValue, startStrict, endValue,
endStrict);
+ final int start = range.leftInt(), end = range.rightInt();
+ return new DictionaryRangeScanningBitmapIndex(1.0, end - start)
{
@Override
- public Iterable<ImmutableBitmap> getBitmapIterable()
+ protected Iterable<ImmutableBitmap> getBitmapIterable()
{
- int startIndex, endIndex;
- final int firstValue =
NullHandling.isNullOrEquivalent(delegate.getValue(idMapping.getReverseId(0))) ?
1 : 0;
- if (startValue == null) {
- startIndex = firstValue;
- } else {
- final int found =
getReverseIndex(NullHandling.emptyToNullIfNeeded(startValue));
- if (found >= 0) {
- startIndex = startStrict ? found + 1 : found;
- } else {
- startIndex = -(found + 1);
- }
- }
-
- if (endValue == null) {
- endIndex = idMapping.getValueCardinality();
- } else {
- final int found =
getReverseIndex(NullHandling.emptyToNullIfNeeded(endValue));
- if (found >= 0) {
- endIndex = endStrict ? found : found + 1;
- } else {
- endIndex = -(found + 1);
- }
- }
-
- endIndex = Math.max(startIndex, endIndex);
- final int start = startIndex, end = endIndex;
return getBitmapsInRange(matcher, start, end, false);
}
@@ -648,6 +654,41 @@ public class ListFilteredVirtualColumn implements
VirtualColumn
}
};
}
+
+ private IntIntPair getRange(
+ @Nullable String startValue,
+ boolean startStrict,
+ @Nullable String endValue,
+ boolean endStrict
+ )
+ {
+ int startIndex, endIndex;
+ final int firstValue =
NullHandling.isNullOrEquivalent(delegate.getValue(idMapping.getReverseId(0))) ?
1 : 0;
+ if (startValue == null) {
+ startIndex = firstValue;
+ } else {
+ final int found =
getReverseIndex(NullHandling.emptyToNullIfNeeded(startValue));
+ if (found >= 0) {
+ startIndex = startStrict ? found + 1 : found;
+ } else {
+ startIndex = -(found + 1);
+ }
+ }
+
+ if (endValue == null) {
+ endIndex = idMapping.getValueCardinality();
+ } else {
+ final int found =
getReverseIndex(NullHandling.emptyToNullIfNeeded(endValue));
+ if (found >= 0) {
+ endIndex = endStrict ? found : found + 1;
+ } else {
+ endIndex = -(found + 1);
+ }
+ }
+
+ endIndex = Math.max(startIndex, endIndex);
+ return new IntIntImmutablePair(startIndex, endIndex);
+ }
}
private static class ListFilteredDictionaryEncodedStringValueIndex extends
BaseListFilteredColumnIndex
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
index 9bc86ae3900..a7afd9755c5 100644
---
a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java
@@ -20,6 +20,8 @@
package org.apache.druid.segment.filter;
import com.google.common.collect.ImmutableList;
+import junitparams.JUnitParamsRunner;
+import junitparams.Parameters;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.query.DefaultBitmapResultFactory;
@@ -44,11 +46,8 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameter;
-import org.junit.runners.Parameterized.Parameters;
-@RunWith(Parameterized.class)
+@RunWith(JUnitParamsRunner.class)
public class FilterBundleTest extends InitializedNullHandlingTest
{
private Closer closer;
@@ -58,15 +57,6 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
@Rule
public TemporaryFolder tmpDir = new TemporaryFolder();
- @Parameters
- public static Object[] flags()
- {
- return new Object[]{false, true};
- }
-
- @Parameter
- public boolean cursorAutoArrangeFilters;
-
@Before
public void setUp()
{
@@ -88,7 +78,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_or_country_isRobot()
+ @Parameters({"true", "false"})
+ public void test_or_country_isRobot(boolean cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new OrFilter(
@@ -96,7 +87,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
new EqualityFilter("countryName", ColumnType.STRING, "United
States", null),
new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -108,7 +100,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_and_country_isRobot()
+ @Parameters({"true", "false"})
+ public void test_and_country_isRobot(boolean cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new AndFilter(
@@ -116,7 +109,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
new EqualityFilter("countryName", ColumnType.STRING, "United
States", null),
new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -128,7 +122,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_or_countryIsNull_pageLike()
+ @Parameters({"true", "false"})
+ public void test_or_countryIsNull_pageLike(boolean cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new OrFilter(
@@ -136,7 +131,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
new NullFilter("countryName", null),
new LikeDimFilter("page", "%u%", null, null).toFilter()
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -149,7 +145,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_and_countryIsNull_pageLike()
+ @Parameters({"true", "false"})
+ public void test_and_countryIsNull_pageLike(boolean cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new AndFilter(
@@ -157,7 +154,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
new NullFilter("countryName", null),
new LikeDimFilter("page", "%u%", null, null).toFilter()
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -169,7 +167,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_and_country_pageLike()
+ @Parameters({"true", "false"})
+ public void test_and_country_pageLike(boolean cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new AndFilter(
@@ -177,9 +176,32 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
new EqualityFilter("countryName", ColumnType.STRING, "United
States", null),
new LikeDimFilter("page", "%u%", null, null).toFilter()
)
- )
+ ),
+ cursorAutoArrangeFilters
+ );
+
+ Assert.assertEquals(
+ "index: countryName = United States (selectionSize = 528)\n"
+ + "matcher: page LIKE '%u%'\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ @Parameters({"true"})
+ public void
test_pageLike_and_country_pageLike_with_cursorAutoArrangeFilters(boolean
cursorAutoArrangeFilters)
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new AndFilter(
+ ImmutableList.of(
+ new LikeDimFilter("page", "%u%", null, null).toFilter(),
+ new EqualityFilter("countryName", ColumnType.STRING, "United
States", null)
+ )
+ ),
+ cursorAutoArrangeFilters
);
+ // With cursorAutoArrangeFilters flag on, the indexes are sorted by cost
ASC, hence country name index is used first.
Assert.assertEquals(
"index: countryName = United States (selectionSize = 528)\n"
+ "matcher: page LIKE '%u%'\n",
@@ -188,7 +210,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_or_countryNotNull_pageLike()
+ @Parameters({"true", "false"})
+ public void test_or_countryNotNull_pageLike(boolean cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new OrFilter(
@@ -196,7 +219,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
new NotFilter(new NullFilter("countryName", null)),
new LikeDimFilter("page", "%u%", null, null).toFilter()
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -208,7 +232,31 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_and_countryNotNull_pageLike()
+ @Parameters({"true"})
+ public void
test_or_pageLike_countryNotNull_pageLike_with_cursorAutoArrangeFilters(boolean
cursorAutoArrangeFilters)
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new LikeDimFilter("page", "%u%", null, null).toFilter(),
+ new NotFilter(new NullFilter("countryName", null))
+ )
+ ),
+ cursorAutoArrangeFilters
+ );
+
+ // With cursorAutoArrangeFilters flag on, the indexes are sorted by cost
ASC, hence country name index is used first.
+ Assert.assertEquals(
+ "index: OR (selectionSize = 39244)\n"
+ + " index: ~(countryName IS NULL) (selectionSize = 3799)\n"
+ + " index: page LIKE '%u%' (selectionSize = 15328)\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ @Parameters({"true", "false"})
+ public void test_and_countryNotNull_pageLike(boolean
cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new AndFilter(
@@ -216,7 +264,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
new NotFilter(new NullFilter("countryName", null)),
new LikeDimFilter("page", "%u%", null, null).toFilter()
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -226,8 +275,96 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
);
}
+
+ @Test
+ @Parameters({"true"})
+ public void test_and_cursorAutoArrangeFilters(boolean
cursorAutoArrangeFilters)
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new AndFilter(
+ ImmutableList.of(
+ new OrFilter(
+ ImmutableList.of(
+ new LikeDimFilter("page", "O%", null, null).toFilter(),
+ new EqualityFilter("countryName", ColumnType.STRING,
"United States", null)
+ )
+ ),
+ new EqualityFilter("isRobot", ColumnType.STRING, "false",
null),
+ new NotFilter(new NullFilter("countryName", null))
+ )
+ ),
+ cursorAutoArrangeFilters
+ );
+
+ // The estimate cost for child Filters:
+ // 1. countryName NullFilter: 0
+ // 2. isRobot EqualityFilter: 1
+ // 3. page LikeFilter with prefix and countryName EqualityFIlter: 1 + size
of page dictionary with keys matching "O" prefix
+ Assert.assertEquals(
+ "index: AND (selectionSize = 562)\n"
+ + " index: ~(countryName IS NULL) (selectionSize = 3799)\n"
+ + " index: isRobot = false (selectionSize = 23824)\n"
+ + " index: OR (selectionSize = 3799)\n"
+ + " index: countryName = United States (selectionSize = 528)\n"
+ + " index: page LIKE 'O%' (selectionSize = 351)\n",
+ filterBundle.getInfo().describe()
+ );
+ }
+
+ @Test
+ @Parameters({"true"})
+ public void test_or_cursorAutoArrangeFilters(boolean
cursorAutoArrangeFilters)
+ {
+ final FilterBundle filterBundle = makeFilterBundle(
+ new OrFilter(
+ ImmutableList.of(
+ new AndFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING,
"United Kingdom", null),
+ new LikeDimFilter("page", "%b%", null, null).toFilter()
+ )
+ ),
+ new AndFilter(
+ ImmutableList.of(
+ new EqualityFilter("countryName", ColumnType.STRING,
"United States", null),
+ new LikeDimFilter("page", "O%", null, null).toFilter()
+ )
+ ),
+ new TypedInFilter("isRobot", ColumnType.STRING,
ImmutableList.of("false", "true"), null, null),
+ new EqualityFilter("channel", ColumnType.STRING,
"#en.wikipedia", null),
+ new NullFilter("countryName", null)
+ )
+ ),
+ cursorAutoArrangeFilters
+ );
+
+ // The estimate cost for child Filters:
+ // 1. countryName NullFilter: 0
+ // 2. channel EqualityFilter: 1
+ // 3. isRobot TypedInFilter: 2
+ // 4. page LikeFilter with prefix and countryName EqualityFilter: 1 + size
of page dictionary with keys matching "O" prefix
+ // 5. page LikeFilter and countryName EqualityFilter: 1 + size of page
dictionary
+ Assert.assertEquals(
+ "matcher: OR\n"
+ + " matcher: OR\n"
+ + " with partial index: OR (selectionSize = 39244)\n"
+ + " index: countryName IS NULL (selectionSize = 35445)\n"
+ + " index: channel = #en.wikipedia (selectionSize = 11549)\n"
+ + " index: isRobot IN (false, true) (STRING) (selectionSize =
39244)\n"
+ + " matcher: AND\n"
+ + " with partial index: countryName = United States (selectionSize
= 528)\n"
+ + " matcher: page LIKE 'O%'\n"
+ + " matcher: AND\n"
+ + " with partial index: countryName = United Kingdom (selectionSize
= 234)\n"
+ + " matcher: page LIKE '%b%'\n",
+ filterBundle.getInfo().describe()
+ );
+
+ }
+
@Test
- public void test_or_countryIsAndPageLike()
+ @Parameters({"true", "false"})
+ public void test_or_countryIsAndPageLike(boolean cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new OrFilter(
@@ -251,7 +388,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
)
)
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -271,7 +409,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_or_countryIsNull_and_country_pageLike()
+ @Parameters({"true", "false"})
+ public void test_or_countryIsNull_and_country_pageLike(boolean
cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new OrFilter(
@@ -284,7 +423,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
)
)
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -299,7 +439,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
}
@Test
- public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike()
+ @Parameters({"true", "false"})
+ public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike(boolean
cursorAutoArrangeFilters)
{
final FilterBundle filterBundle = makeFilterBundle(
new OrFilter(
@@ -315,7 +456,8 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
)
)
)
- )
+ ),
+ cursorAutoArrangeFilters
);
Assert.assertEquals(
@@ -329,9 +471,11 @@ public class FilterBundleTest extends
InitializedNullHandlingTest
);
}
- protected FilterBundle makeFilterBundle(final Filter filter)
+ protected FilterBundle makeFilterBundle(final Filter filter, boolean
cursorAutoArrangeFilters)
{
- return new FilterBundle.Builder(filter, indexSelector,
cursorAutoArrangeFilters).build(
+ return new FilterBundle.Builder(filter, indexSelector,
+ cursorAutoArrangeFilters
+ ).build(
new DefaultBitmapResultFactory(bitmapFactory),
indexSelector.getNumRows(),
indexSelector.getNumRows(),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]