This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 26d82fd342 fix filtering bug in filtering unnest cols and dim cols:
Received a non-applicable rewrite (#14587)
26d82fd342 is described below
commit 26d82fd342de4ea40bc189542738023722e592af
Author: Pranav <[email protected]>
AuthorDate: Wed Aug 16 17:57:16 2023 -0700
fix filtering bug in filtering unnest cols and dim cols: Received a
non-applicable rewrite (#14587)
---
.../apache/druid/segment/UnnestStorageAdapter.java | 257 +++++++++++-----
.../org/apache/druid/segment/filter/Filters.java | 16 +
.../druid/segment/UnnestStorageAdapterTest.java | 265 ++++++++++++----
.../druid/sql/calcite/CalciteArraysQueryTest.java | 336 +++++++++++++++++++++
4 files changed, 741 insertions(+), 133 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
index 048cd10f8e..71a1809a88 100644
---
a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
+++
b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
@@ -38,6 +38,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
+import org.apache.druid.segment.filter.AndFilter;
import org.apache.druid.segment.filter.BoundFilter;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.filter.LikeFilter;
@@ -314,84 +315,29 @@ public class UnnestStorageAdapter implements
StorageAdapter
filtersPushedDownToBaseCursor -> null (as the filter cannot be
re-written due to presence of virtual columns)
filtersForPostUnnestCursor -> d12 IN (a,b) or m1 < 10
*/
- class FilterSplitter
- {
- final List<Filter> filtersPushedDownToBaseCursor = new ArrayList<>();
- final List<Filter> filtersForPostUnnestCursor = new ArrayList<>();
-
- void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter
filter, boolean skipPreFilters)
- {
- if (filter == null) {
- return;
- }
- if (!skipPreFilters) {
- final Filter newFilter =
rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn,
inputColumnCapabilites);
- if (newFilter != null) {
- // Add the rewritten filter pre-unnest, so we get the benefit of
any indexes, and so we avoid unnesting
- // any rows that do not match this filter at all.
- filtersPushedDownToBaseCursor.add(newFilter);
- }
- }
- // Add original filter post-unnest no matter what: we need to filter
out any extraneous unnested values.
- filtersForPostUnnestCursor.add(filter);
- }
-
- void addPreFilter(@Nullable final Filter filter)
- {
- if (filter == null) {
- return;
- }
-
- final Set<String> requiredColumns = filter.getRequiredColumns();
-
- // Run filter post-unnest if it refers to any virtual columns. This is
a conservative judgement call
- // that perhaps forces the code to use a ValueMatcher where an index
would've been available,
- // which can have real performance implications. This is an interim
choice made to value correctness
- // over performance. When we need to optimize this performance, we
should be able to
- // create a VirtualColumnDatasource that contains all the virtual
columns, in which case the query
- // itself would stop carrying them and everything should be able to be
pushed down.
- if (queryVirtualColumns.getVirtualColumns().length > 0) {
- for (String column : requiredColumns) {
- if (queryVirtualColumns.exists(column)) {
- filtersForPostUnnestCursor.add(filter);
- return;
- }
- }
- }
- filtersPushedDownToBaseCursor.add(filter);
-
- }
- }
-
- final FilterSplitter filterSplitter = new FilterSplitter();
+ final FilterSplitter filterSplitter = new FilterSplitter(inputColumn,
inputColumnCapabilites, queryVirtualColumns);
if (queryFilter != null) {
- List<Filter> preFilterList = new ArrayList<>();
- final int origFilterSize;
if (queryFilter.getRequiredColumns().contains(outputColumnName)) {
// outside filter contains unnested column
- // requires check for OR
- if (queryFilter instanceof OrFilter) {
- origFilterSize = ((OrFilter) queryFilter).getFilters().size();
- for (Filter filter : ((OrFilter) queryFilter).getFilters()) {
- if (filter.getRequiredColumns().contains(outputColumnName)) {
- final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(
- filter,
- inputColumn,
- inputColumnCapabilites
- );
- if (newFilter != null) {
- preFilterList.add(newFilter);
- }
- } else {
- preFilterList.add(filter);
+ // requires check for OR and And filters, disqualify rewrite for
non-unnest filters
+ if (queryFilter instanceof BooleanFilter) {
+ boolean isTopLevelAndFilter = queryFilter instanceof AndFilter;
+ List<Filter> preFilterList = recursiveRewriteOnUnnestFilters(
+ (BooleanFilter) queryFilter,
+ inputColumn,
+ inputColumnCapabilites,
+ filterSplitter,
+ isTopLevelAndFilter
+ );
+ // If rewite on entire query filter is successful then add entire
filter to preFilter else skip and only add to post filter.
+ if (filterSplitter.getPreFilterCount() ==
filterSplitter.getOriginalFilterCount()) {
+ if (queryFilter instanceof AndFilter) {
+ filterSplitter.addPreFilter(new AndFilter(preFilterList));
+ } else if (queryFilter instanceof OrFilter) {
+ filterSplitter.addPreFilter(new OrFilter(preFilterList));
}
}
- if (preFilterList.size() == origFilterSize) {
- // there has been successful rewrites
- final OrFilter preOrFilter = new OrFilter(preFilterList);
- filterSplitter.addPreFilter(preOrFilter);
- }
// add the entire query filter to unnest filter to be used in Value
matcher
filterSplitter.addPostFilterWithPreFilterIfRewritePossible(queryFilter, true);
} else {
@@ -412,7 +358,173 @@ public class UnnestStorageAdapter implements
StorageAdapter
);
}
+ class FilterSplitter
+ {
+ private String inputColumn;
+ private ColumnCapabilities inputColumnCapabilites;
+ private VirtualColumns queryVirtualColumns;
+
+ private int originalFilterCount = 0;
+ private int preFilterCount = 0;
+
+ public FilterSplitter(
+ String inputColumn,
+ ColumnCapabilities inputColumnCapabilites,
+ VirtualColumns queryVirtualColumns
+ )
+ {
+ this.inputColumn = inputColumn;
+ this.inputColumnCapabilites = inputColumnCapabilites;
+ this.queryVirtualColumns = queryVirtualColumns;
+ }
+
+ final List<Filter> filtersPushedDownToBaseCursor = new ArrayList<>();
+ final List<Filter> filtersForPostUnnestCursor = new ArrayList<>();
+
+ void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter
filter, boolean skipPreFilters)
+ {
+ if (filter == null) {
+ return;
+ }
+ if (!skipPreFilters) {
+ final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter,
inputColumn, inputColumnCapabilites);
+ if (newFilter != null) {
+ // Add the rewritten filter pre-unnest, so we get the benefit of any
indexes, and so we avoid unnesting
+ // any rows that do not match this filter at all.
+ filtersPushedDownToBaseCursor.add(newFilter);
+ }
+ }
+ // Add original filter post-unnest no matter what: we need to filter out
any extraneous unnested values.
+ filtersForPostUnnestCursor.add(filter);
+ }
+
+ void addPreFilter(@Nullable final Filter filter)
+ {
+ if (filter == null) {
+ return;
+ }
+
+ final Set<String> requiredColumns = filter.getRequiredColumns();
+
+ // Run filter post-unnest if it refers to any virtual columns. This is a
conservative judgement call
+ // that perhaps forces the code to use a ValueMatcher where an index
would've been available,
+ // which can have real performance implications. This is an interim
choice made to value correctness
+ // over performance. When we need to optimize this performance, we
should be able to
+ // create a VirtualColumnDatasource that contains all the virtual
columns, in which case the query
+ // itself would stop carrying them and everything should be able to be
pushed down.
+ if (queryVirtualColumns.getVirtualColumns().length > 0) {
+ for (String column : requiredColumns) {
+ if (queryVirtualColumns.exists(column)) {
+ filtersForPostUnnestCursor.add(filter);
+ return;
+ }
+ }
+ }
+ filtersPushedDownToBaseCursor.add(filter);
+
+ }
+
+ public void addToOriginalFilterCount(int c)
+ {
+ originalFilterCount += c;
+ }
+
+ public void addToPreFilterCount(int c)
+ {
+ preFilterCount += c;
+ }
+
+ public int getOriginalFilterCount()
+ {
+ return originalFilterCount;
+ }
+
+ public int getPreFilterCount()
+ {
+ return preFilterCount;
+ }
+ }
+ /**
+ * handles the nested rewrite for unnest columns in recursive way,
+ * it loops through all and/or filters and rewrite only required filters in
the child and add it to preFilter if qualified
+ * or else skip adding it to preFilters.
+ * RULES:
+ * 1. Add to preFilters only when top level filter is AND.
+ * for example: a=1 and (b=2 or c=2) , In this case a=1 can be added as
preFilters but we can not add b=2 as preFilters.
+ * 2. If Top level is OR filter then we can either choose to add entire top
level OR filter to preFilter or skip it all together.
+ * for example: a=1 or (b=2 and c=2)
+ * 3. Filters on unnest column which is derived from Array or any other
Expression can not be pushe down to base.
+ * for example: a=1 and vc=3 , lets say vc is ExpressionVirtualColumn, and
vc=3 can not be push down to base even if top level is AND filter.
+ * A. Unnesting a single dimension e.g. select * from foo,
UNNEST(MV_TO_ARRAY(dim3)) as u(d3)
+ * B. Unnesting an expression from multiple columns e.g. select * from foo,
UNNEST(ARRAY[dim1,dim2]) as u(d12)
+ * In case A, d3 is a direct reference to dim3 so any filter using d3 can be
rewritten using dim3 and added to pre filter
+ * while in case B, due to presence of the expression virtual column
expressionVirtualColumn("j0.unnest", "array(\"dim1\",\"dim2\")",
ColumnType.STRING_ARRAY)
+ * the filters on d12 cannot be pushed to the pre filters
+ *
+ * @param queryFilter query filter passed to makeCursors
+ * @param inputColumn input column to unnest if it's a direct
access; otherwise null
+ * @param inputColumnCapabilites input column capabilities if known;
otherwise null
+ */
+ private List<Filter> recursiveRewriteOnUnnestFilters(
+ BooleanFilter queryFilter,
+ final String inputColumn,
+ final ColumnCapabilities inputColumnCapabilites,
+ final FilterSplitter filterSplitter,
+ final boolean isTopLevelAndFilter
+ )
+ {
+ final List<Filter> preFilterList = new ArrayList<>();
+ for (Filter filter : queryFilter.getFilters()) {
+ if (filter.getRequiredColumns().contains(outputColumnName)) {
+ if (filter instanceof AndFilter) {
+ preFilterList.add(new AndFilter(recursiveRewriteOnUnnestFilters(
+ (BooleanFilter) filter,
+ inputColumn,
+ inputColumnCapabilites,
+ filterSplitter,
+ isTopLevelAndFilter
+ )));
+ } else if (filter instanceof OrFilter) {
+ // in case of Or Fiters, we set isTopLevelAndFilter to false that
prevents pushing down any child filters to base
+ List<Filter> orChildFilters = recursiveRewriteOnUnnestFilters(
+ (BooleanFilter) filter,
+ inputColumn,
+ inputColumnCapabilites,
+ filterSplitter,
+ false
+ );
+ preFilterList.add(new OrFilter(orChildFilters));
+ } else {
+ final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(
+ filter,
+ inputColumn,
+ inputColumnCapabilites
+ );
+ if (newFilter != null) {
+ // this is making sure that we are not pushing the unnest columns
filters to base filter without rewriting.
+ preFilterList.add(newFilter);
+ filterSplitter.addToPreFilterCount(1);
+ }
+ /*
+ Push down the filters to base only if top level is And Filter
+ we can not push down if top level filter is OR or unnestColumn is
derived expression like arrays
+ */
+ if (isTopLevelAndFilter &&
getUnnestInputIfDirectAccess(unnestColumn) != null) {
+ filterSplitter.addPreFilter(newFilter != null ? newFilter :
filter);
+ }
+ filterSplitter.addToOriginalFilterCount(1);
+ }
+ } else {
+ preFilterList.add(filter);
+ // for filters on non unnest columns, we still need to count the
nested filters if any as we are not traversing it in this method
+ int filterCount = Filters.countNumberOfFilters(filter);
+ filterSplitter.addToOriginalFilterCount(filterCount);
+ filterSplitter.addToPreFilterCount(filterCount);
+ }
+ }
+ return preFilterList;
+ }
/**
* Returns the input of {@link #unnestColumn}, if it's a direct access;
otherwise returns null.
*/
@@ -465,7 +577,6 @@ public class UnnestStorageAdapter implements StorageAdapter
return false;
}
}
-
return true;
} else if (filter instanceof NotFilter) {
return filterMapsOverMultiValueStrings(((NotFilter)
filter).getBaseFilter());
diff --git
a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
index c522f8b4aa..14c2e2f770 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
@@ -26,6 +26,7 @@ import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryContexts;
+import org.apache.druid.query.filter.BooleanFilter;
import org.apache.druid.query.filter.ColumnIndexSelector;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.DruidPredicateFactory;
@@ -422,4 +423,19 @@ public class Filters
return retVal;
}
+
+ public static int countNumberOfFilters(@Nullable Filter filter)
+ {
+ if (filter == null) {
+ return 0;
+ }
+ if (filter instanceof BooleanFilter) {
+ return ((BooleanFilter) filter).getFilters()
+ .stream()
+ .map(f -> countNumberOfFilters(f))
+ .mapToInt(Integer::intValue)
+ .sum();
+ }
+ return 1;
+ }
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
index 480c86f51f..4257ac4c07 100644
---
a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
@@ -20,8 +20,8 @@
package org.apache.druid.segment;
import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.guava.Sequence;
@@ -29,26 +29,13 @@ import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.QueryMetrics;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
-import org.apache.druid.query.filter.BoundDimFilter;
-import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.Filter;
-import org.apache.druid.query.filter.InDimFilter;
-import org.apache.druid.query.filter.LikeDimFilter;
-import org.apache.druid.query.filter.NullFilter;
-import org.apache.druid.query.filter.RangeFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.column.ColumnCapabilities;
-import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.filter.AndFilter;
-import org.apache.druid.segment.filter.BoundFilter;
-import org.apache.druid.segment.filter.ColumnComparisonFilter;
-import org.apache.druid.segment.filter.FalseFilter;
-import org.apache.druid.segment.filter.LikeFilter;
-import org.apache.druid.segment.filter.NotFilter;
import org.apache.druid.segment.filter.OrFilter;
import org.apache.druid.segment.filter.SelectorFilter;
-import org.apache.druid.segment.filter.TrueFilter;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
@@ -72,6 +59,10 @@ import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.List;
+import static org.apache.druid.segment.filter.FilterTestUtils.selector;
+import static org.apache.druid.segment.filter.Filters.and;
+import static org.apache.druid.segment.filter.Filters.or;
+
public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
{
private static Closer CLOSER;
@@ -286,13 +277,13 @@ public class UnnestStorageAdapterTest extends
InitializedNullHandlingTest
final String inputColumn =
unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
final OrFilter baseFilter = new OrFilter(ImmutableList.of(
- new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(),
- new SelectorDimFilter(inputColumn, "2", null).toFilter()
+ selector(OUTPUT_COLUMN_NAME, "1"),
+ selector(inputColumn, "2")
));
final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of(
- new SelectorDimFilter(inputColumn, "1", null).toFilter(),
- new SelectorDimFilter(inputColumn, "2", null).toFilter()
+ selector(inputColumn, "1"),
+ selector(inputColumn, "2")
));
final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
@@ -316,14 +307,182 @@ public class UnnestStorageAdapterTest extends
InitializedNullHandlingTest
return null;
});
}
+ @Test
+ public void
test_nested_filters_unnested_and_original_dimension_with_unnest_adapters()
+ {
+ final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter(
+ new TestStorageAdapter(INCREMENTAL_INDEX),
+ new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME +
"\"", null, ExprMacroTable.nil()),
+ null
+ );
+ final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn();
+
+ final String inputColumn =
unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
+
+ final OrFilter baseFilter = new OrFilter(ImmutableList.of(
+ selector(OUTPUT_COLUMN_NAME, "1"),
+ new AndFilter(ImmutableList.of(
+ selector(inputColumn, "2"),
+ selector(OUTPUT_COLUMN_NAME, "10")
+ ))
+ ));
+
+ final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of(
+ selector(inputColumn, "1"),
+ new AndFilter(ImmutableList.of(
+ selector(inputColumn, "2"),
+ selector(inputColumn, "10")
+ ))
+ ));
+
+ final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
+ baseFilter,
+ unnestStorageAdapter.getInterval(),
+ VirtualColumns.EMPTY,
+ Granularities.ALL,
+ false,
+ null
+ );
+
+ final TestStorageAdapter base = (TestStorageAdapter)
unnestStorageAdapter.getBaseAdapter();
+ final Filter pushDownFilter = base.getPushDownFilter();
+
+ Assert.assertEquals(expectedPushDownFilter, pushDownFilter);
+ cursorSequence.accumulate(null, (accumulated, cursor) -> {
+ Assert.assertEquals(cursor.getClass(), PostJoinCursor.class);
+ final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter();
+ // OR-case so base filter should match the postJoinFilter
+ Assert.assertEquals(baseFilter, postFilter);
+ return null;
+ });
+ }
+ @Test
+ public void test_nested_filters_unnested_and_topLevel1And3filtersInOR()
+ {
+ final Filter testQueryFilter = and(ImmutableList.of(
+ selector(OUTPUT_COLUMN_NAME, "3"),
+ or(ImmutableList.of(
+ selector("newcol", "2"),
+ selector(COLUMNNAME, "2"),
+ selector(OUTPUT_COLUMN_NAME, "1")
+ ))
+ ));
+ testComputeBaseAndPostUnnestFilters(
+ testQueryFilter,
+ "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 ||
multi-string1 = 1))",
+ "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 ||
unnested-multi-string1 = 1))"
+ );
+ }
+ @Test
+ public void test_nested_multiLevel_filters_unnested()
+ {
+ final Filter testQueryFilter = and(ImmutableList.of(
+ selector(OUTPUT_COLUMN_NAME, "3"),
+ or(ImmutableList.of(
+ or(ImmutableList.of(
+ selector("newcol", "2"),
+ selector(COLUMNNAME, "2"),
+ and(ImmutableList.of(
+ selector("newcol", "3"),
+ selector(COLUMNNAME, "7")
+ ))
+ )),
+ selector(OUTPUT_COLUMN_NAME, "1")
+ ))
+ ));
+ testComputeBaseAndPostUnnestFilters(
+ testQueryFilter,
+ "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3
&& multi-string1 = 7) || multi-string1 = 1))",
+ "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 ||
(newcol = 3 && multi-string1 = 7) || unnested-multi-string1 = 1))"
+ );
+ }
+ @Test
+ public void test_nested_multiLevel_filters_unnested5Level()
+ {
+ final Filter testQueryFilter = or(ImmutableList.of(
+ selector(OUTPUT_COLUMN_NAME, "3"),
+ or(ImmutableList.of(
+ or(ImmutableList.of(
+ selector("newcol", "2"),
+ selector(COLUMNNAME, "2"),
+ and(ImmutableList.of(
+ selector("newcol", "3"),
+ and(ImmutableList.of(
+ selector(COLUMNNAME, "7"),
+ selector("newcol_1", "10")
+ ))
+ ))
+ )),
+ selector(OUTPUT_COLUMN_NAME, "1")
+ ))
+ ));
+ testComputeBaseAndPostUnnestFilters(
+ testQueryFilter,
+ "(multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3
&& multi-string1 = 7 && newcol_1 = 10) || multi-string1 = 1)",
+ "(unnested-multi-string1 = 3 || newcol = 2 || multi-string1 = 2 ||
(newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || unnested-multi-string1 =
1)"
+ );
+ }
+ @Test
+ public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR()
+ {
+ final Filter testQueryFilter = or(ImmutableList.of(
+ selector(OUTPUT_COLUMN_NAME, "3"),
+ and(ImmutableList.of(
+ selector("newcol", "2"),
+ selector(COLUMNNAME, "2"),
+ selector(OUTPUT_COLUMN_NAME, "1")
+ ))
+ ));
+ testComputeBaseAndPostUnnestFilters(
+ testQueryFilter,
+ "(multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 &&
multi-string1 = 1))",
+ "(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 &&
unnested-multi-string1 = 1))"
+ );
+ }
+
+ @Test
+ public void
test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs()
+ {
+ final Filter testQueryFilter = and(ImmutableList.of(
+ selector(OUTPUT_COLUMN_NAME, "3"),
+ or(ImmutableList.of(
+ selector("newcol", "2"),
+ selector(COLUMNNAME, "2")
+ )),
+ or(ImmutableList.of(
+ selector("newcol", "4"),
+ selector(COLUMNNAME, "8"),
+ selector(OUTPUT_COLUMN_NAME, "6")
+ ))
+ ));
+ testComputeBaseAndPostUnnestFilters(
+ testQueryFilter,
+ "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol =
4 || multi-string1 = 8 || multi-string1 = 6))",
+ "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) &&
(newcol = 4 || multi-string1 = 8 || unnested-multi-string1 = 6))"
+ );
+ }
+
+ @Test
+ public void test_nested_filters_unnested_and_topLevelAND2sdf()
+ {
+ final Filter testQueryFilter = and(ImmutableList.of(
+ selector(OUTPUT_COLUMN_NAME, "3"),
+ selector(COLUMNNAME, "2")
+ ));
+ testComputeBaseAndPostUnnestFilters(
+ testQueryFilter,
+ "(multi-string1 = 3 && multi-string1 = 2)",
+ "(unnested-multi-string1 = 3 && multi-string1 = 2)"
+ );
+ }
@Test
public void test_pushdown_filters_unnested_dimension_with_unnest_adapters()
{
final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter(
new TestStorageAdapter(INCREMENTAL_INDEX),
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME +
"\"", null, ExprMacroTable.nil()),
- new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null)
+ new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null)
);
final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn();
@@ -331,7 +490,7 @@ public class UnnestStorageAdapterTest extends
InitializedNullHandlingTest
final String inputColumn =
unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
final Filter expectedPushDownFilter =
- new SelectorDimFilter(inputColumn, "1", null).toFilter();
+ selector(inputColumn, "1");
final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
@@ -377,10 +536,9 @@ public class UnnestStorageAdapterTest extends
InitializedNullHandlingTest
final String inputColumn =
unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
final Filter expectedPushDownFilter =
- new SelectorDimFilter(inputColumn, "1", null).toFilter();
+ selector(inputColumn, "1");
-
- final Filter queryFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1",
null).toFilter();
+ final Filter queryFilter = new SelectorFilter(OUTPUT_COLUMN_NAME, "1",
null);
final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
queryFilter,
unnestStorageAdapter.getInterval(),
@@ -409,45 +567,32 @@ public class UnnestStorageAdapterTest extends
InitializedNullHandlingTest
});
}
- @Test
- public void testAllowedFiltersForPushdown()
+ public void testComputeBaseAndPostUnnestFilters(
+ Filter testQueryFilter,
+ String expectedBasePushDown,
+ String expectedPostUnnest
+ )
{
- Filter[] allowed = new Filter[] {
- new SelectorFilter("column", "value"),
- new InDimFilter("column", ImmutableSet.of("a", "b")),
- new LikeFilter("column", null,
LikeDimFilter.LikeMatcher.from("hello%", null), null),
- new BoundFilter(
- new BoundDimFilter("column", "a", "b", true, true, null, null,
null)
- ),
- NullFilter.forColumn("column"),
- new EqualityFilter("column", ColumnType.LONG, 1234L, null),
- new RangeFilter("column", ColumnType.LONG, 0L, 1234L, true, false,
null)
- };
- // not exhaustive
- Filter[] notAllowed = new Filter[] {
- TrueFilter.instance(),
- FalseFilter.instance(),
- new
ColumnComparisonFilter(ImmutableList.of(DefaultDimensionSpec.of("col1"),
DefaultDimensionSpec.of("col2")))
- };
-
- for (Filter f : allowed) {
-
Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f));
- }
- for (Filter f : notAllowed) {
-
Assert.assertFalse(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f));
- }
-
- Filter notAnd = new NotFilter(
- new AndFilter(
- Arrays.asList(allowed)
- )
+ final String inputColumn =
UNNEST_STORAGE_ADAPTER.getUnnestInputIfDirectAccess(UNNEST_STORAGE_ADAPTER.getUnnestColumn());
+ final VirtualColumn vc = UNNEST_STORAGE_ADAPTER.getUnnestColumn();
+ Pair<Filter, Filter> filterPair =
UNNEST_STORAGE_ADAPTER.computeBaseAndPostUnnestFilters(
+ testQueryFilter,
+ null,
+ VirtualColumns.EMPTY,
+ inputColumn,
+ vc.capabilities(UNNEST_STORAGE_ADAPTER, inputColumn)
);
-
-
Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(notAnd));
- Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new
OrFilter(Arrays.asList(allowed))));
- Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new
NotFilter(notAnd)));
- Assert.assertFalse(
- UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(new
OrFilter(Arrays.asList(notAllowed))))
+ Filter actualPushDownFilter = filterPair.lhs;
+ Filter actualPostUnnestFilter = filterPair.rhs;
+ Assert.assertEquals(
+ "Expects only top level child of And Filter to push down to base",
+ expectedBasePushDown,
+ actualPushDownFilter == null ? "" : actualPushDownFilter.toString()
+ );
+ Assert.assertEquals(
+ "Should have post unnest filter",
+ expectedPostUnnest,
+ actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString()
);
}
}
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index f5ea4ea609..9f25f4cd00 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -2880,6 +2880,253 @@ public class CalciteArraysQueryTest extends
BaseCalciteQueryTest
);
}
+ @Test
+ public void testUnnestThriceWithFiltersOnDimAndUnnestCol()
+ {
+ cannotVectorize();
+ String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3
FROM \n"
+ + " ( SELECT * FROM \n"
+ + " ( SELECT * FROM lotsocolumns,
UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )"
+ + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as
ut(dim3_unnest2) \n"
+ + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as
ut(dim3_unnest3) "
+ + " WHERE dimZipf=27 AND dim3_unnest1='Baz'";
+ List<Query<?>> expectedQuerySc = ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(
+ UnnestDataSource.create(
+ UnnestDataSource.create(
+ FilteredDataSource.create(
+ UnnestDataSource.create(
+ new
TableDataSource(CalciteTests.DATASOURCE5),
+ expressionVirtualColumn(
+ "j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ),
+ null
+ ),
+ and(
+ NullHandling.sqlCompatible()
+ ? equality("dimZipf", "27", ColumnType.LONG)
+ : bound("dimZipf", "27", "27", false, false,
null, StringComparators.NUMERIC),
+ equality("j0.unnest", "Baz",
ColumnType.STRING)
+ )
+ ),
+ expressionVirtualColumn(
+ "_j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ), null
+ ),
+ expressionVirtualColumn(
+ "__j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ),
+ null
+ )
+ )
+ .intervals(querySegmentSpec(Filtration.eternity()))
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_UNNEST)
+ .virtualColumns(expressionVirtualColumn(
+ "v0",
+ "'Baz'",
+ ColumnType.STRING
+ ))
+ .columns(ImmutableList.of("__j0.unnest", "_j0.unnest",
"dimZipf", "v0"))
+ .build()
+ );
+ testQuery(
+ sql,
+ QUERY_CONTEXT_UNNEST,
+ expectedQuerySc,
+ ImmutableList.of(
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Hello"},
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Hello"},
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Hello", "Baz"},
+ new Object[]{"27", "Baz", "Hello", "Baz"},
+ new Object[]{"27", "Baz", "Hello", "Hello"},
+ new Object[]{"27", "Baz", "Hello", "World"},
+ new Object[]{"27", "Baz", "World", "Baz"},
+ new Object[]{"27", "Baz", "World", "Baz"},
+ new Object[]{"27", "Baz", "World", "Hello"},
+ new Object[]{"27", "Baz", "World", "World"},
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Hello"},
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Baz"},
+ new Object[]{"27", "Baz", "Baz", "Hello"},
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Hello", "Baz"},
+ new Object[]{"27", "Baz", "Hello", "Baz"},
+ new Object[]{"27", "Baz", "Hello", "Hello"},
+ new Object[]{"27", "Baz", "Hello", "World"},
+ new Object[]{"27", "Baz", "World", "Baz"},
+ new Object[]{"27", "Baz", "World", "Baz"},
+ new Object[]{"27", "Baz", "World", "Hello"},
+ new Object[]{"27", "Baz", "World", "World"}
+ )
+ );
+ }
+ @Test
+ public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns()
+ {
+ cannotVectorize();
+ String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3
FROM \n"
+ + " ( SELECT * FROM \n"
+ + " ( SELECT * FROM lotsocolumns,
UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )"
+ + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as
ut(dim3_unnest2) \n"
+ + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as
ut(dim3_unnest3) "
+ + " WHERE dimZipf=27 AND dim3_unnest1='Baz' AND
dim3_unnest2='Hello' AND dim3_unnest3='World'";
+ List<Query<?>> expectedQuerySc = ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(
+ UnnestDataSource.create(
+ UnnestDataSource.create(
+ FilteredDataSource.create(
+ UnnestDataSource.create(
+ new
TableDataSource(CalciteTests.DATASOURCE5),
+ expressionVirtualColumn(
+ "j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ),
+ null
+ ),
+ and(
+ NullHandling.sqlCompatible()
+ ? equality("dimZipf", "27", ColumnType.LONG)
+ : bound("dimZipf", "27", "27", false, false,
null, StringComparators.NUMERIC),
+ equality("j0.unnest", "Baz",
ColumnType.STRING)
+ )
+ ),
+ expressionVirtualColumn(
+ "_j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ), equality("_j0.unnest", "Hello", ColumnType.STRING)
+ ),
+ expressionVirtualColumn(
+ "__j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ),
+ equality("__j0.unnest", "World", ColumnType.STRING)
+ )
+ )
+ .intervals(querySegmentSpec(Filtration.eternity()))
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .virtualColumns(expressionVirtualColumn(
+ "v0",
+ "'Baz'",
+ ColumnType.STRING
+ ))
+ .legacy(false)
+ .context(QUERY_CONTEXT_UNNEST)
+ .columns(ImmutableList.of("__j0.unnest", "_j0.unnest",
"dimZipf", "v0"))
+ .build()
+ );
+ testQuery(
+ sql,
+ QUERY_CONTEXT_UNNEST,
+ expectedQuerySc,
+ ImmutableList.of(
+ new Object[]{"27", "Baz", "Hello", "World"},
+ new Object[]{"27", "Baz", "Hello", "World"}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestThriceWithFiltersOnDimAndUnnestColumnsORCombinations()
+ {
+ cannotVectorize();
+ skipVectorize();
+ String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3
FROM \n"
+ + " ( SELECT * FROM \n"
+ + " ( SELECT * FROM lotsocolumns,
UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )"
+ + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as
ut(dim3_unnest2) \n"
+ + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as
ut(dim3_unnest3) "
+ + " WHERE dimZipf=27 AND (dim3_unnest1='Baz' OR
dim3_unnest2='Hello') AND dim3_unnest3='World'";
+ List<Query<?>> expectedQuerySqlCom = ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(
+ UnnestDataSource.create(
+ FilteredDataSource.create(
+ UnnestDataSource.create(
+ FilteredDataSource.create(
+ UnnestDataSource.create(
+ new
TableDataSource(CalciteTests.DATASOURCE5),
+ expressionVirtualColumn(
+ "j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ),
+ null
+ ),
+ NullHandling.sqlCompatible() ?
equality("dimZipf", "27", ColumnType.LONG) : range(
+ "dimZipf",
+ ColumnType.LONG,
+ "27",
+ "27",
+ false,
+ false
+ )
+ ),
+ expressionVirtualColumn(
+ "_j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ),
+ null
+ ),
+ or(
+ equality("j0.unnest", "Baz", ColumnType.STRING),
+ equality("_j0.unnest", "Hello",
ColumnType.STRING)
+ ) // (j0.unnest = Baz || _j0.unnest = Hello)
+ ),
+ expressionVirtualColumn(
+ "__j0.unnest",
+ "\"dimMultivalEnumerated\"",
+ ColumnType.STRING
+ ),
+ equality("__j0.unnest", "World", ColumnType.STRING)
+ )
+ )
+ .intervals(querySegmentSpec(Filtration.eternity()))
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_UNNEST)
+ .columns(ImmutableList.of("__j0.unnest", "_j0.unnest",
"dimZipf", "j0.unnest"))
+ .build()
+ );
+ testQuery(
+ sql,
+ QUERY_CONTEXT_UNNEST, expectedQuerySqlCom,
+ ImmutableList.of(
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Hello", "World"},
+ new Object[]{"27", "Baz", "World", "World"},
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Baz", "World"},
+ new Object[]{"27", "Baz", "Hello", "World"},
+ new Object[]{"27", "Baz", "World", "World"},
+ new Object[]{"27", "Hello", "Hello", "World"},
+ new Object[]{"27", "World", "Hello", "World"}
+ )
+ );
+ }
@Test
public void testUnnestWithGroupBy()
{
@@ -3148,6 +3395,95 @@ public class CalciteArraysQueryTest extends
BaseCalciteQueryTest
);
}
+ @Test
+ public void testUnnestVirtualWithColumns1()
+ {
+ // This tells the test to skip generating (vectorize = force) path
+ // Generates only 1 native query with vectorize = false
+ skipVectorize();
+ // This tells that both vectorize = force and vectorize = false takes the
same path of non vectorization
+ // Generates 2 native queries with 2 different values of vectorize
+ cannotVectorize();
+ testQuery(
+ "SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as
unnested (strings) where (strings='a' and (m1<=10 or strings='b'))",
+ QUERY_CONTEXT_UNNEST,
+ ImmutableList.of(Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new
TableDataSource(CalciteTests.DATASOURCE3),
+ expressionVirtualColumn(
+ "j0.unnest",
+ "\"dim3\"",
+ ColumnType.STRING
+ ),
+ equality("j0.unnest", "a",
ColumnType.STRING)
+ ))
+
.intervals(querySegmentSpec(Filtration.eternity()))
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .filters(or(
+ NullHandling.sqlCompatible()
+ ? range("m1", ColumnType.LONG, null, "10",
false, false)
+ : bound(
+ "m1",
+ null,
+ "10",
+ false,
+ false,
+ null,
+ StringComparators.NUMERIC
+ ),
+ equality("j0.unnest", "b",
ColumnType.STRING)
+ ))
+ .context(QUERY_CONTEXT_UNNEST)
+ .columns(ImmutableList.of("j0.unnest", "m1"))
+ .build()),
+ ImmutableList.of(new Object[]{"a", 1.0f})
+ );
+ }
+
+ @Test
+ public void testUnnestVirtualWithColumns2()
+ {
+ // This tells the test to skip generating (vectorize = force) path
+ // Generates only 1 native query with vectorize = false
+ skipVectorize();
+ // This tells that both vectorize = force and vectorize = false takes the
same path of non vectorization
+ // Generates 2 native queries with 2 different values of vectorize
+ cannotVectorize();
+ testQuery(
+ "SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as
unnested (strings) where (strings='a' or (m1=2 and strings='b'))",
+ QUERY_CONTEXT_UNNEST,
+ ImmutableList.of(Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new
TableDataSource(CalciteTests.DATASOURCE3),
+ expressionVirtualColumn(
+ "j0.unnest",
+ "\"dim3\"",
+ ColumnType.STRING
+ ),
+ null
+ ))
+
.intervals(querySegmentSpec(Filtration.eternity()))
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false) // (j0.unnest = a || (m1 = 2 &&
j0.unnest = b))
+ .filters(or(
+ equality("j0.unnest", "a",
ColumnType.STRING),
+ and(
+ NullHandling.sqlCompatible()
+ ? equality("m1", "2", ColumnType.FLOAT)
+ : equality("m1", "2",
ColumnType.STRING),
+ equality("j0.unnest", "b",
ColumnType.STRING)
+ )
+ ))
+ .context(QUERY_CONTEXT_UNNEST)
+ .columns(ImmutableList.of("j0.unnest", "m1"))
+ .build()),
+ ImmutableList.of(
+ new Object[]{"a", 1.0f},
+ new Object[]{"b", 2.0f}
+ )
+ );
+ }
@Test
public void testUnnestWithFilters()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]