imply-cheddar commented on code in PR #13934: URL: https://github.com/apache/druid/pull/13934#discussion_r1138146593
########## sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java: ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.sql.SqlKind; +import org.apache.druid.sql.calcite.rel.DruidUnnestRel; + +public class DruidFilterUnnestRule extends RelOptRule +{ + private static final DruidFilterUnnestRule INSTANCE = new DruidFilterUnnestRule(); + + private DruidFilterUnnestRule() + { + super( + operand( + Filter.class, + operand(DruidUnnestRel.class, any()) + ) + ); + } + + public static DruidFilterUnnestRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Filter filter = call.rel(0); + final DruidUnnestRel unnestDatasourceRel = call.rel(1); + DruidUnnestRel newRel = unnestDatasourceRel.withFilter(filter); + call.transformTo(newRel); + } + + // This is for a special case of handling selector filters + // on top of UnnestDataSourceRel when Calcite adds an extra + // LogicalProject on the LogicalFilter. For e.g. #122 here + // SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' + // 126:LogicalProject(d3=[$17]) + // 124:LogicalCorrelate(subset=[rel#125:Subset#6.NONE.[]], correlation=[$cor0], joinType=[inner], requiredColumns=[{3}]) + // 8:LogicalTableScan(subset=[rel#114:Subset#0.NONE.[]], table=[[druid, numfoo]]) + // 122:LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) + // 120:LogicalFilter(subset=[rel#121:Subset#4.NONE.[]], condition=[=($0, 'b')]) + // 118:Uncollect(subset=[rel#119:Subset#3.NONE.[]]) + // 116:LogicalProject(subset=[rel#117:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)]) + // 9:LogicalValues(subset=[rel#115:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) + + // This logical project does a type cast only which Druid already has information about + // So we can skip this LogicalProject only if it is a CAST for strings or LITERALS for other types + // Extensive unit tests can be found in {@link CalciteArraysQueryTest} + + static class DruidProjectOnUnnestRule extends RelOptRule + { + private static final DruidProjectOnUnnestRule INSTANCE = new DruidProjectOnUnnestRule(); + + private DruidProjectOnUnnestRule() + { + super( + operand( + Project.class, + operand(DruidUnnestRel.class, any()) + ) + ); + } + + public static DruidProjectOnUnnestRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Project rightP = call.rel(0); + final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind(); + final DruidUnnestRel unnestDatasourceRel = call.rel(1); + + if (rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL)) { + call.transformTo(unnestDatasourceRel); + } Review Comment: Should we do the check of whether or not this actually does a rewrite in the `matches` call instead of the `onMatch` call? ########## processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java: ########## @@ -245,4 +260,107 @@ private static void assertColumnReadsIdentifier(final VirtualColumn column, fina MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression()); } + + @Test + public void test_unnest_adapters_with_no_base_filter_active_unnest_filter() + { + + Sequence<Cursor> cursorSequence = UNNEST_STORAGE_ADAPTER2.makeCursors( + null, + UNNEST_STORAGE_ADAPTER2.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + cursorSequence.accumulate(null, (accumulated, cursor) -> { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + + DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); + int count = 0; + while (!cursor.isDone()) { + Object dimSelectorVal = dimSelector.getObject(); + if (dimSelectorVal == null) { + Assert.assertNull(dimSelectorVal); + } + cursor.advance(); + count++; + } + Assert.assertEquals(1, count); + Filter unnestFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(); + VirtualColumn vc = new ExpressionVirtualColumn( + OUTPUT_COLUMN_NAME, + "\"" + COLUMNNAME + "\"", + null, + ExprMacroTable.nil() + ); + final String inputColumn = UNNEST_STORAGE_ADAPTER2.getUnnestInputIfDirectAccess(vc); + Pair<Filter, Filter> filterPair = UNNEST_STORAGE_ADAPTER2.computeBaseAndPostUnnestFilters( + null, + unnestFilter, + VirtualColumns.EMPTY, + inputColumn, + INCREMENTAL_INDEX_STORAGE_ADAPTER.getColumnCapabilities(inputColumn) + ); + SelectorFilter left = ((SelectorFilter) filterPair.lhs); + SelectorFilter right = ((SelectorFilter) filterPair.rhs); + Assert.assertEquals(inputColumn, left.getDimension()); + Assert.assertEquals(OUTPUT_COLUMN_NAME, right.getDimension()); + Assert.assertEquals(right.getValue(), left.getValue()); + return null; + }); + } + Review Comment: The structuring of this test for the purposes of validating that the filters are being managed properly seems a bit off. All of the decisioning of what should be pushed down and what shouldn't will have been done when you call `makeCursors` and you should be able to see it from there. Specifically, we want to validate that it chose to push down specific filters and that it attached other filters to a PostJoinCursor. You can have a `TestStorageAdapter` that exists just to have `makeCursor` called on it. You can store the values that were called and then return a `Sequence<Cursor>` where the `Cursor` objects are also just empty implementations. Then, in the test, you can ask your `TestStorageAdapter` for which filters it was called with and validate that it's the ones you expect to be pushed down. You then take the cursors and validate that they are `PostJoinCursor` objects, then reach in and grab their filters (or it might need to be the ValueMatchers) and ensure that those filters are what you expect. ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java: ########## @@ -3737,4 +3751,384 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() ) ); } + + @Test + public void testUnnestWithSelectorFiltersOnSelectedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b'", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + selector("j0.unnest", "b", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithSelectorFiltersOnVirtualColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1,m2]) as unnested (d12) where d12=1", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY), + selector("j0.unnest", "1", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{1.0f}, + new Object[]{1.0f} + ) + ); + } + + @Test + public void testUnnestWithSelectorFiltersOnVirtualStringColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','ab')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), + new InDimFilter("j0.unnest", ImmutableSet.of("a", "ab"), null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"ab"}, + new Object[]{"a"}, + new Object[]{"ab"} + ) + ); + } + + @Test + public void testUnnestWithMultipleAndFiltersOnSelectedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' and m1 < 10 and m2 < 10", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + selector("j0.unnest", "b", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + and( + bound("m1", null, "10", false, true, null, StringComparators.NUMERIC), + bound("m2", null, "10", false, true, null, StringComparators.NUMERIC) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnSelectedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or m1 < 2 ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + or( + selector("j0.unnest", "b", null), + bound("m1", null, "2", false, true, null, StringComparators.NUMERIC) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithMultipleAndFiltersOnSelectedUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b') and d3 < 'e' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + and( + new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null), + bound("j0.unnest", null, "e", false, true, null, StringComparators.LEXICOGRAPHIC) + ) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or d3='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + new InDimFilter("j0.unnest", ImmutableSet.of("b", "d"), null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"d"} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnVariationsOfUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where strlen(d3) < 2 or d3='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + or( + new ExpressionDimFilter("(strlen(\"j0.unnest\") < 2)", TestExprMacroTable.INSTANCE), + selector("j0.unnest", "d", null) + ) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""}, + new Object[]{""}, + new Object[]{""} + ) : + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnSelectedNonUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where m1 < 2 or m2 < 2 ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + or( + bound("m1", null, "2", false, true, null, StringComparators.NUMERIC), + bound("m2", null, "2", false, true, null, StringComparators.NUMERIC) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnSelectedVirtualColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','aa') or m1 < 2 ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + or( + bound("m1", null, "2", false, true, null, StringComparators.NUMERIC), + new InDimFilter("j0.unnest", ImmutableSet.of("a", "aa"), null) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"aa"}, + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"aa"} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or dim3='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters( + or( + selector("j0.unnest", "b", null), + selector("dim3", "d", null) + ) + ) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"d"} + ) + ); + } Review Comment: This test has got me wondering what will happen if you run ``` "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or dim3='a' " ``` Instead. I expect that it should match all of the `["a", "b"]` rows and thus be 4 results of a, b, a, b. But I'm curious. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
