github-code-scanning[bot] commented on code in PR #13952: URL: https://github.com/apache/druid/pull/13952#discussion_r1163674379
########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java: ########## @@ -0,0 +1,896 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.Druids; +import org.apache.druid.query.JoinDataSource; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.ResourceLimitExceededException; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; +import org.apache.druid.query.aggregation.LongMinAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.ExtractionDimensionSpec; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.topn.DimensionTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.join.JoinType; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.joda.time.DateTimeZone; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Calcite tests which involve subqueries and materializing the intermediate results on {@link org.apache.druid.server.ClientQuerySegmentWalker} + * The tests are run with two different + */ +@RunWith(Parameterized.class) +public class CalciteSubqueryTest extends BaseCalciteQueryTest +{ + + public String testName; + public Map<String, Object> queryContext; + + public CalciteSubqueryTest( + String testName, + Map<String, Object> queryContext + ) + { + this.testName = testName; + this.queryContext = queryContext; + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object[]> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + constructors.add( + new Object[]{"without memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + constructors.add( + new Object[]{"with memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + return constructors; + } + + @Test + public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() + { + msqCompatible(); + // Cannot vectorize topN operator. + cannotVectorize(); + + testQuery( + "SELECT\n" + + " SUM(cnt),\n" + + " COUNT(*)\n" + + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)\n" + + "WHERE cnt > 0", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .aggregators(new LongSumAggregatorFactory("a0", "cnt")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(1) + .build() + ) + ) + .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators( + new LongSumAggregatorFactory("_a0", "a0"), + new CountAggregatorFactory("_a1") + )) + .setContext(queryContext) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{3L, 1L} + ) : + ImmutableList.of( + new Object[]{2L, 1L} + ) + ); + } + + @Test + public void testExactCountDistinctOfSemiJoinResult() + { + msqCompatible(); + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM (\n" + + " SELECT DISTINCT dim2\n" + + " FROM druid.foo\n" + + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + + ")", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + GroupByQuery.builder() + .setDataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(not(selector("dim1", "", null))) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + new SubstringDimExtractionFn(0, 1) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition( + makeExpression("substring(\"dim2\", 0, 1)"), + DruidExpression.ofColumn(ColumnType.STRING, "j0.d0") + ), + JoinType.INNER + ) + ) + .setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{2L} + ) + ); + } + + @Test + public void testTwoExactCountDistincts() + { + requireMergeBuffers(6); + msqCompatible(); + testQuery( + PLANNER_CONFIG_NO_HLL, + queryContext, + "SELECT COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo", + CalciteTests.REGULAR_USER_AUTH_RESULT, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + "1", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("a0", "j0.a0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.sqlCompatible() ? 6L : 5L, NullHandling.sqlCompatible() ? 3L : 2L} + ) + ); + } + + @Test + public void testViewAndJoin() + { + cannotVectorize(); + Map<String, Object> queryContextModified = withLeftDirectAccessEnabled(queryContext); + testQuery( + "SELECT COUNT(*) FROM view.cview as a INNER JOIN druid.foo d on d.dim2 = a.dim2 WHERE a.dim1_firstchar <> 'z' ", + queryContextModified, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + join( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "j0.", + "(\"dim2\" == \"j0.dim2\")", + JoinType.INNER, + bound("dim2", "a", "a", false, false, null, null) + ), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "_j0.", + "('a' == \"_j0.dim2\")", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))) + .granularity(Granularities.ALL) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(queryContextModified) + .build() + ), + ImmutableList.of( + new Object[]{8L} + ) + ); + } + + @Test + public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() + { + msqCompatible(); + cannotVectorize(); + + testQuery( + "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + + "FROM (\n" + + " SELECT\n" + + " FLOOR(__time to hour) as \"date\",\n" + + " COUNT(*) as x\n" + + " FROM foo\n" + + " GROUP BY 1\n" + + ")\n" + + "GROUP BY 1", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.HOUR) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) + .build() + ) + .setInterval(querySegmentSpec(Intervals.ETERNITY)) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_format(\"d0\",'yyyy-MM','UTC')", + ColumnType.STRING + ) + ) + .setGranularity(Granularities.ALL) + .addDimension(new DefaultDimensionSpec("v0", "_d0")) + .addAggregator(new LongSumAggregatorFactory("_a0", "a0")) + .build() + ), + ImmutableList.of( + new Object[]{"2000-01", 3L}, + new Object[]{"2001-01", 3L} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterWithInnerSort() + { + // Regression test for https://github.com/apache/druid/issues/4208 + + testQuery( + "SELECT dim1, dim2 FROM druid.foo\n" + + " WHERE dim2 IN (\n" + + " SELECT dim2\n" + + " FROM druid.foo\n" + + " GROUP BY dim2\n" + + " ORDER BY dim2 DESC\n" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")), + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim1", "dim2") + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) : + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"2", ""}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterOnTwoColumns() + { + msqCompatible(); + testQuery( + "SELECT __time, cnt, dim1, dim2 FROM druid.foo " + + " WHERE (dim1, dim2) IN (" + + " SELECT dim1, dim2 FROM (" + + " SELECT dim1, dim2, COUNT(*)" + + " FROM druid.foo" + + " WHERE dim2 = 'abc'" + + " GROUP BY dim1, dim2" + + " HAVING COUNT(*) = 1" + + " )" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(selector("dim2", "abc", null)) + .setDimensions(dimensions( + new DefaultDimensionSpec("dim1", "d0"), + new DefaultDimensionSpec("dim2", "d1") + )) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setPostAggregatorSpecs( + ImmutableList.of(expressionPostAgg("p0", "'abc'")) + ) + .setHavingSpec(having(selector("a0", "1", null))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + StringUtils.format( + "(%s && %s)", + equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.d0")), + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.p0")) Review Comment: ## Deprecated method or constructor invocation Invoking [CalciteTestBase.makeColumnExpression](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4786) ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java: ########## @@ -0,0 +1,896 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.Druids; +import org.apache.druid.query.JoinDataSource; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.ResourceLimitExceededException; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; +import org.apache.druid.query.aggregation.LongMinAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.ExtractionDimensionSpec; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.topn.DimensionTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.join.JoinType; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.joda.time.DateTimeZone; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Calcite tests which involve subqueries and materializing the intermediate results on {@link org.apache.druid.server.ClientQuerySegmentWalker} + * The tests are run with two different + */ +@RunWith(Parameterized.class) +public class CalciteSubqueryTest extends BaseCalciteQueryTest +{ + + public String testName; + public Map<String, Object> queryContext; + + public CalciteSubqueryTest( + String testName, + Map<String, Object> queryContext + ) + { + this.testName = testName; + this.queryContext = queryContext; + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object[]> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + constructors.add( + new Object[]{"without memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + constructors.add( + new Object[]{"with memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + return constructors; + } + + @Test + public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() + { + msqCompatible(); + // Cannot vectorize topN operator. + cannotVectorize(); + + testQuery( + "SELECT\n" + + " SUM(cnt),\n" + + " COUNT(*)\n" + + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)\n" + + "WHERE cnt > 0", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .aggregators(new LongSumAggregatorFactory("a0", "cnt")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(1) + .build() + ) + ) + .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators( + new LongSumAggregatorFactory("_a0", "a0"), + new CountAggregatorFactory("_a1") + )) + .setContext(queryContext) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{3L, 1L} + ) : + ImmutableList.of( + new Object[]{2L, 1L} + ) + ); + } + + @Test + public void testExactCountDistinctOfSemiJoinResult() + { + msqCompatible(); + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM (\n" + + " SELECT DISTINCT dim2\n" + + " FROM druid.foo\n" + + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + + ")", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + GroupByQuery.builder() + .setDataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(not(selector("dim1", "", null))) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + new SubstringDimExtractionFn(0, 1) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition( + makeExpression("substring(\"dim2\", 0, 1)"), Review Comment: ## Deprecated method or constructor invocation Invoking [CalciteTestBase.makeExpression](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4781) ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java: ########## @@ -0,0 +1,896 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.Druids; +import org.apache.druid.query.JoinDataSource; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.ResourceLimitExceededException; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; +import org.apache.druid.query.aggregation.LongMinAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.ExtractionDimensionSpec; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.topn.DimensionTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.join.JoinType; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.joda.time.DateTimeZone; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Calcite tests which involve subqueries and materializing the intermediate results on {@link org.apache.druid.server.ClientQuerySegmentWalker} + * The tests are run with two different + */ +@RunWith(Parameterized.class) +public class CalciteSubqueryTest extends BaseCalciteQueryTest +{ + + public String testName; + public Map<String, Object> queryContext; + + public CalciteSubqueryTest( + String testName, + Map<String, Object> queryContext + ) + { + this.testName = testName; + this.queryContext = queryContext; + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object[]> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + constructors.add( + new Object[]{"without memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + constructors.add( + new Object[]{"with memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + return constructors; + } + + @Test + public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() + { + msqCompatible(); + // Cannot vectorize topN operator. + cannotVectorize(); + + testQuery( + "SELECT\n" + + " SUM(cnt),\n" + + " COUNT(*)\n" + + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)\n" + + "WHERE cnt > 0", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .aggregators(new LongSumAggregatorFactory("a0", "cnt")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(1) + .build() + ) + ) + .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators( + new LongSumAggregatorFactory("_a0", "a0"), + new CountAggregatorFactory("_a1") + )) + .setContext(queryContext) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{3L, 1L} + ) : + ImmutableList.of( + new Object[]{2L, 1L} + ) + ); + } + + @Test + public void testExactCountDistinctOfSemiJoinResult() + { + msqCompatible(); + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM (\n" + + " SELECT DISTINCT dim2\n" + + " FROM druid.foo\n" + + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + + ")", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + GroupByQuery.builder() + .setDataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(not(selector("dim1", "", null))) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + new SubstringDimExtractionFn(0, 1) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition( + makeExpression("substring(\"dim2\", 0, 1)"), + DruidExpression.ofColumn(ColumnType.STRING, "j0.d0") + ), + JoinType.INNER + ) + ) + .setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{2L} + ) + ); + } + + @Test + public void testTwoExactCountDistincts() + { + requireMergeBuffers(6); + msqCompatible(); + testQuery( + PLANNER_CONFIG_NO_HLL, + queryContext, + "SELECT COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo", + CalciteTests.REGULAR_USER_AUTH_RESULT, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + "1", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("a0", "j0.a0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.sqlCompatible() ? 6L : 5L, NullHandling.sqlCompatible() ? 3L : 2L} + ) + ); + } + + @Test + public void testViewAndJoin() + { + cannotVectorize(); + Map<String, Object> queryContextModified = withLeftDirectAccessEnabled(queryContext); + testQuery( + "SELECT COUNT(*) FROM view.cview as a INNER JOIN druid.foo d on d.dim2 = a.dim2 WHERE a.dim1_firstchar <> 'z' ", + queryContextModified, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + join( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "j0.", + "(\"dim2\" == \"j0.dim2\")", + JoinType.INNER, + bound("dim2", "a", "a", false, false, null, null) + ), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "_j0.", + "('a' == \"_j0.dim2\")", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))) + .granularity(Granularities.ALL) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(queryContextModified) + .build() + ), + ImmutableList.of( + new Object[]{8L} + ) + ); + } + + @Test + public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() + { + msqCompatible(); + cannotVectorize(); + + testQuery( + "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + + "FROM (\n" + + " SELECT\n" + + " FLOOR(__time to hour) as \"date\",\n" + + " COUNT(*) as x\n" + + " FROM foo\n" + + " GROUP BY 1\n" + + ")\n" + + "GROUP BY 1", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.HOUR) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) + .build() + ) + .setInterval(querySegmentSpec(Intervals.ETERNITY)) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_format(\"d0\",'yyyy-MM','UTC')", + ColumnType.STRING + ) + ) + .setGranularity(Granularities.ALL) + .addDimension(new DefaultDimensionSpec("v0", "_d0")) + .addAggregator(new LongSumAggregatorFactory("_a0", "a0")) + .build() + ), + ImmutableList.of( + new Object[]{"2000-01", 3L}, + new Object[]{"2001-01", 3L} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterWithInnerSort() + { + // Regression test for https://github.com/apache/druid/issues/4208 + + testQuery( + "SELECT dim1, dim2 FROM druid.foo\n" + + " WHERE dim2 IN (\n" + + " SELECT dim2\n" + + " FROM druid.foo\n" + + " GROUP BY dim2\n" + + " ORDER BY dim2 DESC\n" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")), + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim1", "dim2") + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) : + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"2", ""}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterOnTwoColumns() + { + msqCompatible(); + testQuery( + "SELECT __time, cnt, dim1, dim2 FROM druid.foo " + + " WHERE (dim1, dim2) IN (" + + " SELECT dim1, dim2 FROM (" + + " SELECT dim1, dim2, COUNT(*)" + + " FROM druid.foo" + + " WHERE dim2 = 'abc'" + + " GROUP BY dim1, dim2" + + " HAVING COUNT(*) = 1" + + " )" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(selector("dim2", "abc", null)) + .setDimensions(dimensions( + new DefaultDimensionSpec("dim1", "d0"), + new DefaultDimensionSpec("dim2", "d1") + )) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setPostAggregatorSpecs( + ImmutableList.of(expressionPostAgg("p0", "'abc'")) + ) + .setHavingSpec(having(selector("a0", "1", null))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + StringUtils.format( + "(%s && %s)", + equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.d0")), + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.p0")) Review Comment: ## Deprecated method or constructor invocation Invoking [CalciteTestBase.makeColumnExpression](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4787) ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java: ########## @@ -0,0 +1,896 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.Druids; +import org.apache.druid.query.JoinDataSource; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.ResourceLimitExceededException; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; +import org.apache.druid.query.aggregation.LongMinAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.ExtractionDimensionSpec; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.topn.DimensionTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.join.JoinType; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.joda.time.DateTimeZone; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Calcite tests which involve subqueries and materializing the intermediate results on {@link org.apache.druid.server.ClientQuerySegmentWalker} + * The tests are run with two different + */ +@RunWith(Parameterized.class) +public class CalciteSubqueryTest extends BaseCalciteQueryTest +{ + + public String testName; + public Map<String, Object> queryContext; + + public CalciteSubqueryTest( + String testName, + Map<String, Object> queryContext + ) + { + this.testName = testName; + this.queryContext = queryContext; + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object[]> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + constructors.add( + new Object[]{"without memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + constructors.add( + new Object[]{"with memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + return constructors; + } + + @Test + public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() + { + msqCompatible(); + // Cannot vectorize topN operator. + cannotVectorize(); + + testQuery( + "SELECT\n" + + " SUM(cnt),\n" + + " COUNT(*)\n" + + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)\n" + + "WHERE cnt > 0", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .aggregators(new LongSumAggregatorFactory("a0", "cnt")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(1) + .build() + ) + ) + .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators( + new LongSumAggregatorFactory("_a0", "a0"), + new CountAggregatorFactory("_a1") + )) + .setContext(queryContext) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{3L, 1L} + ) : + ImmutableList.of( + new Object[]{2L, 1L} + ) + ); + } + + @Test + public void testExactCountDistinctOfSemiJoinResult() + { + msqCompatible(); + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM (\n" + + " SELECT DISTINCT dim2\n" + + " FROM druid.foo\n" + + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + + ")", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + GroupByQuery.builder() + .setDataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(not(selector("dim1", "", null))) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + new SubstringDimExtractionFn(0, 1) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition( + makeExpression("substring(\"dim2\", 0, 1)"), + DruidExpression.ofColumn(ColumnType.STRING, "j0.d0") + ), + JoinType.INNER + ) + ) + .setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{2L} + ) + ); + } + + @Test + public void testTwoExactCountDistincts() + { + requireMergeBuffers(6); + msqCompatible(); + testQuery( + PLANNER_CONFIG_NO_HLL, + queryContext, + "SELECT COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo", + CalciteTests.REGULAR_USER_AUTH_RESULT, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + "1", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("a0", "j0.a0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.sqlCompatible() ? 6L : 5L, NullHandling.sqlCompatible() ? 3L : 2L} + ) + ); + } + + @Test + public void testViewAndJoin() + { + cannotVectorize(); + Map<String, Object> queryContextModified = withLeftDirectAccessEnabled(queryContext); + testQuery( + "SELECT COUNT(*) FROM view.cview as a INNER JOIN druid.foo d on d.dim2 = a.dim2 WHERE a.dim1_firstchar <> 'z' ", + queryContextModified, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + join( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "j0.", + "(\"dim2\" == \"j0.dim2\")", + JoinType.INNER, + bound("dim2", "a", "a", false, false, null, null) + ), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "_j0.", + "('a' == \"_j0.dim2\")", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))) + .granularity(Granularities.ALL) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(queryContextModified) + .build() + ), + ImmutableList.of( + new Object[]{8L} + ) + ); + } + + @Test + public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() + { + msqCompatible(); + cannotVectorize(); + + testQuery( + "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + + "FROM (\n" + + " SELECT\n" + + " FLOOR(__time to hour) as \"date\",\n" + + " COUNT(*) as x\n" + + " FROM foo\n" + + " GROUP BY 1\n" + + ")\n" + + "GROUP BY 1", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.HOUR) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) + .build() + ) + .setInterval(querySegmentSpec(Intervals.ETERNITY)) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_format(\"d0\",'yyyy-MM','UTC')", + ColumnType.STRING + ) + ) + .setGranularity(Granularities.ALL) + .addDimension(new DefaultDimensionSpec("v0", "_d0")) + .addAggregator(new LongSumAggregatorFactory("_a0", "a0")) + .build() + ), + ImmutableList.of( + new Object[]{"2000-01", 3L}, + new Object[]{"2001-01", 3L} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterWithInnerSort() + { + // Regression test for https://github.com/apache/druid/issues/4208 + + testQuery( + "SELECT dim1, dim2 FROM druid.foo\n" + + " WHERE dim2 IN (\n" + + " SELECT dim2\n" + + " FROM druid.foo\n" + + " GROUP BY dim2\n" + + " ORDER BY dim2 DESC\n" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")), + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim1", "dim2") + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) : + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"2", ""}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterOnTwoColumns() + { + msqCompatible(); + testQuery( + "SELECT __time, cnt, dim1, dim2 FROM druid.foo " + + " WHERE (dim1, dim2) IN (" + + " SELECT dim1, dim2 FROM (" + + " SELECT dim1, dim2, COUNT(*)" + + " FROM druid.foo" + + " WHERE dim2 = 'abc'" + + " GROUP BY dim1, dim2" + + " HAVING COUNT(*) = 1" + + " )" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(selector("dim2", "abc", null)) + .setDimensions(dimensions( + new DefaultDimensionSpec("dim1", "d0"), + new DefaultDimensionSpec("dim2", "d1") + )) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setPostAggregatorSpecs( + ImmutableList.of(expressionPostAgg("p0", "'abc'")) + ) + .setHavingSpec(having(selector("a0", "1", null))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + StringUtils.format( + "(%s && %s)", + equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.d0")), Review Comment: ## Deprecated method or constructor invocation Invoking [CalciteTestBase.makeColumnExpression](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4784) ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java: ########## @@ -0,0 +1,896 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.Druids; +import org.apache.druid.query.JoinDataSource; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.ResourceLimitExceededException; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; +import org.apache.druid.query.aggregation.LongMinAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.ExtractionDimensionSpec; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.topn.DimensionTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.join.JoinType; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.joda.time.DateTimeZone; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Calcite tests which involve subqueries and materializing the intermediate results on {@link org.apache.druid.server.ClientQuerySegmentWalker} + * The tests are run with two different + */ +@RunWith(Parameterized.class) +public class CalciteSubqueryTest extends BaseCalciteQueryTest +{ + + public String testName; + public Map<String, Object> queryContext; + + public CalciteSubqueryTest( + String testName, + Map<String, Object> queryContext + ) + { + this.testName = testName; + this.queryContext = queryContext; + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object[]> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + constructors.add( + new Object[]{"without memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + constructors.add( + new Object[]{"with memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + return constructors; + } + + @Test + public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() + { + msqCompatible(); + // Cannot vectorize topN operator. + cannotVectorize(); + + testQuery( + "SELECT\n" + + " SUM(cnt),\n" + + " COUNT(*)\n" + + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)\n" + + "WHERE cnt > 0", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .aggregators(new LongSumAggregatorFactory("a0", "cnt")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(1) + .build() + ) + ) + .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators( + new LongSumAggregatorFactory("_a0", "a0"), + new CountAggregatorFactory("_a1") + )) + .setContext(queryContext) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{3L, 1L} + ) : + ImmutableList.of( + new Object[]{2L, 1L} + ) + ); + } + + @Test + public void testExactCountDistinctOfSemiJoinResult() + { + msqCompatible(); + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM (\n" + + " SELECT DISTINCT dim2\n" + + " FROM druid.foo\n" + + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + + ")", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + GroupByQuery.builder() + .setDataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(not(selector("dim1", "", null))) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + new SubstringDimExtractionFn(0, 1) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition( + makeExpression("substring(\"dim2\", 0, 1)"), + DruidExpression.ofColumn(ColumnType.STRING, "j0.d0") + ), + JoinType.INNER + ) + ) + .setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{2L} + ) + ); + } + + @Test + public void testTwoExactCountDistincts() + { + requireMergeBuffers(6); + msqCompatible(); + testQuery( + PLANNER_CONFIG_NO_HLL, + queryContext, + "SELECT COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo", + CalciteTests.REGULAR_USER_AUTH_RESULT, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + "1", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("a0", "j0.a0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.sqlCompatible() ? 6L : 5L, NullHandling.sqlCompatible() ? 3L : 2L} + ) + ); + } + + @Test + public void testViewAndJoin() + { + cannotVectorize(); + Map<String, Object> queryContextModified = withLeftDirectAccessEnabled(queryContext); + testQuery( + "SELECT COUNT(*) FROM view.cview as a INNER JOIN druid.foo d on d.dim2 = a.dim2 WHERE a.dim1_firstchar <> 'z' ", + queryContextModified, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + join( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "j0.", + "(\"dim2\" == \"j0.dim2\")", + JoinType.INNER, + bound("dim2", "a", "a", false, false, null, null) + ), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "_j0.", + "('a' == \"_j0.dim2\")", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))) + .granularity(Granularities.ALL) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(queryContextModified) + .build() + ), + ImmutableList.of( + new Object[]{8L} + ) + ); + } + + @Test + public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() + { + msqCompatible(); + cannotVectorize(); + + testQuery( + "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + + "FROM (\n" + + " SELECT\n" + + " FLOOR(__time to hour) as \"date\",\n" + + " COUNT(*) as x\n" + + " FROM foo\n" + + " GROUP BY 1\n" + + ")\n" + + "GROUP BY 1", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.HOUR) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) + .build() + ) + .setInterval(querySegmentSpec(Intervals.ETERNITY)) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_format(\"d0\",'yyyy-MM','UTC')", + ColumnType.STRING + ) + ) + .setGranularity(Granularities.ALL) + .addDimension(new DefaultDimensionSpec("v0", "_d0")) + .addAggregator(new LongSumAggregatorFactory("_a0", "a0")) + .build() + ), + ImmutableList.of( + new Object[]{"2000-01", 3L}, + new Object[]{"2001-01", 3L} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterWithInnerSort() + { + // Regression test for https://github.com/apache/druid/issues/4208 + + testQuery( + "SELECT dim1, dim2 FROM druid.foo\n" + + " WHERE dim2 IN (\n" + + " SELECT dim2\n" + + " FROM druid.foo\n" + + " GROUP BY dim2\n" + + " ORDER BY dim2 DESC\n" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")), Review Comment: ## Deprecated method or constructor invocation Invoking [CalciteTestBase.makeColumnExpression](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4783) ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java: ########## @@ -0,0 +1,896 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.Druids; +import org.apache.druid.query.JoinDataSource; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.ResourceLimitExceededException; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; +import org.apache.druid.query.aggregation.LongMinAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.ExtractionDimensionSpec; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.topn.DimensionTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.join.JoinType; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.joda.time.DateTimeZone; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Calcite tests which involve subqueries and materializing the intermediate results on {@link org.apache.druid.server.ClientQuerySegmentWalker} + * The tests are run with two different + */ +@RunWith(Parameterized.class) +public class CalciteSubqueryTest extends BaseCalciteQueryTest +{ + + public String testName; + public Map<String, Object> queryContext; + + public CalciteSubqueryTest( + String testName, + Map<String, Object> queryContext + ) + { + this.testName = testName; + this.queryContext = queryContext; + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object[]> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + constructors.add( + new Object[]{"without memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + constructors.add( + new Object[]{"with memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + return constructors; + } + + @Test + public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() + { + msqCompatible(); + // Cannot vectorize topN operator. + cannotVectorize(); + + testQuery( + "SELECT\n" + + " SUM(cnt),\n" + + " COUNT(*)\n" + + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)\n" + + "WHERE cnt > 0", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .aggregators(new LongSumAggregatorFactory("a0", "cnt")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(1) + .build() + ) + ) + .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators( + new LongSumAggregatorFactory("_a0", "a0"), + new CountAggregatorFactory("_a1") + )) + .setContext(queryContext) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{3L, 1L} + ) : + ImmutableList.of( + new Object[]{2L, 1L} + ) + ); + } + + @Test + public void testExactCountDistinctOfSemiJoinResult() + { + msqCompatible(); + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM (\n" + + " SELECT DISTINCT dim2\n" + + " FROM druid.foo\n" + + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + + ")", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + GroupByQuery.builder() + .setDataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(not(selector("dim1", "", null))) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + new SubstringDimExtractionFn(0, 1) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition( + makeExpression("substring(\"dim2\", 0, 1)"), + DruidExpression.ofColumn(ColumnType.STRING, "j0.d0") + ), + JoinType.INNER + ) + ) + .setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{2L} + ) + ); + } + + @Test + public void testTwoExactCountDistincts() + { + requireMergeBuffers(6); + msqCompatible(); + testQuery( + PLANNER_CONFIG_NO_HLL, + queryContext, + "SELECT COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo", + CalciteTests.REGULAR_USER_AUTH_RESULT, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + "1", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("a0", "j0.a0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.sqlCompatible() ? 6L : 5L, NullHandling.sqlCompatible() ? 3L : 2L} + ) + ); + } + + @Test + public void testViewAndJoin() + { + cannotVectorize(); + Map<String, Object> queryContextModified = withLeftDirectAccessEnabled(queryContext); + testQuery( + "SELECT COUNT(*) FROM view.cview as a INNER JOIN druid.foo d on d.dim2 = a.dim2 WHERE a.dim1_firstchar <> 'z' ", + queryContextModified, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + join( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "j0.", + "(\"dim2\" == \"j0.dim2\")", + JoinType.INNER, + bound("dim2", "a", "a", false, false, null, null) + ), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "_j0.", + "('a' == \"_j0.dim2\")", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))) + .granularity(Granularities.ALL) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(queryContextModified) + .build() + ), + ImmutableList.of( + new Object[]{8L} + ) + ); + } + + @Test + public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() + { + msqCompatible(); + cannotVectorize(); + + testQuery( + "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + + "FROM (\n" + + " SELECT\n" + + " FLOOR(__time to hour) as \"date\",\n" + + " COUNT(*) as x\n" + + " FROM foo\n" + + " GROUP BY 1\n" + + ")\n" + + "GROUP BY 1", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.HOUR) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) + .build() + ) + .setInterval(querySegmentSpec(Intervals.ETERNITY)) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_format(\"d0\",'yyyy-MM','UTC')", + ColumnType.STRING + ) + ) + .setGranularity(Granularities.ALL) + .addDimension(new DefaultDimensionSpec("v0", "_d0")) + .addAggregator(new LongSumAggregatorFactory("_a0", "a0")) + .build() + ), + ImmutableList.of( + new Object[]{"2000-01", 3L}, + new Object[]{"2001-01", 3L} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterWithInnerSort() + { + // Regression test for https://github.com/apache/druid/issues/4208 + + testQuery( + "SELECT dim1, dim2 FROM druid.foo\n" + + " WHERE dim2 IN (\n" + + " SELECT dim2\n" + + " FROM druid.foo\n" + + " GROUP BY dim2\n" + + " ORDER BY dim2 DESC\n" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")), + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim1", "dim2") + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) : + ImmutableList.of( + new Object[]{"", "a"}, + new Object[]{"2", ""}, + new Object[]{"1", "a"}, + new Object[]{"def", "abc"} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterOnTwoColumns() + { + msqCompatible(); + testQuery( + "SELECT __time, cnt, dim1, dim2 FROM druid.foo " + + " WHERE (dim1, dim2) IN (" + + " SELECT dim1, dim2 FROM (" + + " SELECT dim1, dim2, COUNT(*)" + + " FROM druid.foo" + + " WHERE dim2 = 'abc'" + + " GROUP BY dim1, dim2" + + " HAVING COUNT(*) = 1" + + " )" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(selector("dim2", "abc", null)) + .setDimensions(dimensions( + new DefaultDimensionSpec("dim1", "d0"), + new DefaultDimensionSpec("dim2", "d1") + )) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setPostAggregatorSpecs( + ImmutableList.of(expressionPostAgg("p0", "'abc'")) + ) + .setHavingSpec(having(selector("a0", "1", null))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + StringUtils.format( + "(%s && %s)", + equalsCondition(makeColumnExpression("dim1"), makeColumnExpression("j0.d0")), Review Comment: ## Deprecated method or constructor invocation Invoking [CalciteTestBase.makeColumnExpression](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4785) ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java: ########## @@ -0,0 +1,896 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.Druids; +import org.apache.druid.query.JoinDataSource; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.ResourceLimitExceededException; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; +import org.apache.druid.query.aggregation.LongMinAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.dimension.ExtractionDimensionSpec; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.topn.DimensionTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.join.JoinType; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.joda.time.DateTimeZone; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Calcite tests which involve subqueries and materializing the intermediate results on {@link org.apache.druid.server.ClientQuerySegmentWalker} + * The tests are run with two different + */ +@RunWith(Parameterized.class) +public class CalciteSubqueryTest extends BaseCalciteQueryTest +{ + + public String testName; + public Map<String, Object> queryContext; + + public CalciteSubqueryTest( + String testName, + Map<String, Object> queryContext + ) + { + this.testName = testName; + this.queryContext = queryContext; + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object[]> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + constructors.add( + new Object[]{"without memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + constructors.add( + new Object[]{"with memory limit", QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT} + ); + return constructors; + } + + @Test + public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() + { + msqCompatible(); + // Cannot vectorize topN operator. + cannotVectorize(); + + testQuery( + "SELECT\n" + + " SUM(cnt),\n" + + " COUNT(*)\n" + + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo GROUP BY dim2 LIMIT 1)\n" + + "WHERE cnt > 0", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .aggregators(new LongSumAggregatorFactory("a0", "cnt")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(1) + .build() + ) + ) + .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators( + new LongSumAggregatorFactory("_a0", "a0"), + new CountAggregatorFactory("_a1") + )) + .setContext(queryContext) + .build() + ), + NullHandling.replaceWithDefault() ? + ImmutableList.of( + new Object[]{3L, 1L} + ) : + ImmutableList.of( + new Object[]{2L, 1L} + ) + ); + } + + @Test + public void testExactCountDistinctOfSemiJoinResult() + { + msqCompatible(); + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + testQuery( + "SELECT COUNT(*)\n" + + "FROM (\n" + + " SELECT DISTINCT dim2\n" + + " FROM druid.foo\n" + + " WHERE SUBSTRING(dim2, 1, 1) IN (\n" + + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n" + + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n" + + ")", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + GroupByQuery.builder() + .setDataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(not(selector("dim1", "", null))) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + new SubstringDimExtractionFn(0, 1) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition( + makeExpression("substring(\"dim2\", 0, 1)"), + DruidExpression.ofColumn(ColumnType.STRING, "j0.d0") + ), + JoinType.INNER + ) + ) + .setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{2L} + ) + ); + } + + @Test + public void testTwoExactCountDistincts() + { + requireMergeBuffers(6); + msqCompatible(); + testQuery( + PLANNER_CONFIG_NO_HLL, + queryContext, + "SELECT COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo", + CalciteTests.REGULAR_USER_AUTH_RESULT, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + new QueryDataSource( + GroupByQuery + .builder() + .setDataSource( + GroupByQuery + .builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("dim2", "d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + not(selector("d0", null, null)) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + "1", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("a0", "j0.a0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.sqlCompatible() ? 6L : 5L, NullHandling.sqlCompatible() ? 3L : 2L} + ) + ); + } + + @Test + public void testViewAndJoin() + { + cannotVectorize(); + Map<String, Object> queryContextModified = withLeftDirectAccessEnabled(queryContext); + testQuery( + "SELECT COUNT(*) FROM view.cview as a INNER JOIN druid.foo d on d.dim2 = a.dim2 WHERE a.dim1_firstchar <> 'z' ", + queryContextModified, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + join( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "j0.", + "(\"dim2\" == \"j0.dim2\")", + JoinType.INNER, + bound("dim2", "a", "a", false, false, null, null) + ), + new QueryDataSource( + newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .context(queryContextModified) + .build() + ), + "_j0.", + "('a' == \"_j0.dim2\")", + JoinType.INNER + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))) + .granularity(Granularities.ALL) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(queryContextModified) + .build() + ), + ImmutableList.of( + new Object[]{8L} + ) + ); + } + + @Test + public void testGroupByWithPostAggregatorReferencingTimeFloorColumnOnTimeseries() + { + msqCompatible(); + cannotVectorize(); + + testQuery( + "SELECT TIME_FORMAT(\"date\", 'yyyy-MM'), SUM(x)\n" + + "FROM (\n" + + " SELECT\n" + + " FLOOR(__time to hour) as \"date\",\n" + + " COUNT(*) as x\n" + + " FROM foo\n" + + " GROUP BY 1\n" + + ")\n" + + "GROUP BY 1", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.HOUR) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) + .build() + ) + .setInterval(querySegmentSpec(Intervals.ETERNITY)) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_format(\"d0\",'yyyy-MM','UTC')", + ColumnType.STRING + ) + ) + .setGranularity(Granularities.ALL) + .addDimension(new DefaultDimensionSpec("v0", "_d0")) + .addAggregator(new LongSumAggregatorFactory("_a0", "a0")) + .build() + ), + ImmutableList.of( + new Object[]{"2000-01", 3L}, + new Object[]{"2001-01", 3L} + ) + ); + } + + @Test + public void testUsingSubqueryAsFilterWithInnerSort() + { + // Regression test for https://github.com/apache/druid/issues/4208 + + testQuery( + "SELECT dim1, dim2 FROM druid.foo\n" + + " WHERE dim2 IN (\n" + + " SELECT dim2\n" + + " FROM druid.foo\n" + + " GROUP BY dim2\n" + + " ORDER BY dim2 DESC\n" + + " )", + queryContext, + ImmutableList.of( + newScanQueryBuilder() + .dataSource( + join( + new TableDataSource(CalciteTests.DATASOURCE1), + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + "j0.", + equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")), Review Comment: ## Deprecated method or constructor invocation Invoking [CalciteTestBase.makeColumnExpression](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4782) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
