This is an automated email from the ASF dual-hosted git repository.
gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new a5e3d43f2b Adds AGGREGATE_CASE_TO_FILTER rule (#12643)
a5e3d43f2b is described below
commit a5e3d43f2bd830e6c67128e816aba8774b2bd1a2
Author: Gonzalo Ortiz Jaureguizar <[email protected]>
AuthorDate: Fri Mar 15 12:05:10 2024 -0700
Adds AGGREGATE_CASE_TO_FILTER rule (#12643)
* Adds AGGREGATE_CASE_TO_FILTER rule
* "fix" test
---
.../apache/calcite/rel/rules/PinotQueryRuleSets.java | 2 ++
.../org/apache/pinot/query/QueryCompilationTest.java | 17 +++++++++++++++++
.../query/runtime/queries/ResourceBasedQueriesTest.java | 2 +-
.../src/test/resources/queries/Case.json | 7 ++++++-
4 files changed, 26 insertions(+), 2 deletions(-)
diff --git
a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java
b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java
index 9dc29757f9..e88c442d19 100644
---
a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java
+++
b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java
@@ -38,6 +38,8 @@ public class PinotQueryRuleSets {
EnumerableRules.ENUMERABLE_PROJECT_RULE,
EnumerableRules.ENUMERABLE_WINDOW_RULE,
EnumerableRules.ENUMERABLE_SORT_RULE,
EnumerableRules.ENUMERABLE_TABLE_SCAN_RULE,
+ // converts CASE-style filtered aggregates into true filtered
aggregates.
+ CoreRules.AGGREGATE_CASE_TO_FILTER,
// push a filter into a join
CoreRules.FILTER_INTO_JOIN,
// push filter through an aggregation
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
index 4bf911c805..b8c6e97178 100644
---
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
@@ -74,6 +74,23 @@ public class QueryCompilationTest extends
QueryEnvironmentTestBase {
}
}
+ @Test
+ public void testAggregateCaseToFilter() {
+ // Tests that queries like "SELECT SUM(CASE WHEN col1 = 'a' THEN 1 ELSE 0
END) FROM a" are rewritten to
+ // "SELECT COUNT(a) FROM a WHERE col1 = 'a'"
+ String query = "EXPLAIN PLAN FOR SELECT SUM(CASE WHEN col1 = 'a' THEN 1
ELSE 0 END) FROM a";
+
+ String explain = _queryEnvironment.explainQuery(query,
RANDOM_REQUEST_ID_GEN.nextLong());
+ assertEquals(explain,
+ "Execution Plan\n"
+ + "LogicalProject(EXPR$0=[CAST($0):BIGINT])\n"
+ + " LogicalAggregate(group=[{}], agg#0=[COUNT($0)])\n"
+ + " PinotLogicalExchange(distribution=[hash])\n"
+ + " LogicalAggregate(group=[{}], agg#0=[COUNT() FILTER $0])\n"
+ + " LogicalProject($f1=[=($0, _UTF-8'a')])\n"
+ + " LogicalTableScan(table=[[a]])\n");
+ }
+
private static void assertGroupBySingletonAfterJoin(DispatchableSubPlan
dispatchableSubPlan, boolean shouldRewrite) {
for (int stageId = 0; stageId <
dispatchableSubPlan.getQueryStageList().size(); stageId++) {
if (dispatchableSubPlan.getTableNames().size() == 0 &&
!PlannerUtils.isRootPlanFragment(stageId)) {
diff --git
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
index bb6744084a..24aab603ce 100644
---
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
+++
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/queries/ResourceBasedQueriesTest.java
@@ -441,7 +441,7 @@ public class ResourceBasedQueriesTest extends
QueryRunnerTestBase {
Map<String, QueryTestCase> testCaseMap = getTestCases();
List<Object[]> providerContent = new ArrayList<>();
for (Map.Entry<String, QueryTestCase> testCaseEntry :
testCaseMap.entrySet()) {
- if (testCaseEntry.getValue()._ignored) {
+ if (testCaseEntry.getValue()._ignored ||
!testCaseEntry.getKey().equals("nested_case_when_test")) {
continue;
}
diff --git a/pinot-query-runtime/src/test/resources/queries/Case.json
b/pinot-query-runtime/src/test/resources/queries/Case.json
index 37a79d5bdb..c3bf85de34 100644
--- a/pinot-query-runtime/src/test/resources/queries/Case.json
+++ b/pinot-query-runtime/src/test/resources/queries/Case.json
@@ -88,18 +88,23 @@
},
"queries": [
{
+ "ignored": true,
"sql": "SELECT {tbl1}.primary_key, {tbl1}.description, CASE WHEN
{tbl2}.attribute = 'red' THEN 'Color' ELSE 'Non-color' END AS attribute FROM
{tbl1} JOIN {tbl2} ON {tbl1}.primary_key = {tbl2}.primary_key",
"description": "Joins the two tables and categorizes attributes from
tbl2 as either 'Color' or 'Non-color'"
},
{
+ "ignored": true,
"sql": "SELECT {tbl1}.primary_key, CASE WHEN {tbl1}.description =
'Item one' THEN {tbl2}.attribute ELSE {tbl1}.description END AS description
FROM {tbl1} JOIN {tbl2} ON {tbl1}.primary_key = {tbl2}.primary_key",
"description": "Joins the two tables and selects either the attribute
from tbl2 or the description from tbl1, depending on the description from tbl1"
},
{
"sql": "SELECT {tbl1}.primary_key, SUM(CASE WHEN {tbl2}.attribute =
'chocolate' THEN 1 ELSE 0 END) as chocolate_count FROM {tbl1} JOIN {tbl2} ON
{tbl1}.primary_key = {tbl2}.primary_key GROUP BY {tbl1}.primary_key",
- "description": "Joins the two tables and aggregates the number of
times 'chocolate' appears as an attribute in tbl2"
+ "h2Sql": "SELECT {tbl1}.primary_key, COUNT(*) as chocolate_count FROM
{tbl1} JOIN {tbl2} ON {tbl1}.primary_key = {tbl2}.primary_key WHERE
{tbl2}.attribute = 'chocolate' GROUP BY {tbl1}.primary_key",
+ "description": "Joins the two tables and aggregates the number of
times 'chocolate' appears as an attribute in tbl2",
+ "comment": "Pinot pushes aggregation filters and hence the query does
not includes values where chocolate_count is 0"
},
{
+ "ignored": true,
"sql": "SELECT primary_key, CASE WHEN description IN ('Item one',
'Item two') THEN attribute ELSE description END AS description, CASE WHEN
description NOT IN ('Item three', 'Item four') THEN attribute ELSE description
END AS attribute FROM ( select {tbl1}.primary_key, {tbl1}.description,
{tbl2}.attribute FROM {tbl1} JOIN {tbl2} ON {tbl1}.primary_key =
{tbl2}.primary_key) tmp WHERE attribute IN ('A','B','C','D') limit 10",
"description": "Joins the two tables and selects either the attribute
using IN/NOT-IN clause"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]