This is an automated email from the ASF dual-hosted git repository.
yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 2c7b0ddc72 Improve performance of multi-stage queries with large IN
clauses (#14615)
2c7b0ddc72 is described below
commit 2c7b0ddc72b69f8bb080158d1b20de27c1d69431
Author: Yash Mayya <[email protected]>
AuthorDate: Tue Mar 11 20:50:39 2025 +0530
Improve performance of multi-stage queries with large IN clauses (#14615)
---
.../tests/MultiStageEngineIntegrationTest.java | 30 ++++++++--
.../rel/rules/PinotFilterExpandSearchRule.java | 67 ----------------------
.../calcite/rel/rules/PinotQueryRuleSets.java | 6 --
.../query/planner/logical/RexExpressionUtils.java | 39 +++++++++++++
.../apache/pinot/query/QueryCompilationTest.java | 63 ++++++++++++++++++++
.../resources/queries/LiteralEvaluationPlans.json | 35 -----------
.../test/resources/queries/LiteralExpressions.json | 7 +++
7 files changed, 134 insertions(+), 113 deletions(-)
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
index f9e1d2fd3f..7600ef4103 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
@@ -1074,11 +1074,6 @@ public class MultiStageEngineIntegrationTest extends
BaseClusterIntegrationTestS
JsonNode jsonNode = postQuery(sqlQuery);
assertNoError(jsonNode);
assertEquals(jsonNode.get("resultTable").get("rows").size(),
getCountStarResult());
-
- String explainQuery = "EXPLAIN PLAN FOR " + sqlQuery;
- jsonNode = postQuery(explainQuery);
-
assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(1).asText().contains("LogicalProject"));
-
assertFalse(jsonNode.get("resultTable").get("rows").get(0).get(1).asText().contains("LogicalFilter"));
}
@Test
@@ -1600,6 +1595,31 @@ public class MultiStageEngineIntegrationTest extends
BaseClusterIntegrationTestS
assertTrue(numServersQueried.asInt() > 0);
}
+ @Test
+ public void testSearchLiteralFilter() throws Exception {
+ String sqlQuery =
+ "WITH CTE_B AS (SELECT 1692057600000 AS __ts FROM mytable GROUP BY
__ts) SELECT 1692057600000 AS __ts FROM "
+ + "CTE_B WHERE __ts >= 1692057600000 AND __ts < 1693267200000
GROUP BY __ts";
+ JsonNode explainPlan = postQuery("EXPLAIN PLAN FOR " + sqlQuery);
+
assertTrue(explainPlan.get("resultTable").get("rows").get(0).get(1).asText().contains("SEARCH"));
+
+ JsonNode result = postQuery(sqlQuery);
+ assertNoError(result);
+ assertEquals(result.get("resultTable").get("rows").size(), 1);
+ assertEquals(result.get("resultTable").get("rows").get(0).get(0).asLong(),
1692057600000L);
+
+ sqlQuery =
+ "SELECT * FROM (SELECT CASE WHEN Carrier = 'garbage' THEN 'val1' ELSE
'val2' END as val FROM mytable) WHERE "
+ + "val in ('val1', 'val2') LIMIT 1";
+ explainPlan = postQuery("EXPLAIN PLAN FOR " + sqlQuery);
+
assertTrue(explainPlan.get("resultTable").get("rows").get(0).get(1).asText().contains("SEARCH"));
+
+ result = postQuery(sqlQuery);
+ assertNoError(result);
+ assertEquals(result.get("resultTable").get("rows").size(), 1);
+ assertEquals(result.get("resultTable").get("rows").get(0).get(0).asText(),
"val2");
+ }
+
@Test
public void testPolymorphicScalarArrayFunctions() throws Exception {
String query = "select ARRAY_LENGTH(ARRAY[1,2,3]);";
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotFilterExpandSearchRule.java
b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotFilterExpandSearchRule.java
deleted file mode 100644
index 1f34913c19..0000000000
---
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotFilterExpandSearchRule.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.calcite.rel.rules;
-
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptRuleCall;
-import org.apache.calcite.rel.core.Filter;
-import org.apache.calcite.rex.RexCall;
-import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexUtil;
-import org.apache.calcite.tools.RelBuilderFactory;
-
-
-public class PinotFilterExpandSearchRule extends RelOptRule {
- public static final PinotFilterExpandSearchRule INSTANCE =
- new PinotFilterExpandSearchRule(PinotRuleUtils.PINOT_REL_FACTORY);
-
- public PinotFilterExpandSearchRule(RelBuilderFactory factory) {
- super(operand(Filter.class, any()), factory, null);
- }
-
- @Override
- public boolean matches(RelOptRuleCall call) {
- Filter filter = call.rel(0);
- return containsRangeSearch(filter.getCondition());
- }
-
- @Override
- public void onMatch(RelOptRuleCall call) {
- Filter filter = call.rel(0);
- RexNode newCondition =
RexUtil.expandSearch(filter.getCluster().getRexBuilder(), null,
filter.getCondition());
- call.transformTo(filter.copy(filter.getTraitSet(), filter.getInput(),
newCondition));
- }
-
- private boolean containsRangeSearch(RexNode condition) {
- switch (condition.getKind()) {
- case AND:
- case OR:
- for (RexNode operand : ((RexCall) condition).getOperands()) {
- if (containsRangeSearch(operand)) {
- return true;
- }
- }
- return false;
- case SEARCH:
- return true;
- default:
- return false;
- }
- }
-}
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotQueryRuleSets.java
b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotQueryRuleSets.java
index e6850f26f9..27ac0a1fd7 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotQueryRuleSets.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotQueryRuleSets.java
@@ -149,12 +149,6 @@ public class PinotQueryRuleSets {
// remove exchanges when there's duplicates
PinotExchangeEliminationRule.INSTANCE,
- // Expand all SEARCH nodes to simplified filter nodes. SEARCH nodes get
created for queries with range predicates,
- // in-clauses, etc.
- // NOTE: Keep this rule at the end because it can potentially create a
lot of predicates joined by OR/AND for IN/
- // NOT IN clause, which can be expensive to process in other rules.
- // TODO: Consider removing this rule and directly handle SEARCH in
RexExpressionUtils.
- PinotFilterExpandSearchRule.INSTANCE,
// Evaluate the Literal filter nodes
CoreRules.FILTER_REDUCE_EXPRESSIONS
);
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
index a1b7075850..2cc63c6a01 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
@@ -330,17 +330,56 @@ public class RexExpressionUtils {
Sarg sarg = searchArgument.getValueAs(Sarg.class);
assert sarg != null;
if (sarg.isPoints()) {
+ if (leftOperand instanceof RexLiteral) {
+ return evaluateLiteralIn((RexLiteral) leftOperand,
sarg.rangeSet.asRanges());
+ }
return new RexExpression.FunctionCall(ColumnDataType.BOOLEAN,
SqlKind.IN.name(),
toSearchFunctionOperands(leftOperand, sarg.rangeSet.asRanges(),
dataType));
} else if (sarg.isComplementedPoints()) {
+ if (leftOperand instanceof RexLiteral) {
+ return evaluateLiteralNotIn((RexLiteral) leftOperand,
sarg.rangeSet.complement().asRanges());
+ }
return new RexExpression.FunctionCall(ColumnDataType.BOOLEAN,
SqlKind.NOT_IN.name(),
toSearchFunctionOperands(leftOperand,
sarg.rangeSet.complement().asRanges(), dataType));
} else {
+ if (leftOperand instanceof RexLiteral) {
+ return evaluateLiteralOrRanges((RexLiteral) leftOperand,
sarg.rangeSet.asRanges());
+ }
Set<Range> ranges = sarg.rangeSet.asRanges();
return convertRangesToOr(dataType, leftOperand, ranges);
}
}
+ private static RexExpression evaluateLiteralIn(RexLiteral leftOperand,
Set<Range> ranges) {
+ Comparable leftVal = leftOperand.getValue();
+ for (Range range : ranges) {
+ if (range.lowerEndpoint().equals(leftVal)) {
+ return RexExpression.Literal.TRUE;
+ }
+ }
+ return RexExpression.Literal.FALSE;
+ }
+
+ private static RexExpression evaluateLiteralNotIn(RexLiteral leftOperand,
Set<Range> ranges) {
+ Comparable leftVal = leftOperand.getValue();
+ for (Range range : ranges) {
+ if (range.lowerEndpoint().equals(leftVal)) {
+ return RexExpression.Literal.FALSE;
+ }
+ }
+ return RexExpression.Literal.TRUE;
+ }
+
+ private static RexExpression evaluateLiteralOrRanges(RexLiteral leftOperand,
Set<Range> ranges) {
+ Comparable leftVal = leftOperand.getValue();
+ for (Range range : ranges) {
+ if (range.contains(leftVal)) {
+ return RexExpression.Literal.TRUE;
+ }
+ }
+ return RexExpression.Literal.FALSE;
+ }
+
private static RexExpression convertRangesToOr(ColumnDataType dataType,
RexNode leftOperand, Set<Range> ranges) {
int numRanges = ranges.size();
if (numRanges == 0) {
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
index 0a9e02f40c..93ca0acbaf 100644
---
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
@@ -486,6 +486,69 @@ public class QueryCompilationTest extends
QueryEnvironmentTestBase {
assertTrue(e.getCause().getMessage().contains("expecting 1 argument"));
}
+ @Test
+ public void testLargeIn() {
+ String query = "SELECT col1\n"
+ + "FROM (\n"
+ + " SELECT col1\n"
+ + " FROM a\n"
+ + " WHERE col1 IN (\n"
+ + " 'a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8',
'a9',\n"
+ + " 'a10', 'a11', 'a12', 'a13', 'a14', 'a15', 'a16',
'a17', 'a18', 'a19',\n"
+ + " 'a20', 'a21', 'a22', 'a23', 'a24', 'a25', 'a26',
'a27', 'a28', 'a29',\n"
+ + " 'a30', 'a31', 'a32', 'a33', 'a34', 'a35', 'a36',
'a37', 'a38', 'a39',\n"
+ + " 'a40', 'a41', 'a42', 'a43', 'a44', 'a45', 'a46',
'a47', 'a48', 'a49',\n"
+ + " 'a50', 'a51', 'a52', 'a53', 'a54', 'a55', 'a56',
'a57', 'a58', 'a59',\n"
+ + " 'a60', 'a61', 'a62', 'a63', 'a64', 'a65', 'a66',
'a67', 'a68', 'a69',\n"
+ + " 'a70', 'a71', 'a72', 'a73', 'a74', 'a75', 'a76',
'a77', 'a78', 'a79',\n"
+ + " 'a80', 'a81', 'a82', 'a83', 'a84', 'a85', 'a86',
'a87', 'a88', 'a89',\n"
+ + " 'a90', 'a91', 'a92', 'a93', 'a94', 'a95', 'a96',
'a97', 'a98', 'a99',\n"
+ + " 'a100', 'a101', 'a102', 'a103', 'a104', 'a105',
'a106', 'a107', 'a108', 'a109',\n"
+ + " 'a110', 'a111', 'a112', 'a113', 'a114', 'a115',
'a116', 'a117', 'a118', 'a119',\n"
+ + " 'a120', 'a121', 'a122', 'a123', 'a124', 'a125',
'a126', 'a127', 'a128', 'a129',\n"
+ + " 'a130', 'a131', 'a132', 'a133', 'a134', 'a135',
'a136', 'a137', 'a138', 'a139',\n"
+ + " 'a140', 'a141', 'a142', 'a143', 'a144', 'a145',
'a146', 'a147', 'a148', 'a149',\n"
+ + " 'a150', 'a151', 'a152', 'a153', 'a154', 'a155',
'a156', 'a157', 'a158', 'a159',\n"
+ + " 'a160', 'a161', 'a162', 'a163', 'a164', 'a165',
'a166', 'a167', 'a168', 'a169',\n"
+ + " 'a170', 'a171', 'a172', 'a173', 'a174', 'a175',
'a176', 'a177', 'a178', 'a179',\n"
+ + " 'a180', 'a181', 'a182', 'a183', 'a184', 'a185',
'a186', 'a187', 'a188', 'a189',\n"
+ + " 'a190', 'a191', 'a192', 'a193', 'a194', 'a195',
'a196', 'a197', 'a198', 'a199',\n"
+ + " 'a200', 'a201', 'a202', 'a203', 'a204', 'a205',
'a206', 'a207', 'a208', 'a209',\n"
+ + " 'a210', 'a211', 'a212', 'a213', 'a214', 'a215',
'a216', 'a217', 'a218', 'a219',\n"
+ + " 'a220', 'a221', 'a222', 'a223', 'a224', 'a225',
'a226', 'a227', 'a228', 'a229',\n"
+ + " 'a230', 'a231', 'a232', 'a233', 'a234', 'a235',
'a236', 'a237', 'a238', 'a239',\n"
+ + " 'a240', 'a241', 'a242', 'a243', 'a244', 'a245',
'a246', 'a247', 'a248', 'a249',\n"
+ + " 'a250', 'a251', 'a252', 'a253', 'a254', 'a255',
'a256', 'a257', 'a258', 'a259',\n"
+ + " 'a260', 'a261', 'a262', 'a263', 'a264', 'a265',
'a266', 'a267', 'a268', 'a269',\n"
+ + " 'a270', 'a271', 'a272', 'a273', 'a274', 'a275',
'a276', 'a277', 'a278', 'a279',\n"
+ + " 'a280', 'a281', 'a282', 'a283', 'a284', 'a285',
'a286', 'a287', 'a288', 'a289',\n"
+ + " 'a290', 'a291', 'a292', 'a293', 'a294', 'a295',
'a296', 'a297', 'a298', 'a299',\n"
+ + " 'a300', 'a301', 'a302', 'a303', 'a304', 'a305',
'a306', 'a307', 'a308', 'a309',\n"
+ + " 'a310', 'a311', 'a312', 'a313', 'a314', 'a315',
'a316', 'a317', 'a318', 'a319',\n"
+ + " 'a320', 'a321', 'a322', 'a323', 'a324', 'a325',
'a326', 'a327', 'a328', 'a329',\n"
+ + " 'a330', 'a331', 'a332', 'a333', 'a334', 'a335',
'a336', 'a337', 'a338', 'a339',\n"
+ + " 'a340', 'a341', 'a342', 'a343', 'a344', 'a345',
'a346', 'a347', 'a348', 'a349',\n"
+ + " 'a350', 'a351', 'a352', 'a353', 'a354', 'a355',
'a356', 'a357', 'a358', 'a359',\n"
+ + " 'a360', 'a361', 'a362', 'a363', 'a364', 'a365',
'a366', 'a367', 'a368', 'a369',\n"
+ + " 'a370', 'a371', 'a372', 'a373', 'a374', 'a375',
'a376', 'a377', 'a378', 'a379',\n"
+ + " 'a380', 'a381', 'a382', 'a383', 'a384', 'a385',
'a386', 'a387', 'a388', 'a389',\n"
+ + " 'a390', 'a391', 'a392', 'a393', 'a394', 'a395',
'a396', 'a397', 'a398', 'a399',\n"
+ + " 'a400', 'a401', 'a402', 'a403', 'a404', 'a405',
'a406', 'a407', 'a408', 'a409',\n"
+ + " 'a410', 'a411', 'a412', 'a413', 'a414', 'a415',
'a416', 'a417', 'a418', 'a419',\n"
+ + " 'a420', 'a421', 'a422', 'a423', 'a424', 'a425',
'a426', 'a427', 'a428', 'a429',\n"
+ + " 'a430', 'a431', 'a432', 'a433', 'a434', 'a435',
'a436', 'a437', 'a438', 'a439',\n"
+ + " 'a440', 'a441', 'a442', 'a443', 'a444', 'a445',
'a446', 'a447', 'a448', 'a449',\n"
+ + " 'a450', 'a451', 'a452', 'a453', 'a454', 'a455',
'a456', 'a457', 'a458', 'a459',\n"
+ + " 'a460', 'a461', 'a462', 'a463', 'a464', 'a465',
'a466', 'a467', 'a468', 'a469',\n"
+ + " 'a470', 'a471', 'a472', 'a473', 'a474', 'a475',
'a476', 'a477', 'a478', 'a479',\n"
+ + " 'a480', 'a481', 'a482', 'a483', 'a484', 'a485',
'a486', 'a487', 'a488', 'a489',\n"
+ + " 'a490', 'a491', 'a492', 'a493', 'a494', 'a495',
'a496', 'a497', 'a498', 'a499'\n"
+ + " )\n"
+ + " )\n"
+ + "GROUP BY col1;";
+ _queryEnvironment.planQuery(query);
+ }
+
// --------------------------------------------------------------------------
// Test Utils.
// --------------------------------------------------------------------------
diff --git
a/pinot-query-planner/src/test/resources/queries/LiteralEvaluationPlans.json
b/pinot-query-planner/src/test/resources/queries/LiteralEvaluationPlans.json
index 098daef731..43866188f4 100644
--- a/pinot-query-planner/src/test/resources/queries/LiteralEvaluationPlans.json
+++ b/pinot-query-planner/src/test/resources/queries/LiteralEvaluationPlans.json
@@ -257,16 +257,6 @@
"description": "select non-exist literal function",
"sql": "EXPLAIN PLAN FOR Select nonExistFun(1,2) FROM a",
"expectedException": ".*No match found for function signature
nonExistFun.*"
- },
- {
- "description": "Literal only filter on subquery is simplified",
- "sql": "EXPLAIN PLAN FOR SELECT * FROM (SELECT CASE WHEN col1 =
'abc123' THEN 'val1' ELSE 'val2' END as val FROM a) WHERE val in ('val1',
'val2')",
- "output": [
- "Execution Plan",
- "\nLogicalProject(val=[CASE(=($0, _UTF-8'abc123'), _UTF-8'val1',
_UTF-8'val2')])",
- "\n LogicalTableScan(table=[[default, a]])",
- "\n"
- ]
}
]
},
@@ -281,31 +271,6 @@
"\nLogicalValues(tuples=[[]])",
"\n"
]
- },
- {
- "description": "AND filter on constants is simplified",
- "sql": "EXPLAIN PLAN FOR WITH CTE_B AS (SELECT 1692057600000 AS __ts
FROM a GROUP BY __ts) SELECT 1692057600000 AS __ts FROM CTE_B WHERE __ts >=
1692057600000 AND __ts < 1693267200000 GROUP BY __ts",
- "output": [
- "Execution Plan",
- "\nLogicalProject(__ts=[1692057600000:BIGINT])",
- "\n PinotLogicalAggregate(group=[{0}], aggType=[FINAL])",
- "\n PinotLogicalExchange(distribution=[hash[0]])",
- "\n PinotLogicalAggregate(group=[{0}], aggType=[LEAF])",
- "\n LogicalProject(__ts=[1692057600000:BIGINT])",
- "\n LogicalTableScan(table=[[default, a]])",
- "\n"
- ]
- },
- {
- "description": "Search + OR filter on constants is simplified",
- "sql": "EXPLAIN PLAN FOR WITH tmp2 AS (SELECT CASE WHEN col2 = 'VAL1'
THEN 'A' ELSE col2 END AS cased FROM a) SELECT 1 FROM tmp2 WHERE ((cased = 'B')
OR (cased = 'A'))",
- "output": [
- "Execution Plan",
- "\nLogicalProject(EXPR$0=[1])",
- "\n LogicalFilter(condition=[SEARCH($1, Sarg[_UTF-8'A':VARCHAR
CHARACTER SET \"UTF-8\", _UTF-8'B':VARCHAR CHARACTER SET \"UTF-8\",
_UTF-8'VAL1':VARCHAR CHARACTER SET \"UTF-8\"]:VARCHAR CHARACTER SET
\"UTF-8\")])",
- "\n LogicalTableScan(table=[[default, a]])",
- "\n"
- ]
}
]
}
diff --git
a/pinot-query-runtime/src/test/resources/queries/LiteralExpressions.json
b/pinot-query-runtime/src/test/resources/queries/LiteralExpressions.json
index 77992571a5..8655870486 100644
--- a/pinot-query-runtime/src/test/resources/queries/LiteralExpressions.json
+++ b/pinot-query-runtime/src/test/resources/queries/LiteralExpressions.json
@@ -20,6 +20,13 @@
[1]
]
},
+ {
+ "description": "CTE with search + OR filter on constants",
+ "sql": "WITH CTE_1 AS (SELECT CASE WHEN strCol = 'foo' THEN 'A' ELSE
strCol END AS cased FROM {mytable}) SELECT 1 FROM CTE_1 WHERE ((cased = 'B') OR
(cased = 'A'))",
+ "outputs": [
+ [1]
+ ]
+ },
{
"description": "Literal SEARCH filter on subquery with CASE WHEN",
"sql": "SELECT * FROM (SELECT CASE WHEN strCol = 'foo' THEN 'a' ELSE
'b' END AS val FROM {mytable}) WHERE val IN ('a', 'b')",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]