This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 02a3f21b65 [fix](analyzer) InferFilterRule bug: equations in on clause
of outer/anti join are not inferable. (#11515)
02a3f21b65 is described below
commit 02a3f21b65bf32934e9f30018ee75335017a831f
Author: minghong <[email protected]>
AuthorDate: Thu Aug 11 09:36:43 2022 +0800
[fix](analyzer) InferFilterRule bug: equations in on clause of outer/anti
join are not inferable. (#11515)
---
.../java/org/apache/doris/analysis/TableRef.java | 3 +-
.../org/apache/doris/rewrite/ExprRewriter.java | 43 ++++++++++++++++++++-
.../org/apache/doris/rewrite/InferFiltersRule.java | 30 ++++++++-------
.../apache/doris/rewrite/InferFiltersRuleTest.java | 26 +++++++++++--
.../test_forbidden_infer_filter_rule.out | 5 +++
.../test_forbidden_infer_filter_rule.groovy | 44 ++++++++++++++++++++++
6 files changed, 131 insertions(+), 20 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
index 7532adc101..56811b0f75 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
@@ -30,6 +30,7 @@ import org.apache.doris.common.UserException;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.rewrite.ExprRewriter;
+import org.apache.doris.rewrite.ExprRewriter.ClauseType;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
@@ -567,7 +568,7 @@ public class TableRef implements ParseNode, Writable {
Preconditions.checkState(isAnalyzed);
if (onClause != null) {
Expr expr = onClause.clone();
- onClause = rewriter.rewrite(onClause, analyzer,
ExprRewriter.ClauseType.ON_CLAUSE);
+ onClause = rewriter.rewrite(onClause, analyzer,
ClauseType.fromJoinType(joinOp));
if (joinOp.isOuterJoin() || joinOp.isSemiAntiJoin()) {
if (onClause instanceof BoolLiteral && !((BoolLiteral)
onClause).getValue()) {
onClause = expr;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java
index 2b54b986f5..b0d82ddbaf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java
@@ -22,6 +22,7 @@ package org.apache.doris.rewrite;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.JoinOperator;
import org.apache.doris.common.AnalysisException;
import com.google.common.collect.Lists;
@@ -53,9 +54,47 @@ public class ExprRewriter {
// The type of clause that executes the rule.
// This type is only used in InferFiltersRule, RewriteDateLiteralRule,
other rules are not used
public enum ClauseType {
- ON_CLAUSE,
+ INNER_JOIN_CLAUSE,
+ LEFT_OUTER_JOIN_CLAUSE,
+ RIGHT_OUTER_JOIN_CLAUSE,
+ FULL_OUTER_JOIN_CLAUSE,
+ LEFT_SEMI_JOIN_CLAUSE,
+ RIGHT_SEMI_JOIN_CLAUSE,
+ LEFT_ANTI_JOIN_CLAUSE,
+ RIGHT_ANTI_JOIN_CLAUSE,
+ CROSS_JOIN_CLAUSE,
WHERE_CLAUSE,
- OTHER_CLAUSE, // All other clauses that are not on and not where
+ OTHER_CLAUSE; // All other clauses that are not on and not where
+
+ public static ClauseType fromJoinType(JoinOperator joinOp) {
+ switch (joinOp) {
+ case INNER_JOIN: return INNER_JOIN_CLAUSE;
+ case LEFT_OUTER_JOIN: return LEFT_OUTER_JOIN_CLAUSE;
+ case RIGHT_OUTER_JOIN: return RIGHT_OUTER_JOIN_CLAUSE;
+ case FULL_OUTER_JOIN: return FULL_OUTER_JOIN_CLAUSE;
+ case LEFT_SEMI_JOIN: return LEFT_SEMI_JOIN_CLAUSE;
+ case RIGHT_SEMI_JOIN: return RIGHT_SEMI_JOIN_CLAUSE;
+ case LEFT_ANTI_JOIN: return LEFT_ANTI_JOIN_CLAUSE;
+ case RIGHT_ANTI_JOIN: return RIGHT_ANTI_JOIN_CLAUSE;
+ case CROSS_JOIN: return CROSS_JOIN_CLAUSE;
+ default: return OTHER_CLAUSE;
+ }
+ }
+
+ public boolean isInferable() {
+ if (this == INNER_JOIN_CLAUSE
+ || this == LEFT_SEMI_JOIN_CLAUSE
+ || this == RIGHT_SEMI_JOIN_CLAUSE
+ || this == WHERE_CLAUSE
+ || this == OTHER_CLAUSE) {
+ return true;
+ }
+ return false;
+ }
+
+ public boolean isOnClause() {
+ return this.compareTo(WHERE_CLAUSE) < 0;
+ }
}
// Once-only Rules
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java
b/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java
index 50d123f99c..ff7f266dce 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java
@@ -71,18 +71,22 @@ public class InferFiltersRule implements ExprRewriteRule {
return expr;
}
+ if (!clauseType.isInferable()) {
+ return expr;
+ }
+
// slotEqSlotExpr: Record existing and infer equivalent connections
List<Expr> slotEqSlotExpr = analyzer.getOnSlotEqSlotExpr();
// slotEqSlotDeDuplication: De-Duplication for slotEqSlotExpr
- Set<Pair<Expr, Expr>> slotEqSlotDeDuplication = (clauseType ==
ExprRewriter.ClauseType.ON_CLAUSE)
+ Set<Pair<Expr, Expr>> slotEqSlotDeDuplication =
(clauseType.isOnClause())
? analyzer.getOnSlotEqSlotDeDuplication() : Sets.newHashSet();
// slotToLiteralExpr: Record existing and infer expr which slot and
literal are equal
List<Expr> slotToLiteralExpr = analyzer.getOnSlotToLiteralExpr();
// slotToLiteralDeDuplication: De-Duplication for slotToLiteralExpr
- Set<Pair<Expr, Expr>> slotToLiteralDeDuplication = (clauseType ==
ExprRewriter.ClauseType.ON_CLAUSE)
+ Set<Pair<Expr, Expr>> slotToLiteralDeDuplication =
(clauseType.isOnClause())
? analyzer.getOnSlotToLiteralDeDuplication() :
Sets.newHashSet();
// newExprWithState: just record infer expr which slot and literal are
equal and which is not null predicate
@@ -94,7 +98,7 @@ public class InferFiltersRule implements ExprRewriteRule {
List<Expr> isNullExpr = analyzer.getOnIsNullExpr();
// isNullDeDuplication: De-Duplication for isNullExpr
- Set<Expr> isNullDeDuplication = (clauseType ==
ExprRewriter.ClauseType.ON_CLAUSE)
+ Set<Expr> isNullDeDuplication = (clauseType.isOnClause())
? analyzer.getOnIsNullDeDuplication() : Sets.newHashSet();
// inExpr: Record existing and infer in predicate
@@ -102,7 +106,7 @@ public class InferFiltersRule implements ExprRewriteRule {
// inDeDuplication: De-Duplication for inExpr
Set<Expr> inDeDuplication =
- (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) ?
analyzer.getInDeDuplication() : Sets.newHashSet();
+ (clauseType.isOnClause()) ? analyzer.getInDeDuplication() :
Sets.newHashSet();
// exprToWarshallArraySubscript/warshallArraySubscriptToExpr:
// function is easy to build warshall and newExprWithState
@@ -180,7 +184,7 @@ public class InferFiltersRule implements ExprRewriteRule {
if (!slotToLiteralDeDuplication.contains(pair)) {
slotToLiteralDeDuplication.add(pair);
slotToLiteralExpr.add(conjunct);
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerOnSlotToLiteralDeDuplication(pair);
analyzer.registerOnSlotToLiteralExpr(conjunct);
}
@@ -197,7 +201,7 @@ public class InferFiltersRule implements ExprRewriteRule {
&& !slotEqSlotDeDuplication.contains(eqPair)) {
slotEqSlotDeDuplication.add(pair);
slotEqSlotExpr.add(conjunct);
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerOnSlotEqSlotDeDuplication(pair);
analyzer.registerOnSlotEqSlotExpr(conjunct);
}
@@ -210,7 +214,7 @@ public class InferFiltersRule implements ExprRewriteRule {
&& ((IsNullPredicate) conjunct).isNotNull()) {
isNullDeDuplication.add(conjunct.getChild(0).unwrapSlotRef());
isNullExpr.add(conjunct);
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerOnIsNullDeDuplication(conjunct.getChild(0).unwrapSlotRef());
analyzer.registerOnIsNullExpr(conjunct);
}
@@ -221,7 +225,7 @@ public class InferFiltersRule implements ExprRewriteRule {
if
(!inDeDuplication.contains(conjunct.getChild(0).unwrapSlotRef())) {
inDeDuplication.add(conjunct.getChild(0).unwrapSlotRef());
inExpr.add(conjunct);
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerInExpr(conjunct);
analyzer.registerInDeDuplication(conjunct.getChild(0).unwrapSlotRef());
}
@@ -359,7 +363,7 @@ public class InferFiltersRule implements ExprRewriteRule {
slotEqSlotExpr.add(new
BinaryPredicate(BinaryPredicate.Operator.EQ,
warshallArraySubscriptToExpr.get(slotPair.first),
warshallArraySubscriptToExpr.get(slotPair.second)));
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerOnSlotEqSlotDeDuplication(pair);
analyzer.registerOnSlotEqSlotExpr(new
BinaryPredicate(BinaryPredicate.Operator.EQ,
warshallArraySubscriptToExpr.get(slotPair.first),
@@ -448,7 +452,7 @@ public class InferFiltersRule implements ExprRewriteRule {
*/
private boolean checkNeedInfer(JoinOperator joinOperator, boolean
needChange, ExprRewriter.ClauseType clauseType) {
boolean ret = false;
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
if (joinOperator.isInnerJoin()
|| (joinOperator == JoinOperator.LEFT_SEMI_JOIN)
|| (!needChange && joinOperator ==
JoinOperator.RIGHT_OUTER_JOIN)
@@ -496,7 +500,7 @@ public class InferFiltersRule implements ExprRewriteRule {
slotToLiteralDeDuplication.add(pair);
Pair<Expr, Boolean> newBPWithBool = new Pair<>(newBP,
needAddnewExprWithState);
newExprWithState.add(newBPWithBool);
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerOnSlotToLiteralDeDuplication(pair);
analyzer.registerOnSlotToLiteralExpr(newBP);
}
@@ -575,7 +579,7 @@ public class InferFiltersRule implements ExprRewriteRule {
isNullDeDuplication.add(newExpr.getChild(0));
Pair<Expr, Boolean> newExprWithBoolean = new Pair<>(newExpr,
true);
newExprWithState.add(newExprWithBoolean);
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerOnIsNullExpr(newExpr);
analyzer.registerOnIsNullDeDuplication(newExpr);
}
@@ -660,7 +664,7 @@ public class InferFiltersRule implements ExprRewriteRule {
inDeDuplication.add(newIP);
Pair<Expr, Boolean> newBPWithBool = new Pair<>(newIP,
needAddnewExprWithState);
newExprWithState.add(newBPWithBool);
- if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
+ if (clauseType.isOnClause()) {
analyzer.registerInDeDuplication(newIP);
analyzer.registerInExpr(newIP);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java
b/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java
index d8889d8d7a..2866c78e11 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java
@@ -140,15 +140,24 @@ public class InferFiltersRuleTest {
}
@Test
- public void testOn2TablesLeftAntiJoinEqLiteralAt1st() throws Exception {
+ public void testOn2TablesLeftJoinNotInferable() throws Exception {
SessionVariable sessionVariable = dorisAssert.getSessionVariable();
sessionVariable.setEnableInferPredicate(true);
Assert.assertTrue(sessionVariable.isEnableInferPredicate());
- String query = "select * from tb1 left anti join tb2 on tb1.k1 =
tb2.k1 and tb1.k1 = 1";
+ String query = "select * from tb1 left join tb2 on tb1.k1 = tb2.k1 and
tb2.k1 = 1";
String planString = dorisAssert.query(query).explainQuery();
- Assert.assertTrue(planString.contains("`tb2`.`k1` = 1"));
+ Assert.assertFalse(planString.contains("`tb1`.`k1` = 1"));
}
+ /*
+ the following 3 test case is valid. But we cannot tell them from other
incorrect inferences.
+ In origin design we made a mistake: we assume inference is symmetrical.
+ For example, t1.x=t2.x and t1.x=1 => t2.x=1
+ this is not always true.
+ if this is left join, t1 is left, t2.x=1 is not valid.
+ However, in inferFilterRule, we do not know whether t1.x is from left or
right table.
+ And hence, we have to skip inference for outer/anti join for quick fix.
+
@Test
public void testOn3Tables1stInner2ndRightJoinEqLiteralAt2nd() throws
Exception {
SessionVariable sessionVariable = dorisAssert.getSessionVariable();
@@ -172,7 +181,16 @@ public class InferFiltersRuleTest {
Assert.assertTrue(planString.contains("`tb2`.`k1` = 1"));
Assert.assertTrue(planString.contains("`tb1`.`k1` = 1"));
}
-
+ @Test
+ public void testOn2TablesLeftAntiJoinEqLiteralAt1st() throws Exception {
+ SessionVariable sessionVariable = dorisAssert.getSessionVariable();
+ sessionVariable.setEnableInferPredicate(true);
+ Assert.assertTrue(sessionVariable.isEnableInferPredicate());
+ String query = "select * from tb1 left anti join tb2 on tb1.k1 =
tb2.k1 and tb1.k1 = 1";
+ String planString = dorisAssert.query(query).explainQuery();
+ Assert.assertTrue(planString.contains("`tb2`.`k1` = 1"));
+ }
+ */
@Test
public void testOnIsNotNullPredicate() throws Exception {
SessionVariable sessionVariable = dorisAssert.getSessionVariable();
diff --git
a/regression-test/data/correctness/test_forbidden_infer_filter_rule.out
b/regression-test/data/correctness/test_forbidden_infer_filter_rule.out
new file mode 100644
index 0000000000..2cfc6b4ac3
--- /dev/null
+++ b/regression-test/data/correctness/test_forbidden_infer_filter_rule.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+-1 \N
+1 1
+
diff --git
a/regression-test/suites/correctness/test_forbidden_infer_filter_rule.groovy
b/regression-test/suites/correctness/test_forbidden_infer_filter_rule.groovy
new file mode 100644
index 0000000000..fe6ad1ec3e
--- /dev/null
+++ b/regression-test/suites/correctness/test_forbidden_infer_filter_rule.groovy
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_forbidden_infer_filter_rule") {
+ sql """ DROP TABLE IF EXISTS A """
+ sql """
+ CREATE TABLE IF NOT EXISTS A
+ (
+ a_id int
+ )
+ DISTRIBUTED BY HASH(a_id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """ DROP TABLE IF EXISTS B """
+ sql """
+ CREATE TABLE IF NOT EXISTS B
+ (
+ b_id int
+ )
+ DISTRIBUTED BY HASH(b_id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql " INSERT INTO A values (1), (-1);"
+
+ sql " INSERT INTO B values (1);"
+
+ qt_sql "select * from A left join B on a_id=b_id and b_id in (1,2) where
a_id < 100 order by a_id;"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]