IMPALA-1286: Extract common conjuncts from disjunctions. Adds a new ExprRewriteRule to extract common conjuncts from disjunctions.
Examples: (a AND b AND c) OR (b AND d) ==> b AND ((a AND c) OR (d)); (a AND b) OR (a AND b) ==> a AND b; (a AND b AND c) OR (c) ==> c. Adds a new query option ENABLE_EXPR_REWRITES to enable/disable non-essential expr rewrites in the FE. Note that some rewrites are required, e.g., BetweenToCompoundRule. Disabling the rewrites is useful for testing, in particular, to make sure that the exprs specified in expr-test.cc are executed as written. Testing: Added a new unit test in ExprRewriteRulesTest. Change-Id: I3cf9b950afaa3fd753d1b09ba5e540b5258940ad Reviewed-on: http://gerrit.cloudera.org:8080/4877 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0aeb6805 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0aeb6805 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0aeb6805 Branch: refs/heads/master Commit: 0aeb68050b3bc16b8a10a5a3ae65428e7edd9df5 Parents: cfac09d Author: Alex Behm <[email protected]> Authored: Mon Oct 17 23:01:57 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Wed Nov 9 09:44:59 2016 +0000 ---------------------------------------------------------------------- be/src/exprs/expr-test.cc | 3 + be/src/service/query-options.cc | 5 + be/src/service/query-options.h | 5 +- common/thrift/ImpalaInternalService.thrift | 4 + common/thrift/ImpalaService.thrift | 4 + .../apache/impala/analysis/AnalysisContext.java | 7 ++ .../impala/analysis/BetweenPredicate.java | 6 ++ .../impala/analysis/CompoundPredicate.java | 26 +++-- .../apache/impala/analysis/ExistsPredicate.java | 10 +- .../apache/impala/rewrite/ExprRewriteRule.java | 3 + .../rewrite/ExtractCommonConjunctRule.java | 102 +++++++++++++++++++ .../impala/analysis/ExprRewriteRulesTest.java | 92 +++++++++++++++++ .../org/apache/impala/analysis/ExprTest.java | 3 +- 
.../queries/PlannerTest/subquery-rewrite.test | 2 +- .../queries/PlannerTest/tpch-all.test | 12 ++- .../queries/PlannerTest/tpch-kudu.test | 5 +- .../queries/PlannerTest/tpch-nested.test | 12 ++- .../queries/PlannerTest/tpch-views.test | 4 +- 18 files changed, 282 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/be/src/exprs/expr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index 5b930e2..5a999b6 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -6124,6 +6124,9 @@ int main(int argc, char **argv) { ABORT_IF_ERROR(executor_->Setup()); vector<string> options; + // Disable FE Expr rewrites to make sure the Exprs get executed exactly as specified + // in the tests here. + options.push_back("ENABLE_EXPR_REWRITES=0"); options.push_back("DISABLE_CODEGEN=1"); disable_codegen_ = true; executor_->setExecOptions(options); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/be/src/service/query-options.cc ---------------------------------------------------------------------- diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc index eb1d2f9..d6d2cb2 100644 --- a/be/src/service/query-options.cc +++ b/be/src/service/query-options.cc @@ -441,6 +441,11 @@ Status impala::SetQueryOption(const string& key, const string& value, } break; } + case TImpalaQueryOptions::ENABLE_EXPR_REWRITES: { + query_options->__set_enable_expr_rewrites( + iequals(value, "true") || iequals(value, "1")); + break; + } default: // We hit this DCHECK(false) if we forgot to add the corresponding entry here // when we add a new query option. 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/be/src/service/query-options.h ---------------------------------------------------------------------- diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h index b1194d3..6531dcb 100644 --- a/be/src/service/query-options.h +++ b/be/src/service/query-options.h @@ -35,7 +35,7 @@ class TQueryOptions; // the DCHECK. #define QUERY_OPTS_TABLE\ DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\ - TImpalaQueryOptions::SCRATCH_LIMIT + 1);\ + TImpalaQueryOptions::ENABLE_EXPR_REWRITES + 1);\ QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\ QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR)\ QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\ @@ -85,7 +85,8 @@ class TQueryOptions; QUERY_OPT_FN(runtime_filter_max_size, RUNTIME_FILTER_MAX_SIZE)\ QUERY_OPT_FN(prefetch_mode, PREFETCH_MODE)\ QUERY_OPT_FN(strict_mode, STRICT_MODE)\ - QUERY_OPT_FN(scratch_limit, SCRATCH_LIMIT); + QUERY_OPT_FN(scratch_limit, SCRATCH_LIMIT)\ + QUERY_OPT_FN(enable_expr_rewrites, ENABLE_EXPR_REWRITES); /// Converts a TQueryOptions struct into a map of key, value pairs. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/common/thrift/ImpalaInternalService.thrift ---------------------------------------------------------------------- diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift index d5a92ca..e510331 100644 --- a/common/thrift/ImpalaInternalService.thrift +++ b/common/thrift/ImpalaInternalService.thrift @@ -213,6 +213,10 @@ struct TQueryOptions { // A limit on the amount of scratch directory space that can be used; 50: optional i64 scratch_limit = -1 + + // Indicates whether the FE should rewrite Exprs for optimization purposes. + // It's sometimes useful to disable rewrites for testing, e.g., expr-test.cc. 
+ 51: optional bool enable_expr_rewrites = true } // Impala currently has two types of sessions: Beeswax and HiveServer2 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/common/thrift/ImpalaService.thrift ---------------------------------------------------------------------- diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift index 573709b..68a0588 100644 --- a/common/thrift/ImpalaService.thrift +++ b/common/thrift/ImpalaService.thrift @@ -243,6 +243,10 @@ enum TImpalaQueryOptions { // Unspecified or a limit of -1 means no limit; // Otherwise specified in the same way as MEM_LIMIT. SCRATCH_LIMIT + + // Indicates whether the FE should rewrite Exprs for optimization purposes. + // It's sometimes useful to disable rewrites for testing, e.g., expr-test.cc. + ENABLE_EXPR_REWRITES } // The summary of an insert. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java index b22bccb..3323ed4 100644 --- a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java +++ b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java @@ -37,6 +37,7 @@ import org.apache.impala.common.Pair; import org.apache.impala.rewrite.BetweenToCompoundRule; import org.apache.impala.rewrite.ExprRewriteRule; import org.apache.impala.rewrite.ExprRewriter; +import org.apache.impala.rewrite.ExtractCommonConjunctRule; import org.apache.impala.thrift.TAccessEvent; import org.apache.impala.thrift.TLineageGraph; import org.apache.impala.thrift.TQueryCtx; @@ -65,7 +66,13 @@ public class AnalysisContext { catalog_ = catalog; queryCtx_ = queryCtx; authzConfig_ = authzConfig; + // BetweenPredicates must be rewritten to be executable. 
Other non-essential + // expr rewrites can be disabled via a query option. When rewrites are enabled + // BetweenPredicates should be rewritten first to help trigger other rules. List<ExprRewriteRule> rules = Lists.newArrayList(BetweenToCompoundRule.INSTANCE); + if (queryCtx.getRequest().getQuery_options().enable_expr_rewrites) { + rules.add(ExtractCommonConjunctRule.INSTANCE); + } rewriter_ = new ExprRewriter(rules); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/main/java/org/apache/impala/analysis/BetweenPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/BetweenPredicate.java b/fe/src/main/java/org/apache/impala/analysis/BetweenPredicate.java index 2459715..ef19a52 100644 --- a/fe/src/main/java/org/apache/impala/analysis/BetweenPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/BetweenPredicate.java @@ -75,5 +75,11 @@ public class BetweenPredicate extends Predicate { } @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) return false; + return isNotBetween_ == ((BetweenPredicate)obj).isNotBetween_; + } + + @Override public Expr clone() { return new BetweenPredicate(this); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java b/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java index d549053..6757d26 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java @@ -27,6 +27,7 @@ import org.apache.impala.catalog.Type; import org.apache.impala.common.AnalysisException; import org.apache.impala.thrift.TExprNode; import org.apache.impala.thrift.TExprNodeType; + import 
com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -192,16 +193,27 @@ public class CompoundPredicate extends Predicate { * Creates a conjunctive predicate from a list of exprs. */ public static Expr createConjunctivePredicate(List<Expr> conjuncts) { - Expr conjunctivePred = null; - for (Expr expr: conjuncts) { - if (conjunctivePred == null) { - conjunctivePred = expr; + return createCompoundTree(conjuncts, Operator.AND); + } + + /** + * Creates a disjunctive predicate from a list of exprs. + */ + public static Expr createDisjunctivePredicate(List<Expr> disjuncts) { + return createCompoundTree(disjuncts, Operator.OR); + } + + private static Expr createCompoundTree(List<Expr> exprs, Operator op) { + Preconditions.checkState(op == Operator.AND || op == Operator.OR); + Expr result = null; + for (Expr expr: exprs) { + if (result == null) { + result = expr; continue; } - conjunctivePred = new CompoundPredicate(CompoundPredicate.Operator.AND, - expr, conjunctivePred); + result = new CompoundPredicate(op, result, expr); } - return conjunctivePred; + return result; } @Override http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java b/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java index 3131acf..578c786 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java @@ -17,11 +17,11 @@ package org.apache.impala.analysis; +import org.apache.impala.common.AnalysisException; +import org.apache.impala.thrift.TExprNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.impala.common.AnalysisException; -import org.apache.impala.thrift.TExprNode; import 
com.google.common.base.Preconditions; /** @@ -69,6 +69,12 @@ public class ExistsPredicate extends Predicate { } @Override + public boolean equals(Object o) { + if (!super.equals(o)) return false; + return notExists_ == ((ExistsPredicate)o).notExists_; + } + + @Override public Expr clone() { return new ExistsPredicate(this); } @Override http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/main/java/org/apache/impala/rewrite/ExprRewriteRule.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriteRule.java b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriteRule.java index 48054a8..bbd7c38 100644 --- a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriteRule.java +++ b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriteRule.java @@ -24,6 +24,9 @@ import org.apache.impala.common.AnalysisException; /** * Base class for all Expr rewrite rules. A rule is free to modify Exprs in place, * but must return a different Expr object if any modifications were made. + * An ExprRewriteRule is intended to apply its transformation on a single Expr and not + * recursively on all its children. The recursive and repeated application of + * ExprRewriteRules is driven by an ExprRewriter. */ public interface ExprRewriteRule { /** http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java b/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java new file mode 100644 index 0000000..e1515d3 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.rewrite; + +import java.util.List; + +import org.apache.impala.analysis.Analyzer; +import org.apache.impala.analysis.CompoundPredicate; +import org.apache.impala.analysis.Expr; +import org.apache.impala.common.AnalysisException; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * This rule extracts common conjuncts from multiple disjunctions when it is applied + * recursively bottom-up to a tree of CompoundPredicates. + * + * Examples: + * (a AND b AND c) OR (b AND d) ==> b AND ((a AND c) OR (d)) + * (a AND b) OR (a AND b) ==> a AND b + * (a AND b AND c) OR (c) ==> c + */ +public class ExtractCommonConjunctRule implements ExprRewriteRule { + public static ExprRewriteRule INSTANCE = new ExtractCommonConjunctRule(); + + // Arbitrary limit the number of Expr.equals() comparisons in the O(N^2) loop below. + // Used to avoid pathologically expensive invocations of this rule. + // TODO: Implement Expr.hashCode() and move to a hash-based solution for the core + // Expr.equals() comparison loop below. 
+ private static final int MAX_EQUALS_COMPARISONS = 30 * 30; + + @Override + public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException { + if (!Expr.IS_OR_PREDICATE.apply(expr)) return expr; + + // Get childrens' conjuncts and check + List<Expr> child0Conjuncts = expr.getChild(0).getConjuncts(); + List<Expr> child1Conjuncts = expr.getChild(1).getConjuncts(); + Preconditions.checkState(!child0Conjuncts.isEmpty() && !child1Conjuncts.isEmpty()); + // Impose cost bound. + if (child0Conjuncts.size() * child1Conjuncts.size() > MAX_EQUALS_COMPARISONS) { + return expr; + } + + // Find common conjuncts. + List<Expr> commonConjuncts = Lists.newArrayList(); + for (Expr conjunct: child0Conjuncts) { + if (child1Conjuncts.contains(conjunct)) { + // The conjunct may have parenthesis but there's no need to preserve them. + // Removing them makes the toSql() easier to read. + conjunct.setPrintSqlInParens(false); + commonConjuncts.add(conjunct); + } + } + if (commonConjuncts.isEmpty()) return expr; + + // Remove common conjuncts. + child0Conjuncts.removeAll(commonConjuncts); + child1Conjuncts.removeAll(commonConjuncts); + + // Check special case where one child contains all conjuncts of the other. + // (a AND b) OR (a AND b) ==> a AND b + // (a AND b AND c) OR (c) ==> c + if (child0Conjuncts.isEmpty() || child1Conjuncts.isEmpty()) { + Preconditions.checkState(!commonConjuncts.isEmpty()); + Expr result = CompoundPredicate.createConjunctivePredicate(commonConjuncts); + result.analyze(analyzer); + return result; + } + + // Re-assemble disjunctive predicate. 
+ Expr child0Disjunct = CompoundPredicate.createConjunctivePredicate(child0Conjuncts); + child0Disjunct.setPrintSqlInParens(expr.getChild(0).getPrintSqlInParens()); + Expr child1Disjunct = CompoundPredicate.createConjunctivePredicate(child1Conjuncts); + child1Disjunct.setPrintSqlInParens(expr.getChild(1).getPrintSqlInParens()); + List<Expr> newDisjuncts = Lists.newArrayList(child0Disjunct, child1Disjunct); + Expr newDisjunction = CompoundPredicate.createDisjunctivePredicate(newDisjuncts); + newDisjunction.setPrintSqlInParens(true); + Expr result = CompoundPredicate.createConjunction(newDisjunction, + CompoundPredicate.createConjunctivePredicate(commonConjuncts)); + result.analyze(analyzer); + return result; + } + + private ExtractCommonConjunctRule() {} +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java index d3d52b4..16ca7e2 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java @@ -25,6 +25,7 @@ import org.apache.impala.common.FrontendTestBase; import org.apache.impala.rewrite.BetweenToCompoundRule; import org.apache.impala.rewrite.ExprRewriteRule; import org.apache.impala.rewrite.ExprRewriter; +import org.apache.impala.rewrite.ExtractCommonConjunctRule; import org.junit.Assert; import org.junit.Test; @@ -106,4 +107,95 @@ public class ExprRewriteRulesTest extends FrontendTestBase { "int_col >= if(tinyint_col >= 1 AND tinyint_col <= 2, 10, 20) " + "AND int_col <= CAST(smallint_col < 1 OR smallint_col > 2 AS INT)"); } + + @Test + public void TestExtractCommonConjunctsRule() throws AnalysisException { + ExprRewriteRule rule = 
ExtractCommonConjunctRule.INSTANCE; + + // One common conjunct: int_col < 10 + RewritesOk( + "(int_col < 10 and bigint_col < 10) or " + + "(string_col = '10' and int_col < 10)", rule, + "int_col < 10 AND ((bigint_col < 10) OR (string_col = '10'))"); + // One common conjunct in multiple disjuncts: int_col < 10 + RewritesOk( + "(int_col < 10 and bigint_col < 10) or " + + "(string_col = '10' and int_col < 10) or " + + "(id < 20 and int_col < 10) or " + + "(int_col < 10 and float_col > 3.14)", rule, + "int_col < 10 AND " + + "((bigint_col < 10) OR (string_col = '10') OR " + + "(id < 20) OR (float_col > 3.14))"); + // Same as above but with a bushy OR tree. + RewritesOk( + "((int_col < 10 and bigint_col < 10) or " + + " (string_col = '10' and int_col < 10)) or " + + "((id < 20 and int_col < 10) or " + + " (int_col < 10 and float_col > 3.14))", rule, + "int_col < 10 AND " + + "((bigint_col < 10) OR (string_col = '10') OR " + + "(id < 20) OR (float_col > 3.14))"); + // Multiple common conjuncts: int_col < 10, bool_col is null + RewritesOk( + "(int_col < 10 and bigint_col < 10 and bool_col is null) or " + + "(bool_col is null and string_col = '10' and int_col < 10)", rule, + "int_col < 10 AND bool_col IS NULL AND " + + "((bigint_col < 10) OR (string_col = '10'))"); + // Negated common conjunct: !(int_col=5 or tinyint_col > 9) + RewritesOk( + "(!(int_col=5 or tinyint_col > 9) and double_col = 7) or " + + "(!(int_col=5 or tinyint_col > 9) and double_col = 8)", rule, + "NOT (int_col = 5 OR tinyint_col > 9) AND " + + "((double_col = 7) OR (double_col = 8))"); + + // Test common BetweenPredicate: int_col between 10 and 30 + RewritesOk( + "(int_col between 10 and 30 and bigint_col < 10) or " + + "(string_col = '10' and int_col between 10 and 30) or " + + "(id < 20 and int_col between 10 and 30) or " + + "(int_col between 10 and 30 and float_col > 3.14)", rule, + "int_col BETWEEN 10 AND 30 AND " + + "((bigint_col < 10) OR (string_col = '10') OR " + + "(id < 20) OR (float_col > 
3.14))"); + // Test common NOT BetweenPredicate: int_col not between 10 and 30 + RewritesOk( + "(int_col not between 10 and 30 and bigint_col < 10) or " + + "(string_col = '10' and int_col not between 10 and 30) or " + + "(id < 20 and int_col not between 10 and 30) or " + + "(int_col not between 10 and 30 and float_col > 3.14)", rule, + "int_col NOT BETWEEN 10 AND 30 AND " + + "((bigint_col < 10) OR (string_col = '10') OR " + + "(id < 20) OR (float_col > 3.14))"); + // Test mixed BetweenPredicates are not common. + RewritesOk( + "(int_col not between 10 and 30 and bigint_col < 10) or " + + "(string_col = '10' and int_col between 10 and 30) or " + + "(id < 20 and int_col not between 10 and 30) or " + + "(int_col between 10 and 30 and float_col > 3.14)", rule, + null); + + // All conjuncts are common. + RewritesOk( + "(int_col < 10 and id between 5 and 6) or " + + "(id between 5 and 6 and int_col < 10) or " + + "(int_col < 10 and id between 5 and 6)", rule, + "int_col < 10 AND id BETWEEN 5 AND 6"); + // Complex disjuncts are redundant. + RewritesOk( + "(int_col < 10) or " + + "(int_col < 10 and bigint_col < 10 and bool_col is null) or " + + "(int_col < 10) or " + + "(bool_col is null and int_col < 10)", rule, + "int_col < 10"); + + // Due to the shape of the original OR tree we are left with redundant + // disjuncts after the extraction. 
+ RewritesOk( + "(int_col < 10 and bigint_col < 10) or " + + "(string_col = '10' and int_col < 10) or " + + "(id < 20 and int_col < 10) or " + + "(int_col < 10 and id < 20)", rule, + "int_col < 10 AND " + + "((bigint_col < 10) OR (string_col = '10') OR (id < 20) OR (id < 20))"); + } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/fe/src/test/java/org/apache/impala/analysis/ExprTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprTest.java index 7ae0f89..e0eca0a 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ExprTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ExprTest.java @@ -19,10 +19,9 @@ package org.apache.impala.analysis; import static org.junit.Assert.fail; -import org.junit.Test; - import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.Type; +import org.junit.Test; public class ExprTest { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test index 0880d9f..dfc9072 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test @@ -1638,7 +1638,7 @@ where PLAN-ROOT SINK | 05:HASH JOIN [RIGHT SEMI JOIN] -| hash predicates: sum(t1.id) = t.int_col +| hash predicates: t1.id + t2.id = t.int_col | |--00:SCAN HDFS [functional.alltypestiny t] | partitions=4/4 files=4 size=460B 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test index 9b17ff1..219e5a9 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test @@ -3415,14 +3415,16 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER JOIN] | hash predicates: l_partkey = p_partkey -| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size >= 1 AND p_size <= 5 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size >= 1 AND p_size <= 10 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size >= 1 AND p_size <= 15 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')) +| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size <= 5) OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size <= 10) OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size <= 15)) | runtime filters: RF000 <- p_partkey | |--01:SCAN HDFS [tpch.part] | partitions=1/1 files=1 size=22.83MB +| predicates: p_size 
>= 1 | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + predicates: l_shipmode IN ('AIR', 'AIR REG'), l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> l_partkey ---- DISTRIBUTEDPLAN PLAN-ROOT SINK @@ -3437,16 +3439,18 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: l_partkey = p_partkey -| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size >= 1 AND p_size <= 5 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size >= 1 AND p_size <= 10 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size >= 1 AND p_size <= 15 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')) +| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size <= 5) OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size <= 10) OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size <= 15)) | runtime filters: RF000 <- p_partkey | |--04:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch.part] | partitions=1/1 files=1 size=22.83MB +| predicates: p_size >= 1 | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + predicates: l_shipmode IN ('AIR', 'AIR REG'), l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> l_partkey ---- PARALLELPLANS PLAN-ROOT SINK @@ -3461,7 +3465,7 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER 
JOIN, BROADCAST] | hash predicates: l_partkey = p_partkey -| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size >= 1 AND p_size <= 5 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size >= 1 AND p_size <= 10 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size >= 1 AND p_size <= 15 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')) +| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size <= 5) OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size <= 10) OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size <= 15)) | runtime filters: RF000 <- p_partkey | |--JOIN BUILD @@ -3472,9 +3476,11 @@ PLAN-ROOT SINK | | | 01:SCAN HDFS [tpch.part] | partitions=1/1 files=1 size=22.83MB +| predicates: p_size >= 1 | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + predicates: l_shipmode IN ('AIR', 'AIR REG'), l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> l_partkey ==== # TPCH-Q20 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test 
b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test index a5e6e07..a5f2511 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test @@ -1074,11 +1074,14 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER JOIN] | hash predicates: l_partkey = p_partkey -| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size >= 1 AND p_size <= 5 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size >= 1 AND p_size <= 10 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size >= 1 AND p_size <= 15 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')) +| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size <= 5) OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size <= 10) OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size <= 15)) | |--01:SCAN KUDU [tpch_kudu.part] +| kudu predicates: p_size >= 1 | 00:SCAN KUDU [tpch_kudu.lineitem] + predicates: l_shipmode IN ('AIR', 'AIR REG') + kudu predicates: l_shipinstruct = 'DELIVER IN PERSON' ==== # Q20 - Potential Part Promotion Query select http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test 
---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test index 0c3dd9b..31e5d40 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test @@ -632,7 +632,7 @@ PLAN-ROOT SINK | output: sum(l_extendedprice * l_discount) | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems] - partitions=1/1 files=4 size=292.35MB + partitions=1/1 files=4 size=577.87MB predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01' ---- DISTRIBUTEDPLAN PLAN-ROOT SINK @@ -646,7 +646,7 @@ PLAN-ROOT SINK | output: sum(l_extendedprice * l_discount) | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems] - partitions=1/1 files=4 size=292.35MB + partitions=1/1 files=4 size=577.87MB predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01' ==== # TPCH-Q7 @@ -2117,14 +2117,16 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER JOIN] | hash predicates: l_partkey = p_partkey -| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size >= 1 AND p_size <= 5 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size >= 1 AND p_size <= 10 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size >= 1 AND p_size <= 15 AND l_shipmode IN ('AIR', 'AIR REG') AND 
l_shipinstruct = 'DELIVER IN PERSON')) +| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size <= 5) OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size <= 10) OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size <= 15)) | runtime filters: RF000 <- p_partkey | |--01:SCAN HDFS [tpch_nested_parquet.part p] | partitions=1/1 files=1 size=6.20MB +| predicates: p_size >= 1 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] partitions=1/1 files=4 size=577.87MB + predicates: l_shipmode IN ('AIR', 'AIR REG'), l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> l_partkey ---- DISTRIBUTEDPLAN PLAN-ROOT SINK @@ -2139,16 +2141,18 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: l_partkey = p_partkey -| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size >= 1 AND p_size <= 5 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size >= 1 AND p_size <= 10 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size >= 1 AND p_size <= 15 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')) +| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size <= 5) OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 
'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size <= 10) OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size <= 15)) | runtime filters: RF000 <- p_partkey | |--04:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch_nested_parquet.part p] | partitions=1/1 files=1 size=6.20MB +| predicates: p_size >= 1 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] partitions=1/1 files=4 size=577.87MB + predicates: l_shipmode IN ('AIR', 'AIR REG'), l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> l_partkey ==== # TPCH-Q20 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0aeb6805/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test index de442b2..c927b8b 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test @@ -1253,14 +1253,16 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER JOIN] | hash predicates: tpch.lineitem.l_partkey = tpch.part.p_partkey -| other predicates: ((tpch.part.p_brand = 'Brand#12' AND tpch.part.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND tpch.lineitem.l_quantity >= 1 AND tpch.lineitem.l_quantity <= 11 AND tpch.part.p_size >= 1 AND tpch.part.p_size <= 5 AND tpch.lineitem.l_shipmode IN ('AIR', 'AIR REG') AND tpch.lineitem.l_shipinstruct = 'DELIVER IN PERSON') OR (tpch.part.p_brand = 'Brand#23' AND tpch.part.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND tpch.lineitem.l_quantity >= 10 AND tpch.lineitem.l_quantity <= 20 AND tpch.part.p_size >= 1 AND tpch.part.p_size <= 10 AND tpch.lineitem.l_shipmode IN ('AIR', 'AIR 
REG') AND tpch.lineitem.l_shipinstruct = 'DELIVER IN PERSON') OR (tpch.part.p_brand = 'Brand#34' AND tpch.part.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND tpch.lineitem.l_quantity >= 20 AND tpch.lineitem.l_quantity <= 30 AND tpch.part.p_size >= 1 AND tpch.part.p_size <= 15 AND tpch.lineitem.l_shipmode IN ('AIR', 'AIR REG') AND tpch.lineitem.l_shipinstruct = 'DELIVER IN PERSON')) +| other predicates: ((tpch.part.p_brand = 'Brand#12' AND tpch.part.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND tpch.lineitem.l_quantity >= 1 AND tpch.lineitem.l_quantity <= 11 AND tpch.part.p_size <= 5) OR (tpch.part.p_brand = 'Brand#23' AND tpch.part.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND tpch.lineitem.l_quantity >= 10 AND tpch.lineitem.l_quantity <= 20 AND tpch.part.p_size <= 10) OR (tpch.part.p_brand = 'Brand#34' AND tpch.part.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND tpch.lineitem.l_quantity >= 20 AND tpch.lineitem.l_quantity <= 30 AND tpch.part.p_size <= 15)) | runtime filters: RF000 <- tpch.part.p_partkey | |--01:SCAN HDFS [tpch.part] | partitions=1/1 files=1 size=22.83MB +| predicates: tpch.part.p_size >= 1 | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + predicates: tpch.lineitem.l_shipmode IN ('AIR', 'AIR REG'), tpch.lineitem.l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> tpch.lineitem.l_partkey ==== # TPCH-Q20
