This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 130a55e5269ea023b43ef2c0b495989cb0759800 Author: Abhishek Rawat <[email protected]> AuthorDate: Wed Jun 1 17:50:20 2022 -0700 IMPALA-11284: Do non-optional rewrites for || and Between predicate IMPALA-6590 disabled expression rewrites for ValuesStmt. However, CompoundVerticalBarExpr (||) cannot be executed directly without rewrite. This is because it could either be an OR operation with boolean arguments or CONCAT function call with string arguments. Backend cannot evaluate a BetweenPredicate and relies on rewriting BetweenPredicate into a conjunctive or disjunctive CompoundPredicate. This patch enables non-optional expression rewrites for ValuesStmt with CompoundVerticalBarExpr or BetweenPredicate. Testing: - Extended ExprRewriterTest and Planner test to have values clause with || and Between predicate Change-Id: I99b8b33bf6468d12b9e26f0a6e744feb7072619c Reviewed-on: http://gerrit.cloudera.org:8080/18581 Reviewed-by: Michael Smith <[email protected]> Reviewed-by: Daniel Becker <[email protected]> Tested-by: Riza Suminto <[email protected]> --- .../java/org/apache/impala/analysis/ValuesStmt.java | 18 ++++++++++++++---- .../java/org/apache/impala/rewrite/ExprRewriter.java | 7 +++++++ .../org/apache/impala/analysis/ExprRewriterTest.java | 16 ++++++++++++++-- .../functional-query/queries/QueryTest/values.test | 20 +++++++++++++++++++- 4 files changed, 54 insertions(+), 7 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java index 6231497dc..d6a3d7348 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java @@ -17,13 +17,16 @@ package org.apache.impala.analysis; +import java.util.Arrays; import java.util.List; import com.google.common.base.Preconditions; import static org.apache.impala.analysis.ToSqlOptions.DEFAULT; import org.apache.impala.common.AnalysisException; +import org.apache.impala.rewrite.BetweenToCompoundRule; import org.apache.impala.rewrite.ExprRewriter; +import org.apache.impala.rewrite.ExtractCompoundVerticalBarExprRule; /** * Representation of a values() statement with a list of constant-expression lists. @@ -84,11 +87,18 @@ public class ValuesStmt extends UnionStmt { @Override public ValuesStmt clone() { return new ValuesStmt(this); } - /** - * Intentionally left empty to disable expression rewrite for values clause. - */ @Override - public void rewriteExprs(ExprRewriter rewriter) {} + public void rewriteExprs(ExprRewriter rewriter) throws AnalysisException { + // IMPALA-11284: Expression rewrites for VALUES() could result in performance + // regression since overhead can be huge and there is virtually no benefit of + // rewrite if the expression will only ever be evaluated once (IMPALA-6590). + // The following code only does the non-optional rewrites for || and BETWEEN + // operator as the backend cannot execute them directly. + ExprRewriter mandatoryRewriter = new ExprRewriter(Arrays.asList( + BetweenToCompoundRule.INSTANCE, ExtractCompoundVerticalBarExprRule.INSTANCE)); + super.rewriteExprs(mandatoryRewriter); + rewriter.addNumChanges(mandatoryRewriter); + } @Override protected boolean shouldAvoidLossyCharPadding(Analyzer analyzer) { diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java index 37c092733..d30531ac6 100644 --- a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java +++ b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java @@ -91,6 +91,13 @@ public class ExprRewriter { for (int i = 0; i < exprs.size(); ++i) exprs.set(i, rewrite(exprs.get(i), analyzer)); } + /** + * Add numChanges_ of otherRewriter to this rewriter's numChanges_. + */ + public void addNumChanges(ExprRewriter otherRewriter) { + numChanges_ += otherRewriter.numChanges_; + } + public void reset() { numChanges_ = 0; } public boolean changed() { return numChanges_ > 0; } public int getNumChanges() { return numChanges_; } diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java index 9880db3b2..4c9457a39 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java @@ -149,8 +149,20 @@ public class ExprRewriterTest extends AnalyzerTest { stmt_, stmt_), 47, 23); // Constant select. RewritesOk("select 1, 2, 3, 4", 4, 4); - // Values stmt - expression rewrites are disabled. - RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1)", 0, 0); + // Values stmt - expression rewrites are not required in this test cases. + RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1)," + + "(CAST(true OR false AS INT), '2', 3*1+2-4, 1.1%1)", + 0, 0); + RewritesOk("values(CONCAT('a', 'b'), true OR true)", 0, 0); + // Values stmt - expression rewrites are required for || and Between predicate. + RewritesOk("values(1 <= 2 || 'impala' <> 'IMPALA'), (0.5 BETWEEN 0 AND 1)," + + "('a' NOT BETWEEN 'b' AND 'c')", + 3, 0); + // Values stmt - expression rewrites are required for || and Between predicate that + // is not at root Expr. + RewritesOk("values(1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND " + + "(('a' || 'b') = 'ab' AND (true || false))))", + 3, 0); // Test WHERE-clause subqueries. RewritesOk("select id, int_col from functional.alltypes a " + "where exists (select 1 from functional.alltypes " + diff --git a/testdata/workloads/functional-query/queries/QueryTest/values.test b/testdata/workloads/functional-query/queries/QueryTest/values.test index bfda103b2..f19830dbe 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/values.test +++ b/testdata/workloads/functional-query/queries/QueryTest/values.test @@ -140,4 +140,22 @@ select cast("0.43149576573887316" as double) 0.43149576573887316 ---- TYPES DOUBLE -==== \ No newline at end of file +==== +---- QUERY +# IMPALA-11284: Don't skip rewrites for || and BETWEEN operator as the backend cannot +# execute them directly. +select * from +( + values (concat("a", "b" || "c"), 1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND (true || false))), + ("hello" || "world", 0 <= 1 || 0.5 < 0.6), + ("impala", 4.0 BETWEEN 3.2 AND 4.1), + ("sql", 'a' NOT BETWEEN 'b' AND 'c') +) t; +---- RESULTS +'abc',true +'helloworld',true +'impala',true +'sql',true +---- TYPES +string,boolean +====
