IMPALA-3861: Replace BetweenPredicates with their equivalent CompoundPredicate.
The bug: Our BetweenPredicate has a complex object structure that is unlike most other Exprs because we generate an equivalent CompoundPredicate during analysis and replace the original children. Keeping the various members in sync and preserving the object structure during clone() and substitute() is very difficult and error prone. In particular, subquery rewriting is difficult because we extract and replace correlated BinaryPredicates. Substituting BinaryPredicates in a BetweenPredicate's children is not equivalent to a substitution on the BetweenPredicat's original children, so keeping the various redundant members in sync is quite difficult. The fix is to replace BetweenPredicates with their equivalent CompoundPredicates before performing subquery rewrites. We ultimately still want to fix clone() and substitute() for BetweenPredicates, but an elegant solution is likely to more involved. Change-Id: I0838b30444ed9704ce6a058d30718a24caa7444a Reviewed-on: http://gerrit.cloudera.org:8080/3804 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ac1215fd Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ac1215fd Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ac1215fd Branch: refs/heads/master Commit: ac1215fd3162bfda3eb87759b7098002837a1307 Parents: b9f6392 Author: Alex Behm <[email protected]> Authored: Thu Jul 28 00:48:49 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Sat Jul 30 07:23:52 2016 +0000 ---------------------------------------------------------------------- .../cloudera/impala/analysis/StmtRewriter.java | 17 ++++++------- .../impala/analysis/AnalyzeSubqueriesTest.java | 21 ++++++++++++++++ .../queries/PlannerTest/subquery-rewrite.test | 25 ++++++++++++++++++++ 3 files changed, 53 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac1215fd/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java index c3fbc20..de411c8 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java @@ -23,7 +23,6 @@ import org.slf4j.LoggerFactory; import com.cloudera.impala.analysis.AnalysisContext.AnalysisResult; import com.cloudera.impala.analysis.UnionStmt.UnionOperand; import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.common.TreeNode; import com.google.common.base.Preconditions; import com.google.common.base.Predicates; import com.google.common.collect.Iterables; @@ -202,9 +201,8 @@ public class StmtRewriter { int numTableRefs = stmt.fromClause_.size(); ArrayList<Expr> exprsWithSubqueries = Lists.newArrayList(); ExprSubstitutionMap smap = new ExprSubstitutionMap(); - // Replace all BetweenPredicates that contain subqueries with their - // equivalent compound predicates. - stmt.whereClause_ = replaceBetweenPredicates(stmt.whereClause_); + // Replace all BetweenPredicates with their equivalent compound predicates. + stmt.whereClause_ = rewriteBetweenPredicates(stmt.whereClause_); // Check if all the conjuncts in the WHERE clause that contain subqueries // can currently be rewritten as a join. for (Expr conjunct: stmt.whereClause_.getConjuncts()) { @@ -275,16 +273,15 @@ public class StmtRewriter { } /** - * Replace all BetweenPredicates containing subqueries with their - * equivalent compound predicates from the expr tree rooted at 'expr'. - * The modified expr tree is returned. + * Replace all BetweenPredicates with their equivalent compound predicates from the + * expr tree rooted at 'expr'. The modified expr tree is returned. */ - private static Expr replaceBetweenPredicates(Expr expr) { - if (expr instanceof BetweenPredicate && expr.contains(Subquery.class)) { + private static Expr rewriteBetweenPredicates(Expr expr) { + if (expr instanceof BetweenPredicate) { return ((BetweenPredicate)expr).getRewrittenPredicate(); } for (int i = 0; i < expr.getChildren().size(); ++i) { - expr.setChild(i, replaceBetweenPredicates(expr.getChild(i))); + expr.setChild(i, rewriteBetweenPredicates(expr.getChild(i))); } return expr; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac1215fd/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java index a5b4fe2..54292cc 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeSubqueriesTest.java @@ -217,6 +217,16 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { AnalyzesOk(String.format( "select id from functional.allcomplextypes t where id %s " + "(select f1 from t.struct_array_col a where t.int_struct_col.f1 = a.f1)", op)); + // Test correlated BETWEEN predicates. + AnalyzesOk(String.format("select 1 from functional.alltypes t where id %s " + + "(select id from functional.alltypesagg a where " + + " a.tinyint_col between t.tinyint_col and t.smallint_col and " + + " a.smallint_col between 10 and t.int_col and " + + " 20 between t.bigint_col and a.int_col and " + + " t.float_col between a.float_col and a.double_col and " + + " t.string_col between a.string_col and t.date_string_col and " + + " a.double_col between round(acos(t.float_col), 2) " + + " and cast(t.string_col as int))", op)); // Multiple nesting levels (uncorrelated queries) AnalyzesOk(String.format("select * from functional.alltypes t where id %s " + @@ -605,6 +615,17 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { String.format("Unsupported predicate with subquery: EXISTS (SELECT * FROM " + "functional.alltypesagg a WHERE t.id %s a.id)", cmpOp)); } + // Correlated BETWEEN predicate with relative table refs. + AnalyzesOk("select 1 from functional.allcomplextypes t where exists " + + "(select a.f1 from t.struct_array_col a " + + " where a.f1 between t.int_struct_col.f1 and t.int_struct_col.f2)"); + // Correlated BETWEEN predicate with absolute table refs. + AnalysisError("select 1 from functional.alltypes t where EXISTS " + + "(select id from functional.alltypessmall a " + + " where a.int_col between t.tinyint_col and t.bigint_col)", + "Unsupported predicate with subquery: " + + "EXISTS (SELECT id FROM functional.alltypessmall a " + + "WHERE a.int_col >= t.tinyint_col AND a.int_col <= t.bigint_col)"); // Uncorrelated EXISTS in a query with GROUP BY AnalyzesOk("select id, count(*) from functional.alltypes t " + http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac1215fd/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test index fa21c9e..a04f723 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test @@ -1839,3 +1839,28 @@ where t1.int_col = t2.int_col); partitions=11/11 files=11 size=814.73KB runtime filters: RF000 -> t1.int_col ==== +# IMPALA-3861: Test that IN subqueries with correlated BETWEEN predicates work. +select 1 from functional.alltypes t where id in + (select id from functional.alltypesagg a where + a.tinyint_col between t.tinyint_col and t.smallint_col and + a.smallint_col between 10 and t.int_col and + 20 between t.bigint_col and a.int_col and + t.float_col between a.float_col and a.double_col and + t.string_col between a.string_col and t.date_string_col and + a.double_col between round(acos(t.float_col), 2) + and cast(t.string_col as int)) +---- PLAN +02:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: id = id +| other join predicates: t.float_col >= a.float_col, a.tinyint_col >= t.tinyint_col, t.float_col <= a.double_col, a.smallint_col <= t.int_col, a.tinyint_col <= t.smallint_col, a.double_col <= CAST(t.string_col AS INT), t.string_col >= a.string_col, a.double_col >= round(acos(t.float_col), 2) +| runtime filters: RF000 <- id +| +|--01:SCAN HDFS [functional.alltypesagg a] +| partitions=11/11 files=11 size=814.73KB +| predicates: a.smallint_col >= 10, 20 <= a.int_col +| +00:SCAN HDFS [functional.alltypes t] + partitions=24/24 files=24 size=478.45KB + predicates: 20 >= t.bigint_col, t.string_col <= t.date_string_col + runtime filters: RF000 -> id +====
