IMPALA-4423: Correct but conservative implementation of Subquery.equals(). The underlying problem was for trivial/constant [NOT] EXISTS subqueries we substituted out Subqueries with bool literals using an ExprSubstitutionMap, but the Subquery.equals() function was not implemented properly, so we ended up matching Subqueries to the wrong entry in the ExprSubstitutionMap. This could ultimately lead to wrong plans and results.
Testing: Corrected an existing test and modified an existing test for extra coverage. Change-Id: I5562d98ce36507aa5e253323e184fd42b54f27ed Reviewed-on: http://gerrit.cloudera.org:8080/4923 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c5f49ec9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c5f49ec9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c5f49ec9 Branch: refs/heads/master Commit: c5f49ec9bbc1b191545b6edd2848acb632b45973 Parents: 14891fe Author: Alex Behm <[email protected]> Authored: Wed Nov 2 10:54:32 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Fri Nov 4 00:19:33 2016 +0000 ---------------------------------------------------------------------- .../org/apache/impala/analysis/Subquery.java | 17 +++++++++-- .../queries/PlannerTest/subquery-rewrite.test | 32 +++++++++++++++++--- 2 files changed, 41 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c5f49ec9/fe/src/main/java/org/apache/impala/analysis/Subquery.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/Subquery.java b/fe/src/main/java/org/apache/impala/analysis/Subquery.java index f3dcb7c..414b98d 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Subquery.java +++ b/fe/src/main/java/org/apache/impala/analysis/Subquery.java @@ -20,14 +20,14 @@ package org.apache.impala.analysis; import java.util.ArrayList; import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.impala.catalog.ArrayType; import org.apache.impala.catalog.StructField; import org.apache.impala.catalog.StructType; import org.apache.impala.common.AnalysisException; import org.apache.impala.thrift.TExprNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -149,6 +149,17 @@ public class Subquery extends Expr { return new StructType(structFields); } + /** + * Returns true if the toSql() of the Subqueries is identical. May return false for + * equivalent statements even due to minor syntactic differences like parenthesis. + * TODO: Switch to a less restrictive implementation. + */ + @Override + public boolean equals(Object o) { + if (!super.equals(o)) return false; + return stmt_.toSql().equals(((Subquery)o).stmt_.toSql()); + } + @Override public Subquery clone() { return new Subquery(this); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c5f49ec9/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test index 1f52597..0880d9f 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test @@ -1732,8 +1732,8 @@ PLAN-ROOT SINK partitions=11/11 files=11 size=814.73KB runtime filters: RF001 -> tt1.int_col ==== -# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and -# aggregates. All predicates evaluate to FALSE. (IMPALA-1550) +# IMPALA-1550/IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with aggregates +# that can be evaluated at query compile time. All predicates evaluate to FALSE. select 1 from functional.alltypestiny t1 where exists @@ -1744,13 +1744,17 @@ and not exists (select count(distinct int_col) from functional.alltypesagg t3 where t1.id = t3.id) +and not exists + (select min(int_col) + from functional.alltypestiny t5 + where t1.id = t5.id and false) ---- PLAN PLAN-ROOT SINK | 00:EMPTYSET ==== -# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and -# aggregates. All predicates evaluate to TRUE. (IMPALA-1550) +# IMPALA-1550/IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with aggregates +# that can be evaluated at query compile time. All predicates evaluate to TRUE. select 1 from functional.alltypestiny t1 where not exists @@ -1768,7 +1772,7 @@ and not exists and not exists (select min(int_col) from functional.alltypestiny t5 - where t1.id = t5.id and false) + where t1.id = t5.id having false) ---- PLAN PLAN-ROOT SINK | @@ -2031,3 +2035,21 @@ PLAN-ROOT SINK predicates: 20 >= t.bigint_col, t.string_col <= t.date_string_col runtime filters: RF000 -> id ==== +# IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with aggregates. Both +# subqueries can be evaluated at query compile time. The first one evaluates to +# TRUE and the second one to FALSE. +select 1 +from functional.alltypestiny t1 +where not exists + (select id + from functional.alltypes t2 + where t1.int_col = t2.int_col limit 0) +and not exists + (select min(int_col) + from functional.alltypestiny t5 + where t1.id = t5.id and false) +---- PLAN +PLAN-ROOT SINK +| +00:EMPTYSET +====
