IMPALA-4423: Correct but conservative implementation of Subquery.equals().

The underlying problem was for trivial/constant [NOT] EXISTS subqueries
we substituted out Subqueries with bool literals using an ExprSubstitutionMap,
but the Subquery.equals() function was not implemented properly, so we ended
up matching Subqueries to the wrong entry in the ExprSubstitutionMap.
This could ultimately lead to wrong plans and results.

Testing: Corrected an existing test and modified an existing test for
extra coverage.

Change-Id: I5562d98ce36507aa5e253323e184fd42b54f27ed
Reviewed-on: http://gerrit.cloudera.org:8080/4923
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c5f49ec9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c5f49ec9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c5f49ec9

Branch: refs/heads/master
Commit: c5f49ec9bbc1b191545b6edd2848acb632b45973
Parents: 14891fe
Author: Alex Behm <[email protected]>
Authored: Wed Nov 2 10:54:32 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Fri Nov 4 00:19:33 2016 +0000

----------------------------------------------------------------------
 .../org/apache/impala/analysis/Subquery.java    | 17 +++++++++--
 .../queries/PlannerTest/subquery-rewrite.test   | 32 +++++++++++++++++---
 2 files changed, 41 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c5f49ec9/fe/src/main/java/org/apache/impala/analysis/Subquery.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Subquery.java 
b/fe/src/main/java/org/apache/impala/analysis/Subquery.java
index f3dcb7c..414b98d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Subquery.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Subquery.java
@@ -20,14 +20,14 @@ package org.apache.impala.analysis;
 import java.util.ArrayList;
 
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.StructField;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.thrift.TExprNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
@@ -149,6 +149,17 @@ public class Subquery extends Expr {
     return new StructType(structFields);
   }
 
+  /**
+   * Returns true if the toSql() of the Subqueries is identical. May return 
false for
+   * equivalent statements even due to minor syntactic differences like 
parenthesis.
+   * TODO: Switch to a less restrictive implementation.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (!super.equals(o)) return false;
+    return stmt_.toSql().equals(((Subquery)o).stmt_.toSql());
+  }
+
   @Override
   public Subquery clone() { return new Subquery(this); }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c5f49ec9/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index 1f52597..0880d9f 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -1732,8 +1732,8 @@ PLAN-ROOT SINK
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF001 -> tt1.int_col
 ====
-# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
-# aggregates. All predicates evaluate to FALSE. (IMPALA-1550)
+# IMPALA-1550/IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with 
aggregates
+# that can be evaluated at query compile time. All predicates evaluate to 
FALSE.
 select 1
 from functional.alltypestiny t1
 where exists
@@ -1744,13 +1744,17 @@ and not exists
   (select count(distinct int_col)
    from functional.alltypesagg t3
    where t1.id = t3.id)
+and not exists
+  (select min(int_col)
+   from functional.alltypestiny t5
+   where t1.id = t5.id and false)
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:EMPTYSET
 ====
-# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
-# aggregates. All predicates evaluate to TRUE. (IMPALA-1550)
+# IMPALA-1550/IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with 
aggregates
+# that can be evaluated at query compile time. All predicates evaluate to TRUE.
 select 1
 from functional.alltypestiny t1
 where not exists
@@ -1768,7 +1772,7 @@ and not exists
 and not exists
   (select min(int_col)
    from functional.alltypestiny t5
-   where t1.id = t5.id and false)
+   where t1.id = t5.id having false)
 ---- PLAN
 PLAN-ROOT SINK
 |
@@ -2031,3 +2035,21 @@ PLAN-ROOT SINK
    predicates: 20 >= t.bigint_col, t.string_col <= t.date_string_col
    runtime filters: RF000 -> id
 ====
+# IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with aggregates. 
Both
+# subqueries can be evaluated at query compile time. The first one evaluates to
+# TRUE and the second one to FALSE.
+select 1
+from functional.alltypestiny t1
+where not exists
+  (select id
+   from functional.alltypes t2
+   where t1.int_col = t2.int_col limit 0)
+and not exists
+  (select min(int_col)
+   from functional.alltypestiny t5
+   where t1.id = t5.id and false)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:EMPTYSET
+====

Reply via email to