IMPALA-5856: Fix outer join predicate assignment.

Fixes incorrect assignment of join predicates with the following properties:
- from the On-clause of a left outer join
- only references the left-hand side tuples (not the right hand side tuple)
- references full-outer joined tuples; the full outer join appears on the left
Testing: - a core/hdfs run passed - added new regression test Change-Id: I93db34d988cb66e00aa05d7dc161e0ca47042acb Reviewed-on: http://gerrit.cloudera.org:8080/8039 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/032dee28 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/032dee28 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/032dee28 Branch: refs/heads/master Commit: 032dee28de2fe95f80d9284eb41b64fd12a56c86 Parents: 50d603d Author: Alex Behm <[email protected]> Authored: Mon Sep 11 10:33:39 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Fri Sep 15 02:26:49 2017 +0000 ---------------------------------------------------------------------- .../org/apache/impala/analysis/Analyzer.java | 18 ++++++++--- .../queries/PlannerTest/outer-joins.test | 33 ++++++++++++++++++++ 2 files changed, 46 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/032dee28/fe/src/main/java/org/apache/impala/analysis/Analyzer.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 365091d..47acedc 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -83,7 +83,6 @@ import org.slf4j.LoggerFactory; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.base.Predicates; -import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -1223,6 +1222,10 @@ public class 
Analyzer { return result; } + public TableRef getOjRef(Expr e) { + return globalState_.ojClauseByConjunct.get(e.getId()); + } + public boolean isOjConjunct(Expr e) { return globalState_.ojClauseByConjunct.containsKey(e.getId()); } @@ -1375,9 +1378,14 @@ public class Analyzer { * evaluate 'e' at a node materializing 'tids'. Returns true otherwise. */ public boolean canEvalFullOuterJoinedConjunct(Expr e, List<TupleId> tids) { - TableRef fullOuterJoin = getFullOuterJoinRef(e); - if (fullOuterJoin == null) return true; - return tids.containsAll(fullOuterJoin.getAllTableRefIds()); + TableRef fullOjRef = getFullOuterJoinRef(e); + if (fullOjRef == null) return true; + // 'ojRef' represents the outer-join On-clause that 'e' originates from (if any). + // Might be the same as 'fullOjRef'. If different from 'fullOjRef' it means that + // 'e' should be assigned to the node materializing the 'ojRef' tuple ids. + TableRef ojRef = getOjRef(e); + TableRef targetRef = (ojRef != null && ojRef != fullOjRef) ? ojRef : fullOjRef; + return tids.containsAll(targetRef.getAllTableRefIds()); } /** @@ -1385,7 +1393,7 @@ public class Analyzer { * evaluate 'e' at a node materializing 'tids'. Returns true otherwise. 
*/ public boolean canEvalOuterJoinedConjunct(Expr e, List<TupleId> tids) { - TableRef outerJoin = globalState_.ojClauseByConjunct.get(e.getId()); + TableRef outerJoin = getOjRef(e); if (outerJoin == null) return true; return tids.containsAll(outerJoin.getAllTableRefIds()); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/032dee28/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test index 5b82c2d..51989cd 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test @@ -1016,3 +1016,36 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional.alltypes a] partitions=24/24 files=24 size=478.45KB ==== +# IMPALA-5856: Test correct assignment of a join predicate with the following properties: +# - from the On-clause of a left outer join +# - only references the left-hand side tuples (not the right hand side tuple) +# - references full-outer joined tuples; the full outer join appears on the left +select * from functional.alltypes t1 +full outer join functional.alltypessmall t2 + on t1.id = t2.id +left outer join functional.alltypestiny t3 + on coalesce(t1.id, t2.id) = t3.id and coalesce(t1.int_col, t2.int_col) = 2 + and t1.bigint_col > 10 and t2.bigint_col > 30 +where concat(t1.string_col, t2.string_col) = 'test1' and t3.string_col = 'test2' +---- PLAN +PLAN-ROOT SINK +| +04:HASH JOIN [LEFT OUTER JOIN] +| hash predicates: coalesce(t1.id, t2.id) = t3.id +| other join predicates: t1.bigint_col > 10, t2.bigint_col > 30, coalesce(t1.int_col, t2.int_col) = 2 +| other predicates: t3.string_col = 'test2' +| +|--02:SCAN HDFS [functional.alltypestiny t3] +| partitions=4/4 files=4 size=460B +| predicates: t3.string_col = 
'test2' +| +03:HASH JOIN [FULL OUTER JOIN] +| hash predicates: t1.id = t2.id +| other predicates: concat(t1.string_col, t2.string_col) = 'test1' +| +|--01:SCAN HDFS [functional.alltypessmall t2] +| partitions=4/4 files=4 size=6.32KB +| +00:SCAN HDFS [functional.alltypes t1] + partitions=24/24 files=24 size=478.45KB +====
