Repository: incubator-impala
Updated Branches:
  refs/heads/master 41e3055f9 -> fec05231b
IMPALA-5689: Avoid inverting non-equi left joins

When checking if a join can be inverted, we forgot to also check
that the resulting join would not be a non-equi right semi-join
or a non-equi right outer-join. We currently do not support those
kinds of joins in the backend.

Testing:
-Added a planner test

Change-Id: I91ba66fe30139fcd44d4615a142f183266800aab
Reviewed-on: http://gerrit.cloudera.org:8080/7476
Reviewed-by: Taras Bobrovytsky <[email protected]>
Tested-by: Impala Public Jenkins

Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9bfcd78b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9bfcd78b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9bfcd78b

Branch: refs/heads/master
Commit: 9bfcd78bdabe41c22d69afc5684e6bc6f73143af
Parents: 41e3055
Author: Taras Bobrovytsky <[email protected]>
Authored: Thu Jul 20 15:18:02 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Fri Jul 28 11:24:21 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/planner/JoinNode.java     | 24 ++++++--
 .../java/org/apache/impala/planner/Planner.java |  9 +--
 .../queries/PlannerTest/nested-loop-join.test   | 58 ++++++++++++++++++++
 3 files changed, 80 insertions(+), 11 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9bfcd78b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/JoinNode.java b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
index 5e0beb4..04dc40a 100644
--- a/fe/src/main/java/org/apache/impala/planner/JoinNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
@@ -170,6 +170,25 @@ public abstract class JoinNode extends PlanNode
{
     }
   }
 
+  /**
+   * Returns true if the join node can be inverted. Inversions are not allowed
+   * in the following cases:
+   * 1. Straight join.
+   * 2. The operator is a null-aware left anti-join. There is no backend support
+   *    for a null-aware right anti-join because we cannot execute it efficiently.
+   * 3. In the case of a distributed plan, the resulting join is a non-equi right
+   *    semi-join or a non-equi right outer-join. There is no backend support.
+   */
+  public boolean isInvertible(boolean isLocalPlan) {
+    if (isStraightJoin()) return false;
+    if (joinOp_.isNullAwareLeftAntiJoin()) return false;
+    if (isLocalPlan) return true;
+    if (!eqJoinConjuncts_.isEmpty()) return true;
+    if (joinOp_.isLeftOuterJoin()) return false;
+    if (joinOp_.isLeftSemiJoin()) return false;
+    return true;
+  }
+
   public JoinOperator getJoinOp() { return joinOp_; }
   public List<BinaryPredicate> getEqJoinConjuncts() { return eqJoinConjuncts_; }
   public List<Expr> getOtherJoinConjuncts() { return otherJoinConjuncts_; }
@@ -615,11 +634,6 @@ public abstract class JoinNode extends PlanNode {
     for (BinaryPredicate p: eqJoinConjuncts_) p.reverse();
   }
 
-  public boolean hasConjuncts() {
-    return !eqJoinConjuncts_.isEmpty() || !otherJoinConjuncts_.isEmpty() ||
-        !conjuncts_.isEmpty();
-  }
-
   @Override
   protected String getDisplayLabelDetail() {
     StringBuilder output = new StringBuilder(joinOp_.toString());

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9bfcd78b/fe/src/main/java/org/apache/impala/planner/Planner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index c202094..4cfd57e 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -427,12 +427,9 @@ public class Planner {
       JoinNode joinNode = (JoinNode) root;
       JoinOperator joinOp = joinNode.getJoinOp();
 
-      // 1.
No inversion allowed due to straight join.
-      // 2. The null-aware left anti-join operator is not considered for inversion.
-      //    There is no backend support for a null-aware right anti-join because
-      //    we cannot execute it efficiently.
-      if (joinNode.isStraightJoin() || joinOp.isNullAwareLeftAntiJoin()) {
-        // Re-compute tuple ids since their order must correspond to the order of children.
+      if (!joinNode.isInvertible(isLocalPlan)) {
+        // Re-compute tuple ids since their order must correspond to the order
+        // of children.
         root.computeTupleIds();
         return;
       }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9bfcd78b/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
index fecec7f..af3d101 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
@@ -214,3 +214,61 @@ PLAN-ROOT SINK
    partitions=1/1 files=0 size=0B
    predicates: e.id < 10
 ====
+# IMPALA-5689: Do not invert a left outer join with no equi-join predicates.
+select * from (
+  select distinct int_col
+  from functional.alltypes) t1
+left outer join functional.alltypes t2 on (t2.bigint_col=5)
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+07:EXCHANGE [UNPARTITIONED]
+|
+03:NESTED LOOP JOIN [LEFT OUTER JOIN, BROADCAST]
+|
+|--06:EXCHANGE [BROADCAST]
+|  |
+|  02:SCAN HDFS [functional.alltypes t2]
+|     partitions=24/24 files=24 size=478.45KB
+|     predicates: (t2.bigint_col = 5)
+|
+05:AGGREGATE [FINALIZE]
+|  group by: int_col
+|
+04:EXCHANGE [HASH(int_col)]
+|
+01:AGGREGATE [STREAMING]
+|  group by: int_col
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-5689: Do not invert a left semi join with no equi-join predicates.
+select * from (
+  select distinct int_col
+  from functional.alltypes) t1
+left semi join functional.alltypes t2 on (t2.bigint_col=5)
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+07:EXCHANGE [UNPARTITIONED]
+|
+03:NESTED LOOP JOIN [LEFT SEMI JOIN, BROADCAST]
+|
+|--06:EXCHANGE [BROADCAST]
+|  |
+|  02:SCAN HDFS [functional.alltypes t2]
+|     partitions=24/24 files=24 size=478.45KB
+|     predicates: (t2.bigint_col = 5)
+|
+05:AGGREGATE [FINALIZE]
+|  group by: int_col
+|
+04:EXCHANGE [HASH(int_col)]
+|
+01:AGGREGATE [STREAMING]
+|  group by: int_col
+|
+00:SCAN HDFS [functional.alltypes]
+   partitions=24/24 files=24 size=478.45KB
+====
