Repository: incubator-impala
Updated Branches:
  refs/heads/master baf8fe202 -> 45740c8bc
IMPALA-3678: Fix migration of predicates into union operands with an order by + limit. There were two separate issues: First, the SortNode incorrectly picked up unassigned conjuncts, and expected those to be empty. In this case where predicates are migrated into union operands, there could actually be unassigned conjuncts bound by the SortNode's tuple id (and so would be incorrectly picked up). The fix is to not pick up unassigned conjuncts in the SortNode, and allow them to be picked up later (into a SelectNode). Second, when generating the plan for union operands we were missing a call to graft a SelectNode on top of the operand plan to capture unassigned conjuncts. Change-Id: I95d105ac15a3dc975e52dfd418890e13f912dfce Reviewed-on: http://gerrit.cloudera.org:8080/3600 Reviewed-by: Alex Behm <[email protected]> Tested-by: Alex Behm <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/45740c8b Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/45740c8b Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/45740c8b Branch: refs/heads/master Commit: 45740c8bcc05fb5075dd663c81de1cfc69ee5f1b Parents: 6ee15fa Author: Alex Behm <[email protected]> Authored: Fri Jul 8 14:24:04 2016 -0700 Committer: Taras Bobrovytsky <[email protected]> Committed: Fri Jul 15 18:27:05 2016 +0000 ---------------------------------------------------------------------- .../impala/planner/SingleNodePlanner.java | 3 + .../com/cloudera/impala/planner/SortNode.java | 3 +- .../queries/PlannerTest/union.test | 84 +++++++++++++++++++- 3 files changed, 88 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/45740c8b/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java ---------------------------------------------------------------------- diff 
--git a/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java b/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java index 9f07edb..84552e0 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java @@ -1595,6 +1595,9 @@ public class SingleNodePlanner { } } PlanNode opPlan = createQueryPlan(queryStmt, op.getAnalyzer(), false); + // There may still be unassigned conjuncts if the operand has an order by + limit. + // Place them into a SelectNode on top of the operand's plan. + opPlan = addUnassignedConjuncts(analyzer, opPlan.getTupleIds(), opPlan); if (opPlan instanceof EmptySetNode) continue; unionNode.addChild(opPlan, op.getQueryStmt().getBaseTblResultExprs()); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/45740c8b/fe/src/main/java/com/cloudera/impala/planner/SortNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/planner/SortNode.java b/fe/src/main/java/com/cloudera/impala/planner/SortNode.java index cd5f58c..6cc0077 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/SortNode.java +++ b/fe/src/main/java/com/cloudera/impala/planner/SortNode.java @@ -89,7 +89,8 @@ public class SortNode extends PlanNode { @Override public void init(Analyzer analyzer) throws InternalException { - assignConjuncts(analyzer); + // Do not assignConjuncts() here, so that conjuncts bound by this SortNode's tuple id + // can be placed in a downstream SelectNode. A SortNode cannot evaluate conjuncts. Preconditions.checkState(conjuncts_.isEmpty()); // Compute the memory layout for the generated tuple. 
computeMemLayout(analyzer); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/45740c8b/testdata/workloads/functional-planner/queries/PlannerTest/union.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/union.test b/testdata/workloads/functional-planner/queries/PlannerTest/union.test index 86c59c7..1dfbdcc 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/union.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/union.test @@ -2688,4 +2688,86 @@ select l_orderkey from tpch.lineitem UNION DISTINCT (select l_orderkey from tpch | 01:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB -==== \ No newline at end of file +==== +# IMPALA-3678: Predicates migrated into a union operand should be placed into +# a SelectNode if that union operand has an order by + limit. +select * from + ((select * from functional.alltypes) + union all + (select * from functional.alltypes order by id) + union all + (select * from functional.alltypessmall order by id limit 10) + union all + (select * from functional.alltypestiny order by id limit 20 offset 10)) v +where v.id < 10 and v.int_col > 20 +---- PLAN +00:UNION +| +|--08:SELECT +| | predicates: id < 10, int_col > 20 +| | +| 07:TOP-N [LIMIT=20 OFFSET=10] +| | order by: id ASC +| | +| 06:SCAN HDFS [functional.alltypestiny] +| partitions=4/4 files=4 size=460B +| +|--05:SELECT +| | predicates: id < 10, int_col > 20 +| | +| 04:TOP-N [LIMIT=10] +| | order by: id ASC +| | +| 03:SCAN HDFS [functional.alltypessmall] +| partitions=4/4 files=4 size=6.32KB +| +|--02:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: functional.alltypes.id < 10, functional.alltypes.int_col > 20 +| +01:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + predicates: functional.alltypes.id < 10, functional.alltypes.int_col > 20 +==== +# IMPALA-3678: Same 
as above but with union distinct. +select * from + ((select * from functional.alltypes) + union distinct + (select * from functional.alltypes order by id) + union distinct + (select * from functional.alltypessmall order by id limit 10) + union distinct + (select * from functional.alltypestiny order by id limit 20 offset 10)) v +where v.id < 10 and v.int_col > 20 +---- PLAN +09:AGGREGATE [FINALIZE] +| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month +| +00:UNION +| +|--08:SELECT +| | predicates: id < 10, int_col > 20 +| | +| 07:TOP-N [LIMIT=20 OFFSET=10] +| | order by: id ASC +| | +| 06:SCAN HDFS [functional.alltypestiny] +| partitions=4/4 files=4 size=460B +| +|--05:SELECT +| | predicates: id < 10, int_col > 20 +| | +| 04:TOP-N [LIMIT=10] +| | order by: id ASC +| | +| 03:SCAN HDFS [functional.alltypessmall] +| partitions=4/4 files=4 size=6.32KB +| +|--02:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: functional.alltypes.id < 10, functional.alltypes.int_col > 20 +| +01:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + predicates: functional.alltypes.id < 10, functional.alltypes.int_col > 20 +====
