Repository: tajo Updated Branches: refs/heads/branch-0.11.0 7c7281de7 -> 7a1a8ce3c
TAJO-1600: Invalid query planning for distinct group-by. Closes #750 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/7a1a8ce3 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/7a1a8ce3 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/7a1a8ce3 Branch: refs/heads/branch-0.11.0 Commit: 7a1a8ce3c9648b67027b3714ad0eba3641d6ecc2 Parents: 7c7281d Author: Hyunsik Choi <[email protected]> Authored: Thu Sep 10 15:34:48 2015 +0900 Committer: Hyunsik Choi <[email protected]> Committed: Thu Sep 10 15:48:35 2015 +0900 ---------------------------------------------------------------------- CHANGES | 2 + .../tajo/engine/query/TestCaseByCases.java | 7 +++ .../queries/TestCaseByCases/testTAJO_1600.sql | 10 ++++ .../results/TestCaseByCases/testTAJO_1600.plan | 21 ++++++++ .../TestCaseByCases/testTAJO_1600.result | 7 +++ .../org/apache/tajo/plan/LogicalPlanner.java | 46 +++++++++++----- .../apache/tajo/plan/logical/GroupbyNode.java | 12 +++++ .../rewrite/rules/ProjectionPushDownRule.java | 55 +++++++++++++------- 8 files changed, 128 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index e6c9844..49c1d45 100644 --- a/CHANGES +++ b/CHANGES @@ -257,6 +257,8 @@ Release 0.11.0 - unreleased BUG FIXES + TAJO-1600: Invalid query planning for distinct group-by. (hyunsik) + TAJO-1782: Check ON_ERROR_STOP flag in TSQL when error is occured. (Contributed by Dongkyu Hwangbo, Committed by jihoon) http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java index bcf00f8..b32ba65 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java @@ -180,4 +180,11 @@ public class TestCaseByCases extends QueryTestCaseBase { assertResultSet(res); cleanupQuery(res); } + + @Test + @Option(withExplain = true) + @SimpleTest + public final void testTAJO_1600() throws Exception { + runSimpleTests(); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql b/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql new file mode 100644 index 0000000..0861283 --- /dev/null +++ b/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql @@ -0,0 +1,10 @@ +SELECT DISTINCT + c_custkey, + orders.o_orderkey, + orders.o_orderstatus, + orders.o_orderdate +from + customer left outer join orders on c_custkey = o_orderkey +order by + c_custkey, + o_orderkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan new file mode 100644 index 0000000..12921cd --- /dev/null +++ b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan @@ -0,0 +1,21 @@ +explain +------------------------------- +SORT(3) + => Sort Keys: default.customer.c_custkey (INT4) (asc),default.orders.o_orderkey (INT4) (asc) + GROUP_BY(5)(c_custkey,o_orderkey,o_orderstatus,o_orderdate) + => target list: default.customer.c_custkey (INT4), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT), default.orders.o_orderdate (TEXT) + => out schema:{(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)} + => in schema:{(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)} + JOIN(7)(LEFT_OUTER) + => Join Cond: default.customer.c_custkey (INT4) = default.orders.o_orderkey (INT4) + => target list: default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT) + => out schema: {(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)} + => in schema: {(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)} + SCAN(1) on default.orders + => target list: default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT) + => out schema: {(3) default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)} + => in schema: {(9) default.orders.o_clerk (TEXT), default.orders.o_comment (TEXT), default.orders.o_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderpriority (TEXT), default.orders.o_orderstatus (TEXT), default.orders.o_shippriority (INT4), default.orders.o_totalprice (FLOAT8)} + SCAN(0) on default.customer + => target list: default.customer.c_custkey (INT4) + => out schema: {(1) default.customer.c_custkey (INT4)} + => in schema: {(8) default.customer.c_acctbal (FLOAT8), default.customer.c_address (TEXT), default.customer.c_comment (TEXT), default.customer.c_custkey (INT4), default.customer.c_mktsegment (TEXT), default.customer.c_name (TEXT), default.customer.c_nationkey (INT4), default.customer.c_phone (TEXT)} http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result new file mode 100644 index 0000000..7cb5166 --- /dev/null +++ b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result @@ -0,0 +1,7 @@ +c_custkey,o_orderkey,o_orderstatus,o_orderdate +------------------------------- +1,1,O,1996-01-02 +2,2,O,1996-12-01 +3,3,F,1993-10-14 +4,null,null,null +5,null,null,null http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java index 6d40cda..24dcfd5 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java @@ -323,18 +323,40 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex LogicalPlan plan = context.plan; QueryBlock block = context.queryBlock; - Schema outSchema = projectionNode.getOutSchema(); - GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class); - dupRemoval.setChild(child); - dupRemoval.setInSchema(projectionNode.getInSchema()); - dupRemoval.setTargets(PlannerUtil.schemaToTargets(outSchema)); - dupRemoval.setGroupingColumns(outSchema.toArray()); - - block.registerNode(dupRemoval); - postHook(context, stack, null, dupRemoval); - - projectionNode.setChild(dupRemoval); - projectionNode.setInSchema(dupRemoval.getOutSchema()); + if (child.getType() == NodeType.SORT) { + SortNode sortNode = (SortNode) child; + + GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class); + dupRemoval.setForDistinctBlock(); + dupRemoval.setChild(sortNode.getChild()); + dupRemoval.setInSchema(sortNode.getInSchema()); + dupRemoval.setTargets(PlannerUtil.schemaToTargets(sortNode.getInSchema())); + dupRemoval.setGroupingColumns(sortNode.getInSchema().toArray()); + + block.registerNode(dupRemoval); + postHook(context, stack, null, dupRemoval); + + sortNode.setChild(dupRemoval); + sortNode.setInSchema(dupRemoval.getOutSchema()); + sortNode.setOutSchema(dupRemoval.getOutSchema()); + projectionNode.setInSchema(sortNode.getOutSchema()); + projectionNode.setChild(sortNode); + + } else { + Schema outSchema = projectionNode.getOutSchema(); + GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class); + dupRemoval.setForDistinctBlock(); + dupRemoval.setChild(child); + dupRemoval.setInSchema(projectionNode.getInSchema()); + dupRemoval.setTargets(PlannerUtil.schemaToTargets(outSchema)); + dupRemoval.setGroupingColumns(outSchema.toArray()); + + block.registerNode(dupRemoval); + postHook(context, stack, null, dupRemoval); + + projectionNode.setChild(dupRemoval); + projectionNode.setInSchema(dupRemoval.getOutSchema()); + } } private Pair<String [], ExprNormalizer.WindowSpecReferences []> doProjectionPrephase(PlanContext context, http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java index 23a9154..3966606 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java @@ -41,6 +41,10 @@ public class GroupbyNode extends UnaryNode implements Projectable, Cloneable { * */ @Expose private Target [] targets; @Expose private boolean hasDistinct = false; + /** + * A flag to indicate if this groupby is for distinct block (i.e., SELECT DISTINCT x,y,z, ...) + */ + @Expose private boolean forDistinctBlock = false; public GroupbyNode(int pid) { super(pid, NodeType.GROUP_BY); @@ -70,6 +74,14 @@ public class GroupbyNode extends UnaryNode implements Projectable, Cloneable { hasDistinct = distinct; } + public final void setForDistinctBlock() { + forDistinctBlock = true; + } + + public boolean isForDistinctBlock() { + return forDistinctBlock; + } + public boolean hasAggFunctions() { return aggrFunctions.length > 0; } http://git-wip-us.apache.org/repos/asf/tajo/blob/7a1a8ce3/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java index 5322868..408f2d2 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java @@ -714,36 +714,51 @@ public class ProjectionPushDownRule extends Stack<LogicalNode> stack) throws TajoException { Context newContext = new Context(context); - // Getting grouping key names - final int groupingKeyNum = node.getGroupingColumns().length; + int groupingKeyNum = node.getGroupingColumns().length; LinkedHashSet<String> groupingKeyNames = null; - if (groupingKeyNum > 0) { - groupingKeyNames = Sets.newLinkedHashSet(); - for (int i = 0; i < groupingKeyNum; i++) { - FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]); - groupingKeyNames.add(newContext.addExpr(fieldEval)); + String[] aggEvalNames = null; + + // if this query block is distinct, this groupby node have the same target to that of its above operator. + // So, it does not need to add new expression to newContext. + if (!node.isForDistinctBlock()) { + // Getting grouping key names + if (groupingKeyNum > 0) { + groupingKeyNames = Sets.newLinkedHashSet(); + for (int i = 0; i < groupingKeyNum; i++) { + FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]); + groupingKeyNames.add(newContext.addExpr(fieldEval)); + } } - } - // Getting eval names - - final String [] aggEvalNames; - if (node.hasAggFunctions()) { - final int evalNum = node.getAggFunctions().length; - aggEvalNames = new String[evalNum]; - for (int evalIdx = 0, targetIdx = groupingKeyNum; targetIdx < node.getTargets().length; evalIdx++, targetIdx++) { - Target target = node.getTargets()[targetIdx]; - EvalNode evalNode = node.getAggFunctions()[evalIdx]; - aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName())); + // Getting eval names + if (node.hasAggFunctions()) { + final int evalNum = node.getAggFunctions().length; + aggEvalNames = new String[evalNum]; + for (int evalIdx = 0, targetIdx = node.getGroupingColumns().length; targetIdx < node.getTargets().length; + evalIdx++, targetIdx++) { + Target target = node.getTargets()[targetIdx]; + EvalNode evalNode = node.getAggFunctions()[evalIdx]; + aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName())); + } } - } else { - aggEvalNames = null; } // visit a child node LogicalNode child = super.visitGroupBy(newContext, plan, block, node, stack); node.setInSchema(child.getOutSchema()); + if (node.isForDistinctBlock()) { // the grouping columns should be updated according to the schema of child node. + node.setGroupingColumns(child.getOutSchema().toArray()); + node.setTargets(PlannerUtil.schemaToTargets(child.getOutSchema())); + + // Because it updates grouping columns and targets, it should refresh grouping key num and names. + groupingKeyNum = node.getGroupingColumns().length; + groupingKeyNames = Sets.newLinkedHashSet(); + for (int i = 0; i < groupingKeyNum; i++) { + FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]); + groupingKeyNames.add(newContext.addExpr(fieldEval)); + } + } List<Target> targets = Lists.newArrayList(); if (groupingKeyNum > 0 && groupingKeyNames != null) {
