TAJO-850: OUTER JOIN does not properly handle a NULL. (Hyoungjun Kim via hyunsik)
Closes #22 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/be21bc70 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/be21bc70 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/be21bc70 Branch: refs/heads/window_function Commit: be21bc706d9ebade71e976ecb966149cf1272385 Parents: 8883f9f Author: Hyunsik Choi <[email protected]> Authored: Tue Jun 24 12:09:45 2014 +0900 Committer: Hyunsik Choi <[email protected]> Committed: Tue Jun 24 12:09:45 2014 +0900 ---------------------------------------------------------------------- CHANGES | 14 +-- .../apache/tajo/engine/eval/EvalTreeUtil.java | 28 ++++++ .../tajo/engine/planner/LogicalPlanner.java | 33 +++---- .../planner/rewrite/FilterPushDownRule.java | 92 +++++++++----------- .../apache/tajo/engine/query/TestJoinQuery.java | 21 +++++ .../queries/TestJoinQuery/oj_table1_ddl.sql | 3 - .../queries/TestJoinQuery/oj_table2_ddl.sql | 3 - .../testLeftOuterJoinWithNull1.sql | 10 +++ .../testLeftOuterJoinWithNull2.sql | 11 +++ .../testLeftOuterJoinWithNull3.sql | 10 +++ .../testLeftOuterJoinWithNull1.result | 4 + .../testLeftOuterJoinWithNull2.result | 4 + .../testLeftOuterJoinWithNull3.result | 2 + 13 files changed, 157 insertions(+), 78 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 3f3c9c9..c9fc1ee 100644 --- a/CHANGES +++ b/CHANGES @@ -71,11 +71,14 @@ Release 0.9.0 - unreleased BUG FIXES - TAJO-879: Some data is missing in the case of BROADCAST JOIN and multi-column partition. - (Hyoungjun Kim via jaehwa) + TAJO-850: OUTER JOIN does not properly handle a NULL. + (Hyoungjun Kim via hyunsik) + + TAJO-879: Some data is missing in the case of BROADCAST JOIN and + multi-column partition. (Hyoungjun Kim via jaehwa) - TAJO-848: PreLogicalPlanVerifier::visitInsert need to find smaller expressions than target - columns for a partitioned table. (jaehwa) + TAJO-848: PreLogicalPlanVerifier::visitInsert need to find smaller + expressions than target columns for a partitioned table. (jaehwa) TAJO-880: NULL in CASE clause occurs Exception. (Hyoungjun Kim via hyunsik) @@ -125,7 +128,8 @@ Release 0.9.0 - unreleased TAJO-800: CLI's meta command should be aware "TABLE_NAME" style. (Hyoungjun Kim via hyunsik) - TAJO-795: PlannerUtil::joinJoinKeyForEachTable need to handle theta-join. (jaehwa) + TAJO-795: PlannerUtil::joinJoinKeyForEachTable need to handle theta-join. + (jaehwa) TAJO-792: Insert table error with database name. (Hyoungjun Kim via hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalTreeUtil.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalTreeUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalTreeUtil.java index 8982bd5..07f71dc 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalTreeUtil.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalTreeUtil.java @@ -364,6 +364,12 @@ public class EvalTreeUtil { return (Collection<T>) finder.evalNodes; } + public static <T extends EvalNode> Collection<T> findOuterJoinSensitiveEvals(EvalNode evalNode) { + OuterJoinSensitiveEvalFinder finder = new OuterJoinSensitiveEvalFinder(); + finder.visitChild(null, evalNode, new Stack<EvalNode>()); + return (Collection<T>) finder.evalNodes; + } + public static class EvalFinder extends BasicEvalNodeVisitor<Object, Object> { private EvalType targetType; List<EvalNode> evalNodes = TUtil.newList(); @@ -384,6 +390,28 @@ public class EvalTreeUtil { } } + public static class OuterJoinSensitiveEvalFinder extends BasicEvalNodeVisitor<Object, Object> { + private List<EvalNode> evalNodes = TUtil.newList(); + + @Override + public Object visitChild(Object context, EvalNode evalNode, Stack<EvalNode> stack) { + super.visitChild(context, evalNode, stack); + + if (evalNode.type == EvalType.CASE) { + evalNodes.add(evalNode); + } else if (evalNode.type == EvalType.FUNCTION) { + FunctionEval functionEval = (FunctionEval)evalNode; + if ("coalesce".equals(functionEval.getName())) { + evalNodes.add(evalNode); + } + } else if (evalNode.type == EvalType.IS_NULL) { + evalNodes.add(evalNode); + } + + return evalNode; + } + } + public static boolean checkIfCanBeConstant(EvalNode evalNode) { return findUniqueColumns(evalNode).size() == 0 && findDistinctAggFunction(evalNode).size() == 0; } http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java index 6474444..c9948d8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java @@ -835,8 +835,8 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex block.namedExprsMgr.markAsEvaluated(namedExpr.getAlias(), evalNode); newlyEvaluatedExprs.add(namedExpr.getAlias()); } - } catch (VerifyException ve) {} catch (PlanningException e) { - e.printStackTrace(); + } catch (VerifyException ve) { + } catch (PlanningException e) { } } return newlyEvaluatedExprs; @@ -1629,22 +1629,25 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex // at the topmost join operator. // TODO - It's also valid that case-when is evalauted at the topmost outer operator. // But, how can we know there is no further outer join operator after this node? - if (!checkIfCaseWhenWithOuterJoinBeEvaluated(block, evalNode, isTopMostJoin)) { - return false; + if (containsOuterJoin(block)) { + if (!isTopMostJoin) { + Collection<EvalNode> found = EvalTreeUtil.findOuterJoinSensitiveEvals(evalNode); + if (found.size() > 0) { + return false; + } + } } return true; } - private static boolean checkIfCaseWhenWithOuterJoinBeEvaluated(QueryBlock block, EvalNode evalNode, - boolean isTopMostJoin) { - if (block.containsJoinType(JoinType.LEFT_OUTER) || block.containsJoinType(JoinType.RIGHT_OUTER)) { - Collection<CaseWhenEval> caseWhenEvals = EvalTreeUtil.findEvalsByType(evalNode, EvalType.CASE); - if (caseWhenEvals.size() > 0 && !isTopMostJoin) { - return false; - } - } - return true; + public static boolean isOuterJoin(JoinType joinType) { + return joinType == JoinType.LEFT_OUTER || joinType == JoinType.RIGHT_OUTER || joinType==JoinType.FULL_OUTER; + } + + public static boolean containsOuterJoin(QueryBlock block) { + return block.containsJoinType(JoinType.LEFT_OUTER) || block.containsJoinType(JoinType.RIGHT_OUTER) || + block.containsJoinType(JoinType.FULL_OUTER); } /** @@ -1663,8 +1666,8 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex } // Why? - When a {case when} is used with outer join, case when must be evaluated at topmost outer join. - if (block.containsJoinType(JoinType.LEFT_OUTER) || block.containsJoinType(JoinType.RIGHT_OUTER)) { - Collection<CaseWhenEval> found = EvalTreeUtil.findEvalsByType(evalNode, EvalType.CASE); + if (containsOuterJoin(block)) { + Collection<EvalNode> found = EvalTreeUtil.findOuterJoinSensitiveEvals(evalNode); if (found.size() > 0) { return false; } http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/FilterPushDownRule.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/FilterPushDownRule.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/FilterPushDownRule.java index b848c86..ed46e25 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/FilterPushDownRule.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/FilterPushDownRule.java @@ -151,10 +151,6 @@ public class FilterPushDownRule extends BasicLogicalPlanVisitor<FilterPushDownCo return selNode; } - private boolean isOuterJoin(JoinType joinType) { - return joinType == JoinType.LEFT_OUTER || joinType == JoinType.RIGHT_OUTER || joinType==JoinType.FULL_OUTER; - } - @Override public LogicalNode visitJoin(FilterPushDownContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, JoinNode joinNode, Stack<LogicalNode> stack) throws PlanningException { @@ -165,7 +161,7 @@ public class FilterPushDownRule extends BasicLogicalPlanVisitor<FilterPushDownCo // get the two operands of the join operation as well as the join type JoinType joinType = joinNode.getJoinType(); EvalNode joinQual = joinNode.getJoinQual(); - if (joinQual != null && isOuterJoin(joinType)) { + if (joinQual != null && LogicalPlanner.isOuterJoin(joinType)) { BinaryEval binaryEval = (BinaryEval) joinQual; // if both are fields if (binaryEval.getLeftExpr().getType() == EvalType.FIELD && @@ -210,58 +206,45 @@ public class FilterPushDownRule extends BasicLogicalPlanVisitor<FilterPushDownCo throw new InvalidQueryException("Incorrect Logical Query Plan with regard to outer join"); } } + } + } - // retain in this outer join node's JoinQual those selection predicates - // related to the outer join's null supplier(s) - List<EvalNode> matched2 = Lists.newArrayList(); - for (EvalNode eval : context.pushingDownFilters) { - - Set<Column> columnRefs = EvalTreeUtil.findUniqueColumns(eval); - Set<String> tableNames = Sets.newHashSet(); - // getting distinct table references - for (Column col : columnRefs) { - if (!tableNames.contains(col.getQualifier())) { - tableNames.add(col.getQualifier()); - } - } + // get evals from ON clause + List<EvalNode> onConditions = new ArrayList<EvalNode>(); + if (joinNode.hasJoinQual()) { + onConditions.addAll(Sets.newHashSet(AlgebraicUtil.toConjunctiveNormalFormArray(joinNode.getJoinQual()))); + } - //if the predicate involves any of the null suppliers - boolean shouldKeep=false; - Iterator<String> it2 = nullSuppliers.iterator(); - while(it2.hasNext()){ - if(tableNames.contains(it2.next()) == true) { - shouldKeep = true; - } - } + boolean isTopMostJoin = stack.peek().getType() != NodeType.JOIN; - if(shouldKeep == true) { - matched2.add(eval); - } - } - // merge the retained predicates and establish them in the current outer join node. - // Then remove them from the pushingDownFilters - EvalNode qual2 = null; - if (matched2.size() > 1) { - // merged into one eval tree - qual2 = AlgebraicUtil.createSingletonExprFromCNF( - matched2.toArray(new EvalNode[matched2.size()])); - } else if (matched2.size() == 1) { - // if the number of matched expr is one - qual2 = matched2.get(0); - } + List<EvalNode> outerJoinConditionEvals = new ArrayList<EvalNode>(); + List<EvalNode> outerJoinFilterEvalsExcludeJoinCondition = new ArrayList<EvalNode>(); + if (LogicalPlanner.isOuterJoin(joinNode.getJoinType())) { + // In the case of top most JOIN, all filters except JOIN condition aren't pushed down. + // That filters are processed by SELECTION NODE. + for (EvalNode eachEval: context.pushingDownFilters) { + if (isTopMostJoin && !EvalTreeUtil.isJoinQual(eachEval, true)) { + outerJoinFilterEvalsExcludeJoinCondition.add(eachEval); - if (qual2 != null) { - EvalNode conjQual2 = AlgebraicUtil.createSingletonExprFromCNF(joinNode.getJoinQual(), qual2); - joinNode.setJoinQual(conjQual2); - context.pushingDownFilters.removeAll(matched2); - } // for the remaining context.pushingDownFilters, push it as usual + } else { + outerJoinConditionEvals.add(eachEval); + } } - } - if (joinNode.hasJoinQual()) { - context.addFiltersTobePushed(Sets.newHashSet(AlgebraicUtil.toConjunctiveNormalFormArray(joinNode.getJoinQual()))); + for (EvalNode eachOnEval: onConditions) { + if (EvalTreeUtil.isJoinQual(eachOnEval, true)) { + // If join condition, processing in the JoinNode. + outerJoinConditionEvals.add(eachOnEval); + } else { + // TODO pushdown(Null Supplying table) or add join condition(Preserved Row table) + // https://cwiki.apache.org/confluence/display/Hive/OuterJoinBehavior + throw new PlanningException("Currently not support filter condition ON clause in the case of OUTER JOIN."); + } + } + } else { + context.pushingDownFilters.addAll(onConditions); } List<EvalNode> notMatched = new ArrayList<EvalNode>(); @@ -283,9 +266,13 @@ public class FilterPushDownRule extends BasicLogicalPlanVisitor<FilterPushDownCo context.addFiltersTobePushed(notMatched); List<EvalNode> matched = Lists.newArrayList(); - for (EvalNode eval : context.pushingDownFilters) { - if (LogicalPlanner.checkIfBeEvaluatedAtJoin(block, eval, joinNode, stack.peek().getType() != NodeType.JOIN)) { - matched.add(eval); + if(LogicalPlanner.isOuterJoin(joinNode.getJoinType())) { + matched.addAll(outerJoinConditionEvals); + } else { + for (EvalNode eval : context.pushingDownFilters) { + if (LogicalPlanner.checkIfBeEvaluatedAtJoin(block, eval, joinNode, isTopMostJoin)) { + matched.add(eval); + } } } @@ -308,6 +295,7 @@ public class FilterPushDownRule extends BasicLogicalPlanVisitor<FilterPushDownCo context.pushingDownFilters.removeAll(matched); } + context.pushingDownFilters.addAll(outerJoinFilterEvalsExcludeJoinCondition); return joinNode; } http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java index b1aca56..cc5db52 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java @@ -376,4 +376,25 @@ public class TestJoinQuery extends QueryTestCaseBase { assertResultSet(res); cleanupQuery(res); } + + @Test + public final void testLeftOuterJoinWithNull1() throws Exception { + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + + @Test + public final void testLeftOuterJoinWithNull2() throws Exception { + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + + @Test + public final void testLeftOuterJoinWithNull3() throws Exception { + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql index ea2c954..a37403c 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql @@ -1,6 +1,3 @@ --- Outer Join's Left Table --- It is used in TestJoin::testOuterJoinAndCaseWhen - create external table table1 (id int, name text, score float, type text) using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql index ac6459a..d60b145 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql @@ -1,6 +1,3 @@ --- Outer Join's Left Table --- It is used in TestJoin::testOuterJoinAndCaseWhen - create external table table2 (id int, name text, score float, type text) using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull1.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull1.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull1.sql new file mode 100644 index 0000000..5698a74 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull1.sql @@ -0,0 +1,10 @@ +select + c_custkey, + orders.o_orderkey, + coalesce(orders.o_orderstatus, 'N/A'), + orders.o_orderdate +from + customer left outer join orders on c_custkey = o_orderkey +where o_orderkey is null +order by + c_custkey, o_orderkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull2.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull2.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull2.sql new file mode 100644 index 0000000..000ab68 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull2.sql @@ -0,0 +1,11 @@ +select + c_custkey, + orders.o_orderkey, + coalesce(orders.o_orderstatus, 'N/A'), + orders.o_orderdate +from + customer left outer join orders on c_custkey = o_orderkey +where orders.o_orderdate is not null +and orders.o_orderdate like '1996%' +order by + c_custkey, o_orderkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull3.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull3.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull3.sql new file mode 100644 index 0000000..32a7750 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/testLeftOuterJoinWithNull3.sql @@ -0,0 +1,10 @@ +select + c_custkey, + orders.o_orderkey, + coalesce(orders.o_orderstatus, 'N/A'), + orders.o_orderdate +from + customer left outer join orders on c_custkey = o_orderkey +where orders.o_orderkey = 100 +order by + c_custkey, o_orderkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull1.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull1.result b/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull1.result new file mode 100644 index 0000000..81b907d --- /dev/null +++ b/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull1.result @@ -0,0 +1,4 @@ +c_custkey,o_orderkey,?coalesce,o_orderdate +------------------------------- +4,null,N/A,null +5,null,N/A,null \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull2.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull2.result b/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull2.result new file mode 100644 index 0000000..08f745c --- /dev/null +++ b/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull2.result @@ -0,0 +1,4 @@ +c_custkey,o_orderkey,?coalesce,o_orderdate +------------------------------- +1,1,O,1996-01-02 +2,2,O,1996-12-01 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/be21bc70/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull3.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull3.result b/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull3.result new file mode 100644 index 0000000..efd2e74 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestJoinQuery/testLeftOuterJoinWithNull3.result @@ -0,0 +1,2 @@ +c_custkey,o_orderkey,?coalesce,o_orderdate +-------------------------------
