Repository: tajo Updated Branches: refs/heads/index_support 15f0fdc0c -> 99d37a3d8
TAJO-1277: GreedyHeuristicJoinOrderAlgorithm sometimes wrongly assumes associativity of joins. (Keuntae Park via jihoon) Closes #379 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/95d9cc45 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/95d9cc45 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/95d9cc45 Branch: refs/heads/index_support Commit: 95d9cc455dd7d25da6dd0df709084d7346efb410 Parents: dbf91f5 Author: Jihoon Son <[email protected]> Authored: Tue Feb 17 10:51:48 2015 +0900 Committer: Jihoon Son <[email protected]> Committed: Tue Feb 17 10:51:48 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 + .../apache/tajo/engine/query/TestJoinQuery.java | 7 ++ .../testJoinWithMultipleJoinTypes.sql | 4 + .../testJoinWithMultipleJoinTypes.result | 6 ++ .../GreedyHeuristicJoinOrderAlgorithm.java | 96 +++++++++++++++++--- 5 files changed, 102 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 025bc14..b78b8b7 100644 --- a/CHANGES +++ b/CHANGES @@ -185,6 +185,9 @@ Release 0.10.0 - unreleased BUG FIXES + TAJO-1277: GreedyHeuristicJoinOrderAlgorithm sometimes wrongly assumes + associativity of joins. (Keuntae Park via jihoon) + TAJO-1336: Fix task failure of stopped task. (jinho) TAJO-1316: NPE occurs when performing window functions after join. http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java index ecd933d..9ab32ff 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java @@ -226,6 +226,13 @@ public class TestJoinQuery extends QueryTestCaseBase { } @Test + public final void testJoinWithMultipleJoinTypes() throws Exception { + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + + @Test public final void testLeftOuterJoin1() throws Exception { ResultSet res = executeQuery(); assertResultSet(res); http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql new file mode 100644 index 0000000..ffb23ce --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql @@ -0,0 +1,4 @@ +select * FROM +customer c +right outer join (select n_nationkey from nation) n on n.n_nationkey = c.c_custkey +join region r on r.r_regionkey = c.c_custkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result b/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result new file mode 100644 index 0000000..7748513 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result @@ -0,0 +1,6 @@ +c_custkey,c_name,c_address,c_nationkey,c_phone,c_acctbal,c_mktsegment,c_comment,n_nationkey,r_regionkey,r_name,r_comment +------------------------------- +1,Customer#000000001,IVhzIApeRb ot,c,E,15,25-989-741-2988,711.56,BUILDING,to the even, regular platelets. regular, ironic epitaphs nag e,1,1,AMERICA,hs use ironic, even requests. s +2,Customer#000000002,XSTf4,NCwDVaWNe6tEgvwfmRchLXak,13,23-768-687-3665,121.65,AUTOMOBILE,l accounts. blithely ironic theodolites integrate boldly: caref,2,2,ASIA,ges. thinly even pinto beans ca +3,Customer#000000003,MG9kdTD2WBHm,1,11-719-748-3364,7498.12,AUTOMOBILE, deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov,3,3,EUROPE,ly final courts cajole furiously final excuse +4,Customer#000000004,XxVSJsLAGtn,4,14-128-190-5944,2866.83,MACHINERY, requests. final, regular ideas sleep final accou,4,4,MIDDLE EAST,uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java index 0fad8a8..4231484 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java @@ -28,10 +28,7 @@ import org.apache.tajo.plan.expr.AlgebraicUtil; import org.apache.tajo.plan.logical.*; import org.apache.tajo.util.TUtil; -import java.util.LinkedHashSet; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.*; /** * This is a greedy heuristic algorithm to find a bushy join tree. This algorithm finds @@ -57,17 +54,75 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { JoinEdge bestPair; while (remainRelations.size() > 1) { + Set<LogicalNode> checkingRelations = new LinkedHashSet<LogicalNode>(); + + for (LogicalNode relation : remainRelations) { + Collection <String> relationStrings = PlannerUtil.getRelationLineageWithinQueryBlock(plan, relation); + List<JoinEdge> joinEdges = new ArrayList<JoinEdge>(); + String relationCollection = TUtil.collectionToString(relationStrings, ","); + List<JoinEdge> joinEdgesForGiven = joinGraph.getIncomingEdges(relationCollection); + if (joinEdgesForGiven != null) { + joinEdges.addAll(joinEdgesForGiven); + } + if (relationStrings.size() > 1) { + for (String relationString: relationStrings) { + joinEdgesForGiven = joinGraph.getIncomingEdges(relationString); + if (joinEdgesForGiven != null) { + joinEdges.addAll(joinEdgesForGiven); + } + } + } + + // check if the relation is the last piece of outer join + boolean endInnerRelation = false; + for (JoinEdge joinEdge: joinEdges) { + switch(joinEdge.getJoinType()) { + case LEFT_ANTI: + case RIGHT_ANTI: + case LEFT_SEMI: + case RIGHT_SEMI: + case LEFT_OUTER: + case RIGHT_OUTER: + case FULL_OUTER: + endInnerRelation = true; + if (checkingRelations.size() <= 1) { + checkingRelations.add(relation); + } + break; + } + } + + if (endInnerRelation) { + break; + } + + checkingRelations.add(relation); + } + + remainRelations.removeAll(checkingRelations); + // Find the best join pair among all joinable operators in candidate set. - bestPair = getBestPair(plan, joinGraph, remainRelations); + while (checkingRelations.size() > 1) { + LinkedHashSet<String[]> removingJoinEdges = new LinkedHashSet<String[]>(); + bestPair = getBestPair(plan, joinGraph, checkingRelations, removingJoinEdges); + + checkingRelations.remove(bestPair.getLeftRelation()); + checkingRelations.remove(bestPair.getRightRelation()); + for (String[] joinEdge: removingJoinEdges) { + // remove the edge of the best pair from join graph + joinGraph.removeEdge(joinEdge[0], joinEdge[1]); + } - remainRelations.remove(bestPair.getLeftRelation()); // remainRels = remainRels \ Ti - remainRelations.remove(bestPair.getRightRelation()); // remainRels = remainRels \ Tj + latestJoin = createJoinNode(plan, bestPair); + checkingRelations.add(latestJoin); - latestJoin = createJoinNode(plan, bestPair); - remainRelations.add(latestJoin); + // all logical nodes should be registered to corresponding blocks + block.registerNode(latestJoin); + } - // all logical nodes should be registered to corresponding blocks - block.registerNode(latestJoin); + // new Logical block should be the first entry of new Set + checkingRelations.addAll(remainRelations); + remainRelations = checkingRelations; } JoinNode joinTree = (JoinNode) remainRelations.iterator().next(); @@ -116,10 +171,12 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { * @return The best join pair among them * @throws PlanningException */ - private JoinEdge getBestPair(LogicalPlan plan, JoinGraph graph, Set<LogicalNode> candidateSet) + private JoinEdge getBestPair(LogicalPlan plan, JoinGraph graph, Set<LogicalNode> candidateSet, Set<String[]> bestJoinEdges) throws PlanningException { double minCost = Double.MAX_VALUE; JoinEdge bestJoin = null; + LinkedHashSet<String[]> relatedJoinEdges = null; + LinkedHashSet<String[]> relatedNonCrossJoinEdges = null; double minNonCrossJoinCost = Double.MAX_VALUE; JoinEdge bestNonCrossJoin = null; @@ -130,7 +187,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { continue; } - JoinEdge foundJoin = findJoin(plan, graph, outer, inner); + LinkedHashSet<String[]> joinEdgePairs = new LinkedHashSet<String[]>(); + JoinEdge foundJoin = findJoin(plan, graph, outer, inner, joinEdgePairs); if (foundJoin == null) { continue; } @@ -139,6 +197,7 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { if (cost < minCost) { minCost = cost; bestJoin = foundJoin; + relatedJoinEdges = joinEdgePairs; } // Keep the min cost join @@ -148,14 +207,17 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { if (cost < minNonCrossJoinCost) { minNonCrossJoinCost = cost; bestNonCrossJoin = foundJoin; + relatedNonCrossJoinEdges = joinEdgePairs; } } } } if (bestNonCrossJoin != null) { + bestJoinEdges.addAll(relatedNonCrossJoinEdges); return bestNonCrossJoin; } else { + bestJoinEdges.addAll(relatedJoinEdges); return bestJoin; } } @@ -165,7 +227,7 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { * * @return If there is no join condition between two relation, it returns NULL value. */ - private static JoinEdge findJoin(LogicalPlan plan, JoinGraph graph, LogicalNode outer, LogicalNode inner) + private static JoinEdge findJoin(LogicalPlan plan, JoinGraph graph, LogicalNode outer, LogicalNode inner, Set<String[]> joinEdgePairs) throws PlanningException { JoinEdge foundJoinEdge = null; @@ -176,6 +238,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { for (String innerName : PlannerUtil.getRelationLineageWithinQueryBlock(plan, inner)) { if (graph.hasEdge(outerEdgeKey, innerName)) { JoinEdge existJoinEdge = graph.getEdge(outerEdgeKey, innerName); + String[] joinEdgePair = {outerEdgeKey, innerName}; + joinEdgePairs.add(joinEdgePair); if (foundJoinEdge == null) { foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), outer, inner, existJoinEdge.getJoinQual()); @@ -195,6 +259,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { for (String outerName : PlannerUtil.getRelationLineageWithinQueryBlock(plan, outer)) { if (graph.hasEdge(outerEdgeKey, outerName)) { JoinEdge existJoinEdge = graph.getEdge(outerEdgeKey, outerName); + String[] joinEdgePair = {outerEdgeKey, outerName}; + joinEdgePairs.add(joinEdgePair); if (foundJoinEdge == null) { foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), inner, outer, existJoinEdge.getJoinQual()); @@ -214,6 +280,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { // Find all joins between two relations and merge them into one join if possible if (graph.hasEdge(outerName, innerName)) { JoinEdge existJoinEdge = graph.getEdge(outerName, innerName); + String[] joinEdgePair = {outerName, innerName}; + joinEdgePairs.add(joinEdgePair); if (foundJoinEdge == null) { foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), outer, inner, existJoinEdge.getJoinQual());
