Repository: tajo
Updated Branches:
  refs/heads/master dbf91f54c -> 95d9cc455


TAJO-1277: GreedyHeuristicJoinOrderAlgorithm sometimes wrongly assumes 
associativity of joins. (Keuntae Park via jihoon)

Closes #379


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/95d9cc45
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/95d9cc45
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/95d9cc45

Branch: refs/heads/master
Commit: 95d9cc455dd7d25da6dd0df709084d7346efb410
Parents: dbf91f5
Author: Jihoon Son <[email protected]>
Authored: Tue Feb 17 10:51:48 2015 +0900
Committer: Jihoon Son <[email protected]>
Committed: Tue Feb 17 10:51:48 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 +
 .../apache/tajo/engine/query/TestJoinQuery.java |  7 ++
 .../testJoinWithMultipleJoinTypes.sql           |  4 +
 .../testJoinWithMultipleJoinTypes.result        |  6 ++
 .../GreedyHeuristicJoinOrderAlgorithm.java      | 96 +++++++++++++++++---
 5 files changed, 102 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 025bc14..b78b8b7 100644
--- a/CHANGES
+++ b/CHANGES
@@ -185,6 +185,9 @@ Release 0.10.0 - unreleased
 
   BUG FIXES
 
+    TAJO-1277: GreedyHeuristicJoinOrderAlgorithm sometimes wrongly assumes 
+    associativity of joins. (Keuntae Park via jihoon)
+
     TAJO-1336: Fix task failure of stopped task. (jinho)
 
     TAJO-1316: NPE occurs when performing window functions after join.

http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
----------------------------------------------------------------------
diff --git 
a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java 
b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
index ecd933d..9ab32ff 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
@@ -226,6 +226,13 @@ public class TestJoinQuery extends QueryTestCaseBase {
   }
 
   @Test
+  public final void testJoinWithMultipleJoinTypes() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
   public final void testLeftOuterJoin1() throws Exception {
     ResultSet res = executeQuery();
     assertResultSet(res);

http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql
----------------------------------------------------------------------
diff --git 
a/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql
 
b/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql
new file mode 100644
index 0000000..ffb23ce
--- /dev/null
+++ 
b/tajo-core/src/test/resources/queries/TestJoinQuery/testJoinWithMultipleJoinTypes.sql
@@ -0,0 +1,4 @@
+select * FROM
+customer c
+right outer join (select n_nationkey from nation) n on n.n_nationkey = 
c.c_custkey
+join region r on r.r_regionkey = c.c_custkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result
----------------------------------------------------------------------
diff --git 
a/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result
 
b/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result
new file mode 100644
index 0000000..7748513
--- /dev/null
+++ 
b/tajo-core/src/test/resources/results/TestJoinQuery/testJoinWithMultipleJoinTypes.result
@@ -0,0 +1,6 @@
+c_custkey,c_name,c_address,c_nationkey,c_phone,c_acctbal,c_mktsegment,c_comment,n_nationkey,r_regionkey,r_name,r_comment
+-------------------------------
+1,Customer#000000001,IVhzIApeRb ot,c,E,15,25-989-741-2988,711.56,BUILDING,to 
the even, regular platelets. regular, ironic epitaphs nag e,1,1,AMERICA,hs use 
ironic, even requests. s
+2,Customer#000000002,XSTf4,NCwDVaWNe6tEgvwfmRchLXak,13,23-768-687-3665,121.65,AUTOMOBILE,l
 accounts. blithely ironic theodolites integrate boldly: caref,2,2,ASIA,ges. 
thinly even pinto beans ca
+3,Customer#000000003,MG9kdTD2WBHm,1,11-719-748-3364,7498.12,AUTOMOBILE, 
deposits eat slyly ironic, even instructions. express foxes detect slyly. 
blithely even accounts abov,3,3,EUROPE,ly final courts cajole furiously final 
excuse
+4,Customer#000000004,XxVSJsLAGtn,4,14-128-190-5944,2866.83,MACHINERY, 
requests. final, regular ideas sleep final accou,4,4,MIDDLE EAST,uickly special 
accounts cajole carefully blithely close requests. carefully final asymptotes 
haggle furiousl
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/95d9cc45/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java
----------------------------------------------------------------------
diff --git 
a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java
 
b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java
index 0fad8a8..4231484 100644
--- 
a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java
+++ 
b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java
@@ -28,10 +28,7 @@ import org.apache.tajo.plan.expr.AlgebraicUtil;
 import org.apache.tajo.plan.logical.*;
 import org.apache.tajo.util.TUtil;
 
-import java.util.LinkedHashSet;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
+import java.util.*;
 
 /**
  * This is a greedy heuristic algorithm to find a bushy join tree. This 
algorithm finds
@@ -57,17 +54,75 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
     JoinEdge bestPair;
 
     while (remainRelations.size() > 1) {
+      Set<LogicalNode> checkingRelations = new LinkedHashSet<LogicalNode>();
+
+      for (LogicalNode relation : remainRelations) {
+        Collection <String> relationStrings = 
PlannerUtil.getRelationLineageWithinQueryBlock(plan, relation);
+        List<JoinEdge> joinEdges = new ArrayList<JoinEdge>();
+        String relationCollection = TUtil.collectionToString(relationStrings, 
",");
+        List<JoinEdge> joinEdgesForGiven = 
joinGraph.getIncomingEdges(relationCollection);
+        if (joinEdgesForGiven != null) {
+          joinEdges.addAll(joinEdgesForGiven);
+        }
+        if (relationStrings.size() > 1) {
+          for (String relationString: relationStrings) {
+            joinEdgesForGiven = joinGraph.getIncomingEdges(relationString);
+            if (joinEdgesForGiven != null) {
+              joinEdges.addAll(joinEdgesForGiven);
+            }
+          }
+        }
+
+        // check if the relation is the last piece of outer join
+        boolean endInnerRelation = false;
+        for (JoinEdge joinEdge: joinEdges) {
+          switch(joinEdge.getJoinType()) {
+            case LEFT_ANTI:
+            case RIGHT_ANTI:
+            case LEFT_SEMI:
+            case RIGHT_SEMI:
+            case LEFT_OUTER:
+            case RIGHT_OUTER:
+            case FULL_OUTER:
+              endInnerRelation = true;
+              if (checkingRelations.size() <= 1) {
+                checkingRelations.add(relation);
+              }
+              break;
+          }
+        }
+
+        if (endInnerRelation) {
+          break;
+        }
+
+        checkingRelations.add(relation);
+      }
+
+      remainRelations.removeAll(checkingRelations);
+
       // Find the best join pair among all joinable operators in candidate set.
-      bestPair = getBestPair(plan, joinGraph, remainRelations);
+      while (checkingRelations.size() > 1) {
+        LinkedHashSet<String[]> removingJoinEdges = new 
LinkedHashSet<String[]>();
+        bestPair = getBestPair(plan, joinGraph, checkingRelations, 
removingJoinEdges);
+
+        checkingRelations.remove(bestPair.getLeftRelation());
+        checkingRelations.remove(bestPair.getRightRelation());
+        for (String[] joinEdge: removingJoinEdges) {
+          // remove the edge of the best pair from join graph
+          joinGraph.removeEdge(joinEdge[0], joinEdge[1]);
+        }
 
-      remainRelations.remove(bestPair.getLeftRelation()); // remainRels = 
remainRels \ Ti
-      remainRelations.remove(bestPair.getRightRelation()); // remainRels = 
remainRels \ Tj
+        latestJoin = createJoinNode(plan, bestPair);
+        checkingRelations.add(latestJoin);
 
-      latestJoin = createJoinNode(plan, bestPair);
-      remainRelations.add(latestJoin);
+        // all logical nodes should be registered to corresponding blocks
+        block.registerNode(latestJoin);
+      }
 
-      // all logical nodes should be registered to corresponding blocks
-      block.registerNode(latestJoin);
+      // new Logical block should be the first entry of new Set
+      checkingRelations.addAll(remainRelations);
+      remainRelations = checkingRelations;
     }
 
     JoinNode joinTree = (JoinNode) remainRelations.iterator().next();
@@ -116,10 +171,12 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
    * @return The best join pair among them
    * @throws PlanningException
    */
-  private JoinEdge getBestPair(LogicalPlan plan, JoinGraph graph, 
Set<LogicalNode> candidateSet)
+  private JoinEdge getBestPair(LogicalPlan plan, JoinGraph graph, 
Set<LogicalNode> candidateSet, Set<String[]> bestJoinEdges)
       throws PlanningException {
     double minCost = Double.MAX_VALUE;
     JoinEdge bestJoin = null;
+    LinkedHashSet<String[]> relatedJoinEdges = null;
+    LinkedHashSet<String[]> relatedNonCrossJoinEdges = null;
 
     double minNonCrossJoinCost = Double.MAX_VALUE;
     JoinEdge bestNonCrossJoin = null;
@@ -130,7 +187,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
           continue;
         }
 
-        JoinEdge foundJoin = findJoin(plan, graph, outer, inner);
+        LinkedHashSet<String[]> joinEdgePairs = new LinkedHashSet<String[]>();
+        JoinEdge foundJoin = findJoin(plan, graph, outer, inner, 
joinEdgePairs);
         if (foundJoin == null) {
           continue;
         }
@@ -139,6 +197,7 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
         if (cost < minCost) {
           minCost = cost;
           bestJoin = foundJoin;
+          relatedJoinEdges = joinEdgePairs;
         }
 
         // Keep the min cost join
@@ -148,14 +207,17 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
           if (cost < minNonCrossJoinCost) {
             minNonCrossJoinCost = cost;
             bestNonCrossJoin = foundJoin;
+            relatedNonCrossJoinEdges = joinEdgePairs;
           }
         }
       }
     }
 
     if (bestNonCrossJoin != null) {
+      bestJoinEdges.addAll(relatedNonCrossJoinEdges);
       return bestNonCrossJoin;
     } else {
+      bestJoinEdges.addAll(relatedJoinEdges);
       return bestJoin;
     }
   }
@@ -165,7 +227,7 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
    *
    * @return If there is no join condition between two relation, it returns 
NULL value.
    */
-  private static JoinEdge findJoin(LogicalPlan plan, JoinGraph graph, 
LogicalNode outer, LogicalNode inner)
+  private static JoinEdge findJoin(LogicalPlan plan, JoinGraph graph, 
LogicalNode outer, LogicalNode inner, Set<String[]> joinEdgePairs)
       throws PlanningException {
     JoinEdge foundJoinEdge = null;
 
@@ -176,6 +238,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
     for (String innerName : 
PlannerUtil.getRelationLineageWithinQueryBlock(plan, inner)) {
       if (graph.hasEdge(outerEdgeKey, innerName)) {
         JoinEdge existJoinEdge = graph.getEdge(outerEdgeKey, innerName);
+        String[] joinEdgePair = {outerEdgeKey, innerName};
+        joinEdgePairs.add(joinEdgePair);
         if (foundJoinEdge == null) {
           foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), outer, 
inner,
               existJoinEdge.getJoinQual());
@@ -195,6 +259,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
     for (String outerName : 
PlannerUtil.getRelationLineageWithinQueryBlock(plan, outer)) {
       if (graph.hasEdge(outerEdgeKey, outerName)) {
         JoinEdge existJoinEdge = graph.getEdge(outerEdgeKey, outerName);
+        String[] joinEdgePair = {outerEdgeKey, outerName};
+        joinEdgePairs.add(joinEdgePair);
         if (foundJoinEdge == null) {
           foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), inner, 
outer,
               existJoinEdge.getJoinQual());
@@ -214,6 +280,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements 
JoinOrderAlgorithm {
         // Find all joins between two relations and merge them into one join 
if possible
         if (graph.hasEdge(outerName, innerName)) {
           JoinEdge existJoinEdge = graph.getEdge(outerName, innerName);
+          String[] joinEdgePair = {outerName, innerName};
+          joinEdgePairs.add(joinEdgePair);
           if (foundJoinEdge == null) {
             foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), outer, 
inner,
                 existJoinEdge.getJoinQual());

Reply via email to