[GitHub] spark pull request: [SQL][SPARK-2212]HashJoin(Shuffled)

marmbrus Sat, 21 Jun 2014 12:29:27 -0700

Github user marmbrus commented on a diff in the pull request:

    https://github.com/apache/spark/pull/1147#discussion_r14051271
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala ---
    @@ -114,48 +94,27 @@ object HashFilteredJoin extends Logging with 
PredicateHelper {
         (JoinType, Seq[Expression], Seq[Expression], Option[Expression], 
LogicalPlan, LogicalPlan)
     
       def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
    -    // All predicates can be evaluated for inner join (i.e., those that 
are in the ON
    -    // clause and WHERE clause.)
    -    case FilteredOperation(predicates, join @ Join(left, right, Inner, 
condition)) =>
    -      logger.debug(s"Considering hash inner join on: ${predicates ++ 
condition}")
    -      splitPredicates(predicates ++ condition, join)
    -    // All predicates can be evaluated for left semi join (those that are 
in the WHERE
    -    // clause can only from left table, so they can all be pushed down.)
    -    case FilteredOperation(predicates, join @ Join(left, right, LeftSemi, 
condition)) =>
    -      logger.debug(s"Considering hash left semi join on: ${predicates ++ 
condition}")
    -      splitPredicates(predicates ++ condition, join)
         case join @ Join(left, right, joinType, condition) =>
           logger.debug(s"Considering hash join on: $condition")
    -      splitPredicates(condition.toSeq, join)
    -    case _ => None
    -  }
    -
    -  // Find equi-join predicates that can be evaluated before the join, and 
thus can be used
    -  // as join keys.
    -  def splitPredicates(allPredicates: Seq[Expression], join: Join): 
Option[ReturnType] = {
    -    val Join(left, right, joinType, _) = join
    -    val (joinPredicates, otherPredicates) =
    -      allPredicates.flatMap(splitConjunctivePredicates).partition {
    +      // Find equi-join predicates that can be evaluated before the join, 
and thus can be used
    +      // as join keys.
    +      val (joinPredicates, otherPredicates) = 
condition.map(splitConjunctivePredicates).
    +        getOrElse(Nil).partition {
             case Equals(l, r) if (canEvaluate(l, left) && canEvaluate(r, 
right)) ||
               (canEvaluate(l, right) && canEvaluate(r, left)) => true
             case _ => false
           }
     
    -    val joinKeys = joinPredicates.map {
    -      case Equals(l, r) if canEvaluate(l, left) && canEvaluate(r, right) 
=> (l, r)
    -      case Equals(l, r) if canEvaluate(l, right) && canEvaluate(r, left) 
=> (r, l)
    -    }
    +      val joinKeys = joinPredicates.map {
    +        case Equals(l, r) if canEvaluate(l, left) && canEvaluate(r, right) 
=> (l, r)
    +        case Equals(l, r) if canEvaluate(l, right) && canEvaluate(r, left) 
=> (r, l)
    +      }
     
    -    // Do not consider this strategy if there are no join keys.
    --- End diff --
    
    Why are you changing the semantics of this pattern?  It is called 
`HashFilteredJoin` but is now matching joins that cannot be answered using 
hashing techniques.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

[GitHub] spark pull request: [SQL][SPARK-2212]HashJoin(Shuffled)

Reply via email to