cloud-fan commented on a change in pull request #23036: [SPARK-26065][SQL] 
Change query hint from a `LogicalPlan` to a field
URL: https://github.com/apache/spark/pull/23036#discussion_r245298669
 
 

 ##########
 File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
 ##########
 @@ -59,3 +76,41 @@ case class HintInfo(broadcast: Boolean = false) {
     if (hints.isEmpty) "none" else hints.mkString("(", ", ", ")")
   }
 }
+
+/**
+ * Replaces [[ResolvedHint]] operators from the plan. Move the [[HintInfo]] to 
associated [[Join]]
+ * operators, otherwise remove it if no [[Join]] operator is matched.
+ */
+object EliminateResolvedHint extends Rule[LogicalPlan] {
+  // This is also called in the beginning of the optimization phase, and as a 
result
+  // is using transformUp rather than resolveOperators.
+  def apply(plan: LogicalPlan): LogicalPlan = {
+    val pulledUp = plan transformUp {
+      case j: Join =>
+        val leftHint = mergeHints(collectHints(j.left))
+        val rightHint = mergeHints(collectHints(j.right))
+        j.copy(hint = JoinHint(leftHint, rightHint))
+    }
+    pulledUp.transform {
+      case h: ResolvedHint => h.child
+    }
+  }
+
+  def mergeHints(hints: Seq[HintInfo]): Option[HintInfo] = {
+    hints.reduceOption((h1, h2) => HintInfo(
+      broadcast = h1.broadcast || h2.broadcast))
+  }
+
+  def collectHints(plan: LogicalPlan): Seq[HintInfo] = {
+    plan match {
+      case h: ResolvedHint => collectHints(h.child) :+ h.hints
+      case u: UnaryNode => collectHints(u.child)
 
 Review comment:
   I'm not sure if it's safe to collect hints through other operators. For 
example, `Generate` is a unary node which produces more data than its child, 
and we may add more hints in the future which can't be propagated through 
operators.
   
   I think a safer way is to only collect hints from the `ResolvedHint` 
operator if it's a child of `Join`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to