tanelk commented on pull request #30093:
URL: https://github.com/apache/spark/pull/30093#issuecomment-713073103


   Taking it a bit further we can also remove local child sorts inside a local 
sort:
   
   ```diff
   diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
   index c37dac90c5..dee3e40f0d 100644
   --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
   +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
   @@ -1058,33 +1058,29 @@ object EliminateSorts extends Rule[LogicalPlan] {
        case s @ Sort(orders, _, child) if orders.isEmpty || 
orders.exists(_.child.foldable) =>
          val newOrders = orders.filterNot(_.child.foldable)
          if (newOrders.isEmpty) child else s.copy(order = newOrders)
   -    case s @ Sort(orders, global, child)
   -        if SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
   -      (global, child) match {
   -        case (false, _) => child
   -        case (true, r: Range) => r
   -        case (true, s @ Sort(_, true, _)) => s
   -        case (true, _) => s.copy(child = recursiveRemoveSort(child))
   -      }
   -    case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child))
   +    case Sort(orders, false, child) if 
SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
   +      child
   +    case s @ Sort(_, global, child) => s.copy(child = 
recursiveRemoveSort(child, global))
        case j @ Join(originLeft, originRight, _, cond, _) if 
cond.forall(_.deterministic) =>
          j.copy(left = recursiveRemoveSort(originLeft), right = 
recursiveRemoveSort(originRight))
        case g @ Aggregate(_, aggs, originChild) if isOrderIrrelevantAggs(aggs) 
=>
          g.copy(child = recursiveRemoveSort(originChild))
      }
   
   -  private def recursiveRemoveSort(plan: LogicalPlan): LogicalPlan = plan 
match {
   -    case Sort(_, _, child) => recursiveRemoveSort(child)
   -    case other if canEliminateSort(other) =>
   -      other.withNewChildren(other.children.map(recursiveRemoveSort))
   +  private def recursiveRemoveSort(
   +    plan: LogicalPlan, global: Boolean = true): LogicalPlan = plan match {
   +    case Sort(_, false, child) if !global => recursiveRemoveSort(child, 
global)
   +    case Sort(_, _, child) if global => recursiveRemoveSort(child, global)
   +    case other if canEliminateSort(other, global) =>
   +      other.withNewChildren(other.children.map(recursiveRemoveSort(_, 
global)))
        case _ => plan
      }
   
   -  private def canEliminateSort(plan: LogicalPlan): Boolean = plan match {
   +  private def canEliminateSort(plan: LogicalPlan, global: Boolean): Boolean 
= plan match {
        case p: Project => p.projectList.forall(_.deterministic)
        case f: Filter => f.condition.deterministic
   -    case r: RepartitionByExpression => 
r.partitionExpressions.forall(_.deterministic)
   -    case _: Repartition => true
   +    case r: RepartitionByExpression => 
r.partitionExpressions.forall(_.deterministic) && global
   +    case _: Repartition => global
        case _ => false
      }
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to