tanelk commented on pull request #30093:
URL: https://github.com/apache/spark/pull/30093#issuecomment-713073103
Taking it a bit further we can also remove local child sorts inside a local
sort:
```diff
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index c37dac90c5..dee3e40f0d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1058,33 +1058,29 @@ object EliminateSorts extends Rule[LogicalPlan] {
case s @ Sort(orders, _, child) if orders.isEmpty ||
orders.exists(_.child.foldable) =>
val newOrders = orders.filterNot(_.child.foldable)
if (newOrders.isEmpty) child else s.copy(order = newOrders)
- case s @ Sort(orders, global, child)
- if SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
- (global, child) match {
- case (false, _) => child
- case (true, r: Range) => r
- case (true, s @ Sort(_, true, _)) => s
- case (true, _) => s.copy(child = recursiveRemoveSort(child))
- }
- case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child))
+ case Sort(orders, false, child) if
SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
+ child
+ case s @ Sort(_, global, child) => s.copy(child =
recursiveRemoveSort(child, global))
case j @ Join(originLeft, originRight, _, cond, _) if
cond.forall(_.deterministic) =>
j.copy(left = recursiveRemoveSort(originLeft), right =
recursiveRemoveSort(originRight))
case g @ Aggregate(_, aggs, originChild) if isOrderIrrelevantAggs(aggs)
=>
g.copy(child = recursiveRemoveSort(originChild))
}
- private def recursiveRemoveSort(plan: LogicalPlan): LogicalPlan = plan
match {
- case Sort(_, _, child) => recursiveRemoveSort(child)
- case other if canEliminateSort(other) =>
- other.withNewChildren(other.children.map(recursiveRemoveSort))
+ private def recursiveRemoveSort(
+ plan: LogicalPlan, global: Boolean = true): LogicalPlan = plan match {
+ case Sort(_, false, child) if !global => recursiveRemoveSort(child,
global)
+ case Sort(_, _, child) if global => recursiveRemoveSort(child, global)
+ case other if canEliminateSort(other, global) =>
+ other.withNewChildren(other.children.map(recursiveRemoveSort(_,
global)))
case _ => plan
}
- private def canEliminateSort(plan: LogicalPlan): Boolean = plan match {
+ private def canEliminateSort(plan: LogicalPlan, global: Boolean): Boolean
= plan match {
case p: Project => p.projectList.forall(_.deterministic)
case f: Filter => f.condition.deterministic
- case r: RepartitionByExpression =>
r.partitionExpressions.forall(_.deterministic)
- case _: Repartition => true
+ case r: RepartitionByExpression =>
r.partitionExpressions.forall(_.deterministic) && global
+ case _: Repartition => global
case _ => false
}
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]