This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d1ca0ea4ae5 [SPARK-39890][SQL] Make TakeOrderedAndProjectExec inherit
AliasAwareOutputOrdering
d1ca0ea4ae5 is described below
commit d1ca0ea4ae55bf19c7569eacae5b377c3340dde9
Author: ulysses-you <[email protected]>
AuthorDate: Thu Jul 28 10:49:05 2022 +0800
[SPARK-39890][SQL] Make TakeOrderedAndProjectExec inherit
AliasAwareOutputOrdering
### What changes were proposed in this pull request?
Make TakeOrderedAndProjectExec inherit AliasAwareOutputOrdering
TakeOrderedAndProjectExec does not need to inherit
AliasAwareOutputPartitioning since its output partitioning is SinglePartition.
### Why are the changes needed?
AliasAwareOutputOrdering can save a sort if the project inside
TakeOrderedAndProjectExec has an alias for the sort order.
### Does this PR introduce _any_ user-facing change?
No, it only improves performance.
### How was this patch tested?
Added a test to PlannerSuite.
Closes #37318 from ulysses-you/topn-order.
Authored-by: ulysses-you <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../scala/org/apache/spark/sql/execution/limit.scala | 6 ++++--
.../org/apache/spark/sql/execution/PlannerSuite.scala | 17 +++++++++++++++++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
index 49f703fddb7..88e212c53a0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
@@ -276,7 +276,7 @@ case class TakeOrderedAndProjectExec(
sortOrder: Seq[SortOrder],
projectList: Seq[NamedExpression],
child: SparkPlan,
- offset: Int = 0) extends UnaryExecNode {
+ offset: Int = 0) extends AliasAwareOutputOrdering {
override def output: Seq[Attribute] = {
projectList.map(_.toAttribute)
@@ -347,7 +347,9 @@ case class TakeOrderedAndProjectExec(
}
}
- override def outputOrdering: Seq[SortOrder] = sortOrder
+ override protected def outputExpressions: Seq[NamedExpression] = projectList
+
+ override protected def orderingExpressions: Seq[SortOrder] = sortOrder
override def outputPartitioning: Partitioning = SinglePartition
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
index 6f4869bf110..c7bd12c86a4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
@@ -1293,6 +1293,23 @@ class PlannerSuite extends SharedSparkSession with
AdaptiveSparkPlanHelper {
assert(numSorts.size == 2)
}
}
+
+ test("SPARK-39890: Make TakeOrderedAndProjectExec inherit
AliasAwareOutputOrdering") {
+ val df = spark.range(20).repartition($"id")
+ .orderBy("id")
+ .selectExpr("id as c")
+ .limit(10)
+ .orderBy("c")
+
+ val topKs = collect(df.queryExecution.executedPlan) {
+ case topK: TakeOrderedAndProjectExec => topK
+ }
+ val sorts = collect(df.queryExecution.executedPlan) {
+ case sort: SortExec => sort
+ }
+ assert(topKs.size == 1)
+ assert(sorts.isEmpty)
+ }
}
// Used for unit-testing EnsureRequirements
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]