ulysses-you commented on code in PR #39556:
URL: https://github.com/apache/spark/pull/39556#discussion_r1070214567
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala:
##########
@@ -1314,6 +1314,78 @@ class PlannerSuite extends SharedSparkSession with
AdaptiveSparkPlanHelper {
assert(topKs.size == 1)
assert(sorts.isEmpty)
}
+
+ test("SPARK-42049: Improve AliasAwareOutputExpression - ordering -
multi-alias") {
+ Seq(0, 1, 5).foreach { limit =>
+ withSQLConf(SQLConf.EXPRESSION_PROJECTION_CANDIDATE_LIMIT.key ->
limit.toString) {
+ val df = spark.range(2).orderBy($"id").selectExpr("id as x", "id as
y", "id as z")
+ val outputOrdering = df.queryExecution.optimizedPlan.outputOrdering
+ assert(outputOrdering.size == 1)
+ limit match {
+ case 5 =>
+ assert(outputOrdering.head.sameOrderExpressions.size == 3)
+
assert(outputOrdering.head.sameOrderExpressions.map(_.asInstanceOf[Attribute].name)
+ .toSet == Set("x", "y", "z"))
+ case 1 =>
+ assert(outputOrdering.head.sameOrderExpressions.size == 1)
+
assert(outputOrdering.head.sameOrderExpressions.map(_.asInstanceOf[Attribute].name)
+ .toSet.subsetOf(Set("x", "y", "z")))
+ case 0 =>
+ assert(outputOrdering.head.sameOrderExpressions.isEmpty)
+ }
+ }
+ }
+ }
+
+ test("SPARK-42049: Improve AliasAwareOutputExpression - partitioning -
multi-alias") {
+ Seq(0, 1, 5).foreach { limit =>
+ withSQLConf(SQLConf.EXPRESSION_PROJECTION_CANDIDATE_LIMIT.key ->
limit.toString) {
+ val df = spark.range(2).repartition($"id").selectExpr("id as x", "id
as y", "id as z")
+ val outputPartitioning =
stripAQEPlan(df.queryExecution.executedPlan).outputPartitioning
+ limit match {
+ case 5 =>
+ val p =
outputPartitioning.asInstanceOf[PartitioningCollection].partitionings
+ assert(p.size == 3)
+ assert(p.flatMap(_.asInstanceOf[HashPartitioning].expressions
+ .map(_.asInstanceOf[Attribute].name)).toSet == Set("x", "y",
"z"))
+ case 1 =>
+ val p = outputPartitioning.asInstanceOf[HashPartitioning]
+ assert(p.expressions.size == 1)
+ assert(p.expressions.map(_.asInstanceOf[Attribute].name)
+ .toSet.subsetOf(Set("x", "y", "z")))
+ case 0 =>
+             // the references of the child output partitioning are not a
subset of the output,
+             // so it has been pruned
+ assert(outputPartitioning.isInstanceOf[UnknownPartitioning])
+ }
+ }
+ }
+ }
+
+ test("SPARK-42049: Improve AliasAwareOutputExpression - ordering -
multi-references") {
+ val df = spark.range(2).selectExpr("id as a", "id as b")
+ .orderBy($"a" + $"b").selectExpr("a as x", "b as y")
+ val outputOrdering = df.queryExecution.optimizedPlan.outputOrdering
+ assert(outputOrdering.size == 1)
+ assert(outputOrdering.head.sameOrderExpressions.size == 1)
+     // (a + b), (a + y), (x + b) are pruned since their references are not a
subset of the output
+ outputOrdering.head.sameOrderExpressions.head match {
+ case Add(l: Attribute, r: Attribute, _) => assert(l.name == "x" &&
r.name == "y")
+ case _ => fail(s"Unexpected
${outputOrdering.head.sameOrderExpressions.head}")
Review Comment:
   This test is for the comment at
https://github.com/apache/spark/pull/39556#discussion_r1069573290
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]