ulysses-you commented on code in PR #39556:
URL: https://github.com/apache/spark/pull/39556#discussion_r1070214567
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala:
##########
@@ -1314,6 +1314,78 @@ class PlannerSuite extends SharedSparkSession with
AdaptiveSparkPlanHelper {
assert(topKs.size == 1)
assert(sorts.isEmpty)
}
+
+ test("SPARK-42049: Improve AliasAwareOutputExpression - ordering -
multi-alias") {
+ Seq(0, 1, 5).foreach { limit =>
+ withSQLConf(SQLConf.EXPRESSION_PROJECTION_CANDIDATE_LIMIT.key ->
limit.toString) {
+ val df = spark.range(2).orderBy($"id").selectExpr("id as x", "id as
y", "id as z")
+ val outputOrdering = df.queryExecution.optimizedPlan.outputOrdering
+ assert(outputOrdering.size == 1)
+ limit match {
+ case 5 =>
+ assert(outputOrdering.head.sameOrderExpressions.size == 3)
+
assert(outputOrdering.head.sameOrderExpressions.map(_.asInstanceOf[Attribute].name)
+ .toSet == Set("x", "y", "z"))
+ case 1 =>
+ assert(outputOrdering.head.sameOrderExpressions.size == 1)
+
assert(outputOrdering.head.sameOrderExpressions.map(_.asInstanceOf[Attribute].name)
+ .toSet.subsetOf(Set("x", "y", "z")))
+ case 0 =>
+ assert(outputOrdering.head.sameOrderExpressions.isEmpty)
+ }
+ }
+ }
+ }
+
+ test("SPARK-42049: Improve AliasAwareOutputExpression - partitioning -
multi-alias") {
+ Seq(0, 1, 5).foreach { limit =>
+ withSQLConf(SQLConf.EXPRESSION_PROJECTION_CANDIDATE_LIMIT.key ->
limit.toString) {
+ val df = spark.range(2).repartition($"id").selectExpr("id as x", "id
as y", "id as z")
+ val outputPartitioning =
stripAQEPlan(df.queryExecution.executedPlan).outputPartitioning
+ limit match {
+ case 5 =>
+ val p =
outputPartitioning.asInstanceOf[PartitioningCollection].partitionings
+ assert(p.size == 3)
+ assert(p.flatMap(_.asInstanceOf[HashPartitioning].expressions
+ .map(_.asInstanceOf[Attribute].name)).toSet == Set("x", "y",
"z"))
+ case 1 =>
+ val p = outputPartitioning.asInstanceOf[HashPartitioning]
+ assert(p.expressions.size == 1)
+ assert(p.expressions.map(_.asInstanceOf[Attribute].name)
+ .toSet.subsetOf(Set("x", "y", "z")))
+ case 0 =>
+             // the references of the child output partitioning are not a
subset of the output,
+             // so it has been pruned
+ assert(outputPartitioning.isInstanceOf[UnknownPartitioning])
+ }
+ }
+ }
+ }
+
+ test("SPARK-42049: Improve AliasAwareOutputExpression - ordering -
multi-references") {
+ val df = spark.range(2).selectExpr("id as a", "id as b")
+ .orderBy($"a" + $"b").selectExpr("a as x", "b as y")
+ val outputOrdering = df.queryExecution.optimizedPlan.outputOrdering
+ assert(outputOrdering.size == 1)
+ assert(outputOrdering.head.sameOrderExpressions.size == 1)
+     // (a + b), (a + y), (x + b) are pruned since their references are not a
subset of the output
+ outputOrdering.head.sameOrderExpressions.head match {
+ case Add(l: Attribute, r: Attribute, _) => assert(l.name == "x" &&
r.name == "y")
+ case _ => fail(s"Unexpected
${outputOrdering.head.sameOrderExpressions.head}")
Review Comment:
   This test is for the comment at
https://github.com/apache/spark/pull/39556#discussion_r1069573290
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]