viirya commented on a change in pull request #29734:
URL: https://github.com/apache/spark/pull/29734#discussion_r488359058
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala
##########
@@ -115,9 +119,40 @@ class RemoveRedundantProjectsSuite extends QueryTest with
SharedSparkSession wit
assertProjectExec(query, 1, 2)
}
- test("generate") {
- val query = "select a, key, explode(d) from testView where a > 10"
- assertProjectExec(query, 0, 1)
+ test("generate should require column ordering") {
+ withTempView("testData") {
+ spark.range(0, 10, 1)
+ .selectExpr("id as key", "id * 2 as a", "id * 3 as b")
+ .createOrReplaceTempView("testData")
+
+ val data = sql("select key, a, b, count(*) from testData group by key,
a, b limit 2")
+ val df = data.selectExpr("a", "b", "key", "explode(array(key, a, b)) as
d").filter("d > 0")
+ df.collect()
+ val plan = df.queryExecution.executedPlan
+ val numProjects = collectWithSubqueries(plan) { case p: ProjectExec => p
}.length
+
+ // Create a new plan that reverse the GenerateExec output and add a new
ProjectExec between
+ // GenerateExec and its child. This is to test if the ProjectExec is
removed, the output of
+ // the query will be incorrect.
+ val newPlan = stripAQEPlan(plan) transform {
+ case g @ GenerateExec(_, requiredChildOutput, _, _, child) =>
+ g.copy(requiredChildOutput = requiredChildOutput.reverse,
+ child = ProjectExec(requiredChildOutput.reverse, child))
+ }
+
+ // Re-apply remove redundant project rule.
+ val rule = RemoveRedundantProjects(spark.sessionState.conf)
+ val newExecutedPlan = rule.apply(newPlan)
+ // The manually added ProjectExec node shouldn't be removed.
+ assert(collectWithSubqueries(newExecutedPlan) {
+ case p: ProjectExec => p }.size == numProjects + 1)
Review comment:
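The style looks weird. Please put the `case` clause on its own line and move the closing brace to the next line, like this: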
```scala
assert(collectWithSubqueries(newExecutedPlan) {
case p: ProjectExec => p
}.size == numProjects + 1)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]