Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19488#discussion_r145292679
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
---
@@ -2677,4 +2678,29 @@ class SQLQuerySuite extends QueryTest with
SharedSQLContext {
checkAnswer(df, Row(1, 1, 1))
}
}
+
+ test("SPARK-22266: the same aggregate function was calculated multiple times") {
+ val query = "SELECT a, max(b+1), max(b+1) + 1 FROM testData2 GROUP BY a"
+ val df = sql(query)
+ val physical = df.queryExecution.sparkPlan
+ val aggregates = physical.collect {
+ case agg : HashAggregateExec => agg
+ }
+ aggregates.foreach { agg =>
+ assert (agg.aggregateExpressions.size == 1)
+ }
+ checkAnswer(df, Row(1, 3, 4) :: Row(2, 3, 4) :: Row(3, 3, 4) :: Nil)
+ }
+
+ test("Non-deterministic aggregate functions should not be deduplicated") {
+ val query = "SELECT a, first_value(b), first_value(b) + 1 FROM testData2 GROUP BY a"
+ val df = sql(query)
+ val physical = df.queryExecution.sparkPlan
+ val aggregates = physical.collect {
+ case agg : HashAggregateExec => agg
+ }
+ aggregates.foreach { agg =>
--- End diff --
ditto
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]