Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19488#discussion_r145292679
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
---
    @@ -2677,4 +2678,29 @@ class SQLQuerySuite extends QueryTest with 
SharedSQLContext {
           checkAnswer(df, Row(1, 1, 1))
         }
       }
    +
    +  test("SPARK-22266: the same aggregate function was calculated multiple 
times") {
    +    val query = "SELECT a, max(b+1), max(b+1) + 1 FROM testData2 GROUP BY 
a"
    +    val df = sql(query)
    +    val physical = df.queryExecution.sparkPlan
    +    val aggregates = physical.collect {
    +      case agg : HashAggregateExec => agg
    +    }
    +    aggregates.foreach { agg =>
    +      assert (agg.aggregateExpressions.size == 1)
    +    }
    +    checkAnswer(df, Row(1, 3, 4) :: Row(2, 3, 4) :: Row(3, 3, 4) :: Nil)
    +  }
    +
    +  test("Non-deterministic aggregate functions should not be deduplicated") 
{
    +    val query = "SELECT a, first_value(b), first_value(b) + 1 FROM 
testData2 GROUP BY a"
    +    val df = sql(query)
    +    val physical = df.queryExecution.sparkPlan
    +    val aggregates = physical.collect {
    +      case agg : HashAggregateExec => agg
    +    }
    +    aggregates.foreach { agg =>
    --- End diff --
    
    ditto


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to