Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/21473#discussion_r192284247
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala ---
@@ -687,4 +687,29 @@ class DataFrameAggregateSuite extends QueryTest with
SharedSQLContext {
}
}
}
+
+ test("SPARK-21896: Window functions inside aggregate functions") {
+ def checkWindowError(df: => DataFrame): Unit = {
+ val thrownException = the [AnalysisException] thrownBy {
+ df.queryExecution.analyzed
+ }
+ assert(thrownException.message.contains("not allowed to use a window
function"))
+ }
+
+
checkWindowError(testData2.select(min(avg('b).over(Window.partitionBy('a)))))
+ checkWindowError(testData2.agg(sum('b),
max(rank().over(Window.orderBy('a)))))
+ checkWindowError(testData2.groupBy('a).agg(sum('b),
max(rank().over(Window.orderBy('b)))))
+
checkWindowError(testData2.groupBy('a).agg(max(sum(sum('b)).over(Window.orderBy('b)))))
+
+ checkWindowError(
+ sql("SELECT MAX(RANK() OVER(ORDER BY b)) FROM testData2 GROUP BY a
HAVING SUM(b) = 3"))
+ checkWindowError(
+ sql("SELECT MAX(RANK() OVER(ORDER BY a)) FROM testData2"))
+ checkWindowError(
+ sql("SELECT MAX(RANK() OVER(ORDER BY b)) FROM testData2 GROUP BY a"))
+ checkAnswer(
+ sql("SELECT a, MAX(b), RANK() OVER(ORDER BY a) FROM testData2 GROUP
BY a HAVING SUM(b) = 3"),
--- End diff --
I think the Dataset API version of this query should be:
```
df.groupBy('a).agg(max('b), sum('b).as("sumb"),
rank().over(window)).where('sumb === 5)
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]