[
https://issues.apache.org/jira/browse/FLINK-5654?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15948225#comment-15948225
]
ASF GitHub Bot commented on FLINK-5654:
---------------------------------------
Github user sunjincheng121 commented on a diff in the pull request:
https://github.com/apache/flink/pull/3641#discussion_r108824115
--- Diff:
flink-libraries/flink-table/src/test/scala/org/apache/flink/table/api/scala/stream/sql/SqlITCase.scala
---
@@ -696,6 +716,205 @@ class SqlITCase extends StreamingWithStateTestBase {
"6,8,Hello world,51,9,5,9,1")
assertEquals(expected.sorted, StreamITCase.testResults.sorted)
}
+
+ @Test
+ def testAvgSumAggregatationPartition(): Unit = {
+
+ val env = StreamExecutionEnvironment.getExecutionEnvironment
+ val tEnv = TableEnvironment.getTableEnvironment(env)
+ env.setParallelism(1)
+ StreamITCase.testResults = mutable.MutableList()
+
+ val sqlQuery = "SELECT a, AVG(c) OVER (PARTITION BY a ORDER BY
procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) AS
avgC," +
+ "SUM(c) OVER (PARTITION BY a ORDER BY procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) as
sumC FROM MyTable"
+
+ val t = StreamTestData.get5TupleDataStream(env)
+ .toTable(tEnv).as('a, 'b, 'c, 'd, 'e)
+
+ tEnv.registerTable("MyTable", t)
+
+ val result = tEnv.sql(sqlQuery).toDataStream[Row]
+ result.addSink(new StreamITCase.StringSink)
+ env.execute()
+
+ val expected = mutable.MutableList(
+ "1,0,0",
+ "2,1,1",
+ "2,1,3",
+ "3,3,3",
+ "3,3,7",
+ "3,4,12",
+ "4,6,13",
+ "4,6,6",
+ "4,7,21",
+ "4,7,30",
+ "5,10,10",
+ "5,10,21",
+ "5,11,33",
+ "5,11,46",
+ "5,12,60")
+
+ assertEquals(expected.sorted, StreamITCase.testResults.sorted)
+ }
+
+ @Test
+ def testAvgSumAggregatationNonPartition(): Unit = {
+
+ val env = StreamExecutionEnvironment.getExecutionEnvironment
+ val tEnv = TableEnvironment.getTableEnvironment(env)
+ env.setParallelism(1)
+ StreamITCase.testResults = mutable.MutableList()
+
+ val sqlQuery = "SELECT a, Count(c) OVER (ORDER BY procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) AS
avgC," +
+ "MIN(c) OVER (ORDER BY procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) as
sumC FROM MyTable"
+
+ val t = StreamTestData.get5TupleDataStream(env)
+ .toTable(tEnv).as('a, 'b, 'c, 'd, 'e)
+
+ tEnv.registerTable("MyTable", t)
+
+ val result = tEnv.sql(sqlQuery).toDataStream[Row]
+ result.addSink(new StreamITCase.StringSink)
+ env.execute()
+
+ val expected = mutable.MutableList(
+ "1,1,0",
+ "2,2,0",
+ "2,3,0",
+ "3,4,0",
+ "3,5,0",
+ "3,6,0",
+ "4,7,0",
+ "4,8,0",
+ "4,9,0",
+ "4,10,0",
+ "5,11,0",
+ "5,12,0",
+ "5,13,0",
+ "5,14,0",
+ "5,15,0")
+
+ assertEquals(expected.sorted, StreamITCase.testResults.sorted)
+ }
+
+
+ @Test
+ def testCountAggregatationProcTimeHarnessPartitioned(): Unit = {
+ val env = StreamExecutionEnvironment.getExecutionEnvironment
--- End diff --
Thanks for the explanation. +1
> Add processing time OVER RANGE BETWEEN x PRECEDING aggregation to SQL
> ---------------------------------------------------------------------
>
> Key: FLINK-5654
> URL: https://issues.apache.org/jira/browse/FLINK-5654
> Project: Flink
> Issue Type: Sub-task
> Components: Table API & SQL
> Reporter: Fabian Hueske
> Assignee: radu
>
> The goal of this issue is to add support for OVER RANGE aggregations on
> processing time streams to the SQL interface.
> Queries similar to the following should be supported:
> {code}
> SELECT
> a,
> SUM(b) OVER (PARTITION BY c ORDER BY procTime() RANGE BETWEEN INTERVAL '1'
> HOUR PRECEDING AND CURRENT ROW) AS sumB,
> MIN(b) OVER (PARTITION BY c ORDER BY procTime() RANGE BETWEEN INTERVAL '1'
> HOUR PRECEDING AND CURRENT ROW) AS minB
> FROM myStream
> {code}
> The following restrictions should initially apply:
> - All OVER clauses in the same SELECT clause must be exactly the same.
> - The PARTITION BY clause is optional (no partitioning results in
> single-threaded execution).
> - The ORDER BY clause may only have procTime() as parameter. procTime() is a
> parameterless scalar function that just indicates processing time mode.
> - UNBOUNDED PRECEDING is not supported (see FLINK-5657)
> - FOLLOWING is not supported.
> The restrictions will be resolved in follow-up issues. If we find that some
> of the restrictions are trivial to address, we can add the functionality in
> this issue as well.
> This issue includes:
> - Design of the DataStream operator to compute OVER RANGE aggregates
> - Translation from Calcite's RelNode representation (LogicalProject with
> RexOver expression).
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)