[
https://issues.apache.org/jira/browse/FLINK-5654?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15948681#comment-15948681
]
ASF GitHub Bot commented on FLINK-5654:
---------------------------------------
Github user rtudoran commented on a diff in the pull request:
https://github.com/apache/flink/pull/3641#discussion_r108871599
--- Diff:
flink-libraries/flink-table/src/test/scala/org/apache/flink/table/api/scala/stream/sql/SqlITCase.scala
---
@@ -696,6 +713,205 @@ class SqlITCase extends StreamingWithStateTestBase {
"6,8,Hello world,51,9,5,9,1")
assertEquals(expected.sorted, StreamITCase.testResults.sorted)
}
+
+ @Test
+ def testAvgSumAggregatationPartition(): Unit = {
+
+ val env = StreamExecutionEnvironment.getExecutionEnvironment
+ val tEnv = TableEnvironment.getTableEnvironment(env)
+ env.setParallelism(1)
+ StreamITCase.testResults = mutable.MutableList()
+
+ val sqlQuery = "SELECT a, AVG(c) OVER (PARTITION BY a ORDER BY procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) AS avgC," +
+ "SUM(c) OVER (PARTITION BY a ORDER BY procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) as sumC FROM MyTable"
+
+ val t = StreamTestData.get5TupleDataStream(env)
+ .toTable(tEnv).as('a, 'b, 'c, 'd, 'e)
+
+ tEnv.registerTable("MyTable", t)
+
+ val result = tEnv.sql(sqlQuery).toDataStream[Row]
+ result.addSink(new StreamITCase.StringSink)
+ env.execute()
+
+ val expected = mutable.MutableList(
+ "1,0,0",
+ "2,1,1",
+ "2,1,3",
+ "3,3,3",
+ "3,3,7",
+ "3,4,12",
+ "4,6,13",
+ "4,6,6",
+ "4,7,21",
+ "4,7,30",
+ "5,10,10",
+ "5,10,21",
+ "5,11,33",
+ "5,11,46",
+ "5,12,60")
+
+ assertEquals(expected.sorted, StreamITCase.testResults.sorted)
+ }
+
+ @Test
+ def testAvgSumAggregatationNonPartition(): Unit = {
+
+ val env = StreamExecutionEnvironment.getExecutionEnvironment
+ val tEnv = TableEnvironment.getTableEnvironment(env)
+ env.setParallelism(1)
+ StreamITCase.testResults = mutable.MutableList()
+
+ val sqlQuery = "SELECT a, Count(c) OVER (ORDER BY procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) AS avgC," +
+ "MIN(c) OVER (ORDER BY procTime()" +
+ "RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW) as sumC FROM MyTable"
+
+ val t = StreamTestData.get5TupleDataStream(env)
+ .toTable(tEnv).as('a, 'b, 'c, 'd, 'e)
+
+ tEnv.registerTable("MyTable", t)
+
+ val result = tEnv.sql(sqlQuery).toDataStream[Row]
+ result.addSink(new StreamITCase.StringSink)
+ env.execute()
+
+ val expected = mutable.MutableList(
+ "1,1,0",
+ "2,2,0",
+ "2,3,0",
+ "3,4,0",
+ "3,5,0",
+ "3,6,0",
+ "4,7,0",
+ "4,8,0",
+ "4,9,0",
+ "4,10,0",
+ "5,11,0",
+ "5,12,0",
+ "5,13,0",
+ "5,14,0",
+ "5,15,0")
+
+ assertEquals(expected.sorted, StreamITCase.testResults.sorted)
+ }
+
+
+ @Test
+ def testCountAggregatationProcTimeHarnessPartitioned(): Unit = {
+ val env = StreamExecutionEnvironment.getExecutionEnvironment
+ val tEnv = TableEnvironment.getTableEnvironment(env)
+ env.setParallelism(1)
+
+ val rT = new RowTypeInfo(Array[TypeInformation[_]](
+ INT_TYPE_INFO,
+ LONG_TYPE_INFO,
+ INT_TYPE_INFO,
+ STRING_TYPE_INFO,
+ LONG_TYPE_INFO),
+ Array("a","b","c","d","e"))
+
+ val rTA = new RowTypeInfo(Array[TypeInformation[_]](
+ LONG_TYPE_INFO), Array("count"))
+
+ val processFunction = new KeyedProcessOperator[String,Row,Row](
+ new ProcTimeBoundedProcessingOverProcessFunction(
+ Array(new CountAggFunction),
+ Array(1),
+ 5,
+ rTA,
+ 1000,
+ rT))
+
+ val rInput:Row = new Row(5)
+ rInput.setField(0, 1)
+ rInput.setField(1, 11L)
+ rInput.setField(2, 1)
+ rInput.setField(3, "aaa")
+ rInput.setField(4, 11L)
+
+ val testHarness = new KeyedOneInputStreamOperatorTestHarness[String,Row,Row](
+ processFunction,
+ new TupleRowSelector(3),
+ BasicTypeInfo.STRING_TYPE_INFO)
+
+ testHarness.open();
+
+ testHarness.setProcessingTime(3)
+
+ // timestamp is ignored in processing time
+ testHarness.processElement(new StreamRecord(rInput, 1001))
+ testHarness.processElement(new StreamRecord(rInput, 2002))
+ testHarness.processElement(new StreamRecord(rInput, 2003))
+ testHarness.processElement(new StreamRecord(rInput, 2004))
+
+ testHarness.setProcessingTime(1004)
+
+ testHarness.processElement(new StreamRecord(rInput, 2005))
+ testHarness.processElement(new StreamRecord(rInput, 2006))
+
+ val result = testHarness.getOutput
+
+ val expectedOutput = new ConcurrentLinkedQueue[Object]()
+
+ val rOutput:Row = new Row(6)
+ rOutput.setField(0, 1)
+ rOutput.setField(1, 11L)
+ rOutput.setField(2, 1)
+ rOutput.setField(3, "aaa")
+ rOutput.setField(4, 11L)
+ rOutput.setField(5, 1L) //count is 1
+ expectedOutput.add(new StreamRecord(rOutput, 1001));
--- End diff --
:) beautiful java trademark
> Add processing time OVER RANGE BETWEEN x PRECEDING aggregation to SQL
> ---------------------------------------------------------------------
>
> Key: FLINK-5654
> URL: https://issues.apache.org/jira/browse/FLINK-5654
> Project: Flink
> Issue Type: Sub-task
> Components: Table API & SQL
> Reporter: Fabian Hueske
> Assignee: radu
>
> The goal of this issue is to add support for OVER RANGE aggregations on
> processing time streams to the SQL interface.
> Queries similar to the following should be supported:
> {code}
> SELECT
> a,
> SUM(b) OVER (PARTITION BY c ORDER BY procTime() RANGE BETWEEN INTERVAL '1'
> HOUR PRECEDING AND CURRENT ROW) AS sumB,
> MIN(b) OVER (PARTITION BY c ORDER BY procTime() RANGE BETWEEN INTERVAL '1'
> HOUR PRECEDING AND CURRENT ROW) AS minB
> FROM myStream
> {code}
> The following restrictions should initially apply:
> - All OVER clauses in the same SELECT clause must be exactly the same.
> - The PARTITION BY clause is optional (no partitioning results in single
> threaded execution).
> - The ORDER BY clause may only have procTime() as parameter. procTime() is a
> parameterless scalar function that just indicates processing time mode.
> - UNBOUNDED PRECEDING is not supported (see FLINK-5657)
> - FOLLOWING is not supported.
> The restrictions will be resolved in follow up issues. If we find that some
> of the restrictions are trivial to address, we can add the functionality in
> this issue as well.
> This issue includes:
> - Design of the DataStream operator to compute OVER ROW aggregates
> - Translation from Calcite's RelNode representation (LogicalProject with
> RexOver expression).
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)