This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 98dba2b1d Minor: Begin porting some window tests to sqllogictests
(#5199)
98dba2b1d is described below
commit 98dba2b1d69ee2ba72b0bba762ed7754276e744e
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Feb 13 20:12:19 2023 +0100
Minor: Begin porting some window tests to sqllogictests (#5199)
---
datafusion/core/tests/sql/window.rs | 414 ---------------------
.../core/tests/sqllogictests/test_files/window.slt | 400 ++++++++++++++++++++
2 files changed, 400 insertions(+), 414 deletions(-)
diff --git a/datafusion/core/tests/sql/window.rs
b/datafusion/core/tests/sql/window.rs
index af03d922c..3fc5b9569 100644
--- a/datafusion/core/tests/sql/window.rs
+++ b/datafusion/core/tests/sql/window.rs
@@ -20,420 +20,6 @@ use ::parquet::arrow::arrow_writer::ArrowWriter;
use ::parquet::file::properties::WriterProperties;
use datafusion::execution::options::ReadOptions;
-/// for window functions without order by the first, last, and nth function
call does not make sense
-#[tokio::test]
-async fn csv_query_window_with_empty_over() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "select \
- c9, \
- count(c5) over () as count1, \
- max(c5) over () as max1, \
- min(c5) over () as min1 \
- from aggregate_test_100 \
- order by c9 \
- limit 5";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----------+--------+------------+-------------+",
- "| c9 | count1 | max1 | min1 |",
- "+-----------+--------+------------+-------------+",
- "| 28774375 | 100 | 2143473091 | -2141999138 |",
- "| 63044568 | 100 | 2143473091 | -2141999138 |",
- "| 141047417 | 100 | 2143473091 | -2141999138 |",
- "| 141680161 | 100 | 2143473091 | -2141999138 |",
- "| 145294611 | 100 | 2143473091 | -2141999138 |",
- "+-----------+--------+------------+-------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-/// for window functions without order by the first, last, and nth function
call does not make sense
-#[tokio::test]
-async fn csv_query_window_with_partition_by() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "select \
- c9, \
- sum(cast(c4 as Int)) over (partition by c3) as sum1, \
- avg(cast(c4 as Int)) over (partition by c3) as avg1, \
- count(cast(c4 as Int)) over (partition by c3) as count1, \
- max(cast(c4 as Int)) over (partition by c3) as max1, \
- min(cast(c4 as Int)) over (partition by c3) as min1 \
- from aggregate_test_100 \
- order by c9 \
- limit 5";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----------+--------+----------+--------+--------+--------+",
- "| c9 | sum1 | avg1 | count1 | max1 | min1 |",
- "+-----------+--------+----------+--------+--------+--------+",
- "| 28774375 | -16110 | -16110 | 1 | -16110 | -16110 |",
- "| 63044568 | 3917 | 3917 | 1 | 3917 | 3917 |",
- "| 141047417 | -38455 | -19227.5 | 2 | -16974 | -21481 |",
- "| 141680161 | -1114 | -1114 | 1 | -1114 | -1114 |",
- "| 145294611 | 15673 | 15673 | 1 | 15673 | 15673 |",
- "+-----------+--------+----------+--------+--------+--------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_window_with_order_by() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "select \
- c9, \
- sum(c5) over (order by c9) as sum1, \
- avg(c5) over (order by c9) as avg1, \
- count(c5) over (order by c9) as count1, \
- max(c5) over (order by c9) as max1, \
- min(c5) over (order by c9) as min1, \
- first_value(c5) over (order by c9) as fv1, \
- last_value(c5) over (order by c9) as lv1, \
- nth_value(c5, 2) over (order by c9) as nv1 \
- from aggregate_test_100 \
- order by c9 \
- limit 5";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
-
"+-----------+-------------+--------------------+--------+-----------+-------------+----------+-------------+------------+",
- "| c9 | sum1 | avg1 | count1 | max1 |
min1 | fv1 | lv1 | nv1 |",
-
"+-----------+-------------+--------------------+--------+-----------+-------------+----------+-------------+------------+",
- "| 28774375 | 61035129 | 61035129 | 1 | 61035129 |
61035129 | 61035129 | 61035129 | |",
- "| 63044568 | -47938237 | -23969118.5 | 2 | 61035129 |
-108973366 | 61035129 | -108973366 | -108973366 |",
- "| 141047417 | 575165281 | 191721760.33333334 | 3 | 623103518 |
-108973366 | 61035129 | 623103518 | -108973366 |",
- "| 141680161 | -1352462829 | -338115707.25 | 4 | 623103518 |
-1927628110 | 61035129 | -1927628110 | -108973366 |",
- "| 145294611 | -3251637940 | -650327588 | 5 | 623103518 |
-1927628110 | 61035129 | -1899175111 | -108973366 |",
-
"+-----------+-------------+--------------------+--------+-----------+-------------+----------+-------------+------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_window_with_partition_by_order_by() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "select \
- c9, \
- sum(c5) over (partition by c4 order by c9) as sum1, \
- avg(c5) over (partition by c4 order by c9) as avg1, \
- count(c5) over (partition by c4 order by c9) as count1, \
- max(c5) over (partition by c4 order by c9) as max1, \
- min(c5) over (partition by c4 order by c9) as min1, \
- first_value(c5) over (partition by c4 order by c9) as fv1, \
- last_value(c5) over (partition by c4 order by c9) as lv1, \
- nth_value(c5, 2) over (partition by c4 order by c9) as nv1 \
- from aggregate_test_100 \
- order by c9 \
- limit 5";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
-
"+-----------+-------------+-------------+--------+-------------+-------------+-------------+-------------+-----+",
- "| c9 | sum1 | avg1 | count1 | max1 | min1
| fv1 | lv1 | nv1 |",
-
"+-----------+-------------+-------------+--------+-------------+-------------+-------------+-------------+-----+",
- "| 28774375 | 61035129 | 61035129 | 1 | 61035129 |
61035129 | 61035129 | 61035129 | |",
- "| 63044568 | -108973366 | -108973366 | 1 | -108973366 |
-108973366 | -108973366 | -108973366 | |",
- "| 141047417 | 623103518 | 623103518 | 1 | 623103518 |
623103518 | 623103518 | 623103518 | |",
- "| 141680161 | -1927628110 | -1927628110 | 1 | -1927628110 |
-1927628110 | -1927628110 | -1927628110 | |",
- "| 145294611 | -1899175111 | -1899175111 | 1 | -1899175111 |
-1899175111 | -1899175111 | -1899175111 | |",
-
"+-----------+-------------+-------------+--------+-------------+-------------+-------------+-------------+-----+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn window() -> Result<()> {
- let results = execute_with_partition(
- "SELECT \
- c1, \
- c2, \
- SUM(c2) OVER () as sum1, \
- COUNT(c2) OVER () as count1, \
- MAX(c2) OVER () as max1, \
- MIN(c2) OVER () as min1, \
- AVG(c2) OVER () as avg1 \
- FROM test \
- ORDER BY c1, c2 \
- LIMIT 5",
- 4,
- )
- .await?;
- // result in one batch, although e.g. having 2 batches do not change
- // result semantics, having a len=1 assertion upfront keeps surprises
- // at bay
- assert_eq!(results.len(), 1);
-
- let expected = vec![
- "+----+----+------+--------+------+------+------+",
- "| c1 | c2 | sum1 | count1 | max1 | min1 | avg1 |",
- "+----+----+------+--------+------+------+------+",
- "| 0 | 1 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 2 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 3 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 4 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 5 | 220 | 40 | 10 | 1 | 5.5 |",
- "+----+----+------+--------+------+------+------+",
- ];
-
- // window function shall respect ordering
- assert_batches_eq!(expected, &results);
- Ok(())
-}
-
-#[tokio::test]
-async fn window_order_by() -> Result<()> {
- let results = execute_with_partition(
- "SELECT \
- c1, \
- c2, \
- ROW_NUMBER() OVER (ORDER BY c1, c2) as rn1, \
- FIRST_VALUE(c2) OVER (ORDER BY c1, c2) as fv1, \
- LAST_VALUE(c2) OVER (ORDER BY c1, c2) as lv1, \
- NTH_VALUE(c2, 2) OVER (ORDER BY c1, c2) as nv1, \
- SUM(c2) OVER (ORDER BY c1, c2) as sum1, \
- COUNT(c2) OVER (ORDER BY c1, c2) as count1, \
- MAX(c2) OVER (ORDER BY c1, c2) as max1, \
- MIN(c2) OVER (ORDER BY c1, c2) as min1, \
- AVG(c2) OVER (ORDER BY c1, c2) as avg1 \
- FROM test \
- ORDER BY c1, c2 \
- LIMIT 5",
- 4,
- )
- .await?;
- // result in one batch, although e.g. having 2 batches do not change
- // result semantics, having a len=1 assertion upfront keeps surprises
- // at bay
- assert_eq!(results.len(), 1);
-
- let expected = vec![
-
"+----+----+-----+-----+-----+-----+------+--------+------+------+------+",
- "| c1 | c2 | rn1 | fv1 | lv1 | nv1 | sum1 | count1 | max1 | min1 |
avg1 |",
-
"+----+----+-----+-----+-----+-----+------+--------+------+------+------+",
- "| 0 | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | 1
|",
- "| 0 | 2 | 2 | 1 | 2 | 2 | 3 | 2 | 2 | 1 | 1.5
|",
- "| 0 | 3 | 3 | 1 | 3 | 2 | 6 | 3 | 3 | 1 | 2
|",
- "| 0 | 4 | 4 | 1 | 4 | 2 | 10 | 4 | 4 | 1 | 2.5
|",
- "| 0 | 5 | 5 | 1 | 5 | 2 | 15 | 5 | 5 | 1 | 3
|",
-
"+----+----+-----+-----+-----+-----+------+--------+------+------+------+",
- ];
-
- // window function shall respect ordering
- assert_batches_eq!(expected, &results);
- Ok(())
-}
-
-#[tokio::test]
-async fn window_partition_by() -> Result<()> {
- let results = execute_with_partition(
- "SELECT \
- c1, \
- c2, \
- SUM(c2) OVER (PARTITION BY c2) as sum1, \
- COUNT(c2) OVER (PARTITION BY c2) as count1, \
- MAX(c2) OVER (PARTITION BY c2) as max1, \
- MIN(c2) OVER (PARTITION BY c2) as min1, \
- AVG(c2) OVER (PARTITION BY c2) as avg1 \
- FROM test \
- ORDER BY c1, c2 \
- LIMIT 5",
- 4,
- )
- .await?;
-
- let expected = vec![
- "+----+----+------+--------+------+------+------+",
- "| c1 | c2 | sum1 | count1 | max1 | min1 | avg1 |",
- "+----+----+------+--------+------+------+------+",
- "| 0 | 1 | 4 | 4 | 1 | 1 | 1 |",
- "| 0 | 2 | 8 | 4 | 2 | 2 | 2 |",
- "| 0 | 3 | 12 | 4 | 3 | 3 | 3 |",
- "| 0 | 4 | 16 | 4 | 4 | 4 | 4 |",
- "| 0 | 5 | 20 | 4 | 5 | 5 | 5 |",
- "+----+----+------+--------+------+------+------+",
- ];
-
- // window function shall respect ordering
- assert_batches_eq!(expected, &results);
- Ok(())
-}
-
-#[tokio::test]
-async fn window_partition_by_order_by() -> Result<()> {
- let results = execute_with_partition(
- "SELECT \
- c1, \
- c2, \
- ROW_NUMBER() OVER (PARTITION BY c2 ORDER BY c1) as rn1, \
- FIRST_VALUE(c2 + c1) OVER (PARTITION BY c2 ORDER BY c1) as fv1, \
- LAST_VALUE(c2 + c1) OVER (PARTITION BY c2 ORDER BY c1) as lv1, \
- NTH_VALUE(c2 + c1, 1) OVER (PARTITION BY c2 ORDER BY c1) as nv1, \
- SUM(c2) OVER (PARTITION BY c2 ORDER BY c1) as sum1, \
- COUNT(c2) OVER (PARTITION BY c2 ORDER BY c1) as count1, \
- MAX(c2) OVER (PARTITION BY c2 ORDER BY c1) as max1, \
- MIN(c2) OVER (PARTITION BY c2 ORDER BY c1) as min1, \
- AVG(c2) OVER (PARTITION BY c2 ORDER BY c1) as avg1 \
- FROM test \
- ORDER BY c1, c2 \
- LIMIT 5",
- 4,
- )
- .await?;
-
- let expected = vec![
-
"+----+----+-----+-----+-----+-----+------+--------+------+------+------+",
- "| c1 | c2 | rn1 | fv1 | lv1 | nv1 | sum1 | count1 | max1 | min1 |
avg1 |",
-
"+----+----+-----+-----+-----+-----+------+--------+------+------+------+",
- "| 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1
|",
- "| 0 | 2 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2
|",
- "| 0 | 3 | 1 | 3 | 3 | 3 | 3 | 1 | 3 | 3 | 3
|",
- "| 0 | 4 | 1 | 4 | 4 | 4 | 4 | 1 | 4 | 4 | 4
|",
- "| 0 | 5 | 1 | 5 | 5 | 5 | 5 | 1 | 5 | 5 | 5
|",
-
"+----+----+-----+-----+-----+-----+------+--------+------+------+------+",
- ];
-
- // window function shall respect ordering
- assert_batches_eq!(expected, &results);
- Ok(())
-}
-
-#[tokio::test]
-async fn window_expr_eliminate() -> Result<()> {
- let ctx = SessionContext::new();
-
- // window expr is not referenced anywhere, eliminate it.
- let sql = "WITH _sample_data AS (
- SELECT 1 as a, 'aa' AS b
- UNION ALL
- SELECT 3 as a, 'aa' AS b
- UNION ALL
- SELECT 5 as a, 'bb' AS b
- UNION ALL
- SELECT 7 as a, 'bb' AS b
- ), _data2 AS (
- SELECT
- row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
- s.a,
- s.b
- FROM _sample_data s
- )
- SELECT d.b, MAX(d.a) AS max_a
- FROM _data2 d
- GROUP BY d.b
- ORDER BY d.b;";
-
- let msg = format!("Creating logical plan for '{sql}'");
- let dataframe = ctx.sql(&("explain ".to_owned() + sql)).await.expect(&msg);
- let plan = dataframe.into_optimized_plan().unwrap();
- let expected = vec![
- "Explain [plan_type:Utf8, plan:Utf8]",
- " Sort: d.b ASC NULLS LAST [b:Utf8, max_a:Int64;N]",
- " Projection: d.b, MAX(d.a) AS max_a [b:Utf8, max_a:Int64;N]",
- " Aggregate: groupBy=[[d.b]], aggr=[[MAX(d.a)]] [b:Utf8,
MAX(d.a):Int64;N]",
- " SubqueryAlias: d [a:Int64, b:Utf8]",
- " SubqueryAlias: _data2 [a:Int64, b:Utf8]",
- " Projection: s.a, s.b [a:Int64, b:Utf8]",
- " SubqueryAlias: s [a:Int64, b:Utf8]",
- " SubqueryAlias: _sample_data [a:Int64, b:Utf8]",
- " Union [a:Int64, b:Utf8]",
- " Projection: Int64(1) AS a, Utf8(\"aa\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- " Projection: Int64(3) AS a, Utf8(\"aa\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- " Projection: Int64(5) AS a, Utf8(\"bb\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- " Projection: Int64(7) AS a, Utf8(\"bb\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- ];
- let formatted = plan.display_indent_schema().to_string();
- let actual: Vec<&str> = formatted.trim().lines().collect();
- assert_eq!(
- expected, actual,
- "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
- );
-
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+-------+",
- "| b | max_a |",
- "+----+-------+",
- "| aa | 3 |",
- "| bb | 7 |",
- "+----+-------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- // window expr is referenced by the output, keep it
- let sql = "WITH _sample_data AS (
- SELECT 1 as a, 'aa' AS b
- UNION ALL
- SELECT 3 as a, 'aa' AS b
- UNION ALL
- SELECT 5 as a, 'bb' AS b
- UNION ALL
- SELECT 7 as a, 'bb' AS b
- ), _data2 AS (
- SELECT
- row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
- s.a,
- s.b
- FROM _sample_data s
- )
- SELECT d.b, MAX(d.a) AS max_a, max(d.seq)
- FROM _data2 d
- GROUP BY d.b
- ORDER BY d.b;";
-
- let dataframe = ctx.sql(&("explain ".to_owned() + sql)).await.expect(&msg);
- let plan = dataframe.into_optimized_plan().unwrap();
- let expected = vec![
- "Explain [plan_type:Utf8, plan:Utf8]",
- " Sort: d.b ASC NULLS LAST [b:Utf8, max_a:Int64;N,
MAX(d.seq):UInt64;N]",
- " Projection: d.b, MAX(d.a) AS max_a, MAX(d.seq) [b:Utf8,
max_a:Int64;N, MAX(d.seq):UInt64;N]",
- " Aggregate: groupBy=[[d.b]], aggr=[[MAX(d.a), MAX(d.seq)]]
[b:Utf8, MAX(d.a):Int64;N, MAX(d.seq):UInt64;N]",
- " SubqueryAlias: d [seq:UInt64;N, a:Int64, b:Utf8]",
- " SubqueryAlias: _data2 [seq:UInt64;N, a:Int64, b:Utf8]",
- " Projection: ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS seq, s.a,
s.b [seq:UInt64;N, a:Int64, b:Utf8]",
- " WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY
[s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW]] [a:Int64, b:Utf8, ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]",
- " SubqueryAlias: s [a:Int64, b:Utf8]",
- " SubqueryAlias: _sample_data [a:Int64, b:Utf8]",
- " Union [a:Int64, b:Utf8]",
- " Projection: Int64(1) AS a, Utf8(\"aa\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- " Projection: Int64(3) AS a, Utf8(\"aa\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- " Projection: Int64(5) AS a, Utf8(\"bb\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- " Projection: Int64(7) AS a, Utf8(\"bb\") AS b
[a:Int64, b:Utf8]",
- " EmptyRelation []",
- ];
- let formatted = plan.display_indent_schema().to_string();
- let actual: Vec<&str> = formatted.trim().lines().collect();
- assert_eq!(
- expected, actual,
- "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
- );
-
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+-------+------------+",
- "| b | max_a | MAX(d.seq) |",
- "+----+-------+------------+",
- "| aa | 3 | 2 |",
- "| bb | 7 | 2 |",
- "+----+-------+------------+",
- ];
-
- assert_batches_eq!(expected, &results);
- Ok(())
-}
-
#[tokio::test]
async fn window_in_expression() -> Result<()> {
let ctx = SessionContext::new();
diff --git a/datafusion/core/tests/sqllogictests/test_files/window.slt
b/datafusion/core/tests/sqllogictests/test_files/window.slt
new file mode 100644
index 000000000..d10946e5d
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/window.slt
@@ -0,0 +1,400 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+statement ok
+CREATE EXTERNAL TABLE aggregate_test_100 (
+ c1 VARCHAR NOT NULL,
+ c2 TINYINT NOT NULL,
+ c3 SMALLINT NOT NULL,
+ c4 SMALLINT,
+ c5 INT,
+ c6 BIGINT NOT NULL,
+ c7 SMALLINT NOT NULL,
+ c8 INT NOT NULL,
+ c9 BIGINT UNSIGNED NOT NULL,
+ c10 VARCHAR NOT NULL,
+ c11 FLOAT NOT NULL,
+ c12 DOUBLE NOT NULL,
+ c13 VARCHAR NOT NULL
+)
+STORED AS CSV
+WITH HEADER ROW
+LOCATION '../../testing/data/csv/aggregate_test_100.csv'
+
+### This is the same table as
+### execute_with_partition with 4 partitions
+statement ok
+CREATE EXTERNAL TABLE test (c1 int, c2 bigint, c3 boolean)
+STORED AS CSV LOCATION 'tests/data/partitioned_csv';
+
+
+# for window functions without order by the first, last, and nth function call
does not make sense
+# csv_query_window_with_empty_over
+query IIII
+select
+c9,
+count(c5) over () as count1,
+max(c5) over () as max1,
+min(c5) over () as min1
+from aggregate_test_100
+order by c9
+limit 5
+----
+28774375 100 2143473091 -2141999138
+63044568 100 2143473091 -2141999138
+141047417 100 2143473091 -2141999138
+141680161 100 2143473091 -2141999138
+145294611 100 2143473091 -2141999138
+
+# for window functions without order by the first, last, and nth function call
does not make sense
+# csv_query_window_with_partition_by
+query IIIIII
+select
+c9,
+sum(cast(c4 as Int)) over (partition by c3) as sum1,
+avg(cast(c4 as Int)) over (partition by c3) as avg1,
+count(cast(c4 as Int)) over (partition by c3) as count1,
+max(cast(c4 as Int)) over (partition by c3) as max1,
+min(cast(c4 as Int)) over (partition by c3) as min1
+from aggregate_test_100
+order by c9
+limit 5
+----
+28774375 -16110 -16110 1 -16110 -16110
+63044568 3917 3917 1 3917 3917
+141047417 -38455 -19227.5 2 -16974 -21481
+141680161 -1114 -1114 1 -1114 -1114
+145294611 15673 15673 1 15673 15673
+
+
+
+# async fn csv_query_window_with_order_by
+query IIIIII
+select
+c9,
+sum(c5) over (order by c9) as sum1,
+avg(c5) over (order by c9) as avg1,
+count(c5) over (order by c9) as count1,
+max(c5) over (order by c9) as max1,
+min(c5) over (order by c9) as min1,
+first_value(c5) over (order by c9) as fv1,
+last_value(c5) over (order by c9) as lv1,
+nth_value(c5, 2) over (order by c9) as nv1
+from aggregate_test_100
+order by c9
+limit 5
+----
+28774375 61035129 61035129 1 61035129 61035129 61035129 61035129 NULL
+63044568 -47938237 -23969118.5 2 61035129 -108973366 61035129 -108973366
-108973366
+141047417 575165281 191721760.33333334 3 623103518 -108973366 61035129
623103518 -108973366
+141680161 -1352462829 -338115707.25 4 623103518 -1927628110 61035129
-1927628110 -108973366
+145294611 -3251637940 -650327588 5 623103518 -1927628110 61035129 -1899175111
-108973366
+
+# csv_query_window_with_partition_by_order_by
+query IIIIII
+select
+ c9,
+ sum(c5) over (partition by c4 order by c9) as sum1,
+ avg(c5) over (partition by c4 order by c9) as avg1,
+ count(c5) over (partition by c4 order by c9) as count1,
+ max(c5) over (partition by c4 order by c9) as max1,
+ min(c5) over (partition by c4 order by c9) as min1,
+ first_value(c5) over (partition by c4 order by c9) as fv1,
+ last_value(c5) over (partition by c4 order by c9) as lv1,
+ nth_value(c5, 2) over (partition by c4 order by c9) as nv1
+from aggregate_test_100
+order by c9
+limit 5
+----
+28774375 61035129 61035129 1 61035129 61035129 61035129 61035129 NULL
+63044568 -108973366 -108973366 1 -108973366 -108973366 -108973366 -108973366
NULL
+141047417 623103518 623103518 1 623103518 623103518 623103518 623103518 NULL
+141680161 -1927628110 -1927628110 1 -1927628110 -1927628110 -1927628110
-1927628110 NULL
+145294611 -1899175111 -1899175111 1 -1899175111 -1899175111 -1899175111
-1899175111 NULL
+
+# window()
+query IIIIII
+SELECT
+c1,
+c2,
+SUM(c2) OVER () as sum1,
+COUNT(c2) OVER () as count1,
+MAX(c2) OVER () as max1,
+MIN(c2) OVER () as min1,
+AVG(c2) OVER () as avg1
+FROM test
+ORDER BY c1, c2
+LIMIT 5
+----
+0 0 220 44 10 0 5
+0 1 220 44 10 0 5
+0 2 220 44 10 0 5
+0 3 220 44 10 0 5
+0 4 220 44 10 0 5
+
+
+# window_order_by
+query IIIIII
+SELECT
+c1,
+c2,
+ROW_NUMBER() OVER (ORDER BY c1, c2) as rn1,
+FIRST_VALUE(c2) OVER (ORDER BY c1, c2) as fv1,
+LAST_VALUE(c2) OVER (ORDER BY c1, c2) as lv1,
+NTH_VALUE(c2, 2) OVER (ORDER BY c1, c2) as nv1,
+SUM(c2) OVER (ORDER BY c1, c2) as sum1,
+COUNT(c2) OVER (ORDER BY c1, c2) as count1,
+MAX(c2) OVER (ORDER BY c1, c2) as max1,
+MIN(c2) OVER (ORDER BY c1, c2) as min1,
+AVG(c2) OVER (ORDER BY c1, c2) as avg1
+FROM test
+ORDER BY c1, c2
+LIMIT 5
+----
+0 0 1 0 0 NULL 0 1 0 0 0
+0 1 2 0 1 1 1 2 1 0 0.5
+0 2 3 0 2 1 3 3 2 0 1
+0 3 4 0 3 1 6 4 3 0 1.5
+0 4 5 0 4 1 10 5 4 0 2
+
+# window_partition_by
+query IIIIII
+SELECT
+c1,
+c2,
+SUM(c2) OVER (PARTITION BY c2) as sum1,
+COUNT(c2) OVER (PARTITION BY c2) as count1,
+MAX(c2) OVER (PARTITION BY c2) as max1,
+MIN(c2) OVER (PARTITION BY c2) as min1,
+AVG(c2) OVER (PARTITION BY c2) as avg1
+FROM test
+ORDER BY c1, c2
+LIMIT 5
+----
+0 0 0 4 0 0 0
+0 1 4 4 1 1 1
+0 2 8 4 2 2 2
+0 3 12 4 3 3 3
+0 4 16 4 4 4 4
+
+query IIIIIIIIII
+SELECT
+c1,
+c2,
+ROW_NUMBER() OVER (PARTITION BY c2 ORDER BY c1) as rn1,
+FIRST_VALUE(c2 + c1) OVER (PARTITION BY c2 ORDER BY c1) as fv1,
+LAST_VALUE(c2 + c1) OVER (PARTITION BY c2 ORDER BY c1) as lv1,
+NTH_VALUE(c2 + c1, 1) OVER (PARTITION BY c2 ORDER BY c1) as nv1,
+SUM(c2) OVER (PARTITION BY c2 ORDER BY c1) as sum1,
+COUNT(c2) OVER (PARTITION BY c2 ORDER BY c1) as count1,
+MAX(c2) OVER (PARTITION BY c2 ORDER BY c1) as max1,
+MIN(c2) OVER (PARTITION BY c2 ORDER BY c1) as min1,
+AVG(c2) OVER (PARTITION BY c2 ORDER BY c1) as avg1
+FROM test
+ORDER BY c1, c2
+LIMIT 5
+----
+0 0 1 0 0 0 0 1 0 0 0
+0 1 1 1 1 1 1 1 1 1 1
+0 2 1 2 2 2 2 1 2 2 2
+0 3 1 3 3 3 3 1 3 3 3
+0 4 1 4 4 4 4 1 4 4 4
+
+
+#####
+# window_expr_eliminate()
+#####
+
+# window expr is not referenced anywhere, eliminate it.
+query ??
+EXPLAIN
+WITH _sample_data AS (
+ SELECT 1 as a, 'aa' AS b
+ UNION ALL
+ SELECT 3 as a, 'aa' AS b
+ UNION ALL
+ SELECT 5 as a, 'bb' AS b
+ UNION ALL
+ SELECT 7 as a, 'bb' AS b
+ ), _data2 AS (
+ SELECT
+ row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
+ s.a,
+ s.b
+ FROM _sample_data s
+ )
+ SELECT d.b, MAX(d.a) AS max_a
+ FROM _data2 d
+ GROUP BY d.b
+ ORDER BY d.b;
+----
+logical_plan
+Sort: d.b ASC NULLS LAST
+ Projection: d.b, MAX(d.a) AS max_a
+ Aggregate: groupBy=[[d.b]], aggr=[[MAX(d.a)]]
+ SubqueryAlias: d
+ SubqueryAlias: _data2
+ Projection: s.a, s.b
+ SubqueryAlias: s
+ SubqueryAlias: _sample_data
+ Union
+ Projection: Int64(1) AS a, Utf8("aa") AS b
+ EmptyRelation
+ Projection: Int64(3) AS a, Utf8("aa") AS b
+ EmptyRelation
+ Projection: Int64(5) AS a, Utf8("bb") AS b
+ EmptyRelation
+ Projection: Int64(7) AS a, Utf8("bb") AS b
+ EmptyRelation
+physical_plan
+SortPreservingMergeExec: [b@0 ASC NULLS LAST]
+ SortExec: [b@0 ASC NULLS LAST]
+ ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a]
+ AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[MAX(d.a)]
+ CoalesceBatchesExec: target_batch_size=8192
+ RepartitionExec: partitioning=Hash([Column { name: "b", index: 0 }],
4), input_partitions=4
+ AggregateExec: mode=Partial, gby=[b@1 as b], aggr=[MAX(d.a)]
+ ProjectionExec: expr=[a@0 as a, b@1 as b]
+ UnionExec
+ ProjectionExec: expr=[1 as a, aa as b]
+ EmptyExec: produce_one_row=true
+ ProjectionExec: expr=[3 as a, aa as b]
+ EmptyExec: produce_one_row=true
+ ProjectionExec: expr=[5 as a, bb as b]
+ EmptyExec: produce_one_row=true
+ ProjectionExec: expr=[7 as a, bb as b]
+ EmptyExec: produce_one_row=true
+
+# Check actual result:
+query ?I
+WITH _sample_data AS (
+ SELECT 1 as a, 'aa' AS b
+ UNION ALL
+ SELECT 3 as a, 'aa' AS b
+ UNION ALL
+ SELECT 5 as a, 'bb' AS b
+ UNION ALL
+ SELECT 7 as a, 'bb' AS b
+ ), _data2 AS (
+ SELECT
+ row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
+ s.a,
+ s.b
+ FROM _sample_data s
+ )
+ SELECT d.b, MAX(d.a) AS max_a
+ FROM _data2 d
+ GROUP BY d.b
+ ORDER BY d.b;
+----
+aa 3
+bb 7
+
+# window expr is referenced by the output, keep it
+query ??
+EXPLAIN
+WITH _sample_data AS (
+ SELECT 1 as a, 'aa' AS b
+ UNION ALL
+ SELECT 3 as a, 'aa' AS b
+ UNION ALL
+ SELECT 5 as a, 'bb' AS b
+ UNION ALL
+ SELECT 7 as a, 'bb' AS b
+ ), _data2 AS (
+ SELECT
+ row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
+ s.a,
+ s.b
+ FROM _sample_data s
+ )
+ SELECT d.b, MAX(d.a) AS max_a, max(d.seq)
+ FROM _data2 d
+ GROUP BY d.b
+ ORDER BY d.b
+----
+logical_plan
+Sort: d.b ASC NULLS LAST
+ Projection: d.b, MAX(d.a) AS max_a, MAX(d.seq)
+ Aggregate: groupBy=[[d.b]], aggr=[[MAX(d.a), MAX(d.seq)]]
+ SubqueryAlias: d
+ SubqueryAlias: _data2
+ Projection: ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS
LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS seq, s.a, s.b
+ WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [s.b] ORDER BY
[s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+ SubqueryAlias: s
+ SubqueryAlias: _sample_data
+ Union
+ Projection: Int64(1) AS a, Utf8("aa") AS b
+ EmptyRelation
+ Projection: Int64(3) AS a, Utf8("aa") AS b
+ EmptyRelation
+ Projection: Int64(5) AS a, Utf8("bb") AS b
+ EmptyRelation
+ Projection: Int64(7) AS a, Utf8("bb") AS b
+ EmptyRelation
+physical_plan
+SortPreservingMergeExec: [b@0 ASC NULLS LAST]
+ SortExec: [b@0 ASC NULLS LAST]
+ ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as
MAX(d.seq)]
+ AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[MAX(d.a),
MAX(d.seq)]
+ AggregateExec: mode=Partial, gby=[b@2 as b], aggr=[MAX(d.a),
MAX(d.seq)]
+ ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0
as a, b@1 as b]
+ BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name:
"ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0,
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range,
start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }]
+ SortExec: [b@1 ASC NULLS LAST,a@0 ASC NULLS LAST]
+ CoalesceBatchesExec: target_batch_size=8192
+ RepartitionExec: partitioning=Hash([Column { name: "b",
index: 1 }], 4), input_partitions=4
+ UnionExec
+ ProjectionExec: expr=[1 as a, aa as b]
+ EmptyExec: produce_one_row=true
+ ProjectionExec: expr=[3 as a, aa as b]
+ EmptyExec: produce_one_row=true
+ ProjectionExec: expr=[5 as a, bb as b]
+ EmptyExec: produce_one_row=true
+ ProjectionExec: expr=[7 as a, bb as b]
+ EmptyExec: produce_one_row=true
+
+
+
+
+
+
+
+# check actual result
+
+query ?II
+WITH _sample_data AS (
+ SELECT 1 as a, 'aa' AS b
+ UNION ALL
+ SELECT 3 as a, 'aa' AS b
+ UNION ALL
+ SELECT 5 as a, 'bb' AS b
+ UNION ALL
+ SELECT 7 as a, 'bb' AS b
+ ), _data2 AS (
+ SELECT
+ row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
+ s.a,
+ s.b
+ FROM _sample_data s
+ )
+ SELECT d.b, MAX(d.a) AS max_a, max(d.seq)
+ FROM _data2 d
+ GROUP BY d.b
+ ORDER BY d.b
+----
+aa 3 2
+bb 7 2