This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 237233f8c Add some more aggregate sqllogictests and remove rust tests
(#4505)
237233f8c is described below
commit 237233f8c84bec58f3d07d960c56d856e20df4c5
Author: mvanschellebeeck <[email protected]>
AuthorDate: Mon Dec 5 13:06:34 2022 -0500
Add some more aggregate sqllogictests and remove rust tests (#4505)
* Expand median tests + fix floats
* Remove rust tests
* simplify csv_query_rollup_avg test
---
datafusion/core/tests/sql/aggregates.rs | 1367 --------------------
.../tests/sqllogictests/test_files/aggregate.slt | 638 +++++----
2 files changed, 384 insertions(+), 1621 deletions(-)
diff --git a/datafusion/core/tests/sql/aggregates.rs
b/datafusion/core/tests/sql/aggregates.rs
index 8b57a69fa..2dd3a8dec 100644
--- a/datafusion/core/tests/sql/aggregates.rs
+++ b/datafusion/core/tests/sql/aggregates.rs
@@ -41,519 +41,6 @@ async fn csv_query_avg_multi_batch() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn csv_query_avg() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT avg(c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["0.5089725099127211"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_covariance_1() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT covar_pop(c2, c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["-0.07916932235380847"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_covariance_2() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT covar(c2, c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["-0.07996901247859442"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_correlation() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT corr(c2, c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["-0.19064544190576607"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_variance_1() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT var_pop(c2) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["1.8675"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_variance_2() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT var_pop(c6) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["26156334342021890000000000000000000000"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_variance_3() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT var_pop(c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["0.09234223721582163"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_variance_4() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT var(c2) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["1.8863636363636365"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_variance_5() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT var_samp(c2) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["1.8863636363636365"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_stddev_1() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT stddev_pop(c2) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["1.3665650368716449"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_stddev_2() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT stddev_pop(c6) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["5114326382039172000"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_stddev_3() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT stddev_pop(c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["0.30387865541334363"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_stddev_4() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT stddev(c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["0.3054095399405338"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_stddev_5() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT stddev_samp(c12) FROM aggregate_test_100";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["0.3054095399405338"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_stddev_6() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "select stddev(sq.column1) from (values (1.1), (2.0), (3.0)) as
sq";
- let mut actual = execute(&ctx, sql).await;
- actual.sort();
- let expected = vec![vec!["0.9504384952922168"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_approx_median_1() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT approx_median(c2) FROM aggregate_test_100";
- let actual = execute(&ctx, sql).await;
- let expected = vec![vec!["3"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_approx_median_2() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT approx_median(c6) FROM aggregate_test_100";
- let actual = execute(&ctx, sql).await;
- let expected = vec![vec!["1146409980542786560"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_approx_median_3() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT approx_median(c12) FROM aggregate_test_100";
- let actual = execute(&ctx, sql).await;
- let expected = vec![vec!["0.5550065410522981"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_median_1() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT median(c2) FROM aggregate_test_100";
- let actual = execute(&ctx, sql).await;
- let expected = vec![vec!["3"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_median_2() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT median(c6) FROM aggregate_test_100";
- let actual = execute(&ctx, sql).await;
- let expected = vec![vec!["1125553990140691277"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_median_3() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT median(c12) FROM aggregate_test_100";
- let actual = execute(&ctx, sql).await;
- let expected = vec![vec!["0.5513900544385053"]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn median_i8() -> Result<()> {
- median_test(
- "median",
- DataType::Int8,
- Arc::new(Int8Array::from(vec![i8::MIN, i8::MIN, 100, i8::MAX])),
- "-14",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_i16() -> Result<()> {
- median_test(
- "median",
- DataType::Int16,
- Arc::new(Int16Array::from(vec![i16::MIN, i16::MIN, 100, i16::MAX])),
- "-16334",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_i32() -> Result<()> {
- median_test(
- "median",
- DataType::Int32,
- Arc::new(Int32Array::from(vec![i32::MIN, i32::MIN, 100, i32::MAX])),
- "-1073741774",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_i64() -> Result<()> {
- median_test(
- "median",
- DataType::Int64,
- Arc::new(Int64Array::from(vec![i64::MIN, i64::MIN, 100, i64::MAX])),
- "-4611686018427388000",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_u8() -> Result<()> {
- median_test(
- "median",
- DataType::UInt8,
- Arc::new(UInt8Array::from(vec![u8::MIN, u8::MIN, 100, u8::MAX])),
- "50",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_u16() -> Result<()> {
- median_test(
- "median",
- DataType::UInt16,
- Arc::new(UInt16Array::from(vec![u16::MIN, u16::MIN, 100, u16::MAX])),
- "50",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_u32() -> Result<()> {
- median_test(
- "median",
- DataType::UInt32,
- Arc::new(UInt32Array::from(vec![u32::MIN, u32::MIN, 100, u32::MAX])),
- "50",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_u64() -> Result<()> {
- median_test(
- "median",
- DataType::UInt64,
- Arc::new(UInt64Array::from(vec![u64::MIN, u64::MIN, 100, u64::MAX])),
- "50",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_f32() -> Result<()> {
- median_test(
- "median",
- DataType::Float32,
- Arc::new(Float32Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
- "3.3",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_f64() -> Result<()> {
- median_test(
- "median",
- DataType::Float64,
- Arc::new(Float64Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
- "3.3",
- )
- .await
-}
-
-#[tokio::test]
-async fn median_f64_nan() -> Result<()> {
- median_test(
- "median",
- DataType::Float64,
- Arc::new(Float64Array::from(vec![1.1, f64::NAN, f64::NAN, f64::NAN])),
- "NaN", // probably not the desired behavior? - see
https://github.com/apache/arrow-datafusion/issues/3039
- )
- .await
-}
-
-#[tokio::test]
-async fn approx_median_f64_nan() -> Result<()> {
- median_test(
- "approx_median",
- DataType::Float64,
- Arc::new(Float64Array::from(vec![1.1, f64::NAN, f64::NAN, f64::NAN])),
- "NaN", // probably not the desired behavior? - see
https://github.com/apache/arrow-datafusion/issues/3039
- )
- .await
-}
-
-async fn median_test(
- func: &str,
- data_type: DataType,
- values: ArrayRef,
- expected: &str,
-) -> Result<()> {
- let ctx = SessionContext::new();
- let schema = Arc::new(Schema::new(vec![Field::new("a", data_type,
false)]));
- let batch = RecordBatch::try_new(schema.clone(), vec![values])?;
- ctx.register_batch("t", batch)?;
- let sql = format!("SELECT {}(a) FROM t", func);
- let actual = execute(&ctx, &sql).await;
- let expected = vec![vec![expected.to_owned()]];
- assert_float_eq(&expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_external_table_count() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT COUNT(c12) FROM aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-------------------------------+",
- "| COUNT(aggregate_test_100.c12) |",
- "+-------------------------------+",
- "| 100 |",
- "+-------------------------------+",
- ];
-
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn csv_query_external_table_sum() {
- let ctx = SessionContext::new();
- // cast smallint and int to bigint to avoid overflow during calculation
- register_aggregate_csv_by_sql(&ctx).await;
- let sql =
- "SELECT SUM(CAST(c7 AS BIGINT)), SUM(CAST(c8 AS BIGINT)) FROM
aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----------------------------+----------------------------+",
- "| SUM(aggregate_test_100.c7) | SUM(aggregate_test_100.c8) |",
- "+----------------------------+----------------------------+",
- "| 13060 | 3017641 |",
- "+----------------------------+----------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn csv_query_count() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT count(c12) FROM aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-------------------------------+",
- "| COUNT(aggregate_test_100.c12) |",
- "+-------------------------------+",
- "| 100 |",
- "+-------------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_count_distinct() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT count(distinct c2) FROM aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+---------------------------------------+",
- "| COUNT(DISTINCT aggregate_test_100.c2) |",
- "+---------------------------------------+",
- "| 5 |",
- "+---------------------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_count_distinct_expr() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT count(distinct c2 % 2) FROM aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+--------------------------------------------------+",
- "| COUNT(DISTINCT aggregate_test_100.c2 % Int64(2)) |",
- "+--------------------------------------------------+",
- "| 2 |",
- "+--------------------------------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_count_star() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT COUNT(*) FROM aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----------------+",
- "| COUNT(UInt8(1)) |",
- "+-----------------+",
- "| 100 |",
- "+-----------------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn csv_query_count_literal() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT COUNT(2) FROM aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----------------+",
- "| COUNT(Int64(2)) |",
- "+-----------------+",
- "| 100 |",
- "+-----------------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
#[tokio::test]
#[ignore] // https://github.com/apache/arrow-datafusion/issues/3353
async fn csv_query_approx_count() -> Result<()> {
@@ -572,351 +59,11 @@ async fn csv_query_approx_count() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn csv_query_approx_count_dupe_expr_aliased() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql =
- "SELECT approx_distinct(c9) a, approx_distinct(c9) b FROM
aggregate_test_100";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----+-----+",
- "| a | b |",
- "+-----+-----+",
- "| 100 | 100 |",
- "+-----+-----+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-// This test executes the APPROX_PERCENTILE_CONT aggregation against the test
-// data, asserting the estimated quantiles are ±5% their actual values.
-//
-// Actual quantiles calculated with:
-//
-// ```r
-// read_csv("./testing/data/csv/aggregate_test_100.csv") |>
-// select_if(is.numeric) |>
-// summarise_all(~ quantile(., c(0.1, 0.5, 0.9)))
-// ```
-//
-// Giving:
-//
-// ```text
-// c2 c3 c4 c5 c6 c7 c8 c9
c10 c11 c12
-// <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
<dbl> <dbl> <dbl>
-// 1 1 -95.3 -22925. -1882606710 -7.25e18 18.9 2671. 472608672.
1.83e18 0.109 0.0714
-// 2 3 15.5 4599 377164262 1.13e18 134. 30634 2365817608.
9.30e18 0.491 0.551
-// 3 5 102. 25334. 1991374996. 7.37e18 231 57518. 3776538487.
1.61e19 0.834 0.946
-// ```
-//
-// Column `c12` is omitted due to a large relative error (~10%) due to the
small
-// float values.
-#[tokio::test]
-async fn csv_query_approx_percentile_cont() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
-
- // Generate an assertion that the estimated $percentile value for $column
is
- // within 5% of the $actual percentile value.
- macro_rules! percentile_test {
- ($ctx:ident, column=$column:literal, percentile=$percentile:literal,
actual=$actual:literal) => {
- let sql = format!("SELECT (ABS(1 - CAST(approx_percentile_cont({},
{}) AS DOUBLE) / {}) < 0.05) AS q FROM aggregate_test_100", $column,
$percentile, $actual);
- let actual = execute_to_batches(&ctx, &sql).await;
- let want = [
- "+------+",
- "| q |",
- "+------+",
- "| true |",
- "+------+"
- ];
- assert_batches_eq!(want, &actual);
- };
- }
-
- percentile_test!(ctx, column = "c2", percentile = 0.1, actual = 1.0);
- percentile_test!(ctx, column = "c2", percentile = 0.5, actual = 3.0);
- percentile_test!(ctx, column = "c2", percentile = 0.9, actual = 5.0);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c3", percentile = 0.1, actual = -95.3);
- percentile_test!(ctx, column = "c3", percentile = 0.5, actual = 15.5);
- percentile_test!(ctx, column = "c3", percentile = 0.9, actual = 102.0);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c4", percentile = 0.1, actual = -22925.0);
- percentile_test!(ctx, column = "c4", percentile = 0.5, actual = 4599.0);
- percentile_test!(ctx, column = "c4", percentile = 0.9, actual = 25334.0);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c5", percentile = 0.1, actual =
-1882606710.0);
- percentile_test!(ctx, column = "c5", percentile = 0.5, actual =
377164262.0);
- percentile_test!(ctx, column = "c5", percentile = 0.9, actual =
1991374996.0);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c6", percentile = 0.1, actual = -7.25e18);
- percentile_test!(ctx, column = "c6", percentile = 0.5, actual = 1.13e18);
- percentile_test!(ctx, column = "c6", percentile = 0.9, actual = 7.37e18);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c7", percentile = 0.1, actual = 18.9);
- percentile_test!(ctx, column = "c7", percentile = 0.5, actual = 134.0);
- percentile_test!(ctx, column = "c7", percentile = 0.9, actual = 231.0);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c8", percentile = 0.1, actual = 2671.0);
- percentile_test!(ctx, column = "c8", percentile = 0.5, actual = 30634.0);
- percentile_test!(ctx, column = "c8", percentile = 0.9, actual = 57518.0);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c9", percentile = 0.1, actual =
472608672.0);
- percentile_test!(ctx, column = "c9", percentile = 0.5, actual =
2365817608.0);
- percentile_test!(ctx, column = "c9", percentile = 0.9, actual =
3776538487.0);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c10", percentile = 0.1, actual = 1.83e18);
- percentile_test!(ctx, column = "c10", percentile = 0.5, actual = 9.30e18);
- percentile_test!(ctx, column = "c10", percentile = 0.9, actual = 1.61e19);
- ////////////////////////////////////
- percentile_test!(ctx, column = "c11", percentile = 0.1, actual = 0.109);
- percentile_test!(ctx, column = "c11", percentile = 0.5, actual = 0.491);
- percentile_test!(ctx, column = "c11", percentile = 0.9, actual = 0.834);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_cube_avg() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
-
- let sql = "SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE
(c1, c2) ORDER BY c1, c2";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+----+----------------------------+",
- "| c1 | c2 | AVG(aggregate_test_100.c3) |",
- "+----+----+----------------------------+",
- "| a | 1 | -17.6 |",
- "| a | 2 | -15.333333333333334 |",
- "| a | 3 | -4.5 |",
- "| a | 4 | -32 |",
- "| a | 5 | -32 |",
- "| a | | -18.333333333333332 |",
- "| b | 1 | 31.666666666666668 |",
- "| b | 2 | 25.5 |",
- "| b | 3 | -42 |",
- "| b | 4 | -44.6 |",
- "| b | 5 | -0.2 |",
- "| b | | -5.842105263157895 |",
- "| c | 1 | 47.5 |",
- "| c | 2 | -55.57142857142857 |",
- "| c | 3 | 47.5 |",
- "| c | 4 | -10.75 |",
- "| c | 5 | 12 |",
- "| c | | -1.3333333333333333 |",
- "| d | 1 | -8.142857142857142 |",
- "| d | 2 | 109.33333333333333 |",
- "| d | 3 | 41.333333333333336 |",
- "| d | 4 | 54 |",
- "| d | 5 | -49.5 |",
- "| d | | 25.444444444444443 |",
- "| e | 1 | 75.66666666666667 |",
- "| e | 2 | 37.8 |",
- "| e | 3 | 48 |",
- "| e | 4 | 37.285714285714285 |",
- "| e | 5 | -11 |",
- "| e | | 40.333333333333336 |",
- "| | 1 | 16.681818181818183 |",
- "| | 2 | 8.363636363636363 |",
- "| | 3 | 20.789473684210527 |",
- "| | 4 | 1.2608695652173914 |",
- "| | 5 | -13.857142857142858 |",
- "| | | 7.81 |",
- "+----+----+----------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_rollup_avg() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
-
- let sql = "SELECT c1, c2, c3, AVG(c4) FROM aggregate_test_100 GROUP BY
ROLLUP (c1, c2, c3) ORDER BY c1, c2, c3";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+----+------+----------------------------+",
- "| c1 | c2 | c3 | AVG(aggregate_test_100.c4) |",
- "+----+----+------+----------------------------+",
- "| a | 1 | -85 | -15154 |",
- "| a | 1 | -56 | 8692 |",
- "| a | 1 | -25 | 15295 |",
- "| a | 1 | -5 | 12636 |",
- "| a | 1 | 83 | -14704 |",
- "| a | 1 | | 1353 |",
- "| a | 2 | -48 | -18025 |",
- "| a | 2 | -43 | 13080 |",
- "| a | 2 | 45 | 15673 |",
- "| a | 2 | | 3576 |",
- "| a | 3 | -72 | -11122 |",
- "| a | 3 | -12 | -9168 |",
- "| a | 3 | 13 | 22338.5 |",
- "| a | 3 | 14 | 28162 |",
- "| a | 3 | 17 | -22796 |",
- "| a | 3 | | 4958.833333333333 |",
- "| a | 4 | -101 | 11640 |",
- "| a | 4 | -54 | -2376 |",
- "| a | 4 | -38 | 20744 |",
- "| a | 4 | 65 | -28462 |",
- "| a | 4 | | 386.5 |",
- "| a | 5 | -101 | -12484 |",
- "| a | 5 | -31 | -12907 |",
- "| a | 5 | 36 | -16974 |",
- "| a | 5 | | -14121.666666666666 |",
- "| a | | | 306.04761904761904 |",
- "| b | 1 | 12 | 7652 |",
- "| b | 1 | 29 | -18218 |",
- "| b | 1 | 54 | -18410 |",
- "| b | 1 | | -9658.666666666666 |",
- "| b | 2 | -60 | -21739 |",
- "| b | 2 | 31 | 23127 |",
- "| b | 2 | 63 | 21456 |",
- "| b | 2 | 68 | 15874 |",
- "| b | 2 | | 9679.5 |",
- "| b | 3 | -101 | -13217 |",
- "| b | 3 | 17 | 14457 |",
- "| b | 3 | | 620 |",
- "| b | 4 | -117 | 19316 |",
- "| b | 4 | -111 | -1967 |",
- "| b | 4 | -59 | 25286 |",
- "| b | 4 | 17 | -28070 |",
- "| b | 4 | 47 | 20690 |",
- "| b | 4 | | 7051 |",
- "| b | 5 | -82 | 22080 |",
- "| b | 5 | -44 | 15788 |",
- "| b | 5 | -5 | 24896 |",
- "| b | 5 | 62 | 16337 |",
- "| b | 5 | 68 | 21576 |",
- "| b | 5 | | 20135.4 |",
- "| b | | | 7732.315789473684 |",
- "| c | 1 | -24 | -24085 |",
- "| c | 1 | 41 | -4667 |",
- "| c | 1 | 70 | 27752 |",
- "| c | 1 | 103 | -22186 |",
- "| c | 1 | | -5796.5 |",
- "| c | 2 | -117 | -30187 |",
- "| c | 2 | -107 | -2904 |",
- "| c | 2 | -106 | -1114 |",
- "| c | 2 | -60 | -16312 |",
- "| c | 2 | -29 | 25305 |",
- "| c | 2 | 1 | 18109 |",
- "| c | 2 | 29 | -3855 |",
- "| c | 2 | | -1565.4285714285713 |",
- "| c | 3 | -2 | -18655 |",
- "| c | 3 | 22 | 13741 |",
- "| c | 3 | 73 | -9565 |",
- "| c | 3 | 97 | 29106 |",
- "| c | 3 | | 3656.75 |",
- "| c | 4 | -90 | -2935 |",
- "| c | 4 | -79 | 5281 |",
- "| c | 4 | 3 | -30508 |",
- "| c | 4 | 123 | 16620 |",
- "| c | 4 | | -2885.5 |",
- "| c | 5 | -94 | -15880 |",
- "| c | 5 | 118 | 19208 |",
- "| c | 5 | | 1664 |",
- "| c | | | -1320.5238095238096 |",
- "| d | 1 | -99 | 5613 |",
- "| d | 1 | -98 | 13630 |",
- "| d | 1 | -72 | 25590 |",
- "| d | 1 | -8 | 27138 |",
- "| d | 1 | 38 | 18384 |",
- "| d | 1 | 57 | 28781 |",
- "| d | 1 | 125 | 31106 |",
- "| d | 1 | | 21463.14285714286 |",
- "| d | 2 | 93 | -12642 |",
- "| d | 2 | 113 | 3917 |",
- "| d | 2 | 122 | 10130 |",
- "| d | 2 | | 468.3333333333333 |",
- "| d | 3 | -76 | 8809 |",
- "| d | 3 | 77 | 15091 |",
- "| d | 3 | 123 | 29533 |",
- "| d | 3 | | 17811 |",
- "| d | 4 | 5 | -7688 |",
- "| d | 4 | 55 | -1471 |",
- "| d | 4 | 102 | -24558 |",
- "| d | 4 | | -11239 |",
- "| d | 5 | -59 | 2045 |",
- "| d | 5 | -40 | 22614 |",
- "| d | 5 | | 12329.5 |",
- "| d | | | 10890.111111111111 |",
- "| e | 1 | 36 | -21481 |",
- "| e | 1 | 71 | -5479 |",
- "| e | 1 | 120 | 10837 |",
- "| e | 1 | | -5374.333333333333 |",
- "| e | 2 | -61 | -2888 |",
- "| e | 2 | 49 | 24495 |",
- "| e | 2 | 52 | 5666 |",
- "| e | 2 | 97 | 18167 |",
- "| e | 2 | | 10221.2 |",
- "| e | 3 | -95 | 13611 |",
- "| e | 3 | 71 | 194 |",
- "| e | 3 | 104 | -25136 |",
- "| e | 3 | 112 | -6823 |",
- "| e | 3 | | -4538.5 |",
- "| e | 4 | -56 | -31500 |",
- "| e | 4 | -53 | 13788 |",
- "| e | 4 | 30 | -16110 |",
- "| e | 4 | 73 | -22501 |",
- "| e | 4 | 74 | -12612 |",
- "| e | 4 | 96 | -30336 |",
- "| e | 4 | 97 | -13181 |",
- "| e | 4 | | -16064.57142857143 |",
- "| e | 5 | -86 | 32514 |",
- "| e | 5 | 64 | -26526 |",
- "| e | 5 | | 2994 |",
- "| e | | | -4268.333333333333 |",
- "| | | | 2319.97 |",
- "+----+----+------+----------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
#[tokio::test]
async fn csv_query_approx_percentile_cont_with_weight() -> Result<()> {
let ctx = SessionContext::new();
register_aggregate_csv(&ctx).await?;
- // compare approx_percentile_cont and approx_percentile_cont_with_weight
- let sql = "SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+--------+",
- "| c1 | c3_p95 |",
- "+----+--------+",
- "| a | 73 |",
- "| b | 68 |",
- "| c | 122 |",
- "| d | 124 |",
- "| e | 115 |",
- "+----+--------+",
- ];
- assert_batches_eq!(expected, &actual);
-
- let sql = "SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS
c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1";
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
-
- let sql = "SELECT c1, approx_percentile_cont_with_weight(c3, c2, 0.95) AS
c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+--------+",
- "| c1 | c3_p95 |",
- "+----+--------+",
- "| a | 74 |",
- "| b | 68 |",
- "| c | 123 |",
- "| d | 124 |",
- "| e | 115 |",
- "+----+--------+",
- ];
- assert_batches_eq!(expected, &actual);
-
let results = plan_and_collect(
&ctx,
"SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM
aggregate_test_100",
@@ -949,22 +96,6 @@ async fn
csv_query_approx_percentile_cont_with_histogram_bins() -> Result<()> {
let ctx = SessionContext::new();
register_aggregate_csv(&ctx).await?;
- // compare approx_percentile_cont and approx_percentile_cont_with_weight
- let sql = "SELECT c1, approx_percentile_cont(c3, 0.95, 200) AS c3_p95 FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+--------+",
- "| c1 | c3_p95 |",
- "+----+--------+",
- "| a | 73 |",
- "| b | 68 |",
- "| c | 122 |",
- "| d | 124 |",
- "| e | 115 |",
- "+----+--------+",
- ];
- assert_batches_eq!(expected, &actual);
-
let results = plan_and_collect(
&ctx,
"SELECT c1, approx_percentile_cont(c3, 0.95, -1000) AS c3_p95 FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1",
@@ -992,256 +123,6 @@ async fn
csv_query_approx_percentile_cont_with_histogram_bins() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn csv_query_sum_crossjoin() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS
JOIN aggregate_test_100 as b GROUP BY a.c1, b.c1 ORDER BY a.c1, b.c1";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+----+-----------+",
- "| c1 | c1 | SUM(a.c2) |",
- "+----+----+-----------+",
- "| a | a | 1260 |",
- "| a | b | 1140 |",
- "| a | c | 1260 |",
- "| a | d | 1080 |",
- "| a | e | 1260 |",
- "| b | a | 1302 |",
- "| b | b | 1178 |",
- "| b | c | 1302 |",
- "| b | d | 1116 |",
- "| b | e | 1302 |",
- "| c | a | 1176 |",
- "| c | b | 1064 |",
- "| c | c | 1176 |",
- "| c | d | 1008 |",
- "| c | e | 1176 |",
- "| d | a | 924 |",
- "| d | b | 836 |",
- "| d | c | 924 |",
- "| d | d | 792 |",
- "| d | e | 924 |",
- "| e | a | 1323 |",
- "| e | b | 1197 |",
- "| e | c | 1323 |",
- "| e | d | 1134 |",
- "| e | e | 1323 |",
- "+----+----+-----------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn csv_query_cube_sum_crossjoin() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS
JOIN aggregate_test_100 as b GROUP BY CUBE (a.c1, b.c1) ORDER BY a.c1, b.c1";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+----+-----------+",
- "| c1 | c1 | SUM(a.c2) |",
- "+----+----+-----------+",
- "| a | a | 1260 |",
- "| a | b | 1140 |",
- "| a | c | 1260 |",
- "| a | d | 1080 |",
- "| a | e | 1260 |",
- "| a | | 6000 |",
- "| b | a | 1302 |",
- "| b | b | 1178 |",
- "| b | c | 1302 |",
- "| b | d | 1116 |",
- "| b | e | 1302 |",
- "| b | | 6200 |",
- "| c | a | 1176 |",
- "| c | b | 1064 |",
- "| c | c | 1176 |",
- "| c | d | 1008 |",
- "| c | e | 1176 |",
- "| c | | 5600 |",
- "| d | a | 924 |",
- "| d | b | 836 |",
- "| d | c | 924 |",
- "| d | d | 792 |",
- "| d | e | 924 |",
- "| d | | 4400 |",
- "| e | a | 1323 |",
- "| e | b | 1197 |",
- "| e | c | 1323 |",
- "| e | d | 1134 |",
- "| e | e | 1323 |",
- "| e | | 6300 |",
- "| | a | 5985 |",
- "| | b | 5415 |",
- "| | c | 5985 |",
- "| | d | 5130 |",
- "| | e | 5985 |",
- "| | | 28500 |",
- "+----+----+-----------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn csv_query_cube_distinct_count() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT c1, c2, COUNT(DISTINCT c3) FROM aggregate_test_100 GROUP
BY CUBE (c1,c2) ORDER BY c1,c2";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+----+---------------------------------------+",
- "| c1 | c2 | COUNT(DISTINCT aggregate_test_100.c3) |",
- "+----+----+---------------------------------------+",
- "| a | 1 | 5 |",
- "| a | 2 | 3 |",
- "| a | 3 | 5 |",
- "| a | 4 | 4 |",
- "| a | 5 | 3 |",
- "| a | | 19 |",
- "| b | 1 | 3 |",
- "| b | 2 | 4 |",
- "| b | 3 | 2 |",
- "| b | 4 | 5 |",
- "| b | 5 | 5 |",
- "| b | | 17 |",
- "| c | 1 | 4 |",
- "| c | 2 | 7 |",
- "| c | 3 | 4 |",
- "| c | 4 | 4 |",
- "| c | 5 | 2 |",
- "| c | | 21 |",
- "| d | 1 | 7 |",
- "| d | 2 | 3 |",
- "| d | 3 | 3 |",
- "| d | 4 | 3 |",
- "| d | 5 | 2 |",
- "| d | | 18 |",
- "| e | 1 | 3 |",
- "| e | 2 | 4 |",
- "| e | 3 | 4 |",
- "| e | 4 | 7 |",
- "| e | 5 | 2 |",
- "| e | | 18 |",
- "| | 1 | 22 |",
- "| | 2 | 20 |",
- "| | 3 | 17 |",
- "| | 4 | 23 |",
- "| | 5 | 14 |",
- "| | | 80 |",
- "+----+----+---------------------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn csv_query_rollup_distinct_count() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT c1, c2, COUNT(DISTINCT c3) FROM aggregate_test_100 GROUP
BY ROLLUP (c1,c2) ORDER BY c1,c2";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+----+---------------------------------------+",
- "| c1 | c2 | COUNT(DISTINCT aggregate_test_100.c3) |",
- "+----+----+---------------------------------------+",
- "| a | 1 | 5 |",
- "| a | 2 | 3 |",
- "| a | 3 | 5 |",
- "| a | 4 | 4 |",
- "| a | 5 | 3 |",
- "| a | | 19 |",
- "| b | 1 | 3 |",
- "| b | 2 | 4 |",
- "| b | 3 | 2 |",
- "| b | 4 | 5 |",
- "| b | 5 | 5 |",
- "| b | | 17 |",
- "| c | 1 | 4 |",
- "| c | 2 | 7 |",
- "| c | 3 | 4 |",
- "| c | 4 | 4 |",
- "| c | 5 | 2 |",
- "| c | | 21 |",
- "| d | 1 | 7 |",
- "| d | 2 | 3 |",
- "| d | 3 | 3 |",
- "| d | 4 | 3 |",
- "| d | 5 | 2 |",
- "| d | | 18 |",
- "| e | 1 | 3 |",
- "| e | 2 | 4 |",
- "| e | 3 | 4 |",
- "| e | 4 | 7 |",
- "| e | 5 | 2 |",
- "| e | | 18 |",
- "| | | 80 |",
- "+----+----+---------------------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn csv_query_rollup_sum_crossjoin() {
- let ctx = SessionContext::new();
- register_aggregate_csv_by_sql(&ctx).await;
- let sql = "SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS
JOIN aggregate_test_100 as b GROUP BY ROLLUP (a.c1, b.c1) ORDER BY a.c1, b.c1";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+----+-----------+",
- "| c1 | c1 | SUM(a.c2) |",
- "+----+----+-----------+",
- "| a | a | 1260 |",
- "| a | b | 1140 |",
- "| a | c | 1260 |",
- "| a | d | 1080 |",
- "| a | e | 1260 |",
- "| a | | 6000 |",
- "| b | a | 1302 |",
- "| b | b | 1178 |",
- "| b | c | 1302 |",
- "| b | d | 1116 |",
- "| b | e | 1302 |",
- "| b | | 6200 |",
- "| c | a | 1176 |",
- "| c | b | 1064 |",
- "| c | c | 1176 |",
- "| c | d | 1008 |",
- "| c | e | 1176 |",
- "| c | | 5600 |",
- "| d | a | 924 |",
- "| d | b | 836 |",
- "| d | c | 924 |",
- "| d | d | 792 |",
- "| d | e | 924 |",
- "| d | | 4400 |",
- "| e | a | 1323 |",
- "| e | b | 1197 |",
- "| e | c | 1323 |",
- "| e | d | 1134 |",
- "| e | e | 1323 |",
- "| e | | 6300 |",
- "| | | 28500 |",
- "+----+----+-----------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
-#[tokio::test]
-async fn query_count_without_from() -> Result<()> {
- let ctx = SessionContext::new();
- let sql = "SELECT count(1 + 1)";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----------------------------+",
- "| COUNT(Int64(1) + Int64(1)) |",
- "+----------------------------+",
- "| 1 |",
- "+----------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
#[tokio::test]
async fn csv_query_array_agg() -> Result<()> {
let ctx = SessionContext::new();
@@ -1296,28 +177,6 @@ async fn csv_query_array_agg_one() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn csv_query_array_agg_with_overflow() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql =
- "select c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3, min(c3)
min_c3, count(c3) count_c3 from aggregate_test_100 group by c2 order by c2";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+--------+---------------------+--------+--------+----------+",
- "| c2 | sum_c3 | avg_c3 | max_c3 | min_c3 | count_c3 |",
- "+----+--------+---------------------+--------+--------+----------+",
- "| 1 | 367 | 16.681818181818183 | 125 | -99 | 22 |",
- "| 2 | 184 | 8.363636363636363 | 122 | -117 | 22 |",
- "| 3 | 395 | 20.789473684210527 | 123 | -101 | 19 |",
- "| 4 | 29 | 1.2608695652173914 | 123 | -117 | 23 |",
- "| 5 | -194 | -13.857142857142858 | 118 | -101 | 14 |",
- "+----+--------+---------------------+--------+--------+----------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
#[tokio::test]
async fn csv_query_array_agg_unsupported() -> Result<()> {
let ctx = SessionContext::new();
@@ -1350,59 +209,6 @@ async fn csv_query_array_agg_unsupported() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn csv_query_array_cube_agg_with_overflow() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql =
- "select c1, c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3,
min(c3) min_c3, count(c3) count_c3 from aggregate_test_100 group by CUBE
(c1,c2) order by c1, c2";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
-
"+----+----+--------+---------------------+--------+--------+----------+",
- "| c1 | c2 | sum_c3 | avg_c3 | max_c3 | min_c3 | count_c3
|",
-
"+----+----+--------+---------------------+--------+--------+----------+",
- "| a | 1 | -88 | -17.6 | 83 | -85 | 5
|",
- "| a | 2 | -46 | -15.333333333333334 | 45 | -48 | 3
|",
- "| a | 3 | -27 | -4.5 | 17 | -72 | 6
|",
- "| a | 4 | -128 | -32 | 65 | -101 | 4
|",
- "| a | 5 | -96 | -32 | 36 | -101 | 3
|",
- "| a | | -385 | -18.333333333333332 | 83 | -101 | 21
|",
- "| b | 1 | 95 | 31.666666666666668 | 54 | 12 | 3
|",
- "| b | 2 | 102 | 25.5 | 68 | -60 | 4
|",
- "| b | 3 | -84 | -42 | 17 | -101 | 2
|",
- "| b | 4 | -223 | -44.6 | 47 | -117 | 5
|",
- "| b | 5 | -1 | -0.2 | 68 | -82 | 5
|",
- "| b | | -111 | -5.842105263157895 | 68 | -117 | 19
|",
- "| c | 1 | 190 | 47.5 | 103 | -24 | 4
|",
- "| c | 2 | -389 | -55.57142857142857 | 29 | -117 | 7
|",
- "| c | 3 | 190 | 47.5 | 97 | -2 | 4
|",
- "| c | 4 | -43 | -10.75 | 123 | -90 | 4
|",
- "| c | 5 | 24 | 12 | 118 | -94 | 2
|",
- "| c | | -28 | -1.3333333333333333 | 123 | -117 | 21
|",
- "| d | 1 | -57 | -8.142857142857142 | 125 | -99 | 7
|",
- "| d | 2 | 328 | 109.33333333333333 | 122 | 93 | 3
|",
- "| d | 3 | 124 | 41.333333333333336 | 123 | -76 | 3
|",
- "| d | 4 | 162 | 54 | 102 | 5 | 3
|",
- "| d | 5 | -99 | -49.5 | -40 | -59 | 2
|",
- "| d | | 458 | 25.444444444444443 | 125 | -99 | 18
|",
- "| e | 1 | 227 | 75.66666666666667 | 120 | 36 | 3
|",
- "| e | 2 | 189 | 37.8 | 97 | -61 | 5
|",
- "| e | 3 | 192 | 48 | 112 | -95 | 4
|",
- "| e | 4 | 261 | 37.285714285714285 | 97 | -56 | 7
|",
- "| e | 5 | -22 | -11 | 64 | -86 | 2
|",
- "| e | | 847 | 40.333333333333336 | 120 | -95 | 21
|",
- "| | 1 | 367 | 16.681818181818183 | 125 | -99 | 22
|",
- "| | 2 | 184 | 8.363636363636363 | 122 | -117 | 22
|",
- "| | 3 | 395 | 20.789473684210527 | 123 | -101 | 19
|",
- "| | 4 | 29 | 1.2608695652173914 | 123 | -117 | 23
|",
- "| | 5 | -194 | -13.857142857142858 | 118 | -101 | 14
|",
- "| | | 781 | 7.81 | 125 | -117 | 100
|",
-
"+----+----+--------+---------------------+--------+--------+----------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
#[tokio::test]
async fn csv_query_array_agg_distinct() -> Result<()> {
let ctx = SessionContext::new();
@@ -1641,72 +447,6 @@ async fn aggregate_timestamps_avg() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn aggregate_time_min_and_max() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select min(t), max(t) from (select '00:00:00' as t union
select '00:00:01' union select '00:00:02');";
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----------+----------+",
- "| MIN(t) | MAX(t) |",
- "+----------+----------+",
- "| 00:00:00 | 00:00:02 |",
- "+----------+----------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn aggregate_decimal_min() -> Result<()> {
- let ctx = SessionContext::new();
- // the data type of c1 is decimal(10,3)
- ctx.register_table("d_table", table_with_decimal()).unwrap();
- let result = plan_and_collect(&ctx, "select min(c1) from d_table")
- .await
- .unwrap();
- let expected = vec![
- "+-----------------+",
- "| MIN(d_table.c1) |",
- "+-----------------+",
- "| -100.009 |",
- "+-----------------+",
- ];
- assert_eq!(
- &DataType::Decimal128(10, 3),
- result[0].schema().field(0).data_type()
- );
- assert_batches_sorted_eq!(expected, &result);
- Ok(())
-}
-
-#[tokio::test]
-async fn aggregate_decimal_max() -> Result<()> {
- let ctx = SessionContext::new();
- // the data type of c1 is decimal(10,3)
- ctx.register_table("d_table", table_with_decimal()).unwrap();
-
- let result = plan_and_collect(&ctx, "select max(c1) from d_table")
- .await
- .unwrap();
- let expected = vec![
- "+-----------------+",
- "| MAX(d_table.c1) |",
- "+-----------------+",
- "| 110.009 |",
- "+-----------------+",
- ];
- assert_eq!(
- &DataType::Decimal128(10, 3),
- result[0].schema().field(0).data_type()
- );
- assert_batches_sorted_eq!(expected, &result);
- Ok(())
-}
-
#[tokio::test]
async fn aggregate_decimal_sum() -> Result<()> {
let ctx = SessionContext::new();
@@ -2191,92 +931,6 @@ async fn simple_mean() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn query_sum_distinct() -> Result<()> {
- let schema = Arc::new(Schema::new(vec![
- Field::new("c1", DataType::Int64, true),
- Field::new("c2", DataType::Int64, true),
- ]));
-
- let data = RecordBatch::try_new(
- schema.clone(),
- vec![
- Arc::new(Int64Array::from(vec![
- Some(0),
- Some(1),
- None,
- Some(3),
- Some(3),
- ])),
- Arc::new(Int64Array::from(vec![
- None,
- Some(1),
- Some(1),
- Some(2),
- Some(2),
- ])),
- ],
- )?;
-
- let ctx = SessionContext::new();
- ctx.register_batch("test", data)?;
-
- // 2 different aggregate functions: avg and sum(distinct)
- let sql = "SELECT AVG(c1), SUM(DISTINCT c2) FROM test";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+--------------+-----------------------+",
- "| AVG(test.c1) | SUM(DISTINCT test.c2) |",
- "+--------------+-----------------------+",
- "| 1.75 | 3 |",
- "+--------------+-----------------------+",
- ];
- assert_batches_eq!(expected, &actual);
-
- // 2 sum(distinct) functions
- let sql = "SELECT SUM(DISTINCT c1), SUM(DISTINCT c2) FROM test";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----------------------+-----------------------+",
- "| SUM(DISTINCT test.c1) | SUM(DISTINCT test.c2) |",
- "+-----------------------+-----------------------+",
- "| 4 | 3 |",
- "+-----------------------+-----------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn query_count_distinct() -> Result<()> {
- let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32,
true)]));
-
- let data = RecordBatch::try_new(
- schema.clone(),
- vec![Arc::new(Int32Array::from(vec![
- Some(0),
- Some(1),
- None,
- Some(3),
- Some(3),
- ]))],
- )?;
-
- let ctx = SessionContext::new();
- ctx.register_batch("test", data)?;
- let sql = "SELECT COUNT(DISTINCT c1) FROM test";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-------------------------+",
- "| COUNT(DISTINCT test.c1) |",
- "+-------------------------+",
- "| 3 |",
- "+-------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
async fn run_count_distinct_integers_aggregated_scenario(
partitions: Vec<Vec<(&str, u64)>>,
) -> Result<Vec<RecordBatch>> {
@@ -2459,24 +1113,3 @@ async fn array_agg_one() -> Result<()> {
assert_batches_eq!(expected, &actual);
Ok(())
}
-
-#[tokio::test]
-async fn test_approx_percentile_cont_decimal_support() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT c1, approx_percentile_cont(c2, cast(0.85 as
decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+-----+",
- "| c1 | apc |",
- "+----+-----+",
- "| a | 4 |",
- "| b | 5 |",
- "| c | 4 |",
- "| d | 4 |",
- "| e | 4 |",
- "+----+-----+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 11030446d..c0b747702 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -48,7 +48,6 @@ SELECT var_pop(c2) FROM aggregate_test_100
----
1.8675
-# original: 26156334342021890000000000000000000000
# csv_query_variance_2
query R
SELECT var_pop(c6) FROM aggregate_test_100
@@ -79,7 +78,6 @@ SELECT stddev_pop(c2) FROM aggregate_test_100
----
1.3665650368716449
-# original: 5114326382039172000
# csv_query_stddev_2
query R
SELECT stddev_pop(c6) FROM aggregate_test_100
@@ -164,12 +162,11 @@ SELECT median(a) FROM median_i32
----
-1073741774
-# TODO: FIX - numbers are off?
# median_i64
-# query I
-# SELECT median(a) FROM median_i64
-# ----
-# -4611686018427388000
+query I
+SELECT median(a) FROM median_i64
+----
+-4611686018427387854
# median_u8
query I
@@ -274,187 +271,290 @@ SELECT approx_distinct(c9) AS a, approx_distinct(c9) AS
b FROM aggregate_test_10
----
100 100
-# TODO: csv_query_approx_percentile_cont
+## This test executes the APPROX_PERCENTILE_CONT aggregation against the test
+## data, asserting the estimated quantiles are ±5% their actual values.
+##
+## Actual quantiles calculated with:
+##
+## ```r
+## read_csv("./testing/data/csv/aggregate_test_100.csv") |>
+## select_if(is.numeric) |>
+## summarise_all(~ quantile(., c(0.1, 0.5, 0.9)))
+## ```
+##
+## Giving:
+##
+## ```text
+## c2 c3 c4 c5 c6 c7 c8 c9
c10 c11 c12
+## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
<dbl> <dbl> <dbl>
+## 1 1 -95.3 -22925. -1882606710 -7.25e18 18.9 2671. 472608672.
1.83e18 0.109 0.0714
+## 2 3 15.5 4599 377164262 1.13e18 134. 30634 2365817608.
9.30e18 0.491 0.551
+## 3 5 102. 25334. 1991374996. 7.37e18 231 57518. 3776538487.
1.61e19 0.834 0.946
+## ```
+##
+## Column `c12` is omitted due to a large relative error (~10%) due to the
small
+## float values.
+
+#csv_query_approx_percentile_cont (c2)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.1) AS DOUBLE) / 1.0) < 0.05)
AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.5) AS DOUBLE) / 3.0) < 0.05)
AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.9) AS DOUBLE) / 5.0) < 0.05)
AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c3)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c3, 0.1) AS DOUBLE) / -95.3) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c3, 0.5) AS DOUBLE) / 15.5) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c3, 0.9) AS DOUBLE) / 102.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c4)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c4, 0.1) AS DOUBLE) / -22925.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c4, 0.5) AS DOUBLE) / 4599.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c4, 0.9) AS DOUBLE) / 25334.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c5)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c5, 0.1) AS DOUBLE) /
-1882606710.0) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c5, 0.5) AS DOUBLE) / 377164262.0)
< 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c5, 0.9) AS DOUBLE) /
1991374996.0) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c6)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c6, 0.1) AS DOUBLE) /
-7250000000000000000) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c6, 0.5) AS DOUBLE) /
1130000000000000000) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c6, 0.9) AS DOUBLE) /
7370000000000000000) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c7)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c7, 0.1) AS DOUBLE) / 18.9) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c7, 0.5) AS DOUBLE) / 134.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c7, 0.9) AS DOUBLE) / 231.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c8)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c8, 0.1) AS DOUBLE) / 2671.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c8, 0.5) AS DOUBLE) / 30634.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c8, 0.9) AS DOUBLE) / 57518.0) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c9)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c9, 0.1) AS DOUBLE) / 472608672.0)
< 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c9, 0.5) AS DOUBLE) /
2365817608.0) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c9, 0.9) AS DOUBLE) /
3776538487.0) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c10)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c10, 0.1) AS DOUBLE) /
1830000000000000000) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c10, 0.5) AS DOUBLE) /
9300000000000000000) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c10, 0.9) AS DOUBLE) /
16100000000000000000) < 0.05) AS q FROM aggregate_test_100
+----
+true
+
+# csv_query_approx_percentile_cont (c11)
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.1) AS DOUBLE) / 0.109) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.5) AS DOUBLE) / 0.491) <
0.05) AS q FROM aggregate_test_100
+----
+true
+
+query T
+SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.9) AS DOUBLE) / 0.834) <
0.05) AS q FROM aggregate_test_100
+----
+true
-# TODO: fix decimal places
# csv_query_cube_avg
-# query TIR
-# SELECT c1, c2, AVG(c3) FROM aggregate_test_100_by_sql GROUP BY CUBE (c1, c2)
ORDER BY c1, c2
-# ----
-# a 1 -17.6
-# a 2 -15.333333333333334
-# a 3 -4.5
-# a 4 -32
-# a 5 -32
-# a -18.333333333333332
-# b 1 31.666666666666668
-# b 2 25.5
-# b 3 -42
-# b 4 -44.6
-# b 5 -0.2
-# b -5.842105263157895
-# c 1 47.5
-# c 2 -55.57142857142857
-# c 3 47.5
-# c 4 -10.75
-# c 5 12
-# c -1.3333333333333333
-# d 1 -8.142857142857142
-# d 2 109.33333333333333
-# d 3 41.333333333333336
-# d 4 54
-# d 5 -49.5
-# d 25.444444444444443
-# e 1 75.66666666666667
-# e 2 37.8
-# e 3 48
-# e 4 37.285714285714285
-# e 5 -11
-# e 40.333333333333336
-# 1 16.681818181818183
-# 2 8.363636363636363
-# 3 20.789473684210527
-# 4 1.2608695652173914
-# 5 -13.857142857142858
-# 7.81
-
-# TODO: fix decimal places
+query TIR
+SELECT c1, c2, AVG(c3) FROM aggregate_test_100_by_sql GROUP BY CUBE (c1, c2)
ORDER BY c1, c2
+----
+a 1 -17.6
+a 2 -15.333333333333334
+a 3 -4.5
+a 4 -32.0
+a 5 -32.0
+a -18.333333333333332
+b 1 31.666666666666668
+b 2 25.5
+b 3 -42.0
+b 4 -44.6
+b 5 -0.2
+b -5.842105263157895
+c 1 47.5
+c 2 -55.57142857142857
+c 3 47.5
+c 4 -10.75
+c 5 12.0
+c -1.3333333333333333
+d 1 -8.142857142857142
+d 2 109.33333333333333
+d 3 41.333333333333336
+d 4 54.0
+d 5 -49.5
+d 25.444444444444443
+e 1 75.66666666666667
+e 2 37.8
+e 3 48.0
+e 4 37.285714285714285
+e 5 -11.0
+e 40.333333333333336
+ 1 16.681818181818183
+ 2 8.363636363636363
+ 3 20.789473684210527
+ 4 1.2608695652173914
+ 5 -13.857142857142858
+ 7.81
+
# csv_query_rollup_avg
-# query IIII
-# SELECT c1, c2, c3, AVG(c4) FROM aggregate_test_100_by_sql GROUP BY ROLLUP
(c1, c2, c3) ORDER BY c1, c2, c3
-# ----
-# a 1 -85 -15154
-# a 1 -56 8692
-# a 1 -25 15295
-# a 1 -5 12636
-# a 1 83 -14704
-# a 1 1353
-# a 2 -48 -18025
-# a 2 -43 13080
-# a 2 45 15673
-# a 2 3576
-# a 3 -72 -11122
-# a 3 -12 -9168
-# a 3 13 22338.5
-# a 3 14 28162
-# a 3 17 -22796
-# a 3 4958.833333333333
-# a 4 -101 11640
-# a 4 -54 -2376
-# a 4 -38 20744
-# a 4 65 -28462
-# a 4 386.5
-# a 5 -101 -12484
-# a 5 -31 -12907
-# a 5 36 -16974
-# a 5 -14121.666666666666
-# a 306.04761904761904
-# b 1 12 7652
-# b 1 29 -18218
-# b 1 54 -18410
-# b 1 -9658.666666666666
-# b 2 -60 -21739
-# b 2 31 23127
-# b 2 63 21456
-# b 2 68 15874
-# b 2 9679.5
-# b 3 -101 -13217
-# b 3 17 14457
-# b 3 620
-# b 4 -117 19316
-# b 4 -111 -1967
-# b 4 -59 25286
-# b 4 17 -28070
-# b 4 47 20690
-# b 4 7051
-# b 5 -82 22080
-# b 5 -44 15788
-# b 5 -5 24896
-# b 5 62 16337
-# b 5 68 21576
-# b 5 20135.4
-# b 7732.315789473684
-# c 1 -24 -24085
-# c 1 41 -4667
-# c 1 70 27752
-# c 1 103 -22186
-# c 1 -5796.5
-# c 2 -117 -30187
-# c 2 -107 -2904
-# c 2 -106 -1114
-# c 2 -60 -16312
-# c 2 -29 25305
-# c 2 1 18109
-# c 2 29 -3855
-# c 2 -1565.4285714285713
-# c 3 -2 -18655
-# c 3 22 13741
-# c 3 73 -9565
-# c 3 97 29106
-# c 3 3656.75
-# c 4 -90 -2935
-# c 4 -79 5281
-# c 4 3 -30508
-# c 4 123 16620
-# c 4 -2885.5
-# c 5 -94 -15880
-# c 5 118 19208
-# c 5 1664
-# c -1320.5238095238096
-# d 1 -99 5613
-# d 1 -98 13630
-# d 1 -72 25590
-# d 1 -8 27138
-# d 1 38 18384
-# d 1 57 28781
-# d 1 125 31106
-# d 1 21463.14285714286
-# d 2 93 -12642
-# d 2 113 3917
-# d 2 122 10130
-# d 2 468.3333333333333
-# d 3 -76 8809
-# d 3 77 15091
-# d 3 123 29533
-# d 3 17811
-# d 4 5 -7688
-# d 4 55 -1471
-# d 4 102 -24558
-# d 4 -11239
-# d 5 -59 2045
-# d 5 -40 22614
-# d 5 12329.5
-# d 10890.111111111111
-# e 1 36 -21481
-# e 1 71 -5479
-# e 1 120 10837
-# e 1 -5374.333333333333
-# e 2 -61 -2888
-# e 2 49 24495
-# e 2 52 5666
-# e 2 97 18167
-# e 2 10221.2
-# e 3 -95 13611
-# e 3 71 194
-# e 3 104 -25136
-# e 3 112 -6823
-# e 3 -4538.5
-# e 4 -56 -31500
-# e 4 -53 13788
-# e 4 30 -16110
-# e 4 73 -22501
-# e 4 74 -12612
-# e 4 96 -30336
-# e 4 97 -13181
-# e 4 -16064.57142857143
-# e 5 -86 32514
-# e 5 64 -26526
-# e 5 2994
-# e -4268.333333333333
-# 2319.97
+query TIIR
+SELECT c1, c2, c3, AVG(c4) FROM aggregate_test_100_by_sql WHERE c1 IN ('a',
'b', NULL) GROUP BY ROLLUP (c1, c2, c3) ORDER BY c1, c2, c3
+----
+a 1 -85 -15154.0
+a 1 -56 8692.0
+a 1 -25 15295.0
+a 1 -5 12636.0
+a 1 83 -14704.0
+a 1 1353.0
+a 2 -48 -18025.0
+a 2 -43 13080.0
+a 2 45 15673.0
+a 2 3576.0
+a 3 -72 -11122.0
+a 3 -12 -9168.0
+a 3 13 22338.5
+a 3 14 28162.0
+a 3 17 -22796.0
+a 3 4958.833333333333
+a 4 -101 11640.0
+a 4 -54 -2376.0
+a 4 -38 20744.0
+a 4 65 -28462.0
+a 4 386.5
+a 5 -101 -12484.0
+a 5 -31 -12907.0
+a 5 36 -16974.0
+a 5 -14121.666666666666
+a 306.04761904761904
+b 1 12 7652.0
+b 1 29 -18218.0
+b 1 54 -18410.0
+b 1 -9658.666666666666
+b 2 -60 -21739.0
+b 2 31 23127.0
+b 2 63 21456.0
+b 2 68 15874.0
+b 2 9679.5
+b 3 -101 -13217.0
+b 3 17 14457.0
+b 3 620.0
+b 4 -117 19316.0
+b 4 -111 -1967.0
+b 4 -59 25286.0
+b 4 17 -28070.0
+b 4 47 20690.0
+b 4 7051.0
+b 5 -82 22080.0
+b 5 -44 15788.0
+b 5 -5 24896.0
+b 5 62 16337.0
+b 5 68 21576.0
+b 5 20135.4
+b 7732.315789473684
+ 3833.525
# csv_query_approx_percentile_cont_with_weight
-query II
+query TI
SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM aggregate_test_100
GROUP BY 1 ORDER BY 1
----
a 73
@@ -464,7 +564,7 @@ d 124
e 115
# csv_query_approx_percentile_cont_with_weight (2)
-query II
+query TI
SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1
----
a 73
@@ -474,7 +574,7 @@ d 124
e 115
# csv_query_approx_percentile_cont_with_histogram_bins
-query II
+query TI
SELECT c1, approx_percentile_cont(c3, 0.95, 200) AS c3_p95 FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1
----
a 73
@@ -483,8 +583,17 @@ c 122
d 124
e 115
+query TI
+SELECT c1, approx_percentile_cont_with_weight(c3, c2, 0.95) AS c3_p95 FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1
+----
+a 74
+b 68
+c 123
+d 124
+e 115
+
# csv_query_sum_crossjoin
-query III
+query TTI
SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS JOIN
aggregate_test_100 as b GROUP BY a.c1, b.c1 ORDER BY a.c1, b.c1
----
a a 1260
@@ -514,7 +623,7 @@ e d 1134
e e 1323
# csv_query_cube_sum_crossjoin
-query III
+query TTI
SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS JOIN
aggregate_test_100 as b GROUP BY CUBE (a.c1, b.c1) ORDER BY a.c1, b.c1
----
a a 1260
@@ -555,7 +664,7 @@ e 6300
28500
# csv_query_cube_distinct_count
-query III
+query TII
SELECT c1, c2, COUNT(DISTINCT c3) FROM aggregate_test_100 GROUP BY CUBE
(c1,c2) ORDER BY c1,c2
----
a 1 5
@@ -596,7 +705,7 @@ e 18
80
# csv_query_rollup_distinct_count
-query III
+query TII
SELECT c1, c2, COUNT(DISTINCT c3) FROM aggregate_test_100 GROUP BY ROLLUP
(c1,c2) ORDER BY c1,c2
----
a 1 5
@@ -632,7 +741,7 @@ e 18
80
# csv_query_rollup_sum_crossjoin
-query III
+query TTI
SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS JOIN
aggregate_test_100 as b GROUP BY ROLLUP (a.c1, b.c1) ORDER BY a.c1, b.c1
----
a a 1260
@@ -675,7 +784,7 @@ SELECT count(1 + 1)
# FIX: "CSV Writer does not support List(Field { name: \"item\", data_type:
Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None })
data type")
# csv_query_array_agg
-# query I
+# query T
# SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13
LIMIT 2) test
# ----
# [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm0keZ5G8BffGwgF2RwQD59TFzMStxCB]
@@ -694,70 +803,86 @@ SELECT count(1 + 1)
# ----
# [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm]
-# FIX: Decimal error
# csv_query_array_agg_with_overflow
-# query IIIIII
-# select c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3, min(c3) min_c3,
count(c3) count_c3 from aggregate_test_100 group by c2 order by c2
-# ----
-# 1 367 16.681818181818183 125 -99 22
-# 2 184 8.363636363636363 122 -117 22
-# 3 395 20.789473684210527 123 -101 19
-# 4 29 1.2608695652173914 123 -117 23
-# 5 -194 -13.857142857142858 118 -101 14
+query IIRIII
+select c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3, min(c3) min_c3,
count(c3) count_c3 from aggregate_test_100 group by c2 order by c2
+----
+1 367 16.681818181818183 125 -99 22
+2 184 8.363636363636363 122 -117 22
+3 395 20.789473684210527 123 -101 19
+4 29 1.2608695652173914 123 -117 23
+5 -194 -13.857142857142858 118 -101 14
# TODO: csv_query_array_agg_unsupported
+# statement error
+# SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100;
-# FIX: Decimal error
# csv_query_array_cube_agg_with_overflow
-# query IIIIIII
-# select c1, c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3, min(c3)
min_c3, count(c3) count_c3 from aggregate_test_100 group by CUBE (c1,c2) order
by c1, c2
+query TIIRIII
+select c1, c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3, min(c3) min_c3,
count(c3) count_c3 from aggregate_test_100 group by CUBE (c1,c2) order by c1, c2
+----
+a 1 -88 -17.6 83 -85 5
+a 2 -46 -15.333333333333334 45 -48 3
+a 3 -27 -4.5 17 -72 6
+a 4 -128 -32.0 65 -101 4
+a 5 -96 -32.0 36 -101 3
+a -385 -18.333333333333332 83 -101 21
+b 1 95 31.666666666666668 54 12 3
+b 2 102 25.5 68 -60 4
+b 3 -84 -42.0 17 -101 2
+b 4 -223 -44.6 47 -117 5
+b 5 -1 -0.2 68 -82 5
+b -111 -5.842105263157895 68 -117 19
+c 1 190 47.5 103 -24 4
+c 2 -389 -55.57142857142857 29 -117 7
+c 3 190 47.5 97 -2 4
+c 4 -43 -10.75 123 -90 4
+c 5 24 12.0 118 -94 2
+c -28 -1.3333333333333333 123 -117 21
+d 1 -57 -8.142857142857142 125 -99 7
+d 2 328 109.33333333333333 122 93 3
+d 3 124 41.333333333333336 123 -76 3
+d 4 162 54.0 102 5 3
+d 5 -99 -49.5 -40 -59 2
+d 458 25.444444444444443 125 -99 18
+e 1 227 75.66666666666667 120 36 3
+e 2 189 37.8 97 -61 5
+e 3 192 48.0 112 -95 4
+e 4 261 37.285714285714285 97 -56 7
+e 5 -22 -11.0 64 -86 2
+e 847 40.333333333333336 120 -95 21
+ 1 367 16.681818181818183 125 -99 22
+ 2 184 8.363636363636363 122 -117 22
+ 3 395 20.789473684210527 123 -101 19
+ 4 29 1.2608695652173914 123 -117 23
+ 5 -194 -13.857142857142858 118 -101 14
+ 781 7.81 125 -117 100
+
+# csv_query_array_agg_distinct
+# query T
+# SELECT array_agg(distinct c2) FROM aggregate_test_100
# ----
-# a 1 -88 -17.6 83 -85 5
-# a 2 -46 -15.333333333333334 45 -48 3
-# a 3 -27 -4.5 17 -72 6
-# a 4 -128 -32 65 -101 4
-# a 5 -96 -32 36 -101 3
-# a -385 -18.333333333333332 83 -101 21
-# b 1 95 31.666666666666668 54 12 3
-# b 2 102 25.5 68 -60 4
-# b 3 -84 -42 17 -101 2
-# b 4 -223 -44.6 47 -117 5
-# b 5 -1 -0.2 68 -82 5
-# b -111 -5.842105263157895 68 -117 19
-# c 1 190 47.5 103 -24 4
-# c 2 -389 -55.57142857142857 29 -117 7
-# c 3 190 47.5 97 -2 4
-# c 4 -43 -10.75 123 -90 4
-# c 5 24 12 118 -94 2
-# c -28 -1.3333333333333333 123 -117 21
-# d 1 -57 -8.142857142857142 125 -99 7
-# d 2 328 109.33333333333333 122 93 3
-# d 3 124 41.333333333333336 123 -76 3
-# d 4 162 54 102 5 3
-# d 5 -99 -49.5 -40 -59 2
-# d 458 25.444444444444443 125 -99 18
-# e 1 227 75.66666666666667 120 36 3
-# e 2 189 37.8 97 -61 5
-# e 3 192 48 112 -95 4
-# e 4 261 37.285714285714285 97 -56 7
-# e 5 -22 -11 64 -86 2
-# e 847 40.333333333333336 120 -95 21
-# 1 367 16.681818181818183 125 -99 22
-# 2 184 8.363636363636363 122 -117 22
-# 3 395 20.789473684210527 123 -101 19
-# 4 29 1.2608695652173914 123 -117 23
-# 5 -194 -13.857142857142858 118 -101 14
-# 781 7.81 125 -117 100
-
-# TODO: csv_query_array_agg_distinct
+# [4, 2, 3, 5, 1]
# TODO: aggregate_timestamps_sum
-# TODO: aggregate_timestamps_count
+# aggregate_timestamps_count
+# query IIII
+# SELECT count(nanos), count(micros), count(millis), count(secs) FROM t
+# ----
+# 3 3 3 3
-# TODO: aggregate_timestamps_min
+# aggregate_timestamps_min
+# query TTTT
+# SELECT min(nanos), min(micros), min(millis), min(secs) FROM t
+# ----
+# 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450
2011-12-13T11:13:10.123 2011-12-13T11:13:10
-# TODO: aggregate_timestamps_max
+# # aggregate_timestamps_max
+# query TTTT
+# SELECT max(nanos), max(micros), max(millis), max(secs) FROM t
+# ----
+# 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432
2021-01-01T05:11:10
# TODO: aggregate_times_sum
@@ -770,31 +895,32 @@ SELECT count(1 + 1)
# TODO: aggregate_timestamps_avg
# aggregate_time_min_and_max
-query II
+query TT
select min(t), max(t) from (select '00:00:00' as t union select '00:00:01'
union select '00:00:02')
----
00:00:00 00:00:02
# aggregate_decimal_min
-query I
+query R
select min(c1) from d_table
----
-100.009
# aggregate_decimal_max
-query I
+query R
select max(c1) from d_table
----
110.009
-query I
+# aggregate_decimal_sum
+query R
select sum(c1) from d_table
----
100.000
# FIX: doesn't check datatype
# aggregate_decimal_avg
-query I
+query R
select avg(c1) from d_table
----
5.0000000
@@ -840,13 +966,18 @@ select avg(c1) from d_table
# TODO: simple_mean
-# TOOD: bottom 2 require pre-populated data
-# query_sum_distinct
-query II
+# query_sum_distinct - 2 different aggregate functions: avg and sum(distinct)
+query RI
SELECT AVG(c1), SUM(DISTINCT c2) FROM test
----
1.75 3
+# query_sum_distinct - 2 sum(distinct) functions
+query II
+SELECT SUM(DISTINCT c1), SUM(DISTINCT c2) FROM test
+----
+4 3
+
# # query_count_distinct
query I
SELECT COUNT(DISTINCT c1) FROM test
@@ -859,7 +990,6 @@ SELECT COUNT(DISTINCT c1) FROM test
# TODO: aggregate_with_alias
-
# FIX: CSV Writer error
# array_agg_zero
# query I
@@ -875,7 +1005,7 @@ SELECT COUNT(DISTINCT c1) FROM test
# [[1]]
# test_approx_percentile_cont_decimal_support
-query II
+query TI
SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1
----
a 4