This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new c75a95780d Minor: Move median test (#10611)
c75a95780d is described below
commit c75a95780dacd436143a0159af421a7570c3cc04
Author: Jay Zhan <[email protected]>
AuthorDate: Thu May 23 23:36:39 2024 +0800
Minor: Move median test (#10611)
* move median test
Signed-off-by: jayzhan211 <[email protected]>
* add more test
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
datafusion/physical-expr/src/aggregate/median.rs | 384 -----------------------
datafusion/sqllogictest/test_files/aggregate.slt | 252 +++++++++++++++
2 files changed, 252 insertions(+), 384 deletions(-)
diff --git a/datafusion/physical-expr/src/aggregate/median.rs b/datafusion/physical-expr/src/aggregate/median.rs
index f4f56fa46e..ee0fce3fab 100644
--- a/datafusion/physical-expr/src/aggregate/median.rs
+++ b/datafusion/physical-expr/src/aggregate/median.rs
@@ -295,387 +295,3 @@ fn calculate_median<T: ArrowNumericType>(
Some(*median)
}
}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use crate::expressions::col;
- use crate::expressions::tests::aggregate;
- use crate::generic_test_distinct_op;
- use arrow::{array::*, datatypes::*};
-
- #[test]
- fn median_decimal() -> Result<()> {
- // test median
- let array: ArrayRef = Arc::new(
- (1..7)
- .map(Some)
- .collect::<Decimal128Array>()
- .with_precision_and_scale(10, 4)?,
- );
-
- generic_test_distinct_op!(
- array,
- DataType::Decimal128(10, 4),
- Median,
- false,
- ScalarValue::Decimal128(Some(3), 10, 4)
- )
- }
-
- #[test]
- fn median_decimal_with_nulls() -> Result<()> {
- let array: ArrayRef = Arc::new(
- (1..6)
- .map(|i| if i == 2 { None } else { Some(i) })
- .collect::<Decimal128Array>()
- .with_precision_and_scale(10, 4)?,
- );
- generic_test_distinct_op!(
- array,
- DataType::Decimal128(10, 4),
- Median,
- false,
- ScalarValue::Decimal128(Some(3), 10, 4)
- )
- }
-
- #[test]
- fn median_decimal_all_nulls() -> Result<()> {
- // test median
- let array: ArrayRef = Arc::new(
- std::iter::repeat::<Option<i128>>(None)
- .take(6)
- .collect::<Decimal128Array>()
- .with_precision_and_scale(10, 4)?,
- );
- generic_test_distinct_op!(
- array,
- DataType::Decimal128(10, 4),
- Median,
- false,
- ScalarValue::Decimal128(None, 10, 4)
- )
- }
-
- #[test]
- fn median_i32_odd() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- false,
- ScalarValue::from(3_i32)
- )
- }
-
- #[test]
- fn median_i32_even() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- false,
- ScalarValue::from(3_i32)
- )
- }
-
- #[test]
- fn median_i32_with_nulls() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![
- Some(1),
- None,
- Some(3),
- Some(4),
- Some(5),
- ]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- false,
- ScalarValue::from(3i32)
- )
- }
-
- #[test]
- fn median_i32_all_nulls() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- false,
- ScalarValue::Int32(None)
- )
- }
-
- #[test]
- fn median_u32_odd() -> Result<()> {
- let a: ArrayRef =
- Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32]));
- generic_test_distinct_op!(
- a,
- DataType::UInt32,
- Median,
- false,
- ScalarValue::from(3u32)
- )
- }
-
- #[test]
- fn median_u32_even() -> Result<()> {
- let a: ArrayRef = Arc::new(UInt32Array::from(vec![
- 1_u32, 2_u32, 3_u32, 4_u32, 5_u32, 6_u32,
- ]));
- generic_test_distinct_op!(
- a,
- DataType::UInt32,
- Median,
- false,
- ScalarValue::from(3u32)
- )
- }
-
- #[test]
- fn median_f32_odd() -> Result<()> {
- let a: ArrayRef =
- Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32]));
- generic_test_distinct_op!(
- a,
- DataType::Float32,
- Median,
- false,
- ScalarValue::from(3_f32)
- )
- }
-
- #[test]
- fn median_f32_even() -> Result<()> {
- let a: ArrayRef = Arc::new(Float32Array::from(vec![
- 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32,
- ]));
- generic_test_distinct_op!(
- a,
- DataType::Float32,
- Median,
- false,
- ScalarValue::from(3.5_f32)
- )
- }
-
- #[test]
- fn median_f64_odd() -> Result<()> {
- let a: ArrayRef =
- Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64]));
- generic_test_distinct_op!(
- a,
- DataType::Float64,
- Median,
- false,
- ScalarValue::from(3_f64)
- )
- }
-
- #[test]
- fn median_f64_even() -> Result<()> {
- let a: ArrayRef = Arc::new(Float64Array::from(vec![
- 1_f64, 2_f64, 3_f64, 4_f64, 5_f64, 6_f64,
- ]));
- generic_test_distinct_op!(
- a,
- DataType::Float64,
- Median,
- false,
- ScalarValue::from(3.5_f64)
- )
- }
-
- #[test]
- fn distinct_median_decimal() -> Result<()> {
- let array: ArrayRef = Arc::new(
- vec![1, 1, 1, 1, 2, 3, 1, 1, 3]
- .into_iter()
- .map(Some)
- .collect::<Decimal128Array>()
- .with_precision_and_scale(10, 4)?,
- );
-
- generic_test_distinct_op!(
- array,
- DataType::Decimal128(10, 4),
- Median,
- true,
- ScalarValue::Decimal128(Some(2), 10, 4)
- )
- }
-
- #[test]
- fn distinct_median_decimal_with_nulls() -> Result<()> {
- let array: ArrayRef = Arc::new(
- vec![Some(3), Some(1), None, Some(3), Some(2), Some(3), Some(3)]
- .into_iter()
- .collect::<Decimal128Array>()
- .with_precision_and_scale(10, 4)?,
- );
- generic_test_distinct_op!(
- array,
- DataType::Decimal128(10, 4),
- Median,
- true,
- ScalarValue::Decimal128(Some(2), 10, 4)
- )
- }
-
- #[test]
- fn distinct_median_decimal_all_nulls() -> Result<()> {
- let array: ArrayRef = Arc::new(
- std::iter::repeat::<Option<i128>>(None)
- .take(6)
- .collect::<Decimal128Array>()
- .with_precision_and_scale(10, 4)?,
- );
- generic_test_distinct_op!(
- array,
- DataType::Decimal128(10, 4),
- Median,
- true,
- ScalarValue::Decimal128(None, 10, 4)
- )
- }
-
- #[test]
- fn distinct_median_i32_odd() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![2, 1, 1, 2, 1, 3]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- true,
- ScalarValue::from(2_i32)
- )
- }
-
- #[test]
- fn distinct_median_i32_even() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 1, 3, 1, 1]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- true,
- ScalarValue::from(2_i32)
- )
- }
-
- #[test]
- fn distinct_median_i32_with_nulls() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![
- Some(1),
- None,
- Some(1),
- Some(1),
- Some(3),
- ]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- true,
- ScalarValue::from(2i32)
- )
- }
-
- #[test]
- fn distinct_median_i32_all_nulls() -> Result<()> {
- let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
- generic_test_distinct_op!(
- a,
- DataType::Int32,
- Median,
- true,
- ScalarValue::Int32(None)
- )
- }
-
- #[test]
- fn distinct_median_u32_odd() -> Result<()> {
- let a: ArrayRef =
- Arc::new(UInt32Array::from(vec![1_u32, 1_u32, 2_u32, 1_u32, 3_u32]));
- generic_test_distinct_op!(
- a,
- DataType::UInt32,
- Median,
- true,
- ScalarValue::from(2u32)
- )
- }
-
- #[test]
- fn distinct_median_u32_even() -> Result<()> {
- let a: ArrayRef = Arc::new(UInt32Array::from(vec![
- 1_u32, 1_u32, 1_u32, 1_u32, 3_u32, 3_u32,
- ]));
- generic_test_distinct_op!(
- a,
- DataType::UInt32,
- Median,
- true,
- ScalarValue::from(2u32)
- )
- }
-
- #[test]
- fn distinct_median_f32_odd() -> Result<()> {
- let a: ArrayRef =
- Arc::new(Float32Array::from(vec![3_f32, 2_f32, 1_f32, 1_f32, 1_f32]));
- generic_test_distinct_op!(
- a,
- DataType::Float32,
- Median,
- true,
- ScalarValue::from(2_f32)
- )
- }
-
- #[test]
- fn distinct_median_f32_even() -> Result<()> {
- let a: ArrayRef =
- Arc::new(Float32Array::from(vec![1_f32, 1_f32, 1_f32, 1_f32, 2_f32]));
- generic_test_distinct_op!(
- a,
- DataType::Float32,
- Median,
- true,
- ScalarValue::from(1.5_f32)
- )
- }
-
- #[test]
- fn distinct_median_f64_odd() -> Result<()> {
- let a: ArrayRef =
- Arc::new(Float64Array::from(vec![1_f64, 1_f64, 1_f64, 2_f64, 3_f64]));
- generic_test_distinct_op!(
- a,
- DataType::Float64,
- Median,
- true,
- ScalarValue::from(2_f64)
- )
- }
-
- #[test]
- fn distinct_median_f64_even() -> Result<()> {
- let a: ArrayRef =
- Arc::new(Float64Array::from(vec![1_f64, 1_f64, 1_f64, 1_f64, 2_f64]));
- generic_test_distinct_op!(
- a,
- DataType::Float64,
- Median,
- true,
- ScalarValue::from(1.5_f64)
- )
- }
-}
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index 983f8a085b..c2478e5437 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -619,6 +619,258 @@ SELECT approx_median(col_f64_nan) FROM median_table
----
NaN
+# median decimal
+statement ok
+create table t(c decimal(10, 4)) as values (0.0001), (0.0002), (0.0003), (0.0004), (0.0005), (0.0006);
+
+query RT
+select median(c), arrow_typeof(median(c)) from t;
+----
+0.0003 Decimal128(10, 4)
+
+statement ok
+drop table t;
+
+# median decimal with nulls
+statement ok
+create table t(c decimal(10, 4)) as values (0.0001), (null), (0.0003), (0.0004), (0.0005);
+
+query RT
+select median(c), arrow_typeof(median(c)) from t;
+----
+0.0003 Decimal128(10, 4)
+
+statement ok
+drop table t;
+
+# median decimal with all nulls
+statement ok
+create table t(c decimal(10, 4)) as values (null), (null), (null);
+
+query RT
+select median(c), arrow_typeof(median(c)) from t;
+----
+NULL Decimal128(10, 4)
+
+statement ok
+drop table t;
+
+# median odd
+statement ok
+create table t(c int) as values (1), (2), (3), (4), (5);
+
+query I
+select median(c) from t;
+----
+3
+
+statement ok
+drop table t;
+
+# median even
+statement ok
+create table t(c int) as values (1), (2), (3), (4), (5), (6);
+
+query I
+select median(c) from t;
+----
+3
+
+statement ok
+drop table t;
+
+# median with nulls
+statement ok
+create table t(c int) as values (1), (null), (3), (4), (5);
+
+query I
+select median(c) from t;
+----
+3
+
+statement ok
+drop table t;
+
+# median with all nulls
+statement ok
+create table t(c int) as values (null), (null), (null);
+
+query I
+select median(c) from t;
+----
+NULL
+
+statement ok
+drop table t;
+
+# median u32
+statement ok
+create table t(c int unsigned) as values (1), (2), (3), (4), (5);
+
+query I
+select median(c) from t;
+----
+3
+
+statement ok
+drop table t;
+
+# median f32
+statement ok
+create table t(c float) as values (1.1), (2.2), (3.3), (4.4), (5.5);
+
+query R
+select median(c) from t;
+----
+3.3
+
+statement ok
+drop table t;
+
+# median distinct decimal
+statement ok
+create table t(c decimal(10, 4)) as values (0.0001), (0.0001), (0.0001), (0.0001), (0.0002), (0.0002), (0.0003), (0.0003);
+
+query R
+select median(distinct c) from t;
+----
+0.0002
+
+statement ok
+drop table t;
+
+# median distinct decimal with nulls
+statement ok
+create table t(c decimal(10, 4)) as values (0.0001), (0.0001), (0.0001), (null), (null), (0.0002), (0.0003), (0.0003);
+
+query R
+select median(distinct c) from t;
+----
+0.0002
+
+statement ok
+drop table t;
+
+# distinct median i32 odd
+statement ok
+create table t(c int) as values (2), (1), (1), (2), (1), (3);
+
+query I
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
+# distinct median i32 even
+statement ok
+create table t(c int) as values (1), (1), (3), (1), (1);
+
+query I
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
+# distinct median i32 with nulls
+statement ok
+create table t(c int) as values (1), (null), (1), (1), (3);
+
+query I
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
+# distinct median u32 odd
+statement ok
+create table t(c int unsigned) as values (1), (1), (2), (1), (3);
+
+query I
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
+# distinct median u32 even
+statement ok
+create table t(c int unsigned) as values (1), (1), (1), (1), (3), (3);
+
+query I
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
+# distinct median f32 odd
+statement ok
+create table t(c float) as values (3), (2), (1), (1), (1);
+
+query R
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
+# distinct median f32 even
+statement ok
+create table t(c float) as values (1), (1), (1), (1), (2);
+
+query R
+select median(distinct c) from t;
+----
+1.5
+
+statement ok
+drop table t;
+
+# distinct median f64 odd
+statement ok
+create table t(c double) as values (1), (1), (1), (2), (3);
+
+query R
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
+# distinct median f64 even
+statement ok
+create table t(c double) as values (1), (1), (1), (1), (2);
+
+query R
+select median(distinct c) from t;
+----
+1.5
+
+statement ok
+drop table t;
+
+# distinct median i32
+statement ok
+create table t(c int) as values (1), (1), (1), (1), (2), (2), (3), (3);
+
+query I
+select median(distinct c) from t;
+----
+2
+
+statement ok
+drop table t;
+
# median_multi
# test case for https://github.com/apache/datafusion/issues/3105
# has an intermediate grouping
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]