This is an automated email from the ASF dual-hosted git repository.

mneumann pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 5c40142722 feat: min/max agg for bool (#6226)
5c40142722 is described below

commit 5c4014272206397919a4a67ee3ca0af011aeb3f8
Author: Marco Neumann <[email protected]>
AuthorDate: Wed May 17 12:03:19 2023 +0200

    feat: min/max agg for bool (#6226)
---
 .../tests/sqllogictests/test_files/aggregate.slt   | 30 ++++++++
 datafusion/physical-expr/src/aggregate/min_max.rs  | 90 +++++++++++++++++++++-
 datafusion/physical-expr/src/expressions/mod.rs    |  2 +-
 datafusion/row/src/accessor.rs                     |  2 +
 4 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 47d7d031ce..17d89a9f05 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1838,3 +1838,33 @@ select max(x_dict) from value_dict group by x_dict % 2 order by max(x_dict);
 ----
 4
 5
+
+# bool aggregation
+statement ok
+CREATE TABLE value_bool(x boolean, g int) AS VALUES (NULL, 0), (false, 0), (true, 0), (false, 1), (true, 2), (NULL, 3);
+
+query B
+select min(x) from value_bool;
+----
+false
+
+query B
+select max(x) from value_bool;
+----
+true
+
+query B
+select min(x) from value_bool group by g order by g;
+----
+false
+false
+true
+NULL
+
+query B
+select max(x) from value_bool group by g order by g;
+----
+true
+false
+true
+NULL
diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index 3a3d529839..f811dae7b5 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -26,8 +26,8 @@ use arrow::compute;
 use arrow::datatypes::{DataType, TimeUnit};
 use arrow::{
     array::{
-        ArrayRef, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
-        Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
+        ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array,
+        Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
         Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
         Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
         TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
@@ -290,6 +290,9 @@ fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
         DataType::LargeUtf8 => {
             typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, min_string)
         }
+        DataType::Boolean => {
+            typed_min_max_batch!(values, BooleanArray, Boolean, min_boolean)
+        }
         _ => min_max_batch!(values, min),
     })
 }
@@ -303,6 +306,9 @@ fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
         DataType::LargeUtf8 => {
             typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, max_string)
         }
+        DataType::Boolean => {
+            typed_min_max_batch!(values, BooleanArray, Boolean, max_boolean)
+        }
         _ => min_max_batch!(values, max),
     })
 }
@@ -386,6 +392,9 @@ macro_rules! min_max {
                 )));
                 }
             }
+            (ScalarValue::Boolean(lhs), ScalarValue::Boolean(rhs)) => {
+                typed_min_max!(lhs, rhs, Boolean, $OP)
+            }
             (ScalarValue::Float64(lhs), ScalarValue::Float64(rhs)) => {
                 typed_min_max!(lhs, rhs, Float64, $OP)
             }
@@ -532,6 +541,9 @@ macro_rules! min_max {
 macro_rules! min_max_v2 {
     ($INDEX:ident, $ACC:ident, $SCALAR:expr, $OP:ident) => {{
         Ok(match $SCALAR {
+            ScalarValue::Boolean(rhs) => {
+                typed_min_max_v2!($INDEX, $ACC, rhs, bool, $OP)
+            }
             ScalarValue::Float64(rhs) => {
                 typed_min_max_v2!($INDEX, $ACC, rhs, f64, $OP)
             }
@@ -1429,4 +1441,78 @@ mod tests {
             ScalarValue::Time64Nanosecond(Some(5))
         )
     }
+
+    #[test]
+    fn max_bool() -> Result<()> {
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, false]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, true]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, true, false]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(Vec::<bool>::new()));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Max,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![None as Option<bool>]));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Max,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef =
+            Arc::new(BooleanArray::from(vec![None, Some(true), Some(false)]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn min_bool() -> Result<()> {
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, false]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, true]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, true, false]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(Vec::<bool>::new()));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Min,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![None as Option<bool>]));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Min,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef =
+            Arc::new(BooleanArray::from(vec![None, Some(true), Some(false)]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        Ok(())
+    }
 }
diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs
index afe1ccd99f..140556b765 100644
--- a/datafusion/physical-expr/src/expressions/mod.rs
+++ b/datafusion/physical-expr/src/expressions/mod.rs
@@ -128,7 +128,7 @@ pub(crate) mod tests {
 
             assert_eq!(expected, actual);
 
-            Ok(())
+            Ok(()) as Result<(), DataFusionError>
         }};
     }
 
diff --git a/datafusion/row/src/accessor.rs b/datafusion/row/src/accessor.rs
index d4db66f367..a0b5a70df9 100644
--- a/datafusion/row/src/accessor.rs
+++ b/datafusion/row/src/accessor.rs
@@ -327,6 +327,7 @@ impl<'a> RowAccessor<'a> {
     fn_add_idx!(f64);
     fn_add_idx!(i128);
 
+    fn_max_min_idx!(bool, max);
     fn_max_min_idx!(u8, max);
     fn_max_min_idx!(u16, max);
     fn_max_min_idx!(u32, max);
@@ -339,6 +340,7 @@ impl<'a> RowAccessor<'a> {
     fn_max_min_idx!(f64, max);
     fn_max_min_idx!(i128, max);
 
+    fn_max_min_idx!(bool, min);
     fn_max_min_idx!(u8, min);
     fn_max_min_idx!(u16, min);
     fn_max_min_idx!(u32, min);

Reply via email to