This is an automated email from the ASF dual-hosted git repository.

findepi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 05bb56913e feat: Add `array_max` function support (#14470)
05bb56913e is described below

commit 05bb56913ecaeed981c8913cca030af482e6c39b
Author: Eren Avsarogullari <[email protected]>
AuthorDate: Fri Mar 7 02:26:18 2025 -0800

    feat: Add `array_max` function support (#14470)
    
    * Issue-14469 - feat: Add array_max function
    
    * Address review comments
    
    * Address review comments II
    
    * Address review comments III
---
 datafusion/functions-aggregate/src/min_max.rs  |   2 +-
 datafusion/functions-nested/src/lib.rs         |   2 +
 datafusion/functions-nested/src/max.rs         | 137 +++++++++++++++++++++++++
 datafusion/sqllogictest/test_files/array.slt   |  87 ++++++++++++++++
 docs/source/user-guide/sql/scalar_functions.md |  33 ++++++
 5 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/datafusion/functions-aggregate/src/min_max.rs 
b/datafusion/functions-aggregate/src/min_max.rs
index 90fb46883d..83356e2f9f 100644
--- a/datafusion/functions-aggregate/src/min_max.rs
+++ b/datafusion/functions-aggregate/src/min_max.rs
@@ -573,7 +573,7 @@ fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
 }
 
 /// dynamically-typed max(array) -> ScalarValue
-fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
+pub fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
     Ok(match values.data_type() {
         DataType::Utf8 => {
             typed_min_max_batch_string!(values, StringArray, Utf8, max_string)
diff --git a/datafusion/functions-nested/src/lib.rs 
b/datafusion/functions-nested/src/lib.rs
index 41ebb4366c..446cd58865 100644
--- a/datafusion/functions-nested/src/lib.rs
+++ b/datafusion/functions-nested/src/lib.rs
@@ -52,6 +52,7 @@ pub mod map;
 pub mod map_extract;
 pub mod map_keys;
 pub mod map_values;
+pub mod max;
 pub mod planner;
 pub mod position;
 pub mod range;
@@ -144,6 +145,7 @@ pub fn all_default_nested_functions() -> 
Vec<Arc<ScalarUDF>> {
         length::array_length_udf(),
         distance::array_distance_udf(),
         flatten::flatten_udf(),
+        max::array_max_udf(),
         sort::array_sort_udf(),
         repeat::array_repeat_udf(),
         resize::array_resize_udf(),
diff --git a/datafusion/functions-nested/src/max.rs 
b/datafusion/functions-nested/src/max.rs
new file mode 100644
index 0000000000..22bd14740b
--- /dev/null
+++ b/datafusion/functions-nested/src/max.rs
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`ScalarUDFImpl`] definitions for array_max function.
+use crate::utils::make_scalar_function;
+use arrow::array::ArrayRef;
+use arrow::datatypes::DataType;
+use arrow::datatypes::DataType::List;
+use datafusion_common::cast::as_list_array;
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{exec_err, ScalarValue};
+use datafusion_doc::Documentation;
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_functions_aggregate::min_max;
+use datafusion_macros::user_doc;
+use itertools::Itertools;
+use std::any::Any;
+
+make_udf_expr_and_func!(
+    ArrayMax,
+    array_max,
+    array,
+    "returns the maximum value in the array.",
+    array_max_udf
+);
+
+#[user_doc(
+    doc_section(label = "Array Functions"),
+    description = "Returns the maximum value in the array.",
+    syntax_example = "array_max(array)",
+    sql_example = r#"```sql
+> select array_max([3,1,4,2]);
++-----------------------------------------+
+| array_max(List([3,1,4,2]))              |
++-----------------------------------------+
+| 4                                       |
++-----------------------------------------+
+```"#,
+    argument(
+        name = "array",
+        description = "Array expression. Can be a constant, column, or 
function, and any combination of array operators."
+    )
+)]
+/// Scalar UDF implementation backing the `array_max` SQL function.
+#[derive(Debug)]
+pub struct ArrayMax {
+    /// Argument signature reported through `ScalarUDFImpl::signature`.
+    signature: Signature,
+    /// Alternative names reported through `ScalarUDFImpl::aliases`.
+    aliases: Vec<String>,
+}
+
+impl Default for ArrayMax {
+    /// Equivalent to [`ArrayMax::new`].
+    fn default() -> Self {
+        ArrayMax::new()
+    }
+}
+
+impl ArrayMax {
+    /// Creates the UDF with an immutable array signature and the
+    /// `list_max` alias.
+    pub fn new() -> Self {
+        let signature = Signature::array(Volatility::Immutable);
+        let aliases = vec![String::from("list_max")];
+        Self { signature, aliases }
+    }
+}
+
+impl ScalarUDFImpl for ArrayMax {
+    /// Exposes `self` as `&dyn Any`.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Primary name of the function.
+    fn name(&self) -> &str {
+        "array_max"
+    }
+
+    /// Signature built in [`ArrayMax::new`].
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// The result type is the element type of the `List` argument.
+    ///
+    /// Any other first argument type yields an execution error.
+    fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+        if let List(field) = &arg_types[0] {
+            return Ok(field.data_type().clone());
+        }
+        exec_err!("Not reachable, data_type should be List")
+    }
+
+    /// Evaluates the function by wrapping [`array_max_inner`] with
+    /// `make_scalar_function` and applying it to `args`.
+    fn invoke_batch(
+        &self,
+        args: &[ColumnarValue],
+        _number_rows: usize,
+    ) -> datafusion_common::Result<ColumnarValue> {
+        let kernel = make_scalar_function(array_max_inner);
+        kernel(args)
+    }
+
+    /// Alternative names for the function (`list_max`).
+    fn aliases(&self) -> &[String] {
+        self.aliases.as_slice()
+    }
+
+    /// Documentation returned by `self.doc()`.
+    fn documentation(&self) -> Option<&Documentation> {
+        self.doc()
+    }
+}
+
+/// array_max SQL function
+///
+/// `array_max(array)` takes exactly one argument, which must be a `List`
+/// array, and returns an array holding the maximum element of each row. The
+/// output has exactly one entry per input row, typed as the list's element
+/// type.
+///
+/// A NULL row produces a NULL result rather than a panic, and an error raised
+/// while computing a row's maximum is propagated to the caller rather than
+/// being dropped (which would leave the output shorter than the input).
+///
+/// For example:
+/// > array_max(\[1, 3, 2]) -> 3
+///
+/// # Errors
+///
+/// Returns an error if the number of arguments is not one, if the argument is
+/// not a `List` array, or if the maximum cannot be computed for a row.
+pub fn array_max_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+    let [arg1] = take_function_args("array_max", args)?;
+
+    match arg1.data_type() {
+        List(field) => {
+            let input_list_array = as_list_array(&arg1)?;
+            // Typed NULL used for rows whose list itself is NULL.
+            let null_value = ScalarValue::try_from(field.data_type())?;
+            // `map` + `try_collect` keeps one output per input row and
+            // surfaces the first error instead of discarding it.
+            let result_vec: Vec<ScalarValue> = input_list_array
+                .iter()
+                .map(|row| match row {
+                    Some(values) => min_max::max_batch(&values),
+                    None => Ok(null_value.clone()),
+                })
+                .try_collect()?;
+            ScalarValue::iter_to_array(result_vec)
+        }
+        _ => exec_err!("array_max does not support type: {:?}", arg1.data_type()),
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index c8f6a985bb..3b7f129606 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -1435,6 +1435,93 @@ NULL 23
 NULL 43
 5 NULL
 
+
+## array_max
+# array_max scalar function
+query I
+select array_max(make_array(5, 3, 6, 4));
+----
+6
+
+query I
+select array_max(make_array(5, 3, 4, NULL, 6, NULL));
+----
+6
+
+query I
+select array_max(make_array(NULL, NULL));
+----
+NULL
+
+query T
+select array_max(make_array('h', 'e', 'o', 'l', 'l'));
+----
+o
+
+query T
+select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL));
+----
+o
+
+query B
+select array_max(make_array(false, true, false, true));
+----
+true
+
+query B
+select array_max(make_array(false, true, NULL, false, true));
+----
+true
+
+query D
+select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE 
'1999-05-01', DATE '1985-11-01'));
+----
+1999-05-01
+
+query D
+select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE 
'1993-03-01', NULL));
+----
+1999-05-01
+
+query P
+select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', 
TIMESTAMP '1984-10-01'));
+----
+1995-06-01T00:00:00
+
+query P
+select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP 
'1995-06-01'));
+----
+1996-10-01T00:00:00
+
+query R
+select array_max(make_array(5.1, -3.2, 6.3, 4.9));
+----
+6.3
+
+query ?I
+select input, array_max(input) from (select make_array(d - 1, d, d + 1) input 
from (values (0), (10), (20), (30), (NULL)) t(d))
+----
+[-1, 0, 1] 1
+[9, 10, 11] 11
+[19, 20, 21] 21
+[29, 30, 31] 31
+[NULL, NULL, NULL] NULL
+
+query II
+select array_max(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), 
array_max(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
+----
+3 1
+
+query I
+select array_max(make_array());
+----
+NULL
+
+# Testing with empty arguments should result in an error
+query error DataFusion error: Error during planning: 'array_max' does not 
support zero arguments
+select array_max();
+
+
 ## array_pop_back (aliases: `list_pop_back`)
 
 # array_pop_back scalar function with null
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index fb4043c33e..60ecf7bd78 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -2524,6 +2524,7 @@ _Alias of [current_date](#current_date)._
 - [array_intersect](#array_intersect)
 - [array_join](#array_join)
 - [array_length](#array_length)
+- [array_max](#array_max)
 - [array_ndims](#array_ndims)
 - [array_pop_back](#array_pop_back)
 - [array_pop_front](#array_pop_front)
@@ -2569,6 +2570,7 @@ _Alias of [current_date](#current_date)._
 - [list_intersect](#list_intersect)
 - [list_join](#list_join)
 - [list_length](#list_length)
+- [list_max](#list_max)
 - [list_ndims](#list_ndims)
 - [list_pop_back](#list_pop_back)
 - [list_pop_front](#list_pop_front)
@@ -3002,6 +3004,33 @@ array_length(array, dimension)
 
 - list_length
 
+### `array_max`
+
+Returns the maximum value in the array.
+
+```sql
+array_max(array)
+```
+
+#### Arguments
+
+- **array**: Array expression. Can be a constant, column, or function, and any 
combination of array operators.
+
+#### Example
+
+```sql
+> select array_max([3,1,4,2]);
++-----------------------------------------+
+| array_max(List([3,1,4,2]))              |
++-----------------------------------------+
+| 4                                       |
++-----------------------------------------+
+```
+
+#### Aliases
+
+- list_max
+
 ### `array_ndims`
 
 Returns the number of dimensions of the array.
@@ -3759,6 +3788,10 @@ _Alias of [array_to_string](#array_to_string)._
 
 _Alias of [array_length](#array_length)._
 
+### `list_max`
+
+_Alias of [array_max](#array_max)._
+
 ### `list_ndims`
 
 _Alias of [array_ndims](#array_ndims)._


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to