This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 667c77a858 feat(function): add `least` function (#13786)
667c77a858 is described below
commit 667c77a85836bffdf7747a25403cd475f0c07be8
Author: Raz Luvaton <[email protected]>
AuthorDate: Fri Dec 20 23:06:09 2024 +0200
feat(function): add `least` function (#13786)
* start adding least fn
* feat(function): add least function
* update function name
* fix scalar smaller function
* add tests
* run Clippy and Fmt
* Generated docs using `./dev/update_function_docs.sh`
* add comment why `descending: false`
* update comment
* Update least.rs
Co-authored-by: Bruce Ritchie <[email protected]>
* Update scalar_functions.md
* run ./dev/update_function_docs.sh to update docs
* merge greatest and least implementation to one
* add header
---------
Co-authored-by: Bruce Ritchie <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions/src/core/greatest.rs | 183 +++++--------------
.../functions/src/core/greatest_least_utils.rs | 133 ++++++++++++++
datafusion/functions/src/core/least.rs | 200 +++++++++++++++++++++
datafusion/functions/src/core/mod.rs | 8 +
datafusion/sqllogictest/test_files/functions.slt | 198 ++++++++++++++++++++
docs/source/user-guide/sql/scalar_functions.md | 24 +++
6 files changed, 612 insertions(+), 134 deletions(-)
diff --git a/datafusion/functions/src/core/greatest.rs
b/datafusion/functions/src/core/greatest.rs
index 3ea2eadf22..e91ec2b0c4 100644
--- a/datafusion/functions/src/core/greatest.rs
+++ b/datafusion/functions/src/core/greatest.rs
@@ -15,20 +15,19 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{make_comparator, Array, ArrayRef, BooleanArray};
+use crate::core::greatest_least_utils::GreatestLeastOperator;
+use arrow::array::{make_comparator, Array, BooleanArray};
use arrow::compute::kernels::cmp;
-use arrow::compute::kernels::zip::zip;
use arrow::compute::SortOptions;
use arrow::datatypes::DataType;
use arrow_buffer::BooleanBuffer;
-use datafusion_common::{exec_err, plan_err, Result, ScalarValue};
+use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_doc::Documentation;
-use datafusion_expr::binary::type_union_resolution;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL;
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::OnceLock;
const SORT_OPTIONS: SortOptions = SortOptions {
// We want greatest first
@@ -57,79 +56,57 @@ impl GreatestFunc {
}
}
-fn get_logical_null_count(arr: &dyn Array) -> usize {
- arr.logical_nulls()
- .map(|n| n.null_count())
- .unwrap_or_default()
-}
+impl GreatestLeastOperator for GreatestFunc {
+ const NAME: &'static str = "greatest";
-/// Return boolean array where `arr[i] = lhs[i] >= rhs[i]` for all i, where
`arr` is the result array
-/// Nulls are always considered smaller than any other value
-fn get_larger(lhs: &dyn Array, rhs: &dyn Array) -> Result<BooleanArray> {
- // Fast path:
- // If both arrays are not nested, have the same length and no nulls, we
can use the faster vectorised kernel
- // - If both arrays are not nested: Nested types, such as lists, are not
supported as the null semantics are not well-defined.
- // - both array does not have any nulls: cmp::gt_eq will return null if
any of the input is null while we want to return false in that case
- if !lhs.data_type().is_nested()
- && get_logical_null_count(lhs) == 0
- && get_logical_null_count(rhs) == 0
- {
- return cmp::gt_eq(&lhs, &rhs).map_err(|e| e.into());
- }
+ fn keep_scalar<'a>(
+ lhs: &'a ScalarValue,
+ rhs: &'a ScalarValue,
+ ) -> Result<&'a ScalarValue> {
+ if !lhs.data_type().is_nested() {
+ return if lhs >= rhs { Ok(lhs) } else { Ok(rhs) };
+ }
- let cmp = make_comparator(lhs, rhs, SORT_OPTIONS)?;
+ // If complex type we can't compare directly as we want null values to
be smaller
+ let cmp = make_comparator(
+ lhs.to_array()?.as_ref(),
+ rhs.to_array()?.as_ref(),
+ SORT_OPTIONS,
+ )?;
- if lhs.len() != rhs.len() {
- return exec_err!(
- "All arrays should have the same length for greatest comparison"
- );
+ if cmp(0, 0).is_ge() {
+ Ok(lhs)
+ } else {
+ Ok(rhs)
+ }
}
- let values = BooleanBuffer::collect_bool(lhs.len(), |i| cmp(i, i).is_ge());
-
- // No nulls as we only want to keep the values that are larger, its either
true or false
- Ok(BooleanArray::new(values, None))
-}
-
-/// Return array where the largest value at each index is kept
-fn keep_larger(lhs: ArrayRef, rhs: ArrayRef) -> Result<ArrayRef> {
- // True for values that we should keep from the left array
- let keep_lhs = get_larger(lhs.as_ref(), rhs.as_ref())?;
-
- let larger = zip(&keep_lhs, &lhs, &rhs)?;
+ /// Return boolean array where `arr[i] = lhs[i] >= rhs[i]` for all i,
where `arr` is the result array
+ /// Nulls are always considered smaller than any other value
+ fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) ->
Result<BooleanArray> {
+ // Fast path:
+ // If both arrays are not nested, have the same length and no nulls,
we can use the faster vectorised kernel
+ // - If both arrays are not nested: Nested types, such as lists, are
not supported as the null semantics are not well-defined.
+ // - both array does not have any nulls: cmp::gt_eq will return null
if any of the input is null while we want to return false in that case
+ if !lhs.data_type().is_nested()
+ && lhs.logical_null_count() == 0
+ && rhs.logical_null_count() == 0
+ {
+ return cmp::gt_eq(&lhs, &rhs).map_err(|e| e.into());
+ }
- Ok(larger)
-}
+ let cmp = make_comparator(lhs, rhs, SORT_OPTIONS)?;
-fn keep_larger_scalar<'a>(
- lhs: &'a ScalarValue,
- rhs: &'a ScalarValue,
-) -> Result<&'a ScalarValue> {
- if !lhs.data_type().is_nested() {
- return if lhs >= rhs { Ok(lhs) } else { Ok(rhs) };
- }
-
- // If complex type we can't compare directly as we want null values to be
smaller
- let cmp = make_comparator(
- lhs.to_array()?.as_ref(),
- rhs.to_array()?.as_ref(),
- SORT_OPTIONS,
- )?;
+ if lhs.len() != rhs.len() {
+ return internal_err!(
+ "All arrays should have the same length for greatest
comparison"
+ );
+ }
- if cmp(0, 0).is_ge() {
- Ok(lhs)
- } else {
- Ok(rhs)
- }
-}
+ let values = BooleanBuffer::collect_bool(lhs.len(), |i| cmp(i,
i).is_ge());
-fn find_coerced_type(data_types: &[DataType]) -> Result<DataType> {
- if data_types.is_empty() {
- plan_err!("greatest was called without any arguments. It requires at
least 1.")
- } else if let Some(coerced_type) = type_union_resolution(data_types) {
- Ok(coerced_type)
- } else {
- plan_err!("Cannot find a common type for arguments")
+ // No nulls as we only want to keep the values that are larger, its
either true or false
+ Ok(BooleanArray::new(values, None))
}
}
@@ -151,74 +128,12 @@ impl ScalarUDFImpl for GreatestFunc {
}
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
- if args.is_empty() {
- return exec_err!(
- "greatest was called with no arguments. It requires at least
1."
- );
- }
-
- // Some engines (e.g. SQL Server) allow greatest with single arg, it's
a noop
- if args.len() == 1 {
- return Ok(args[0].clone());
- }
-
- // Split to scalars and arrays for later optimization
- let (scalars, arrays): (Vec<_>, Vec<_>) = args.iter().partition(|x|
match x {
- ColumnarValue::Scalar(_) => true,
- ColumnarValue::Array(_) => false,
- });
-
- let mut arrays_iter = arrays.iter().map(|x| match x {
- ColumnarValue::Array(a) => a,
- _ => unreachable!(),
- });
-
- let first_array = arrays_iter.next();
-
- let mut largest: ArrayRef;
-
- // Optimization: merge all scalars into one to avoid recomputing
- if !scalars.is_empty() {
- let mut scalars_iter = scalars.iter().map(|x| match x {
- ColumnarValue::Scalar(s) => s,
- _ => unreachable!(),
- });
-
- // We have at least one scalar
- let mut largest_scalar = scalars_iter.next().unwrap();
-
- for scalar in scalars_iter {
- largest_scalar = keep_larger_scalar(largest_scalar, scalar)?;
- }
-
- // If we only have scalars, return the largest one
- if arrays.is_empty() {
- return Ok(ColumnarValue::Scalar(largest_scalar.clone()));
- }
-
- // We have at least one array
- let first_array = first_array.unwrap();
-
- // Start with the largest value
- largest = keep_larger(
- Arc::clone(first_array),
- largest_scalar.to_array_of_size(first_array.len())?,
- )?;
- } else {
- // If we only have arrays, start with the first array
- // (We must have at least one array)
- largest = Arc::clone(first_array.unwrap());
- }
-
- for array in arrays_iter {
- largest = keep_larger(Arc::clone(array), largest)?;
- }
-
- Ok(ColumnarValue::Array(largest))
+ super::greatest_least_utils::execute_conditional::<Self>(args)
}
fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
- let coerced_type = find_coerced_type(arg_types)?;
+ let coerced_type =
+ super::greatest_least_utils::find_coerced_type::<Self>(arg_types)?;
Ok(vec![coerced_type; arg_types.len()])
}
diff --git a/datafusion/functions/src/core/greatest_least_utils.rs
b/datafusion/functions/src/core/greatest_least_utils.rs
new file mode 100644
index 0000000000..46b3645e70
--- /dev/null
+++ b/datafusion/functions/src/core/greatest_least_utils.rs
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{Array, ArrayRef, BooleanArray};
+use arrow::compute::kernels::zip::zip;
+use arrow::datatypes::DataType;
+use datafusion_common::{internal_err, plan_err, Result, ScalarValue};
+use datafusion_expr_common::columnar_value::ColumnarValue;
+use datafusion_expr_common::type_coercion::binary::type_union_resolution;
+use std::sync::Arc;
+
+pub(super) trait GreatestLeastOperator {
+ const NAME: &'static str;
+
+ fn keep_scalar<'a>(
+ lhs: &'a ScalarValue,
+ rhs: &'a ScalarValue,
+ ) -> Result<&'a ScalarValue>;
+
+ /// Return array with true for values that we should keep from the lhs
array
+ fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) ->
Result<BooleanArray>;
+}
+
+fn keep_array<Op: GreatestLeastOperator>(
+ lhs: ArrayRef,
+ rhs: ArrayRef,
+) -> Result<ArrayRef> {
+ // True for values that we should keep from the left array
+ let keep_lhs = Op::get_indexes_to_keep(lhs.as_ref(), rhs.as_ref())?;
+
+ let result = zip(&keep_lhs, &lhs, &rhs)?;
+
+ Ok(result)
+}
+
+pub(super) fn execute_conditional<Op: GreatestLeastOperator>(
+ args: &[ColumnarValue],
+) -> Result<ColumnarValue> {
+ if args.is_empty() {
+ return internal_err!(
+ "{} was called with no arguments. It requires at least 1.",
+ Op::NAME
+ );
+ }
+
+ // Some engines (e.g. SQL Server) allow greatest/least with single arg,
it's a noop
+ if args.len() == 1 {
+ return Ok(args[0].clone());
+ }
+
+ // Split to scalars and arrays for later optimization
+ let (scalars, arrays): (Vec<_>, Vec<_>) = args.iter().partition(|x| match
x {
+ ColumnarValue::Scalar(_) => true,
+ ColumnarValue::Array(_) => false,
+ });
+
+ let mut arrays_iter = arrays.iter().map(|x| match x {
+ ColumnarValue::Array(a) => a,
+ _ => unreachable!(),
+ });
+
+ let first_array = arrays_iter.next();
+
+ let mut result: ArrayRef;
+
+ // Optimization: merge all scalars into one to avoid recomputing (constant
folding)
+ if !scalars.is_empty() {
+ let mut scalars_iter = scalars.iter().map(|x| match x {
+ ColumnarValue::Scalar(s) => s,
+ _ => unreachable!(),
+ });
+
+ // We have at least one scalar
+ let mut result_scalar = scalars_iter.next().unwrap();
+
+ for scalar in scalars_iter {
+ result_scalar = Op::keep_scalar(result_scalar, scalar)?;
+ }
+
+ // If we only have scalars, return the one that we should keep
(largest/least)
+ if arrays.is_empty() {
+ return Ok(ColumnarValue::Scalar(result_scalar.clone()));
+ }
+
+ // We have at least one array
+ let first_array = first_array.unwrap();
+
+ // Start with the result value
+ result = keep_array::<Op>(
+ Arc::clone(first_array),
+ result_scalar.to_array_of_size(first_array.len())?,
+ )?;
+ } else {
+ // If we only have arrays, start with the first array
+ // (We must have at least one array)
+ result = Arc::clone(first_array.unwrap());
+ }
+
+ for array in arrays_iter {
+ result = keep_array::<Op>(Arc::clone(array), result)?;
+ }
+
+ Ok(ColumnarValue::Array(result))
+}
+
+pub(super) fn find_coerced_type<Op: GreatestLeastOperator>(
+ data_types: &[DataType],
+) -> Result<DataType> {
+ if data_types.is_empty() {
+ plan_err!(
+ "{} was called without any arguments. It requires at least 1.",
+ Op::NAME
+ )
+ } else if let Some(coerced_type) = type_union_resolution(data_types) {
+ Ok(coerced_type)
+ } else {
+ plan_err!("Cannot find a common type for arguments")
+ }
+}
diff --git a/datafusion/functions/src/core/least.rs
b/datafusion/functions/src/core/least.rs
new file mode 100644
index 0000000000..b9ea65cdb7
--- /dev/null
+++ b/datafusion/functions/src/core/least.rs
@@ -0,0 +1,200 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::core::greatest_least_utils::GreatestLeastOperator;
+use arrow::array::{make_comparator, Array, BooleanArray};
+use arrow::compute::kernels::cmp;
+use arrow::compute::SortOptions;
+use arrow::datatypes::DataType;
+use arrow_buffer::BooleanBuffer;
+use datafusion_common::{internal_err, Result, ScalarValue};
+use datafusion_doc::Documentation;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL;
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::OnceLock;
+
+const SORT_OPTIONS: SortOptions = SortOptions {
+ // Having the smallest result first
+ descending: false,
+
+ // NULL will be greater than any other value
+ nulls_first: false,
+};
+
+#[derive(Debug)]
+pub struct LeastFunc {
+ signature: Signature,
+}
+
+impl Default for LeastFunc {
+ fn default() -> Self {
+ LeastFunc::new()
+ }
+}
+
+impl LeastFunc {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::user_defined(Volatility::Immutable),
+ }
+ }
+}
+
+impl GreatestLeastOperator for LeastFunc {
+ const NAME: &'static str = "least";
+
+ fn keep_scalar<'a>(
+ lhs: &'a ScalarValue,
+ rhs: &'a ScalarValue,
+ ) -> Result<&'a ScalarValue> {
+ // Manual checking for nulls as:
+ // 1. If we're going to use <=, in Rust None is smaller than Some(T),
which we don't want
+ // 2. And we can't use make_comparator as it has no natural order
(Arrow error)
+ if lhs.is_null() {
+ return Ok(rhs);
+ }
+
+ if rhs.is_null() {
+ return Ok(lhs);
+ }
+
+ if !lhs.data_type().is_nested() {
+ return if lhs <= rhs { Ok(lhs) } else { Ok(rhs) };
+ }
+
+ // Not using <= as in Rust None is smaller than Some(T)
+
+ // If complex type we can't compare directly as we want null values to
be larger
+ let cmp = make_comparator(
+ lhs.to_array()?.as_ref(),
+ rhs.to_array()?.as_ref(),
+ SORT_OPTIONS,
+ )?;
+
+ if cmp(0, 0).is_le() {
+ Ok(lhs)
+ } else {
+ Ok(rhs)
+ }
+ }
+
+ /// Return boolean array where `arr[i] = lhs[i] <= rhs[i]` for all i,
where `arr` is the result array
+ /// Nulls are always considered larger than any other value
+ fn get_indexes_to_keep(lhs: &dyn Array, rhs: &dyn Array) ->
Result<BooleanArray> {
+ // Fast path:
+ // If both arrays are not nested, have the same length and no nulls,
we can use the faster vectorised kernel
+ // - If both arrays are not nested: Nested types, such as lists, are
not supported as the null semantics are not well-defined.
+ // - both array does not have any nulls: cmp::lt_eq will return null
if any of the input is null while we want to return false in that case
+ if !lhs.data_type().is_nested()
+ && lhs.logical_null_count() == 0
+ && rhs.logical_null_count() == 0
+ {
+ return cmp::lt_eq(&lhs, &rhs).map_err(|e| e.into());
+ }
+
+ let cmp = make_comparator(lhs, rhs, SORT_OPTIONS)?;
+
+ if lhs.len() != rhs.len() {
+ return internal_err!(
+ "All arrays should have the same length for least comparison"
+ );
+ }
+
+ let values = BooleanBuffer::collect_bool(lhs.len(), |i| cmp(i,
i).is_le());
+
+ // No nulls as we only want to keep the values that are smaller, its
either true or false
+ Ok(BooleanArray::new(values, None))
+ }
+}
+
+impl ScalarUDFImpl for LeastFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "least"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ Ok(arg_types[0].clone())
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ super::greatest_least_utils::execute_conditional::<Self>(args)
+ }
+
+ fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+ let coerced_type =
+ super::greatest_least_utils::find_coerced_type::<Self>(arg_types)?;
+
+ Ok(vec![coerced_type; arg_types.len()])
+ }
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_smallest_doc())
+ }
+}
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_smallest_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder(
+ DOC_SECTION_CONDITIONAL,
+ "Returns the smallest value in a list of expressions. Returns
_null_ if all expressions are _null_.",
+ "least(expression1[, ..., expression_n])")
+ .with_sql_example(r#"```sql
+> select least(4, 7, 5);
++---------------------------+
+| least(4,7,5) |
++---------------------------+
+| 4 |
++---------------------------+
+```"#,
+ )
+ .with_argument(
+ "expression1, expression_n",
+ "Expressions to compare and return the smallest value. Can be
a constant, column, or function, and any combination of arithmetic operators.
Pass as many expression arguments as necessary."
+ )
+ .build()
+ })
+}
+
+#[cfg(test)]
+mod test {
+ use crate::core::least::LeastFunc;
+ use arrow::datatypes::DataType;
+ use datafusion_expr::ScalarUDFImpl;
+
+ #[test]
+ fn test_least_return_types_without_common_supertype_in_arg_type() {
+ let least = LeastFunc::new();
+ let return_type = least
+ .coerce_types(&[DataType::Decimal128(10, 3),
DataType::Decimal128(10, 4)])
+ .unwrap();
+ assert_eq!(
+ return_type,
+ vec![DataType::Decimal128(11, 4), DataType::Decimal128(11, 4)]
+ );
+ }
+}
diff --git a/datafusion/functions/src/core/mod.rs
b/datafusion/functions/src/core/mod.rs
index bd8305cd56..ba8255d2e4 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -26,6 +26,8 @@ pub mod coalesce;
pub mod expr_ext;
pub mod getfield;
pub mod greatest;
+mod greatest_least_utils;
+pub mod least;
pub mod named_struct;
pub mod nullif;
pub mod nvl;
@@ -45,6 +47,7 @@ make_udf_function!(named_struct::NamedStructFunc,
named_struct);
make_udf_function!(getfield::GetFieldFunc, get_field);
make_udf_function!(coalesce::CoalesceFunc, coalesce);
make_udf_function!(greatest::GreatestFunc, greatest);
+make_udf_function!(least::LeastFunc, least);
make_udf_function!(version::VersionFunc, version);
pub mod expr_fn {
@@ -86,6 +89,10 @@ pub mod expr_fn {
greatest,
"Returns `greatest(args...)`, which evaluates to the greatest value in
the list of expressions or NULL if all the expressions are NULL",
args,
+ ),(
+ least,
+ "Returns `least(args...)`, which evaluates to the smallest value in
the list of expressions or NULL if all the expressions are NULL",
+ args,
));
#[doc = "Returns the value of the field with the given name from the
struct"]
@@ -113,6 +120,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
get_field(),
coalesce(),
greatest(),
+ least(),
version(),
r#struct(),
]
diff --git a/datafusion/sqllogictest/test_files/functions.slt
b/datafusion/sqllogictest/test_files/functions.slt
index 4b770a19fe..4213de0235 100644
--- a/datafusion/sqllogictest/test_files/functions.slt
+++ b/datafusion/sqllogictest/test_files/functions.slt
@@ -955,3 +955,201 @@ Infinity
statement ok
drop table t1
+
+# test for least
+statement ok
+CREATE TABLE t1 (a int, b int, c int) as VALUES
+(4, NULL, NULL),
+(1, 2, 3),
+(3, 1, 2),
+(1, NULL, -1),
+(NULL, NULL, NULL),
+(3, 0, -1);
+
+query I
+SELECT least(a, b, c) FROM t1
+----
+4
+1
+1
+-1
+NULL
+-1
+
+statement ok
+drop table t1
+
+query I
+SELECT least(1)
+----
+1
+
+query I
+SELECT least(1, 2)
+----
+1
+
+query I
+SELECT least(3, 1)
+----
+1
+
+query ?
+SELECT least(NULL)
+----
+NULL
+
+query I
+SELECT least(1, NULL, -1)
+----
+-1
+
+query I
+SELECT least((3), (0), (-1));
+----
+-1
+
+query ?
+SELECT least([4, 3], [4, 2], [4, 4]);
+----
+[4, 2]
+
+query ?
+SELECT least([2, 3], [1, 4], [5, 0]);
+----
+[1, 4]
+
+query I
+SELECT least(1::int, 2::text)
+----
+1
+
+query R
+SELECT least(-1, 1, 2.3, 123456789, 3 + 5, -(-4))
+----
+-1
+
+query R
+SELECT least(-1.123, 1.21313, 2.3, 123456789.321, 3 + 5.3213, -(-4.3213),
abs(-9))
+----
+-1.123
+
+query R
+SELECT least(-1, 1, 2.3, 123456789, 3 + 5, -(-4), abs(-9.0))
+----
+-1
+
+
+query error least does not support zero arguments
+SELECT least()
+
+query I
+SELECT least(4, 5, 7, 1, 2)
+----
+1
+
+query I
+SELECT least(4, NULL, 7, 1, 2)
+----
+1
+
+query I
+SELECT least(NULL, NULL, 7, NULL, 2)
+----
+2
+
+query I
+SELECT least(NULL, NULL, NULL, NULL, 2)
+----
+2
+
+query I
+SELECT least(2, NULL, NULL, NULL, NULL)
+----
+2
+
+query ?
+SELECT least(NULL, NULL, NULL)
+----
+NULL
+
+query I
+SELECT least(2, '4')
+----
+2
+
+query T
+SELECT least('foo', 'bar', 'foobar')
+----
+bar
+
+query R
+SELECT least(1, 1.2)
+----
+1
+
+statement ok
+CREATE TABLE foo (a int)
+
+statement ok
+INSERT INTO foo (a) VALUES (1)
+
+# Test homogenous functions that can't be constant folded.
+query I
+SELECT least(NULL, a, 5, NULL) FROM foo
+----
+1
+
+query I
+SELECT least(NULL, NULL, NULL, a, -1) FROM foo
+----
+-1
+
+statement ok
+drop table foo
+
+query R
+select least(arrow_cast('NAN','Float64'), arrow_cast('NAN','Float64'))
+----
+NaN
+
+query R
+select least(arrow_cast('NAN','Float64'), arrow_cast('NAN','Float32'))
+----
+NaN
+
+query R
+select least(arrow_cast('NAN','Float64'), '+Inf'::Double)
+----
+Infinity
+
+query R
+select least(arrow_cast('NAN','Float64'), NULL)
+----
+NaN
+
+query R
+select least(NULL, '+Inf'::Double)
+----
+Infinity
+
+query R
+select least(NULL, '-Inf'::Double)
+----
+-Infinity
+
+statement ok
+CREATE TABLE t1 (a double, b double, c double) as VALUES
+(1, arrow_cast('NAN', 'Float64'), '+Inf'::Double),
+(NULL, arrow_cast('NAN','Float64'), '+Inf'::Double),
+(1, '+Inf'::Double, NULL);
+
+query R
+SELECT least(a, b, c) FROM t1
+----
+1
+Infinity
+1
+
+statement ok
+drop table t1
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 4e74cfc54a..2e4147f96e 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -549,6 +549,7 @@ trunc(numeric_expression[, decimal_places])
- [coalesce](#coalesce)
- [greatest](#greatest)
- [ifnull](#ifnull)
+- [least](#least)
- [nullif](#nullif)
- [nvl](#nvl)
- [nvl2](#nvl2)
@@ -603,6 +604,29 @@ greatest(expression1[, ..., expression_n])
_Alias of [nvl](#nvl)._
+### `least`
+
+Returns the smallest value in a list of expressions. Returns _null_ if all
expressions are _null_.
+
+```
+least(expression1[, ..., expression_n])
+```
+
+#### Arguments
+
+- **expression1, expression_n**: Expressions to compare and return the
smallest value. Can be a constant, column, or function, and any combination of
arithmetic operators. Pass as many expression arguments as necessary.
+
+#### Example
+
+```sql
+> select least(4, 7, 5);
++---------------------------+
+| least(4,7,5) |
++---------------------------+
+| 4 |
++---------------------------+
+```
+
### `nullif`
Returns _null_ if _expression1_ equals _expression2_; otherwise it returns
_expression1_.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]