This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new d9d8ddd5f7 feat: Support `array_sort`(`list_sort`) (#8279)
d9d8ddd5f7 is described below

commit d9d8ddd5f770817f325190c4c0cc02436e7777e6
Author: Asura7969 <[email protected]>
AuthorDate: Thu Dec 7 06:00:16 2023 +0800

    feat: Support `array_sort`(`list_sort`) (#8279)
    
    * Minor: Improve the document format of JoinHashMap
    
    * list sort
    
    * fix: example doc
    
    * fix: ci
    
    * fix: doc error
    
    * fix pb
    
    * like DuckDB function semantics
    
    * fix ci
    
    * fix pb
    
    * fix: doc
    
    * add table test
    
    * fix: not as expected
    
    * fix: return null
    
    * resolve conflicts
    
    * doc
    
    * merge
---
 datafusion/expr/src/built_in_function.rs          |  8 +++
 datafusion/expr/src/expr_fn.rs                    |  3 +
 datafusion/physical-expr/src/array_expressions.rs | 83 ++++++++++++++++++++++-
 datafusion/physical-expr/src/functions.rs         |  3 +
 datafusion/proto/proto/datafusion.proto           |  1 +
 datafusion/proto/src/generated/pbjson.rs          |  3 +
 datafusion/proto/src/generated/prost.rs           |  3 +
 datafusion/proto/src/logical_plan/from_proto.rs   | 15 ++--
 datafusion/proto/src/logical_plan/to_proto.rs     |  1 +
 datafusion/sqllogictest/test_files/array.slt      | 50 ++++++++++++--
 docs/source/user-guide/sql/scalar_functions.md    | 36 ++++++++++
 11 files changed, 194 insertions(+), 12 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index d48e9e7a67..44fbf45525 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -130,6 +130,8 @@ pub enum BuiltinScalarFunction {
     // array functions
     /// array_append
     ArrayAppend,
+    /// array_sort
+    ArraySort,
     /// array_concat
     ArrayConcat,
     /// array_has
@@ -398,6 +400,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Tanh => Volatility::Immutable,
             BuiltinScalarFunction::Trunc => Volatility::Immutable,
             BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
+            BuiltinScalarFunction::ArraySort => Volatility::Immutable,
             BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
             BuiltinScalarFunction::ArrayEmpty => Volatility::Immutable,
             BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable,
@@ -545,6 +548,7 @@ impl BuiltinScalarFunction {
                 Ok(data_type)
             }
             BuiltinScalarFunction::ArrayAppend => 
Ok(input_expr_types[0].clone()),
+            BuiltinScalarFunction::ArraySort => 
Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayConcat => {
                 let mut expr_type = Null;
                 let mut max_dims = 0;
@@ -909,6 +913,9 @@ impl BuiltinScalarFunction {
         // for now, the list is small, as we do not have many built-in 
functions.
         match self {
             BuiltinScalarFunction::ArrayAppend => Signature::any(2, 
self.volatility()),
+            BuiltinScalarFunction::ArraySort => {
+                Signature::variadic_any(self.volatility())
+            }
             BuiltinScalarFunction::ArrayPopFront => Signature::any(1, 
self.volatility()),
             BuiltinScalarFunction::ArrayPopBack => Signature::any(1, 
self.volatility()),
             BuiltinScalarFunction::ArrayConcat => {
@@ -1558,6 +1565,7 @@ impl BuiltinScalarFunction {
                 "array_push_back",
                 "list_push_back",
             ],
+            BuiltinScalarFunction::ArraySort => &["array_sort", "list_sort"],
             BuiltinScalarFunction::ArrayConcat => {
                 &["array_concat", "array_cat", "list_concat", "list_cat"]
             }
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 6148226f6b..8d25619c07 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -583,6 +583,8 @@ scalar_expr!(
     "appends an element to the end of an array."
 );
 
+scalar_expr!(ArraySort, array_sort, array desc null_first, "returns sorted 
array.");
+
 scalar_expr!(
     ArrayPopBack,
     array_pop_back,
@@ -1184,6 +1186,7 @@ mod test {
         test_scalar_expr!(FromUnixtime, from_unixtime, unixtime);
 
         test_scalar_expr!(ArrayAppend, array_append, array, element);
+        test_scalar_expr!(ArraySort, array_sort, array, desc, null_first);
         test_scalar_expr!(ArrayPopFront, array_pop_front, array);
         test_scalar_expr!(ArrayPopBack, array_pop_back, array);
         test_unary_scalar_expr!(ArrayDims, array_dims);
diff --git a/datafusion/physical-expr/src/array_expressions.rs 
b/datafusion/physical-expr/src/array_expressions.rs
index f254274edd..269bbf7dcf 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -29,7 +29,7 @@ use arrow::datatypes::{DataType, Field, UInt64Type};
 use arrow::row::{RowConverter, SortField};
 use arrow_buffer::NullBuffer;
 
-use arrow_schema::FieldRef;
+use arrow_schema::{FieldRef, SortOptions};
 use datafusion_common::cast::{
     as_generic_list_array, as_generic_string_array, as_int64_array, 
as_list_array,
     as_null_array, as_string_array,
@@ -693,7 +693,7 @@ fn general_append_and_prepend(
 /// # Arguments
 ///
 /// * `args` - An array of 1 to 3 ArrayRefs representing start, stop, and 
step(step value can not be zero.) values.
-///    
+///
 /// # Examples
 ///
 /// gen_range(3) => [0, 1, 2]
@@ -777,6 +777,85 @@ pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> 
{
     Ok(res)
 }
 
+/// Array_sort SQL function
+pub fn array_sort(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let sort_option = match args.len() {
+        1 => None,
+        2 => {
+            let sort = as_string_array(&args[1])?.value(0);
+            Some(SortOptions {
+                descending: order_desc(sort)?,
+                nulls_first: true,
+            })
+        }
+        3 => {
+            let sort = as_string_array(&args[1])?.value(0);
+            let nulls_first = as_string_array(&args[2])?.value(0);
+            Some(SortOptions {
+                descending: order_desc(sort)?,
+                nulls_first: order_nulls_first(nulls_first)?,
+            })
+        }
+        _ => return internal_err!("array_sort expects 1 to 3 arguments"),
+    };
+
+    let list_array = as_list_array(&args[0])?;
+    let row_count = list_array.len();
+
+    let mut array_lengths = vec![];
+    let mut arrays = vec![];
+    let mut valid = BooleanBufferBuilder::new(row_count);
+    for i in 0..row_count {
+        if list_array.is_null(i) {
+            array_lengths.push(0);
+            valid.append(false);
+        } else {
+            let arr_ref = list_array.value(i);
+            let arr_ref = arr_ref.as_ref();
+
+            let sorted_array = compute::sort(arr_ref, sort_option)?;
+            array_lengths.push(sorted_array.len());
+            arrays.push(sorted_array);
+            valid.append(true);
+        }
+    }
+
+    // Assume all arrays have the same data type
+    let data_type = list_array.value_type();
+    let buffer = valid.finish();
+
+    let elements = arrays
+        .iter()
+        .map(|a| a.as_ref())
+        .collect::<Vec<&dyn Array>>();
+
+    let list_arr = ListArray::new(
+        Arc::new(Field::new("item", data_type, true)),
+        OffsetBuffer::from_lengths(array_lengths),
+        Arc::new(compute::concat(elements.as_slice())?),
+        Some(NullBuffer::new(buffer)),
+    );
+    Ok(Arc::new(list_arr))
+}
+
+fn order_desc(modifier: &str) -> Result<bool> {
+    match modifier.to_uppercase().as_str() {
+        "DESC" => Ok(true),
+        "ASC" => Ok(false),
+        _ => internal_err!("the second parameter of array_sort expects DESC or 
ASC"),
+    }
+}
+
+fn order_nulls_first(modifier: &str) -> Result<bool> {
+    match modifier.to_uppercase().as_str() {
+        "NULLS FIRST" => Ok(true),
+        "NULLS LAST" => Ok(false),
+        _ => internal_err!(
+            "the third parameter of array_sort expects NULLS FIRST or NULLS 
LAST"
+        ),
+    }
+}
+
 /// Array_prepend SQL function
 pub fn array_prepend(args: &[ArrayRef]) -> Result<ArrayRef> {
     let list_array = as_list_array(&args[1])?;
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index b5d7a8e97d..873864a57a 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -329,6 +329,9 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::ArrayAppend => {
             Arc::new(|args| 
make_scalar_function(array_expressions::array_append)(args))
         }
+        BuiltinScalarFunction::ArraySort => {
+            Arc::new(|args| 
make_scalar_function(array_expressions::array_sort)(args))
+        }
         BuiltinScalarFunction::ArrayConcat => {
             Arc::new(|args| 
make_scalar_function(array_expressions::array_concat)(args))
         }
diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto
index 64b8e28074..863e3c315c 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -644,6 +644,7 @@ enum ScalarFunction {
   Levenshtein = 125;
   SubstrIndex = 126;
   FindInSet = 127;
+  ArraySort = 128;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs 
b/datafusion/proto/src/generated/pbjson.rs
index 34ad63d819..74798ee8e9 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -20905,6 +20905,7 @@ impl serde::Serialize for ScalarFunction {
             Self::Levenshtein => "Levenshtein",
             Self::SubstrIndex => "SubstrIndex",
             Self::FindInSet => "FindInSet",
+            Self::ArraySort => "ArraySort",
         };
         serializer.serialize_str(variant)
     }
@@ -21044,6 +21045,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Levenshtein",
             "SubstrIndex",
             "FindInSet",
+            "ArraySort",
         ];
 
         struct GeneratedVisitor;
@@ -21212,6 +21214,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Levenshtein" => Ok(ScalarFunction::Levenshtein),
                     "SubstrIndex" => Ok(ScalarFunction::SubstrIndex),
                     "FindInSet" => Ok(ScalarFunction::FindInSet),
+                    "ArraySort" => Ok(ScalarFunction::ArraySort),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs 
b/datafusion/proto/src/generated/prost.rs
index 8b4dd1b759..ae20913e3d 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2601,6 +2601,7 @@ pub enum ScalarFunction {
     Levenshtein = 125,
     SubstrIndex = 126,
     FindInSet = 127,
+    ArraySort = 128,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2737,6 +2738,7 @@ impl ScalarFunction {
             ScalarFunction::Levenshtein => "Levenshtein",
             ScalarFunction::SubstrIndex => "SubstrIndex",
             ScalarFunction::FindInSet => "FindInSet",
+            ScalarFunction::ArraySort => "ArraySort",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2870,6 +2872,7 @@ impl ScalarFunction {
             "Levenshtein" => Some(Self::Levenshtein),
             "SubstrIndex" => Some(Self::SubstrIndex),
             "FindInSet" => Some(Self::FindInSet),
+            "ArraySort" => Some(Self::ArraySort),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs
index ae3628bdde..13576aaa08 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -44,10 +44,11 @@ use datafusion_expr::{
     array_except, array_has, array_has_all, array_has_any, array_intersect, 
array_length,
     array_ndims, array_position, array_positions, array_prepend, array_remove,
     array_remove_all, array_remove_n, array_repeat, array_replace, 
array_replace_all,
-    array_replace_n, array_slice, array_to_string, arrow_typeof, ascii, asin, 
asinh,
-    atan, atan2, atanh, bit_length, btrim, cardinality, cbrt, ceil, 
character_length,
-    chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, current_date,
-    current_time, date_bin, date_part, date_trunc, decode, degrees, digest, 
encode, exp,
+    array_replace_n, array_slice, array_sort, array_to_string, arrow_typeof, 
ascii, asin,
+    asinh, atan, atan2, atanh, bit_length, btrim, cardinality, cbrt, ceil,
+    character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, 
cot,
+    current_date, current_time, date_bin, date_part, date_trunc, decode, 
degrees, digest,
+    encode, exp,
     expr::{self, InList, Sort, WindowFunction},
     factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range, 
isnan, iszero,
     lcm, left, levenshtein, ln, log, log10, log2,
@@ -463,6 +464,7 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::Rtrim => Self::Rtrim,
             ScalarFunction::ToTimestamp => Self::ToTimestamp,
             ScalarFunction::ArrayAppend => Self::ArrayAppend,
+            ScalarFunction::ArraySort => Self::ArraySort,
             ScalarFunction::ArrayConcat => Self::ArrayConcat,
             ScalarFunction::ArrayEmpty => Self::ArrayEmpty,
             ScalarFunction::ArrayExcept => Self::ArrayExcept,
@@ -1343,6 +1345,11 @@ pub fn parse_expr(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
                 )),
+                ScalarFunction::ArraySort => Ok(array_sort(
+                    parse_expr(&args[0], registry)?,
+                    parse_expr(&args[1], registry)?,
+                    parse_expr(&args[2], registry)?,
+                )),
                 ScalarFunction::ArrayPopFront => {
                     Ok(array_pop_front(parse_expr(&args[0], registry)?))
                 }
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index ecbfaca5db..0af8d9f3e7 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1502,6 +1502,7 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::Rtrim => Self::Rtrim,
             BuiltinScalarFunction::ToTimestamp => Self::ToTimestamp,
             BuiltinScalarFunction::ArrayAppend => Self::ArrayAppend,
+            BuiltinScalarFunction::ArraySort => Self::ArraySort,
             BuiltinScalarFunction::ArrayConcat => Self::ArrayConcat,
             BuiltinScalarFunction::ArrayEmpty => Self::ArrayEmpty,
             BuiltinScalarFunction::ArrayExcept => Self::ArrayExcept,
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index d8bf441d71..3c23dd369a 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -1052,6 +1052,44 @@ select make_array(['a','b'], null);
 ----
 [[a, b], ]
 
+## array_sort (aliases: `list_sort`)
+query ???
+select array_sort(make_array(1, 3, null, 5, NULL, -5)), 
array_sort(make_array(1, 3, null, 2), 'ASC'), array_sort(make_array(1, 3, null, 
2), 'desc', 'NULLS FIRST');
+----
+[, , -5, 1, 3, 5] [, 1, 2, 3] [, 3, 2, 1]
+
+query ?
+select array_sort(column1, 'DESC', 'NULLS LAST') from arrays_values;
+----
+[10, 9, 8, 7, 6, 5, 4, 3, 2, ]
+[20, 18, 17, 16, 15, 14, 13, 12, 11, ]
+[30, 29, 28, 27, 26, 25, 23, 22, 21, ]
+[40, 39, 38, 37, 35, 34, 33, 32, 31, ]
+NULL
+[50, 49, 48, 47, 46, 45, 44, 43, 42, 41]
+[60, 59, 58, 57, 56, 55, 54, 52, 51, ]
+[70, 69, 68, 67, 66, 65, 64, 63, 62, 61]
+
+query ?
+select array_sort(column1, 'ASC', 'NULLS FIRST') from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+[, 11, 12, 13, 14, 15, 16, 17, 18, 20]
+[, 21, 22, 23, 25, 26, 27, 28, 29, 30]
+[, 31, 32, 33, 34, 35, 37, 38, 39, 40]
+NULL
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
+[, 51, 52, 54, 55, 56, 57, 58, 59, 60]
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+
+
+## list_sort (aliases: `array_sort`)
+query ???
+select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 
3, null, 2), 'ASC'), list_sort(make_array(1, 3, null, 2), 'desc', 'NULLS 
FIRST');
+----
+[, , -5, 1, 3, 5] [, 1, 2, 3] [, 3, 2, 1]
+
+
 ## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`)
 
 # TODO: array_append with NULLs
@@ -1224,7 +1262,7 @@ select array_prepend(make_array(1, 11, 111), column1), 
array_prepend(column2, ma
 
 # array_repeat scalar function #1
 query ????????
-select 
+select
   array_repeat(1, 5),
   array_repeat(3.14, 3),
   array_repeat('l', 4),
@@ -1257,7 +1295,7 @@ AS VALUES
   (0, 3, 3.3, 'datafusion', make_array(8, 9));
 
 query ??????
-select 
+select
   array_repeat(column2, column1),
   array_repeat(column3, column1),
   array_repeat(column4, column1),
@@ -1272,7 +1310,7 @@ from array_repeat_table;
 [] [] [] [] [3, 3, 3] []
 
 statement ok
-drop table array_repeat_table; 
+drop table array_repeat_table;
 
 ## array_concat (aliases: `array_cat`, `list_concat`, `list_cat`)
 
@@ -2188,7 +2226,7 @@ select array_remove(make_array(1, 2, 2, 1, 1), 2), 
array_remove(make_array(1.0,
 [1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o]
 
 query ???
-select 
+select
   array_remove(make_array(1, null, 2, 3), 2),
   array_remove(make_array(1.1, null, 2.2, 3.3), 1.1),
   array_remove(make_array('a', null, 'bc'), 'a');
@@ -2887,7 +2925,7 @@ from array_intersect_table_3D;
 query ??????
 SELECT  array_intersect(make_array(1,2,3), make_array(2,3,4)),
         array_intersect(make_array(1,3,5), make_array(2,4,6)),
-        array_intersect(make_array('aa','bb','cc'), 
make_array('cc','aa','dd')), 
+        array_intersect(make_array('aa','bb','cc'), 
make_array('cc','aa','dd')),
         array_intersect(make_array(true, false), make_array(true)),
         array_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)),
         array_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], 
[3, 3], [4, 4]))
@@ -2918,7 +2956,7 @@ NULL
 query ??????
 SELECT  list_intersect(make_array(1,2,3), make_array(2,3,4)),
         list_intersect(make_array(1,3,5), make_array(2,4,6)),
-        list_intersect(make_array('aa','bb','cc'), 
make_array('cc','aa','dd')), 
+        list_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')),
         list_intersect(make_array(true, false), make_array(true)),
         list_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)),
         list_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], 
[3, 3], [4, 4]))
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index 46920f1c4d..9a9bec9df7 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1555,6 +1555,7 @@ from_unixtime(expression)
 ## Array Functions
 
 - [array_append](#array_append)
+- [array_sort](#array_sort)
 - [array_cat](#array_cat)
 - [array_concat](#array_concat)
 - [array_contains](#array_contains)
@@ -1584,6 +1585,7 @@ from_unixtime(expression)
 - [cardinality](#cardinality)
 - [empty](#empty)
 - [list_append](#list_append)
+- [list_sort](#list_sort)
 - [list_cat](#list_cat)
 - [list_concat](#list_concat)
 - [list_dims](#list_dims)
@@ -1645,6 +1647,36 @@ array_append(array, element)
 - list_append
 - list_push_back
 
+### `array_sort`
+
+Sort array.
+
+```
+array_sort(array, desc, nulls_first)
+```
+
+#### Arguments
+
+- **array**: Array expression.
+  Can be a constant, column, or function, and any combination of array 
operators.
+- **desc**: Whether to sort in descending order(`ASC` or `DESC`).
+- **nulls_first**: Whether to sort nulls first(`NULLS FIRST` or `NULLS LAST`).
+
+#### Example
+
+```
+❯ select array_sort([3, 1, 2]);
++-----------------------------+
+| array_sort(List([3,1,2]))   |
++-----------------------------+
+| [1, 2, 3]                   |
++-----------------------------+
+```
+
+#### Aliases
+
+- list_sort
+
 ### `array_cat`
 
 _Alias of [array_concat](#array_concat)._
@@ -2433,6 +2465,10 @@ empty(array)
 
 _Alias of [array_append](#array_append)._
 
+### `list_sort`
+
+_Alias of [array_sort](#array_sort)._
+
 ### `list_cat`
 
 _Alias of [array_concat](#array_concat)._

Reply via email to