(datafusion-comet) branch main updated: chore: [1941-Part1]: Introduce `map_sort` scalar function (#2262)

comphead Tue, 09 Sep 2025 10:38:23 -0700

This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git



The following commit(s) were added to refs/heads/main by this push:
     new f2baf95e9 chore: [1941-Part1]: Introduce `map_sort` scalar function 
(#2262)
f2baf95e9 is described below

commit f2baf95e9a1653991e21846d7e89e77ee656ba4b
Author: Rishab Joshi <8187657+rish...@users.noreply.github.com>
AuthorDate: Tue Sep 9 10:38:07 2025 -0700

    chore: [1941-Part1]: Introduce `map_sort` scalar function (#2262)
---
 native/spark-expr/src/comet_scalar_funcs.rs        |   5 +
 native/spark-expr/src/lib.rs                       |   1 +
 native/spark-expr/src/map_funcs/map_sort.rs        | 805 +++++++++++++++++++++
 native/spark-expr/src/map_funcs/mod.rs             |  19 +
 .../src/math_funcs/checked_arithmetic.rs           |   6 +-
 5 files changed, 832 insertions(+), 4 deletions(-)

diff --git a/native/spark-expr/src/comet_scalar_funcs.rs 
b/native/spark-expr/src/comet_scalar_funcs.rs
index 75f5689ad..3ec685d3c 100644
--- a/native/spark-expr/src/comet_scalar_funcs.rs
+++ b/native/spark-expr/src/comet_scalar_funcs.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 use crate::hash_funcs::*;
+use crate::map_funcs::spark_map_sort;
 use crate::math_funcs::checked_arithmetic::{checked_add, checked_div, 
checked_mul, checked_sub};
 use crate::math_funcs::modulo_expr::spark_modulo;
 use crate::{
@@ -157,6 +158,10 @@ pub fn create_comet_physical_fun(
             let fail_on_error = fail_on_error.unwrap_or(false);
             make_comet_scalar_udf!("spark_modulo", func, without data_type, 
fail_on_error)
         }
+        "map_sort" => {
+            let func = Arc::new(spark_map_sort);
+            make_comet_scalar_udf!("spark_map_sort", func, without data_type)
+        }
         _ => registry.udf(fun_name).map_err(|e| {
             DataFusionError::Execution(format!(
                 "Function {fun_name} not found in the registry: {e}",
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
index 4b29b6177..27672ded5 100644
--- a/native/spark-expr/src/lib.rs
+++ b/native/spark-expr/src/lib.rs
@@ -55,6 +55,7 @@ pub use bloom_filter::{BloomFilterAgg, 
BloomFilterMightContain};
 
 mod conditional_funcs;
 mod conversion_funcs;
+mod map_funcs;
 mod math_funcs;
 mod nondetermenistic_funcs;
 
diff --git a/native/spark-expr/src/map_funcs/map_sort.rs 
b/native/spark-expr/src/map_funcs/map_sort.rs
new file mode 100644
index 000000000..6d336d821
--- /dev/null
+++ b/native/spark-expr/src/map_funcs/map_sort.rs
@@ -0,0 +1,805 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{Array, ArrayRef, MapArray, StructArray};
+use arrow::compute::{concat, sort_to_indices, take, SortOptions};
+use arrow::datatypes::DataType;
+use datafusion::common::{exec_err, DataFusionError};
+use datafusion::physical_plan::ColumnarValue;
+use std::sync::Arc;
+
+/// Spark compatible `MapSort` implementation.
+/// Sorts each entries of a MapArray by keys in ascending order without 
changing the ordering of the
+/// maps in the array.
+///
+/// For eg. If the input is a MapArray with entries:
+/// ```text
+/// [
+///     {"c": 3, "a": 1, "b": 2}
+///     {"x": 1, "z": 3, "y": 2}
+///     {"a": 1, "b": 2, "c": 3}
+///     {["b", "a", "c"]: 2, ["a", "b", "c"]: 1, ["c", "b", "a"]: 3}
+/// ]
+/// ```
+/// The output will be:
+/// ```text
+/// [
+///     {"a": 1, "b": 2, "c": 3}
+///     {"x": 1, "y": 2, "z": 3}
+///     {"a": 1, "b": 2, "c": 3}
+///     {["a", "b", "c"]: 1, ["b", "a", "c"]: 2, ["c", "b", "a"]: 3}
+/// ]
+/// ```
+pub fn spark_map_sort(args: &[ColumnarValue]) -> Result<ColumnarValue, 
DataFusionError> {
+    if args.len() != 1 {
+        return exec_err!("spark_map_sort expects exactly one argument");
+    }
+
+    let arr_arg: ArrayRef = match &args[0] {
+        ColumnarValue::Array(array) => Arc::<dyn Array>::clone(array),
+        ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(1)?,
+    };
+
+    let (maps_arg, map_field, is_sorted) = match arr_arg.data_type() {
+        DataType::Map(map_field, is_sorted) => {
+            let maps_arg = 
arr_arg.as_any().downcast_ref::<MapArray>().unwrap();
+            (maps_arg, map_field, is_sorted)
+        }
+        _ => return exec_err!("spark_map_sort expects Map type as argument"),
+    };
+
+    let maps_arg_entries = maps_arg.entries();
+    let maps_arg_offsets = maps_arg.offsets();
+
+    let mut sorted_map_entries_vec: Vec<ArrayRef> = 
Vec::with_capacity(maps_arg.len());
+
+    // Iterate over each map in the MapArray and build a vector of sorted map 
entries.
+    for idx in 0..maps_arg.len() {
+        // Retrieve the start and end of the current map entries from the 
offset buffer.
+        let map_start = maps_arg_offsets[idx] as usize;
+        let map_end = maps_arg_offsets[idx + 1] as usize;
+        let map_len = map_end - map_start;
+
+        // Get the current map entries.
+        let map_entries = maps_arg_entries.slice(map_start, map_len);
+
+        if map_len == 0 {
+            sorted_map_entries_vec.push(Arc::new(map_entries));
+            continue;
+        }
+
+        // Sort the entry-indices of the map by their keys in ascending order.
+        let map_keys = map_entries.column(0);
+        let sort_options = SortOptions {
+            descending: false,
+            nulls_first: true,
+        };
+        let sorted_indices = sort_to_indices(&map_keys, Some(sort_options), 
None)?;
+
+        // Get the sorted map entries using the sorted indices and add it to 
the sorted map vector.
+        let sorted_map_entries = take(&map_entries, &sorted_indices, None)?;
+        sorted_map_entries_vec.push(sorted_map_entries);
+    }
+
+    // Flatten the sorted map entries into a single StructArray.
+    let sorted_map_entries_arr: Vec<&dyn Array> = sorted_map_entries_vec
+        .iter()
+        .map(|arr| arr.as_ref())
+        .collect();
+    let combined_sorted_map_entries = concat(&sorted_map_entries_arr)?;
+    let sorted_map_struct = combined_sorted_map_entries
+        .as_any()
+        .downcast_ref::<StructArray>()
+        .unwrap();
+
+    // Create a new MapArray with the sorted entries while preserving the 
original metadata.
+    // Note that then even though the map is sorted, the is_sorted flag is 
been set from the
+    // original MapArray which may be false. Although, this might be less 
efficient, it is has been
+    // done to keep the schema consistent.
+    let sorted_map_arr = Arc::new(MapArray::try_new(
+        Arc::<arrow::datatypes::Field>::clone(map_field),
+        maps_arg.offsets().clone(),
+        sorted_map_struct.clone(),
+        maps_arg.nulls().cloned(),
+        *is_sorted,
+    )?);
+
+    Ok(ColumnarValue::Array(sorted_map_arr))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::builder::{Int32Builder, MapBuilder, StringBuilder};
+    use arrow::array::{Int32Array, ListArray, ListBuilder, MapFieldNames, 
StringArray};
+    use datafusion::common::ScalarValue;
+    use std::sync::Arc;
+
+    macro_rules! build_map {
+        (
+            $key_builder:expr,
+            $value_builder:expr,
+            $keys:expr,
+            $values:expr,
+            $validity:expr,
+            $entries_builder_fn:ident
+        ) => {{
+            let mut map_builder = MapBuilder::new(
+                Some(MapFieldNames {
+                    entry: "entries".into(),
+                    key: "key".into(),
+                    value: "value".into(),
+                }),
+                $key_builder,
+                $value_builder,
+            );
+
+            assert_eq!($keys.len(), $values.len());
+            assert_eq!($keys.len(), $validity.len());
+
+            let total_maps = $keys.len();
+            for map_idx in 0..total_maps {
+                let map_keys = &$keys[map_idx];
+                let map_values = &$values[map_idx];
+                assert_eq!(map_keys.len(), map_values.len());
+
+                let map_entries = map_keys.len();
+                for entry_idx in 0..map_entries {
+                    let map_key = &map_keys[entry_idx];
+                    let map_value = &map_values[entry_idx];
+                    $entries_builder_fn!(map_builder, map_key, map_value);
+                }
+
+                let is_valid = $validity[map_idx];
+                map_builder.append(is_valid).unwrap();
+            }
+
+            map_builder.finish()
+        }};
+    }
+
+    macro_rules! default_map_entries_builder {
+        ($map_builder:expr, $key:expr, $value:expr) => {{
+            $map_builder.keys().append_value($key.clone());
+            $map_builder.values().append_value($value.clone().unwrap());
+        }};
+    }
+
+    macro_rules! nested_map_entries_builder {
+        ($map_builder:expr, $key:expr, $value:expr) => {{
+            $map_builder.keys().append_value($key.clone());
+
+            let inner_map_builder = $map_builder.values();
+
+            let (inner_keys, inner_values, inner_valid) = $value;
+            assert_eq!(inner_keys.len(), inner_values.len());
+
+            let inner_entries = inner_keys.len();
+            for inner_idx in 0..inner_entries {
+                let inner_key_val = &inner_keys[inner_idx];
+                let inner_value = &inner_values[inner_idx];
+                default_map_entries_builder!(inner_map_builder, inner_key_val, 
inner_value);
+            }
+
+            inner_map_builder.append(*inner_valid).unwrap();
+        }};
+    }
+
+    macro_rules! verify_result {
+        (
+            $key_type:ty,
+            $value_type:ty,
+            $result:expr,
+            $expected_map_arr:expr,
+            $verify_entries_fn:ident
+        ) => {{
+            match $result {
+                ColumnarValue::Array(actual_arr) => {
+                    let actual_map_arr = 
actual_arr.as_any().downcast_ref::<MapArray>().unwrap();
+
+                    assert_eq!(
+                        actual_map_arr.len(),
+                        $expected_map_arr.len(),
+                        "Unexpected length of the result MapArray"
+                    );
+                    assert_eq!(
+                        actual_map_arr.offsets(),
+                        $expected_map_arr.offsets(),
+                        "Unexpected offsets of the result MapArray"
+                    );
+                    assert_eq!(
+                        actual_map_arr.nulls(),
+                        $expected_map_arr.nulls(),
+                        "Unexpected nulls of the result MapArray"
+                    );
+                    assert_eq!(
+                        actual_map_arr.data_type(),
+                        $expected_map_arr.data_type(),
+                        "Unexpected data type of the result MapArray"
+                    );
+
+                    match (actual_map_arr.data_type(), 
$expected_map_arr.data_type()) {
+                        (
+                            DataType::Map(actual_field_ref, actual_is_sorted),
+                            DataType::Map(expected_field_ref, 
expected_is_sorted),
+                        ) => {
+                            assert_eq!(
+                                actual_field_ref, expected_field_ref,
+                                "Unexpected field of the result MapArray"
+                            );
+                            assert_eq!(
+                                actual_is_sorted, expected_is_sorted,
+                                "Unexpected is_sorted flag of the result 
MapArray"
+                            );
+                        }
+                        _ => panic!("Actual result data type is not Map"),
+                    }
+
+                    let actual_entries = actual_map_arr.entries();
+                    let actual_keys = actual_entries
+                        .column(0)
+                        .as_any()
+                        .downcast_ref::<$key_type>()
+                        .unwrap();
+                    let actual_values = actual_entries
+                        .column(1)
+                        .as_any()
+                        .downcast_ref::<$value_type>()
+                        .unwrap();
+
+                    let expected_entries = $expected_map_arr.entries();
+                    let expected_keys = expected_entries
+                        .column(0)
+                        .as_any()
+                        .downcast_ref::<$key_type>()
+                        .unwrap();
+                    let expected_values = expected_entries
+                        .column(1)
+                        .as_any()
+                        .downcast_ref::<$value_type>()
+                        .unwrap();
+
+                    assert_eq!(
+                        actual_keys.len(),
+                        expected_keys.len(),
+                        "Unexpected length of keys"
+                    );
+                    assert_eq!(
+                        actual_values.len(),
+                        expected_values.len(),
+                        "Unexpected length of values"
+                    );
+
+                    $verify_entries_fn!(
+                        expected_entries.len(),
+                        actual_keys,
+                        expected_keys,
+                        actual_values,
+                        expected_values
+                    );
+                }
+                unexpected_arr => {
+                    panic!("Actual result: {unexpected_arr:?} is not an Array 
ColumnarValue")
+                }
+            }
+        }};
+    }
+
+    macro_rules! default_entries_verifier {
+        (
+            $entries_len:expr,
+            $actual_keys:expr,
+            $expected_keys:expr,
+            $actual_values:expr,
+            $expected_values:expr
+        ) => {{
+            for idx in 0..$entries_len {
+                assert_eq!(
+                    $actual_keys.value(idx),
+                    $expected_keys.value(idx),
+                    "Unexpected key at index {idx}"
+                );
+                assert_eq!(
+                    $actual_values.value(idx),
+                    $expected_values.value(idx),
+                    "Unexpected value at index {idx}"
+                );
+            }
+        }};
+    }
+
+    macro_rules! list_entries_verifier {
+        (
+            $entries_len:expr,
+            $actual_keys:expr,
+            $expected_keys:expr,
+            $actual_values:expr,
+            $expected_values:expr
+        ) => {{
+            for idx in 0..$entries_len {
+                let actual_list = $actual_keys.value(idx);
+                let expected_list = $expected_keys.value(idx);
+
+                assert!(
+                    actual_list.eq(&expected_list),
+                    "Unexpected key at index {}: actual={:?}, expected={:?}",
+                    idx,
+                    actual_list,
+                    expected_list
+                );
+
+                assert_eq!(
+                    $actual_values.value(idx),
+                    $expected_values.value(idx),
+                    "Unexpected value at index {idx}"
+                );
+            }
+        }};
+    }
+
+    #[test]
+    fn test_map_sort_with_string_keys() {
+        // Input is `MapArray` with 4 maps. Each map has 3 entries with string 
keys and int values.
+        let keys_arg: [Vec<String>; 4] = [
+            vec!["c".into(), "a".into(), "b".into()],
+            vec!["z".into(), "y".into(), "x".into()],
+            vec!["a".into(), "b".into(), "c".into()],
+            vec!["fusion".into(), "comet".into(), "data".into()],
+        ];
+        let values_arg = [
+            vec![Some(3), Some(1), Some(2)],
+            vec![Some(30), Some(20), Some(10)],
+            vec![Some(1), Some(2), Some(3)],
+            vec![Some(300), Some(100), Some(200)],
+        ];
+        let validity = [true, true, true, true];
+
+        let map_arr_arg = build_map!(
+            StringBuilder::new(),
+            Int32Builder::new(),
+            keys_arg,
+            values_arg,
+            validity,
+            default_map_entries_builder
+        );
+        let args = vec![ColumnarValue::Array(Arc::new(map_arr_arg))];
+        let result = spark_map_sort(&args).unwrap();
+
+        let expected_keys: [Vec<String>; 4] = [
+            vec!["a".into(), "b".into(), "c".into()],
+            vec!["x".into(), "y".into(), "z".into()],
+            vec!["a".into(), "b".into(), "c".into()],
+            vec!["comet".into(), "data".into(), "fusion".into()],
+        ];
+        let expected_values = [
+            vec![Some(1), Some(2), Some(3)],
+            vec![Some(10), Some(20), Some(30)],
+            vec![Some(1), Some(2), Some(3)],
+            vec![Some(100), Some(200), Some(300)],
+        ];
+        let expected_validity = [true, true, true, true];
+
+        let expected_map_arr = build_map!(
+            StringBuilder::new(),
+            Int32Builder::new(),
+            expected_keys,
+            expected_values,
+            expected_validity,
+            default_map_entries_builder
+        );
+        verify_result!(
+            StringArray,
+            Int32Array,
+            result,
+            expected_map_arr,
+            default_entries_verifier
+        );
+    }
+
+    #[test]
+    fn test_map_sort_with_int_keys() {
+        // Input is `MapArray` with 4 maps. Each map has 3 entries with int 
keys and string values.
+        let keys_arg = [
+            vec![3, 2, 1],
+            vec![100, 50, 20],
+            vec![20, 50, 100],
+            vec![-5, 0, -1],
+        ];
+        let values_arg: [Vec<Option<String>>; 4] = [
+            vec![Some("three".into()), Some("two".into()), Some("one".into())],
+            vec![
+                Some("hundred".into()),
+                Some("fifty".into()),
+                Some("twenty".into()),
+            ],
+            vec![
+                Some("twenty".into()),
+                Some("fifty".into()),
+                Some("hundred".into()),
+            ],
+            vec![
+                Some("minus five".into()),
+                Some("zero".into()),
+                Some("minus one".into()),
+            ],
+        ];
+        let validity = [true, true, true, true];
+
+        let map_arr_arg = build_map!(
+            Int32Builder::new(),
+            StringBuilder::new(),
+            keys_arg,
+            values_arg,
+            validity,
+            default_map_entries_builder
+        );
+        let args = vec![ColumnarValue::Array(Arc::new(map_arr_arg))];
+        let result = spark_map_sort(&args).unwrap();
+
+        let expected_keys = [
+            vec![1, 2, 3],
+            vec![20, 50, 100],
+            vec![20, 50, 100],
+            vec![-5, -1, 0],
+        ];
+        let expected_values: [Vec<Option<String>>; 4] = [
+            vec![Some("one".into()), Some("two".into()), Some("three".into())],
+            vec![
+                Some("twenty".into()),
+                Some("fifty".into()),
+                Some("hundred".into()),
+            ],
+            vec![
+                Some("twenty".into()),
+                Some("fifty".into()),
+                Some("hundred".into()),
+            ],
+            vec![
+                Some("minus five".into()),
+                Some("minus one".into()),
+                Some("zero".into()),
+            ],
+        ];
+        let expected_validity = [true, true, true, true];
+
+        let expected_map_arr = build_map!(
+            Int32Builder::new(),
+            StringBuilder::new(),
+            expected_keys,
+            expected_values,
+            expected_validity,
+            default_map_entries_builder
+        );
+        verify_result!(
+            Int32Array,
+            StringArray,
+            result,
+            expected_map_arr,
+            default_entries_verifier
+        );
+    }
+
+    #[test]
+    fn test_map_sort_with_nested_maps() {
+        // Input is `MapArray` with one maps. The map has 2 entries with 
string keys and map values.
+        // The inner maps have 2 entries each with string keys and string 
values.
+        let outer_keys: [String; 2] = ["outer_k2".into(), "outer_k1".into()];
+        let inner_keys: [[String; 2]; 2] = [
+            ["outer_k2->inner_k1".into(), "outer_k2->inner_k2".into()],
+            ["outer_k1->inner_k1".into(), "outer_k1->inner_k2".into()],
+        ];
+        let inner_values: [[Option<String>; 2]; 2] = [
+            [
+                Some("outer_k2->inner_k1->inner_v1".into()),
+                Some("outer_k2->inner_k2->inner_v2".into()),
+            ],
+            [
+                Some("outer_k1->inner_k1->inner_v1".into()),
+                Some("outer_k1->inner_k2->inner_v2".into()),
+            ],
+        ];
+        let outer_values = [
+            (&inner_keys[0], &inner_values[0], true),
+            (&inner_keys[1], &inner_values[1], true),
+        ];
+
+        let keys_arg = [outer_keys];
+        let values_arg = [outer_values];
+        let validity = [true];
+
+        let map_arr_arg = build_map!(
+            StringBuilder::new(),
+            MapBuilder::new(
+                Some(MapFieldNames {
+                    entry: "entries".into(),
+                    key: "key".into(),
+                    value: "value".into(),
+                }),
+                StringBuilder::new(),
+                StringBuilder::new(),
+            ),
+            keys_arg,
+            values_arg,
+            validity,
+            nested_map_entries_builder
+        );
+
+        // For nested maps, only the outer map is sorted by keys, the inner 
maps remain unchanged.
+        let args = vec![ColumnarValue::Array(Arc::new(map_arr_arg))];
+        let result = spark_map_sort(&args).unwrap();
+
+        let expected_outer_keys: [String; 2] = ["outer_k1".into(), 
"outer_k2".into()];
+        let expected_inner_keys: [[String; 2]; 2] = [
+            ["outer_k1->inner_k1".into(), "outer_k1->inner_k2".into()],
+            ["outer_k2->inner_k1".into(), "outer_k2->inner_k2".into()],
+        ];
+        let expected_inner_values: [[Option<String>; 2]; 2] = [
+            [
+                Some("outer_k1->inner_k1->inner_v1".into()),
+                Some("outer_k1->inner_k2->inner_v2".into()),
+            ],
+            [
+                Some("outer_k2->inner_k1->inner_v1".into()),
+                Some("outer_k2->inner_k2->inner_v2".into()),
+            ],
+        ];
+        let expected_outer_values = [
+            (&expected_inner_keys[0], &expected_inner_values[0], true),
+            (&expected_inner_keys[1], &expected_inner_values[1], true),
+        ];
+
+        let expected_keys_arg = [expected_outer_keys];
+        let expected_values_arg = [expected_outer_values];
+        let expected_validity = [true];
+
+        let expected_map_arr = build_map!(
+            StringBuilder::new(),
+            MapBuilder::new(
+                Some(MapFieldNames {
+                    entry: "entries".into(),
+                    key: "key".into(),
+                    value: "value".into(),
+                }),
+                StringBuilder::new(),
+                StringBuilder::new(),
+            ),
+            expected_keys_arg,
+            expected_values_arg,
+            expected_validity,
+            nested_map_entries_builder
+        );
+
+        verify_result!(
+            StringArray,
+            MapArray,
+            result,
+            expected_map_arr,
+            default_entries_verifier
+        );
+    }
+
+    #[test]
+    fn test_map_sort_with_list_int_keys() {
+        // Input is `MapArray` with one maps. The map has 3 entries with 
integer list keys and
+        // string values.
+        let keys_arg = [vec![
+            vec![Some(3), Some(2)],
+            vec![Some(1), Some(2)],
+            vec![Some(2), Some(1)],
+        ]];
+        let values_arg: [Vec<Option<String>>; 1] = [vec![
+            Some("three_two".into()),
+            Some("one_two".into()),
+            Some("two_one".into()),
+        ]];
+        let validity = [true];
+
+        let map_arr_arg = build_map!(
+            ListBuilder::new(Int32Builder::new()),
+            StringBuilder::new(),
+            keys_arg,
+            values_arg,
+            validity,
+            default_map_entries_builder
+        );
+
+        let args = vec![ColumnarValue::Array(Arc::new(map_arr_arg))];
+        let result = spark_map_sort(&args).unwrap();
+
+        let expected_keys = [vec![
+            vec![Some(1), Some(2)],
+            vec![Some(2), Some(1)],
+            vec![Some(3), Some(2)],
+        ]];
+        let expected_values: [Vec<Option<String>>; 1] = [vec![
+            Some("one_two".into()),
+            Some("two_one".into()),
+            Some("three_two".into()),
+        ]];
+        let expected_validity = [true];
+
+        let expected_map_arr = build_map!(
+            ListBuilder::new(Int32Builder::new()),
+            StringBuilder::new(),
+            expected_keys,
+            expected_values,
+            expected_validity,
+            default_map_entries_builder
+        );
+
+        verify_result!(
+            ListArray,
+            StringArray,
+            result,
+            expected_map_arr,
+            list_entries_verifier
+        );
+    }
+
+    #[test]
+    fn test_map_sort_with_list_string_keys() {
+        // Input is `MapArray` with one maps. The map has 3 entries with 
string list keys and
+        // int values.
+        let keys_arg: [Vec<Vec<Option<String>>>; 1] = [vec![
+            vec![Some("c".into()), Some("b".into())],
+            vec![Some("a".into()), Some("b".into())],
+            vec![Some("b".into()), Some("a".into())],
+        ]];
+        let values_arg: [Vec<Option<i32>>; 1] = [vec![Some(32), Some(12), 
Some(21)]];
+        let validity = [true];
+
+        let map_arr_arg = build_map!(
+            ListBuilder::new(StringBuilder::new()),
+            Int32Builder::new(),
+            keys_arg,
+            values_arg,
+            validity,
+            default_map_entries_builder
+        );
+
+        let args = vec![ColumnarValue::Array(Arc::new(map_arr_arg))];
+        let result = spark_map_sort(&args).unwrap();
+
+        let expected_keys: [Vec<Vec<Option<String>>>; 1] = [vec![
+            vec![Some("a".into()), Some("b".into())],
+            vec![Some("b".into()), Some("a".into())],
+            vec![Some("c".into()), Some("b".into())],
+        ]];
+        let expected_values: [Vec<Option<i32>>; 1] = [vec![Some(12), Some(21), 
Some(32)]];
+        let expected_validity = [true];
+
+        let expected_map_arr = build_map!(
+            ListBuilder::new(StringBuilder::new()),
+            Int32Builder::new(),
+            expected_keys,
+            expected_values,
+            expected_validity,
+            default_map_entries_builder
+        );
+
+        verify_result!(
+            ListArray,
+            Int32Array,
+            result,
+            expected_map_arr,
+            list_entries_verifier
+        );
+    }
+
+    #[test]
+    fn test_map_sort_with_scalar_argument() {
+        let map_array = build_map!(
+            StringBuilder::new(),
+            Int32Builder::new(),
+            vec![vec!["b".to_string(), "a".to_string()]],
+            vec![vec![Some(2), Some(1)]],
+            vec![true],
+            default_map_entries_builder
+        );
+
+        let args = vec![ColumnarValue::Scalar(
+            ScalarValue::try_from_array(&map_array, 0).unwrap(),
+        )];
+        let result = spark_map_sort(&args).unwrap();
+
+        let expected_map_arr = build_map!(
+            StringBuilder::new(),
+            Int32Builder::new(),
+            vec![vec!["a".to_string(), "b".to_string()]],
+            vec![vec![Some(1), Some(2)]],
+            vec![true],
+            default_map_entries_builder
+        );
+        verify_result!(
+            StringArray,
+            Int32Array,
+            result,
+            expected_map_arr,
+            default_entries_verifier
+        );
+    }
+
+    #[test]
+    fn test_map_sort_with_empty_map() {
+        let map_arr_arg = build_map!(
+            StringBuilder::new(),
+            Int32Builder::new(),
+            vec![Vec::<String>::new()],
+            vec![Vec::<Option<i32>>::new()],
+            vec![false],
+            default_map_entries_builder
+        );
+
+        let args = vec![ColumnarValue::Array(Arc::new(map_arr_arg))];
+        let result = spark_map_sort(&args).unwrap();
+        let expected_map_arr = build_map!(
+            StringBuilder::new(),
+            Int32Builder::new(),
+            vec![Vec::<String>::new()],
+            vec![Vec::<Option<i32>>::new()],
+            vec![false],
+            default_map_entries_builder
+        );
+        verify_result!(
+            StringArray,
+            Int32Array,
+            result,
+            expected_map_arr,
+            default_entries_verifier
+        );
+    }
+
+    #[test]
+    fn test_map_sort_with_invalid_arguments() {
+        let result = spark_map_sort(&[]);
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("spark_map_sort expects exactly one argument"));
+
+        let map_array = build_map!(
+            StringBuilder::new(),
+            Int32Builder::new(),
+            vec![vec!["a".to_string()]],
+            vec![vec![Some(1)]],
+            vec![true],
+            default_map_entries_builder
+        );
+
+        let args = vec![
+            ColumnarValue::Array(Arc::new(map_array.clone())),
+            ColumnarValue::Array(Arc::new(map_array)),
+        ];
+        let result = spark_map_sort(&args);
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("spark_map_sort expects exactly one argument"));
+
+        let int_array = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef;
+        let args = vec![ColumnarValue::Array(int_array)];
+
+        let result = spark_map_sort(&args);
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("spark_map_sort expects Map type as argument"));
+    }
+}
diff --git a/native/spark-expr/src/map_funcs/mod.rs 
b/native/spark-expr/src/map_funcs/mod.rs
new file mode 100644
index 000000000..7288b847a
--- /dev/null
+++ b/native/spark-expr/src/map_funcs/mod.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+mod map_sort;
+pub use map_sort::spark_map_sort;
diff --git a/native/spark-expr/src/math_funcs/checked_arithmetic.rs 
b/native/spark-expr/src/math_funcs/checked_arithmetic.rs
index 0312cdb0b..7ef128de3 100644
--- a/native/spark-expr/src/math_funcs/checked_arithmetic.rs
+++ b/native/spark-expr/src/math_funcs/checked_arithmetic.rs
@@ -72,8 +72,7 @@ where
         }
         _ => {
             return Err(DataFusionError::Internal(format!(
-                "Unsupported operation: {:?}",
-                op
+                "Unsupported operation: {op:?}"
             )))
         }
     }
@@ -141,8 +140,7 @@ fn checked_arithmetic_internal(
             op,
         ),
         _ => Err(DataFusionError::Internal(format!(
-            "Unsupported data type: {:?}",
-            data_type
+            "Unsupported data type: {data_type:?}"
         ))),
     };
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

(datafusion-comet) branch main updated: chore: [1941-Part1]: Introduce `map_sort` scalar function (#2262)

Reply via email to