This is an automated email from the ASF dual-hosted git repository.

richox pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/auron.git


The following commit(s) were added to refs/heads/master by this push:
     new 64c3e0e4 [AURON #1712] Fix Casting from Map to Utf8 not supported (#1744)
64c3e0e4 is described below

commit 64c3e0e4d185b5c0707b180b3695f54ae8a5c83f
Author: cxzl25 <[email protected]>
AuthorDate: Wed Dec 17 19:46:55 2025 +0800

    [AURON #1712] Fix Casting from Map to Utf8 not supported (#1744)
    
    # Which issue does this PR close?
    
    Closes #1712
    
    # Rationale for this change
    
    ```
    Arrow error: Cast error: Casting from Map to Utf8 not supported
    ```
    
    # What changes are included in this PR?
    
    # Are there any user-facing changes?
    
    # How was this patch tested?
    Add UT
---
 .../datafusion-ext-commons/src/arrow/cast.rs       | 218 +++++++++++++++++++++
 .../scala/org.apache.auron/AuronQuerySuite.scala   |  57 ++++++
 2 files changed, 275 insertions(+)

diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs
index 2aeb1d7a..8b352abd 100644
--- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs
+++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs
@@ -219,6 +219,52 @@ pub fn cast_impl(
         (&DataType::Utf8, DataType::Decimal128(..)) => {
             arrow::compute::kernels::cast::cast(&to_plain_string_array(array), cast_type)?
         }
+        // map to string (spark compatible)
+        (&DataType::Map(..), &DataType::Utf8) => {
+            let map_array = as_map_array(array);
+            let entries = map_array.entries();
+            let keys = entries.column(0);
+            let values = entries.column(1);
+
+            let casted_keys = cast_impl(keys, &DataType::Utf8, match_struct_fields)?;
+            let casted_values = cast_impl(values, &DataType::Utf8, match_struct_fields)?;
+
+            let string_keys = as_string_array(&casted_keys);
+            let string_values = as_string_array(&casted_values);
+
+            let mut builder = StringBuilder::new();
+
+            for row_idx in 0..map_array.len() {
+                if map_array.is_null(row_idx) {
+                    builder.append_null();
+                } else {
+                    let mut row_str = String::from("{");
+                    let start = map_array.value_offsets()[row_idx] as usize;
+                    let end = map_array.value_offsets()[row_idx + 1] as usize;
+
+                    for i in start..end {
+                        if i > start {
+                            row_str.push_str(", ");
+                        }
+
+                        row_str.push_str(string_keys.value(i));
+                        row_str.push_str(" ->");
+
+                        if values.is_null(i) {
+                            row_str.push_str(" null");
+                        } else {
+                            row_str.push(' ');
+                            row_str.push_str(string_values.value(i));
+                        }
+                    }
+
+                    row_str.push('}');
+                    builder.append_value(&row_str);
+                }
+            }
+
+            Arc::new(builder.finish())
+        }
         // struct to string (spark compatible)
         (&DataType::Struct(_), &DataType::Utf8) => {
             let struct_array = as_struct_array(array);
@@ -795,4 +841,176 @@ mod test {
             &StringArray::from_iter(vec![Some("{100, {x, true}}"), Some("{200, {y, null}}"),])
         );
     }
+
+    #[test]
+    fn test_map_to_string() {
+        // Create a map array: Map<Int32, String>
+        let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+        let value_field = Arc::new(Field::new("value", DataType::Utf8, true));
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                key_field.as_ref().clone(),
+                value_field.as_ref().clone(),
+            ])),
+            false,
+        ));
+
+        let keys = Int32Array::from(vec![1, 2, 3, 4, 5]);
+        let values = StringArray::from(vec![Some("a"), Some("b"), None, Some("d"), Some("e")]);
+
+        let entries = StructArray::from(vec![
+            (key_field.clone(), Arc::new(keys) as ArrayRef),
+            (value_field.clone(), Arc::new(values) as ArrayRef),
+        ]);
+
+        let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 3, 5].into());
+        let map_array: ArrayRef = Arc::new(MapArray::new(
+            entries_field.clone(),
+            offsets,
+            entries,
+            None,
+            false,
+        ));
+
+        let casted = cast(&map_array, &DataType::Utf8).unwrap();
+        assert_eq!(
+            as_string_array(&casted),
+            &StringArray::from_iter(vec![
+                Some("{1 -> a, 2 -> b}"),
+                Some("{3 -> null}"),
+                Some("{4 -> d, 5 -> e}"),
+            ])
+        );
+    }
+
+    #[test]
+    fn test_map_to_string_with_null_map() {
+        // Create a map array with null rows
+        let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+        let value_field = Arc::new(Field::new("value", DataType::Utf8, true));
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                key_field.as_ref().clone(),
+                value_field.as_ref().clone(),
+            ])),
+            false,
+        ));
+
+        let keys = Int32Array::from(vec![1, 2, 3]);
+        let values = StringArray::from(vec![Some("a"), Some("b"), Some("c")]);
+
+        let entries = StructArray::from(vec![
+            (key_field.clone(), Arc::new(keys) as ArrayRef),
+            (value_field.clone(), Arc::new(values) as ArrayRef),
+        ]);
+
+        let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 1, 2, 3].into());
+        let nulls = arrow::buffer::NullBuffer::from(vec![true, false, true]);
+        let map_array: ArrayRef = Arc::new(MapArray::new(
+            entries_field.clone(),
+            offsets,
+            entries,
+            Some(nulls),
+            false,
+        ));
+
+        let casted = cast(&map_array, &DataType::Utf8).unwrap();
+        assert_eq!(
+            as_string_array(&casted),
+            &StringArray::from_iter(vec![Some("{1 -> a}"), None, Some("{3 -> c}"),])
+        );
+    }
+
+    #[test]
+    fn test_empty_map_to_string() {
+        // Create an empty map array
+        let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+        let value_field = Arc::new(Field::new("value", DataType::Utf8, true));
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                key_field.as_ref().clone(),
+                value_field.as_ref().clone(),
+            ])),
+            false,
+        ));
+
+        let keys = Int32Array::from(vec![] as Vec<i32>);
+        let values = StringArray::from(vec![] as Vec<Option<&str>>);
+
+        let entries = StructArray::from(vec![
+            (key_field.clone(), Arc::new(keys) as ArrayRef),
+            (value_field.clone(), Arc::new(values) as ArrayRef),
+        ]);
+
+        // Two rows, both empty maps
+        let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 0, 0].into());
+        let map_array: ArrayRef = Arc::new(MapArray::new(
+            entries_field.clone(),
+            offsets,
+            entries,
+            None,
+            false,
+        ));
+
+        let casted = cast(&map_array, &DataType::Utf8).unwrap();
+        assert_eq!(
+            as_string_array(&casted),
+            &StringArray::from_iter(vec![Some("{}"), Some("{}")])
+        );
+    }
+
+    #[test]
+    fn test_nested_map_to_string() {
+        // Create a map with struct values: Map<Int32, Struct<String, Boolean>>
+        let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+
+        let inner_string_field = Arc::new(Field::new("s1", DataType::Utf8, true));
+        let inner_bool_field = Arc::new(Field::new("s2", DataType::Boolean, true));
+        let inner_struct_type = DataType::Struct(Fields::from(vec![
+            inner_string_field.as_ref().clone(),
+            inner_bool_field.as_ref().clone(),
+        ]));
+
+        let value_field = Arc::new(Field::new("value", inner_struct_type.clone(), true));
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                key_field.as_ref().clone(),
+                value_field.as_ref().clone(),
+            ])),
+            false,
+        ));
+
+        let keys = Int32Array::from(vec![1, 2]);
+        let inner_strings = StringArray::from(vec![Some("x"), Some("y")]);
+        let inner_bools = BooleanArray::from(vec![Some(true), None]);
+        let inner_struct: ArrayRef = Arc::new(StructArray::from(vec![
+            (inner_string_field, Arc::new(inner_strings) as ArrayRef),
+            (inner_bool_field, Arc::new(inner_bools) as ArrayRef),
+        ]));
+
+        let entries = StructArray::from(vec![
+            (key_field.clone(), Arc::new(keys) as ArrayRef),
+            (value_field.clone(), inner_struct),
+        ]);
+
+        // One row with 2 entries
+        let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2].into());
+        let map_array: ArrayRef = Arc::new(MapArray::new(
+            entries_field.clone(),
+            offsets,
+            entries,
+            None,
+            false,
+        ));
+
+        let casted = cast(&map_array, &DataType::Utf8).unwrap();
+        assert_eq!(
+            as_string_array(&casted),
+            &StringArray::from_iter(vec![Some("{1 -> {x, true}, 2 -> {y, null}}")])
+        );
+    }
 }
diff --git a/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala b/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala
index 5d0c420e..3a2cc9cf 100644
--- a/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala
+++ b/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala
@@ -524,4 +524,61 @@ class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQ
       }
     }
   }
+
+  test("cast map to string") {
+    if (AuronTestUtils.isSparkV31OrGreater) {
+      withTable("t_map") {
+        sql("""
+              |create table t_map using parquet as
+              |select map('a', 1, 'b', 2) as m
+              |union all select map('x', 10, 'y', 20, 'z', 30)
+              |union all select map('key', null)
+              |""".stripMargin)
+
+        checkSparkAnswerAndOperator("select cast(m as string) from t_map")
+      }
+    }
+  }
+
+  test("cast nested map to string") {
+    if (AuronTestUtils.isSparkV31OrGreater) {
+      withTable("t_nested_map") {
+        sql("""
+              |create table t_nested_map using parquet as
+              |select map('outer1', map('inner1', 1, 'inner2', 2)) as m
+              |union all select map('outer2', map('inner3', 3))
+              |""".stripMargin)
+
+        checkSparkAnswerAndOperator("select cast(m as string) from t_nested_map")
+      }
+    }
+  }
+
+  test("cast map with struct value to string") {
+    if (AuronTestUtils.isSparkV31OrGreater) {
+      withTable("t_map_struct") {
+        sql("""
+              |create table t_map_struct using parquet as
+              |select map('k1', named_struct('x', 'a', 'y', 10)) as m
+              |union all select map('k2', named_struct('x', 'b', 'y', 20))
+              |""".stripMargin)
+
+        checkSparkAnswerAndOperator("select cast(m as string) from t_map_struct")
+      }
+    }
+  }
+
+  test("cast empty map to string") {
+    if (AuronTestUtils.isSparkV31OrGreater) {
+      withTable("t_empty_map") {
+        sql("""
+              |create table t_empty_map using parquet as
+              |select map() as m
+              |union all select map('a', 1)
+              |""".stripMargin)
+
+        checkSparkAnswerAndOperator("select cast(m as string) from t_empty_map")
+      }
+    }
+  }
 }

Reply via email to