This is an automated email from the ASF dual-hosted git repository.
richox pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/auron.git
The following commit(s) were added to refs/heads/master by this push:
new 64c3e0e4 [AURON #1712] Fix Casting from Map to Utf8 not supported (#1744)
64c3e0e4 is described below
commit 64c3e0e4d185b5c0707b180b3695f54ae8a5c83f
Author: cxzl25 <[email protected]>
AuthorDate: Wed Dec 17 19:46:55 2025 +0800
[AURON #1712] Fix Casting from Map to Utf8 not supported (#1744)
# Which issue does this PR close?
Closes #1712
# Rationale for this change
Casting a Map column to Utf8 (string) in the native engine currently fails with:
```
Arrow error: Cast error: Casting from Map to Utf8 not supported
```
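# What changes are included in this PR?
Adds a Spark-compatible Map -> Utf8 branch to `cast_impl` in `datafusion-ext-commons/src/arrow/cast.rs`. Keys and values are cast to Utf8 recursively, each non-null map row is rendered as `{k1 -> v1, k2 -> v2}`, null values are written as `null`, and null map rows stay null.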
# Are there any user-facing changes?
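# How was this patch tested?
Added unit tests: Rust tests in `cast.rs` covering plain, null-row, empty, and struct-valued maps, and Scala query tests in `AuronQuerySuite` covering plain, nested, struct-valued, and empty maps.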
---
.../datafusion-ext-commons/src/arrow/cast.rs | 218 +++++++++++++++++++++
.../scala/org.apache.auron/AuronQuerySuite.scala | 57 ++++++
2 files changed, 275 insertions(+)
diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs
index 2aeb1d7a..8b352abd 100644
--- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs
+++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs
@@ -219,6 +219,52 @@ pub fn cast_impl(
(&DataType::Utf8, DataType::Decimal128(..)) => {
arrow::compute::kernels::cast::cast(&to_plain_string_array(array),
cast_type)?
}
+ // map to string (spark compatible)
+ (&DataType::Map(..), &DataType::Utf8) => {
+ let map_array = as_map_array(array);
+ let entries = map_array.entries();
+ let keys = entries.column(0);
+ let values = entries.column(1);
+
+ let casted_keys = cast_impl(keys, &DataType::Utf8,
match_struct_fields)?;
+ let casted_values = cast_impl(values, &DataType::Utf8,
match_struct_fields)?;
+
+ let string_keys = as_string_array(&casted_keys);
+ let string_values = as_string_array(&casted_values);
+
+ let mut builder = StringBuilder::new();
+
+ for row_idx in 0..map_array.len() {
+ if map_array.is_null(row_idx) {
+ builder.append_null();
+ } else {
+ let mut row_str = String::from("{");
+ let start = map_array.value_offsets()[row_idx] as usize;
+ let end = map_array.value_offsets()[row_idx + 1] as usize;
+
+ for i in start..end {
+ if i > start {
+ row_str.push_str(", ");
+ }
+
+ row_str.push_str(string_keys.value(i));
+ row_str.push_str(" ->");
+
+ if values.is_null(i) {
+ row_str.push_str(" null");
+ } else {
+ row_str.push(' ');
+ row_str.push_str(string_values.value(i));
+ }
+ }
+
+ row_str.push('}');
+ builder.append_value(&row_str);
+ }
+ }
+
+ Arc::new(builder.finish())
+ }
// struct to string (spark compatible)
(&DataType::Struct(_), &DataType::Utf8) => {
let struct_array = as_struct_array(array);
@@ -795,4 +841,176 @@ mod test {
&StringArray::from_iter(vec![Some("{100, {x, true}}"), Some("{200,
{y, null}}"),])
);
}
+
+ #[test]
+ fn test_map_to_string() { // Casts a three-row Map<Int32, Utf8> to Utf8 and checks each row's rendered text.
+ // Field definitions for Map<Int32, Utf8>: non-nullable Int32 keys, nullable Utf8 values.
+ let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+ let value_field = Arc::new(Field::new("value", DataType::Utf8, true));
+ let entries_field = Arc::new(Field::new(
+ "entries",
+ DataType::Struct(Fields::from(vec![
+ key_field.as_ref().clone(),
+ value_field.as_ref().clone(),
+ ])),
+ false,
+ ));
+
+ let keys = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ let values = StringArray::from(vec![Some("a"), Some("b"), None,
Some("d"), Some("e")]); // the value paired with key 3 is null
+
+ let entries = StructArray::from(vec![
+ (key_field.clone(), Arc::new(keys) as ArrayRef),
+ (value_field.clone(), Arc::new(values) as ArrayRef),
+ ]);
+
+ let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 3,
5].into()); // offsets [0, 2, 3, 5] split the five entries into rows of 2, 1 and 2
+ let map_array: ArrayRef = Arc::new(MapArray::new(
+ entries_field.clone(),
+ offsets,
+ entries,
+ None, // no null buffer is supplied for the map rows
+ false, // NOTE(review): presumably MapArray's ordered-keys flag; confirm against the arrow docs
+ ));
+
+ let casted = cast(&map_array, &DataType::Utf8).unwrap();
+ assert_eq!(
+ as_string_array(&casted),
+ &StringArray::from_iter(vec![
+ Some("{1 -> a, 2 -> b}"),
+ Some("{3 -> null}"), // a null value is written as the text null
+ Some("{4 -> d, 5 -> e}"),
+ ])
+ );
+ }
+
+ #[test]
+ fn test_map_to_string_with_null_map() { // A null map row must come out as a null string, not as text.
+ // Field definitions for Map<Int32, Utf8>; the middle row is flagged null further down.
+ let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+ let value_field = Arc::new(Field::new("value", DataType::Utf8, true));
+ let entries_field = Arc::new(Field::new(
+ "entries",
+ DataType::Struct(Fields::from(vec![
+ key_field.as_ref().clone(),
+ value_field.as_ref().clone(),
+ ])),
+ false,
+ ));
+
+ let keys = Int32Array::from(vec![1, 2, 3]);
+ let values = StringArray::from(vec![Some("a"), Some("b"), Some("c")]);
+
+ let entries = StructArray::from(vec![
+ (key_field.clone(), Arc::new(keys) as ArrayRef),
+ (value_field.clone(), Arc::new(values) as ArrayRef),
+ ]);
+
+ let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 1, 2,
3].into()); // offsets [0, 1, 2, 3]: three rows with one entry each
+ let nulls = arrow::buffer::NullBuffer::from(vec![true, false, true]); // false marks row 1 as null (the expected output is None there)
+ let map_array: ArrayRef = Arc::new(MapArray::new(
+ entries_field.clone(),
+ offsets,
+ entries,
+ Some(nulls),
+ false,
+ ));
+
+ let casted = cast(&map_array, &DataType::Utf8).unwrap();
+ assert_eq!(
+ as_string_array(&casted),
+ &StringArray::from_iter(vec![Some("{1 -> a}"), None, Some("{3 ->
c}"),])
+ );
+ }
+
+ #[test]
+ fn test_empty_map_to_string() { // Map rows that contain no entries must render as a bare pair of braces.
+ // Field definitions for Map<Int32, Utf8>; the key and value arrays built below hold no elements.
+ let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+ let value_field = Arc::new(Field::new("value", DataType::Utf8, true));
+ let entries_field = Arc::new(Field::new(
+ "entries",
+ DataType::Struct(Fields::from(vec![
+ key_field.as_ref().clone(),
+ value_field.as_ref().clone(),
+ ])),
+ false,
+ ));
+
+ let keys = Int32Array::from(vec![] as Vec<i32>);
+ let values = StringArray::from(vec![] as Vec<Option<&str>>);
+
+ let entries = StructArray::from(vec![
+ (key_field.clone(), Arc::new(keys) as ArrayRef),
+ (value_field.clone(), Arc::new(values) as ArrayRef),
+ ]);
+
+ // Offsets [0, 0, 0] describe two rows that each span zero entries.
+ let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 0,
0].into());
+ let map_array: ArrayRef = Arc::new(MapArray::new(
+ entries_field.clone(),
+ offsets,
+ entries,
+ None, // no null buffer: both rows are empty maps rather than null maps
+ false,
+ ));
+
+ let casted = cast(&map_array, &DataType::Utf8).unwrap();
+ assert_eq!(
+ as_string_array(&casted),
+ &StringArray::from_iter(vec![Some("{}"), Some("{}")])
+ );
+ }
+
+ #[test]
+ fn test_nested_map_to_string() { // Struct-typed map values are rendered as text inside the map's own rendering.
+ // Build Map<Int32, Struct<s1: Utf8, s2: Boolean>>: Int32 keys paired with struct values.
+ let key_field = Arc::new(Field::new("key", DataType::Int32, false));
+
+ let inner_string_field = Arc::new(Field::new("s1", DataType::Utf8,
true));
+ let inner_bool_field = Arc::new(Field::new("s2", DataType::Boolean,
true));
+ let inner_struct_type = DataType::Struct(Fields::from(vec![
+ inner_string_field.as_ref().clone(),
+ inner_bool_field.as_ref().clone(),
+ ]));
+
+ let value_field = Arc::new(Field::new("value",
inner_struct_type.clone(), true));
+ let entries_field = Arc::new(Field::new(
+ "entries",
+ DataType::Struct(Fields::from(vec![
+ key_field.as_ref().clone(),
+ value_field.as_ref().clone(),
+ ])),
+ false,
+ ));
+
+ let keys = Int32Array::from(vec![1, 2]);
+ let inner_strings = StringArray::from(vec![Some("x"), Some("y")]);
+ let inner_bools = BooleanArray::from(vec![Some(true), None]); // the second struct has a null s2 field
+ let inner_struct: ArrayRef = Arc::new(StructArray::from(vec![
+ (inner_string_field, Arc::new(inner_strings) as ArrayRef),
+ (inner_bool_field, Arc::new(inner_bools) as ArrayRef),
+ ]));
+
+ let entries = StructArray::from(vec![
+ (key_field.clone(), Arc::new(keys) as ArrayRef),
+ (value_field.clone(), inner_struct),
+ ]);
+
+ // Offsets [0, 2]: a single map row holding both entries.
+ let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2].into());
+ let map_array: ArrayRef = Arc::new(MapArray::new(
+ entries_field.clone(),
+ offsets,
+ entries,
+ None,
+ false,
+ ));
+
+ let casted = cast(&map_array, &DataType::Utf8).unwrap();
+ assert_eq!(
+ as_string_array(&casted),
+ &StringArray::from_iter(vec![Some("{1 -> {x, true}, 2 -> {y,
null}}")])
+ );
+ }
}
diff --git a/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala b/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala
index 5d0c420e..3a2cc9cf 100644
--- a/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala
+++ b/spark-extension-shims-spark/src/test/scala/org.apache.auron/AuronQuerySuite.scala
@@ -524,4 +524,61 @@ class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQ
}
}
}
+
+ test("cast map to string") { // Three map rows with string keys and integer values; the last row maps its key to null.
+ if (AuronTestUtils.isSparkV31OrGreater) { // the body runs only when this flag is true
+ withTable("t_map") {
+ sql("""
+ |create table t_map using parquet as
+ |select map('a', 1, 'b', 2) as m
+ |union all select map('x', 10, 'y', 20, 'z', 30)
+ |union all select map('key', null)
+ |""".stripMargin)
+
+ checkSparkAnswerAndOperator("select cast(m as string) from t_map") // NOTE(review): presumably compares the Auron result and operators with vanilla Spark; confirm in AuronQueryTest
+ }
+ }
+ }
+
+ test("cast nested map to string") { // Two rows whose map values are themselves maps from string keys to integers.
+ if (AuronTestUtils.isSparkV31OrGreater) { // the body runs only when this flag is true
+ withTable("t_nested_map") {
+ sql("""
+ |create table t_nested_map using parquet as
+ |select map('outer1', map('inner1', 1, 'inner2', 2)) as m
+ |union all select map('outer2', map('inner3', 3))
+ |""".stripMargin)
+
+ checkSparkAnswerAndOperator("select cast(m as string) from
t_nested_map")
+ }
+ }
+ }
+
+ test("cast map with struct value to string") { // Two rows whose map values are named_struct(x, y) structs.
+ if (AuronTestUtils.isSparkV31OrGreater) { // the body runs only when this flag is true
+ withTable("t_map_struct") {
+ sql("""
+ |create table t_map_struct using parquet as
+ |select map('k1', named_struct('x', 'a', 'y', 10)) as m
+ |union all select map('k2', named_struct('x', 'b', 'y', 20))
+ |""".stripMargin)
+
+ checkSparkAnswerAndOperator("select cast(m as string) from
t_map_struct")
+ }
+ }
+ }
+
+ test("cast empty map to string") { // One row built with map() and no arguments, plus one single-entry row.
+ if (AuronTestUtils.isSparkV31OrGreater) { // the body runs only when this flag is true
+ withTable("t_empty_map") {
+ sql("""
+ |create table t_empty_map using parquet as
+ |select map() as m
+ |union all select map('a', 1)
+ |""".stripMargin)
+
+ checkSparkAnswerAndOperator("select cast(m as string) from
t_empty_map")
+ }
+ }
+ }
}