This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new c33cb85333 Do not rename struct fields when coercing types in `CASE` 
(#14384)
c33cb85333 is described below

commit c33cb853330c7d38955e1ea7c72ac60768a6824f
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Feb 1 07:00:50 2025 -0500

    Do not rename struct fields when coercing types in `CASE` (#14384)
    
    * Fix field name during struct equality coercion
    
    * fix bug
    
    * Add more tests
    
    * Update tests per #14396
---
 datafusion/expr-common/src/type_coercion/binary.rs |  22 +++--
 datafusion/sqllogictest/test_files/case.slt        | 110 +++++++++++++++++++++
 2 files changed, 125 insertions(+), 7 deletions(-)

diff --git a/datafusion/expr-common/src/type_coercion/binary.rs 
b/datafusion/expr-common/src/type_coercion/binary.rs
index 83f47883ad..571c171194 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -961,23 +961,31 @@ fn struct_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<DataType>
                 return None;
             }
 
-            let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter())
+            let coerced_types = std::iter::zip(lhs_fields.iter(), 
rhs_fields.iter())
                 .map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), 
rhs.data_type()))
                 .collect::<Option<Vec<DataType>>>()?;
 
-            let fields = types
+            // preserve the field name and nullability
+            let orig_fields = std::iter::zip(lhs_fields.iter(), 
rhs_fields.iter());
+
+            let fields: Vec<FieldRef> = coerced_types
                 .into_iter()
-                .enumerate()
-                .map(|(i, datatype)| {
-                    Arc::new(Field::new(format!("c{i}"), datatype, true))
-                })
-                .collect::<Vec<FieldRef>>();
+                .zip(orig_fields)
+                .map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs, 
rhs))
+                .collect();
             Some(Struct(fields.into()))
         }
         _ => None,
     }
 }
 
+/// returns the result of coercing two fields to a common type
+fn coerce_fields(common_type: DataType, lhs: &FieldRef, rhs: &FieldRef) -> 
FieldRef {
+    let is_nullable = lhs.is_nullable() || rhs.is_nullable();
+    let name = lhs.name(); // pick the name from the left field
+    Arc::new(Field::new(name, common_type, is_nullable))
+}
+
 /// Returns the output type of applying mathematics operations such as
 /// `+` to arguments of `lhs_type` and `rhs_type`.
 fn mathematics_numerical_coercion(
diff --git a/datafusion/sqllogictest/test_files/case.slt 
b/datafusion/sqllogictest/test_files/case.slt
index a339c2aa03..46e9c86c75 100644
--- a/datafusion/sqllogictest/test_files/case.slt
+++ b/datafusion/sqllogictest/test_files/case.slt
@@ -308,3 +308,113 @@ NULL NULL false
 
 statement ok
 drop table foo
+
+
+# Test coercion of inner struct field names
+# Reproducer for https://github.com/apache/datafusion/issues/14383
+statement ok
+create table t as values
+(
+ 100,                                       -- column1 int (so the case isn't 
constant folded)
+ { 'foo': 'bar' },                          -- column2 has List of Struct w/ 
Utf8
+ { 'foo': arrow_cast('baz', 'Utf8View') },  -- column3 has List of Struct w/ 
Utf8View
+ { 'foo': arrow_cast('blarg', 'Utf8View') } -- column4 has List of Struct w/ 
Utf8View
+);
+
+
+# Note field name is foo
+query ???
+SELECT column2, column3, column4  FROM t;
+----
+{foo: bar} {foo: baz} {foo: blarg}
+
+# Coerce fields, expect the field name to be the name of the first arg to case
+# the field should not be named 'c0'
+query ?
+SELECT
+  case
+    when column1 > 0 then column2
+    when column1 < 0 then column3
+    else column4
+  end
+FROM t;
+----
+{foo: bar}
+
+query ?
+SELECT
+  case
+    when column1 > 0 then column3 -- different arg order shouldn't affect name
+    when column1 < 0 then column4
+    else column2
+  end
+FROM t;
+----
+{foo: baz}
+
+query ?
+SELECT
+  case
+    when column1 > 0 then column4 -- different arg order shouldn't affect name
+    when column1 < 0 then column2
+    else column3
+  end
+FROM t;
+----
+{foo: blarg}
+
+statement ok
+drop table t
+
+
+# Test coercion of inner struct field names with different orders / missing 
fields
+statement ok
+create table t as values
+(
+ 100,                        -- column1 int (so the case isn't constant folded)
+ { 'foo': 'a', 'xxx': 'b' }, -- column2: Struct with fields foo, xxx
+ { 'xxx': 'c', 'foo': 'd' }, -- column3: Struct with fields xxx, foo
+ { 'xxx': 'e'             }  -- column4: Struct with field xxx (no second 
field)
+);
+
+# Note field names are in different orders
+query ???
+SELECT column2, column3, column4  FROM t;
+----
+{foo: a, xxx: b} {xxx: c, foo: d} {xxx: e}
+
+# coerce structs with different field orders,
+# (note the *value*s are from column2 but the field name is 'xxx', as the 
coerced
+# type takes the field name from the last argument (column3)
+query ?
+SELECT
+  case
+    when column1 > 0 then column2 -- always true
+    else column3
+  end
+FROM t;
+----
+{xxx: a, foo: b}
+
+# coerce structs with different field orders
+query ?
+SELECT
+  case
+    when column1 < 0 then column2 -- always false
+    else column3
+  end
+FROM t;
+----
+{xxx: c, foo: d}
+
+# coerce structs with subset of fields
+query error Failed to coerce then
+SELECT
+  case
+    when column1 > 0 then column3
+    else column4
+  end
+FROM t;
+
+statement ok
+drop table t


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to