This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new c33cb85333 Do not rename struct fields when coercing types in `CASE`
(#14384)
c33cb85333 is described below
commit c33cb853330c7d38955e1ea7c72ac60768a6824f
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Feb 1 07:00:50 2025 -0500
Do not rename struct fields when coercing types in `CASE` (#14384)
* Fix field name during struct equality coercion
* fix bug
* Add more tests
* Update tests per #14396
---
datafusion/expr-common/src/type_coercion/binary.rs | 22 +++--
datafusion/sqllogictest/test_files/case.slt | 110 +++++++++++++++++++++
2 files changed, 125 insertions(+), 7 deletions(-)
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs
b/datafusion/expr-common/src/type_coercion/binary.rs
index 83f47883ad..571c171194 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -961,23 +961,31 @@ fn struct_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataType>
return None;
}
- let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter())
+ let coerced_types = std::iter::zip(lhs_fields.iter(),
rhs_fields.iter())
.map(|(lhs, rhs)| comparison_coercion(lhs.data_type(),
rhs.data_type()))
.collect::<Option<Vec<DataType>>>()?;
- let fields = types
+ // preserve the field name and nullability
+ let orig_fields = std::iter::zip(lhs_fields.iter(),
rhs_fields.iter());
+
+ let fields: Vec<FieldRef> = coerced_types
.into_iter()
- .enumerate()
- .map(|(i, datatype)| {
- Arc::new(Field::new(format!("c{i}"), datatype, true))
- })
- .collect::<Vec<FieldRef>>();
+ .zip(orig_fields)
+ .map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs,
rhs))
+ .collect();
Some(Struct(fields.into()))
}
_ => None,
}
}
+/// returns the result of coercing two fields to a common type
+fn coerce_fields(common_type: DataType, lhs: &FieldRef, rhs: &FieldRef) ->
FieldRef {
+ let is_nullable = lhs.is_nullable() || rhs.is_nullable();
+ let name = lhs.name(); // pick the name from the left field
+ Arc::new(Field::new(name, common_type, is_nullable))
+}
+
/// Returns the output type of applying mathematics operations such as
/// `+` to arguments of `lhs_type` and `rhs_type`.
fn mathematics_numerical_coercion(
diff --git a/datafusion/sqllogictest/test_files/case.slt
b/datafusion/sqllogictest/test_files/case.slt
index a339c2aa03..46e9c86c75 100644
--- a/datafusion/sqllogictest/test_files/case.slt
+++ b/datafusion/sqllogictest/test_files/case.slt
@@ -308,3 +308,113 @@ NULL NULL false
statement ok
drop table foo
+
+
+# Test coercion of inner struct field names
+# Reproducer for https://github.com/apache/datafusion/issues/14383
+statement ok
+create table t as values
+(
+ 100, -- column1 int (so the case isn't
constant folded)
+ { 'foo': 'bar' }, -- column2 has List of Struct w/
Utf8
+ { 'foo': arrow_cast('baz', 'Utf8View') }, -- column3 has List of Struct w/
Utf8View
+ { 'foo': arrow_cast('blarg', 'Utf8View') } -- column4 has List of Struct w/
Utf8View
+);
+
+
+# Note field name is foo
+query ???
+SELECT column2, column3, column4 FROM t;
+----
+{foo: bar} {foo: baz} {foo: blarg}
+
+# Coerce fields, expect the field name to be the name of the first arg to case
+# the field should not be named 'c0'
+query ?
+SELECT
+ case
+ when column1 > 0 then column2
+ when column1 < 0 then column3
+ else column4
+ end
+FROM t;
+----
+{foo: bar}
+
+query ?
+SELECT
+ case
+ when column1 > 0 then column3 -- different arg order shouldn't affect name
+ when column1 < 0 then column4
+ else column2
+ end
+FROM t;
+----
+{foo: baz}
+
+query ?
+SELECT
+ case
+ when column1 > 0 then column4 -- different arg order shouldn't affect name
+ when column1 < 0 then column2
+ else column3
+ end
+FROM t;
+----
+{foo: blarg}
+
+statement ok
+drop table t
+
+
+# Test coercion of inner struct field names with different orders / missing
fields
+statement ok
+create table t as values
+(
+ 100, -- column1 int (so the case isn't constant folded)
+ { 'foo': 'a', 'xxx': 'b' }, -- column2: Struct with fields foo, xxx
+ { 'xxx': 'c', 'foo': 'd' }, -- column3: Struct with fields xxx, foo
+ { 'xxx': 'e' } -- column4: Struct with field xxx (no second
field)
+);
+
+# Note field names are in different orders
+query ???
+SELECT column2, column3, column4 FROM t;
+----
+{foo: a, xxx: b} {xxx: c, foo: d} {xxx: e}
+
+# coerce structs with different field orders,
+# (note the *value*s are from column2 but the field name is 'xxx', as the
coerced
+# type takes the field name from the last argument (column3)
+query ?
+SELECT
+ case
+ when column1 > 0 then column2 -- always true
+ else column3
+ end
+FROM t;
+----
+{xxx: a, foo: b}
+
+# coerce structs with different field orders
+query ?
+SELECT
+ case
+ when column1 < 0 then column2 -- always false
+ else column3
+ end
+FROM t;
+----
+{xxx: c, foo: d}
+
+# coerce structs with subset of fields
+query error Failed to coerce then
+SELECT
+ case
+ when column1 > 0 then column3
+ else column4
+ end
+FROM t;
+
+statement ok
+drop table t
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]