alamb commented on code in PR #12839:
URL: https://github.com/apache/datafusion/pull/12839#discussion_r1796062667
##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -476,6 +478,26 @@ fn type_union_resolution_coercion(
type_union_resolution_coercion(lhs.data_type(),
rhs.data_type());
new_item_type.map(|t| DataType::List(Arc::new(Field::new("item",
t, true))))
}
+ (DataType::Struct(lhs), DataType::Struct(rhs)) => {
+ if lhs.len() != rhs.len() {
+ return None;
+ }
+
+ let types = std::iter::zip(lhs.iter(), rhs.iter())
Review Comment:
I am not familiar with the semantics, but as I read this code it would *not*
coerce structs with fields named the same but in a different order.
For example
```
{
a: 1,
b: 2
}
```
Would not be coerceable / comparable to
```
{
b: 20 // note the fields are in different order
a: 10,
}
```
Is that intended? 🤔
##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -221,20 +221,37 @@ fn get_valid_types_with_scalar_udf(
current_types: &[DataType],
func: &ScalarUDF,
) -> Result<Vec<Vec<DataType>>> {
- let valid_types = match signature {
+ match signature {
TypeSignature::UserDefined => match func.coerce_types(current_types) {
- Ok(coerced_types) => vec![coerced_types],
- Err(e) => return exec_err!("User-defined coercion failed with
{:?}", e),
+ Ok(coerced_types) => Ok(vec![coerced_types]),
+ Err(e) => exec_err!("User-defined coercion failed with {:?}", e),
},
- TypeSignature::OneOf(signatures) => signatures
- .iter()
- .filter_map(|t| get_valid_types_with_scalar_udf(t, current_types,
func).ok())
- .flatten()
- .collect::<Vec<_>>(),
- _ => get_valid_types(signature, current_types)?,
- };
+ TypeSignature::OneOf(signatures) => {
+ let mut res = vec![];
+ let mut errors = vec![];
+ for sig in signatures {
+ match get_valid_types_with_scalar_udf(sig, current_types,
func) {
+ Ok(valid_types) => {
+ res.extend(valid_types);
+ }
+ Err(e) => {
+ errors.push(e.to_string());
+ }
+ }
+ }
- Ok(valid_types)
+ // Every signature failed, return the joined error
+ if res.is_empty() {
Review Comment:
👍 for better errors
##########
datafusion/functions-nested/src/make_array.rs:
##########
@@ -106,6 +108,32 @@ impl ScalarUDFImpl for MakeArray {
fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
if let Some(new_type) = type_union_resolution(arg_types) {
+ // TODO: Move the logic to type_union_resolution if this applies
to other functions as well
Review Comment:
Ah, this is the same question I had above -- this code seems to properly
account for differences in field order
##########
datafusion/sqllogictest/test_files/struct.slt:
##########
@@ -373,3 +373,30 @@ You reached the bottom!
statement ok
drop view complex_view;
+
+# struct with different keys r1 and r2 is not valid
Review Comment:
Could you also please add a test that shows what happens with coercion when
the fields are in a different order?
Like this
```sql
> create table t(a struct<r1 varchar, c int>) as values ({r1: 'foo', c:1}),
({c:2, r: 'bar'});
```
On main it seems to get an error:
> Error during planning: Inconsistent data type across values list at row 1
column 0. Was Struct([Field { name: "r1", data_type: Utf8, nullable: true,
dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c",
data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }]) but found Struct([Field { name: "c", data_type: Int64, nullable: true,
dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "r",
data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }])
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]