erratic-pattern commented on code in PR #10268: URL: https://github.com/apache/datafusion/pull/10268#discussion_r1583508138
########## datafusion/expr/src/type_coercion/binary.rs: ########## @@ -289,15 +290,164 @@ fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option<DataT } } +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +enum TypeCategory { + Array, + Boolean, + Numeric, + // String, well-defined type, but are considered as unknown type. + DateTime, + Composite, + Unknown, + NotSupported, +} + +fn data_type_category(data_type: &DataType) -> TypeCategory { + if data_type.is_numeric() { + return TypeCategory::Numeric; + } + + if matches!(data_type, DataType::Boolean) { + return TypeCategory::Boolean; + } + + if matches!( + data_type, + DataType::List(_) | DataType::FixedSizeList(_, _) | DataType::LargeList(_) + ) { + return TypeCategory::Array; + } + + // String literal is possible to cast to many other types like numeric or datetime, + // therefore, it is categorized as a unknown type + if matches!( + data_type, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Null + ) { + return TypeCategory::Unknown; + } + + if matches!( + data_type, + DataType::Date32 + | DataType::Date64 + | DataType::Time32(_) + | DataType::Time64(_) + | DataType::Timestamp(_, _) + | DataType::Interval(_) + | DataType::Duration(_) + ) { + return TypeCategory::DateTime; + } + + if matches!( + data_type, + DataType::Dictionary(_, _) | DataType::Struct(_) | DataType::Union(_, _) + ) { + return TypeCategory::Composite; + } + + TypeCategory::NotSupported +} + +/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of constructs including +/// CASE, ARRAY, VALUES, and the GREATEST and LEAST functions. +/// See <https://www.postgresql.org/docs/current/typeconv-union-case.html> for more information. +/// The actual rules follows the behavior of Postgres and DuckDB +pub fn type_resolution(data_types: &[DataType]) -> Option<DataType> { Review Comment: The doc string is a bit confusing here because there are no lhs_type and rhs_type. 
I assume that case would be `type_resolution(&[lhs_type, rhs_type])`? Also, maybe this function name could reflect that it's finding a union type to satisfy a set of input types, for example `type_union`, `type_union_resolution`, `resolve_type_union`. I think `type_resolution` is too generic. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org