This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 08365001c0 Fix `Coalesce` casting logic to follows what Postgres and
DuckDB do. Introduce signature that do non-comparison coercion (#10268)
08365001c0 is described below
commit 08365001c0bf871d8a5780b19004591d2fce0afa
Author: Jay Zhan <[email protected]>
AuthorDate: Sun May 26 18:04:03 2024 +0800
Fix `Coalesce` casting logic to follows what Postgres and DuckDB do.
Introduce signature that do non-comparison coercion (#10268)
* remove casting for coalesce
Signed-off-by: jayzhan211 <[email protected]>
* add more test
Signed-off-by: jayzhan211 <[email protected]>
* add more test
Signed-off-by: jayzhan211 <[email protected]>
* crate only visibility
Signed-off-by: jayzhan211 <[email protected]>
* polish comment
Signed-off-by: jayzhan211 <[email protected]>
* improve test
Signed-off-by: jayzhan211 <[email protected]>
* backup
Signed-off-by: jayzhan211 <[email protected]>
* introduce new signautre for coalesce
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* ignore err msg
Signed-off-by: jayzhan211 <[email protected]>
* fmt
Signed-off-by: jayzhan211 <[email protected]>
* fix doc
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* add more test
Signed-off-by: jayzhan211 <[email protected]>
* switch to type_resolution coercion
Signed-off-by: jayzhan211 <[email protected]>
* fix i64 and u64 case
Signed-off-by: jayzhan211 <[email protected]>
* add more tests
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* add null case
Signed-off-by: jayzhan211 <[email protected]>
* fmt
Signed-off-by: jayzhan211 <[email protected]>
* fix
Signed-off-by: jayzhan211 <[email protected]>
* rename to type_union_resolution
Signed-off-by: jayzhan211 <[email protected]>
* add comment
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* fix test
Signed-off-by: jayzhan211 <[email protected]>
* add comment
Signed-off-by: jayzhan211 <[email protected]>
* rm test
Signed-off-by: jayzhan211 <[email protected]>
* cleanup since rebase
Signed-off-by: jayzhan211 <[email protected]>
* add more test
Signed-off-by: jayzhan211 <[email protected]>
* add more test
Signed-off-by: jayzhan211 <[email protected]>
* fix msg
Signed-off-by: jayzhan211 <[email protected]>
* fmt
Signed-off-by: jayzhan211 <[email protected]>
* rm pure_string_coercion
Signed-off-by: jayzhan211 <[email protected]>
* rm duplicate
Signed-off-by: jayzhan211 <[email protected]>
* change type in select.slt
Signed-off-by: jayzhan211 <[email protected]>
* fix slt
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
datafusion/expr/src/type_coercion/binary.rs | 261 +++++++++++++++++++--
datafusion/expr/src/type_coercion/functions.rs | 118 ++++++----
datafusion/functions/src/core/coalesce.rs | 24 +-
datafusion/optimizer/src/analyzer/type_coercion.rs | 2 +-
datafusion/sqllogictest/test_files/coalesce.slt | 64 ++++-
5 files changed, 378 insertions(+), 91 deletions(-)
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index 10d8306d76..615bb3ac56 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -17,6 +17,7 @@
//! Coercion rules for matching argument types for binary operators
+use std::collections::HashSet;
use std::sync::Arc;
use crate::Operator;
@@ -289,13 +290,207 @@ fn bitwise_coercion(left_type: &DataType, right_type:
&DataType) -> Option<DataT
}
}
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+enum TypeCategory {
+ Array,
+ Boolean,
+ Numeric,
+ // String, well-defined type, but are considered as unknown type.
+ DateTime,
+ Composite,
+ Unknown,
+ NotSupported,
+}
+
+impl From<&DataType> for TypeCategory {
+ fn from(data_type: &DataType) -> Self {
+ match data_type {
+ // Dict is a special type in arrow, we check the value type
+ DataType::Dictionary(_, v) => {
+ let v = v.as_ref();
+ TypeCategory::from(v)
+ }
+ _ => {
+ if data_type.is_numeric() {
+ return TypeCategory::Numeric;
+ }
+
+ if matches!(data_type, DataType::Boolean) {
+ return TypeCategory::Boolean;
+ }
+
+ if matches!(
+ data_type,
+ DataType::List(_)
+ | DataType::FixedSizeList(_, _)
+ | DataType::LargeList(_)
+ ) {
+ return TypeCategory::Array;
+ }
+
+ // String literal is possible to cast to many other types like
numeric or datetime,
+ // therefore, it is categorized as a unknown type
+ if matches!(
+ data_type,
+ DataType::Utf8 | DataType::LargeUtf8 | DataType::Null
+ ) {
+ return TypeCategory::Unknown;
+ }
+
+ if matches!(
+ data_type,
+ DataType::Date32
+ | DataType::Date64
+ | DataType::Time32(_)
+ | DataType::Time64(_)
+ | DataType::Timestamp(_, _)
+ | DataType::Interval(_)
+ | DataType::Duration(_)
+ ) {
+ return TypeCategory::DateTime;
+ }
+
+ if matches!(
+ data_type,
+ DataType::Map(_, _) | DataType::Struct(_) |
DataType::Union(_, _)
+ ) {
+ return TypeCategory::Composite;
+ }
+
+ TypeCategory::NotSupported
+ }
+ }
+ }
+}
+
+/// Coerce dissimilar data types to a single data type.
+/// UNION, INTERSECT, EXCEPT, CASE, ARRAY, VALUES, and the GREATEST and LEAST
functions are
+/// examples that has the similar resolution rules.
+/// See <https://www.postgresql.org/docs/current/typeconv-union-case.html> for
more information.
+/// The rules in the document provide a clue, but adhering strictly to them
doesn't precisely
+/// align with the behavior of Postgres. Therefore, we've made slight
adjustments to the rules
+/// to better match the behavior of both Postgres and DuckDB. For example, we
expect adjusted
+/// decimal percision and scale when coercing decimal types.
+pub fn type_union_resolution(data_types: &[DataType]) -> Option<DataType> {
+ if data_types.is_empty() {
+ return None;
+ }
+
+ // if all the data_types is the same return first one
+ if data_types.iter().all(|t| t == &data_types[0]) {
+ return Some(data_types[0].clone());
+ }
+
+ // if all the data_types are null, return string
+ if data_types.iter().all(|t| t == &DataType::Null) {
+ return Some(DataType::Utf8);
+ }
+
+ // Ignore Nulls, if any data_type category is not the same, return None
+ let data_types_category: Vec<TypeCategory> = data_types
+ .iter()
+ .filter(|&t| t != &DataType::Null)
+ .map(|t| t.into())
+ .collect();
+
+ if data_types_category
+ .iter()
+ .any(|t| t == &TypeCategory::NotSupported)
+ {
+ return None;
+ }
+
+ // check if there is only one category excluding Unknown
+ let categories: HashSet<TypeCategory> = HashSet::from_iter(
+ data_types_category
+ .iter()
+ .filter(|&c| c != &TypeCategory::Unknown)
+ .cloned(),
+ );
+ if categories.len() > 1 {
+ return None;
+ }
+
+ // Ignore Nulls
+ let mut candidate_type: Option<DataType> = None;
+ for data_type in data_types.iter() {
+ if data_type == &DataType::Null {
+ continue;
+ }
+ if let Some(ref candidate_t) = candidate_type {
+ // Find candidate type that all the data types can be coerced to
+ // Follows the behavior of Postgres and DuckDB
+ // Coerced type may be different from the candidate and current
data type
+ // For example,
+ // i64 and decimal(7, 2) are expect to get coerced type
decimal(22, 2)
+ // numeric string ('1') and numeric (2) are expect to get coerced
type numeric (1, 2)
+ if let Some(t) = type_union_resolution_coercion(data_type,
candidate_t) {
+ candidate_type = Some(t);
+ } else {
+ return None;
+ }
+ } else {
+ candidate_type = Some(data_type.clone());
+ }
+ }
+
+ candidate_type
+}
+
+/// Coerce `lhs_type` and `rhs_type` to a common type for
[type_union_resolution]
+/// See [type_union_resolution] for more information.
+fn type_union_resolution_coercion(
+ lhs_type: &DataType,
+ rhs_type: &DataType,
+) -> Option<DataType> {
+ if lhs_type == rhs_type {
+ return Some(lhs_type.clone());
+ }
+
+ match (lhs_type, rhs_type) {
+ (
+ DataType::Dictionary(lhs_index_type, lhs_value_type),
+ DataType::Dictionary(rhs_index_type, rhs_value_type),
+ ) => {
+ let new_index_type =
+ type_union_resolution_coercion(lhs_index_type, rhs_index_type);
+ let new_value_type =
+ type_union_resolution_coercion(lhs_value_type, rhs_value_type);
+ if let (Some(new_index_type), Some(new_value_type)) =
+ (new_index_type, new_value_type)
+ {
+ Some(DataType::Dictionary(
+ Box::new(new_index_type),
+ Box::new(new_value_type),
+ ))
+ } else {
+ None
+ }
+ }
+ (DataType::Dictionary(index_type, value_type), other_type)
+ | (other_type, DataType::Dictionary(index_type, value_type)) => {
+ let new_value_type = type_union_resolution_coercion(value_type,
other_type);
+ new_value_type.map(|t| DataType::Dictionary(index_type.clone(),
Box::new(t)))
+ }
+ _ => {
+ // numeric coercion is the same as comparison coercion, both find
the narrowest type
+ // that can accommodate both types
+ binary_numeric_coercion(lhs_type, rhs_type)
+ .or_else(|| string_coercion(lhs_type, rhs_type))
+ .or_else(|| numeric_string_coercion(lhs_type, rhs_type))
+ }
+ }
+}
+
/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a
comparison operation
+/// Unlike `coerced_from`, usually the coerced type is for comparison only.
+/// For example, compare with Dictionary and Dictionary, only value type is
what we care about
pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
if lhs_type == rhs_type {
// same type => equality is possible
return Some(lhs_type.clone());
}
- comparison_binary_numeric_coercion(lhs_type, rhs_type)
+ binary_numeric_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, true))
.or_else(|| temporal_coercion(lhs_type, rhs_type))
.or_else(|| string_coercion(lhs_type, rhs_type))
@@ -312,7 +507,7 @@ pub fn values_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataT
// same type => equality is possible
return Some(lhs_type.clone());
}
- comparison_binary_numeric_coercion(lhs_type, rhs_type)
+ binary_numeric_coercion(lhs_type, rhs_type)
.or_else(|| temporal_coercion(lhs_type, rhs_type))
.or_else(|| string_coercion(lhs_type, rhs_type))
.or_else(|| binary_coercion(lhs_type, rhs_type))
@@ -372,9 +567,8 @@ fn string_temporal_coercion(
match_rule(lhs_type, rhs_type).or_else(|| match_rule(rhs_type, lhs_type))
}
-/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a
comparison operation
-/// where one both are numeric
-pub(crate) fn comparison_binary_numeric_coercion(
+/// Coerce `lhs_type` and `rhs_type` to a common type where both are numeric
+pub(crate) fn binary_numeric_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
@@ -388,20 +582,13 @@ pub(crate) fn comparison_binary_numeric_coercion(
return Some(lhs_type.clone());
}
+ if let Some(t) = decimal_coercion(lhs_type, rhs_type) {
+ return Some(t);
+ }
+
// these are ordered from most informative to least informative so
// that the coercion does not lose information via truncation
match (lhs_type, rhs_type) {
- // Prefer decimal data type over floating point for comparison
operation
- (Decimal128(_, _), Decimal128(_, _)) => {
- get_wider_decimal_type(lhs_type, rhs_type)
- }
- (Decimal128(_, _), _) => get_comparison_common_decimal_type(lhs_type,
rhs_type),
- (_, Decimal128(_, _)) => get_comparison_common_decimal_type(rhs_type,
lhs_type),
- (Decimal256(_, _), Decimal256(_, _)) => {
- get_wider_decimal_type(lhs_type, rhs_type)
- }
- (Decimal256(_, _), _) => get_comparison_common_decimal_type(lhs_type,
rhs_type),
- (_, Decimal256(_, _)) => get_comparison_common_decimal_type(rhs_type,
lhs_type),
(Float64, _) | (_, Float64) => Some(Float64),
(_, Float32) | (Float32, _) => Some(Float32),
// The following match arms encode the following logic: Given the two
@@ -409,6 +596,11 @@ pub(crate) fn comparison_binary_numeric_coercion(
// accommodates all values of both types. Note that some information
// loss is inevitable when we have a signed type and a `UInt64`, in
// which case we use `Int64`;i.e. the widest signed integral type.
+
+ // TODO: For i64 and u64, we can use decimal or float64
+ // Postgres has no unsigned type :(
+ // DuckDB v.0.10.0 has double (double precision floating-point number
(8 bytes))
+ // for largest signed (signed sixteen-byte integer) and unsigned
integer (unsigned sixteen-byte integer)
(Int64, _)
| (_, Int64)
| (UInt64, Int8)
@@ -439,9 +631,28 @@ pub(crate) fn comparison_binary_numeric_coercion(
}
}
-/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of
-/// a comparison operation where one is a decimal
-fn get_comparison_common_decimal_type(
+/// Decimal coercion rules.
+pub fn decimal_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
+ use arrow::datatypes::DataType::*;
+
+ match (lhs_type, rhs_type) {
+ // Prefer decimal data type over floating point for comparison
operation
+ (Decimal128(_, _), Decimal128(_, _)) => {
+ get_wider_decimal_type(lhs_type, rhs_type)
+ }
+ (Decimal128(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+ (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+ (Decimal256(_, _), Decimal256(_, _)) => {
+ get_wider_decimal_type(lhs_type, rhs_type)
+ }
+ (Decimal256(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+ (_, Decimal256(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+ (_, _) => None,
+ }
+}
+
+/// Coerce `lhs_type` and `rhs_type` to a common type.
+fn get_common_decimal_type(
decimal_type: &DataType,
other_type: &DataType,
) -> Option<DataType> {
@@ -725,6 +936,18 @@ fn string_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataType>
}
}
+fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
+ use arrow::datatypes::DataType::*;
+ match (lhs_type, rhs_type) {
+ (Utf8 | LargeUtf8, other_type) | (other_type, Utf8 | LargeUtf8)
+ if other_type.is_numeric() =>
+ {
+ Some(other_type.clone())
+ }
+ _ => None,
+ }
+}
+
/// Coercion rules for list types.
fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
{
use arrow::datatypes::DataType::*;
diff --git a/datafusion/expr/src/type_coercion/functions.rs
b/datafusion/expr/src/type_coercion/functions.rs
index b41ec10910..623fa25742 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -30,7 +30,7 @@ use datafusion_common::{
exec_err, internal_datafusion_err, internal_err, plan_err, Result,
};
-use super::binary::{comparison_binary_numeric_coercion, comparison_coercion};
+use super::binary::comparison_coercion;
/// Performs type coercion for scalar function arguments.
///
@@ -66,20 +66,7 @@ pub fn data_types_with_scalar_udf(
return Ok(current_types.to_vec());
}
- // Try and coerce the argument types to match the signature, returning the
- // coerced types from the first matching signature.
- for valid_types in valid_types {
- if let Some(types) = maybe_data_types(&valid_types, current_types) {
- return Ok(types);
- }
- }
-
- // none possible -> Error
- plan_err!(
- "[data_types_with_scalar_udf] Coercion from {:?} to the signature {:?}
failed.",
- current_types,
- &signature.type_signature
- )
+ try_coerce_types(valid_types, current_types, &signature.type_signature)
}
pub fn data_types_with_aggregate_udf(
@@ -112,20 +99,7 @@ pub fn data_types_with_aggregate_udf(
return Ok(current_types.to_vec());
}
- // Try and coerce the argument types to match the signature, returning the
- // coerced types from the first matching signature.
- for valid_types in valid_types {
- if let Some(types) = maybe_data_types(&valid_types, current_types) {
- return Ok(types);
- }
- }
-
- // none possible -> Error
- plan_err!(
- "[data_types_with_aggregate_udf] Coercion from {:?} to the signature
{:?} failed.",
- current_types,
- &signature.type_signature
- )
+ try_coerce_types(valid_types, current_types, &signature.type_signature)
}
/// Performs type coercion for function arguments.
@@ -152,7 +126,6 @@ pub fn data_types(
}
let valid_types = get_valid_types(&signature.type_signature,
current_types)?;
-
if valid_types
.iter()
.any(|data_type| data_type == current_types)
@@ -160,19 +133,39 @@ pub fn data_types(
return Ok(current_types.to_vec());
}
- // Try and coerce the argument types to match the signature, returning the
- // coerced types from the first matching signature.
- for valid_types in valid_types {
- if let Some(types) = maybe_data_types(&valid_types, current_types) {
- return Ok(types);
+ try_coerce_types(valid_types, current_types, &signature.type_signature)
+}
+
+fn try_coerce_types(
+ valid_types: Vec<Vec<DataType>>,
+ current_types: &[DataType],
+ type_signature: &TypeSignature,
+) -> Result<Vec<DataType>> {
+ let mut valid_types = valid_types;
+
+ // Well-supported signature that returns exact valid types.
+ if !valid_types.is_empty() && matches!(type_signature,
TypeSignature::UserDefined) {
+ // exact valid types
+ assert_eq!(valid_types.len(), 1);
+ let valid_types = valid_types.swap_remove(0);
+ if let Some(t) = maybe_data_types_without_coercion(&valid_types,
current_types) {
+ return Ok(t);
+ }
+ } else {
+ // Try and coerce the argument types to match the signature, returning
the
+ // coerced types from the first matching signature.
+ for valid_types in valid_types {
+ if let Some(types) = maybe_data_types(&valid_types, current_types)
{
+ return Ok(types);
+ }
}
}
// none possible -> Error
plan_err!(
- "[data_types] Coercion from {:?} to the signature {:?} failed.",
+ "Coercion from {:?} to the signature {:?} failed.",
current_types,
- &signature.type_signature
+ type_signature
)
}
@@ -438,6 +431,8 @@ fn maybe_data_types(
new_type.push(current_type.clone())
} else {
// attempt to coerce.
+ // TODO: Replace with `can_cast_types` after failing cases are
resolved
+ // (they need new signature that returns exactly valid types
instead of list of possible valid types).
if let Some(coerced_type) = coerced_from(valid_type, current_type)
{
new_type.push(coerced_type)
} else {
@@ -449,6 +444,33 @@ fn maybe_data_types(
Some(new_type)
}
+/// Check if the current argument types can be coerced to match the given
`valid_types`
+/// unlike `maybe_data_types`, this function does not coerce the types.
+/// TODO: I think this function should replace `maybe_data_types` after
signature are well-supported.
+fn maybe_data_types_without_coercion(
+ valid_types: &[DataType],
+ current_types: &[DataType],
+) -> Option<Vec<DataType>> {
+ if valid_types.len() != current_types.len() {
+ return None;
+ }
+
+ let mut new_type = Vec::with_capacity(valid_types.len());
+ for (i, valid_type) in valid_types.iter().enumerate() {
+ let current_type = ¤t_types[i];
+
+ if current_type == valid_type {
+ new_type.push(current_type.clone())
+ } else if can_cast_types(current_type, valid_type) {
+ // validate the valid type is castable from the current type
+ new_type.push(valid_type.clone())
+ } else {
+ return None;
+ }
+ }
+ Some(new_type)
+}
+
/// Return true if a value of type `type_from` can be coerced
/// (losslessly converted) into a value of `type_to`
///
@@ -463,11 +485,18 @@ pub fn can_coerce_from(type_into: &DataType, type_from:
&DataType) -> bool {
false
}
+/// Find the coerced type for the given `type_into` and `type_from`.
+/// Returns `None` if coercion is not possible.
+///
+/// Expect uni-directional coercion, for example, i32 is coerced to i64, but
i64 is not coerced to i32.
+///
+/// Unlike [comparison_coercion], the coerced type is usually `wider` for
lossless conversion.
fn coerced_from<'a>(
type_into: &'a DataType,
type_from: &'a DataType,
) -> Option<DataType> {
use self::DataType::*;
+
// match Dictionary first
match (type_into, type_from) {
// coerced dictionary first
@@ -585,7 +614,6 @@ fn coerced_from<'a>(
}
_ => None,
},
-
(Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => {
match type_from {
Timestamp(_, Some(from_tz)) => {
@@ -606,19 +634,7 @@ fn coerced_from<'a>(
{
Some(type_into.clone())
}
- // More coerce rules.
- // Note that not all rules in `comparison_coercion` can be reused here.
- // For example, all numeric types can be coerced into Utf8 for
comparison,
- // but not for function arguments.
- _ => comparison_binary_numeric_coercion(type_into, type_from).and_then(
- |coerced_type| {
- if *type_into == coerced_type {
- Some(coerced_type)
- } else {
- None
- }
- },
- ),
+ _ => None,
}
}
diff --git a/datafusion/functions/src/core/coalesce.rs
b/datafusion/functions/src/core/coalesce.rs
index 63778eb773..15a3ddd9d6 100644
--- a/datafusion/functions/src/core/coalesce.rs
+++ b/datafusion/functions/src/core/coalesce.rs
@@ -22,8 +22,8 @@ use arrow::compute::kernels::zip::zip;
use arrow::compute::{and, is_not_null, is_null};
use arrow::datatypes::DataType;
-use datafusion_common::{exec_err, internal_err, Result};
-use datafusion_expr::type_coercion::binary::comparison_coercion;
+use datafusion_common::{exec_err, Result};
+use datafusion_expr::type_coercion::binary::type_union_resolution;
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
@@ -124,21 +124,11 @@ impl ScalarUDFImpl for CoalesceFunc {
}
fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
- let new_type = arg_types.iter().skip(1).try_fold(
- arg_types.first().unwrap().clone(),
- |acc, x| {
- // The coerced types found by `comparison_coercion` are not
guaranteed to be
- // coercible for the arguments. `comparison_coercion` returns
more loose
- // types that can be coerced to both `acc` and `x` for
comparison purpose.
- // See `maybe_data_types` for the actual coercion.
- let coerced_type = comparison_coercion(&acc, x);
- if let Some(coerced_type) = coerced_type {
- Ok(coerced_type)
- } else {
- internal_err!("Coercion from {acc:?} to {x:?} failed.")
- }
- },
- )?;
+ if arg_types.is_empty() {
+ return exec_err!("coalesce must have at least one argument");
+ }
+ let new_type = type_union_resolution(arg_types)
+ .unwrap_or(arg_types.first().unwrap().clone());
Ok(vec![new_type; arg_types.len()])
}
}
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 69be344cb7..081a54ac44 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -990,7 +990,7 @@ mod test {
.err()
.unwrap();
assert_eq!(
- "type_coercion\ncaused by\nError during planning:
[data_types_with_aggregate_udf] Coercion from [Utf8] to the signature
Uniform(1, [Float64]) failed.",
+ "type_coercion\ncaused by\nError during planning: Coercion from
[Utf8] to the signature Uniform(1, [Float64]) failed.",
err.strip_backtrace()
);
Ok(())
diff --git a/datafusion/sqllogictest/test_files/coalesce.slt
b/datafusion/sqllogictest/test_files/coalesce.slt
index a0317ac4a5..17b0e774d9 100644
--- a/datafusion/sqllogictest/test_files/coalesce.slt
+++ b/datafusion/sqllogictest/test_files/coalesce.slt
@@ -208,7 +208,7 @@ select
----
[3, 4] List(Field { name: "item", data_type: Int64, nullable: true, dict_id:
0, dict_is_ordered: false, metadata: {} })
-# TODO: after switch signature of array to the same with coalesce, this query
should be fixed
+# coalesce with array
query ?T
select
coalesce(array[1, 2], array[arrow_cast(3, 'Int32'), arrow_cast(4, 'Int32')]),
@@ -216,10 +216,10 @@ select
----
[1, 2] List(Field { name: "item", data_type: Int64, nullable: true, dict_id:
0, dict_is_ordered: false, metadata: {} })
+# test dict(int32, utf8)
statement ok
create table test1 as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)')),
(null);
-# Dictionary and String are not coercible
query ?
select coalesce(column1, 'none_set') from test1;
----
@@ -242,7 +242,59 @@ none_set
statement ok
drop table test1
-# Numeric and Dictionary are not coercible
+# test dict coercion with value
+statement ok
+create table t(c varchar) as values ('a'), (null);
+
+query TT
+select
+ coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)')),
+ arrow_typeof(coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)')))
+from t;
+----
+a Dictionary(Int32, Utf8)
+b Dictionary(Int32, Utf8)
+
+statement ok
+drop table t;
+
+# test dict coercion with dict
+statement ok
+create table t as values
+ (arrow_cast('foo', 'Dictionary(Int32, Utf8)')),
+ (null);
+
+query ?T
+select
+ coalesce(column1, arrow_cast('bar', 'Dictionary(Int64, LargeUtf8)')),
+ arrow_typeof(coalesce(column1, arrow_cast('bar', 'Dictionary(Int64,
LargeUtf8)')))
+from t;
+----
+foo Dictionary(Int64, LargeUtf8)
+bar Dictionary(Int64, LargeUtf8)
+
+query ?T
+select
+ coalesce(column1, arrow_cast('bar', 'Dictionary(Int32, LargeUtf8)')),
+ arrow_typeof(coalesce(column1, arrow_cast('bar', 'Dictionary(Int32,
LargeUtf8)')))
+from t;
+----
+foo Dictionary(Int32, LargeUtf8)
+bar Dictionary(Int32, LargeUtf8)
+
+query ?T
+select
+ coalesce(column1, arrow_cast('bar', 'Dictionary(Int64, Utf8)')),
+ arrow_typeof(coalesce(column1, arrow_cast('bar', 'Dictionary(Int64, Utf8)')))
+from t;
+----
+foo Dictionary(Int64, Utf8)
+bar Dictionary(Int64, Utf8)
+
+statement ok
+drop table t;
+
+# test dict(int32, int8)
query I
select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)'));
----
@@ -258,6 +310,12 @@ select coalesce(null, 34, arrow_cast(123,
'Dictionary(Int32, Int8)'));
----
34
+# numeric string coercion
+query RT
+select coalesce(2.0, 1, '3'), arrow_typeof(coalesce(2.0, 1, '3'));
+----
+2 Float64
+
# explicitly cast to Int8, and it will implicitly cast to Int64
query IT
select
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]