This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch branch-37
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/branch-37 by this push:
new ec4a6da798 coercion vec[Dictionary, Utf8] to Dictionary for coalesce
function (#9958) (#10104)
ec4a6da798 is described below
commit ec4a6da798f7d02979e80d518ea6ca9546f8f15e
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Apr 17 13:50:07 2024 -0400
coercion vec[Dictionary, Utf8] to Dictionary for coalesce function (#9958)
(#10104)
* for debug
finish
remove print
add space
* fix clippy
* finish
* fix clippy
Co-authored-by: Lordworms <[email protected]>
---
datafusion/expr/src/type_coercion/functions.rs | 57 ++++++++++++++++----------
datafusion/sqllogictest/test_files/scalar.slt | 42 ++++++++++++++++++-
2 files changed, 75 insertions(+), 24 deletions(-)
diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs
index d4095a72fe..34b607d088 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -311,17 +311,25 @@ fn coerced_from<'a>(
type_from: &'a DataType,
) -> Option<DataType> {
use self::DataType::*;
-
- match type_into {
+ // match on the (type_into, type_from) pair so the Dictionary arm is checked first
+ match (type_into, type_from) {
+ // a Dictionary on either side coerces to type_into when the other type can be coerced from its value type
+ (cur_type, Dictionary(_, value_type)) | (Dictionary(_, value_type), cur_type)
+ if coerced_from(cur_type, value_type).is_some() =>
+ {
+ Some(type_into.clone())
+ }
// coerced into type_into
- Int8 if matches!(type_from, Null | Int8) => Some(type_into.clone()),
- Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => {
+ (Int8, _) if matches!(type_from, Null | Int8) =>
Some(type_into.clone()),
+ (Int16, _) if matches!(type_from, Null | Int8 | Int16 | UInt8) => {
Some(type_into.clone())
}
- Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16) => {
+ (Int32, _)
+ if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16) =>
+ {
Some(type_into.clone())
}
- Int64
+ (Int64, _)
if matches!(
type_from,
Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
@@ -329,15 +337,17 @@ fn coerced_from<'a>(
{
Some(type_into.clone())
}
- UInt8 if matches!(type_from, Null | UInt8) => Some(type_into.clone()),
- UInt16 if matches!(type_from, Null | UInt8 | UInt16) =>
Some(type_into.clone()),
- UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => {
+ (UInt8, _) if matches!(type_from, Null | UInt8) =>
Some(type_into.clone()),
+ (UInt16, _) if matches!(type_from, Null | UInt8 | UInt16) => {
+ Some(type_into.clone())
+ }
+ (UInt32, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => {
Some(type_into.clone())
}
- UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64)
=> {
+ (UInt64, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32 |
UInt64) => {
Some(type_into.clone())
}
- Float32
+ (Float32, _)
if matches!(
type_from,
Null | Int8
@@ -353,7 +363,7 @@ fn coerced_from<'a>(
{
Some(type_into.clone())
}
- Float64
+ (Float64, _)
if matches!(
type_from,
Null | Int8
@@ -371,7 +381,7 @@ fn coerced_from<'a>(
{
Some(type_into.clone())
}
- Timestamp(TimeUnit::Nanosecond, None)
+ (Timestamp(TimeUnit::Nanosecond, None), _)
if matches!(
type_from,
Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
@@ -379,23 +389,27 @@ fn coerced_from<'a>(
{
Some(type_into.clone())
}
- Interval(_) if matches!(type_from, Utf8 | LargeUtf8) =>
Some(type_into.clone()),
+ (Interval(_), _) if matches!(type_from, Utf8 | LargeUtf8) => {
+ Some(type_into.clone())
+ }
// Any type can be coerced into strings
- Utf8 | LargeUtf8 => Some(type_into.clone()),
- Null if can_cast_types(type_from, type_into) =>
Some(type_into.clone()),
+ (Utf8 | LargeUtf8, _) => Some(type_into.clone()),
+ (Null, _) if can_cast_types(type_from, type_into) =>
Some(type_into.clone()),
- List(_) if matches!(type_from, FixedSizeList(_, _)) =>
Some(type_into.clone()),
+ (List(_), _) if matches!(type_from, FixedSizeList(_, _)) => {
+ Some(type_into.clone())
+ }
// Only accept list and largelist with the same number of dimensions unless the type is Null.
// List or LargeList with different dimensions should be handled in TypeSignature or other places before this
- List(_) | LargeList(_)
+ (List(_) | LargeList(_), _)
if datafusion_common::utils::base_type(type_from).eq(&Null)
|| list_ndims(type_from) == list_ndims(type_into) =>
{
Some(type_into.clone())
}
// should be able to coerce wildcard fixed size list to non wildcard fixed size list
- FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD) => match type_from {
+ (FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD), _) => match
type_from {
FixedSizeList(f_from, size_from) => {
match coerced_from(f_into.data_type(), f_from.data_type()) {
Some(data_type) if &data_type != f_into.data_type() => {
@@ -410,7 +424,7 @@ fn coerced_from<'a>(
_ => None,
},
- Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => {
+ (Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => {
match type_from {
Timestamp(_, Some(from_tz)) => {
Some(Timestamp(unit.clone(), Some(from_tz.clone())))
@@ -422,7 +436,7 @@ fn coerced_from<'a>(
_ => None,
}
}
- Timestamp(_, Some(_))
+ (Timestamp(_, Some(_)), _)
if matches!(
type_from,
Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
@@ -430,7 +444,6 @@ fn coerced_from<'a>(
{
Some(type_into.clone())
}
-
// More coerce rules.
// Note that not all rules in `comparison_coercion` can be reused here.
// For example, all numeric types can be coerced into Utf8 for
comparison,
diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt
index a77a2bf405..1911b0cef5 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1779,6 +1779,46 @@ SELECT COALESCE(NULL, 'test')
----
test
+
+statement ok
+create table test1 as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)')), (null);
+
+# test coercion string
+query ?
+select coalesce(column1, 'none_set') from test1;
+----
+foo
+none_set
+
+# test coercion Int
+query I
+select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)'));
+----
+34
+
+# test with Int
+query I
+select coalesce(arrow_cast(123, 'Dictionary(Int32, Int8)'),34);
+----
+123
+
+# test with null and Int
+query I
+select coalesce(null, 34, arrow_cast(123, 'Dictionary(Int32, Int8)'));
+----
+34
+
+# test with null and string
+query T
+select coalesce(null, column1, 'none_set') from test1;
+----
+foo
+none_set
+
+statement ok
+drop table test1
+
+
statement ok
CREATE TABLE test(
c1 INT,
@@ -2162,5 +2202,3 @@ query I
select strpos('joséésoj', arrow_cast(null, 'Utf8'));
----
NULL
-
-