This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch branch-37
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/branch-37 by this push:
     new ec4a6da798 coercion vec[Dictionary, Utf8] to Dictionary for coalesce 
function (#9958) (#10104)
ec4a6da798 is described below

commit ec4a6da798f7d02979e80d518ea6ca9546f8f15e
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Apr 17 13:50:07 2024 -0400

    coercion vec[Dictionary, Utf8] to Dictionary for coalesce function (#9958) 
(#10104)
    
    * for debug
    
    finish
    
    remove print
    
    add space
    
    * fix clippy
    
    * finish
    
    * fix clippy
    
    Co-authored-by: Lordworms <[email protected]>
---
 datafusion/expr/src/type_coercion/functions.rs | 57 ++++++++++++++++----------
 datafusion/sqllogictest/test_files/scalar.slt  | 42 ++++++++++++++++++-
 2 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/datafusion/expr/src/type_coercion/functions.rs 
b/datafusion/expr/src/type_coercion/functions.rs
index d4095a72fe..34b607d088 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -311,17 +311,25 @@ fn coerced_from<'a>(
     type_from: &'a DataType,
 ) -> Option<DataType> {
     use self::DataType::*;
-
-    match type_into {
+    // match Dictionary first
+    match (type_into, type_from) {
+        // coerced dictionary first
+        (cur_type, Dictionary(_, value_type)) | (Dictionary(_, value_type), 
cur_type)
+            if coerced_from(cur_type, value_type).is_some() =>
+        {
+            Some(type_into.clone())
+        }
         // coerced into type_into
-        Int8 if matches!(type_from, Null | Int8) => Some(type_into.clone()),
-        Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => {
+        (Int8, _) if matches!(type_from, Null | Int8) => 
Some(type_into.clone()),
+        (Int16, _) if matches!(type_from, Null | Int8 | Int16 | UInt8) => {
             Some(type_into.clone())
         }
-        Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16) => {
+        (Int32, _)
+            if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16) =>
+        {
             Some(type_into.clone())
         }
-        Int64
+        (Int64, _)
             if matches!(
                 type_from,
                 Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
@@ -329,15 +337,17 @@ fn coerced_from<'a>(
         {
             Some(type_into.clone())
         }
-        UInt8 if matches!(type_from, Null | UInt8) => Some(type_into.clone()),
-        UInt16 if matches!(type_from, Null | UInt8 | UInt16) => 
Some(type_into.clone()),
-        UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => {
+        (UInt8, _) if matches!(type_from, Null | UInt8) => 
Some(type_into.clone()),
+        (UInt16, _) if matches!(type_from, Null | UInt8 | UInt16) => {
+            Some(type_into.clone())
+        }
+        (UInt32, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => {
             Some(type_into.clone())
         }
-        UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) 
=> {
+        (UInt64, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | 
UInt64) => {
             Some(type_into.clone())
         }
-        Float32
+        (Float32, _)
             if matches!(
                 type_from,
                 Null | Int8
@@ -353,7 +363,7 @@ fn coerced_from<'a>(
         {
             Some(type_into.clone())
         }
-        Float64
+        (Float64, _)
             if matches!(
                 type_from,
                 Null | Int8
@@ -371,7 +381,7 @@ fn coerced_from<'a>(
         {
             Some(type_into.clone())
         }
-        Timestamp(TimeUnit::Nanosecond, None)
+        (Timestamp(TimeUnit::Nanosecond, None), _)
             if matches!(
                 type_from,
                 Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
@@ -379,23 +389,27 @@ fn coerced_from<'a>(
         {
             Some(type_into.clone())
         }
-        Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => 
Some(type_into.clone()),
+        (Interval(_), _) if matches!(type_from, Utf8 | LargeUtf8) => {
+            Some(type_into.clone())
+        }
         // Any type can be coerced into strings
-        Utf8 | LargeUtf8 => Some(type_into.clone()),
-        Null if can_cast_types(type_from, type_into) => 
Some(type_into.clone()),
+        (Utf8 | LargeUtf8, _) => Some(type_into.clone()),
+        (Null, _) if can_cast_types(type_from, type_into) => 
Some(type_into.clone()),
 
-        List(_) if matches!(type_from, FixedSizeList(_, _)) => 
Some(type_into.clone()),
+        (List(_), _) if matches!(type_from, FixedSizeList(_, _)) => {
+            Some(type_into.clone())
+        }
 
         // Only accept list and largelist with the same number of dimensions 
unless the type is Null.
         // List or LargeList with different dimensions should be handled in 
TypeSignature or other places before this
-        List(_) | LargeList(_)
+        (List(_) | LargeList(_), _)
             if datafusion_common::utils::base_type(type_from).eq(&Null)
                 || list_ndims(type_from) == list_ndims(type_into) =>
         {
             Some(type_into.clone())
         }
         // should be able to coerce wildcard fixed size list to non wildcard 
fixed size list
-        FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD) => match type_from {
+        (FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD), _) => match 
type_from {
             FixedSizeList(f_from, size_from) => {
                 match coerced_from(f_into.data_type(), f_from.data_type()) {
                     Some(data_type) if &data_type != f_into.data_type() => {
@@ -410,7 +424,7 @@ fn coerced_from<'a>(
             _ => None,
         },
 
-        Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => {
+        (Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => {
             match type_from {
                 Timestamp(_, Some(from_tz)) => {
                     Some(Timestamp(unit.clone(), Some(from_tz.clone())))
@@ -422,7 +436,7 @@ fn coerced_from<'a>(
                 _ => None,
             }
         }
-        Timestamp(_, Some(_))
+        (Timestamp(_, Some(_)), _)
             if matches!(
                 type_from,
                 Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
@@ -430,7 +444,6 @@ fn coerced_from<'a>(
         {
             Some(type_into.clone())
         }
-
         // More coerce rules.
         // Note that not all rules in `comparison_coercion` can be reused here.
         // For example, all numeric types can be coerced into Utf8 for 
comparison,
diff --git a/datafusion/sqllogictest/test_files/scalar.slt 
b/datafusion/sqllogictest/test_files/scalar.slt
index a77a2bf405..1911b0cef5 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1779,6 +1779,46 @@ SELECT COALESCE(NULL, 'test')
 ----
 test
 
+
+statement ok
+create table test1 as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)')), 
(null);
+
+# test coercion string
+query ?
+select  coalesce(column1, 'none_set') from test1;
+----
+foo
+none_set
+
+# test coercion Int
+query I
+select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)'));
+----
+34
+
+# test with Int
+query I
+select coalesce(arrow_cast(123, 'Dictionary(Int32, Int8)'),34);
+----
+123
+
+# test with null
+query I
+select coalesce(null, 34, arrow_cast(123, 'Dictionary(Int32, Int8)'));
+----
+34
+
+# test with null
+query T
+select  coalesce(null, column1, 'none_set') from test1;
+----
+foo
+none_set
+
+statement ok
+drop table test1
+
+
 statement ok
 CREATE TABLE test(
   c1 INT,
@@ -2162,5 +2202,3 @@ query I
 select strpos('joséésoj', arrow_cast(null, 'Utf8'));
 ----
 NULL
-
-

Reply via email to