alamb commented on code in PR #10268:
URL: https://github.com/apache/datafusion/pull/10268#discussion_r1586698773


##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -53,20 +54,31 @@ pub fn data_types(
         }
     }
 
-    let valid_types = get_valid_types(&signature.type_signature, 
current_types)?;
-
+    let mut valid_types = get_valid_types(&signature.type_signature, 
current_types)?;
     if valid_types
         .iter()
         .any(|data_type| data_type == current_types)
     {
         return Ok(current_types.to_vec());
     }
 
-    // Try and coerce the argument types to match the signature, returning the
-    // coerced types from the first matching signature.
-    for valid_types in valid_types {
-        if let Some(types) = maybe_data_types(&valid_types, current_types) {
-            return Ok(types);
+    // Well-supported signature that returns exact valid types.
+    if !valid_types.is_empty()
+        && matches!(signature.type_signature, 
TypeSignature::VariadicEqualOrNull)
+    {
+        // exact valid types
+        assert_eq!(valid_types.len(), 1);
+        let valid_types = valid_types.swap_remove(0);

Review Comment:
   is it a problem that this may change the type ordering? So that the last 
type is now the first in the new Vec?



##########
datafusion/functions/src/core/coalesce.rs:
##########
@@ -60,9 +59,8 @@ impl ScalarUDFImpl for CoalesceFunc {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        // COALESCE has multiple args and they might get coerced, get a 
preview of this

Review Comment:
   👍 ❤️ 



##########
datafusion/sqllogictest/test_files/functions.slt:
##########
@@ -1158,3 +1158,347 @@ drop table uuid_table
 
 statement ok
 drop table t
+
+
+# Test coalesce function
+query I
+select coalesce(1, 2, 3);
+----
+1
+
+# test with first null
+query ?T
+select coalesce(null, 3, 2, 1), arrow_typeof(coalesce(null, 3, 2, 1));
+----
+3 Int64
+
+# test with null values
+query ?
+select coalesce(null, null);
+----
+NULL
+
+# cast to float
+query IT
+select 
+  coalesce(1, 2.0),
+  arrow_typeof(coalesce(1, 2.0))
+;
+----
+1 Float64
+
+query RT
+select 
+  coalesce(2.0, 1),
+  arrow_typeof(coalesce(2.0, 1))
+;
+----
+2 Float64
+
+query IT
+select 
+  coalesce(1, arrow_cast(2.0, 'Float32')),
+  arrow_typeof(coalesce(1, arrow_cast(2.0, 'Float32')))
+;
+----
+1 Float32
+
+# test with empty args
+query error
+select coalesce();
+
+# test with different types
+query I
+select coalesce(arrow_cast(1, 'Int32'), arrow_cast(1, 'Int64'));
+----
+1
+
+# test with nulls
+query ?T
+select coalesce(null, null, null), arrow_typeof(coalesce(null, null));
+----
+NULL Null
+
+# i32 and u32, cast to wider type i64
+query IT
+select 
+  coalesce(arrow_cast(2, 'Int32'), arrow_cast(3, 'UInt32')),
+  arrow_typeof(coalesce(arrow_cast(2, 'Int32'), arrow_cast(3, 'UInt32')));
+----
+2 Int64
+
+query IT
+select
+  coalesce(arrow_cast(2, 'Int16'), arrow_cast(3, 'UInt16')),
+  arrow_typeof(coalesce(arrow_cast(2, 'Int16'), arrow_cast(3, 'UInt16')));
+----
+2 Int32
+
+query IT
+select
+  coalesce(arrow_cast(2, 'Int8'), arrow_cast(3, 'UInt8')),
+  arrow_typeof(coalesce(arrow_cast(2, 'Int8'), arrow_cast(3, 'UInt8')));
+----
+2 Int16
+
+# i64 and u32, cast to wider type i64
+query IT
+select 
+  coalesce(2, arrow_cast(3, 'UInt32')),
+  arrow_typeof(coalesce(2, arrow_cast(3, 'UInt32')));
+----
+2 Int64
+
+# TODO: Got types (i64, u64), casting to decimal or double or even i128 if 
supported
+query IT
+select
+  coalesce(2, arrow_cast(3, 'UInt64')),
+  arrow_typeof(coalesce(2, arrow_cast(3, 'UInt64')));
+----
+2 Int64
+
+statement ok
+create table t1 (a bigint, b int, c int) as values (null, null, 1), (null, 2, 
null);
+
+# Follow Postgres and DuckDB behavior, since a is bigint, although the value 
is null, all args are coerced to bigint
+query IT
+select 
+  coalesce(a, b, c), 
+  arrow_typeof(coalesce(a, b, c))
+from t1;
+----
+1 Int64
+2 Int64
+
+# b, c has the same type int, so the result is int
+query IT
+select 
+  coalesce(b, c),
+  arrow_typeof(coalesce(b, c))
+from t1;
+----
+1 Int32
+2 Int32
+
+statement ok
+drop table t1;
+
+# test multi rows
+statement ok
+CREATE TABLE t1(
+  c1 int,
+  c2 int
+) as VALUES
+(1, 2),
+(NULL, 2),
+(1, NULL),
+(NULL, NULL);
+
+query I
+SELECT COALESCE(c1, c2) FROM t1
+----
+1
+2
+1
+NULL
+
+statement ok
+drop table t1;
+
+# Decimal128(7, 2) and int64 are coerced to common wider type Decimal128(22, 2)
+query RT
+select 
+  coalesce(arrow_cast(2, 'Decimal128(7, 2)'), 0),
+  arrow_typeof(coalesce(arrow_cast(2, 'Decimal128(7, 2)'), 0))
+----
+2 Decimal128(22, 2)
+
+query RT
+select 
+  coalesce(arrow_cast(2, 'Decimal256(7, 2)'), 0),
+  arrow_typeof(coalesce(arrow_cast(2, 'Decimal256(7, 2)'), 0));
+----
+2 Decimal256(22, 2)
+
+# coalesce string
+query T?
+select 
+  coalesce('', 'test'),
+  coalesce(null, 'test');
+----
+(empty) test
+
+# coalesce utf8 and large utf8
+query TT
+select 
+  coalesce('a', arrow_cast('b', 'LargeUtf8')),
+  arrow_typeof(coalesce('a', arrow_cast('b', 'LargeUtf8')))
+;
+----
+a LargeUtf8
+
+# coalesce array
+query ?T
+select 
+  coalesce(array[1, 2], array[3, 4]),
+  arrow_typeof(coalesce(array[1, 2], array[3, 4]));
+----
+[1, 2] List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 
0, dict_is_ordered: false, metadata: {} })
+
+query ?T
+select 
+  coalesce(null, array[3, 4]),
+  arrow_typeof(coalesce(array[1, 2], array[3, 4]));
+----
+[3, 4] List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 
0, dict_is_ordered: false, metadata: {} })
+
+# TODO: after switch signature of array to the same with coalesce, this query 
should be fixed
+query error
+select 
+  coalesce(array[1, 2], array[arrow_cast(3, 'Int32'), arrow_cast(4, 'Int32')]),
+  arrow_typeof(coalesce(array[1, 2], array[arrow_cast(3, 'Int32'), 
arrow_cast(4, 'Int32')]));
+
+statement ok
+create table test1 as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)')), 
(null);
+
+# Dictionary and String are not coercible

Review Comment:
   This query actually works on main so making it error seems like a regression 
to me
   
   ```sql
   DataFusion CLI v37.1.0
   > create table test1 as values (arrow_cast('foo', 'Dictionary(Int32, 
Utf8)')), (null);
   0 row(s) fetched.
   Elapsed 0.011 seconds.
   
   > select coalesce(column1, 'none_set') from test1;
   +------------------------------------------+
   | coalesce(test1.column1,Utf8("none_set")) |
   +------------------------------------------+
   | foo                                      |
   | none_set                                 |
   +------------------------------------------+
   2 row(s) fetched.
   Elapsed 0.003 seconds.
   
   > select coalesce(null, column1, 'none_set') from test1;
   +-----------------------------------------------+
   | coalesce(NULL,test1.column1,Utf8("none_set")) |
   +-----------------------------------------------+
   | foo                                           |
   | none_set                                      |
   +-----------------------------------------------+
   2 row(s) fetched.
   Elapsed 0.002 seconds.
   ```



##########
datafusion/expr/src/signature.rs:
##########
@@ -92,14 +92,22 @@ pub enum TypeSignature {
     /// A function such as `concat` is `Variadic(vec![DataType::Utf8, 
DataType::LargeUtf8])`
     Variadic(Vec<DataType>),
     /// One or more arguments of an arbitrary but equal type.
-    /// DataFusion attempts to coerce all argument types to match the first 
argument's type
+    /// DataFusion attempts to coerce all argument types to match to the 
common type with comparision coercion.

Review Comment:
   Can we please link to 
https://docs.rs/datafusion/latest/datafusion/logical_expr/type_coercion/binary/fn.comparison_coercion.html
 that explains (however limited) what comparison coercion is?



##########
datafusion/expr/src/type_coercion/binary.rs:
##########
@@ -289,15 +290,169 @@ fn bitwise_coercion(left_type: &DataType, right_type: 
&DataType) -> Option<DataT
     }
 }
 
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+enum TypeCategory {
+    Array,
+    Boolean,
+    Numeric,
+    // String, well-defined type, but are considered as unknown type.
+    DateTime,
+    Composite,
+    Unknown,
+    NotSupported,
+}
+
+fn data_type_category(data_type: &DataType) -> TypeCategory {

Review Comment:
   This could also be something like 
   
   ```rust
   impl From<&DataType> for TypeCategory { 
   ...
   ```
   
   And then you create a `TypeCategory` like
   ```rust
   let category = TypeCategory::from(&type)
   ```
   



##########
datafusion/expr/src/signature.rs:
##########
@@ -92,14 +92,22 @@ pub enum TypeSignature {
     /// A function such as `concat` is `Variadic(vec![DataType::Utf8, 
DataType::LargeUtf8])`
     Variadic(Vec<DataType>),
     /// One or more arguments of an arbitrary but equal type.
-    /// DataFusion attempts to coerce all argument types to match the first 
argument's type
+    /// DataFusion attempts to coerce all argument types to match to the 
common type with comparision coercion.
     ///
     /// # Examples
     /// Given types in signature should be coercible to the same final type.
     /// A function such as `make_array` is `VariadicEqual`.
     ///
     /// `make_array(i32, i64) -> make_array(i64, i64)`
     VariadicEqual,
+    /// One or more arguments of an arbitrary but equal type or Null.
+    /// Non-comparison coercion is attempted to match the signatures.
+    ///
+    /// Functions like `coalesce` is `VariadicEqual`.
+    // TODO: Temporary Signature, to differentiate existing VariadicEqual.
+    // After we swtich `make_array` to VariadicEqualOrNull,
+    // we can reuse VariadicEqual.
+    VariadicEqualOrNull,

Review Comment:
   Maybe we could do something like this (basically to flavor the type 
signature) 🤔 
   
   ```rust
   pub enum TypeSignature {
   ...
     /// Rather than the usual coercion rules, special type union rules are 
applied
     Union(Box<TypeSignature>)
   }
   ```
   
   



##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -293,6 +314,32 @@ fn maybe_data_types(
     Some(new_type)
 }
 
+/// Check if the current argument types can be coerced to match the given 
`valid_types`
+/// unlike `maybe_data_types`, this function does not coerce the types.

Review Comment:
   this comment says "function does not coerce the types" but then it permits 
types in `can_cast_types`. I may be misunderstanding what coerce means in this 
case



##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -450,19 +473,7 @@ fn coerced_from<'a>(
         {
             Some(type_into.clone())
         }
-        // More coerce rules.
-        // Note that not all rules in `comparison_coercion` can be reused here.
-        // For example, all numeric types can be coerced into Utf8 for 
comparison,
-        // but not for function arguments.
-        _ => comparison_binary_numeric_coercion(type_into, type_from).and_then(

Review Comment:
   cc @viirya 



##########
datafusion/expr/src/signature.rs:
##########
@@ -92,14 +92,22 @@ pub enum TypeSignature {
     /// A function such as `concat` is `Variadic(vec![DataType::Utf8, 
DataType::LargeUtf8])`
     Variadic(Vec<DataType>),
     /// One or more arguments of an arbitrary but equal type.
-    /// DataFusion attempts to coerce all argument types to match the first 
argument's type
+    /// DataFusion attempts to coerce all argument types to match to the 
common type with comparision coercion.
     ///
     /// # Examples
     /// Given types in signature should be coercible to the same final type.
     /// A function such as `make_array` is `VariadicEqual`.
     ///
     /// `make_array(i32, i64) -> make_array(i64, i64)`
     VariadicEqual,
+    /// One or more arguments of an arbitrary but equal type or Null.
+    /// Non-comparison coercion is attempted to match the signatures.
+    ///
+    /// Functions like `coalesce` is `VariadicEqual`.

Review Comment:
   I am a little confused about what "Non-comparison coercion" means in this 
situation. Specifically how comparison coercion and non comparision coercion 
differ 🤔 
   
   Does non comparison coercion mean "type union resolution" (aka 
`type_union_resolution`)? 
   
   Also, I think `coalesce` is now   `VariadicEqualOrNull` (this says the 
opposite)
   
   ```suggestion
       /// One or more arguments of an arbitrary but equal type or Null.
       /// Non-comparison coercion is attempted to match the signatures.
       ///
       /// Functions like `coalesce` is `VariadicEqualOrNull`.
   ```
   
   



##########
datafusion/sqllogictest/test_files/functions.slt:
##########
@@ -1158,3 +1158,347 @@ drop table uuid_table
 
 statement ok
 drop table t
+
+
+# Test coalesce function
+query I
+select coalesce(1, 2, 3);
+----
+1
+
+# test with first null
+query ?T
+select coalesce(null, 3, 2, 1), arrow_typeof(coalesce(null, 3, 2, 1));
+----
+3 Int64
+
+# test with null values
+query ?
+select coalesce(null, null);
+----
+NULL
+
+# cast to float
+query IT
+select 
+  coalesce(1, 2.0),
+  arrow_typeof(coalesce(1, 2.0))
+;
+----
+1 Float64
+
+query RT
+select 
+  coalesce(2.0, 1),
+  arrow_typeof(coalesce(2.0, 1))
+;
+----
+2 Float64
+
+query IT
+select 
+  coalesce(1, arrow_cast(2.0, 'Float32')),
+  arrow_typeof(coalesce(1, arrow_cast(2.0, 'Float32')))
+;
+----
+1 Float32
+
+# test with empty args
+query error
+select coalesce();
+
+# test with different types
+query I
+select coalesce(arrow_cast(1, 'Int32'), arrow_cast(1, 'Int64'));
+----
+1
+
+# test with nulls
+query ?T
+select coalesce(null, null, null), arrow_typeof(coalesce(null, null));
+----
+NULL Null
+
+# i32 and u32, cast to wider type i64
+query IT
+select 
+  coalesce(arrow_cast(2, 'Int32'), arrow_cast(3, 'UInt32')),
+  arrow_typeof(coalesce(arrow_cast(2, 'Int32'), arrow_cast(3, 'UInt32')));
+----
+2 Int64
+
+query IT
+select
+  coalesce(arrow_cast(2, 'Int16'), arrow_cast(3, 'UInt16')),
+  arrow_typeof(coalesce(arrow_cast(2, 'Int16'), arrow_cast(3, 'UInt16')));
+----
+2 Int32
+
+query IT
+select
+  coalesce(arrow_cast(2, 'Int8'), arrow_cast(3, 'UInt8')),
+  arrow_typeof(coalesce(arrow_cast(2, 'Int8'), arrow_cast(3, 'UInt8')));
+----
+2 Int16
+
+# i64 and u32, cast to wider type i64
+query IT
+select 
+  coalesce(2, arrow_cast(3, 'UInt32')),
+  arrow_typeof(coalesce(2, arrow_cast(3, 'UInt32')));
+----
+2 Int64
+
+# TODO: Got types (i64, u64), casting to decimal or double or even i128 if 
supported
+query IT
+select
+  coalesce(2, arrow_cast(3, 'UInt64')),
+  arrow_typeof(coalesce(2, arrow_cast(3, 'UInt64')));
+----
+2 Int64
+
+statement ok
+create table t1 (a bigint, b int, c int) as values (null, null, 1), (null, 2, 
null);
+
+# Follow Postgres and DuckDB behavior, since a is bigint, although the value 
is null, all args are coerced to bigint
+query IT
+select 
+  coalesce(a, b, c), 
+  arrow_typeof(coalesce(a, b, c))
+from t1;
+----
+1 Int64
+2 Int64
+
+# b, c has the same type int, so the result is int
+query IT
+select 
+  coalesce(b, c),
+  arrow_typeof(coalesce(b, c))
+from t1;
+----
+1 Int32
+2 Int32
+
+statement ok
+drop table t1;
+
+# test multi rows
+statement ok
+CREATE TABLE t1(
+  c1 int,
+  c2 int
+) as VALUES
+(1, 2),
+(NULL, 2),
+(1, NULL),
+(NULL, NULL);
+
+query I
+SELECT COALESCE(c1, c2) FROM t1
+----
+1
+2
+1
+NULL
+
+statement ok
+drop table t1;
+
+# Decimal128(7, 2) and int64 are coerced to common wider type Decimal128(22, 2)
+query RT
+select 
+  coalesce(arrow_cast(2, 'Decimal128(7, 2)'), 0),
+  arrow_typeof(coalesce(arrow_cast(2, 'Decimal128(7, 2)'), 0))
+----
+2 Decimal128(22, 2)
+
+query RT
+select 
+  coalesce(arrow_cast(2, 'Decimal256(7, 2)'), 0),
+  arrow_typeof(coalesce(arrow_cast(2, 'Decimal256(7, 2)'), 0));
+----
+2 Decimal256(22, 2)
+
+# coalesce string
+query T?
+select 
+  coalesce('', 'test'),
+  coalesce(null, 'test');
+----
+(empty) test
+
+# coalesce utf8 and large utf8
+query TT
+select 
+  coalesce('a', arrow_cast('b', 'LargeUtf8')),
+  arrow_typeof(coalesce('a', arrow_cast('b', 'LargeUtf8')))
+;
+----
+a LargeUtf8
+
+# coalesce array
+query ?T
+select 
+  coalesce(array[1, 2], array[3, 4]),
+  arrow_typeof(coalesce(array[1, 2], array[3, 4]));
+----
+[1, 2] List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 
0, dict_is_ordered: false, metadata: {} })
+
+query ?T
+select 
+  coalesce(null, array[3, 4]),
+  arrow_typeof(coalesce(array[1, 2], array[3, 4]));
+----
+[3, 4] List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 
0, dict_is_ordered: false, metadata: {} })
+
+# TODO: after switch signature of array to the same with coalesce, this query 
should be fixed
+query error
+select 
+  coalesce(array[1, 2], array[arrow_cast(3, 'Int32'), arrow_cast(4, 'Int32')]),
+  arrow_typeof(coalesce(array[1, 2], array[arrow_cast(3, 'Int32'), 
arrow_cast(4, 'Int32')]));
+
+statement ok
+create table test1 as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)')), 
(null);
+
+# Dictionary and String are not coercible
+query error
+select coalesce(column1, 'none_set') from test1;
+
+query error
+select coalesce(null, column1, 'none_set') from test1;
+
+# explicitly cast to Utf8
+query T
+select coalesce(arrow_cast(column1, 'Utf8'), 'none_set') from test1;
+----
+foo
+none_set
+
+statement ok
+drop table test1
+
+# Numeric and Dictionary are not coercible

Review Comment:
   Likewise, this also works on main: 
   ```
   > select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)'));
   
+----------------------------------------------------------------------------+
   | coalesce(Int64(34),arrow_cast(Int64(123),Utf8("Dictionary(Int32, Int8)"))) 
|
   
+----------------------------------------------------------------------------+
   | 34                                                                         
|
   
+----------------------------------------------------------------------------+
   1 row(s) fetched.
   Elapsed 0.001 seconds.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to