This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 6686e034dd Use LogicalType for TypeSignature `Numeric` and `String`, 
`Coercible` (#13240)
6686e034dd is described below

commit 6686e034dd1008fc7303c482cf664402eca25d67
Author: Jay Zhan <[email protected]>
AuthorDate: Thu Nov 7 07:56:34 2024 +0800

    Use LogicalType for TypeSignature `Numeric` and `String`, `Coercible` 
(#13240)
    
    * use logical type for signature
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fmt & clippy
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * numeric
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fix numeric
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * deprecate coercible
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * introduce numeric and numeric string
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fix doc
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * cleanup
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * add back coercible
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * rename
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fmt
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * rm numeric string signature
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * typo
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * improve doc and err msg
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    ---------
    
    Signed-off-by: jayzhan211 <[email protected]>
---
 datafusion/common/src/types/logical.rs             |   6 +
 datafusion/common/src/types/native.rs              |  45 ++++++-
 datafusion/expr-common/src/signature.rs            |  16 ++-
 datafusion/expr/src/type_coercion/functions.rs     | 149 +++++++++++++--------
 datafusion/functions-aggregate/src/first_last.rs   |  24 +---
 datafusion/functions-aggregate/src/stddev.rs       |   5 +-
 datafusion/functions-aggregate/src/variance.rs     |   5 +-
 datafusion/functions/src/math/abs.rs               |   2 +-
 datafusion/functions/src/string/bit_length.rs      |   2 +-
 datafusion/functions/src/string/concat.rs          |   2 +-
 datafusion/functions/src/string/concat_ws.rs       |   2 +-
 datafusion/functions/src/string/lower.rs           |   2 +-
 datafusion/functions/src/string/octet_length.rs    |   2 +-
 datafusion/functions/src/string/repeat.rs          |  16 +--
 datafusion/functions/src/string/upper.rs           |   2 +-
 .../functions/src/unicode/character_length.rs      |   2 +-
 datafusion/functions/src/unicode/lpad.rs           |   2 +-
 datafusion/sqllogictest/test_files/expr.slt        |  10 +-
 datafusion/sqllogictest/test_files/math.slt        |   9 +-
 datafusion/sqllogictest/test_files/scalar.slt      |   6 +-
 datafusion/sqllogictest/test_files/timestamps.slt  |   7 +
 21 files changed, 199 insertions(+), 117 deletions(-)

diff --git a/datafusion/common/src/types/logical.rs 
b/datafusion/common/src/types/logical.rs
index bde393992a..a65392cae3 100644
--- a/datafusion/common/src/types/logical.rs
+++ b/datafusion/common/src/types/logical.rs
@@ -98,6 +98,12 @@ impl fmt::Debug for dyn LogicalType {
     }
 }
 
+impl std::fmt::Display for dyn LogicalType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{self:?}")
+    }
+}
+
 impl PartialEq for dyn LogicalType {
     fn eq(&self, other: &Self) -> bool {
         self.signature().eq(&other.signature())
diff --git a/datafusion/common/src/types/native.rs 
b/datafusion/common/src/types/native.rs
index bfb546783e..7e326dc15b 100644
--- a/datafusion/common/src/types/native.rs
+++ b/datafusion/common/src/types/native.rs
@@ -24,7 +24,7 @@ use arrow::compute::can_cast_types;
 use arrow_schema::{
     DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit, UnionFields,
 };
-use std::sync::Arc;
+use std::{fmt::Display, sync::Arc};
 
 /// Representation of a type that DataFusion can handle natively. It is a 
subset
 /// of the physical variants in Arrow's native [`DataType`].
@@ -183,6 +183,12 @@ pub enum NativeType {
     Map(LogicalFieldRef),
 }
 
+impl Display for NativeType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "NativeType::{self:?}")
+    }
+}
+
 impl LogicalType for NativeType {
     fn native(&self) -> &NativeType {
         self
@@ -348,6 +354,12 @@ impl LogicalType for NativeType {
 // mapping solutions to provide backwards compatibility while transitioning 
from
 // the purely physical system to a logical / physical system.
 
+impl From<&DataType> for NativeType {
+    fn from(value: &DataType) -> Self {
+        value.clone().into()
+    }
+}
+
 impl From<DataType> for NativeType {
     fn from(value: DataType) -> Self {
         use NativeType::*;
@@ -392,8 +404,33 @@ impl From<DataType> for NativeType {
     }
 }
 
-impl From<&DataType> for NativeType {
-    fn from(value: &DataType) -> Self {
-        value.clone().into()
+impl NativeType {
+    #[inline]
+    pub fn is_numeric(&self) -> bool {
+        use NativeType::*;
+        matches!(
+            self,
+            UInt8
+                | UInt16
+                | UInt32
+                | UInt64
+                | Int8
+                | Int16
+                | Int32
+                | Int64
+                | Float16
+                | Float32
+                | Float64
+                | Decimal(_, _)
+        )
+    }
+
+    #[inline]
+    pub fn is_integer(&self) -> bool {
+        use NativeType::*;
+        matches!(
+            self,
+            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64
+        )
     }
 }
diff --git a/datafusion/expr-common/src/signature.rs 
b/datafusion/expr-common/src/signature.rs
index 24cb54f634..6e78f31e6a 100644
--- a/datafusion/expr-common/src/signature.rs
+++ b/datafusion/expr-common/src/signature.rs
@@ -19,6 +19,7 @@
 //! and return types of functions in DataFusion.
 
 use arrow::datatypes::DataType;
+use datafusion_common::types::LogicalTypeRef;
 
 /// Constant that is used as a placeholder for any valid timezone.
 /// This is used where a function can accept a timestamp type with any
@@ -106,10 +107,10 @@ pub enum TypeSignature {
     /// Exact number of arguments of an exact type
     Exact(Vec<DataType>),
     /// The number of arguments that can be coerced to in order
-    /// For example, `Coercible(vec![DataType::Float64])` accepts
+    /// For example, `Coercible(vec![logical_float64()])` accepts
     /// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]`
     /// since i32 and f32 can be casted to f64
-    Coercible(Vec<DataType>),
+    Coercible(Vec<LogicalTypeRef>),
     /// Fixed number of arguments of arbitrary types
     /// If a function takes 0 argument, its `TypeSignature` should be `Any(0)`
     Any(usize),
@@ -123,7 +124,9 @@ pub enum TypeSignature {
     /// Specifies Signatures for array functions
     ArraySignature(ArrayFunctionSignature),
     /// Fixed number of arguments of numeric types.
-    /// See 
<https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_numeric>
 to know which type is considered numeric
+    /// See [`NativeType::is_numeric`] to know which type is considered numeric
+    ///
+    /// [`NativeType::is_numeric`]: datafusion_common
     Numeric(usize),
     /// Fixed number of arguments of all the same string types.
     /// The precedence of type from high to low is Utf8View, LargeUtf8 and 
Utf8.
@@ -201,7 +204,10 @@ impl TypeSignature {
             TypeSignature::Numeric(num) => {
                 vec![format!("Numeric({num})")]
             }
-            TypeSignature::Exact(types) | TypeSignature::Coercible(types) => {
+            TypeSignature::Coercible(types) => {
+                vec![Self::join_types(types, ", ")]
+            }
+            TypeSignature::Exact(types) => {
                 vec![Self::join_types(types, ", ")]
             }
             TypeSignature::Any(arg_count) => {
@@ -322,7 +328,7 @@ impl Signature {
         }
     }
     /// Target coerce types in order
-    pub fn coercible(target_types: Vec<DataType>, volatility: Volatility) -> 
Self {
+    pub fn coercible(target_types: Vec<LogicalTypeRef>, volatility: 
Volatility) -> Self {
         Self {
             type_signature: TypeSignature::Coercible(target_types),
             volatility,
diff --git a/datafusion/expr/src/type_coercion/functions.rs 
b/datafusion/expr/src/type_coercion/functions.rs
index 85f8e20ba4..5a4d89a0b2 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -23,6 +23,7 @@ use arrow::{
 };
 use datafusion_common::{
     exec_err, internal_datafusion_err, internal_err, plan_err,
+    types::{LogicalType, NativeType},
     utils::{coerced_fixed_size_list_to_list, list_ndims},
     Result,
 };
@@ -395,40 +396,56 @@ fn get_valid_types(
         }
     }
 
+    fn function_length_check(length: usize, expected_length: usize) -> 
Result<()> {
+        if length < 1 {
+            return plan_err!(
+                "The signature expected at least one argument but received 
{expected_length}"
+            );
+        }
+
+        if length != expected_length {
+            return plan_err!(
+                "The signature expected {length} arguments but received 
{expected_length}"
+            );
+        }
+
+        Ok(())
+    }
+
     let valid_types = match signature {
         TypeSignature::Variadic(valid_types) => valid_types
             .iter()
             .map(|valid_type| current_types.iter().map(|_| 
valid_type.clone()).collect())
             .collect(),
         TypeSignature::String(number) => {
-            if *number < 1 {
-                return plan_err!(
-                    "The signature expected at least one argument but received 
{}",
-                    current_types.len()
-                );
-            }
-            if *number != current_types.len() {
-                return plan_err!(
-                    "The signature expected {} arguments but received {}",
-                    number,
-                    current_types.len()
-                );
+            function_length_check(current_types.len(), *number)?;
+
+            let mut new_types = Vec::with_capacity(current_types.len());
+            for data_type in current_types.iter() {
+                let logical_data_type: NativeType = data_type.into();
+                if logical_data_type == NativeType::String {
+                    new_types.push(data_type.to_owned());
+                } else if logical_data_type == NativeType::Null {
+                    // TODO: Switch to Utf8View if all the string functions 
supports Utf8View
+                    new_types.push(DataType::Utf8);
+                } else {
+                    return plan_err!(
+                        "The signature expected NativeType::String but 
received {logical_data_type}"
+                    );
+                }
             }
 
-            fn coercion_rule(
+            // Find the common string type for the given types
+            fn find_common_type(
                 lhs_type: &DataType,
                 rhs_type: &DataType,
             ) -> Result<DataType> {
                 match (lhs_type, rhs_type) {
-                    (DataType::Null, DataType::Null) => Ok(DataType::Utf8),
-                    (DataType::Null, data_type) | (data_type, DataType::Null) 
=> {
-                        coercion_rule(data_type, &DataType::Utf8)
-                    }
                     (DataType::Dictionary(_, lhs), DataType::Dictionary(_, 
rhs)) => {
-                        coercion_rule(lhs, rhs)
+                        find_common_type(lhs, rhs)
                     }
                     (DataType::Dictionary(_, v), other)
-                    | (other, DataType::Dictionary(_, v)) => coercion_rule(v, 
other),
+                    | (other, DataType::Dictionary(_, v)) => 
find_common_type(v, other),
                     _ => {
                         if let Some(coerced_type) = string_coercion(lhs_type, 
rhs_type) {
                             Ok(coerced_type)
@@ -444,15 +461,13 @@ fn get_valid_types(
             }
 
             // Length checked above, safe to unwrap
-            let mut coerced_type = current_types.first().unwrap().to_owned();
-            for t in current_types.iter().skip(1) {
-                coerced_type = coercion_rule(&coerced_type, t)?;
+            let mut coerced_type = new_types.first().unwrap().to_owned();
+            for t in new_types.iter().skip(1) {
+                coerced_type = find_common_type(&coerced_type, t)?;
             }
 
             fn base_type_or_default_type(data_type: &DataType) -> DataType {
-                if data_type.is_null() {
-                    DataType::Utf8
-                } else if let DataType::Dictionary(_, v) = data_type {
+                if let DataType::Dictionary(_, v) = data_type {
                     base_type_or_default_type(v)
                 } else {
                     data_type.to_owned()
@@ -462,22 +477,22 @@ fn get_valid_types(
             vec![vec![base_type_or_default_type(&coerced_type); *number]]
         }
         TypeSignature::Numeric(number) => {
-            if *number < 1 {
-                return plan_err!(
-                    "The signature expected at least one argument but received 
{}",
-                    current_types.len()
-                );
-            }
-            if *number != current_types.len() {
-                return plan_err!(
-                    "The signature expected {} arguments but received {}",
-                    number,
-                    current_types.len()
-                );
-            }
+            function_length_check(current_types.len(), *number)?;
 
-            let mut valid_type = current_types.first().unwrap().clone();
+            // Find common numeric type amongs given types except string
+            let mut valid_type = current_types.first().unwrap().to_owned();
             for t in current_types.iter().skip(1) {
+                let logical_data_type: NativeType = t.into();
+                if logical_data_type == NativeType::Null {
+                    continue;
+                }
+
+                if !logical_data_type.is_numeric() {
+                    return plan_err!(
+                        "The signature expected NativeType::Numeric but 
received {logical_data_type}"
+                    );
+                }
+
                 if let Some(coerced_type) = 
binary_numeric_coercion(&valid_type, t) {
                     valid_type = coerced_type;
                 } else {
@@ -489,31 +504,55 @@ fn get_valid_types(
                 }
             }
 
+            let logical_data_type: NativeType = valid_type.clone().into();
+            // Fallback to default type if we don't know which type to coerced 
to
+            // f64 is chosen since most of the math functions utilize 
Signature::numeric,
+            // and their default type is double precision
+            if logical_data_type == NativeType::Null {
+                valid_type = DataType::Float64;
+            }
+
             vec![vec![valid_type; *number]]
         }
         TypeSignature::Coercible(target_types) => {
-            if target_types.is_empty() {
-                return plan_err!(
-                    "The signature expected at least one argument but received 
{}",
-                    current_types.len()
-                );
-            }
-            if target_types.len() != current_types.len() {
-                return plan_err!(
-                    "The signature expected {} arguments but received {}",
-                    target_types.len(),
-                    current_types.len()
-                );
+            function_length_check(current_types.len(), target_types.len())?;
+
+            // Aim to keep this logic as SIMPLE as possible!
+            // Make sure the corresponding test is covered
+            // If this function becomes COMPLEX, create another new signature!
+            fn can_coerce_to(
+                logical_type: &NativeType,
+                target_type: &NativeType,
+            ) -> bool {
+                if logical_type == target_type {
+                    return true;
+                }
+
+                if logical_type == &NativeType::Null {
+                    return true;
+                }
+
+                if target_type.is_integer() && logical_type.is_integer() {
+                    return true;
+                }
+
+                false
             }
 
-            for (data_type, target_type) in 
current_types.iter().zip(target_types.iter())
+            let mut new_types = Vec::with_capacity(current_types.len());
+            for (current_type, target_type) in
+                current_types.iter().zip(target_types.iter())
             {
-                if !can_cast_types(data_type, target_type) {
-                    return plan_err!("{data_type} is not coercible to 
{target_type}");
+                let logical_type: NativeType = current_type.into();
+                let target_logical_type = target_type.native();
+                if can_coerce_to(&logical_type, target_logical_type) {
+                    let target_type =
+                        target_logical_type.default_cast_for(current_type)?;
+                    new_types.push(target_type);
                 }
             }
 
-            vec![target_types.to_owned()]
+            vec![new_types]
         }
         TypeSignature::Uniform(number, valid_types) => valid_types
             .iter()
diff --git a/datafusion/functions-aggregate/src/first_last.rs 
b/datafusion/functions-aggregate/src/first_last.rs
index 3ca1422668..493c2cde2b 100644
--- a/datafusion/functions-aggregate/src/first_last.rs
+++ b/datafusion/functions-aggregate/src/first_last.rs
@@ -33,8 +33,8 @@ use 
datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Documentation, Expr,
-    ExprFunctionExt, Signature, SortExpr, TypeSignature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, Expr, ExprFunctionExt, 
Signature,
+    SortExpr, Volatility,
 };
 use datafusion_functions_aggregate_common::utils::get_sort_options;
 use datafusion_physical_expr_common::sort_expr::LexOrdering;
@@ -79,15 +79,7 @@ impl Default for FirstValue {
 impl FirstValue {
     pub fn new() -> Self {
         Self {
-            signature: Signature::one_of(
-                vec![
-                    // TODO: we can introduce more strict signature that only 
numeric of array types are allowed
-                    
TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
-                    TypeSignature::Numeric(1),
-                    TypeSignature::Uniform(1, vec![DataType::Utf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::any(1, Volatility::Immutable),
             requirement_satisfied: false,
         }
     }
@@ -406,15 +398,7 @@ impl Default for LastValue {
 impl LastValue {
     pub fn new() -> Self {
         Self {
-            signature: Signature::one_of(
-                vec![
-                    // TODO: we can introduce more strict signature that only 
numeric of array types are allowed
-                    
TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
-                    TypeSignature::Numeric(1),
-                    TypeSignature::Uniform(1, vec![DataType::Utf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::any(1, Volatility::Immutable),
             requirement_satisfied: false,
         }
     }
diff --git a/datafusion/functions-aggregate/src/stddev.rs 
b/datafusion/functions-aggregate/src/stddev.rs
index b785d8e985..d1f43c6661 100644
--- a/datafusion/functions-aggregate/src/stddev.rs
+++ b/datafusion/functions-aggregate/src/stddev.rs
@@ -71,10 +71,7 @@ impl Stddev {
     /// Create a new STDDEV aggregate function
     pub fn new() -> Self {
         Self {
-            signature: Signature::coercible(
-                vec![DataType::Float64],
-                Volatility::Immutable,
-            ),
+            signature: Signature::numeric(1, Volatility::Immutable),
             alias: vec!["stddev_samp".to_string()],
         }
     }
diff --git a/datafusion/functions-aggregate/src/variance.rs 
b/datafusion/functions-aggregate/src/variance.rs
index 0090886109..8daa85a5cc 100644
--- a/datafusion/functions-aggregate/src/variance.rs
+++ b/datafusion/functions-aggregate/src/variance.rs
@@ -82,10 +82,7 @@ impl VarianceSample {
     pub fn new() -> Self {
         Self {
             aliases: vec![String::from("var_sample"), 
String::from("var_samp")],
-            signature: Signature::coercible(
-                vec![DataType::Float64],
-                Volatility::Immutable,
-            ),
+            signature: Signature::numeric(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/math/abs.rs 
b/datafusion/functions/src/math/abs.rs
index 5511a57d85..798939162a 100644
--- a/datafusion/functions/src/math/abs.rs
+++ b/datafusion/functions/src/math/abs.rs
@@ -117,7 +117,7 @@ impl Default for AbsFunc {
 impl AbsFunc {
     pub fn new() -> Self {
         Self {
-            signature: Signature::any(1, Volatility::Immutable),
+            signature: Signature::numeric(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/bit_length.rs 
b/datafusion/functions/src/string/bit_length.rs
index 25b56341fc..d02c2b6a65 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -79,7 +79,7 @@ impl ScalarUDFImpl for BitLengthFunc {
                 ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
                     ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as 
i64)),
                 )),
-                _ => unreachable!(),
+                _ => unreachable!("bit length"),
             },
         }
     }
diff --git a/datafusion/functions/src/string/concat.rs 
b/datafusion/functions/src/string/concat.rs
index e3834b2918..e429a938b2 100644
--- a/datafusion/functions/src/string/concat.rs
+++ b/datafusion/functions/src/string/concat.rs
@@ -186,7 +186,7 @@ impl ScalarUDFImpl for ConcatFunc {
                         }
                     };
                 }
-                _ => unreachable!(),
+                _ => unreachable!("concat"),
             }
         }
 
diff --git a/datafusion/functions/src/string/concat_ws.rs 
b/datafusion/functions/src/string/concat_ws.rs
index 811939c169..611c48a963 100644
--- a/datafusion/functions/src/string/concat_ws.rs
+++ b/datafusion/functions/src/string/concat_ws.rs
@@ -164,7 +164,7 @@ impl ScalarUDFImpl for ConcatWsFunc {
                     ColumnarValueRef::NonNullableArray(string_array)
                 }
             }
-            _ => unreachable!(),
+            _ => unreachable!("concat ws"),
         };
 
         let mut columns = Vec::with_capacity(args.len() - 1);
diff --git a/datafusion/functions/src/string/lower.rs 
b/datafusion/functions/src/string/lower.rs
index ef56120c58..02770e5e22 100644
--- a/datafusion/functions/src/string/lower.rs
+++ b/datafusion/functions/src/string/lower.rs
@@ -108,7 +108,7 @@ mod tests {
         #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
         let result = match func.invoke(&args)? {
             ColumnarValue::Array(result) => result,
-            _ => unreachable!(),
+            _ => unreachable!("lower"),
         };
         assert_eq!(&expected, &result);
         Ok(())
diff --git a/datafusion/functions/src/string/octet_length.rs 
b/datafusion/functions/src/string/octet_length.rs
index 2ac2bf70da..89f71d4571 100644
--- a/datafusion/functions/src/string/octet_length.rs
+++ b/datafusion/functions/src/string/octet_length.rs
@@ -82,7 +82,7 @@ impl ScalarUDFImpl for OctetLengthFunc {
                 ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
                     ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
                 )),
-                _ => unreachable!(),
+                _ => unreachable!("OctetLengthFunc"),
             },
         }
     }
diff --git a/datafusion/functions/src/string/repeat.rs 
b/datafusion/functions/src/string/repeat.rs
index aa69f9c660..249ce15d6d 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -25,11 +25,12 @@ use arrow::array::{
     OffsetSizeTrait, StringViewArray,
 };
 use arrow::datatypes::DataType;
-use arrow::datatypes::DataType::{Int64, LargeUtf8, Utf8, Utf8View};
+use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
 use datafusion_common::cast::as_int64_array;
+use datafusion_common::types::{logical_int64, logical_string};
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 
 #[derive(Debug)]
@@ -46,15 +47,8 @@ impl Default for RepeatFunc {
 impl RepeatFunc {
     pub fn new() -> Self {
         Self {
-            signature: Signature::one_of(
-                vec![
-                    // Planner attempts coercion to the target type starting 
with the most preferred candidate.
-                    // For example, given input `(Utf8View, Int64)`, it first 
tries coercing to `(Utf8View, Int64)`.
-                    // If that fails, it proceeds to `(Utf8, Int64)`.
-                    TypeSignature::Exact(vec![Utf8View, Int64]),
-                    TypeSignature::Exact(vec![Utf8, Int64]),
-                    TypeSignature::Exact(vec![LargeUtf8, Int64]),
-                ],
+            signature: Signature::coercible(
+                vec![logical_string(), logical_int64()],
                 Volatility::Immutable,
             ),
         }
diff --git a/datafusion/functions/src/string/upper.rs 
b/datafusion/functions/src/string/upper.rs
index 68a9d60a16..1293e51fa9 100644
--- a/datafusion/functions/src/string/upper.rs
+++ b/datafusion/functions/src/string/upper.rs
@@ -108,7 +108,7 @@ mod tests {
         #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
         let result = match func.invoke(&args)? {
             ColumnarValue::Array(result) => result,
-            _ => unreachable!(),
+            _ => unreachable!("upper"),
         };
         assert_eq!(&expected, &result);
         Ok(())
diff --git a/datafusion/functions/src/unicode/character_length.rs 
b/datafusion/functions/src/unicode/character_length.rs
index 7858a59664..eca8d3fd49 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -128,7 +128,7 @@ fn character_length(args: &[ArrayRef]) -> Result<ArrayRef> {
             let string_array = args[0].as_string_view();
             character_length_general::<Int32Type, _>(string_array)
         }
-        _ => unreachable!(),
+        _ => unreachable!("CharacterLengthFunc"),
     }
 }
 
diff --git a/datafusion/functions/src/unicode/lpad.rs 
b/datafusion/functions/src/unicode/lpad.rs
index 767eda203c..a639bcedcd 100644
--- a/datafusion/functions/src/unicode/lpad.rs
+++ b/datafusion/functions/src/unicode/lpad.rs
@@ -162,7 +162,7 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
             length_array,
             &args[2],
         ),
-        (_, _) => unreachable!(),
+        (_, _) => unreachable!("lpad"),
     }
 }
 
diff --git a/datafusion/sqllogictest/test_files/expr.slt 
b/datafusion/sqllogictest/test_files/expr.slt
index 182afff7a6..c653113fd4 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -22,7 +22,7 @@ SELECT true, false, false = false, true = false
 true false true false
 
 # test_mathematical_expressions_with_null
-query RRRRRRRRRRRRRRRRRR?RRRRRIIIRRRRRRBB
+query RRRRRRRRRRRRRRRRRRRRRRRRIIIRRRRRRBB
 SELECT
     sqrt(NULL),
     cbrt(NULL),
@@ -550,6 +550,14 @@ SELECT repeat(NULL, 4)
 ----
 NULL
 
+query T
+select repeat('-1.2', arrow_cast(3, 'Int32'));
+----
+-1.2-1.2-1.2
+
+query error DataFusion error: Error during planning: Error during planning: 
Coercion from \[Utf8, Float64\] to the signature
+select repeat('-1.2', 3.2);
+
 query T
 SELECT replace('abcdefabcdef', 'cd', 'XX')
 ----
diff --git a/datafusion/sqllogictest/test_files/math.slt 
b/datafusion/sqllogictest/test_files/math.slt
index 1bc972a3e3..e86d78a623 100644
--- a/datafusion/sqllogictest/test_files/math.slt
+++ b/datafusion/sqllogictest/test_files/math.slt
@@ -126,9 +126,16 @@ statement error
 SELECT abs(1, 2);
 
 # abs: unsupported argument type
-query error DataFusion error: This feature is not implemented: Unsupported 
data type Utf8 for function abs
+query error This feature is not implemented: Unsupported data type Utf8 for 
function abs
 SELECT abs('foo');
 
+# abs: numeric string
+# TODO: In Postgres, '-1.2' is unknown type and interpreted to float8 so they 
don't fail on this query
+query error DataFusion error: This feature is not implemented: Unsupported 
data type Utf8 for function abs
+select abs('-1.2');
+
+query error DataFusion error: This feature is not implemented: Unsupported 
data type Utf8 for function abs
+select abs(arrow_cast('-1.2', 'Utf8'));
 
 statement ok
 CREATE TABLE test_nullable_integer(
diff --git a/datafusion/sqllogictest/test_files/scalar.slt 
b/datafusion/sqllogictest/test_files/scalar.slt
index 145172f31f..fe7d1a90c5 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -67,13 +67,13 @@ CREATE TABLE small_floats(
 ## abs
 
 # abs scalar function
-query III rowsort
+query III
 select abs(64), abs(0), abs(-64);
 ----
 64 0 64
 
 # abs scalar nulls
-query ? rowsort
+query R
 select abs(null);
 ----
 NULL
@@ -1940,7 +1940,7 @@ select position('' in '')
 ----
 1
 
-query error DataFusion error: Error during planning: Error during planning: 
Int64 and Int64 are not coercible to a common string
+query error DataFusion error: Error during planning: Error during planning: 
The signature expected NativeType::String but received NativeType::Int64
 select position(1 in 1)
 
 query I
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt 
b/datafusion/sqllogictest/test_files/timestamps.slt
index 70f7dedeac..a80036df2c 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -3351,3 +3351,10 @@ drop view t_utc;
 
 statement ok
 drop view t_europe;
+
+# TODO: In Postgres, '-1' is unknown type and interpreted to float8 so they 
don't fail on this query
+query error DataFusion error: Arrow error: Parser error: Error parsing 
timestamp from '\-1': timestamp must contain at least 10 characters
+select to_timestamp('-1');
+
+query error DataFusion error: Arrow error: Parser error: Error parsing 
timestamp from '\-1': timestamp must contain at least 10 characters
+select to_timestamp(arrow_cast('-1', 'Utf8'));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to