This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 47664df42f Introduce Signature::String and return error if  input of 
`strpos` is integer (#12751)
47664df42f is described below

commit 47664df42f37823ccba9c0a9ff9a122ac840babd
Author: Jay Zhan <[email protected]>
AuthorDate: Tue Oct 8 15:05:42 2024 +0800

    Introduce Signature::String and return error if  input of `strpos` is 
integer (#12751)
    
    * fix sig
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fix
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fix error
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fix all signature
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fix all signature
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * change default type
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * clippy
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * fix docs
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * rm deadcode
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * cleanup
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * cleanup
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * rm test
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    ---------
    
    Signed-off-by: jayzhan211 <[email protected]>
---
 datafusion/core/tests/expr_api/mod.rs              |   8 +-
 datafusion/expr-common/src/signature.rs            |  18 +-
 datafusion/expr-common/src/type_coercion/binary.rs |   2 +-
 datafusion/expr/src/type_coercion/functions.rs     |  67 +++++++-
 datafusion/functions/src/macros.rs                 |   6 +-
 datafusion/functions/src/math/nans.rs              |   8 +-
 datafusion/functions/src/math/power.rs             |   8 +-
 datafusion/functions/src/regex/regexplike.rs       |  11 +-
 datafusion/functions/src/regex/regexpmatch.rs      |  11 +-
 datafusion/functions/src/regex/regexpreplace.rs    |  10 +-
 datafusion/functions/src/string/ascii.rs           |   7 +-
 datafusion/functions/src/string/bit_length.rs      |   7 +-
 datafusion/functions/src/string/btrim.rs           |  17 +-
 datafusion/functions/src/string/contains.rs        | 184 +--------------------
 datafusion/functions/src/string/ends_with.rs       |  13 +-
 datafusion/functions/src/string/initcap.rs         |   7 +-
 datafusion/functions/src/string/levenshtein.rs     |  10 +-
 datafusion/functions/src/string/lower.rs           |   7 +-
 datafusion/functions/src/string/ltrim.rs           |  14 +-
 datafusion/functions/src/string/octet_length.rs    |   7 +-
 datafusion/functions/src/string/overlay.rs         |  15 +-
 datafusion/functions/src/string/repeat.rs          |  14 +-
 datafusion/functions/src/string/replace.rs         |  11 +-
 datafusion/functions/src/string/rtrim.rs           |  14 +-
 datafusion/functions/src/string/split_part.rs      |  21 ++-
 datafusion/functions/src/string/starts_with.rs     |  13 +-
 datafusion/functions/src/string/upper.rs           |   7 +-
 datafusion/functions/src/unicode/strpos.rs         |  20 +--
 datafusion/sqllogictest/test_files/scalar.slt      |   6 +-
 .../test_files/string/init_data.slt.part           |   1 -
 .../sqllogictest/test_files/string/string_view.slt |  24 ++-
 31 files changed, 184 insertions(+), 384 deletions(-)

diff --git a/datafusion/core/tests/expr_api/mod.rs 
b/datafusion/core/tests/expr_api/mod.rs
index cbd8926721..81a3336100 100644
--- a/datafusion/core/tests/expr_api/mod.rs
+++ b/datafusion/core/tests/expr_api/mod.rs
@@ -37,14 +37,14 @@ mod simplification;
 fn test_octet_length() {
     #[rustfmt::skip]
     evaluate_expr_test(
-        octet_length(col("list")),
+        octet_length(col("id")),
         vec![
             "+------+",
             "| expr |",
             "+------+",
-            "| 5    |",
-            "| 18   |",
-            "| 6    |",
+            "| 1    |",
+            "| 1    |",
+            "| 1    |",
             "+------+",
         ],
     );
diff --git a/datafusion/expr-common/src/signature.rs 
b/datafusion/expr-common/src/signature.rs
index d1553b3315..320e1303a2 100644
--- a/datafusion/expr-common/src/signature.rs
+++ b/datafusion/expr-common/src/signature.rs
@@ -125,6 +125,11 @@ pub enum TypeSignature {
     /// Fixed number of arguments of numeric types.
     /// See 
<https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_numeric>
 to know which type is considered numeric
     Numeric(usize),
+    /// Fixed number of arguments of all the same string types.
+    /// The precedence of type from high to low is Utf8View, LargeUtf8 and 
Utf8.
+    /// Null is considered as Utf8 by default
+    /// Dictionary with string value type is also handled.
+    String(usize),
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
@@ -190,8 +195,11 @@ impl TypeSignature {
                     .collect::<Vec<String>>()
                     .join(", ")]
             }
+            TypeSignature::String(num) => {
+                vec![format!("String({num})")]
+            }
             TypeSignature::Numeric(num) => {
-                vec![format!("Numeric({})", num)]
+                vec![format!("Numeric({num})")]
             }
             TypeSignature::Exact(types) | TypeSignature::Coercible(types) => {
                 vec![Self::join_types(types, ", ")]
@@ -280,6 +288,14 @@ impl Signature {
         }
     }
 
+    /// A specified number of string arguments
+    pub fn string(arg_count: usize, volatility: Volatility) -> Self {
+        Self {
+            type_signature: TypeSignature::String(arg_count),
+            volatility,
+        }
+    }
+
     /// An arbitrary number of arguments of any type.
     pub fn variadic_any(volatility: Volatility) -> Self {
         Self {
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs 
b/datafusion/expr-common/src/type_coercion/binary.rs
index e7c4f65a1b..6d66b8b4df 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -959,7 +959,7 @@ fn string_concat_internal_coercion(
 /// based on the observation that StringArray to StringViewArray is cheap but 
not vice versa.
 ///
 /// Between Utf8 and LargeUtf8, we coerce to LargeUtf8.
-fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> 
Option<DataType> {
+pub fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> 
Option<DataType> {
     use arrow::datatypes::DataType::*;
     match (lhs_type, rhs_type) {
         // If Utf8View is in any side, we coerce to Utf8View.
diff --git a/datafusion/expr/src/type_coercion/functions.rs 
b/datafusion/expr/src/type_coercion/functions.rs
index 9000ac2538..143e00fa40 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -26,8 +26,9 @@ use datafusion_common::{
     utils::{coerced_fixed_size_list_to_list, list_ndims},
     Result,
 };
-use datafusion_expr_common::signature::{
-    ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, TIMEZONE_WILDCARD,
+use datafusion_expr_common::{
+    signature::{ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, 
TIMEZONE_WILDCARD},
+    type_coercion::binary::string_coercion,
 };
 use std::sync::Arc;
 
@@ -176,6 +177,7 @@ fn is_well_supported_signature(type_signature: 
&TypeSignature) -> bool {
         type_signature,
         TypeSignature::UserDefined
             | TypeSignature::Numeric(_)
+            | TypeSignature::String(_)
             | TypeSignature::Coercible(_)
             | TypeSignature::Any(_)
     )
@@ -381,6 +383,67 @@ fn get_valid_types(
             .iter()
             .map(|valid_type| current_types.iter().map(|_| 
valid_type.clone()).collect())
             .collect(),
+        TypeSignature::String(number) => {
+            if *number < 1 {
+                return plan_err!(
+                    "The signature expected at least one argument but received 
{}",
+                    current_types.len()
+                );
+            }
+            if *number != current_types.len() {
+                return plan_err!(
+                    "The signature expected {} arguments but received {}",
+                    number,
+                    current_types.len()
+                );
+            }
+
+            fn coercion_rule(
+                lhs_type: &DataType,
+                rhs_type: &DataType,
+            ) -> Result<DataType> {
+                match (lhs_type, rhs_type) {
+                    (DataType::Null, DataType::Null) => Ok(DataType::Utf8),
+                    (DataType::Null, data_type) | (data_type, DataType::Null) 
=> {
+                        coercion_rule(data_type, &DataType::Utf8)
+                    }
+                    (DataType::Dictionary(_, lhs), DataType::Dictionary(_, 
rhs)) => {
+                        coercion_rule(lhs, rhs)
+                    }
+                    (DataType::Dictionary(_, v), other)
+                    | (other, DataType::Dictionary(_, v)) => coercion_rule(v, 
other),
+                    _ => {
+                        if let Some(coerced_type) = string_coercion(lhs_type, 
rhs_type) {
+                            Ok(coerced_type)
+                        } else {
+                            plan_err!(
+                                "{} and {} are not coercible to a common 
string type",
+                                lhs_type,
+                                rhs_type
+                            )
+                        }
+                    }
+                }
+            }
+
+            // Length checked above, safe to unwrap
+            let mut coerced_type = current_types.first().unwrap().to_owned();
+            for t in current_types.iter().skip(1) {
+                coerced_type = coercion_rule(&coerced_type, t)?;
+            }
+
+            fn base_type_or_default_type(data_type: &DataType) -> DataType {
+                if data_type.is_null() {
+                    DataType::Utf8
+                } else if let DataType::Dictionary(_, v) = data_type {
+                    base_type_or_default_type(v)
+                } else {
+                    data_type.to_owned()
+                }
+            }
+
+            vec![vec![base_type_or_default_type(&coerced_type); *number]]
+        }
         TypeSignature::Numeric(number) => {
             if *number < 1 {
                 return plan_err!(
diff --git a/datafusion/functions/src/macros.rs 
b/datafusion/functions/src/macros.rs
index e47818bc86..e850673ef8 100644
--- a/datafusion/functions/src/macros.rs
+++ b/datafusion/functions/src/macros.rs
@@ -284,7 +284,7 @@ macro_rules! make_math_binary_udf {
             use arrow::datatypes::DataType;
             use datafusion_common::{exec_err, DataFusionError, Result};
             use datafusion_expr::sort_properties::{ExprProperties, 
SortProperties};
-            use datafusion_expr::TypeSignature::*;
+            use datafusion_expr::TypeSignature;
             use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, 
Volatility};
 
             #[derive(Debug)]
@@ -298,8 +298,8 @@ macro_rules! make_math_binary_udf {
                     Self {
                         signature: Signature::one_of(
                             vec![
-                                Exact(vec![Float32, Float32]),
-                                Exact(vec![Float64, Float64]),
+                                TypeSignature::Exact(vec![Float32, Float32]),
+                                TypeSignature::Exact(vec![Float64, Float64]),
                             ],
                             Volatility::Immutable,
                         ),
diff --git a/datafusion/functions/src/math/nans.rs 
b/datafusion/functions/src/math/nans.rs
index 2bd704a7de..b02839b40b 100644
--- a/datafusion/functions/src/math/nans.rs
+++ b/datafusion/functions/src/math/nans.rs
@@ -19,10 +19,9 @@
 
 use arrow::datatypes::DataType;
 use datafusion_common::{exec_err, DataFusionError, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ColumnarValue, TypeSignature};
 
 use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
-use datafusion_expr::TypeSignature::*;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
 use std::sync::Arc;
@@ -43,7 +42,10 @@ impl IsNanFunc {
         use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![Exact(vec![Float32]), Exact(vec![Float64])],
+                vec![
+                    TypeSignature::Exact(vec![Float32]),
+                    TypeSignature::Exact(vec![Float64]),
+                ],
                 Volatility::Immutable,
             ),
         }
diff --git a/datafusion/functions/src/math/power.rs 
b/datafusion/functions/src/math/power.rs
index 5b790fb56d..831f983d59 100644
--- a/datafusion/functions/src/math/power.rs
+++ b/datafusion/functions/src/math/power.rs
@@ -25,10 +25,9 @@ use datafusion_common::{
 };
 use datafusion_expr::expr::ScalarFunction;
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
-use datafusion_expr::{ColumnarValue, Expr, ScalarUDF};
+use datafusion_expr::{ColumnarValue, Expr, ScalarUDF, TypeSignature};
 
 use arrow::array::{ArrayRef, Float64Array, Int64Array};
-use datafusion_expr::TypeSignature::*;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
 use std::sync::Arc;
@@ -52,7 +51,10 @@ impl PowerFunc {
         use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![Exact(vec![Int64, Int64]), Exact(vec![Float64, Float64])],
+                vec![
+                    TypeSignature::Exact(vec![Int64, Int64]),
+                    TypeSignature::Exact(vec![Float64, Float64]),
+                ],
                 Volatility::Immutable,
             ),
             aliases: vec![String::from("pow")],
diff --git a/datafusion/functions/src/regex/regexplike.rs 
b/datafusion/functions/src/regex/regexplike.rs
index e245ea9fa7..a698913fff 100644
--- a/datafusion/functions/src/regex/regexplike.rs
+++ b/datafusion/functions/src/regex/regexplike.rs
@@ -26,8 +26,7 @@ use datafusion_common::{
     cast::as_generic_string_array, internal_err, DataFusionError, Result,
 };
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
 use std::sync::{Arc, OnceLock};
@@ -87,10 +86,10 @@ impl RegexpLikeFunc {
         Self {
             signature: Signature::one_of(
                 vec![
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8]),
-                    Exact(vec![Utf8, Utf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
+                    TypeSignature::Exact(vec![Utf8, Utf8]),
+                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8]),
+                    TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
+                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, 
LargeUtf8]),
                 ],
                 Volatility::Immutable,
             ),
diff --git a/datafusion/functions/src/regex/regexpmatch.rs 
b/datafusion/functions/src/regex/regexpmatch.rs
index 498b591620..bfec97f92c 100644
--- a/datafusion/functions/src/regex/regexpmatch.rs
+++ b/datafusion/functions/src/regex/regexpmatch.rs
@@ -26,8 +26,7 @@ use datafusion_common::{arrow_datafusion_err, plan_err};
 use datafusion_common::{
     cast::as_generic_string_array, internal_err, DataFusionError, Result,
 };
-use datafusion_expr::ColumnarValue;
-use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ColumnarValue, TypeSignature};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
 use std::sync::Arc;
@@ -53,10 +52,10 @@ impl RegexpMatchFunc {
                     // For example, given input `(Utf8View, Utf8)`, it first 
tries coercing to `(Utf8, Utf8)`.
                     // If that fails, it proceeds to `(LargeUtf8, Utf8)`.
                     // TODO: Native support Utf8View for regexp_match.
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8]),
-                    Exact(vec![Utf8, Utf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
+                    TypeSignature::Exact(vec![Utf8, Utf8]),
+                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8]),
+                    TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
+                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, 
LargeUtf8]),
                 ],
                 Volatility::Immutable,
             ),
diff --git a/datafusion/functions/src/regex/regexpreplace.rs 
b/datafusion/functions/src/regex/regexpreplace.rs
index 3eb72a1fb5..bce8752af2 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -33,7 +33,7 @@ use datafusion_common::{
 };
 use datafusion_expr::function::Hint;
 use datafusion_expr::ColumnarValue;
-use datafusion_expr::TypeSignature::*;
+use datafusion_expr::TypeSignature;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use regex::Regex;
 use std::any::Any;
@@ -56,10 +56,10 @@ impl RegexpReplaceFunc {
         Self {
             signature: Signature::one_of(
                 vec![
-                    Exact(vec![Utf8, Utf8, Utf8]),
-                    Exact(vec![Utf8View, Utf8, Utf8]),
-                    Exact(vec![Utf8, Utf8, Utf8, Utf8]),
-                    Exact(vec![Utf8View, Utf8, Utf8, Utf8]),
+                    TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
+                    TypeSignature::Exact(vec![Utf8View, Utf8, Utf8]),
+                    TypeSignature::Exact(vec![Utf8, Utf8, Utf8, Utf8]),
+                    TypeSignature::Exact(vec![Utf8View, Utf8, Utf8, Utf8]),
                 ],
                 Volatility::Immutable,
             ),
diff --git a/datafusion/functions/src/string/ascii.rs 
b/datafusion/functions/src/string/ascii.rs
index 1e828d0667..8d61661f97 100644
--- a/datafusion/functions/src/string/ascii.rs
+++ b/datafusion/functions/src/string/ascii.rs
@@ -39,13 +39,8 @@ impl Default for AsciiFunc {
 
 impl AsciiFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::uniform(
-                1,
-                vec![Utf8, LargeUtf8, Utf8View],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/bit_length.rs 
b/datafusion/functions/src/string/bit_length.rs
index bd22c1504b..7d162e7d41 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -39,13 +39,8 @@ impl Default for BitLengthFunc {
 
 impl BitLengthFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::uniform(
-                1,
-                vec![Utf8, LargeUtf8],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/btrim.rs 
b/datafusion/functions/src/string/btrim.rs
index b2e79a7b89..82b7599f07 100644
--- a/datafusion/functions/src/string/btrim.rs
+++ b/datafusion/functions/src/string/btrim.rs
@@ -22,9 +22,9 @@ use arrow::datatypes::DataType;
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::function::Hint;
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
-use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, 
Volatility,
+};
 use std::any::Any;
 use std::sync::OnceLock;
 
@@ -49,18 +49,9 @@ impl Default for BTrimFunc {
 
 impl BTrimFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![
-                    // Planner attempts coercion to the target type starting 
with the most preferred candidate.
-                    // For example, given input `(Utf8View, Utf8)`, it first 
tries coercing to `(Utf8View, Utf8View)`.
-                    // If that fails, it proceeds to `(Utf8, Utf8)`.
-                    Exact(vec![Utf8View, Utf8View]),
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![Utf8View]),
-                    Exact(vec![Utf8]),
-                ],
+                vec![TypeSignature::String(2), TypeSignature::String(1)],
                 Volatility::Immutable,
             ),
             aliases: vec![String::from("trim")],
diff --git a/datafusion/functions/src/string/contains.rs 
b/datafusion/functions/src/string/contains.rs
index 7fc1fa876c..0f75731aa1 100644
--- a/datafusion/functions/src/string/contains.rs
+++ b/datafusion/functions/src/string/contains.rs
@@ -16,19 +16,17 @@
 // under the License.
 
 use crate::utils::make_scalar_function;
-
 use arrow::array::{Array, ArrayRef, AsArray, GenericStringArray, 
StringViewArray};
+use arrow::compute::regexp_is_match;
 use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View};
 use datafusion_common::exec_err;
 use datafusion_common::DataFusionError;
 use datafusion_common::Result;
-use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, Signature, Volatility};
-use datafusion_expr::{Documentation, ScalarUDFImpl};
-
-use arrow::compute::regexp_is_match;
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
 use std::any::Any;
 use std::sync::{Arc, OnceLock};
 
@@ -45,22 +43,8 @@ impl Default for ContainsFunc {
 
 impl ContainsFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![Utf8View, Utf8View]),
-                    Exact(vec![Utf8View, Utf8]),
-                    Exact(vec![Utf8View, LargeUtf8]),
-                    Exact(vec![Utf8, Utf8View]),
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![Utf8, LargeUtf8]),
-                    Exact(vec![LargeUtf8, Utf8View]),
-                    Exact(vec![LargeUtf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(2, Volatility::Immutable),
         }
     }
 }
@@ -132,39 +116,6 @@ pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, 
DataFusionError> {
 
             Ok(Arc::new(res) as ArrayRef)
         }
-        (Utf8View, Utf8) => {
-            let mod_str = args[0].as_string_view();
-            let match_str = args[1].as_string::<i32>();
-            let res = regexp_is_match::<
-                StringViewArray,
-                GenericStringArray<i32>,
-                GenericStringArray<i32>,
-            >(mod_str, match_str, None)?;
-
-            Ok(Arc::new(res) as ArrayRef)
-        }
-        (Utf8View, LargeUtf8) => {
-            let mod_str = args[0].as_string_view();
-            let match_str = args[1].as_string::<i64>();
-            let res = regexp_is_match::<
-                StringViewArray,
-                GenericStringArray<i64>,
-                GenericStringArray<i32>,
-            >(mod_str, match_str, None)?;
-
-            Ok(Arc::new(res) as ArrayRef)
-        }
-        (Utf8, Utf8View) => {
-            let mod_str = args[0].as_string::<i32>();
-            let match_str = args[1].as_string_view();
-            let res = regexp_is_match::<
-                GenericStringArray<i32>,
-                StringViewArray,
-                GenericStringArray<i32>,
-            >(mod_str, match_str, None)?;
-
-            Ok(Arc::new(res) as ArrayRef)
-        }
         (Utf8, Utf8) => {
             let mod_str = args[0].as_string::<i32>();
             let match_str = args[1].as_string::<i32>();
@@ -176,39 +127,6 @@ pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, 
DataFusionError> {
 
             Ok(Arc::new(res) as ArrayRef)
         }
-        (Utf8, LargeUtf8) => {
-            let mod_str = args[0].as_string::<i32>();
-            let match_str = args[1].as_string::<i64>();
-            let res = regexp_is_match::<
-                GenericStringArray<i32>,
-                GenericStringArray<i64>,
-                GenericStringArray<i32>,
-            >(mod_str, match_str, None)?;
-
-            Ok(Arc::new(res) as ArrayRef)
-        }
-        (LargeUtf8, Utf8View) => {
-            let mod_str = args[0].as_string::<i64>();
-            let match_str = args[1].as_string_view();
-            let res = regexp_is_match::<
-                GenericStringArray<i64>,
-                StringViewArray,
-                GenericStringArray<i32>,
-            >(mod_str, match_str, None)?;
-
-            Ok(Arc::new(res) as ArrayRef)
-        }
-        (LargeUtf8, Utf8) => {
-            let mod_str = args[0].as_string::<i64>();
-            let match_str = args[1].as_string::<i32>();
-            let res = regexp_is_match::<
-                GenericStringArray<i64>,
-                GenericStringArray<i32>,
-                GenericStringArray<i32>,
-            >(mod_str, match_str, None)?;
-
-            Ok(Arc::new(res) as ArrayRef)
-        }
         (LargeUtf8, LargeUtf8) => {
             let mod_str = args[0].as_string::<i64>();
             let match_str = args[1].as_string::<i64>();
@@ -225,95 +143,3 @@ pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, 
DataFusionError> {
         }
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use crate::string::contains::ContainsFunc;
-    use crate::utils::test::test_function;
-    use arrow::array::Array;
-    use arrow::{array::BooleanArray, datatypes::DataType::Boolean};
-    use datafusion_common::Result;
-    use datafusion_common::ScalarValue;
-    use datafusion_expr::ColumnarValue;
-    use datafusion_expr::ScalarUDFImpl;
-    #[test]
-    fn test_functions() -> Result<()> {
-        test_function!(
-            ContainsFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("alphabet")),
-                ColumnarValue::Scalar(ScalarValue::from("alph")),
-            ],
-            Ok(Some(true)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            ContainsFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("alphabet")),
-                ColumnarValue::Scalar(ScalarValue::from("dddddd")),
-            ],
-            Ok(Some(false)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            ContainsFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("alphabet")),
-                ColumnarValue::Scalar(ScalarValue::from("pha")),
-            ],
-            Ok(Some(true)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-
-        test_function!(
-            ContainsFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
-                    "Apache"
-                )))),
-                
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("pac")))),
-            ],
-            Ok(Some(true)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            ContainsFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
-                    "Apache"
-                )))),
-                
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("ap")))),
-            ],
-            Ok(Some(false)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            ContainsFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
-                    "Apache"
-                )))),
-                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from(
-                    "DataFusion"
-                )))),
-            ],
-            Ok(Some(false)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-
-        Ok(())
-    }
-}
diff --git a/datafusion/functions/src/string/ends_with.rs 
b/datafusion/functions/src/string/ends_with.rs
index 786010764c..8c90cbc3b1 100644
--- a/datafusion/functions/src/string/ends_with.rs
+++ b/datafusion/functions/src/string/ends_with.rs
@@ -24,7 +24,6 @@ use arrow::datatypes::DataType;
 use crate::utils::make_scalar_function;
 use datafusion_common::{internal_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
 use datafusion_expr::{ColumnarValue, Documentation, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 
@@ -42,17 +41,7 @@ impl Default for EndsWithFunc {
 impl EndsWithFunc {
     pub fn new() -> Self {
         Self {
-            signature: Signature::one_of(
-                vec![
-                    // Planner attempts coercion to the target type starting 
with the most preferred candidate.
-                    // For example, given input `(Utf8View, Utf8)`, it first 
tries coercing to `(Utf8View, Utf8View)`.
-                    // If that fails, it proceeds to `(Utf8, Utf8)`.
-                    Exact(vec![DataType::Utf8View, DataType::Utf8View]),
-                    Exact(vec![DataType::Utf8, DataType::Utf8]),
-                    Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(2, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/initcap.rs 
b/datafusion/functions/src/string/initcap.rs
index ffd60bb6e9..78c95b9a5e 100644
--- a/datafusion/functions/src/string/initcap.rs
+++ b/datafusion/functions/src/string/initcap.rs
@@ -41,13 +41,8 @@ impl Default for InitcapFunc {
 
 impl InitcapFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::uniform(
-                1,
-                vec![Utf8, LargeUtf8, Utf8View],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/levenshtein.rs 
b/datafusion/functions/src/string/levenshtein.rs
index 2f121426f1..558e71239f 100644
--- a/datafusion/functions/src/string/levenshtein.rs
+++ b/datafusion/functions/src/string/levenshtein.rs
@@ -26,7 +26,6 @@ use datafusion_common::cast::{as_generic_string_array, 
as_string_view_array};
 use datafusion_common::utils::datafusion_strsim;
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
 use datafusion_expr::{ColumnarValue, Documentation};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 
@@ -44,14 +43,7 @@ impl Default for LevenshteinFunc {
 impl LevenshteinFunc {
     pub fn new() -> Self {
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![DataType::Utf8View, DataType::Utf8View]),
-                    Exact(vec![DataType::Utf8, DataType::Utf8]),
-                    Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(2, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs
index 25acfc2760..f82b11ca90 100644
--- a/datafusion/functions/src/string/lower.rs
+++ b/datafusion/functions/src/string/lower.rs
@@ -39,13 +39,8 @@ impl Default for LowerFunc {
 
 impl LowerFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::uniform(
-                1,
-                vec![Utf8, LargeUtf8, Utf8View],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs
index 1fcde9e97a..b64dcda721 100644
--- a/datafusion/functions/src/string/ltrim.rs
+++ b/datafusion/functions/src/string/ltrim.rs
@@ -25,8 +25,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::function::Hint;
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 
 /// Returns the longest string  with leading characters removed. If the characters are not specified, whitespace is removed.
@@ -49,18 +48,9 @@ impl Default for LtrimFunc {
 
 impl LtrimFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![
-                    // Planner attempts coercion to the target type starting with the most preferred candidate.
-                    // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
-                    // If that fails, it proceeds to `(Utf8, Utf8)`.
-                    Exact(vec![Utf8View, Utf8View]),
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![Utf8View]),
-                    Exact(vec![Utf8]),
-                ],
+                vec![TypeSignature::String(2), TypeSignature::String(1)],
                 Volatility::Immutable,
             ),
         }
diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs
index 195a6c296c..04094396fa 100644
--- a/datafusion/functions/src/string/octet_length.rs
+++ b/datafusion/functions/src/string/octet_length.rs
@@ -39,13 +39,8 @@ impl Default for OctetLengthFunc {
 
 impl OctetLengthFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::uniform(
-                1,
-                vec![Utf8, LargeUtf8, Utf8View],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/string/overlay.rs
index ec33840a0b..3b31bc3608 100644
--- a/datafusion/functions/src/string/overlay.rs
+++ b/datafusion/functions/src/string/overlay.rs
@@ -27,8 +27,7 @@ use datafusion_common::cast::{
 };
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 
 #[derive(Debug)]
@@ -48,12 +47,12 @@ impl OverlayFunc {
         Self {
             signature: Signature::one_of(
                 vec![
-                    Exact(vec![Utf8View, Utf8View, Int64, Int64]),
-                    Exact(vec![Utf8, Utf8, Int64, Int64]),
-                    Exact(vec![LargeUtf8, LargeUtf8, Int64, Int64]),
-                    Exact(vec![Utf8View, Utf8View, Int64]),
-                    Exact(vec![Utf8, Utf8, Int64]),
-                    Exact(vec![LargeUtf8, LargeUtf8, Int64]),
+                    TypeSignature::Exact(vec![Utf8View, Utf8View, Int64, Int64]),
+                    TypeSignature::Exact(vec![Utf8, Utf8, Int64, Int64]),
+                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64, Int64]),
+                    TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
+                    TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
+                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
                 ],
                 Volatility::Immutable,
             ),
diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs
index 3abd1767bb..fda9c7a13d 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -18,20 +18,18 @@
 use std::any::Any;
 use std::sync::{Arc, OnceLock};
 
+use crate::string::common::StringArrayType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
 use arrow::array::{
     ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
     OffsetSizeTrait, StringViewArray,
 };
 use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::{Int64, LargeUtf8, Utf8, Utf8View};
-
-use crate::string::common::StringArrayType;
-use crate::utils::{make_scalar_function, utf8_to_str_type};
 use datafusion_common::cast::as_int64_array;
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 
 #[derive(Debug)]
@@ -53,9 +51,9 @@ impl RepeatFunc {
                     // Planner attempts coercion to the target type starting with the most preferred candidate.
                     // For example, given input `(Utf8View, Int64)`, it first tries coercing to `(Utf8View, Int64)`.
                     // If that fails, it proceeds to `(Utf8, Int64)`.
-                    Exact(vec![Utf8View, Int64]),
-                    Exact(vec![Utf8, Int64]),
-                    Exact(vec![LargeUtf8, Int64]),
+                    TypeSignature::Exact(vec![Utf8View, Int64]),
+                    TypeSignature::Exact(vec![Utf8, Int64]),
+                    TypeSignature::Exact(vec![LargeUtf8, Int64]),
                 ],
                 Volatility::Immutable,
             ),
diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs
index 7c985b44ab..612cd7276b 100644
--- a/datafusion/functions/src/string/replace.rs
+++ b/datafusion/functions/src/string/replace.rs
@@ -25,7 +25,6 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
 use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
 use datafusion_expr::{ColumnarValue, Documentation, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 
@@ -42,16 +41,8 @@ impl Default for ReplaceFunc {
 
 impl ReplaceFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![Utf8View, Utf8View, Utf8View]),
-                    Exact(vec![Utf8, Utf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(3, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs
index 6743ad99d3..1a27502a20 100644
--- a/datafusion/functions/src/string/rtrim.rs
+++ b/datafusion/functions/src/string/rtrim.rs
@@ -25,8 +25,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::function::Hint;
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 
 /// Returns the longest string  with trailing characters removed. If the characters are not specified, whitespace is removed.
@@ -49,18 +48,9 @@ impl Default for RtrimFunc {
 
 impl RtrimFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![
-                    // Planner attempts coercion to the target type starting with the most preferred candidate.
-                    // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
-                    // If that fails, it proceeds to `(Utf8, Utf8)`.
-                    Exact(vec![Utf8View, Utf8View]),
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![Utf8View]),
-                    Exact(vec![Utf8]),
-                ],
+                vec![TypeSignature::String(2), TypeSignature::String(1)],
                 Volatility::Immutable,
             ),
         }
diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs
index 2424103c84..2441798c38 100644
--- a/datafusion/functions/src/string/split_part.rs
+++ b/datafusion/functions/src/string/split_part.rs
@@ -25,8 +25,7 @@ use datafusion_common::cast::as_int64_array;
 use datafusion_common::ScalarValue;
 use datafusion_common::{exec_err, DataFusionError, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
 use std::any::Any;
 use std::sync::{Arc, OnceLock};
@@ -50,15 +49,15 @@ impl SplitPartFunc {
         Self {
             signature: Signature::one_of(
                 vec![
-                    Exact(vec![Utf8View, Utf8View, Int64]),
-                    Exact(vec![Utf8View, Utf8, Int64]),
-                    Exact(vec![Utf8View, LargeUtf8, Int64]),
-                    Exact(vec![Utf8, Utf8View, Int64]),
-                    Exact(vec![Utf8, Utf8, Int64]),
-                    Exact(vec![LargeUtf8, Utf8View, Int64]),
-                    Exact(vec![LargeUtf8, Utf8, Int64]),
-                    Exact(vec![Utf8, LargeUtf8, Int64]),
-                    Exact(vec![LargeUtf8, LargeUtf8, Int64]),
+                    TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
+                    TypeSignature::Exact(vec![Utf8View, Utf8, Int64]),
+                    TypeSignature::Exact(vec![Utf8View, LargeUtf8, Int64]),
+                    TypeSignature::Exact(vec![Utf8, Utf8View, Int64]),
+                    TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
+                    TypeSignature::Exact(vec![LargeUtf8, Utf8View, Int64]),
+                    TypeSignature::Exact(vec![LargeUtf8, Utf8, Int64]),
+                    TypeSignature::Exact(vec![Utf8, LargeUtf8, Int64]),
+                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
                 ],
                 Volatility::Immutable,
             ),
diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs
index ff4bf01c99..713b642d5e 100644
--- a/datafusion/functions/src/string/starts_with.rs
+++ b/datafusion/functions/src/string/starts_with.rs
@@ -24,7 +24,6 @@ use arrow::datatypes::DataType;
 use crate::utils::make_scalar_function;
 use datafusion_common::{internal_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
 use datafusion_expr::{ColumnarValue, Documentation};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 
@@ -49,17 +48,7 @@ impl Default for StartsWithFunc {
 impl StartsWithFunc {
     pub fn new() -> Self {
         Self {
-            signature: Signature::one_of(
-                vec![
-                    // Planner attempts coercion to the target type starting with the most preferred candidate.
-                    // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
-                    // If that fails, it proceeds to `(Utf8, Utf8)`.
-                    Exact(vec![DataType::Utf8View, DataType::Utf8View]),
-                    Exact(vec![DataType::Utf8, DataType::Utf8]),
-                    Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(2, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs
index caef7f6552..bfcb2a8699 100644
--- a/datafusion/functions/src/string/upper.rs
+++ b/datafusion/functions/src/string/upper.rs
@@ -38,13 +38,8 @@ impl Default for UpperFunc {
 
 impl UpperFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::uniform(
-                1,
-                vec![Utf8, LargeUtf8, Utf8View],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(1, Volatility::Immutable),
         }
     }
 }
diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs
index eaff62c338..660adc7578 100644
--- a/datafusion/functions/src/unicode/strpos.rs
+++ b/datafusion/functions/src/unicode/strpos.rs
@@ -18,14 +18,12 @@
 use std::any::Any;
 use std::sync::{Arc, OnceLock};
 
-use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
-use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
-
 use crate::string::common::StringArrayType;
 use crate::utils::{make_scalar_function, utf8_to_int_type};
+use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
+use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{
     ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
 };
@@ -44,20 +42,8 @@ impl Default for StrposFunc {
 
 impl StrposFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![Utf8, LargeUtf8]),
-                    Exact(vec![LargeUtf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8]),
-                    Exact(vec![Utf8View, Utf8View]),
-                    Exact(vec![Utf8View, Utf8]),
-                    Exact(vec![Utf8View, LargeUtf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::string(2, Volatility::Immutable),
             aliases: vec![String::from("instr"), String::from("position")],
         }
     }
diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt
index 8820fffaeb..0c2fa41e5b 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1906,12 +1906,8 @@ select position('' in '')
 ----
 1
 
-
-query I
+query error DataFusion error: Error during planning: Error during planning: Int64 and Int64 are not coercible to a common string
 select position(1 in 1)
-----
-1
-
 
 query I
 select strpos('abc', 'c');
diff --git a/datafusion/sqllogictest/test_files/string/init_data.slt.part b/datafusion/sqllogictest/test_files/string/init_data.slt.part
index d99401f10d..096e3bb3b3 100644
--- a/datafusion/sqllogictest/test_files/string/init_data.slt.part
+++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part
@@ -30,4 +30,3 @@ statement ok
 create table test_substr_base (
     col1 VARCHAR
 ) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), (''), (NULL);
-
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt
index e7b55c9c1c..e01a40586f 100644
--- a/datafusion/sqllogictest/test_files/string/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -92,6 +92,21 @@ FROM test_source;
 statement ok
 drop table test_source
 
+########
+## StringView Function test
+########
+
+query error DataFusion error: Arrow error: Compute error: bit_length not supported for Utf8View
+select bit_length(column1_utf8view) from test;
+
+query T
+select btrim(column1_large_utf8) from test;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
 ########
 ## StringView to Other Types column
 ########
@@ -299,9 +314,8 @@ EXPLAIN SELECT
 FROM test;
 ----
 logical_plan
-01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1, starts_with(test.column1_utf8, test.column2_utf8) AS c3, starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4
-02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1, test.column1_utf8, test.column2_utf8, test.column2_large_utf8, test.column2_utf8view
-03)----TableScan: test projection=[column1_utf8, column2_utf8, column2_large_utf8, column2_utf8view]
+01)Projection: starts_with(CAST(test.column1_utf8 AS Utf8View), test.column2_utf8view) AS c1, starts_with(test.column1_utf8, test.column2_utf8) AS c3, starts_with(CAST(test.column1_utf8 AS LargeUtf8), test.column2_large_utf8) AS c4
+02)--TableScan: test projection=[column1_utf8, column2_utf8, column2_large_utf8, column2_utf8view]
 
 query BBB
  SELECT
@@ -591,7 +605,7 @@ EXPLAIN SELECT
 FROM test;
 ----
 logical_plan
-01)Projection: contains(test.column1_utf8view, Utf8("foo")) AS c1, 
contains(test.column1_utf8view, test.column2_utf8view) AS c2, 
contains(test.column1_utf8view, test.column2_large_utf8) AS c3, 
contains(test.column1_utf8, test.column2_utf8view) AS c4, 
contains(test.column1_utf8, test.column2_utf8) AS c5, 
contains(test.column1_utf8, test.column2_large_utf8) AS c6, 
contains(test.column1_large_utf8, test.column1_utf8view) AS c7, 
contains(test.column1_large_utf8, test.column2_utf8) AS c8, con [...]
+01)Projection: contains(test.column1_utf8view, Utf8View("foo")) AS c1, 
contains(test.column1_utf8view, test.column2_utf8view) AS c2, 
contains(test.column1_utf8view, CAST(test.column2_large_utf8 AS Utf8View)) AS 
c3, contains(CAST(test.column1_utf8 AS Utf8View), test.column2_utf8view) AS c4, 
contains(test.column1_utf8, test.column2_utf8) AS c5, 
contains(CAST(test.column1_utf8 AS LargeUtf8), test.column2_large_utf8) AS c6, 
contains(CAST(test.column1_large_utf8 AS Utf8View), test.column1_utf [...]
 02)--TableScan: test projection=[column1_utf8, column2_utf8, column1_large_utf8, column2_large_utf8, column1_utf8view, column2_utf8view]
 
 ## Ensure no casts for ENDS_WITH
@@ -835,7 +849,7 @@ EXPLAIN SELECT
 FROM test;
 ----
 logical_plan
-01)Projection: strpos(test.column1_utf8view, Utf8("f")) AS c, strpos(test.column1_utf8view, test.column2_utf8view) AS c2
+01)Projection: strpos(test.column1_utf8view, Utf8View("f")) AS c, strpos(test.column1_utf8view, test.column2_utf8view) AS c2
 02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
 
 ## Ensure no casts for SUBSTR


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to