This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 47664df42f Introduce Signature::String and return error if input of
`strpos` is integer (#12751)
47664df42f is described below
commit 47664df42f37823ccba9c0a9ff9a122ac840babd
Author: Jay Zhan <[email protected]>
AuthorDate: Tue Oct 8 15:05:42 2024 +0800
Introduce Signature::String and return error if input of `strpos` is
integer (#12751)
* fix sig
Signed-off-by: jayzhan211 <[email protected]>
* fix
Signed-off-by: jayzhan211 <[email protected]>
* fix error
Signed-off-by: jayzhan211 <[email protected]>
* fix all signature
Signed-off-by: jayzhan211 <[email protected]>
* fix all signature
Signed-off-by: jayzhan211 <[email protected]>
* change default type
Signed-off-by: jayzhan211 <[email protected]>
* clippy
Signed-off-by: jayzhan211 <[email protected]>
* fix docs
Signed-off-by: jayzhan211 <[email protected]>
* rm deadcode
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* rm test
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
datafusion/core/tests/expr_api/mod.rs | 8 +-
datafusion/expr-common/src/signature.rs | 18 +-
datafusion/expr-common/src/type_coercion/binary.rs | 2 +-
datafusion/expr/src/type_coercion/functions.rs | 67 +++++++-
datafusion/functions/src/macros.rs | 6 +-
datafusion/functions/src/math/nans.rs | 8 +-
datafusion/functions/src/math/power.rs | 8 +-
datafusion/functions/src/regex/regexplike.rs | 11 +-
datafusion/functions/src/regex/regexpmatch.rs | 11 +-
datafusion/functions/src/regex/regexpreplace.rs | 10 +-
datafusion/functions/src/string/ascii.rs | 7 +-
datafusion/functions/src/string/bit_length.rs | 7 +-
datafusion/functions/src/string/btrim.rs | 17 +-
datafusion/functions/src/string/contains.rs | 184 +--------------------
datafusion/functions/src/string/ends_with.rs | 13 +-
datafusion/functions/src/string/initcap.rs | 7 +-
datafusion/functions/src/string/levenshtein.rs | 10 +-
datafusion/functions/src/string/lower.rs | 7 +-
datafusion/functions/src/string/ltrim.rs | 14 +-
datafusion/functions/src/string/octet_length.rs | 7 +-
datafusion/functions/src/string/overlay.rs | 15 +-
datafusion/functions/src/string/repeat.rs | 14 +-
datafusion/functions/src/string/replace.rs | 11 +-
datafusion/functions/src/string/rtrim.rs | 14 +-
datafusion/functions/src/string/split_part.rs | 21 ++-
datafusion/functions/src/string/starts_with.rs | 13 +-
datafusion/functions/src/string/upper.rs | 7 +-
datafusion/functions/src/unicode/strpos.rs | 20 +--
datafusion/sqllogictest/test_files/scalar.slt | 6 +-
.../test_files/string/init_data.slt.part | 1 -
.../sqllogictest/test_files/string/string_view.slt | 24 ++-
31 files changed, 184 insertions(+), 384 deletions(-)
diff --git a/datafusion/core/tests/expr_api/mod.rs
b/datafusion/core/tests/expr_api/mod.rs
index cbd8926721..81a3336100 100644
--- a/datafusion/core/tests/expr_api/mod.rs
+++ b/datafusion/core/tests/expr_api/mod.rs
@@ -37,14 +37,14 @@ mod simplification;
fn test_octet_length() {
#[rustfmt::skip]
evaluate_expr_test(
- octet_length(col("list")),
+ octet_length(col("id")),
vec![
"+------+",
"| expr |",
"+------+",
- "| 5 |",
- "| 18 |",
- "| 6 |",
+ "| 1 |",
+ "| 1 |",
+ "| 1 |",
"+------+",
],
);
diff --git a/datafusion/expr-common/src/signature.rs
b/datafusion/expr-common/src/signature.rs
index d1553b3315..320e1303a2 100644
--- a/datafusion/expr-common/src/signature.rs
+++ b/datafusion/expr-common/src/signature.rs
@@ -125,6 +125,11 @@ pub enum TypeSignature {
/// Fixed number of arguments of numeric types.
/// See
<https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_numeric>
to know which type is considered numeric
Numeric(usize),
+ /// Fixed number of arguments of all the same string types.
+ /// The precedence of type from high to low is Utf8View, LargeUtf8 and
Utf8.
+ /// Null is considered as Utf8 by default
+ /// Dictionary with string value type is also handled.
+ String(usize),
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
@@ -190,8 +195,11 @@ impl TypeSignature {
.collect::<Vec<String>>()
.join(", ")]
}
+ TypeSignature::String(num) => {
+ vec![format!("String({num})")]
+ }
TypeSignature::Numeric(num) => {
- vec![format!("Numeric({})", num)]
+ vec![format!("Numeric({num})")]
}
TypeSignature::Exact(types) | TypeSignature::Coercible(types) => {
vec![Self::join_types(types, ", ")]
@@ -280,6 +288,14 @@ impl Signature {
}
}
+ /// A specified number of string arguments
+ pub fn string(arg_count: usize, volatility: Volatility) -> Self {
+ Self {
+ type_signature: TypeSignature::String(arg_count),
+ volatility,
+ }
+ }
+
/// An arbitrary number of arguments of any type.
pub fn variadic_any(volatility: Volatility) -> Self {
Self {
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs
b/datafusion/expr-common/src/type_coercion/binary.rs
index e7c4f65a1b..6d66b8b4df 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -959,7 +959,7 @@ fn string_concat_internal_coercion(
/// based on the observation that StringArray to StringViewArray is cheap but
not vice versa.
///
/// Between Utf8 and LargeUtf8, we coerce to LargeUtf8.
-fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
+pub fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
// If Utf8View is in any side, we coerce to Utf8View.
diff --git a/datafusion/expr/src/type_coercion/functions.rs
b/datafusion/expr/src/type_coercion/functions.rs
index 9000ac2538..143e00fa40 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -26,8 +26,9 @@ use datafusion_common::{
utils::{coerced_fixed_size_list_to_list, list_ndims},
Result,
};
-use datafusion_expr_common::signature::{
- ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, TIMEZONE_WILDCARD,
+use datafusion_expr_common::{
+ signature::{ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD,
TIMEZONE_WILDCARD},
+ type_coercion::binary::string_coercion,
};
use std::sync::Arc;
@@ -176,6 +177,7 @@ fn is_well_supported_signature(type_signature:
&TypeSignature) -> bool {
type_signature,
TypeSignature::UserDefined
| TypeSignature::Numeric(_)
+ | TypeSignature::String(_)
| TypeSignature::Coercible(_)
| TypeSignature::Any(_)
)
@@ -381,6 +383,67 @@ fn get_valid_types(
.iter()
.map(|valid_type| current_types.iter().map(|_|
valid_type.clone()).collect())
.collect(),
+ TypeSignature::String(number) => {
+ if *number < 1 {
+ return plan_err!(
+ "The signature expected at least one argument but received
{}",
+ current_types.len()
+ );
+ }
+ if *number != current_types.len() {
+ return plan_err!(
+ "The signature expected {} arguments but received {}",
+ number,
+ current_types.len()
+ );
+ }
+
+ fn coercion_rule(
+ lhs_type: &DataType,
+ rhs_type: &DataType,
+ ) -> Result<DataType> {
+ match (lhs_type, rhs_type) {
+ (DataType::Null, DataType::Null) => Ok(DataType::Utf8),
+ (DataType::Null, data_type) | (data_type, DataType::Null)
=> {
+ coercion_rule(data_type, &DataType::Utf8)
+ }
+ (DataType::Dictionary(_, lhs), DataType::Dictionary(_,
rhs)) => {
+ coercion_rule(lhs, rhs)
+ }
+ (DataType::Dictionary(_, v), other)
+ | (other, DataType::Dictionary(_, v)) => coercion_rule(v,
other),
+ _ => {
+ if let Some(coerced_type) = string_coercion(lhs_type,
rhs_type) {
+ Ok(coerced_type)
+ } else {
+ plan_err!(
+ "{} and {} are not coercible to a common
string type",
+ lhs_type,
+ rhs_type
+ )
+ }
+ }
+ }
+ }
+
+ // Length checked above, safe to unwrap
+ let mut coerced_type = current_types.first().unwrap().to_owned();
+ for t in current_types.iter().skip(1) {
+ coerced_type = coercion_rule(&coerced_type, t)?;
+ }
+
+ fn base_type_or_default_type(data_type: &DataType) -> DataType {
+ if data_type.is_null() {
+ DataType::Utf8
+ } else if let DataType::Dictionary(_, v) = data_type {
+ base_type_or_default_type(v)
+ } else {
+ data_type.to_owned()
+ }
+ }
+
+ vec![vec![base_type_or_default_type(&coerced_type); *number]]
+ }
TypeSignature::Numeric(number) => {
if *number < 1 {
return plan_err!(
diff --git a/datafusion/functions/src/macros.rs
b/datafusion/functions/src/macros.rs
index e47818bc86..e850673ef8 100644
--- a/datafusion/functions/src/macros.rs
+++ b/datafusion/functions/src/macros.rs
@@ -284,7 +284,7 @@ macro_rules! make_math_binary_udf {
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, DataFusionError, Result};
use datafusion_expr::sort_properties::{ExprProperties,
SortProperties};
- use datafusion_expr::TypeSignature::*;
+ use datafusion_expr::TypeSignature;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature,
Volatility};
#[derive(Debug)]
@@ -298,8 +298,8 @@ macro_rules! make_math_binary_udf {
Self {
signature: Signature::one_of(
vec![
- Exact(vec![Float32, Float32]),
- Exact(vec![Float64, Float64]),
+ TypeSignature::Exact(vec![Float32, Float32]),
+ TypeSignature::Exact(vec![Float64, Float64]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/math/nans.rs
b/datafusion/functions/src/math/nans.rs
index 2bd704a7de..b02839b40b 100644
--- a/datafusion/functions/src/math/nans.rs
+++ b/datafusion/functions/src/math/nans.rs
@@ -19,10 +19,9 @@
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, DataFusionError, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ColumnarValue, TypeSignature};
use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
-use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::Arc;
@@ -43,7 +42,10 @@ impl IsNanFunc {
use DataType::*;
Self {
signature: Signature::one_of(
- vec![Exact(vec![Float32]), Exact(vec![Float64])],
+ vec![
+ TypeSignature::Exact(vec![Float32]),
+ TypeSignature::Exact(vec![Float64]),
+ ],
Volatility::Immutable,
),
}
diff --git a/datafusion/functions/src/math/power.rs
b/datafusion/functions/src/math/power.rs
index 5b790fb56d..831f983d59 100644
--- a/datafusion/functions/src/math/power.rs
+++ b/datafusion/functions/src/math/power.rs
@@ -25,10 +25,9 @@ use datafusion_common::{
};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
-use datafusion_expr::{ColumnarValue, Expr, ScalarUDF};
+use datafusion_expr::{ColumnarValue, Expr, ScalarUDF, TypeSignature};
use arrow::array::{ArrayRef, Float64Array, Int64Array};
-use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::Arc;
@@ -52,7 +51,10 @@ impl PowerFunc {
use DataType::*;
Self {
signature: Signature::one_of(
- vec![Exact(vec![Int64, Int64]), Exact(vec![Float64, Float64])],
+ vec![
+ TypeSignature::Exact(vec![Int64, Int64]),
+ TypeSignature::Exact(vec![Float64, Float64]),
+ ],
Volatility::Immutable,
),
aliases: vec![String::from("pow")],
diff --git a/datafusion/functions/src/regex/regexplike.rs
b/datafusion/functions/src/regex/regexplike.rs
index e245ea9fa7..a698913fff 100644
--- a/datafusion/functions/src/regex/regexplike.rs
+++ b/datafusion/functions/src/regex/regexplike.rs
@@ -26,8 +26,7 @@ use datafusion_common::{
cast::as_generic_string_array, internal_err, DataFusionError, Result,
};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::{Arc, OnceLock};
@@ -87,10 +86,10 @@ impl RegexpLikeFunc {
Self {
signature: Signature::one_of(
vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8]),
- Exact(vec![Utf8, Utf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
+ TypeSignature::Exact(vec![Utf8, Utf8]),
+ TypeSignature::Exact(vec![LargeUtf8, LargeUtf8]),
+ TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
+ TypeSignature::Exact(vec![LargeUtf8, LargeUtf8,
LargeUtf8]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/regex/regexpmatch.rs
b/datafusion/functions/src/regex/regexpmatch.rs
index 498b591620..bfec97f92c 100644
--- a/datafusion/functions/src/regex/regexpmatch.rs
+++ b/datafusion/functions/src/regex/regexpmatch.rs
@@ -26,8 +26,7 @@ use datafusion_common::{arrow_datafusion_err, plan_err};
use datafusion_common::{
cast::as_generic_string_array, internal_err, DataFusionError, Result,
};
-use datafusion_expr::ColumnarValue;
-use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ColumnarValue, TypeSignature};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::Arc;
@@ -53,10 +52,10 @@ impl RegexpMatchFunc {
// For example, given input `(Utf8View, Utf8)`, it first
tries coercing to `(Utf8, Utf8)`.
// If that fails, it proceeds to `(LargeUtf8, Utf8)`.
// TODO: Native support Utf8View for regexp_match.
- Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8]),
- Exact(vec![Utf8, Utf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
+ TypeSignature::Exact(vec![Utf8, Utf8]),
+ TypeSignature::Exact(vec![LargeUtf8, LargeUtf8]),
+ TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
+ TypeSignature::Exact(vec![LargeUtf8, LargeUtf8,
LargeUtf8]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/regex/regexpreplace.rs
b/datafusion/functions/src/regex/regexpreplace.rs
index 3eb72a1fb5..bce8752af2 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -33,7 +33,7 @@ use datafusion_common::{
};
use datafusion_expr::function::Hint;
use datafusion_expr::ColumnarValue;
-use datafusion_expr::TypeSignature::*;
+use datafusion_expr::TypeSignature;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use regex::Regex;
use std::any::Any;
@@ -56,10 +56,10 @@ impl RegexpReplaceFunc {
Self {
signature: Signature::one_of(
vec![
- Exact(vec![Utf8, Utf8, Utf8]),
- Exact(vec![Utf8View, Utf8, Utf8]),
- Exact(vec![Utf8, Utf8, Utf8, Utf8]),
- Exact(vec![Utf8View, Utf8, Utf8, Utf8]),
+ TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
+ TypeSignature::Exact(vec![Utf8View, Utf8, Utf8]),
+ TypeSignature::Exact(vec![Utf8, Utf8, Utf8, Utf8]),
+ TypeSignature::Exact(vec![Utf8View, Utf8, Utf8, Utf8]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/string/ascii.rs
b/datafusion/functions/src/string/ascii.rs
index 1e828d0667..8d61661f97 100644
--- a/datafusion/functions/src/string/ascii.rs
+++ b/datafusion/functions/src/string/ascii.rs
@@ -39,13 +39,8 @@ impl Default for AsciiFunc {
impl AsciiFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::uniform(
- 1,
- vec![Utf8, LargeUtf8, Utf8View],
- Volatility::Immutable,
- ),
+ signature: Signature::string(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/bit_length.rs
b/datafusion/functions/src/string/bit_length.rs
index bd22c1504b..7d162e7d41 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -39,13 +39,8 @@ impl Default for BitLengthFunc {
impl BitLengthFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::uniform(
- 1,
- vec![Utf8, LargeUtf8],
- Volatility::Immutable,
- ),
+ signature: Signature::string(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/btrim.rs
b/datafusion/functions/src/string/btrim.rs
index b2e79a7b89..82b7599f07 100644
--- a/datafusion/functions/src/string/btrim.rs
+++ b/datafusion/functions/src/string/btrim.rs
@@ -22,9 +22,9 @@ use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
-use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature,
Volatility,
+};
use std::any::Any;
use std::sync::OnceLock;
@@ -49,18 +49,9 @@ impl Default for BTrimFunc {
impl BTrimFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
signature: Signature::one_of(
- vec![
- // Planner attempts coercion to the target type starting
with the most preferred candidate.
- // For example, given input `(Utf8View, Utf8)`, it first
tries coercing to `(Utf8View, Utf8View)`.
- // If that fails, it proceeds to `(Utf8, Utf8)`.
- Exact(vec![Utf8View, Utf8View]),
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8View]),
- Exact(vec![Utf8]),
- ],
+ vec![TypeSignature::String(2), TypeSignature::String(1)],
Volatility::Immutable,
),
aliases: vec![String::from("trim")],
diff --git a/datafusion/functions/src/string/contains.rs
b/datafusion/functions/src/string/contains.rs
index 7fc1fa876c..0f75731aa1 100644
--- a/datafusion/functions/src/string/contains.rs
+++ b/datafusion/functions/src/string/contains.rs
@@ -16,19 +16,17 @@
// under the License.
use crate::utils::make_scalar_function;
-
use arrow::array::{Array, ArrayRef, AsArray, GenericStringArray,
StringViewArray};
+use arrow::compute::regexp_is_match;
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View};
use datafusion_common::exec_err;
use datafusion_common::DataFusionError;
use datafusion_common::Result;
-use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, Signature, Volatility};
-use datafusion_expr::{Documentation, ScalarUDFImpl};
-
-use arrow::compute::regexp_is_match;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
use std::any::Any;
use std::sync::{Arc, OnceLock};
@@ -45,22 +43,8 @@ impl Default for ContainsFunc {
impl ContainsFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![Utf8View, Utf8View]),
- Exact(vec![Utf8View, Utf8]),
- Exact(vec![Utf8View, LargeUtf8]),
- Exact(vec![Utf8, Utf8View]),
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8, LargeUtf8]),
- Exact(vec![LargeUtf8, Utf8View]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::string(2, Volatility::Immutable),
}
}
}
@@ -132,39 +116,6 @@ pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef,
DataFusionError> {
Ok(Arc::new(res) as ArrayRef)
}
- (Utf8View, Utf8) => {
- let mod_str = args[0].as_string_view();
- let match_str = args[1].as_string::<i32>();
- let res = regexp_is_match::<
- StringViewArray,
- GenericStringArray<i32>,
- GenericStringArray<i32>,
- >(mod_str, match_str, None)?;
-
- Ok(Arc::new(res) as ArrayRef)
- }
- (Utf8View, LargeUtf8) => {
- let mod_str = args[0].as_string_view();
- let match_str = args[1].as_string::<i64>();
- let res = regexp_is_match::<
- StringViewArray,
- GenericStringArray<i64>,
- GenericStringArray<i32>,
- >(mod_str, match_str, None)?;
-
- Ok(Arc::new(res) as ArrayRef)
- }
- (Utf8, Utf8View) => {
- let mod_str = args[0].as_string::<i32>();
- let match_str = args[1].as_string_view();
- let res = regexp_is_match::<
- GenericStringArray<i32>,
- StringViewArray,
- GenericStringArray<i32>,
- >(mod_str, match_str, None)?;
-
- Ok(Arc::new(res) as ArrayRef)
- }
(Utf8, Utf8) => {
let mod_str = args[0].as_string::<i32>();
let match_str = args[1].as_string::<i32>();
@@ -176,39 +127,6 @@ pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef,
DataFusionError> {
Ok(Arc::new(res) as ArrayRef)
}
- (Utf8, LargeUtf8) => {
- let mod_str = args[0].as_string::<i32>();
- let match_str = args[1].as_string::<i64>();
- let res = regexp_is_match::<
- GenericStringArray<i32>,
- GenericStringArray<i64>,
- GenericStringArray<i32>,
- >(mod_str, match_str, None)?;
-
- Ok(Arc::new(res) as ArrayRef)
- }
- (LargeUtf8, Utf8View) => {
- let mod_str = args[0].as_string::<i64>();
- let match_str = args[1].as_string_view();
- let res = regexp_is_match::<
- GenericStringArray<i64>,
- StringViewArray,
- GenericStringArray<i32>,
- >(mod_str, match_str, None)?;
-
- Ok(Arc::new(res) as ArrayRef)
- }
- (LargeUtf8, Utf8) => {
- let mod_str = args[0].as_string::<i64>();
- let match_str = args[1].as_string::<i32>();
- let res = regexp_is_match::<
- GenericStringArray<i64>,
- GenericStringArray<i32>,
- GenericStringArray<i32>,
- >(mod_str, match_str, None)?;
-
- Ok(Arc::new(res) as ArrayRef)
- }
(LargeUtf8, LargeUtf8) => {
let mod_str = args[0].as_string::<i64>();
let match_str = args[1].as_string::<i64>();
@@ -225,95 +143,3 @@ pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef,
DataFusionError> {
}
}
}
-
-#[cfg(test)]
-mod tests {
- use crate::string::contains::ContainsFunc;
- use crate::utils::test::test_function;
- use arrow::array::Array;
- use arrow::{array::BooleanArray, datatypes::DataType::Boolean};
- use datafusion_common::Result;
- use datafusion_common::ScalarValue;
- use datafusion_expr::ColumnarValue;
- use datafusion_expr::ScalarUDFImpl;
- #[test]
- fn test_functions() -> Result<()> {
- test_function!(
- ContainsFunc::new(),
- &[
- ColumnarValue::Scalar(ScalarValue::from("alphabet")),
- ColumnarValue::Scalar(ScalarValue::from("alph")),
- ],
- Ok(Some(true)),
- bool,
- Boolean,
- BooleanArray
- );
- test_function!(
- ContainsFunc::new(),
- &[
- ColumnarValue::Scalar(ScalarValue::from("alphabet")),
- ColumnarValue::Scalar(ScalarValue::from("dddddd")),
- ],
- Ok(Some(false)),
- bool,
- Boolean,
- BooleanArray
- );
- test_function!(
- ContainsFunc::new(),
- &[
- ColumnarValue::Scalar(ScalarValue::from("alphabet")),
- ColumnarValue::Scalar(ScalarValue::from("pha")),
- ],
- Ok(Some(true)),
- bool,
- Boolean,
- BooleanArray
- );
-
- test_function!(
- ContainsFunc::new(),
- &[
- ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
- "Apache"
- )))),
-
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("pac")))),
- ],
- Ok(Some(true)),
- bool,
- Boolean,
- BooleanArray
- );
- test_function!(
- ContainsFunc::new(),
- &[
- ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
- "Apache"
- )))),
-
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("ap")))),
- ],
- Ok(Some(false)),
- bool,
- Boolean,
- BooleanArray
- );
- test_function!(
- ContainsFunc::new(),
- &[
- ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
- "Apache"
- )))),
- ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from(
- "DataFusion"
- )))),
- ],
- Ok(Some(false)),
- bool,
- Boolean,
- BooleanArray
- );
-
- Ok(())
- }
-}
diff --git a/datafusion/functions/src/string/ends_with.rs
b/datafusion/functions/src/string/ends_with.rs
index 786010764c..8c90cbc3b1 100644
--- a/datafusion/functions/src/string/ends_with.rs
+++ b/datafusion/functions/src/string/ends_with.rs
@@ -24,7 +24,6 @@ use arrow::datatypes::DataType;
use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
@@ -42,17 +41,7 @@ impl Default for EndsWithFunc {
impl EndsWithFunc {
pub fn new() -> Self {
Self {
- signature: Signature::one_of(
- vec![
- // Planner attempts coercion to the target type starting
with the most preferred candidate.
- // For example, given input `(Utf8View, Utf8)`, it first
tries coercing to `(Utf8View, Utf8View)`.
- // If that fails, it proceeds to `(Utf8, Utf8)`.
- Exact(vec![DataType::Utf8View, DataType::Utf8View]),
- Exact(vec![DataType::Utf8, DataType::Utf8]),
- Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::string(2, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/initcap.rs
b/datafusion/functions/src/string/initcap.rs
index ffd60bb6e9..78c95b9a5e 100644
--- a/datafusion/functions/src/string/initcap.rs
+++ b/datafusion/functions/src/string/initcap.rs
@@ -41,13 +41,8 @@ impl Default for InitcapFunc {
impl InitcapFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::uniform(
- 1,
- vec![Utf8, LargeUtf8, Utf8View],
- Volatility::Immutable,
- ),
+ signature: Signature::string(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/levenshtein.rs
b/datafusion/functions/src/string/levenshtein.rs
index 2f121426f1..558e71239f 100644
--- a/datafusion/functions/src/string/levenshtein.rs
+++ b/datafusion/functions/src/string/levenshtein.rs
@@ -26,7 +26,6 @@ use datafusion_common::cast::{as_generic_string_array,
as_string_view_array};
use datafusion_common::utils::datafusion_strsim;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
@@ -44,14 +43,7 @@ impl Default for LevenshteinFunc {
impl LevenshteinFunc {
pub fn new() -> Self {
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![DataType::Utf8View, DataType::Utf8View]),
- Exact(vec![DataType::Utf8, DataType::Utf8]),
- Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::string(2, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/lower.rs
b/datafusion/functions/src/string/lower.rs
index 25acfc2760..f82b11ca90 100644
--- a/datafusion/functions/src/string/lower.rs
+++ b/datafusion/functions/src/string/lower.rs
@@ -39,13 +39,8 @@ impl Default for LowerFunc {
impl LowerFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::uniform(
- 1,
- vec![Utf8, LargeUtf8, Utf8View],
- Volatility::Immutable,
- ),
+ signature: Signature::string(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/ltrim.rs
b/datafusion/functions/src/string/ltrim.rs
index 1fcde9e97a..b64dcda721 100644
--- a/datafusion/functions/src/string/ltrim.rs
+++ b/datafusion/functions/src/string/ltrim.rs
@@ -25,8 +25,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
/// Returns the longest string with leading characters removed. If the
characters are not specified, whitespace is removed.
@@ -49,18 +48,9 @@ impl Default for LtrimFunc {
impl LtrimFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
signature: Signature::one_of(
- vec![
- // Planner attempts coercion to the target type starting
with the most preferred candidate.
- // For example, given input `(Utf8View, Utf8)`, it first
tries coercing to `(Utf8View, Utf8View)`.
- // If that fails, it proceeds to `(Utf8, Utf8)`.
- Exact(vec![Utf8View, Utf8View]),
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8View]),
- Exact(vec![Utf8]),
- ],
+ vec![TypeSignature::String(2), TypeSignature::String(1)],
Volatility::Immutable,
),
}
diff --git a/datafusion/functions/src/string/octet_length.rs
b/datafusion/functions/src/string/octet_length.rs
index 195a6c296c..04094396fa 100644
--- a/datafusion/functions/src/string/octet_length.rs
+++ b/datafusion/functions/src/string/octet_length.rs
@@ -39,13 +39,8 @@ impl Default for OctetLengthFunc {
impl OctetLengthFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::uniform(
- 1,
- vec![Utf8, LargeUtf8, Utf8View],
- Volatility::Immutable,
- ),
+ signature: Signature::string(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/overlay.rs
b/datafusion/functions/src/string/overlay.rs
index ec33840a0b..3b31bc3608 100644
--- a/datafusion/functions/src/string/overlay.rs
+++ b/datafusion/functions/src/string/overlay.rs
@@ -27,8 +27,7 @@ use datafusion_common::cast::{
};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
#[derive(Debug)]
@@ -48,12 +47,12 @@ impl OverlayFunc {
Self {
signature: Signature::one_of(
vec![
- Exact(vec![Utf8View, Utf8View, Int64, Int64]),
- Exact(vec![Utf8, Utf8, Int64, Int64]),
- Exact(vec![LargeUtf8, LargeUtf8, Int64, Int64]),
- Exact(vec![Utf8View, Utf8View, Int64]),
- Exact(vec![Utf8, Utf8, Int64]),
- Exact(vec![LargeUtf8, LargeUtf8, Int64]),
+ TypeSignature::Exact(vec![Utf8View, Utf8View, Int64,
Int64]),
+ TypeSignature::Exact(vec![Utf8, Utf8, Int64, Int64]),
+ TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64,
Int64]),
+ TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
+ TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
+ TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/string/repeat.rs
b/datafusion/functions/src/string/repeat.rs
index 3abd1767bb..fda9c7a13d 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -18,20 +18,18 @@
use std::any::Any;
use std::sync::{Arc, OnceLock};
+use crate::string::common::StringArrayType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
};
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{Int64, LargeUtf8, Utf8, Utf8View};
-
-use crate::string::common::StringArrayType;
-use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
#[derive(Debug)]
@@ -53,9 +51,9 @@ impl RepeatFunc {
// Planner attempts coercion to the target type starting
with the most preferred candidate.
// For example, given input `(Utf8View, Int64)`, it first
tries coercing to `(Utf8View, Int64)`.
// If that fails, it proceeds to `(Utf8, Int64)`.
- Exact(vec![Utf8View, Int64]),
- Exact(vec![Utf8, Int64]),
- Exact(vec![LargeUtf8, Int64]),
+ TypeSignature::Exact(vec![Utf8View, Int64]),
+ TypeSignature::Exact(vec![Utf8, Int64]),
+ TypeSignature::Exact(vec![LargeUtf8, Int64]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/string/replace.rs
b/datafusion/functions/src/string/replace.rs
index 7c985b44ab..612cd7276b 100644
--- a/datafusion/functions/src/string/replace.rs
+++ b/datafusion/functions/src/string/replace.rs
@@ -25,7 +25,6 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
@@ -42,16 +41,8 @@ impl Default for ReplaceFunc {
impl ReplaceFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![Utf8View, Utf8View, Utf8View]),
- Exact(vec![Utf8, Utf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::string(3, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/rtrim.rs
b/datafusion/functions/src/string/rtrim.rs
index 6743ad99d3..1a27502a20 100644
--- a/datafusion/functions/src/string/rtrim.rs
+++ b/datafusion/functions/src/string/rtrim.rs
@@ -25,8 +25,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
/// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed.
@@ -49,18 +48,9 @@ impl Default for RtrimFunc {
impl RtrimFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
signature: Signature::one_of(
- vec![
- // Planner attempts coercion to the target type starting with the most preferred candidate.
- // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
- // If that fails, it proceeds to `(Utf8, Utf8)`.
- Exact(vec![Utf8View, Utf8View]),
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8View]),
- Exact(vec![Utf8]),
- ],
+ vec![TypeSignature::String(2), TypeSignature::String(1)],
Volatility::Immutable,
),
}
diff --git a/datafusion/functions/src/string/split_part.rs
b/datafusion/functions/src/string/split_part.rs
index 2424103c84..2441798c38 100644
--- a/datafusion/functions/src/string/split_part.rs
+++ b/datafusion/functions/src/string/split_part.rs
@@ -25,8 +25,7 @@ use datafusion_common::cast::as_int64_array;
use datafusion_common::ScalarValue;
use datafusion_common::{exec_err, DataFusionError, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Documentation, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use std::any::Any;
use std::sync::{Arc, OnceLock};
@@ -50,15 +49,15 @@ impl SplitPartFunc {
Self {
signature: Signature::one_of(
vec![
- Exact(vec![Utf8View, Utf8View, Int64]),
- Exact(vec![Utf8View, Utf8, Int64]),
- Exact(vec![Utf8View, LargeUtf8, Int64]),
- Exact(vec![Utf8, Utf8View, Int64]),
- Exact(vec![Utf8, Utf8, Int64]),
- Exact(vec![LargeUtf8, Utf8View, Int64]),
- Exact(vec![LargeUtf8, Utf8, Int64]),
- Exact(vec![Utf8, LargeUtf8, Int64]),
- Exact(vec![LargeUtf8, LargeUtf8, Int64]),
+ TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
+ TypeSignature::Exact(vec![Utf8View, Utf8, Int64]),
+ TypeSignature::Exact(vec![Utf8View, LargeUtf8, Int64]),
+ TypeSignature::Exact(vec![Utf8, Utf8View, Int64]),
+ TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
+ TypeSignature::Exact(vec![LargeUtf8, Utf8View, Int64]),
+ TypeSignature::Exact(vec![LargeUtf8, Utf8, Int64]),
+ TypeSignature::Exact(vec![Utf8, LargeUtf8, Int64]),
+ TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/string/starts_with.rs
b/datafusion/functions/src/string/starts_with.rs
index ff4bf01c99..713b642d5e 100644
--- a/datafusion/functions/src/string/starts_with.rs
+++ b/datafusion/functions/src/string/starts_with.rs
@@ -24,7 +24,6 @@ use arrow::datatypes::DataType;
use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
@@ -49,17 +48,7 @@ impl Default for StartsWithFunc {
impl StartsWithFunc {
pub fn new() -> Self {
Self {
- signature: Signature::one_of(
- vec![
- // Planner attempts coercion to the target type starting with the most preferred candidate.
- // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
- // If that fails, it proceeds to `(Utf8, Utf8)`.
- Exact(vec![DataType::Utf8View, DataType::Utf8View]),
- Exact(vec![DataType::Utf8, DataType::Utf8]),
- Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::string(2, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/upper.rs
b/datafusion/functions/src/string/upper.rs
index caef7f6552..bfcb2a8699 100644
--- a/datafusion/functions/src/string/upper.rs
+++ b/datafusion/functions/src/string/upper.rs
@@ -38,13 +38,8 @@ impl Default for UpperFunc {
impl UpperFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::uniform(
- 1,
- vec![Utf8, LargeUtf8, Utf8View],
- Volatility::Immutable,
- ),
+ signature: Signature::string(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/unicode/strpos.rs
b/datafusion/functions/src/unicode/strpos.rs
index eaff62c338..660adc7578 100644
--- a/datafusion/functions/src/unicode/strpos.rs
+++ b/datafusion/functions/src/unicode/strpos.rs
@@ -18,14 +18,12 @@
use std::any::Any;
use std::sync::{Arc, OnceLock};
-use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
-use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
-
use crate::string::common::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
+use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
+use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
@@ -44,20 +42,8 @@ impl Default for StrposFunc {
impl StrposFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8, LargeUtf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8]),
- Exact(vec![Utf8View, Utf8View]),
- Exact(vec![Utf8View, Utf8]),
- Exact(vec![Utf8View, LargeUtf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::string(2, Volatility::Immutable),
aliases: vec![String::from("instr"), String::from("position")],
}
}
diff --git a/datafusion/sqllogictest/test_files/scalar.slt
b/datafusion/sqllogictest/test_files/scalar.slt
index 8820fffaeb..0c2fa41e5b 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1906,12 +1906,8 @@ select position('' in '')
----
1
-
-query I
+query error DataFusion error: Error during planning: Error during planning: Int64 and Int64 are not coercible to a common string
select position(1 in 1)
-----
-1
-
query I
select strpos('abc', 'c');
diff --git a/datafusion/sqllogictest/test_files/string/init_data.slt.part
b/datafusion/sqllogictest/test_files/string/init_data.slt.part
index d99401f10d..096e3bb3b3 100644
--- a/datafusion/sqllogictest/test_files/string/init_data.slt.part
+++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part
@@ -30,4 +30,3 @@ statement ok
create table test_substr_base (
col1 VARCHAR
) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'),
(''), (NULL);
-
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt
b/datafusion/sqllogictest/test_files/string/string_view.slt
index e7b55c9c1c..e01a40586f 100644
--- a/datafusion/sqllogictest/test_files/string/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -92,6 +92,21 @@ FROM test_source;
statement ok
drop table test_source
+########
+## StringView Function test
+########
+
+query error DataFusion error: Arrow error: Compute error: bit_length not supported for Utf8View
+select bit_length(column1_utf8view) from test;
+
+query T
+select btrim(column1_large_utf8) from test;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
########
## StringView to Other Types column
########
@@ -299,9 +314,8 @@ EXPLAIN SELECT
FROM test;
----
logical_plan
-01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1,
starts_with(test.column1_utf8, test.column2_utf8) AS c3,
starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4
-02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1,
test.column1_utf8, test.column2_utf8, test.column2_large_utf8,
test.column2_utf8view
-03)----TableScan: test projection=[column1_utf8, column2_utf8,
column2_large_utf8, column2_utf8view]
+01)Projection: starts_with(CAST(test.column1_utf8 AS Utf8View),
test.column2_utf8view) AS c1, starts_with(test.column1_utf8, test.column2_utf8)
AS c3, starts_with(CAST(test.column1_utf8 AS LargeUtf8),
test.column2_large_utf8) AS c4
+02)--TableScan: test projection=[column1_utf8, column2_utf8,
column2_large_utf8, column2_utf8view]
query BBB
SELECT
@@ -591,7 +605,7 @@ EXPLAIN SELECT
FROM test;
----
logical_plan
-01)Projection: contains(test.column1_utf8view, Utf8("foo")) AS c1,
contains(test.column1_utf8view, test.column2_utf8view) AS c2,
contains(test.column1_utf8view, test.column2_large_utf8) AS c3,
contains(test.column1_utf8, test.column2_utf8view) AS c4,
contains(test.column1_utf8, test.column2_utf8) AS c5,
contains(test.column1_utf8, test.column2_large_utf8) AS c6,
contains(test.column1_large_utf8, test.column1_utf8view) AS c7,
contains(test.column1_large_utf8, test.column2_utf8) AS c8, con [...]
+01)Projection: contains(test.column1_utf8view, Utf8View("foo")) AS c1,
contains(test.column1_utf8view, test.column2_utf8view) AS c2,
contains(test.column1_utf8view, CAST(test.column2_large_utf8 AS Utf8View)) AS
c3, contains(CAST(test.column1_utf8 AS Utf8View), test.column2_utf8view) AS c4,
contains(test.column1_utf8, test.column2_utf8) AS c5,
contains(CAST(test.column1_utf8 AS LargeUtf8), test.column2_large_utf8) AS c6,
contains(CAST(test.column1_large_utf8 AS Utf8View), test.column1_utf [...]
02)--TableScan: test projection=[column1_utf8, column2_utf8,
column1_large_utf8, column2_large_utf8, column1_utf8view, column2_utf8view]
## Ensure no casts for ENDS_WITH
@@ -835,7 +849,7 @@ EXPLAIN SELECT
FROM test;
----
logical_plan
-01)Projection: strpos(test.column1_utf8view, Utf8("f")) AS c,
strpos(test.column1_utf8view, test.column2_utf8view) AS c2
+01)Projection: strpos(test.column1_utf8view, Utf8View("f")) AS c,
strpos(test.column1_utf8view, test.column2_utf8view) AS c2
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
## Ensure no casts for SUBSTR
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]