This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new f2de2c4cc2 Refactor regexplike signature (#13394)
f2de2c4cc2 is described below
commit f2de2c4cc2009d9b6965f7951fd543e1974fcd2c
Author: Jiashen Cao <[email protected]>
AuthorDate: Sun Dec 8 02:48:57 2024 -0500
Refactor regexplike signature (#13394)
* update
* update
* update
* clean up errors
* fix flags types
* fix failed example
---
datafusion-examples/examples/regexp.rs | 2 +-
datafusion/functions/src/regex/regexplike.rs | 50 +++++++++++++---------
.../sqllogictest/test_files/string/string_view.slt | 2 +-
3 files changed, 31 insertions(+), 23 deletions(-)
diff --git a/datafusion-examples/examples/regexp.rs b/datafusion-examples/examples/regexp.rs
index 02e74bae22..5419efd2fa 100644
--- a/datafusion-examples/examples/regexp.rs
+++ b/datafusion-examples/examples/regexp.rs
@@ -148,7 +148,7 @@ async fn main() -> Result<()> {
// invalid flags will result in an error
let result = ctx
- .sql(r"select regexp_like('\b4(?!000)\d\d\d\b', 4010, 'g')")
+ .sql(r"select regexp_like('\b4(?!000)\d\d\d\b', '4010', 'g')")
.await?
.collect()
.await;
diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs
index 49e57776c7..1c826b12ef 100644
--- a/datafusion/functions/src/regex/regexplike.rs
+++ b/datafusion/functions/src/regex/regexplike.rs
@@ -81,26 +81,7 @@ impl RegexpLikeFunc {
pub fn new() -> Self {
Self {
signature: Signature::one_of(
- vec![
- TypeSignature::Exact(vec![Utf8View, Utf8]),
- TypeSignature::Exact(vec![Utf8View, Utf8View]),
- TypeSignature::Exact(vec![Utf8View, LargeUtf8]),
- TypeSignature::Exact(vec![Utf8, Utf8]),
- TypeSignature::Exact(vec![Utf8, Utf8View]),
- TypeSignature::Exact(vec![Utf8, LargeUtf8]),
- TypeSignature::Exact(vec![LargeUtf8, Utf8]),
- TypeSignature::Exact(vec![LargeUtf8, Utf8View]),
- TypeSignature::Exact(vec![LargeUtf8, LargeUtf8]),
- TypeSignature::Exact(vec![Utf8View, Utf8, Utf8]),
- TypeSignature::Exact(vec![Utf8View, Utf8View, Utf8]),
- TypeSignature::Exact(vec![Utf8View, LargeUtf8, Utf8]),
- TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
- TypeSignature::Exact(vec![Utf8, Utf8View, Utf8]),
- TypeSignature::Exact(vec![Utf8, LargeUtf8, Utf8]),
- TypeSignature::Exact(vec![LargeUtf8, Utf8, Utf8]),
- TypeSignature::Exact(vec![LargeUtf8, Utf8View, Utf8]),
- TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Utf8]),
- ],
+ vec![TypeSignature::String(2), TypeSignature::String(3)],
Volatility::Immutable,
),
}
@@ -211,7 +192,34 @@ pub fn regexp_like(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
2 => handle_regexp_like(&args[0], &args[1], None),
3 => {
- let flags = args[2].as_string::<i32>();
+ let flags = match args[2].data_type() {
+ Utf8 => args[2].as_string::<i32>(),
+ LargeUtf8 => {
+ let large_string_array = args[2].as_string::<i64>();
+ let string_vec: Vec<Option<&str>> = (0..large_string_array.len()).map(|i| {
+ if large_string_array.is_null(i) {
+ None
+ } else {
+ Some(large_string_array.value(i))
+ }
+ })
+ .collect();
+
+ &GenericStringArray::<i32>::from(string_vec)
+ },
+ _ => {
+ let string_view_array = args[2].as_string_view();
+ let string_vec: Vec<Option<String>> = (0..string_view_array.len()).map(|i| {
+ if string_view_array.is_null(i) {
+ None
+ } else {
+ Some(string_view_array.value(i).to_string())
+ }
+ })
+ .collect();
+ &GenericStringArray::<i32>::from(string_vec)
+ },
+ };
if flags.iter().any(|s| s == Some("g")) {
return plan_err!("regexp_like() does not support the \"global\" option");
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt
index ebabaf7655..c37dd1ed3b 100644
--- a/datafusion/sqllogictest/test_files/string/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -731,7 +731,7 @@ EXPLAIN SELECT
FROM test;
----
logical_plan
-01)Projection: regexp_like(test.column1_utf8view, Utf8("^https?://(?:www\.)?([^/]+)/.*$")) AS k
+01)Projection: regexp_like(test.column1_utf8view, Utf8View("^https?://(?:www\.)?([^/]+)/.*$")) AS k
02)--TableScan: test projection=[column1_utf8view]
## Ensure no casts for REGEXP_MATCH
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]