This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7bd0e74aaa fix: `regexp_replace` fails when pattern or replacement is 
a scalar `NULL` (#11459)
7bd0e74aaa is described below

commit 7bd0e74aaa7aad3e436f01000fd4f973d5724f50
Author: Alex Huang <[email protected]>
AuthorDate: Tue Jul 16 02:53:37 2024 +0800

    fix: `regexp_replace` fails when pattern or replacement is a scalar `NULL` 
(#11459)
    
    * fix: regexp_replace fails when pattern or replacement is a scalar NULL
    
    * chore
---
 datafusion/functions/src/regex/regexpreplace.rs | 31 ++++++++++++++++++-------
 datafusion/sqllogictest/test_files/regexp.slt   | 10 ++++++++
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/datafusion/functions/src/regex/regexpreplace.rs 
b/datafusion/functions/src/regex/regexpreplace.rs
index 201eebde22..378b6ced07 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -282,22 +282,23 @@ pub fn regexp_replace<T: OffsetSizeTrait>(args: 
&[ArrayRef]) -> Result<ArrayRef>
 
 fn _regexp_replace_early_abort<T: OffsetSizeTrait>(
     input_array: &GenericStringArray<T>,
+    sz: usize,
 ) -> Result<ArrayRef> {
     // Mimicking the existing behavior of regexp_replace, if any of the scalar 
arguments
-    // are actually null, then the result will be an array of the same size 
but with nulls.
+    // are actually null, then the result will be an array of the same size as 
the first argument with all nulls.
     //
     // Also acts like an early abort mechanism when the input array is empty.
-    Ok(new_null_array(input_array.data_type(), input_array.len()))
+    Ok(new_null_array(input_array.data_type(), sz))
 }
 /// Get the first argument from the given string array.
 ///
 /// Note: If the array is empty or the first argument is null,
 /// then calls the given early abort function.
 macro_rules! fetch_string_arg {
-    ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident) => {{
+    ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => 
{{
         let array = as_generic_string_array::<T>($ARG)?;
         if array.len() == 0 || array.is_null(0) {
-            return $EARLY_ABORT(array);
+            return $EARLY_ABORT(array, $ARRAY_SIZE);
         } else {
             array.value(0)
         }
@@ -313,12 +314,24 @@ fn _regexp_replace_static_pattern_replace<T: 
OffsetSizeTrait>(
     args: &[ArrayRef],
 ) -> Result<ArrayRef> {
     let string_array = as_generic_string_array::<T>(&args[0])?;
-    let pattern = fetch_string_arg!(&args[1], "pattern", T, 
_regexp_replace_early_abort);
-    let replacement =
-        fetch_string_arg!(&args[2], "replacement", T, 
_regexp_replace_early_abort);
+    let array_size = string_array.len();
+    let pattern = fetch_string_arg!(
+        &args[1],
+        "pattern",
+        T,
+        _regexp_replace_early_abort,
+        array_size
+    );
+    let replacement = fetch_string_arg!(
+        &args[2],
+        "replacement",
+        T,
+        _regexp_replace_early_abort,
+        array_size
+    );
     let flags = match args.len() {
         3 => None,
-        4 => Some(fetch_string_arg!(&args[3], "flags", T, 
_regexp_replace_early_abort)),
+        4 => Some(fetch_string_arg!(&args[3], "flags", T, 
_regexp_replace_early_abort, array_size)),
         other => {
             return exec_err!(
                 "regexp_replace was called with {other} arguments. It requires 
at least 3 and at most 4."
@@ -351,7 +364,7 @@ fn _regexp_replace_static_pattern_replace<T: 
OffsetSizeTrait>(
         let offsets = string_array.value_offsets();
         (offsets[string_array.len()] - offsets[0])
             .to_usize()
-            .unwrap()
+            .expect("Failed to convert usize")
     });
     let mut new_offsets = BufferBuilder::<T>::new(string_array.len() + 1);
     new_offsets.append(T::zero());
diff --git a/datafusion/sqllogictest/test_files/regexp.slt 
b/datafusion/sqllogictest/test_files/regexp.slt
index fed7ac3171..f5349fc659 100644
--- a/datafusion/sqllogictest/test_files/regexp.slt
+++ b/datafusion/sqllogictest/test_files/regexp.slt
@@ -309,6 +309,16 @@ SELECT regexp_replace(arrow_cast('foobar', 
'Dictionary(Int32, Utf8)'), 'bar', 'x
 ----
 fooxx
 
+query TTT
+select
+    regexp_replace(col, NULL, 'c'),
+    regexp_replace(col, 'a', NULL),
+    regexp_replace(col, 'a', 'c', NULL)
+from (values ('a'), ('b')) as tbl(col);
+----
+NULL NULL NULL
+NULL NULL NULL
+
 # multiline string
 query B
 SELECT 'foo\nbar\nbaz' ~ 'bar';


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to