This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7bd0e74aaa fix: `regexp_replace` fails when pattern or replacement is
a scalar `NULL` (#11459)
7bd0e74aaa is described below
commit 7bd0e74aaa7aad3e436f01000fd4f973d5724f50
Author: Alex Huang <[email protected]>
AuthorDate: Tue Jul 16 02:53:37 2024 +0800
fix: `regexp_replace` fails when pattern or replacement is a scalar `NULL`
(#11459)
* fix: regexp_replace fails when pattern or replacement is a scalar NULL
* chore
---
datafusion/functions/src/regex/regexpreplace.rs | 31 ++++++++++++++++++-------
datafusion/sqllogictest/test_files/regexp.slt | 10 ++++++++
2 files changed, 32 insertions(+), 9 deletions(-)
diff --git a/datafusion/functions/src/regex/regexpreplace.rs
b/datafusion/functions/src/regex/regexpreplace.rs
index 201eebde22..378b6ced07 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -282,22 +282,23 @@ pub fn regexp_replace<T: OffsetSizeTrait>(args:
&[ArrayRef]) -> Result<ArrayRef>
fn _regexp_replace_early_abort<T: OffsetSizeTrait>(
input_array: &GenericStringArray<T>,
+ sz: usize,
) -> Result<ArrayRef> {
// Mimicking the existing behavior of regexp_replace, if any of the scalar
arguments
- // are actually null, then the result will be an array of the same size
but with nulls.
+ // are actually null, then the result will be an array of the same size as
the first argument with all nulls.
//
// Also acts like an early abort mechanism when the input array is empty.
- Ok(new_null_array(input_array.data_type(), input_array.len()))
+ Ok(new_null_array(input_array.data_type(), sz))
}
/// Get the first argument from the given string array.
///
/// Note: If the array is empty or the first argument is null,
/// then calls the given early abort function.
macro_rules! fetch_string_arg {
- ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident) => {{
+ ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) =>
{{
let array = as_generic_string_array::<T>($ARG)?;
if array.len() == 0 || array.is_null(0) {
- return $EARLY_ABORT(array);
+ return $EARLY_ABORT(array, $ARRAY_SIZE);
} else {
array.value(0)
}
@@ -313,12 +314,24 @@ fn _regexp_replace_static_pattern_replace<T:
OffsetSizeTrait>(
args: &[ArrayRef],
) -> Result<ArrayRef> {
let string_array = as_generic_string_array::<T>(&args[0])?;
- let pattern = fetch_string_arg!(&args[1], "pattern", T,
_regexp_replace_early_abort);
- let replacement =
- fetch_string_arg!(&args[2], "replacement", T,
_regexp_replace_early_abort);
+ let array_size = string_array.len();
+ let pattern = fetch_string_arg!(
+ &args[1],
+ "pattern",
+ T,
+ _regexp_replace_early_abort,
+ array_size
+ );
+ let replacement = fetch_string_arg!(
+ &args[2],
+ "replacement",
+ T,
+ _regexp_replace_early_abort,
+ array_size
+ );
let flags = match args.len() {
3 => None,
- 4 => Some(fetch_string_arg!(&args[3], "flags", T,
_regexp_replace_early_abort)),
+ 4 => Some(fetch_string_arg!(&args[3], "flags", T,
_regexp_replace_early_abort, array_size)),
other => {
return exec_err!(
"regexp_replace was called with {other} arguments. It requires
at least 3 and at most 4."
@@ -351,7 +364,7 @@ fn _regexp_replace_static_pattern_replace<T:
OffsetSizeTrait>(
let offsets = string_array.value_offsets();
(offsets[string_array.len()] - offsets[0])
.to_usize()
- .unwrap()
+ .expect("Failed to convert usize")
});
let mut new_offsets = BufferBuilder::<T>::new(string_array.len() + 1);
new_offsets.append(T::zero());
diff --git a/datafusion/sqllogictest/test_files/regexp.slt
b/datafusion/sqllogictest/test_files/regexp.slt
index fed7ac3171..f5349fc659 100644
--- a/datafusion/sqllogictest/test_files/regexp.slt
+++ b/datafusion/sqllogictest/test_files/regexp.slt
@@ -309,6 +309,16 @@ SELECT regexp_replace(arrow_cast('foobar',
'Dictionary(Int32, Utf8)'), 'bar', 'x
----
fooxx
+query TTT
+select
+ regexp_replace(col, NULL, 'c'),
+ regexp_replace(col, 'a', NULL),
+ regexp_replace(col, 'a', 'c', NULL)
+from (values ('a'), ('b')) as tbl(col);
+----
+NULL NULL NULL
+NULL NULL NULL
+
# multiline string
query B
SELECT 'foo\nbar\nbaz' ~ 'bar';
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]