This is an automated email from the ASF dual-hosted git repository. github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-22497-560456dfbddf99858f0931d1bb8b3d6b2856cac7 in repository https://gitbox.apache.org/repos/asf/datafusion.git
commit bad2be17427ba550ddb3775a09f11dfd876de6d3
Author: Amogh Ramesh <[email protected]>
AuthorDate: Thu Jun 4 15:05:52 2026 +0530

    fix: replace with empty search string should be a no-op (#22497)

    ## Which issue does this PR close?

    - Closes #22253
    - Closes #22357

    ## Rationale for this change

    PostgreSQL returns the input unchanged when `replace` is called with an empty `from`. DataFusion was instead inserting `to` before every character and at both ends, so `replace('abc', '', 'x')` returned `xaxbxcx`. This PR brings the behaviour in line with PostgreSQL. Part of the PG-compatibility cleanup tracked in #22247.

    ## What changes are included in this PR?

    - `datafusion/functions/src/string/replace.rs`: the empty-`from` branch in `apply_replace` now writes the input verbatim instead of inserting `to`. Added a `LargeUtf8` unit test for the new behaviour.
    - `datafusion/sqllogictest/test_files/string/string_literal.slt`: four new SLT asserts covering the `Utf8`, `Dictionary`, `Utf8View`, and `LargeUtf8` paths.
    - `datafusion/sqllogictest/test_files/string/string_query.slt.part`: updated four expected rows that were asserting the old buggy output.

    ## Are these changes tested?

    Yes. The unit test in `replace.rs` covers the `LargeUtf8` path, and the four new SLT asserts in `string_literal.slt` cover the remaining Arrow string encodings end-to-end. The full SLT suite passes locally.

    ## Are there any user-facing changes?

    Yes. `replace(str, '', x)` now returns `str` unchanged instead of inserting `x` between every character. This matches PostgreSQL.
--------- Signed-off-by: Amogh Ramesh <[email protected]> --- datafusion/functions/benches/replace.rs | 30 ---------------------- datafusion/functions/src/string/replace.rs | 23 +++++++++++------ .../test_files/string/string_literal.slt | 20 +++++++++++++++ .../test_files/string/string_query.slt.part | 8 +++--- 4 files changed, 39 insertions(+), 42 deletions(-) diff --git a/datafusion/functions/benches/replace.rs b/datafusion/functions/benches/replace.rs index b117968bad..7ad198995a 100644 --- a/datafusion/functions/benches/replace.rs +++ b/datafusion/functions/benches/replace.rs @@ -162,36 +162,6 @@ fn criterion_benchmark(c: &mut Criterion) { } } - // Empty-`from` path: insert `to` between every char of the input and at - // both ends. - if size == 1024 { - for &str_len in &[32_usize, 128] { - let args = create_args::<i32>(size, str_len, false, 0, 3, 0.0); - group.bench_function( - format!("replace_string_empty_from [size={size}, str_len={str_len}]"), - |b| { - b.iter(|| { - let args_cloned = args.clone(); - black_box(invoke_replace_with_args(args_cloned, size)) - }) - }, - ); - - let args = create_args::<i32>(size, str_len, true, 0, 3, 0.0); - group.bench_function( - format!( - "replace_string_view_empty_from [size={size}, str_len={str_len}]" - ), - |b| { - b.iter(|| { - let args_cloned = args.clone(); - black_box(invoke_replace_with_args(args_cloned, size)) - }) - }, - ); - } - } - group.finish(); } } diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 769727999e..28f81769f5 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -268,14 +268,8 @@ fn apply_replace<B: BulkNullStringArrayBuilder>( } if from.is_empty() { - // Empty `from`: insert `to` before each character and at both ends. 
- builder.append_with(|w| { - w.write_str(to); - for ch in string.chars() { - w.write_char(ch); - w.write_str(to); - } - }); + // PostgreSQL returns the input unchanged when `from` is empty (#22253). + builder.append_value(string); return; } @@ -346,6 +340,19 @@ mod tests { StringArray ); + test_function!( + ReplaceFunc::new(), + vec![ + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("abc")))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("")))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("x")))), + ], + Ok(Some("abc")), + &str, + LargeUtf8, + LargeStringArray + ); + Ok(()) } } diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index d7547bf145..81aaf48629 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -430,6 +430,26 @@ SELECT replace(arrow_cast('foobar', 'LargeUtf8'), arrow_cast('bar', 'LargeUtf8') ---- foohello +# PostgreSQL compatibility: empty search string is a no-op (issue #22253) +query T +SELECT replace('abc', '', 'x') +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'Dictionary(Int32, Utf8)'), '', 'x') +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'Utf8View'), arrow_cast('', 'Utf8View'), arrow_cast('x', 'Utf8View')) +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'LargeUtf8'), arrow_cast('', 'LargeUtf8'), arrow_cast('x', 'LargeUtf8')) +---- +abc query T SELECT reverse('abcde') diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index 9e5b8f91e7..dac4dd06db 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -884,10 +884,10 @@ Xiangpeng bar NULL bar NULL datafusion数据融合 Raphael baraphael NULL datafusionДатbarион 
NULL datafusionДатаФусион under_score under_score NULL un iść core NULL un iść core percent percent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt -(empty) (empty) NULL bar NULL (empty) -(empty) (empty) NULL bar NULL (empty) -% % NULL bar NULL (empty) -_ _ NULL bar NULL (empty) +(empty) (empty) NULL (empty) NULL (empty) +(empty) (empty) NULL (empty) NULL (empty) +% % NULL (empty) NULL (empty) +_ _ NULL (empty) NULL (empty) NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
