This is an automated email from the ASF dual-hosted git repository.

github-merge-queue[bot] pushed a commit to branch 
gh-readonly-queue/main/pr-22497-560456dfbddf99858f0931d1bb8b3d6b2856cac7
in repository https://gitbox.apache.org/repos/asf/datafusion.git

commit bad2be17427ba550ddb3775a09f11dfd876de6d3
Author: Amogh Ramesh <[email protected]>
AuthorDate: Thu Jun 4 15:05:52 2026 +0530

    fix: replace with empty search string should be a no-op (#22497)
    
    ## Which issue does this PR close?
    
    - Closes #22253
    - Closes #22357
    
    ## Rationale for this change
    
    PostgreSQL returns the input unchanged when `replace` is called with an
    empty `from`. DataFusion was instead inserting `to` before every
    character and at both ends, so `replace('abc', '', 'x')` returned
    `xaxbxcx`. This PR brings the behaviour in line with PostgreSQL. Part of
    the PG-compatibility cleanup tracked in #22247.
    
    ## What changes are included in this PR?
    
    - `datafusion/functions/src/string/replace.rs`: the empty-`from` branch
    in `apply_replace` now writes the input verbatim instead of inserting
    `to`. Added a `LargeUtf8` unit test for the new behaviour.
    - `datafusion/sqllogictest/test_files/string/string_literal.slt`: four
    new SLT asserts covering the `Utf8`, `Dictionary`, `Utf8View`, and
    `LargeUtf8` paths.
    - `datafusion/sqllogictest/test_files/string/string_query.slt.part`:
    updated four expected rows that were asserting the old buggy output.
    
    ## Are these changes tested?
    
    Yes. The unit test in `replace.rs` covers the `LargeUtf8` path, and the
    four new SLT asserts in `string_literal.slt` cover the `Utf8`,
    `Dictionary`, `Utf8View`, and `LargeUtf8` paths end-to-end. The full SLT
    suite passes locally.
    
    ## Are there any user-facing changes?
    
    Yes. `replace(str, '', x)` now returns `str` unchanged instead of
    inserting `x` before every character and at both ends. This matches
    PostgreSQL.
    
    ---------
    
    Signed-off-by: Amogh Ramesh <[email protected]>
---
 datafusion/functions/benches/replace.rs            | 30 ----------------------
 datafusion/functions/src/string/replace.rs         | 23 +++++++++++------
 .../test_files/string/string_literal.slt           | 20 +++++++++++++++
 .../test_files/string/string_query.slt.part        |  8 +++---
 4 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/datafusion/functions/benches/replace.rs 
b/datafusion/functions/benches/replace.rs
index b117968bad..7ad198995a 100644
--- a/datafusion/functions/benches/replace.rs
+++ b/datafusion/functions/benches/replace.rs
@@ -162,36 +162,6 @@ fn criterion_benchmark(c: &mut Criterion) {
             }
         }
 
-        // Empty-`from` path: insert `to` between every char of the input and 
at
-        // both ends.
-        if size == 1024 {
-            for &str_len in &[32_usize, 128] {
-                let args = create_args::<i32>(size, str_len, false, 0, 3, 0.0);
-                group.bench_function(
-                    format!("replace_string_empty_from [size={size}, 
str_len={str_len}]"),
-                    |b| {
-                        b.iter(|| {
-                            let args_cloned = args.clone();
-                            black_box(invoke_replace_with_args(args_cloned, 
size))
-                        })
-                    },
-                );
-
-                let args = create_args::<i32>(size, str_len, true, 0, 3, 0.0);
-                group.bench_function(
-                    format!(
-                        "replace_string_view_empty_from [size={size}, 
str_len={str_len}]"
-                    ),
-                    |b| {
-                        b.iter(|| {
-                            let args_cloned = args.clone();
-                            black_box(invoke_replace_with_args(args_cloned, 
size))
-                        })
-                    },
-                );
-            }
-        }
-
         group.finish();
     }
 }
diff --git a/datafusion/functions/src/string/replace.rs 
b/datafusion/functions/src/string/replace.rs
index 769727999e..28f81769f5 100644
--- a/datafusion/functions/src/string/replace.rs
+++ b/datafusion/functions/src/string/replace.rs
@@ -268,14 +268,8 @@ fn apply_replace<B: BulkNullStringArrayBuilder>(
     }
 
     if from.is_empty() {
-        // Empty `from`: insert `to` before each character and at both ends.
-        builder.append_with(|w| {
-            w.write_str(to);
-            for ch in string.chars() {
-                w.write_char(ch);
-                w.write_str(to);
-            }
-        });
+        // PostgreSQL returns the input unchanged when `from` is empty (#22253).
+        builder.append_value(string);
         return;
     }
 
@@ -346,6 +340,19 @@ mod tests {
             StringArray
         );
 
+        test_function!(
+            ReplaceFunc::new(),
+            vec![
+                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("abc")))),
+                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("")))),
+                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("x")))),
+            ],
+            Ok(Some("abc")),
+            &str,
+            LargeUtf8,
+            LargeStringArray
+        );
+
         Ok(())
     }
 }
diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt 
b/datafusion/sqllogictest/test_files/string/string_literal.slt
index d7547bf145..81aaf48629 100644
--- a/datafusion/sqllogictest/test_files/string/string_literal.slt
+++ b/datafusion/sqllogictest/test_files/string/string_literal.slt
@@ -430,6 +430,26 @@ SELECT replace(arrow_cast('foobar', 'LargeUtf8'), 
arrow_cast('bar', 'LargeUtf8')
 ----
 foohello
 
+# PostgreSQL compatibility: empty search string is a no-op (issue #22253)
+query T
+SELECT replace('abc', '', 'x')
+----
+abc
+
+query T
+SELECT replace(arrow_cast('abc', 'Dictionary(Int32, Utf8)'), '', 'x')
+----
+abc
+
+query T
+SELECT replace(arrow_cast('abc', 'Utf8View'), arrow_cast('', 'Utf8View'), arrow_cast('x', 'Utf8View'))
+----
+abc
+
+query T
+SELECT replace(arrow_cast('abc', 'LargeUtf8'), arrow_cast('', 'LargeUtf8'), arrow_cast('x', 'LargeUtf8'))
+----
+abc
 
 query T
 SELECT reverse('abcde')
diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part 
b/datafusion/sqllogictest/test_files/string/string_query.slt.part
index 9e5b8f91e7..dac4dd06db 100644
--- a/datafusion/sqllogictest/test_files/string/string_query.slt.part
+++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part
@@ -884,10 +884,10 @@ Xiangpeng bar NULL bar NULL datafusion数据融合
 Raphael baraphael NULL datafusionДатbarион NULL datafusionДатаФусион
 under_score under_score NULL un iść core NULL un iść core
 percent percent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt
-(empty) (empty) NULL bar NULL (empty)
-(empty) (empty) NULL bar NULL (empty)
-% % NULL bar NULL (empty)
-_ _ NULL bar NULL (empty)
+(empty) (empty) NULL (empty) NULL (empty)
+(empty) (empty) NULL (empty) NULL (empty)
+% % NULL (empty) NULL (empty)
+_ _ NULL (empty) NULL (empty)
 NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to