From 9134614e79d760cf17a56c92b647ad80b3d0c3d1 Mon Sep 17 00:00:00 2001 From: Amogh Ramesh Date: Sun, 24 May 2026 19:51:01 +0530 Subject: [PATCH 1/2] fix: replace with empty search string should be a no-op PostgreSQL returns the input unchanged when `replace` is called with an empty `from`. DataFusion was instead inserting `to` before every character and at both ends - so `replace('abc', '', 'x')` returned `xaxbxcx`. Fix the empty-`from` branch in `apply_replace` to write the input verbatim. Update the regression tests in `string_query.slt.part` that were asserting the buggy output. Closes #22253 Signed-off-by: Amogh Ramesh --- datafusion/functions/src/string/replace.rs | 23 ++++++++++++------- .../test_files/string/string_literal.slt | 20 ++++++++++++++++ .../test_files/string/string_query.slt.part | 8 +++---- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 769727999ea05..77b617bb90899 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -268,14 +268,8 @@ fn apply_replace( } if from.is_empty() { - // Empty `from`: insert `to` before each character and at both ends. - builder.append_with(|w| { - w.write_str(to); - for ch in string.chars() { - w.write_char(ch); - w.write_str(to); - } - }); + // PostgreSQL returns the input unchanged when `from` is empty (#22253). + builder.append_with(|w| w.write_str(string)); return; } @@ -346,6 +340,19 @@ mod tests { StringArray ); + test_function!( + ReplaceFunc::new(), + vec![ + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("abc")))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("")))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("x")))), + ], + Ok(Some("abc")), + &str, + LargeUtf8, + LargeStringArray + ); + Ok(()) } } diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index 97f2a40c13fea..76a03b6739bca 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -426,6 +426,26 @@ SELECT replace(arrow_cast('foobar', 'LargeUtf8'), arrow_cast('bar', 'LargeUtf8') ---- foohello +# PostgreSQL compatibility: empty search string is a no-op (issue #22253) +query T +SELECT replace('abc', '', 'x') +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'Dictionary(Int32, Utf8)'), '', 'x') +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'Utf8View'), arrow_cast('', 'Utf8View'), arrow_cast('x', 'Utf8View')) +---- +abc + +query T +SELECT replace(arrow_cast('abc', 'LargeUtf8'), arrow_cast('', 'LargeUtf8'), arrow_cast('x', 'LargeUtf8')) +---- +abc query T SELECT reverse('abcde') diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index 9e5b8f91e7d8e..dac4dd06db21f 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -884,10 +884,10 @@ Xiangpeng bar NULL bar NULL datafusion数据融合 Raphael baraphael NULL datafusionДатbarион NULL datafusionДатаФусион under_score under_score NULL un iść core NULL un iść core percent percent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt -(empty) (empty) NULL bar NULL (empty) -(empty) (empty) NULL bar NULL (empty) -% % NULL bar NULL (empty) -_ _ NULL bar NULL (empty) +(empty) (empty) NULL (empty) NULL (empty) +(empty) (empty) NULL (empty) NULL (empty) +% % NULL (empty) NULL (empty) +_ _ NULL (empty) NULL (empty) NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL From 752ef6b4a23a6d764491ea4bdd1c13685858c876 Mon Sep 17 00:00:00 2001 From: Amogh Ramesh Date: Mon, 1 Jun 2026 20:23:40 +0530 Subject: [PATCH 2/2] address review on empty-from path use builder.append_value(string) for the no-op instead of the append_with closure, per the suggestion from #22357. also drop the replace_string_empty_from benches since they were timing the old char-interleaving path this PR removes. --- datafusion/functions/benches/replace.rs | 30 ---------------------- datafusion/functions/src/string/replace.rs | 2 +- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/datafusion/functions/benches/replace.rs b/datafusion/functions/benches/replace.rs index b117968bad039..7ad198995a028 100644 --- a/datafusion/functions/benches/replace.rs +++ b/datafusion/functions/benches/replace.rs @@ -162,36 +162,6 @@ fn criterion_benchmark(c: &mut Criterion) { } } - // Empty-`from` path: insert `to` between every char of the input and at - // both ends. - if size == 1024 { - for &str_len in &[32_usize, 128] { - let args = create_args::(size, str_len, false, 0, 3, 0.0); - group.bench_function( - format!("replace_string_empty_from [size={size}, str_len={str_len}]"), - |b| { - b.iter(|| { - let args_cloned = args.clone(); - black_box(invoke_replace_with_args(args_cloned, size)) - }) - }, - ); - - let args = create_args::(size, str_len, true, 0, 3, 0.0); - group.bench_function( - format!( - "replace_string_view_empty_from [size={size}, str_len={str_len}]" - ), - |b| { - b.iter(|| { - let args_cloned = args.clone(); - black_box(invoke_replace_with_args(args_cloned, size)) - }) - }, - ); - } - } - group.finish(); } } diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 77b617bb90899..28f81769f56db 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -269,7 +269,7 @@ fn apply_replace( if from.is_empty() { // PostgreSQL returns the input unchanged when `from` is empty (#22253). - builder.append_with(|w| w.write_str(string)); + builder.append_value(string); return; }