This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new f9d3133c4f Improve perfomance of `reverse` function (#14025)
f9d3133c4f is described below

commit f9d3133c4fd39e8a14329bbb98bb726283bccffd
Author: Tai Le Manh <[email protected]>
AuthorDate: Fri Jan 10 04:45:57 2025 +0700

    Improve perfomance of `reverse` function (#14025)
    
    * Improve perfomance of 'reverse' function
    
    Signed-off-by: Tai Le Manh <[email protected]>
    
    * Apply sugestion change
    
    * Fix typo
    
    ---------
    
    Signed-off-by: Tai Le Manh <[email protected]>
---
 datafusion/functions/Cargo.toml             |  5 ++
 datafusion/functions/benches/reverse.rs     | 90 +++++++++++++++++++++++++++++
 datafusion/functions/src/unicode/reverse.rs | 25 +++++---
 3 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index fd986c4be4..c8025fb2d8 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -204,6 +204,11 @@ harness = false
 name = "strpos"
 required-features = ["unicode_expressions"]
 
+[[bench]]
+harness = false
+name = "reverse"
+required-features = ["unicode_expressions"]
+
 [[bench]]
 harness = false
 name = "trunc"
diff --git a/datafusion/functions/benches/reverse.rs 
b/datafusion/functions/benches/reverse.rs
new file mode 100644
index 0000000000..c7c1ef8a82
--- /dev/null
+++ b/datafusion/functions/benches/reverse.rs
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::array::OffsetSizeTrait;
+use arrow::util::bench_util::{
+    create_string_array_with_len, create_string_view_array_with_len,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::unicode;
+use std::sync::Arc;
+
+fn create_args<O: OffsetSizeTrait>(
+    size: usize,
+    str_len: usize,
+    force_view_types: bool,
+) -> Vec<ColumnarValue> {
+    if force_view_types {
+        let string_array =
+            Arc::new(create_string_view_array_with_len(size, 0.1, str_len, 
false));
+
+        vec![ColumnarValue::Array(string_array)]
+    } else {
+        let string_array =
+            Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len));
+
+        vec![ColumnarValue::Array(string_array)]
+    }
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let reverse = unicode::reverse();
+    for size in [1024, 4096] {
+        let str_len = 8;
+
+        let args = create_args::<i32>(size, str_len, true);
+        c.bench_function(
+            format!("reverse_string_view [size={}, str_len={}]", size, 
str_len).as_str(),
+            |b| {
+                b.iter(|| {
+                    // TODO use invoke_with_args
+                    black_box(reverse.invoke_batch(&args, str_len))
+                })
+            },
+        );
+
+        let str_len = 32;
+
+        let args = create_args::<i32>(size, str_len, true);
+        c.bench_function(
+            format!("reverse_string_view [size={}, str_len={}]", size, 
str_len).as_str(),
+            |b| {
+                b.iter(|| {
+                    // TODO use invoke_with_args
+                    black_box(reverse.invoke_batch(&args, str_len))
+                })
+            },
+        );
+
+        let args = create_args::<i32>(size, str_len, false);
+        c.bench_function(
+            format!("reverse_string [size={}, str_len={}]", size, 
str_len).as_str(),
+            |b| {
+                b.iter(|| {
+                    // TODO use invoke_with_args
+                    black_box(reverse.invoke_batch(&args, str_len))
+                })
+            },
+        );
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/unicode/reverse.rs 
b/datafusion/functions/src/unicode/reverse.rs
index 5ad347ed96..f07deda70e 100644
--- a/datafusion/functions/src/unicode/reverse.rs
+++ b/datafusion/functions/src/unicode/reverse.rs
@@ -20,8 +20,7 @@ use std::sync::Arc;
 
 use crate::utils::{make_scalar_function, utf8_to_str_type};
 use arrow::array::{
-    Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray,
-    OffsetSizeTrait,
+    Array, ArrayRef, AsArray, GenericStringBuilder, OffsetSizeTrait, 
StringArrayType,
 };
 use arrow::datatypes::DataType;
 use datafusion_common::{exec_err, Result};
@@ -105,8 +104,7 @@ impl ScalarUDFImpl for ReverseFunc {
     }
 }
 
-/// Reverses the order of the characters in the string.
-/// reverse('abcde') = 'edcba'
+/// Reverses the order of the characters in the string `reverse('abcde') = 
'edcba'`.
 /// The implementation uses UTF-8 code points as characters
 pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     if args[0].data_type() == &Utf8View {
@@ -116,14 +114,23 @@ pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
-fn reverse_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
+fn reverse_impl<'a, T: OffsetSizeTrait, V: StringArrayType<'a>>(
     string_array: V,
 ) -> Result<ArrayRef> {
-    let result = ArrayIter::new(string_array)
-        .map(|string| string.map(|string: &str| 
string.chars().rev().collect::<String>()))
-        .collect::<GenericStringArray<T>>();
+    let mut builder = 
GenericStringBuilder::<T>::with_capacity(string_array.len(), 1024);
+
+    let mut reversed = String::new();
+    for string in string_array.iter() {
+        if let Some(s) = string {
+            reversed.extend(s.chars().rev());
+            builder.append_value(&reversed);
+            reversed.clear();
+        } else {
+            builder.append_null();
+        }
+    }
 
-    Ok(Arc::new(result) as ArrayRef)
+    Ok(Arc::new(builder.finish()) as ArrayRef)
 }
 
 #[cfg(test)]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to