This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 8b45d2d72f optimize performance of the repeat function (up to 50% faster) (#14697)
8b45d2d72f is described below

commit 8b45d2d72f4d61ffa6ad8c2f4da6f42e45a6d317
Author: zjregee <[email protected]>
AuthorDate: Mon Feb 17 20:10:57 2025 +0800

    optimize performance of the repeat function (up to 50% faster) (#14697)
    
    * optimize performance of the repeat function
    
    * correct spelling
---
 datafusion/functions/src/string/repeat.rs | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs
index 5bfb7cf252..8253754c2b 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -151,20 +151,35 @@ where
     T: OffsetSizeTrait,
     S: StringArrayType<'a>,
 {
-    let mut builder: GenericStringBuilder<T> = GenericStringBuilder::new();
+    let mut total_capacity = 0;
     string_array.iter().zip(number_array.iter()).try_for_each(
         |(string, number)| -> Result<(), DataFusionError> {
             match (string, number) {
                 (Some(string), Some(number)) if number >= 0 => {
-                    if number as usize * string.len() > max_str_len {
+                    let item_capacity = string.len() * number as usize;
+                    if item_capacity > max_str_len {
                         return exec_err!(
                             "string size overflow on repeat, max size is {}, 
but got {}",
                             max_str_len,
                             number as usize * string.len()
                         );
-                    } else {
-                        builder.append_value(string.repeat(number as usize))
                     }
+                    total_capacity += item_capacity;
+                }
+                _ => (),
+            }
+            Ok(())
+        },
+    )?;
+
+    let mut builder =
+        GenericStringBuilder::<T>::with_capacity(string_array.len(), 
total_capacity);
+
+    string_array.iter().zip(number_array.iter()).try_for_each(
+        |(string, number)| -> Result<(), DataFusionError> {
+            match (string, number) {
+                (Some(string), Some(number)) if number >= 0 => {
+                    builder.append_value(string.repeat(number as usize));
                 }
                 (Some(_), Some(_)) => builder.append_value(""),
                 _ => builder.append_null(),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to