This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 8b45d2d72f optimize performance of the repeat function (up to 50% faster) (#14697)
8b45d2d72f is described below
commit 8b45d2d72f4d61ffa6ad8c2f4da6f42e45a6d317
Author: zjregee <[email protected]>
AuthorDate: Mon Feb 17 20:10:57 2025 +0800
optimize performance of the repeat function (up to 50% faster) (#14697)
* optimize performance of the repeat function
* correct spelling
---
datafusion/functions/src/string/repeat.rs | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs
index 5bfb7cf252..8253754c2b 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -151,20 +151,35 @@ where
T: OffsetSizeTrait,
S: StringArrayType<'a>,
{
- let mut builder: GenericStringBuilder<T> = GenericStringBuilder::new();
+ let mut total_capacity = 0;
string_array.iter().zip(number_array.iter()).try_for_each(
|(string, number)| -> Result<(), DataFusionError> {
match (string, number) {
(Some(string), Some(number)) if number >= 0 => {
- if number as usize * string.len() > max_str_len {
+ let item_capacity = string.len() * number as usize;
+ if item_capacity > max_str_len {
return exec_err!(
"string size overflow on repeat, max size is {},
but got {}",
max_str_len,
number as usize * string.len()
);
- } else {
- builder.append_value(string.repeat(number as usize))
}
+ total_capacity += item_capacity;
+ }
+ _ => (),
+ }
+ Ok(())
+ },
+ )?;
+
+ let mut builder =
+ GenericStringBuilder::<T>::with_capacity(string_array.len(),
total_capacity);
+
+ string_array.iter().zip(number_array.iter()).try_for_each(
+ |(string, number)| -> Result<(), DataFusionError> {
+ match (string, number) {
+ (Some(string), Some(number)) if number >= 0 => {
+ builder.append_value(string.repeat(number as usize));
}
(Some(_), Some(_)) => builder.append_value(""),
_ => builder.append_null(),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]