Rachelint commented on code in PR #12395: URL: https://github.com/apache/datafusion/pull/12395#discussion_r1769427084
########## datafusion/functions/src/string/common.rs: ########## @@ -72,65 +94,126 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>( }; if use_string_view { - string_view_trim::<T>(func, args) + string_view_trim(func, args) } else { string_trim::<T>(func, args) } } // removing 'a will cause compiler complaining lifetime of `func` -fn string_view_trim<'a, T: OffsetSizeTrait>( - func: fn(&'a str, &'a str) -> &'a str, +fn string_view_trim<'a>( + trim_func: fn(&'a str, &'a str) -> &'a str, args: &'a [ArrayRef], ) -> Result<ArrayRef> { - let string_array = as_string_view_array(&args[0])?; + let string_view_array = as_string_view_array(&args[0])?; + let mut views_buf = Vec::with_capacity(string_view_array.len()); + let mut null_builder = NullBufferBuilder::new(string_view_array.len()); match args.len() { 1 => { - let result = string_array - .iter() - .map(|string| string.map(|string: &str| func(string, " "))) - .collect::<GenericStringArray<T>>(); - - Ok(Arc::new(result) as ArrayRef) + let array_iter = string_view_array.iter(); + let views_iter = string_view_array.views().iter(); + for (src_str_opt, raw_view) in array_iter.zip(views_iter) { + trim_and_append_str( + src_str_opt, + Some(" "), + trim_func, + &mut views_buf, + &mut null_builder, + raw_view, + ); + } } 2 => { let characters_array = as_string_view_array(&args[1])?; if characters_array.len() == 1 { + // Only one `trim characters` exist if characters_array.is_null(0) { return Ok(new_null_array( // The schema is expecting utf8 as null - &DataType::Utf8, - string_array.len(), + &DataType::Utf8View, + string_view_array.len(), )); } let characters = characters_array.value(0); - let result = string_array - .iter() - .map(|item| item.map(|string| func(string, characters))) - .collect::<GenericStringArray<T>>(); - return Ok(Arc::new(result) as ArrayRef); + let array_iter = string_view_array.iter(); + let views_iter = string_view_array.views().iter(); + for (src_str_opt, raw_view) in array_iter.zip(views_iter) { + trim_and_append_str( + src_str_opt, + Some(characters), + trim_func, + &mut views_buf, + &mut null_builder, + raw_view, + ); + } + } else { + // A specific `trim characters` for a row in the string view array + let characters_iter = characters_array.iter(); + let array_iter = string_view_array.iter(); + let views_iter = string_view_array.views().iter(); + for ((src_str_opt, raw_view), characters_opt) in + array_iter.zip(views_iter).zip(characters_iter) + { + trim_and_append_str( + src_str_opt, + characters_opt, + trim_func, + &mut views_buf, + &mut null_builder, + raw_view, + ); + } } - - let result = string_array - .iter() - .zip(characters_array.iter()) - .map(|(string, characters)| match (string, characters) { - (Some(string), Some(characters)) => Some(func(string, characters)), - _ => None, - }) - .collect::<GenericStringArray<T>>(); - - Ok(Arc::new(result) as ArrayRef) } other => { - exec_err!( + return exec_err!( "Function TRIM was called with {other} arguments. It requires at least 1 and at most 2." - ) + ); } } + + let views_buf = ScalarBuffer::from(views_buf); + let nulls_buf = null_builder.finish(); + + // Safety: Review Comment: Yes, I am seeking way to make `in-place modification` of `array` currently, too. And I found only primitive support it through converting itself to builder. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org