lyne7-sc commented on code in PR #20617:
URL: https://github.com/apache/datafusion/pull/20617#discussion_r2872805876
##########
datafusion/functions-nested/src/resize.rs:
##########
@@ -206,7 +206,103 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
let values = array.values();
let original_data = values.to_data();
- // create default element array
+ // Track the largest per-row growth so the uniform-fill fast path can
+ // materialize one reusable fill buffer of the required size.
+ let mut max_extra: usize = 0;
+ for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
+ if array.is_null(row_index) {
+ continue;
+ }
+ let target_count =
count_array.value(row_index).to_usize().ok_or_else(|| {
+ internal_datafusion_err!("array_resize: failed to convert size to
usize")
+ })?;
+ let current_len = (offset_window[1] -
offset_window[0]).to_usize().unwrap();
+ if target_count > current_len {
+ let extra = target_count - current_len;
+ if extra > max_extra {
+ max_extra = extra;
+ }
+ }
+ }
+
+ // The fast path is valid when at least one row grows and every row would
+ // use the same fill value.
+ let is_uniform_fill = max_extra > 0
+ && match &default_element {
+ None => true,
+ Some(fill_array) => {
+ let len = fill_array.len();
+ let null_count = fill_array.logical_null_count();
+
+ len <= 1
+ || null_count == len
+ || (null_count == 0 && {
+ let first = fill_array.slice(0, 1);
+ (1..len)
+ .all(|i| fill_array.slice(i, 1).as_ref() ==
first.as_ref())
+ })
+ }
+ };
+
+ // Fast path: at least one row needs to grow and all rows share
+ // the same fill value.
+ if is_uniform_fill {
+ let fill_scalar = match &default_element {
+ None => ScalarValue::try_from(&data_type)?,
+ Some(fill_array) if fill_array.logical_null_count() ==
fill_array.len() => {
+ ScalarValue::try_from(&data_type)?
+ }
+ Some(fill_array) =>
ScalarValue::try_from_array(fill_array.as_ref(), 0)?,
+ };
+ let default_element = fill_scalar.to_array_of_size(max_extra)?;
+ let default_value_data = default_element.to_data();
+
+ let capacity = Capacities::Array(original_data.len() +
default_value_data.len());
+ let mut offsets = vec![O::usize_as(0)];
+ let mut mutable = MutableArrayData::with_capacities(
+ vec![&original_data, &default_value_data],
+ false,
+ capacity,
+ );
+
+ let mut null_builder = NullBufferBuilder::new(array.len());
+
+ for (row_index, offset_window) in
array.offsets().windows(2).enumerate() {
Review Comment:
I refactored it to pull the shared loop out of the fast and slow paths.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]