Jefffrey commented on code in PR #20102:
URL: https://github.com/apache/datafusion/pull/20102#discussion_r2753000666
##########
datafusion/functions-nested/src/repeat.rs:
##########
@@ -222,7 +215,7 @@ fn general_repeat<O: OffsetSizeTrait>(
Arc::new(Field::new_list_field(array.data_type().to_owned(), true)),
OffsetBuffer::new(offsets.into()),
repeated_values,
- None,
+ Some(NullBuffer::new(nulls.finish())),
Review Comment:
We can copy the nulls buffer from the count array instead of using a builder
```suggestion
count_array.nulls().cloned(),
```
##########
datafusion/functions-nested/src/repeat.rs:
##########
@@ -193,21 +180,27 @@ fn array_repeat_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
/// ```
fn general_repeat<O: OffsetSizeTrait>(
array: &ArrayRef,
- count_array: &UInt64Array,
+ count_array: &Int64Array,
) -> Result<ArrayRef> {
- // Build offsets and take_indices
- let total_repeated_values: usize =
- count_array.values().iter().map(|&c| c as usize).sum();
+ let total_repeated_values: usize = count_array
+ .values()
+ .iter()
+ .map(|&c| if c > 0 { c as usize } else { 0 })
Review Comment:
nit: technically the spec allows null slots to have non-0 values, so this
could overestimate
##########
datafusion/functions-nested/src/repeat.rs:
##########
@@ -298,8 +295,22 @@ fn general_list_repeat<O: OffsetSizeTrait>(
list_array.data_type().to_owned(),
true,
)),
- OffsetBuffer::<O>::from_lengths(counts.iter().map(|&c| c as usize)),
+ OffsetBuffer::<O>::from_lengths(
+ count_array
+ .iter()
+ .map(|c| c.map(|v| if v > 0 { v as usize } else { 0
}).unwrap_or(0)),
+ ),
Arc::new(inner_list),
- None,
+ Some(NullBuffer::new(outer_nulls.finish())),
Review Comment:
Same note here about reusing input null buffer
##########
datafusion/functions-nested/src/repeat.rs:
##########
@@ -238,42 +231,46 @@ fn general_repeat<O: OffsetSizeTrait>(
/// ```
fn general_list_repeat<O: OffsetSizeTrait>(
list_array: &GenericListArray<O>,
- count_array: &UInt64Array,
+ count_array: &Int64Array,
) -> Result<ArrayRef> {
let counts = count_array.values();
let list_offsets = list_array.value_offsets();
// calculate capacities for pre-allocation
- let outer_total = counts.iter().map(|&c| c as usize).sum();
- let inner_total = counts
- .iter()
- .enumerate()
- .filter(|&(i, _)| !list_array.is_null(i))
- .map(|(i, &c)| {
- let len = list_offsets[i + 1].to_usize().unwrap()
- - list_offsets[i].to_usize().unwrap();
- len * (c as usize)
- })
- .sum();
+ let mut outer_total = 0usize;
+ let mut inner_total = 0usize;
+ for (i, &c) in counts.iter().enumerate() {
+ if c > 0 {
+ outer_total += c as usize;
+ if !list_array.is_null(i) {
+ let len = list_offsets[i + 1].to_usize().unwrap()
+ - list_offsets[i].to_usize().unwrap();
+ inner_total += len * (c as usize);
+ }
+ }
+ }
// Build inner structures
let mut inner_offsets = Vec::with_capacity(outer_total + 1);
let mut take_indices = Vec::with_capacity(inner_total);
let mut inner_nulls = BooleanBufferBuilder::new(outer_total);
+ let mut outer_nulls = BooleanBufferBuilder::new(count_array.len());
let mut inner_running = 0usize;
inner_offsets.push(O::zero());
- for (row_idx, &count) in counts.iter().enumerate() {
- let is_valid = !list_array.is_null(row_idx);
+ for row_idx in 0..count_array.len() {
+ let (count, count_is_valid) = get_count_with_validity(count_array,
row_idx);
+ outer_nulls.append(count_is_valid);
+ let list_is_valid = !list_array.is_null(row_idx);
Review Comment:
```suggestion
let list_is_valid = list_array.is_valid(row_idx);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]