ozankabak commented on code in PR #6065:
URL: https://github.com/apache/arrow-datafusion/pull/6065#discussion_r1174222013
##########
datafusion/core/src/physical_plan/aggregates/row_hash.rs:
##########
@@ -772,22 +773,22 @@ fn slice_and_maybe_filter(
filter_opt: Option<&Arc<dyn Array>>,
offsets: &[usize],
) -> Result<Vec<ArrayRef>> {
- let sliced_arrays: Vec<ArrayRef> = aggr_array
- .iter()
- .map(|array| array.slice(offsets[0], offsets[1] - offsets[0]))
- .collect();
+ let null_array = Arc::new(NullArray::new(0)) as ArrayRef;
+ let mut sliced_arrays: Vec<ArrayRef> = vec![null_array; aggr_array.len()];
- let filtered_arrays = match filter_opt.as_ref() {
- Some(f) => {
- let sliced = f.slice(offsets[0], offsets[1] - offsets[0]);
- let filter_array = as_boolean_array(&sliced)?;
+ if let Some(f) = filter_opt {
+ let sliced = f.slice(offsets[0], offsets[1] - offsets[0]);
+ let filter_array = as_boolean_array(&sliced)?;
- sliced_arrays
- .iter()
- .map(|array| filter(array, filter_array).unwrap())
- .collect::<Vec<ArrayRef>>()
+ for (i, arr) in aggr_array.iter().enumerate() {
+ let sliced = &arr.slice(offsets[0], offsets[1] - offsets[0]);
+ sliced_arrays[i] = filter(sliced, filter_array).unwrap();
+ }
+ } else {
+ for (i, arr) in aggr_array.iter().enumerate() {
+ sliced_arrays[i] = arr.slice(offsets[0], offsets[1] - offsets[0]);
}
- None => sliced_arrays,
- };
- Ok(filtered_arrays)
+ }
Review Comment:
I think writing these loops as a zip of `aggr_array.iter()` and
`sliced_arrays.iter_mut()`, and thereby avoiding the `sliced_arrays[i]` access
inside the loop, would (1) make the code a little more idiomatic, and (2)
possibly result in less implicit bounds-checking at run-time.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]