alamb commented on code in PR #19364:
URL: https://github.com/apache/datafusion/pull/19364#discussion_r2624769536
##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -246,19 +279,9 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
}
if exist_view_len <= 12 {
- let exist_inline = unsafe {
- GenericByteViewArray::<B>::inline_value(
- &exist_view,
- exist_view_len as usize,
- )
- };
- let input_inline = unsafe {
- GenericByteViewArray::<B>::inline_value(
- &input_view,
- input_view_len as usize,
- )
- };
- exist_inline == input_inline
+ // the views are inlined and the lengths are equal, so just
Review Comment:
Optimization 3: just compare the view directly rather than breaking it into
parts first
##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -228,12 +251,22 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
if let Some(result) = nulls_equal_to(exist_null, input_null) {
return result;
}
+ self.do_equal_to_inner_values_only(lhs_row, array, rhs_row)
+ }
- // Otherwise, we need to check their values
- let exist_view = self.views[lhs_row];
+ /// Checks only the values for equality
+ #[inline(always)]
+ fn do_equal_to_inner_values_only(
+ &self,
+ lhs_row: usize,
+ array: &GenericByteViewArray<B>,
+ rhs_row: usize,
+ ) -> bool {
+ // SAFETY: the row indexes passed to vectorized_equal are in bounds
Review Comment:
Optimization 2: skip bounds check on data access
##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -117,25 +117,47 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
self.do_append_val_inner(arr, row);
}
- fn vectorized_equal_to_inner(
+ /// Comparison when there are no nulls in array
+ fn vectorized_equal_to_no_nulls(
Review Comment:
Change 1 is to create a second copy of the loop when there are no nulls (to
avoid the null check and give LLVM a better chance to optimize)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]