rluvaton commented on code in PR #17977:
URL: https://github.com/apache/datafusion/pull/17977#discussion_r2414709681
##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs:
##########
@@ -54,6 +54,85 @@ where
nulls: MaybeNullBufferBuilder::new(),
}
}
+
+ fn vectorized_equal_to_non_nullable(
+ &self,
+ lhs_rows: &[usize],
+ array: &ArrayRef,
+ rhs_rows: &[usize],
+ equal_to_results: &mut [bool],
+ ) {
+ assert!(
+ !NULLABLE || (array.null_count() == 0 && !self.nulls.has_nulls()),
+ "called with nullable input"
+ );
+ let array_values = array.as_primitive::<T>().values();
+
+ let iter = izip!(
+ lhs_rows.iter(),
+ rhs_rows.iter(),
+ equal_to_results.iter_mut(),
+ );
+
+ for (&lhs_row, &rhs_row, equal_to_result) in iter {
+ let result = {
+ // Getting unchecked not only for bound checks but because the
bound checks are
+ // what prevents auto-vectorization
+ let left = if cfg!(debug_assertions) {
+ self.group_values[lhs_row]
+ } else {
+ // SAFETY: indices are guaranteed to be in bounds
+ unsafe { *self.group_values.get_unchecked(lhs_row) }
+ };
+ let right = if cfg!(debug_assertions) {
+ array_values[rhs_row]
+ } else {
+ // SAFETY: indices are guaranteed to be in bounds
+ unsafe { *array_values.get_unchecked(rhs_row) }
+ };
+
+ // Always evaluate, to allow for auto-vectorization
+ left.is_eq(right)
+ };
+
+ *equal_to_result = result && *equal_to_result;
+ }
+ }
+
+ pub fn vectorized_equal_nullable(
+ &self,
+ lhs_rows: &[usize],
+ array: &ArrayRef,
+ rhs_rows: &[usize],
+ equal_to_results: &mut [bool],
+ ) {
+ assert!(NULLABLE, "called with non-nullable input");
+ let array = array.as_primitive::<T>();
+
+ let iter = izip!(
+ lhs_rows.iter(),
+ rhs_rows.iter(),
+ equal_to_results.iter_mut(),
+ );
+
+ for (&lhs_row, &rhs_row, equal_to_result) in iter {
+ // Has found not equal to in previous column, don't need to check
+ if !*equal_to_result {
+ continue;
+ }
+
+ // Perf: skip null check (by short circuit) if input is not
nullable
+ let exist_null = self.nulls.is_null(lhs_row);
+ let input_null = array.is_null(rhs_row);
+ if let Some(result) = nulls_equal_to(exist_null, input_null) {
+ *equal_to_result = result;
+ continue;
+ }
+
+ // Otherwise, we need to check their values
+ *equal_to_result =
self.group_values[lhs_row].is_eq(array.value(rhs_row));
+ }
+ }
Review Comment:
moved the code from `vectorized_equal_to` and removed the `if NULLABLE` as
we will always get here if nullable
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]