Dandandan commented on code in PR #6800:
URL: https://github.com/apache/arrow-datafusion/pull/6800#discussion_r1249741213
##########
datafusion/physical-expr/src/aggregate/average.rs:
##########
@@ -383,6 +418,260 @@ impl RowAccumulator for AvgRowAccumulator {
}
}
+/// An accumulator to compute the average of `PrimitiveArray<T>`.
+/// Stores values as native types, and does overflow checking
+///
+/// `F`: Function that calculates the average value from a sum of
+/// `T::Native` and a total count
+#[derive(Debug)]
+struct AvgGroupsAccumulator<T, F>
+where
+ T: ArrowNumericType + Send,
+ F: Fn(T::Native, u64) -> Result<T::Native> + Send,
+{
+ /// The type of the internal sum
+ sum_data_type: DataType,
+
+ /// The type of the returned sum
+ return_data_type: DataType,
+
+ /// Count per group (use u64 to make UInt64Array)
+ counts: Vec<u64>,
+
+ /// Sums per group, stored as the native type
+ sums: Vec<T::Native>,
+
+ /// Function that computes the average from a sum and a count
+ avg_fn: F,
+}
+
+impl<T, F> AvgGroupsAccumulator<T, F>
+where
+ T: ArrowNumericType + Send,
+ F: Fn(T::Native, u64) -> Result<T::Native> + Send,
+{
+ pub fn new(sum_data_type: &DataType, return_data_type: &DataType, avg_fn:
F) -> Self {
+ debug!(
+ "AvgGroupsAccumulator ({}, sum type: {sum_data_type:?}) -->
{return_data_type:?}",
+ std::any::type_name::<T>()
+ );
+ Self {
+ return_data_type: return_data_type.clone(),
+ sum_data_type: sum_data_type.clone(),
+ counts: vec![],
+ sums: vec![],
+ avg_fn,
+ }
+ }
+
+ /// Adds one to each group's counter
+ fn increment_counts(
+ &mut self,
+ group_indicies: &[usize],
+ values: &PrimitiveArray<T>,
+ opt_filter: Option<&arrow_array::BooleanArray>,
+ total_num_groups: usize,
+ ) {
+ self.counts.resize(total_num_groups, 0);
+
+ if values.null_count() == 0 {
+ accumulate_all(
+ group_indicies,
+ values,
+ opt_filter,
+ |group_index, _new_value| {
Review Comment:
I wonder if this compiles into the same code as with only iterating over
`group_indicies`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]