alamb commented on code in PR #6800: URL: https://github.com/apache/arrow-datafusion/pull/6800#discussion_r1272099864
########## datafusion/physical-expr/src/aggregate/average.rs: ########## @@ -383,6 +419,303 @@ impl RowAccumulator for AvgRowAccumulator { } } +/// An accumulator to compute the average of PrimitiveArray<T>. +/// Stores values as native types, and does overflow checking +/// +/// F: Function that calculates the average value from a sum of +/// T::Native and a total count +#[derive(Debug)] +struct AvgGroupsAccumulator<T, F> +where + T: ArrowNumericType + Send, + F: Fn(T::Native, u64) -> Result<T::Native> + Send, +{ + /// The type of the internal sum + sum_data_type: DataType, + + /// The type of the returned sum + return_data_type: DataType, + + /// Count per group (use u64 to make UInt64Array) + counts: Vec<u64>, + + /// Sums per group, stored as the native type + sums: Vec<T::Native>, + + /// If we have seen a null input value for this group_index + null_inputs: BooleanBufferBuilder, + + /// Function that computes the average (value / count) + avg_fn: F, +} + +impl<T, F> AvgGroupsAccumulator<T, F> +where + T: ArrowNumericType + Send, + F: Fn(T::Native, u64) -> Result<T::Native> + Send, +{ + pub fn new(sum_data_type: &DataType, return_data_type: &DataType, avg_fn: F) -> Self { + debug!( + "AvgGroupsAccumulator ({}, sum type: {sum_data_type:?}) --> {return_data_type:?}", + std::any::type_name::<T>() + ); + + Self { + return_data_type: return_data_type.clone(), + sum_data_type: sum_data_type.clone(), + counts: vec![], + sums: vec![], + null_inputs: BooleanBufferBuilder::new(0), + avg_fn, + } + } + + /// Adds one to each group's counter + fn increment_counts( + &mut self, + group_indices: &[usize], + values: &PrimitiveArray<T>, + opt_filter: Option<&arrow_array::BooleanArray>, + total_num_groups: usize, + ) { + self.counts.resize(total_num_groups, 0); Review Comment: Filed https://github.com/apache/arrow-datafusion/issues/7065 to track -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org