alamb commented on code in PR #6904:
URL: https://github.com/apache/arrow-datafusion/pull/6904#discussion_r1258722776
##########
datafusion/physical-expr/src/aggregate/min_max.rs:
##########
@@ -1022,6 +1112,224 @@ impl RowAccumulator for MinRowAccumulator {
}
}
+trait MinMax {
+ fn min() -> Self;
+ fn max() -> Self;
+}
+
+impl MinMax for u8 {
+ fn min() -> Self {
+ u8::MIN
+ }
+ fn max() -> Self {
+ u8::MAX
+ }
+}
+impl MinMax for i8 {
+ fn min() -> Self {
+ i8::MIN
+ }
+ fn max() -> Self {
+ i8::MAX
+ }
+}
+impl MinMax for u16 {
+ fn min() -> Self {
+ u16::MIN
+ }
+ fn max() -> Self {
+ u16::MAX
+ }
+}
+impl MinMax for i16 {
+ fn min() -> Self {
+ i16::MIN
+ }
+ fn max() -> Self {
+ i16::MAX
+ }
+}
+impl MinMax for u32 {
+ fn min() -> Self {
+ u32::MIN
+ }
+ fn max() -> Self {
+ u32::MAX
+ }
+}
+impl MinMax for i32 {
+ fn min() -> Self {
+ i32::MIN
+ }
+ fn max() -> Self {
+ i32::MAX
+ }
+}
+impl MinMax for i64 {
+ fn min() -> Self {
+ i64::MIN
+ }
+ fn max() -> Self {
+ i64::MAX
+ }
+}
+impl MinMax for u64 {
+ fn min() -> Self {
+ u64::MIN
+ }
+ fn max() -> Self {
+ u64::MAX
+ }
+}
+impl MinMax for f32 {
+ fn min() -> Self {
+ f32::MIN
+ }
+ fn max() -> Self {
+ f32::MAX
+ }
+}
+impl MinMax for f64 {
+ fn min() -> Self {
+ f64::MIN
+ }
+ fn max() -> Self {
+ f64::MAX
+ }
+}
+impl MinMax for i128 {
+ fn min() -> Self {
+ i128::MIN
+ }
+ fn max() -> Self {
+ i128::MAX
+ }
+}
+
+/// An accumulator to compute the min or max of [`PrimitiveArray<T>`].
+/// Stores values as native/primitive type
+#[derive(Debug)]
+struct MinMaxGroupsPrimitiveAccumulator<T, const MIN: bool>
+where
+ T: ArrowNumericType + Send,
+ T::Native: MinMax,
+{
+ /// Min/max per group, stored as the native type
+ min_max: Vec<T::Native>,
+
+ /// Track nulls in the input / filters
+ null_state: NullState,
+
+ /// The output datatype (needed for decimal precision/scale)
+ data_type: DataType,
+}
+
+impl<T, const MIN: bool> MinMaxGroupsPrimitiveAccumulator<T, MIN>
+where
+ T: ArrowNumericType + Send,
+ T::Native: MinMax,
+{
+ pub fn new(data_type: &DataType) -> Self {
+ debug!(
+ "MinMaxGroupsPrimitiveAccumulator ({}, {})",
+ std::any::type_name::<T>(),
+ MIN,
+ );
+
+ Self {
+ min_max: vec![],
+ null_state: NullState::new(),
+ data_type: data_type.clone(),
+ }
+ }
+}
+
+impl<T, const MIN: bool> GroupsAccumulator for MinMaxGroupsPrimitiveAccumulator<T, MIN>
+where
+ T: ArrowNumericType + Send,
+ T::Native: MinMax,
+{
+ fn update_batch(
+ &mut self,
+ values: &[ArrayRef],
+ group_indices: &[usize],
+ opt_filter: Option<&arrow_array::BooleanArray>,
+ total_num_groups: usize,
+ ) -> Result<()> {
+ assert_eq!(values.len(), 1, "single argument to update_batch");
+ let values = values.get(0).unwrap().as_primitive::<T>();
+
+ self.min_max.resize_with(total_num_groups, || {
Review Comment:
in 7c97b24df3
##########
datafusion/physical-expr/src/aggregate/count.rs:
##########
@@ -76,6 +85,109 @@ impl Count {
}
}
+/// An accumulator to compute the counts of [`PrimitiveArray<T>`].
+/// Stores values as native types, and does overflow checking
+///
+/// Unlike most other accumulators, COUNT never produces NULLs. If no
+/// non-null values are seen in any group the output is 0. Thus, this
+/// accumulator has no additional null or seen filter tracking.
+#[derive(Debug)]
+struct CountGroupsAccumulator {
+ /// Count per group (use i64 to make Int64Array)
+ counts: Vec<i64>,
Review Comment:
I do not have any good answer for that 🤔
##########
datafusion/physical-expr/src/aggregate/count.rs:
##########
@@ -76,6 +85,109 @@ impl Count {
}
}
+/// An accumulator to compute the counts of [`PrimitiveArray<T>`].
+/// Stores values as native types, and does overflow checking
+///
+/// Unlike most other accumulators, COUNT never produces NULLs. If no
+/// non-null values are seen in any group the output is 0. Thus, this
+/// accumulator has no additional null or seen filter tracking.
+#[derive(Debug)]
+struct CountGroupsAccumulator {
+ /// Count per group (use i64 to make Int64Array)
+ counts: Vec<i64>,
Review Comment:
I do not have any good answer for that 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]