alamb commented on code in PR #9087:
URL: https://github.com/apache/arrow-datafusion/pull/9087#discussion_r1473224667
##########
datafusion/physical-expr/src/aggregate/count_distinct/mod.rs:
##########
@@ -260,182 +261,6 @@ impl Accumulator for DistinctCountAccumulator {
}
}
-#[derive(Debug)]
-struct NativeDistinctCountAccumulator<T>
Review Comment:
Moved to native.rs
##########
datafusion/physical-expr/src/aggregate/count_distinct/mod.rs:
##########
@@ -175,9 +172,13 @@ impl PartialEq<dyn Any> for DistinctCount {
}
}
+/// General purpose distinct accumulator that works for any DataType by using
+/// [`ScalarValue`]. Some types have specialized accumulators that are (much)
+/// more efficient such as [`PrimitiveDistinctCountAccumulator`] and
+/// [`StringDistinctCountAccumulator`]
#[derive(Debug)]
struct DistinctCountAccumulator {
- values: HashSet<DistinctScalarValues, RandomState>,
+ values: HashSet<ScalarValue, RandomState>,
Review Comment:
This was typedefd to
```rust
type DistinctScalarValues = ScalarValue;
```
Which serves no purpose that I can tell other than obscuring what this code
is doing
##########
datafusion/physical-expr/src/aggregate/count_distinct/mod.rs:
##########
@@ -101,46 +98,46 @@ impl AggregateExpr for DistinctCount {
use TimeUnit::*;
Ok(match &self.state_data_type {
- Int8 =>
Box::new(NativeDistinctCountAccumulator::<Int8Type>::new()),
- Int16 =>
Box::new(NativeDistinctCountAccumulator::<Int16Type>::new()),
- Int32 =>
Box::new(NativeDistinctCountAccumulator::<Int32Type>::new()),
- Int64 =>
Box::new(NativeDistinctCountAccumulator::<Int64Type>::new()),
- UInt8 =>
Box::new(NativeDistinctCountAccumulator::<UInt8Type>::new()),
- UInt16 =>
Box::new(NativeDistinctCountAccumulator::<UInt16Type>::new()),
- UInt32 =>
Box::new(NativeDistinctCountAccumulator::<UInt32Type>::new()),
- UInt64 =>
Box::new(NativeDistinctCountAccumulator::<UInt64Type>::new()),
+ Int8 =>
Box::new(PrimitiveDistinctCountAccumulator::<Int8Type>::new()),
Review Comment:
Renamed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]