alamb commented on a change in pull request #808:
URL: https://github.com/apache/arrow-datafusion/pull/808#discussion_r683834491
##########
File path: datafusion/src/physical_plan/hash_aggregate.rs
##########
@@ -363,55 +348,74 @@ fn group_aggregate_batch(
let mut group_by_values = group_by_values.into_boxed_slice();
- let mut key = Vec::with_capacity(group_values.len());
-
// 1.1 construct the key from the group values
// 1.2 construct the mapping key if it does not exist
// 1.3 add the row' index to `indices`
// Make sure we can create the accumulators or otherwise return an error
create_accumulators(aggr_expr).map_err(DataFusionError::into_arrow_external_error)?;
- // Keys received in this batch
- let mut batch_keys = vec![];
+ // track which entries in `accumulators` have rows in this batch to
aggregate
+ let mut groups_with_rows = vec![];
- for row in 0..batch.num_rows() {
- // 1.1
- create_key(&group_values, row, &mut key)
- .map_err(DataFusionError::into_arrow_external_error)?;
+ // 1.1 Calculate the group keys for the group values
+ let mut batch_hashes = vec![0; batch.num_rows()];
+ create_hashes(&group_values, random_state, &mut batch_hashes)?;
- accumulators
- .raw_entry_mut()
- .from_key(&key)
- // 1.3
- .and_modify(|_, (_, _, v)| {
- if v.is_empty() {
- batch_keys.push(key.clone())
+ for (row, hash) in batch_hashes.into_iter().enumerate() {
+ let Accumulators { map, group_states } = &mut accumulators;
+
+ let entry = map.get_mut(hash, |(_hash, group_idx)| {
+ // verify that a group that we are inserting with hash is
Review comment:
So I have annecdotal evidence that this check simply slows things down
(as the equivalence function isn't called when the hashes are different). Why
do you think it will be called when the hash values are different?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]