This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
     new 7002a00278 Set aggregation hash seed (#16165)
7002a00278 is described below

commit 7002a0027876a17e5bdf275e63d2a25373331943
Author: Christian <9384305+c...@users.noreply.github.com>
AuthorDate: Wed May 28 19:58:59 2025 +0200

    Set aggregation hash seed (#16165)
---
 .../physical-plan/src/aggregates/group_values/multi_group_by/mod.rs  | 2 +-
 datafusion/physical-plan/src/aggregates/group_values/row.rs          | 2 +-
 .../src/aggregates/group_values/single_group_by/primitive.rs         | 2 +-
 datafusion/physical-plan/src/aggregates/mod.rs                       | 4 ++++
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
index dee482cab1..d8e7b13aec 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
@@ -270,7 +270,7 @@ impl<const STREAMING: bool> GroupValuesColumn<STREAMING> {
             map_size: 0,
             group_values: vec![],
             hashes_buffer: Default::default(),
-            random_state: Default::default(),
+            random_state: crate::aggregates::AGGREGATION_HASH_SEED,
         })
     }
 
diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs
index aa9eee5157..34893fcc4e 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/row.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs
@@ -106,7 +106,7 @@ impl GroupValuesRows {
             group_values: None,
             hashes_buffer: Default::default(),
             rows_buffer,
-            random_state: Default::default(),
+            random_state: crate::aggregates::AGGREGATION_HASH_SEED,
         })
     }
 }
diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs
index 279caa50b0..8b1905e540 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs
@@ -105,7 +105,7 @@ impl<T: ArrowPrimitiveType> GroupValuesPrimitive<T> {
             map: HashTable::with_capacity(128),
             values: Vec::with_capacity(128),
             null_group: None,
-            random_state: Default::default(),
+            random_state: crate::aggregates::AGGREGATION_HASH_SEED,
         }
     }
 }
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
index 1ab08cd34e..656c9a2cd5 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -58,6 +58,10 @@ mod row_hash;
 mod topk;
 mod topk_stream;
 
+/// Hard-coded seed for aggregations to ensure hash values differ from `RepartitionExec`, avoiding collisions.
+const AGGREGATION_HASH_SEED: ahash::RandomState =
+    ahash::RandomState::with_seeds('A' as u64, 'G' as u64, 'G' as u64, 'R' as u64);
+
 /// Aggregation modes
 ///
 /// See [`Accumulator::state`] for background information on multi-phase

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org