This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7002a00278 Set aggregation hash seed (#16165)
7002a00278 is described below

commit 7002a0027876a17e5bdf275e63d2a25373331943
Author: Christian <9384305+c...@users.noreply.github.com>
AuthorDate: Wed May 28 19:58:59 2025 +0200

    Set aggregation hash seed (#16165)
---
 .../physical-plan/src/aggregates/group_values/multi_group_by/mod.rs   | 2 +-
 datafusion/physical-plan/src/aggregates/group_values/row.rs           | 2 +-
 .../src/aggregates/group_values/single_group_by/primitive.rs          | 2 +-
 datafusion/physical-plan/src/aggregates/mod.rs                        | 4 ++++
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
index dee482cab1..d8e7b13aec 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
@@ -270,7 +270,7 @@ impl<const STREAMING: bool> GroupValuesColumn<STREAMING> {
             map_size: 0,
             group_values: vec![],
             hashes_buffer: Default::default(),
-            random_state: Default::default(),
+            random_state: crate::aggregates::AGGREGATION_HASH_SEED,
         })
     }
 
diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs
index aa9eee5157..34893fcc4e 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/row.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs
@@ -106,7 +106,7 @@ impl GroupValuesRows {
             group_values: None,
             hashes_buffer: Default::default(),
             rows_buffer,
-            random_state: Default::default(),
+            random_state: crate::aggregates::AGGREGATION_HASH_SEED,
         })
     }
 }
diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs
index 279caa50b0..8b1905e540 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs
@@ -105,7 +105,7 @@ impl<T: ArrowPrimitiveType> GroupValuesPrimitive<T> {
             map: HashTable::with_capacity(128),
             values: Vec::with_capacity(128),
             null_group: None,
-            random_state: Default::default(),
+            random_state: crate::aggregates::AGGREGATION_HASH_SEED,
         }
     }
 }
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
index 1ab08cd34e..656c9a2cd5 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -58,6 +58,10 @@ mod row_hash;
 mod topk;
 mod topk_stream;
 
+/// Hard-coded seed for aggregations to ensure hash values differ from `RepartitionExec`, avoiding collisions.
+const AGGREGATION_HASH_SEED: ahash::RandomState =
+    ahash::RandomState::with_seeds('A' as u64, 'G' as u64, 'G' as u64, 'R' as u64);
+
 /// Aggregation modes
 ///
 /// See [`Accumulator::state`] for background information on multi-phase


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to