This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch bucketing
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/bucketing by this push:
new cbfb01a875 Bucketed hash join
cbfb01a875 is described below
commit cbfb01a875bc80158912847b8f0b9008a7430a23
Author: Daniël Heres <[email protected]>
AuthorDate: Wed Jun 28 20:47:13 2023 +0200
Bucketed hash join
---
datafusion/core/src/physical_plan/joins/hash_join_utils.rs | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/datafusion/core/src/physical_plan/joins/hash_join_utils.rs b/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
index b3ac1f392b..927b3667a5 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
@@ -90,7 +90,7 @@ use datafusion_common::Result;
// TODO: speed up collision checks
// https://github.com/apache/arrow-datafusion/issues/50
pub struct JoinHashMap {
- // Stores hash value to first index
+ // Stores first index in bucket
pub map: Vec<u64>,
// Stores indices in chained list data structure
pub next: Vec<u64>,
@@ -103,8 +103,8 @@ pub struct SymmetricJoinHashMap(pub RawTable<(u64, SmallVec<[u64; 1]>)>);
impl JoinHashMap {
pub(crate) fn with_capacity(capacity: usize) -> Self {
JoinHashMap {
- // Overallocate using 2 x the buckets
- map: vec![0; capacity * 2],
+ // Overallocate using 8 x the buckets
+ map: vec![0; capacity * 8],
next: vec![0; capacity],
}
}