This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch adapt_datastructure
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/adapt_datastructure by this
push:
new 6d05fb4a93 Update / simplify memory calculation with new datastructure
6d05fb4a93 is described below
commit 6d05fb4a9381f6c7ba0058055ca4ea6ff01c5258
Author: Daniël Heres <[email protected]>
AuthorDate: Mon Jun 19 13:37:52 2023 +0200
Update / simplify memory calculation with new datastructure
---
datafusion/core/src/physical_plan/joins/hash_join.rs | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs
b/datafusion/core/src/physical_plan/joins/hash_join.rs
index 6491210231..fcfcc746e6 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join.rs
@@ -44,6 +44,7 @@ use arrow::{
};
use futures::{ready, Stream, StreamExt, TryStreamExt};
use std::fmt;
+use std::mem::size_of;
use std::sync::Arc;
use std::task::Poll;
use std::{any::Any, usize, vec};
@@ -508,10 +509,10 @@ async fn collect_left_input(
)
})? / 7)
.next_power_of_two();
- // 32 bytes per `(u64, SmallVec<[u64; 1]>)`
+ // 16 bytes per `(u64, u64)`
// + 1 byte for each bucket
- // + 16 bytes fixed
- let estimated_hastable_size = 32 * estimated_buckets + estimated_buckets +
16;
+ // + fixed size of JoinHashMap (RawTable + Vec)
+ let estimated_hastable_size = 16 * estimated_buckets + estimated_buckets +
size_of::<JoinHashMap>();
reservation.try_grow(estimated_hastable_size)?;
metrics.build_mem_used.add(estimated_hastable_size);