This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 2a2d828daa7f25de27c282ff4485835e5cd6fb63
Author: Gabriel <[email protected]>
AuthorDate: Thu Jul 6 14:30:10 2023 +0800

    [profile](join) add collisions into profile (#21510)
---
 be/src/vec/common/hash_table/hash_table.h             | 10 +---------
 be/src/vec/common/hash_table/partitioned_hash_table.h |  8 ++++++++
 be/src/vec/exec/join/process_hash_table_probe_impl.h  |  1 +
 be/src/vec/exec/join/vhash_join_node.cpp              |  4 ++++
 be/src/vec/exec/join/vhash_join_node.h                |  2 ++
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 0b9d6bc3e1..a460f0bc2a 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -462,9 +462,7 @@ protected:
     //factor that will trigger growing the hash table on insert.
     static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.5f;
 
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
     mutable size_t collisions = 0;
-#endif
 
     void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; }
@@ -479,9 +477,7 @@ protected:
         while (!buf[place_value].is_zero(*this) &&
                !buf[place_value].key_equals(x, hash_value, *this)) {
             place_value = grower.next(place_value);
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
             ++collisions;
-#endif
         }
 
         return place_value;
@@ -503,9 +499,7 @@ protected:
     size_t ALWAYS_INLINE find_empty_cell(size_t place_value) const {
         while (!buf[place_value].is_zero(*this)) {
             place_value = grower.next(place_value);
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
             ++collisions;
-#endif
         }
 
         return place_value;
@@ -1090,9 +1084,7 @@ public:
     bool add_elem_size_overflow(size_t add_size) const {
         return grower.overflow(add_size + m_size);
     }
-#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
-    size_t getCollisions() const { return collisions; }
-#endif
+    int64_t get_collisions() const { return collisions; }
 
 private:
     /// Increase the size of the buffer.
diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h
index c7cdbf684d..9990c4491e 100644
--- a/be/src/vec/common/hash_table/partitioned_hash_table.h
+++ b/be/src/vec/common/hash_table/partitioned_hash_table.h
@@ -153,6 +153,14 @@ public:
         }
     }
 
+    int64_t get_collisions() const {
+        size_t collisions = level0_sub_table.get_collisions();
+        for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; i++) {
+            collisions += level1_sub_tables[i].get_collisions();
+        }
+        return collisions;
+    }
+
     size_t get_buffer_size_in_bytes() const {
         if (_is_partitioned) {
             size_t buff_size = 0;
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 5923dbf1c5..341adcbfc5 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -473,6 +473,7 @@ Status ProcessHashTableProbe<JoinOpType>::do_process_with_other_join_conjuncts(
     auto& probe_row_match_iter =
             std::get<ForwardIterator<Mapped>>(_join_node->_probe_row_match_iter);
     if (probe_row_match_iter.ok()) {
+        SCOPED_TIMER(_search_hashtable_timer);
         auto origin_offset = current_offset;
         for (; probe_row_match_iter.ok() && current_offset < _batch_size;
              ++probe_row_match_iter) {
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 33a4e6e35e..05976acadc 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -120,6 +120,8 @@ struct ProcessHashTableBuild {
         int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes();
         COUNTER_SET(_join_node->_hash_table_memory_usage, bucket_bytes);
         COUNTER_SET(_join_node->_build_buckets_counter, bucket_size);
+        COUNTER_SET(_join_node->_build_collisions_counter,
+                    hash_table_ctx.hash_table.get_collisions());
         COUNTER_SET(_join_node->_build_buckets_fill_counter, filled_bucket_size);
 
         auto hash_table_buckets = hash_table_ctx.hash_table.get_buffer_sizes_in_cells();
@@ -476,6 +478,8 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     _build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT);
     _build_buckets_fill_counter = ADD_COUNTER(runtime_profile(), "FilledBuckets", TUnit::UNIT);
 
+    _build_collisions_counter = ADD_COUNTER(runtime_profile(), "BuildCollisions", TUnit::UNIT);
+
     RETURN_IF_ERROR(VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc()));
     RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc()));
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 671d0d4170..3d85d6b227 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -308,6 +308,8 @@ private:
     RuntimeProfile::Counter* _build_side_merge_block_timer;
     RuntimeProfile::Counter* _build_runtime_filter_timer;
 
+    RuntimeProfile::Counter* _build_collisions_counter;
+
     RuntimeProfile::Counter* _open_timer;
     RuntimeProfile::Counter* _allocate_resource_timer;
     RuntimeProfile::Counter* _process_other_join_conjunct_timer;

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
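Editor's note on the change above: the patch counts one collision per extra probe step during open-addressing lookup and surfaces the total through the new "BuildCollisions" profile counter. The fragment below is a minimal illustrative sketch of that idea only; it is not code from this commit, and the names Cell and ProbingTable are hypothetical stand-ins for Doris's real HashTable types.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-ins for the real cell/grower machinery.
struct Cell {
    uint64_t key = 0;
    bool occupied = false;
};

struct ProbingTable {
    std::vector<Cell> buf = std::vector<Cell>(16); // power-of-two capacity
    mutable size_t collisions = 0;                 // bumped on every extra probe step

    size_t find_cell(uint64_t key) const {
        const size_t mask = buf.size() - 1;
        size_t place = key & mask; // initial bucket derived from the hash
        // Walk forward while the slot holds a different key; each extra
        // step is one collision, analogous to HashTable::find_cell above.
        while (buf[place].occupied && buf[place].key != key) {
            place = (place + 1) & mask;
            ++collisions;
        }
        return place;
    }

    void insert(uint64_t key) { // no growth handling; sketch only
        Cell& cell = buf[find_cell(key)];
        cell.key = key;
        cell.occupied = true;
    }

    int64_t get_collisions() const { return collisions; }
};

int main() {
    ProbingTable table;
    // Keys 1 and 17 map to the same bucket (17 & 15 == 1), forcing one collision.
    table.insert(1);
    table.insert(17);
    std::cout << "collisions=" << table.get_collisions() << std::endl;
    return 0;
}

In the patch itself, this running total is read once per build via get_collisions() and written into the join node's BuildCollisions counter, so a high value relative to BuildRows points at poor key distribution on the build side.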
