This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch new_join
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/new_join by this push:
new 5b357868dac fix rf and outer join performance (#26320)
5b357868dac is described below
commit 5b357868dac83270856b1eb9d7aef8553faee05e
Author: HappenLee <[email protected]>
AuthorDate: Thu Nov 2 18:01:56 2023 +0800
fix rf and outer join performance (#26320)
---
be/src/exprs/bloom_filter_func.h | 30 +++++++++++++++---------------
be/src/vec/common/hash_table/hash_map.h | 4 +++-
2 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index a7b0904691f..48989473a32 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -93,24 +93,24 @@ public:
void set_build_bf_exactly(bool build_bf_exactly) { _build_bf_exactly = build_bf_exactly; }
Status init_with_fixed_length() {
- if (_build_bf_exactly) {
- return Status::OK();
- }
+ // if (_build_bf_exactly) {
+ // return Status::OK();
+ // }
return init_with_fixed_length(_bloom_filter_length);
}
Status init_with_cardinality(const size_t build_bf_cardinality) {
- if (_build_bf_exactly) {
- // Use the same algorithm as org.apache.doris.planner.RuntimeFilter#calculateFilterSize
- constexpr double fpp = 0.05;
- constexpr double k = 8; // BUCKET_WORDS
- // m is the number of bits we would need to get the fpp specified
- double m = -k * build_bf_cardinality / std::log(1 - std::pow(fpp, 1.0 / k));
-
- // Handle case where ndv == 1 => ceil(log2(m/8)) < 0.
- int log_filter_size = std::max(0, (int)(std::ceil(std::log(m / 8) / std::log(2))));
- return init_with_fixed_length(((int64_t)1) << log_filter_size);
- }
+ // if (_build_bf_exactly) {
+ // // Use the same algorithm as org.apache.doris.planner.RuntimeFilter#calculateFilterSize
+ // constexpr double fpp = 0.05;
+ // constexpr double k = 8; // BUCKET_WORDS
+ // // m is the number of bits we would need to get the fpp specified
+ // double m = -k * build_bf_cardinality / std::log(1 - std::pow(fpp, 1.0 / k));
+ //
+ // // Handle case where ndv == 1 => ceil(log2(m/8)) < 0.
+ // int log_filter_size = std::max(0, (int)(std::ceil(std::log(m / 8) / std::log(2))));
+ // return init_with_fixed_length(((int64_t)1) << log_filter_size);
+ // }
return Status::OK();
}
@@ -269,7 +269,7 @@ struct CommonFindOp {
bloom_filter.add_bytes((char*)data, sizeof(T));
}
bool find(const BloomFilterAdaptor& bloom_filter, const void* data) const {
- return bloom_filter.test(Slice((char*)data, sizeof(T)));
+ return bloom_filter.test_element(((T*)data)[0]);
}
bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
return find(bloom_filter, data);
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index 9e368bb3ff6..00e34900a44 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -363,7 +363,9 @@ private:
if constexpr (!with_other_conjuncts &&
(JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN)) {
- visited[build_idx] = 1;
+ if (!visited[build_idx]) {
+ visited[build_idx] = 1;
+ }
}
matched_cnt++;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]