This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch new_join
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1c850690d29a792bff21af4aa6d8e6cf58cdab92
Author: HappenLee <[email protected]>
AuthorDate: Thu Nov 2 18:01:56 2023 +0800

    fix rf and outer join performance (#26320)
---
 be/src/exprs/bloom_filter_func.h        | 30 +++++++++++++++---------------
 be/src/vec/common/hash_table/hash_map.h |  4 +++-
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index a7b0904691f..48989473a32 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -93,24 +93,24 @@ public:
     void set_build_bf_exactly(bool build_bf_exactly) { _build_bf_exactly = build_bf_exactly; }
 
     Status init_with_fixed_length() {
-        if (_build_bf_exactly) {
-            return Status::OK();
-        }
+        //        if (_build_bf_exactly) {
+        //            return Status::OK();
+        //        }
         return init_with_fixed_length(_bloom_filter_length);
     }
 
     Status init_with_cardinality(const size_t build_bf_cardinality) {
-        if (_build_bf_exactly) {
-            // Use the same algorithm as org.apache.doris.planner.RuntimeFilter#calculateFilterSize
-            constexpr double fpp = 0.05;
-            constexpr double k = 8; // BUCKET_WORDS
-            // m is the number of bits we would need to get the fpp specified
-            double m = -k * build_bf_cardinality / std::log(1 - std::pow(fpp, 1.0 / k));
-
-            // Handle case where ndv == 1 => ceil(log2(m/8)) < 0.
-            int log_filter_size = std::max(0, (int)(std::ceil(std::log(m / 8) / std::log(2))));
-            return init_with_fixed_length(((int64_t)1) << log_filter_size);
-        }
+        //        if (_build_bf_exactly) {
+        //            // Use the same algorithm as org.apache.doris.planner.RuntimeFilter#calculateFilterSize
+        //            constexpr double fpp = 0.05;
+        //            constexpr double k = 8; // BUCKET_WORDS
+        //            // m is the number of bits we would need to get the fpp specified
+        //            double m = -k * build_bf_cardinality / std::log(1 - std::pow(fpp, 1.0 / k));
+        //
+        //            // Handle case where ndv == 1 => ceil(log2(m/8)) < 0.
+        //            int log_filter_size = std::max(0, (int)(std::ceil(std::log(m / 8) / std::log(2))));
+        //            return init_with_fixed_length(((int64_t)1) << log_filter_size);
+        //        }
         return Status::OK();
     }
 
@@ -269,7 +269,7 @@ struct CommonFindOp {
         bloom_filter.add_bytes((char*)data, sizeof(T));
     }
     bool find(const BloomFilterAdaptor& bloom_filter, const void* data) const {
-        return bloom_filter.test(Slice((char*)data, sizeof(T)));
+        return bloom_filter.test_element(((T*)data)[0]);
     }
     bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
         return find(bloom_filter, data);
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index 9e368bb3ff6..00e34900a44 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -363,7 +363,9 @@ private:
                     if constexpr (!with_other_conjuncts &&
                                   (JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
                                    JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN)) {
-                        visited[build_idx] = 1;
+                        if (!visited[build_idx]) {
+                            visited[build_idx] = 1;
+                        }
                     }
                     matched_cnt++;
                 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to