This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 1e47d115609 [Improvement](runtime-filter) send
RUNTIME_BLOOM_FILTER_MAX_SIZE to backends (#39686)
1e47d115609 is described below
commit 1e47d115609f75c3c427ba1d85818e2dcfd4784d
Author: Pxl <[email protected]>
AuthorDate: Thu Aug 22 00:37:25 2024 +0800
[Improvement](runtime-filter) send RUNTIME_BLOOM_FILTER_MAX_SIZE to
backends (#39686)
Original change: [Improvement](runtime-filter) send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends (#38972)
## Proposed changes
Cherry-picked from #38972.
---
be/src/exprs/bloom_filter_func.h | 31 +++++++++++++---------
be/src/exprs/runtime_filter.cpp | 3 +++
be/src/exprs/runtime_filter.h | 1 +
.../java/org/apache/doris/qe/SessionVariable.java | 1 +
gensrc/thrift/PaloInternalService.thrift | 2 ++
5 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index 95d50642448..6d452bbe992 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -100,14 +100,14 @@ public:
virtual ~BloomFilterFuncBase() = default;
void init_params(const RuntimeFilterParams* params) {
- _bloom_filter_length =
- params->runtime_bloom_filter_min_size > 0
- ? std::max(params->bloom_filter_size,
params->runtime_bloom_filter_min_size)
- : params->bloom_filter_size;
+ _bloom_filter_length = params->bloom_filter_size;
+
_build_bf_exactly = params->build_bf_exactly;
_runtime_bloom_filter_min_size = params->runtime_bloom_filter_min_size;
+ _runtime_bloom_filter_max_size = params->runtime_bloom_filter_max_size;
_null_aware = params->null_aware;
_bloom_filter_size_calculated_by_ndv =
params->bloom_filter_size_calculated_by_ndv;
+ _limit_length();
}
Status init_with_fixed_length() { return
init_with_fixed_length(_bloom_filter_length); }
@@ -128,17 +128,11 @@ public:
// If FE used NDV statistics to predict the bloom filter size, BE uses only the row count.
// FE has more exact row count statistics, so whichever size is smaller is the more accurate one.
if (_bloom_filter_size_calculated_by_ndv) {
- _bloom_filter_length =
- _runtime_bloom_filter_min_size > 0
- ? std::max(_runtime_bloom_filter_min_size,
- std::min(be_calculate_size,
_bloom_filter_length))
- : std::min(be_calculate_size,
_bloom_filter_length);
+ _bloom_filter_length = std::min(be_calculate_size,
_bloom_filter_length);
} else {
- _bloom_filter_length =
- _runtime_bloom_filter_min_size > 0
- ? std::max(_runtime_bloom_filter_min_size,
be_calculate_size)
- : be_calculate_size;
+ _bloom_filter_length = be_calculate_size;
}
+ _limit_length();
}
return init_with_fixed_length(_bloom_filter_length);
}
@@ -229,6 +223,16 @@ public:
uint16_t* offsets, int number,
bool is_parse_column) = 0;
+private:
+ void _limit_length() {
+ if (_runtime_bloom_filter_min_size > 0) {
+ _bloom_filter_length = std::max(_bloom_filter_length,
_runtime_bloom_filter_min_size);
+ }
+ if (_runtime_bloom_filter_max_size > 0) {
+ _bloom_filter_length = std::min(_bloom_filter_length,
_runtime_bloom_filter_max_size);
+ }
+ }
+
protected:
// bloom filter size
int32_t _bloom_filter_alloced;
@@ -236,6 +240,7 @@ protected:
bool _inited = false;
int64_t _bloom_filter_length;
int64_t _runtime_bloom_filter_min_size;
+ int64_t _runtime_bloom_filter_max_size;
bool _build_bf_exactly = false;
bool _bloom_filter_size_calculated_by_ndv = false;
};
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index b03676d43c5..102846cbcf4 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -1396,6 +1396,9 @@ Status IRuntimeFilter::init_with_desc(const
TRuntimeFilterDesc* desc, const TQue
params.runtime_bloom_filter_min_size =
options->__isset.runtime_bloom_filter_min_size
?
options->runtime_bloom_filter_min_size
: 0;
+ params.runtime_bloom_filter_max_size =
options->__isset.runtime_bloom_filter_max_size
+ ?
options->runtime_bloom_filter_max_size
+ : 0;
// We build runtime filter by exact distinct count iff three conditions
are met:
// 1. Only 1 join key
// 2. Do not have remote target (e.g. do not need to merge), or broadcast
join
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index 3acca8cd4ea..b71bbd0648c 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -129,6 +129,7 @@ struct RuntimeFilterParams {
int64_t bloom_filter_size;
int32_t max_in_num;
int64_t runtime_bloom_filter_min_size;
+ int64_t runtime_bloom_filter_max_size;
int32_t filter_id;
bool bitmap_filter_not_in;
bool build_bf_exactly;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 5ca5bf1d36c..eca92687a8c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -3408,6 +3408,7 @@ public class SessionVariable implements Serializable,
Writable {
tResult.setRuntimeFilterWaitTimeMs(runtimeFilterWaitTimeMs);
tResult.setRuntimeFilterMaxInNum(runtimeFilterMaxInNum);
tResult.setRuntimeBloomFilterMinSize(runtimeBloomFilterMinSize);
+ tResult.setRuntimeBloomFilterMaxSize(runtimeBloomFilterMaxSize);
tResult.setRuntimeFilterWaitInfinitely(runtimeFilterWaitInfinitely);
if (cpuResourceLimit > 0) {
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index 9c80041f2ad..41b8fb8cf02 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -320,6 +320,8 @@ struct TQueryOptions {
125: optional bool enable_match_without_inverted_index = true;
126: optional bool enable_fallback_on_missing_inverted_index = true;
+ 127: optional i32 runtime_bloom_filter_max_size = 16777216;
+
// For cloud, to control if the content would be written into file cache
1000: optional bool disable_file_cache = false
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]