This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 1cdc43a1881 branch-3.1: [fix](bloomfilter) Correct bloom_filter_fpp usage in BE #47384 (#52893)
1cdc43a1881 is described below
commit 1cdc43a18818975d65d17c6d1a8486501a8aa732
Author: airborne12 <[email protected]>
AuthorDate: Tue Jul 8 10:31:10 2025 +0800
branch-3.1: [fix](bloomfilter) Correct bloom_filter_fpp usage in BE #47384 (#52893)
cherry pick from #47384
---
.../segment_v2/bloom_filter_index_writer.cpp | 12 +++++
be/src/olap/rowset/segment_v2/column_writer.cpp | 3 +-
be/src/olap/rowset/segment_v2/column_writer.h | 2 +
be/src/olap/rowset/segment_v2/segment_writer.cpp | 3 ++
.../rowset/segment_v2/vertical_segment_writer.cpp | 4 ++
.../bloom_filter_p0/test_bloom_filter.groovy | 63 +++++++++++++++++++++-
6 files changed, 83 insertions(+), 4 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 940f6c7b18f..adcadcd114d 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -31,6 +31,7 @@
#include "olap/rowset/segment_v2/indexed_column_writer.h"
#include "olap/types.h"
#include "runtime/decimalv2_value.h"
+#include "util/debug_points.h"
#include "util/slice.h"
#include "util/types.h"
@@ -302,6 +303,17 @@ uint64_t NGramBloomFilterIndexWriterImpl::size() {
Status BloomFilterIndexWriter::create(const BloomFilterOptions& bf_options,
const TypeInfo* type_info,
std::unique_ptr<BloomFilterIndexWriter>*
res) {
+ DBUG_EXECUTE_IF("BloomFilterIndexWriter::create", {
+ auto fpp =
DebugPoints::instance()->get_debug_param_or_default<std::string>(
+ "BloomFilterIndexWriter::create", "fpp", "");
+ if (!fpp.empty()) {
+ double fpp_value = std::stod(fpp);
+ if (std::abs(bf_options.fpp - fpp_value) > 1e-6) {
+ return Status::Error<ErrorCode::INTERNAL_ERROR>("fpp {} is not
a equal to {}", fpp,
+
bf_options.fpp);
+ }
+ }
+ })
FieldType type = type_info->type();
switch (type) {
#define M(TYPE)
\
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index acdcc711b68..ffc7385c03e 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -29,7 +29,6 @@
#include "io/fs/file_writer.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/bitmap_index_writer.h"
-#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/rowset/segment_v2/bloom_filter_index_writer.h"
#include "olap/rowset/segment_v2/encoding_info.h"
#include "olap/rowset/segment_v2/inverted_index_writer.h"
@@ -519,7 +518,7 @@ Status ScalarColumnWriter::init() {
_opts.gram_bf_size, &_bloom_filter_index_builder));
} else {
RETURN_IF_ERROR(BloomFilterIndexWriter::create(
- BloomFilterOptions(), get_field()->type_info(),
&_bloom_filter_index_builder));
+ _opts.bf_options, get_field()->type_info(),
&_bloom_filter_index_builder));
}
}
return Status::OK();
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h
b/be/src/olap/rowset/segment_v2/column_writer.h
index 9eb19b3443d..8e7398f0418 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -30,6 +30,7 @@
#include "common/status.h" // for Status
#include "olap/field.h" // for Field
+#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/rowset/segment_v2/common.h"
#include "olap/rowset/segment_v2/inverted_index_writer.h"
#include "util/bitmap.h" // for BitmapChange
@@ -66,6 +67,7 @@ struct ColumnWriterOptions {
uint8_t gram_size;
uint16_t gram_bf_size;
std::vector<const TabletIndex*> inverted_indexs;
+ BloomFilterOptions bf_options;
InvertedIndexFileWriter* inverted_index_file_writer;
// variant column writer used
SegmentFooterPB* footer = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 3b9e959dcd2..942128df31a 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -190,6 +190,9 @@ Status SegmentWriter::_create_column_writer(uint32_t cid,
const TabletColumn& co
// except for columns whose type don't support zone map.
opts.need_zone_map = column.is_key() || schema->keys_type() !=
KeysType::AGG_KEYS;
opts.need_bloom_filter = column.is_bf_column();
+ if (opts.need_bloom_filter) {
+ opts.bf_options.fpp = schema->has_bf_fpp() ?
schema->bloom_filter_fpp() : 0.05;
+ }
auto* tablet_index = schema->get_ngram_bf_index(column.unique_id());
if (tablet_index) {
opts.need_bloom_filter = true;
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 30ad12d0991..335c79ccc27 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -184,6 +184,10 @@ Status
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
// except for columns whose type don't support zone map.
opts.need_zone_map = column.is_key() || tablet_schema->keys_type() !=
KeysType::AGG_KEYS;
opts.need_bloom_filter = column.is_bf_column();
+ if (opts.need_bloom_filter) {
+ opts.bf_options.fpp =
+ tablet_schema->has_bf_fpp() ?
tablet_schema->bloom_filter_fpp() : 0.05;
+ }
auto* tablet_index = tablet_schema->get_ngram_bf_index(column.unique_id());
if (tablet_index) {
opts.need_bloom_filter = true;
diff --git a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
index ff8710c5998..8e95dee8f11 100644
--- a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
+++ b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
@@ -14,7 +14,25 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-suite("test_bloom_filter") {
+suite("test_bloom_filter","nonConcurrent") {
+ def timeout = 60000
+ def delta_time = 1000
+ def alter_res = "null"
+ def useTime = 0
+ def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+ for(int t = delta_time; t <= OpTimeout; t += delta_time){
+ alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName =
"${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+ alter_res = alter_res.toString()
+ if(alter_res.contains("FINISHED")) {
+ sleep(3000) // wait change table state to normal
+ logger.info(table_name + " latest alter job finished, detail:
" + alter_res)
+ break
+ }
+ useTime = t
+ sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish
timeout")
+ }
// todo: test bloom filter, such alter table bloom filter, create table
with bloom filter
sql "SHOW ALTER TABLE COLUMN"
@@ -176,11 +194,52 @@ suite("test_bloom_filter") {
(2,2,2,"2024-12-18 20:00:00", "2024-12-18 20:00:00", "2024-12-18",
"2024-12-18", "3.33", "3.33"),
(3,3,3,"2024-12-22 20:00:00", "2024-12-22 20:00:00", "2024-12-22",
"2024-12-22", "4.33", "4.33")"""
sql """ALTER TABLE ${test_datetime_tb} SET ("bloom_filter_columns" =
"d,d2,da,dav2,dec,dec2")"""
- Thread.sleep(3000)
+ wait_for_latest_op_on_table_finish(test_datetime_tb, timeout)
qt_select_datetime_v1 """SELECT * FROM ${test_datetime_tb} WHERE d IN
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
qt_select_datetime_v2 """SELECT * FROM ${test_datetime_tb} WHERE d2 IN
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
qt_select_date_v1 """SELECT * FROM ${test_datetime_tb} WHERE da IN
("2024-12-17", "2024-12-18") order by a"""
qt_select_date_v2 """SELECT * FROM ${test_datetime_tb} WHERE dav2 IN
("2024-12-17", "2024-12-18") order by a"""
sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'true')"""
sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'true')"""
+
+ def test_dynamic_fpp_tb = "test_dynamic_fpp_bloom_filter_tb"
+ sql """DROP TABLE IF EXISTS ${test_dynamic_fpp_tb}"""
+ sql """CREATE TABLE IF NOT EXISTS ${test_dynamic_fpp_tb} (
+ `id` int(11) NOT NULL,
+ `name` varchar(50) NOT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 5
+ PROPERTIES (
+ "replication_num" = "1",
+ "bloom_filter_columns" = "id",
+ "bloom_filter_fpp" = "0.03"
+ )"""
+ try {
+
GetDebugPoint().enableDebugPointForAllBEs("BloomFilterIndexWriter::create",
[fpp: "0.03"])
+ sql """ INSERT INTO ${test_dynamic_fpp_tb} VALUES (1, 'Alice'), (2,
'Bob'), (3, 'Charlie'), (4, 'David'), (5, 'Eve') """
+ } catch (e) {
+ logger.info("catch exception: ${e}")
+ assert(false)
+ } finally {
+
GetDebugPoint().disableDebugPointForAllBEs("BloomFilterIndexWriter::create");
+ }
+
+ sql """ALTER TABLE ${test_dynamic_fpp_tb} SET("bloom_filter_fpp" =
"0.02")"""
+ wait_for_latest_op_on_table_finish(test_dynamic_fpp_tb, timeout)
+
+ try {
+
GetDebugPoint().enableDebugPointForAllBEs("BloomFilterIndexWriter::create",
[fpp: "0.02"])
+ sql """INSERT INTO ${test_dynamic_fpp_tb} VALUES
+ (6, 'Grace'),
+ (7, 'Henry'),
+ (8, 'Ivy'),
+ (9, 'Jack'),
+ (10, 'Kate')"""
+ } catch (e) {
+ logger.info("catch exception: ${e}")
+ assert(false)
+ } finally {
+
GetDebugPoint().disableDebugPointForAllBEs("BloomFilterIndexWriter::create");
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]