This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 1cdc43a1881 branch-3.1: [fix](bloomfilter) Correct bloom_filter_fpp 
usage in BE #47384 (#52893)
1cdc43a1881 is described below

commit 1cdc43a18818975d65d17c6d1a8486501a8aa732
Author: airborne12 <[email protected]>
AuthorDate: Tue Jul 8 10:31:10 2025 +0800

    branch-3.1: [fix](bloomfilter) Correct bloom_filter_fpp usage in BE #47384 
(#52893)
    
    cherry pick from #47384
---
 .../segment_v2/bloom_filter_index_writer.cpp       | 12 +++++
 be/src/olap/rowset/segment_v2/column_writer.cpp    |  3 +-
 be/src/olap/rowset/segment_v2/column_writer.h      |  2 +
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  3 ++
 .../rowset/segment_v2/vertical_segment_writer.cpp  |  4 ++
 .../bloom_filter_p0/test_bloom_filter.groovy       | 63 +++++++++++++++++++++-
 6 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp 
b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 940f6c7b18f..adcadcd114d 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -31,6 +31,7 @@
 #include "olap/rowset/segment_v2/indexed_column_writer.h"
 #include "olap/types.h"
 #include "runtime/decimalv2_value.h"
+#include "util/debug_points.h"
 #include "util/slice.h"
 #include "util/types.h"
 
@@ -302,6 +303,17 @@ uint64_t NGramBloomFilterIndexWriterImpl::size() {
 Status BloomFilterIndexWriter::create(const BloomFilterOptions& bf_options,
                                       const TypeInfo* type_info,
                                       std::unique_ptr<BloomFilterIndexWriter>* 
res) {
+    DBUG_EXECUTE_IF("BloomFilterIndexWriter::create", {
+        auto fpp = 
DebugPoints::instance()->get_debug_param_or_default<std::string>(
+                "BloomFilterIndexWriter::create", "fpp", "");
+        if (!fpp.empty()) {
+            double fpp_value = std::stod(fpp);
+            if (std::abs(bf_options.fpp - fpp_value) > 1e-6) {
+                return Status::Error<ErrorCode::INTERNAL_ERROR>("fpp {} is not 
a equal to {}", fpp,
+                                                                
bf_options.fpp);
+            }
+        }
+    })
     FieldType type = type_info->type();
     switch (type) {
 #define M(TYPE)                                                                
  \
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp 
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index acdcc711b68..ffc7385c03e 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -29,7 +29,6 @@
 #include "io/fs/file_writer.h"
 #include "olap/olap_common.h"
 #include "olap/rowset/segment_v2/bitmap_index_writer.h"
-#include "olap/rowset/segment_v2/bloom_filter.h"
 #include "olap/rowset/segment_v2/bloom_filter_index_writer.h"
 #include "olap/rowset/segment_v2/encoding_info.h"
 #include "olap/rowset/segment_v2/inverted_index_writer.h"
@@ -519,7 +518,7 @@ Status ScalarColumnWriter::init() {
                     _opts.gram_bf_size, &_bloom_filter_index_builder));
         } else {
             RETURN_IF_ERROR(BloomFilterIndexWriter::create(
-                    BloomFilterOptions(), get_field()->type_info(), 
&_bloom_filter_index_builder));
+                    _opts.bf_options, get_field()->type_info(), 
&_bloom_filter_index_builder));
         }
     }
     return Status::OK();
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h 
b/be/src/olap/rowset/segment_v2/column_writer.h
index 9eb19b3443d..8e7398f0418 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -30,6 +30,7 @@
 
 #include "common/status.h" // for Status
 #include "olap/field.h"    // for Field
+#include "olap/rowset/segment_v2/bloom_filter.h"
 #include "olap/rowset/segment_v2/common.h"
 #include "olap/rowset/segment_v2/inverted_index_writer.h"
 #include "util/bitmap.h" // for BitmapChange
@@ -66,6 +67,7 @@ struct ColumnWriterOptions {
     uint8_t gram_size;
     uint16_t gram_bf_size;
     std::vector<const TabletIndex*> inverted_indexs;
+    BloomFilterOptions bf_options;
     InvertedIndexFileWriter* inverted_index_file_writer;
     // variant column writer used
     SegmentFooterPB* footer = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 3b9e959dcd2..942128df31a 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -190,6 +190,9 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, 
const TabletColumn& co
     // except for columns whose type don't support zone map.
     opts.need_zone_map = column.is_key() || schema->keys_type() != 
KeysType::AGG_KEYS;
     opts.need_bloom_filter = column.is_bf_column();
+    if (opts.need_bloom_filter) {
+        opts.bf_options.fpp = schema->has_bf_fpp() ? 
schema->bloom_filter_fpp() : 0.05;
+    }
     auto* tablet_index = schema->get_ngram_bf_index(column.unique_id());
     if (tablet_index) {
         opts.need_bloom_filter = true;
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 30ad12d0991..335c79ccc27 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -184,6 +184,10 @@ Status 
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
     // except for columns whose type don't support zone map.
     opts.need_zone_map = column.is_key() || tablet_schema->keys_type() != 
KeysType::AGG_KEYS;
     opts.need_bloom_filter = column.is_bf_column();
+    if (opts.need_bloom_filter) {
+        opts.bf_options.fpp =
+                tablet_schema->has_bf_fpp() ? 
tablet_schema->bloom_filter_fpp() : 0.05;
+    }
     auto* tablet_index = tablet_schema->get_ngram_bf_index(column.unique_id());
     if (tablet_index) {
         opts.need_bloom_filter = true;
diff --git a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy 
b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
index ff8710c5998..8e95dee8f11 100644
--- a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
+++ b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
@@ -14,7 +14,25 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-suite("test_bloom_filter") {
+suite("test_bloom_filter","nonConcurrent") {
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+    def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+        // Poll the latest alter job of table_name every delta_time ms; useTime exceeds OpTimeout only if it never reached FINISHED.
+        for (useTime = 0; useTime <= OpTimeout; useTime += delta_time) {
+            alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+            alter_res = alter_res.toString()
+            if (alter_res.contains("FINISHED")) {
+                sleep(3000) // wait change table state to normal
+                logger.info(table_name + " latest alter job finished, detail: " + alter_res)
+                break
+            }
+            sleep(delta_time)
+        }
+        assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout")
+    }
     // todo: test bloom filter, such alter table bloom filter, create table 
with bloom filter
     sql "SHOW ALTER TABLE COLUMN"
 
@@ -176,11 +194,52 @@ suite("test_bloom_filter") {
         (2,2,2,"2024-12-18 20:00:00", "2024-12-18 20:00:00", "2024-12-18", 
"2024-12-18", "3.33", "3.33"),
         (3,3,3,"2024-12-22 20:00:00", "2024-12-22 20:00:00", "2024-12-22", 
"2024-12-22", "4.33", "4.33")"""
     sql """ALTER TABLE ${test_datetime_tb} SET ("bloom_filter_columns" = 
"d,d2,da,dav2,dec,dec2")"""
-    Thread.sleep(3000)
+    wait_for_latest_op_on_table_finish(test_datetime_tb, timeout)
     qt_select_datetime_v1 """SELECT * FROM ${test_datetime_tb} WHERE d IN 
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
     qt_select_datetime_v2 """SELECT * FROM ${test_datetime_tb} WHERE d2 IN 
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
     qt_select_date_v1 """SELECT * FROM ${test_datetime_tb} WHERE da IN 
("2024-12-17", "2024-12-18") order by a"""
     qt_select_date_v2 """SELECT * FROM ${test_datetime_tb} WHERE dav2 IN 
("2024-12-17", "2024-12-18") order by a"""
     sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'true')"""
     sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'true')"""
+
+    def test_dynamic_fpp_tb = "test_dynamic_fpp_bloom_filter_tb"
+    sql """DROP TABLE IF EXISTS ${test_dynamic_fpp_tb}"""
+    sql """CREATE TABLE IF NOT EXISTS ${test_dynamic_fpp_tb} (
+            `id` int(11) NOT NULL,
+            `name` varchar(50) NOT NULL
+        ) ENGINE=OLAP
+        UNIQUE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 5
+        PROPERTIES (
+            "replication_num" = "1",
+            "bloom_filter_columns" = "id",
+            "bloom_filter_fpp" = "0.03"
+    )"""
+    try {
+        
GetDebugPoint().enableDebugPointForAllBEs("BloomFilterIndexWriter::create", 
[fpp: "0.03"])
+        sql """ INSERT INTO ${test_dynamic_fpp_tb} VALUES (1, 'Alice'), (2, 
'Bob'), (3, 'Charlie'), (4, 'David'), (5, 'Eve') """
+    } catch (e) {
+        logger.info("catch exception: ${e}")
+        assert(false)
+    } finally {
+        
GetDebugPoint().disableDebugPointForAllBEs("BloomFilterIndexWriter::create");
+    }
+
+    sql """ALTER TABLE ${test_dynamic_fpp_tb} SET("bloom_filter_fpp" = 
"0.02")"""
+    wait_for_latest_op_on_table_finish(test_dynamic_fpp_tb, timeout)
+
+    try {
+        
GetDebugPoint().enableDebugPointForAllBEs("BloomFilterIndexWriter::create", 
[fpp: "0.02"])
+        sql """INSERT INTO ${test_dynamic_fpp_tb} VALUES
+            (6, 'Grace'),
+            (7, 'Henry'),
+            (8, 'Ivy'),
+            (9, 'Jack'),
+            (10, 'Kate')"""
+    } catch (e) {
+        logger.info("catch exception: ${e}")
+        assert(false)
+    } finally {
+        
GetDebugPoint().disableDebugPointForAllBEs("BloomFilterIndexWriter::create");
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to