This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 3e098e14f10 [fix](ngram bloomfilter) fix narrow conversion for ngram 
bf_size #43480 (#43654)
3e098e14f10 is described below

commit 3e098e14f100bd829c3af6515bc64cebc9d4c480
Author: airborne12 <[email protected]>
AuthorDate: Tue Nov 12 16:54:43 2024 +0800

    [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size #43480 
(#43654)
    
    cherry pick from #43480
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 15 ++++++-
 .../rowset/segment_v2/vertical_segment_writer.cpp  | 15 ++++++-
 .../java/org/apache/doris/analysis/IndexDef.java   |  4 +-
 .../trees/plans/commands/info/IndexDefinition.java |  4 +-
 .../index_p0/test_ngram_bloomfilter_index.groovy   | 47 ++++++++++++++++++++++
 5 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 7690a330ae4..7c9e608e828 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -201,8 +201,19 @@ Status SegmentWriter::init(const std::vector<uint32_t>& 
col_ids, bool has_key) {
         if (tablet_index) {
             opts.need_bloom_filter = true;
             opts.is_ngram_bf_index = true;
-            opts.gram_size = tablet_index->get_gram_size();
-            opts.gram_bf_size = tablet_index->get_gram_bf_size();
+            // Narrowing int32_t to uint8_t/uint16_t is dangerous; range-check first.
+            auto gram_size = tablet_index->get_gram_size();
+            auto gram_bf_size = tablet_index->get_gram_bf_size();
+            if (gram_size > 256 || gram_size < 1) {
+                return Status::NotSupported("Do not support ngram bloom filter 
for ngram_size: ",
+                                            gram_size);
+            }
+            if (gram_bf_size > 65535 || gram_bf_size < 64) {
+                return Status::NotSupported("Do not support ngram bloom filter 
for bf_size: ",
+                                            gram_bf_size);
+            }
+            opts.gram_size = gram_size;
+            opts.gram_bf_size = gram_bf_size;
         }
 
         opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 549ad41d2cc..2a929f79d50 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -156,8 +156,19 @@ Status 
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
     if (tablet_index) {
         opts.need_bloom_filter = true;
         opts.is_ngram_bf_index = true;
-        opts.gram_size = tablet_index->get_gram_size();
-        opts.gram_bf_size = tablet_index->get_gram_bf_size();
+        // Narrowing int32_t to uint8_t/uint16_t is dangerous; range-check first.
+        auto gram_size = tablet_index->get_gram_size();
+        auto gram_bf_size = tablet_index->get_gram_bf_size();
+        if (gram_size > 256 || gram_size < 1) {
+            return Status::NotSupported("Do not support ngram bloom filter for 
ngram_size: ",
+                                        gram_size);
+        }
+        if (gram_bf_size > 65535 || gram_bf_size < 64) {
+            return Status::NotSupported("Do not support ngram bloom filter for 
bf_size: ",
+                                        gram_bf_size);
+        }
+        opts.gram_size = gram_size;
+        opts.gram_bf_size = gram_bf_size;
     }
 
     opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index fc122cc82a5..d57ca506786 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -258,8 +258,8 @@ public class IndexDef {
                     if (ngramSize > 256 || ngramSize < 1) {
                         throw new AnalysisException("gram_size should be 
integer and less than 256");
                     }
-                    if (bfSize > 65536 || bfSize < 64) {
-                        throw new AnalysisException("bf_size should be integer 
and between 64 and 65536");
+                    if (bfSize > 65535 || bfSize < 64) {
+                        throw new AnalysisException("bf_size should be integer 
and between 64 and 65535");
                     }
                 } catch (NumberFormatException e) {
                     throw new AnalysisException("invalid ngram properties:" + 
e.getMessage(), e);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
index dc2968f53b8..cd9c19c25d4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@@ -145,9 +145,9 @@ public class IndexDefinition {
                         throw new AnalysisException(
                                 "gram_size should be integer and less than 
256");
                     }
-                    if (bfSize > 65536 || bfSize < 64) {
+                    if (bfSize > 65535 || bfSize < 64) {
                         throw new AnalysisException(
-                                "bf_size should be integer and between 64 and 
65536");
+                                "bf_size should be integer and between 64 and 
65535");
                     }
                 } catch (NumberFormatException e) {
                     throw new AnalysisException("invalid ngram properties:" + 
e.getMessage(), e);
diff --git 
a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy 
b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
index c56eed967a0..e2ab9b9c117 100644
--- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
+++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
@@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") {
     qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = 
'/%/7212503657802320699%' ORDER BY key_id"
     qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN 
('/%/7212503657802320699%') ORDER BY key_id"
     qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like 
'/%/7212503657802320699%' ORDER BY key_id"
+
+    //case for bf_size 65536
+    def tableName2 = 'test_ngram_bloomfilter_index2'
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+    test {
+        sql """
+        CREATE TABLE IF NOT EXISTS ${tableName2} (
+            `key_id` bigint(20) NULL COMMENT '',
+            `category` varchar(200) NULL COMMENT '',
+            `https_url` varchar(300) NULL COMMENT '',
+            `hostname` varchar(300) NULL,
+            `http_url` text NULL COMMENT '',
+            `url_path` varchar(2000) NULL COMMENT '',
+            `cnt` bigint(20) NULL COMMENT '',
+            `host_flag` boolean NULL COMMENT '',
+            INDEX idx_ngrambf (`http_url`) USING NGRAM_BF 
PROPERTIES("gram_size" = "2", "bf_size" = "65536")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`key_id`, `category`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+        """
+        exception "bf_size should be integer and between 64 and 65535"
+    }
+
+    def tableName3 = 'test_ngram_bloomfilter_index3'
+    sql "DROP TABLE IF EXISTS ${tableName3}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName3} (
+            `key_id` bigint(20) NULL COMMENT '',
+            `category` varchar(200) NULL COMMENT '',
+            `https_url` varchar(300) NULL COMMENT '',
+            `hostname` varchar(300) NULL,
+            `http_url` text NULL COMMENT '',
+            `url_path` varchar(2000) NULL COMMENT '',
+            `cnt` bigint(20) NULL COMMENT '',
+            `host_flag` boolean NULL COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`key_id`, `category`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+        """
+    test {
+        sql """ALTER TABLE  ${tableName3} ADD INDEX idx_http_url(http_url) 
USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url 
ngram_bf index'"""
+        exception "bf_size should be integer and between 64 and 65535"
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to