This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e8aa5ee7d5681c677632060b9478d99128b83814
Author: lihangyu <[email protected]>
AuthorDate: Fri Mar 8 13:41:24 2024 +0800

    [Improve](Variant) support bloom filter for variant subcolumns (#31347)
    
    * [Improve](Variant) support bloom filter for variant subcolumns
    
    * rebase
---
 .../rowset/segment_v2/vertical_segment_writer.cpp  |  4 +-
 be/src/olap/tablet_schema.cpp                      |  4 ++
 be/src/olap/tablet_schema.h                        |  2 +
 be/src/vec/common/schema_util.cpp                  | 22 ++++++++---
 .../data/variant_p0/with_index/bloom_filter.out    |  9 +++++
 .../suites/variant_github_events_p0/load.groovy    |  2 +-
 .../variant_p0/with_index/bloom_filter.groovy      | 44 ++++++++++++++++++++++
 7 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index a5bea89b835..e755691f291 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -169,6 +169,9 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
         (column.is_extracted_column() && column.is_array_type())) {
         // variant and jsonb type skip write index
         opts.indexes.clear();
+        opts.need_zone_map = false;
+        opts.need_bloom_filter = false;
+        opts.need_bitmap_index = false;
     }
     for (auto index : opts.indexes) {
         if (!skip_inverted_index && index && index->index_type() == IndexType::INVERTED) {
@@ -194,7 +197,6 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
     CHECK_FIELD_TYPE(JSONB, "jsonb")
     CHECK_FIELD_TYPE(AGG_STATE, "agg_state")
     CHECK_FIELD_TYPE(MAP, "map")
-    CHECK_FIELD_TYPE(VARIANT, "variant")
     CHECK_FIELD_TYPE(OBJECT, "object")
     CHECK_FIELD_TYPE(HLL, "hll")
     CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 804b4fc427a..6f097c96641 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1192,6 +1192,10 @@ TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
     return *_cols.at(_field_id_to_index.at(col_unique_id));
 }
 
+TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
+    return *_cols.at(ordinal);
+}
+
 void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
     std::vector<TabletIndex> indexes;
     for (auto& tindex : tindexes) {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 847f74038b9..5c36122ffc2 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -158,6 +158,7 @@ public:
     };
     int32_t parent_unique_id() const { return _parent_col_unique_id; }
     void set_parent_unique_id(int32_t col_unique_id) { _parent_col_unique_id = col_unique_id; }
+    void set_is_bf_column(bool is_bf_column) { _is_bf_column = is_bf_column; }
     std::shared_ptr<const vectorized::IDataType> get_vec_type() const;
 
     void append_sparse_column(TabletColumn column);
@@ -292,6 +293,7 @@ public:
     Status have_column(const std::string& field_name) const;
     const TabletColumn& column_by_uid(int32_t col_unique_id) const;
     TabletColumn& mutable_column_by_uid(int32_t col_unique_id);
+    TabletColumn& mutable_column(size_t ordinal);
     void replace_column(size_t pos, TabletColumn new_col);
     const std::vector<TabletColumnPtr>& columns() const;
     size_t num_columns() const { return _num_columns; }
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index 98148fa55bc..29167734d96 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -259,6 +259,7 @@ void update_least_schema_internal(const std::map<PathInData, DataTypes>& subcolu
                                   std::set<PathInData>* path_set = nullptr) {
     PathsInData tuple_paths;
     DataTypes tuple_types;
+    CHECK(common_schema.use_count() == 1);
     // Get the least common type for all paths.
     for (const auto& [key, subtypes] : subcolumns_types) {
         assert(!subtypes.empty());
@@ -379,22 +380,30 @@ void inherit_tablet_index(TabletSchemaSPtr& schema) {
     }
 
     // Add index meta if extracted column is missing index meta
-    for (const auto& col : schema->columns()) {
-        if (!col->is_extracted_column()) {
+    for (size_t i = 0; i < schema->num_columns(); ++i) {
+        TabletColumn& col = schema->mutable_column(i);
+        if (!col.is_extracted_column()) {
             continue;
         }
-        auto it = variants_index_meta.find(col->parent_unique_id());
+        if (col.type() != FieldType::OLAP_FIELD_TYPE_TINYINT &&
+            col.type() != FieldType::OLAP_FIELD_TYPE_ARRAY &&
+            col.type() != FieldType::OLAP_FIELD_TYPE_DOUBLE &&
+            col.type() != FieldType::OLAP_FIELD_TYPE_FLOAT) {
+            // above types are not supported in bf
+            col.set_is_bf_column(schema->column(col.parent_unique_id()).is_bf_column());
+        }
+        auto it = variants_index_meta.find(col.parent_unique_id());
         // variant has no index meta, ignore
         if (it == variants_index_meta.end()) {
             continue;
         }
-        auto index_meta = schema->get_inverted_index(*col);
+        auto index_meta = schema->get_inverted_index(col);
         // add index meta
         TabletIndex index_info = it->second;
-        index_info.set_escaped_escaped_index_suffix_path(col->path_info_ptr()->get_path());
+        index_info.set_escaped_escaped_index_suffix_path(col.path_info_ptr()->get_path());
         if (index_meta != nullptr) {
             // already exist
-            schema->update_index(*col, index_info);
+            schema->update_index(col, index_info);
         } else {
             schema->append_index(index_info);
         }
@@ -599,6 +608,7 @@ static void _append_column(const TabletColumn& parent_variant,
     // If column already exist in original tablet schema, then we pick common type
     // and cast column to common type, and modify tablet column to common type,
     // otherwise it's a new column
+    CHECK(to_append.use_count() == 1);
     const std::string& column_name =
             parent_variant.name_lower_case() + "." + subcolumn->path.get_path();
     const vectorized::DataTypePtr& final_data_type_from_object =
diff --git a/regression-test/data/variant_p0/with_index/bloom_filter.out b/regression-test/data/variant_p0/with_index/bloom_filter.out
new file mode 100644
index 00000000000..4eaab9d140b
--- /dev/null
+++ b/regression-test/data/variant_p0/with_index/bloom_filter.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+2      {"a":789111}
+3      {"a":789111}
+
+-- !sql --
+2      {"b":"yyyyyyy"}
+2      {"b":"yyyyyyy"}
+
diff --git a/regression-test/suites/variant_github_events_p0/load.groovy b/regression-test/suites/variant_github_events_p0/load.groovy
index 4bde4400f44..e7485807c60 100644
--- a/regression-test/suites/variant_github_events_p0/load.groovy
+++ b/regression-test/suites/variant_github_events_p0/load.groovy
@@ -65,7 +65,7 @@ suite("regression_test_variant_github_events_p0", "nonConcurrent"){
         )
         DUPLICATE KEY(`k`)
         DISTRIBUTED BY HASH(k) BUCKETS 4 
-        properties("replication_num" = "1", "disable_auto_compaction" = "false");
+        properties("replication_num" = "1", "disable_auto_compaction" = "false", "bloom_filter_columns" = "v");
     """
     set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
     // 2015
diff --git a/regression-test/suites/variant_p0/with_index/bloom_filter.groovy b/regression-test/suites/variant_p0/with_index/bloom_filter.groovy
new file mode 100644
index 00000000000..b7f08609b87
--- /dev/null
+++ b/regression-test/suites/variant_p0/with_index/bloom_filter.groovy
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("regression_test_variant_with_bf", ""){
+    def table_name = "var_with_bloom_filter"
+    sql "DROP TABLE IF EXISTS var_with_bloom_filter"
+    sql """
+        CREATE TABLE IF NOT EXISTS var_with_bloom_filter (
+            k bigint,
+            v variant
+        )
+        DUPLICATE KEY(`k`)
+        DISTRIBUTED BY HASH(k) BUCKETS 1
+        properties("replication_num" = "1", "bloom_filter_columns" = "v");
+    """
+    sql """insert into ${table_name} values (1, '{"a" : 123456}')"""
+    sql """insert into ${table_name} values (2, '{"a" : 789111}')"""
+    sql """insert into ${table_name} values (3, '{"a" : 789111}')"""
+
+    sql """insert into ${table_name} values (1, '{"b" : "xxxxxxx"}')"""
+    sql """insert into ${table_name} values (2, '{"b" : "yyyyyyy"}')"""
+    sql """insert into ${table_name} values (3, '{"b" : "zzzzzzz"}')"""
+
+    sql """insert into ${table_name} values (1, '{"b" : "xxxxxxx"}')"""
+    sql """insert into ${table_name} values (2, '{"b" : "yyyyyyy"}')"""
+    sql """insert into ${table_name} values (3, '{"b" : "zzzzzzz"}')"""
+
+    qt_sql "select * from  var_with_bloom_filter where cast(v['a'] as int) = 789111"
+    qt_sql "select * from  var_with_bloom_filter where cast(v['b'] as text) = 'yyyyyyy' ";
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to