This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
new cbd17c72c6d fix bf lost after compaction (#48733)
cbd17c72c6d is described below
commit cbd17c72c6d3f369c2bab019fdf90701f9481561
Author: lihangyu <[email protected]>
AuthorDate: Thu Mar 6 13:08:37 2025 +0800
fix bf lost after compaction (#48733)
---
.../olap/rowset/segment_v2/variant_column_writer_impl.cpp | 6 ++++++
be/src/vec/common/schema_util.cpp | 6 ++----
.../suites/variant_p0/with_index/bloom_filter.groovy | 14 ++++++++++----
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
index 56d09224e48..5c57db390de 100644
--- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
@@ -83,6 +83,9 @@ Status _create_column_writer(uint32_t cid, const TabletColumn& column,
opt->need_bloom_filter = column.is_bf_column();
opt->need_bitmap_index = column.has_bitmap_index();
const auto& index =
tablet_schema->inverted_index(column.parent_unique_id());
+ VLOG_DEBUG << "column: " << column.name() << " need_inverted_index: " <<
opt->need_inverted_index
+ << " need_bloom_filter: " << opt->need_bloom_filter
+ << " need_bitmap_index: " << opt->need_bitmap_index;
// init inverted index
if (index != nullptr &&
@@ -657,6 +660,9 @@ Status VariantSubcolumnWriter::finalize() {
_opts.rowset_ctx->tablet_schema->column_by_uid(_tablet_column->parent_unique_id());
// refresh opts and get writer with flush column
vectorized::schema_util::inherit_column_attributes(parent_column,
flush_column);
+ VLOG_DEBUG << "parent_column: " << parent_column.name() << " flush_column:
"
+ << flush_column.name() << " is_bf_column: " <<
parent_column.is_bf_column() << " "
+ << flush_column.is_bf_column();
RETURN_IF_ERROR(_create_column_writer(
0, flush_column, _opts.rowset_ctx->tablet_schema,
_opts.inverted_index_file_writer,
&_writer, _index, &opts,
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index 62f1597345e..50c8d0649fe 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -629,8 +629,6 @@ bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* o
TabletColumn create_sparse_column(const TabletColumn& variant) {
TabletColumn res;
res.set_name(variant.name_lower_case() + "." + SPARSE_COLUMN_PATH);
- res.set_unique_id(variant.parent_unique_id() > 0 ?
variant.parent_unique_id()
- : variant.unique_id());
res.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
res.set_aggregation_method(variant.aggregation());
res.set_path_info(PathInData {variant.name_lower_case() + "." +
SPARSE_COLUMN_PATH});
@@ -762,10 +760,10 @@ Status get_compaction_schema(const std::vector<RowsetSharedPtr>& rowsets,
// append subcolumns
for (const auto& subpath : sorted_subpaths) {
TabletColumn subcolumn;
- subcolumn.set_name(column->name() + "." + subpath.to_string());
+ subcolumn.set_name(column->name_lower_case() + "." +
subpath.to_string());
subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
subcolumn.set_parent_unique_id(column->unique_id());
- subcolumn.set_path_info(PathInData(column->name() + "." +
subpath.to_string()));
+ subcolumn.set_path_info(PathInData(column->name_lower_case() + "."
+ subpath.to_string()));
subcolumn.set_aggregation_method(column->aggregation());
subcolumn.set_variant_max_subcolumns_count(column->variant_max_subcolumns_count());
subcolumn.set_is_nullable(true);
diff --git a/regression-test/suites/variant_p0/with_index/bloom_filter.groovy b/regression-test/suites/variant_p0/with_index/bloom_filter.groovy
index b7f08609b87..592318d4fcb 100644
--- a/regression-test/suites/variant_p0/with_index/bloom_filter.groovy
+++ b/regression-test/suites/variant_p0/with_index/bloom_filter.groovy
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-suite("regression_test_variant_with_bf", ""){
+suite("regression_test_variant_with_bf", "nonConcurrent,p0"){
def table_name = "var_with_bloom_filter"
sql "DROP TABLE IF EXISTS var_with_bloom_filter"
sql """
@@ -30,6 +30,7 @@ suite("regression_test_variant_with_bf", ""){
sql """insert into ${table_name} values (1, '{"a" : 123456}')"""
sql """insert into ${table_name} values (2, '{"a" : 789111}')"""
sql """insert into ${table_name} values (3, '{"a" : 789111}')"""
+
sql """insert into ${table_name} values (1, '{"b" : "xxxxxxx"}')"""
sql """insert into ${table_name} values (2, '{"b" : "yyyyyyy"}')"""
@@ -38,7 +39,12 @@ suite("regression_test_variant_with_bf", ""){
sql """insert into ${table_name} values (1, '{"b" : "xxxxxxx"}')"""
sql """insert into ${table_name} values (2, '{"b" : "yyyyyyy"}')"""
sql """insert into ${table_name} values (3, '{"b" : "zzzzzzz"}')"""
-
- qt_sql "select * from var_with_bloom_filter where cast(v['a'] as int) =
789111"
- qt_sql "select * from var_with_bloom_filter where cast(v['b'] as text) =
'yyyyyyy' ";
+ // trigger_and_wait_compaction("var_with_bloom_filter", "full")
+ // try {
+ //
GetDebugPoint().enableDebugPointForAllBEs("bloom_filter_must_filter_data")
+ // sql """ set enable_inverted_index_query = false """
+ // sql "select * from var_with_bloom_filter where cast(v['a'] as int)
= 789111"
+ // } finally {
+ //
GetDebugPoint().disableDebugPointForAllBEs("bloom_filter_must_filter_data")
+ // }
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]