This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 21b213e2232 branch-4.1: [fix](inverted index) resolve variant
sub-column indexes for score() #62992 (#63078)
21b213e2232 is described below
commit 21b213e22329ac89a7ee7fa2504d78c2a81142d3
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat May 9 15:33:05 2026 +0800
branch-4.1: [fix](inverted index) resolve variant sub-column indexes for
score() #62992 (#63078)
Cherry-picked from #62992
Co-authored-by: Jack <[email protected]>
---
be/src/storage/predicate_collector.cpp | 57 +-
be/src/storage/predicate_collector.h | 1 +
.../compaction/collection_statistics_test.cpp | 660 ++++++++++++++++++++-
be/test/testutil/mock/mock_descriptors.h | 32 +-
.../inverted_index_p0/test_bm25_score.groovy | 49 +-
.../test_bm25_score_variant.groovy | 106 ++++
6 files changed, 898 insertions(+), 7 deletions(-)
diff --git a/be/src/storage/predicate_collector.cpp
b/be/src/storage/predicate_collector.cpp
index 8e319ae329f..fa8fc0117ce 100644
--- a/be/src/storage/predicate_collector.cpp
+++ b/be/src/storage/predicate_collector.cpp
@@ -19,6 +19,9 @@
#include <glog/logging.h>
+#include <vector>
+
+#include "exec/common/variant_util.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"
#include "exprs/vliteral.h"
@@ -91,7 +94,49 @@ Status MatchPredicateCollector::collect(RuntimeState* state,
const TabletSchemaS
}
const auto& column = tablet_schema->column(col_idx);
- auto index_metas = tablet_schema->inverted_indexs(sd->col_unique_id(),
column.suffix_path());
+ auto index_metas = tablet_schema->inverted_indexs(column);
+ std::vector<std::shared_ptr<const TabletIndex>> owned_index_metas;
+ std::string index_suffix_path = column.suffix_path();
+
+ // Schema-only fallback for variant sub-columns. Collector runs at tablet
+ // level without segment context, so we cannot do nested-group inference
+ // or inherit_index runtime-type dispatch. Two paths cover what is
+ // resolvable from schema alone:
+ // 1. field_pattern templates (MATCH_NAME / MATCH_NAME_GLOB) via
+ // generate_sub_column_info.
+ // 2. Plain parent inverted index when the schema column is the dynamic
+ // path's VARIANT placeholder produced by _init_variant_columns. In
+ // that state inverted_indexs(column) misses because
+ // _path_set_info_map.subcolumn_indexes is only populated for typed
+ // paths / field_pattern outputs, not for plain parent indexes added
+ // by ALTER. Clone the parent's non-field-pattern indexes with the
+ // variant path as suffix so segment-side BM25 statistics can be
+ // collected.
+ if (index_metas.empty() && column.is_extracted_column()) {
+ TabletSchema::SubColumnInfo sub_column_info;
+ const std::string relative_path =
column.path_info_ptr()->copy_pop_front().get_path();
+ if (variant_util::generate_sub_column_info(*tablet_schema,
column.parent_unique_id(),
+ relative_path,
&sub_column_info) &&
+ !sub_column_info.indexes.empty()) {
+ index_suffix_path = sub_column_info.column.suffix_path();
+ for (auto& idx : sub_column_info.indexes) {
+ index_metas.push_back(idx.get());
+ owned_index_metas.emplace_back(std::move(idx));
+ }
+ } else if (column.is_variant_type()) {
+ const auto parent_indexes =
tablet_schema->inverted_indexs(column.parent_unique_id());
+ for (const auto* index : parent_indexes) {
+ if (!index->field_pattern().empty()) {
+ continue;
+ }
+ auto index_ptr = std::make_shared<TabletIndex>(*index);
+ index_ptr->set_escaped_escaped_index_suffix_path(
+ column.path_info_ptr()->get_path());
+ index_metas.push_back(index_ptr.get());
+ owned_index_metas.emplace_back(std::move(index_ptr));
+ }
+ }
+ }
#ifndef BE_TEST
if (index_metas.empty()) {
@@ -117,7 +162,7 @@ Status MatchPredicateCollector::collect(RuntimeState*
state, const TabletSchemaS
index_meta->properties());
std::string field_name =
- build_field_name(index_meta->col_unique_ids()[0],
column.suffix_path());
+ build_field_name(index_meta->col_unique_ids()[0],
index_suffix_path);
std::wstring ws_field_name = StringHelper::to_wstring(field_name);
auto iter = collect_infos->find(ws_field_name);
@@ -125,6 +170,12 @@ Status MatchPredicateCollector::collect(RuntimeState*
state, const TabletSchemaS
CollectInfo collect_info;
collect_info.term_infos.insert(term_infos.begin(),
term_infos.end());
collect_info.index_meta = index_meta;
+ for (const auto& owned_index_meta : owned_index_metas) {
+ if (owned_index_meta.get() == index_meta) {
+ collect_info.owned_index_meta = owned_index_meta;
+ break;
+ }
+ }
(*collect_infos)[ws_field_name] = std::move(collect_info);
} else {
iter->second.term_infos.insert(term_infos.begin(),
term_infos.end());
@@ -260,4 +311,4 @@ SearchPredicateCollector::ClauseTypeCategory
SearchPredicateCollector::get_claus
}
}
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/storage/predicate_collector.h
b/be/src/storage/predicate_collector.h
index 0ad75f8d121..34af72b0714 100644
--- a/be/src/storage/predicate_collector.h
+++ b/be/src/storage/predicate_collector.h
@@ -44,6 +44,7 @@ struct TermInfoComparer {
struct CollectInfo {
std::set<segment_v2::TermInfo, TermInfoComparer> term_infos;
+ std::shared_ptr<const TabletIndex> owned_index_meta;
const TabletIndex* index_meta = nullptr;
};
using CollectInfoMap = std::unordered_map<std::wstring, CollectInfo>;
diff --git a/be/test/storage/compaction/collection_statistics_test.cpp
b/be/test/storage/compaction/collection_statistics_test.cpp
index c37d533c12d..738088ed341 100644
--- a/be/test/storage/compaction/collection_statistics_test.cpp
+++ b/be/test/storage/compaction/collection_statistics_test.cpp
@@ -25,6 +25,8 @@
#include <string>
#include "common/exception.h"
+#include "core/data_type/data_type_string.h"
+#include "exec/common/variant_util.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"
#include "exprs/vliteral.h"
@@ -43,7 +45,11 @@ namespace collection_statistics {
class MockVExpr : public VExpr {
public:
- MockVExpr(TExprNodeType::type node_type) : _mock_node_type(node_type) {}
+ MockVExpr(TExprNodeType::type node_type) : _mock_node_type(node_type) {
+ if (node_type == TExprNodeType::MATCH_PRED) {
+ _opcode = TExprOpcode::MATCH_PHRASE;
+ }
+ }
TExprNodeType::type node_type() const override { return _mock_node_type; }
@@ -100,6 +106,7 @@ public:
MockVLiteral(const std::string& value) : _value(value) {}
std::string value() const override { return _value; }
+ std::string value(const DataTypeSerDe::FormatOptions& options) const
override { return _value; }
const std::string& expr_name() const override { return _value; }
std::string debug_string() const override { return "MockVLiteral: " +
_value; }
@@ -268,6 +275,7 @@ protected:
index._col_unique_ids.push_back(1);
std::map<std::string, std::string> properties;
properties["parser"] = "standard";
+ properties["support_phrase"] = "true";
index._properties = properties;
tablet_schema->append_index(std::move(index));
@@ -614,6 +622,654 @@ TEST_F(CollectionStatisticsTest,
CollectWithDoubleCastWrappedSlotRef) {
EXPECT_TRUE(status.ok()) << status.msg();
}
+// Regression for AIR-36: match score collection must resolve indexes for
+// variant sub-columns whose indexes live in _path_set_info_map (typed paths or
+// inherited sub-column indexes). The previous simple lookup using
+// inverted_indexs(col_unique_id, suffix_path) missed those indexes.
+TEST_F(CollectionStatisticsTest, ExtractCollectInfoForVariantSubcolumnIndex) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kVariantUid = 9001;
+
+ TabletColumn variant_col;
+ variant_col.set_unique_id(kVariantUid);
+ variant_col.set_name("v");
+ variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ tablet_schema->append_column(variant_col);
+
+ TabletColumn sub_col;
+ sub_col.set_unique_id(-1);
+ sub_col.set_name("v.host");
+ sub_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ sub_col.set_parent_unique_id(kVariantUid);
+ PathInData path("v.host");
+ sub_col.set_path_info(path);
+ tablet_schema->append_column(sub_col);
+
+ auto sub_index = std::make_shared<TabletIndex>();
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2001);
+ index_pb.set_index_name("variant_subcolumn_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kVariantUid);
+ auto* props = index_pb.mutable_properties();
+ (*props)["parser"] = "standard";
+ (*props)["support_phrase"] = "true";
+ sub_index->init_from_pb(index_pb);
+
+ TabletSchema::PathsSetInfo path_set_info;
+ TabletIndexes sub_indexes = {sub_index};
+ path_set_info.subcolumn_indexes["host"] = sub_indexes;
+ std::unordered_map<int32_t, TabletSchema::PathsSetInfo> path_set_info_map;
+ path_set_info_map[kVariantUid] = std::move(path_set_info);
+ tablet_schema->set_path_set_info(std::move(path_set_info_map));
+
+ EXPECT_TRUE(tablet_schema->inverted_indexs(kVariantUid, "host").empty());
+
+ auto found =
tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1));
+ ASSERT_EQ(found.size(), 1u);
+ EXPECT_EQ(found[0]->index_name(), "variant_subcolumn_idx");
+
+ constexpr int kSlotId = 42;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kVariantUid);
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
+ std::make_shared<collection_statistics::MockVSlotRef>("v.host",
SlotId(kSlotId));
+ auto literal =
std::make_shared<collection_statistics::MockVLiteral>("foo");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ VExprContextSPtrs contexts;
+ contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status = stats_->extract_collect_info(runtime_state_.get(), contexts,
tablet_schema,
+ &collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ ASSERT_EQ(collect_infos.size(), 1u);
+ auto it =
collect_infos.find(StringHelper::to_wstring(std::to_string(kVariantUid) +
".v.host"));
+ ASSERT_NE(it, collect_infos.end());
+ ASSERT_NE(it->second.index_meta, nullptr);
+ EXPECT_EQ(it->second.index_meta->index_name(), "variant_subcolumn_idx");
+}
+
+// Regression for score on a dynamic variant sub-column inherited from a plain
+// parent variant inverted index (no field_pattern template). Matches the
+// scan-time schema shape: _init_variant_columns materializes the accessed
+// path as an extracted VARIANT placeholder, so neither inverted_indexs(column)
+// nor generate_sub_column_info resolves the parent index. Collector clones
+// the parent's non-field-pattern indexes with the variant path as suffix.
+TEST_F(CollectionStatisticsTest,
ExtractCollectInfoForVariantParentIndexWithoutTemplate) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kVariantUid = 9004;
+
+ TabletColumn variant_col;
+ variant_col.set_unique_id(kVariantUid);
+ variant_col.set_name("v");
+ variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ tablet_schema->append_column(variant_col);
+
+ TabletColumn sub_col;
+ sub_col.set_unique_id(-1);
+ sub_col.set_name("v.key");
+ sub_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ sub_col.set_parent_unique_id(kVariantUid);
+ PathInData path("v.key");
+ sub_col.set_path_info(path);
+ tablet_schema->append_column(sub_col);
+
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2004);
+ index_pb.set_index_name("variant_parent_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kVariantUid);
+ auto* props = index_pb.mutable_properties();
+ (*props)["parser"] = "english";
+ (*props)["support_phrase"] = "true";
+
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ tablet_schema->append_index(std::move(index));
+
+ // Pre-conditions: column-aware lookup is empty (no inheritance pre-populated)
+ // and generate_sub_column_info returns false (no field_pattern template).
+ // The collector must still resolve through the VARIANT-placeholder branch.
+
ASSERT_TRUE(tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1)).empty());
+ ASSERT_EQ(tablet_schema->inverted_indexs(kVariantUid).size(), 1u);
+ TabletSchema::SubColumnInfo sub_column_info;
+ ASSERT_FALSE(variant_util::generate_sub_column_info(*tablet_schema,
kVariantUid, "key",
+ &sub_column_info));
+
+ constexpr int kSlotId = 45;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kVariantUid, "v.key",
+ {"key"});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto cast_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::CAST_EXPR);
+ cast_expr->_data_type = std::make_shared<DataTypeString>();
+ auto slot_ref =
std::make_shared<collection_statistics::MockVSlotRef>("v.key", SlotId(kSlotId));
+ auto literal =
std::make_shared<collection_statistics::MockVLiteral>("abc");
+ cast_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(cast_expr);
+ match_expr->_children.push_back(literal);
+
+ VExprContextSPtrs contexts;
+ contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status = stats_->extract_collect_info(runtime_state_.get(), contexts,
tablet_schema,
+ &collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ ASSERT_EQ(collect_infos.size(), 1u);
+ auto it =
collect_infos.find(StringHelper::to_wstring(std::to_string(kVariantUid) +
".v.key"));
+ ASSERT_NE(it, collect_infos.end());
+ ASSERT_NE(it->second.index_meta, nullptr);
+ ASSERT_NE(it->second.owned_index_meta, nullptr);
+ EXPECT_EQ(it->second.index_meta->index_name(), "variant_parent_idx");
+}
+
+namespace {
+
+// Build a sub-column template for the parent variant column. pattern_type has no
+// public setter on TabletColumn, so construct through ColumnPB.
+TabletColumn make_subcolumn_template(const std::string& pattern, PatternTypePB
pattern_type) {
+ ColumnPB column_pb;
+ column_pb.set_unique_id(-1);
+ column_pb.set_name(pattern);
+ column_pb.set_type("STRING");
+ column_pb.set_is_nullable(true);
+ column_pb.set_pattern_type(pattern_type);
+
+ TabletColumn templ;
+ templ.init_from_pb(column_pb);
+ return templ;
+}
+
+} // namespace
+
+TEST_F(CollectionStatisticsTest,
ExtractCollectInfoForVariantFieldPatternIndex) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kVariantUid = 9002;
+
+ TabletColumn variant_col;
+ variant_col.set_unique_id(kVariantUid);
+ variant_col.set_name("meta");
+ variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ TabletColumn host_template = make_subcolumn_template("host",
PatternTypePB::MATCH_NAME);
+ variant_col.add_sub_column(host_template);
+ tablet_schema->append_column(variant_col);
+
+ TabletColumn sub_col;
+ sub_col.set_unique_id(-1);
+ sub_col.set_name("meta.host");
+ sub_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ sub_col.set_parent_unique_id(kVariantUid);
+ PathInData path("meta.host");
+ sub_col.set_path_info(path);
+ tablet_schema->append_column(sub_col);
+
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2002);
+ index_pb.set_index_name("variant_field_pattern_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kVariantUid);
+ auto* props = index_pb.mutable_properties();
+ (*props)["parser"] = "standard";
+ (*props)["support_phrase"] = "true";
+ (*props)["field_pattern"] = "host";
+
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ tablet_schema->append_index(std::move(index));
+
+
ASSERT_TRUE(tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1)).empty());
+ ASSERT_EQ(tablet_schema->inverted_index_by_field_pattern(kVariantUid,
"host").size(), 1u);
+
+ constexpr int kSlotId = 43;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kVariantUid, "meta.host",
+ {"host"});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
+ std::make_shared<collection_statistics::MockVSlotRef>("meta.host",
SlotId(kSlotId));
+ auto literal =
std::make_shared<collection_statistics::MockVLiteral>("alpha");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ VExprContextSPtrs contexts;
+ contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status = stats_->extract_collect_info(runtime_state_.get(), contexts,
tablet_schema,
+ &collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ ASSERT_EQ(collect_infos.size(), 1u);
+ auto it = collect_infos.find(
+ StringHelper::to_wstring(std::to_string(kVariantUid) +
".meta.host"));
+ ASSERT_NE(it, collect_infos.end());
+ ASSERT_NE(it->second.index_meta, nullptr);
+ ASSERT_NE(it->second.owned_index_meta, nullptr);
+ EXPECT_EQ(it->second.index_meta->index_name(),
"variant_field_pattern_idx");
+}
+
+// Regression: field_pattern="user.*" is registered under the pattern string,
+// while the query slot resolves to column_paths=["user", "name"]. The fallback
+// must match the parent variant's sub-column template first, then use the
+// matched pattern to fetch the index, and collect under the actual Lucene field.
+TEST_F(CollectionStatisticsTest,
ExtractCollectInfoForVariantFieldPatternGlobIndex) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kVariantUid = 9003;
+
+ TabletColumn variant_col;
+ variant_col.set_unique_id(kVariantUid);
+ variant_col.set_name("meta");
+ variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ TabletColumn glob_template = make_subcolumn_template("user.*",
PatternTypePB::MATCH_NAME_GLOB);
+ variant_col.add_sub_column(glob_template);
+ tablet_schema->append_column(variant_col);
+
+ TabletColumn sub_col;
+ sub_col.set_unique_id(-1);
+ sub_col.set_name("meta.user.name");
+ sub_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ sub_col.set_parent_unique_id(kVariantUid);
+ PathInData path("meta.user.name");
+ sub_col.set_path_info(path);
+ tablet_schema->append_column(sub_col);
+
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2003);
+ index_pb.set_index_name("variant_field_pattern_glob_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kVariantUid);
+ auto* props = index_pb.mutable_properties();
+ (*props)["parser"] = "standard";
+ (*props)["support_phrase"] = "true";
+ (*props)["field_pattern"] = "user.*";
+
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ tablet_schema->append_index(std::move(index));
+
+
ASSERT_TRUE(tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1)).empty());
+ ASSERT_TRUE(tablet_schema->inverted_index_by_field_pattern(kVariantUid,
"user.name").empty());
+ ASSERT_EQ(tablet_schema->inverted_index_by_field_pattern(kVariantUid,
"user.*").size(), 1u);
+ TabletSchema::SubColumnInfo sub_column_info;
+ ASSERT_TRUE(variant_util::generate_sub_column_info(*tablet_schema,
kVariantUid, "user.name",
+ &sub_column_info));
+ ASSERT_EQ(sub_column_info.indexes.size(), 1u);
+ EXPECT_EQ(sub_column_info.column.suffix_path(), "meta.user.name");
+ EXPECT_EQ(sub_column_info.indexes[0]->index_name(),
"variant_field_pattern_glob_idx");
+
+ constexpr int kSlotId = 44;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kVariantUid,
+ "meta.user.name",
{"user", "name"});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
std::make_shared<collection_statistics::MockVSlotRef>("meta.user.name",
+
SlotId(kSlotId));
+ auto literal =
std::make_shared<collection_statistics::MockVLiteral>("alice");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ VExprContextSPtrs contexts;
+ contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status = stats_->extract_collect_info(runtime_state_.get(), contexts,
tablet_schema,
+ &collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ ASSERT_EQ(collect_infos.size(), 1u);
+ auto it = collect_infos.find(
+ StringHelper::to_wstring(std::to_string(kVariantUid) +
".meta.user.name"));
+ ASSERT_NE(it, collect_infos.end());
+ ASSERT_NE(it->second.index_meta, nullptr);
+ ASSERT_NE(it->second.owned_index_meta, nullptr);
+ EXPECT_EQ(it->second.index_meta->index_name(),
"variant_field_pattern_glob_idx");
+}
+
+// E1: Match predicate whose left subtree contains no VSlotRef.
+// find_slot_ref recurses through children; when it returns nullptr the
+// collector reports INVERTED_INDEX_NOT_SUPPORTED.
+// Calls MatchPredicateCollector::collect() directly so coverage attribution
+// is not muddied by extract_collect_info's virtual-dispatch indirection.
+TEST_F(CollectionStatisticsTest, CollectMissingSlotRefReturnsError) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+ TabletColumn col;
+ col.set_unique_id(1001);
+ col.set_name("c");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto literal_left =
std::make_shared<collection_statistics::MockVLiteral>("foo");
+ auto literal_right =
std::make_shared<collection_statistics::MockVLiteral>("bar");
+ match_expr->_children.push_back(literal_left);
+ match_expr->_children.push_back(literal_right);
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status =
+ collector.collect(runtime_state_.get(), tablet_schema, match_expr,
&collect_infos);
+ ASSERT_FALSE(status.ok());
+ EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_NOT_SUPPORTED);
+ EXPECT_TRUE(status.msg().find("Cannot find slot reference") !=
std::string::npos);
+}
+
+// E2: SlotRef points to a slot_id absent from the runtime descriptor table.
+TEST_F(CollectionStatisticsTest, CollectMissingSlotDescriptorReturnsError) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+ TabletColumn col;
+ col.set_unique_id(1002);
+ col.set_name("c");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+
+ constexpr int kAbsentSlotId = 99999;
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
+ std::make_shared<collection_statistics::MockVSlotRef>("c",
SlotId(kAbsentSlotId));
+ auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status =
+ collector.collect(runtime_state_.get(), tablet_schema, match_expr,
&collect_infos);
+ ASSERT_FALSE(status.ok());
+ EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_NOT_SUPPORTED);
+ EXPECT_TRUE(status.msg().find("Cannot find slot descriptor") !=
std::string::npos);
+}
+
+// E3: SlotRef name does not exist in tablet_schema (field_index returns -1).
+TEST_F(CollectionStatisticsTest, CollectUnknownColumnNameReturnsError) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+ TabletColumn col;
+ col.set_unique_id(1003);
+ col.set_name("declared");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+
+ constexpr int kSlotId = 50;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 1003,
"missing", {});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
+ std::make_shared<collection_statistics::MockVSlotRef>("missing",
SlotId(kSlotId));
+ auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status =
+ collector.collect(runtime_state_.get(), tablet_schema, match_expr,
&collect_infos);
+ ASSERT_FALSE(status.ok());
+ EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_NOT_SUPPORTED);
+ EXPECT_TRUE(status.msg().find("Cannot find column index") !=
std::string::npos);
+}
+
+// I1 + L3 + O1: Plain string column with a direct inverted index.
+// Direct hit produces a CollectInfo whose owned_index_meta is null
+// (the meta lives in the schema and is not cloned).
+TEST_F(CollectionStatisticsTest, CollectDirectIndexHitFromSchema) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kColUid = 1100;
+ TabletColumn col;
+ col.set_unique_id(kColUid);
+ col.set_name("note");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2100);
+ index_pb.set_index_name("note_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kColUid);
+ auto* props = index_pb.mutable_properties();
+ (*props)["parser"] = "english";
+ (*props)["support_phrase"] = "true";
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ tablet_schema->append_index(std::move(index));
+
+ constexpr int kSlotId = 60;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kColUid, "note", {});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
std::make_shared<collection_statistics::MockVSlotRef>("note", SlotId(kSlotId));
+ auto literal =
std::make_shared<collection_statistics::MockVLiteral>("hello world");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status =
+ collector.collect(runtime_state_.get(), tablet_schema, match_expr,
&collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ ASSERT_EQ(collect_infos.size(), 1u);
+ auto it =
collect_infos.find(StringHelper::to_wstring(std::to_string(kColUid)));
+ ASSERT_NE(it, collect_infos.end());
+ EXPECT_NE(it->second.index_meta, nullptr);
+ EXPECT_EQ(it->second.owned_index_meta, nullptr); // O1: schema-direct meta
is not owned
+ EXPECT_FALSE(it->second.term_infos.empty());
+}
+
+// I2: Plain string column with no index and not an extracted variant
+// sub-column. Fallback path does not apply (column.is_extracted_column()
+// is false). In BE_TEST builds the empty-index check is skipped, so
+// collect returns OK with no CollectInfo emitted.
+TEST_F(CollectionStatisticsTest, CollectNotExtractedColumnSkipsFallback) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kColUid = 1200;
+ TabletColumn col;
+ col.set_unique_id(kColUid);
+ col.set_name("plain");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+ // no index appended
+
+ constexpr int kSlotId = 70;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kColUid, "plain", {});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
std::make_shared<collection_statistics::MockVSlotRef>("plain", SlotId(kSlotId));
+ auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status =
+ collector.collect(runtime_state_.get(), tablet_schema, match_expr,
&collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ EXPECT_TRUE(collect_infos.empty());
+}
+
+// L1: Index whose properties do not request an analyzer
+// (should_analyzer returns false). The matching index_meta is iterated
+// but skipped before insertion.
+TEST_F(CollectionStatisticsTest, CollectSkipsIndexWithoutAnalyzer) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kColUid = 1300;
+ TabletColumn col;
+ col.set_unique_id(kColUid);
+ col.set_name("kw");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2300);
+ index_pb.set_index_name("kw_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kColUid);
+ // No "parser" property -> should_analyzer returns false
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ tablet_schema->append_index(std::move(index));
+
+ constexpr int kSlotId = 80;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kColUid, "kw", {});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
std::make_shared<collection_statistics::MockVSlotRef>("kw", SlotId(kSlotId));
+ auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status =
+ collector.collect(runtime_state_.get(), tablet_schema, match_expr,
&collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ EXPECT_TRUE(collect_infos.empty());
+}
+
+// L2: Index whose analyzer is set (should_analyzer returns true) but does
+// not declare "support_phrase=true". MockVExpr drives MATCH_PHRASE opcode,
+// so is_need_similarity_score returns false and the index is skipped.
+TEST_F(CollectionStatisticsTest, CollectSkipsIndexWithoutSimilarityScore) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kColUid = 1350;
+ TabletColumn col;
+ col.set_unique_id(kColUid);
+ col.set_name("body");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2350);
+ index_pb.set_index_name("body_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kColUid);
+ auto* props = index_pb.mutable_properties();
+ (*props)["parser"] = "english"; // should_analyzer == true
+ // Intentionally omit "support_phrase" -> is_need_similarity_score == false
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ tablet_schema->append_index(std::move(index));
+
+ constexpr int kSlotId = 85;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kColUid, "body", {});
+
+ auto match_expr =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto slot_ref =
std::make_shared<collection_statistics::MockVSlotRef>("body", SlotId(kSlotId));
+ auto literal =
std::make_shared<collection_statistics::MockVLiteral>("hello");
+ match_expr->_children.push_back(slot_ref);
+ match_expr->_children.push_back(literal);
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto status =
+ collector.collect(runtime_state_.get(), tablet_schema, match_expr,
&collect_infos);
+ ASSERT_TRUE(status.ok()) << status.msg();
+ EXPECT_TRUE(collect_infos.empty());
+}
+
+// L4: Two MATCH predicates on the same column produce CollectInfo entries
+// keyed on the same field_name; the second insertion merges term_infos
+// into the first entry.
+TEST_F(CollectionStatisticsTest, CollectMergesTermsForSameFieldName) {
+ auto tablet_schema = std::make_shared<TabletSchema>();
+
+ constexpr int32_t kColUid = 1400;
+ TabletColumn col;
+ col.set_unique_id(kColUid);
+ col.set_name("doc");
+ col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ tablet_schema->append_column(col);
+
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(2400);
+ index_pb.set_index_name("doc_idx");
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(kColUid);
+ auto* props = index_pb.mutable_properties();
+ (*props)["parser"] = "english";
+ (*props)["support_phrase"] = "true";
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ tablet_schema->append_index(std::move(index));
+
+ constexpr int kSlotId = 90;
+ runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId),
kColUid, "doc", {});
+
+ auto build_match = [&](const std::string& term) {
+ auto m =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+ auto s = std::make_shared<collection_statistics::MockVSlotRef>("doc",
SlotId(kSlotId));
+ auto l = std::make_shared<collection_statistics::MockVLiteral>(term);
+ m->_children.push_back(s);
+ m->_children.push_back(l);
+ return m;
+ };
+
+ MatchPredicateCollector collector;
+ std::unordered_map<std::wstring, CollectInfo> collect_infos;
+ auto first = collector.collect(runtime_state_.get(), tablet_schema,
build_match("alpha"),
+ &collect_infos);
+ ASSERT_TRUE(first.ok()) << first.msg();
+ auto second = collector.collect(runtime_state_.get(), tablet_schema,
build_match("beta"),
+ &collect_infos);
+ ASSERT_TRUE(second.ok()) << second.msg();
+ ASSERT_EQ(collect_infos.size(), 1u);
+ auto it =
collect_infos.find(StringHelper::to_wstring(std::to_string(kColUid)));
+ ASSERT_NE(it, collect_infos.end());
+ EXPECT_GE(it->second.term_infos.size(), 2u); // both "alpha" and "beta"
present
+}
+
+// Test-only subclass that exposes the protected helpers of PredicateCollector.
+class TestablePredicateCollector : public MatchPredicateCollector {
+public:
+ using MatchPredicateCollector::build_field_name;
+ using MatchPredicateCollector::find_slot_ref;
+};
+
+// find_slot_ref: null shared_ptr returns nullptr (early-return branch).
+TEST_F(CollectionStatisticsTest, FindSlotRefHandlesNullExpr) {
+ TestablePredicateCollector collector;
+ VExprSPtr null_expr;
+ EXPECT_EQ(collector.find_slot_ref(null_expr), nullptr);
+}
+
+// find_slot_ref: when expr is a non-CAST wrapper containing a SLOT_REF in its
+// children, the recursive descent finds the slot via the for-loop body.
+TEST_F(CollectionStatisticsTest, FindSlotRefRecursesIntoChildren) {
+ TestablePredicateCollector collector;
+ auto wrapper =
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::FUNCTION_CALL);
+ auto slot_ref = std::make_shared<collection_statistics::MockVSlotRef>("c",
SlotId(99));
+ wrapper->_children.push_back(slot_ref);
+ EXPECT_EQ(collector.find_slot_ref(wrapper), slot_ref.get());
+}
+
+// find_slot_ref: leaf non-slot (no children) returns nullptr after for-loop.
+TEST_F(CollectionStatisticsTest, FindSlotRefReturnsNullForLeafNonSlot) {
+ TestablePredicateCollector collector;
+ auto literal = std::make_shared<collection_statistics::MockVLiteral>("x");
+ EXPECT_EQ(collector.find_slot_ref(literal), nullptr);
+}
+
+// build_field_name: non-empty suffix is appended with a dot separator.
+TEST_F(CollectionStatisticsTest, BuildFieldNameWithSuffix) {
+ TestablePredicateCollector collector;
+ EXPECT_EQ(collector.build_field_name(42, "a.b"), "42.a.b");
+}
+
+// build_field_name: empty suffix returns just the unique id as string.
+TEST_F(CollectionStatisticsTest, BuildFieldNameWithoutSuffix) {
+ TestablePredicateCollector collector;
+ EXPECT_EQ(collector.build_field_name(42, ""), "42");
+}
+
TEST(TermInfoComparerTest, OrdersByTermAndDedups) {
using doris::TermInfoComparer;
using doris::segment_v2::TermInfo;
@@ -651,4 +1307,4 @@ TEST(TermInfoComparerTest, OrdersByTermAndDedups) {
EXPECT_THAT(ordered, ::testing::ElementsAre("apple", "banana", "cherry"));
}
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/test/testutil/mock/mock_descriptors.h b/be/test/testutil/mock/mock_descriptors.h
index 4fec22bf7a1..cb8833cf8d8 100644
--- a/be/test/testutil/mock/mock_descriptors.h
+++ b/be/test/testutil/mock/mock_descriptors.h
@@ -20,6 +20,8 @@
#include <gmock/gmock-function-mocker.h>
#include <gmock/gmock.h>
+#include <memory>
+#include <unordered_map>
#include <vector>
#include "core/data_type/data_type.h"
@@ -106,13 +108,41 @@ public:
_slot_descriptors[slot_id] = std::move(slot_desc);
}
+ void add_slot_descriptor(SlotId slot_id, int32_t col_unique_id, const std::string& col_name,
+ const std::vector<std::string>& column_paths) {
+ TTypeNode type_node;
+ type_node.__set_type(TTypeNodeType::SCALAR);
+ TScalarType scalar_type;
+ scalar_type.__set_type(TPrimitiveType::STRING);
+ type_node.__set_scalar_type(scalar_type);
+ TTypeDesc type_desc;
+ type_desc.types.push_back(type_node);
+
+ TSlotDescriptor slot_desc;
+ slot_desc.__set_id(slot_id);
+ slot_desc.__set_parent(0);
+ slot_desc.__set_slotType(type_desc);
+ slot_desc.__set_columnPos(0);
+ slot_desc.__set_byteOffset(0);
+ slot_desc.__set_nullIndicatorByte(0);
+ slot_desc.__set_nullIndicatorBit(-1);
+ slot_desc.__set_colName(col_name);
+ slot_desc.__set_slotIdx(0);
+ slot_desc.__set_isMaterialized(true);
+ slot_desc.__set_col_unique_id(col_unique_id);
+ slot_desc.__set_is_key(false);
+ slot_desc.__set_column_paths(column_paths);
+ slot_desc.__set_primitive_type(TPrimitiveType::STRING);
+ _slot_descriptors[slot_id] = std::make_unique<SlotDescriptor>(slot_desc);
+ }
+
SlotDescriptor* get_slot_descriptor(SlotId id) const override {
auto it = _slot_descriptors.find(id);
return it != _slot_descriptors.end() ? it->second.get() : nullptr;
}
private:
- mutable std::unordered_map<SlotId, std::unique_ptr<MockSlopDescriptor>> _slot_descriptors;
+ mutable std::unordered_map<SlotId, std::unique_ptr<SlotDescriptor>> _slot_descriptors;
};
} // namespace doris
\ No newline at end of file
diff --git a/regression-test/suites/inverted_index_p0/test_bm25_score.groovy b/regression-test/suites/inverted_index_p0/test_bm25_score.groovy
index 2686011e89e..3a8ad125dc5 100644
--- a/regression-test/suites/inverted_index_p0/test_bm25_score.groovy
+++ b/regression-test/suites/inverted_index_p0/test_bm25_score.groovy
@@ -226,6 +226,53 @@ suite("test_bm25_score", "p0") {
} finally {
}
+ try {
+ sql """ set enable_common_expr_pushdown = true; """
+ sql """ set enable_match_without_inverted_index = false; """
+ sql """ set default_variant_enable_typed_paths_to_sparse = false; """
+ sql """ set default_variant_enable_doc_mode = false; """
+
+ sql "DROP TABLE IF EXISTS test_variant_field_pattern_score"
+ sql """
+ CREATE TABLE test_variant_field_pattern_score (
+ id INT,
+ meta VARIANT<MATCH_NAME_GLOB 'user.*':text, PROPERTIES("variant_max_subcolumns_count"="0")>,
+ INDEX idx_meta_user(meta) USING INVERTED PROPERTIES(
+ "parser"="english",
+ "support_phrase"="true",
+ "field_pattern"="user.*"
+ )
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ )
+ """
+
+ sql """ insert into test_variant_field_pattern_score values(3, '{"other": "alice"}'); """
+ sql """ sync """
+ sql """
+ insert into test_variant_field_pattern_score values
+ (1, '{"user": {"name": "alice alpha"}}'),
+ (2, '{"user": {"name": "bob beta"}}');
+ """
+ sql """ sync """
+
+ def res = sql """
+ select id, score() as score
+ from test_variant_field_pattern_score
+ where cast(meta["user"]["name"] as string) match_phrase "alice"
+ order by score() desc
+ limit 10;
+ """
+ assertEquals(1, res.size())
+ assertEquals(1, res[0][0] as int)
+ assertTrue(Double.parseDouble(res[0][1].toString()) > 0.0)
+ } finally {
+ }
+
try {
sql "DROP TABLE IF EXISTS t2"
sql """ create table t2(a int, b int, s text) unique key(a) DISTRIBUTED BY HASH(a) buckets 1 PROPERTIES ("replication_allocation" = "tag.location.default: 1"); """
@@ -247,4 +294,4 @@ suite("test_bm25_score", "p0") {
} finally {
}
}
-}
\ No newline at end of file
+}
diff --git a/regression-test/suites/inverted_index_p0/test_bm25_score_variant.groovy b/regression-test/suites/inverted_index_p0/test_bm25_score_variant.groovy
new file mode 100644
index 00000000000..885d311bdfc
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_bm25_score_variant.groovy
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_bm25_score_variant", "p0") {
+ if (isCloudMode()) {
+ return
+ }
+
+ sql """ set enable_common_expr_pushdown = true """
+ sql """ set enable_match_without_inverted_index = false """
+ sql """ set default_variant_enable_typed_paths_to_sparse = false """
+ sql """ set default_variant_enable_doc_mode = false """
+
+ // A1: field_pattern exact name (MATCH_NAME)
+ try {
+ sql "DROP TABLE IF EXISTS test_bm25_score_variant_a1"
+ sql """
+ CREATE TABLE test_bm25_score_variant_a1 (
+ id INT,
+ v variant<
+ MATCH_NAME 'host' : text,
+ PROPERTIES("variant_max_subcolumns_count"="0")
+ >,
+ INDEX idx_v_host (v) USING INVERTED PROPERTIES(
+ "parser"="english",
+ "support_phrase"="true",
+ "field_pattern"="host"
+ )
+ ) ENGINE=OLAP DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ )
+ """
+ sql """ insert into test_bm25_score_variant_a1 values
+ (1, '{"host":"alpha database server"}'),
+ (2, '{"host":"beta server cluster"}'),
+ (3, '{"other":"alpha"}')
+ """
+ sql " sync "
+
+ def res = sql """
+ select id, score() as score
+ from test_bm25_score_variant_a1
+ where cast(v["host"] as string) match_phrase "alpha"
+ order by score() desc
+ limit 10
+ """
+ assertEquals(1, res.size())
+ assertEquals(1, res[0][0] as int)
+ assertTrue(Double.parseDouble(res[0][1].toString()) > 0.0)
+ } finally {
+ }
+
+ // C: plain parent inverted index (baseline; not the fallback path)
+ try {
+ sql "DROP TABLE IF EXISTS test_bm25_score_variant_c"
+ sql """
+ CREATE TABLE test_bm25_score_variant_c (
+ id INT,
+ v VARIANT,
+ INDEX idx_v_plain (v) USING INVERTED PROPERTIES(
+ "parser"="english",
+ "support_phrase"="true"
+ )
+ ) ENGINE=OLAP DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ )
+ """
+ sql """ insert into test_bm25_score_variant_c values
+ (1, '{"note":"latency spike at noon"}'),
+ (2, '{"note":"all green"}')
+ """
+ sql " sync "
+
+ def res = sql """
+ select id, score() as score
+ from test_bm25_score_variant_c
+ where cast(v["note"] as string) match_phrase "latency"
+ order by score() desc
+ limit 10
+ """
+ assertEquals(1, res.size())
+ assertEquals(1, res[0][0] as int)
+ assertTrue(Double.parseDouble(res[0][1].toString()) > 0.0)
+ } finally {
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org