This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 82a613a1279 [fix](inverted-index) Fix boolean query AllScorer
combination handling (#60438)
82a613a1279 is described below
commit 82a613a1279c722e8bd3c2a8e58f98945e7d8e40
Author: zzzxl <[email protected]>
AuthorDate: Tue Feb 3 10:26:25 2026 +0800
[fix](inverted-index) Fix boolean query AllScorer combination handling
(#60438)
https://github.com/apache/doris/pull/60150
https://github.com/apache/doris/pull/60237
---
.../boolean_query/occur_boolean_weight.cpp | 97 ++++--
.../query_v2/boolean_query/occur_boolean_weight.h | 12 +-
.../query_v2/regexp_query/regexp_query.h | 10 +-
.../query_v2/occur_boolean_query_test.cpp | 345 +++++++++++++++++++++
.../inverted_index/query_v2/regexp_query_test.cpp | 175 +++++++++++
5 files changed, 608 insertions(+), 31 deletions(-)
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
index 844d578338c..e92a32fbe94 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
@@ -96,10 +96,6 @@ template <typename CombinerT>
std::optional<CombinationMethod>
OccurBooleanWeight<ScoreCombinerPtrT>::build_should_opt(
std::vector<ScorerPtr>& must_scorers, std::vector<ScorerPtr>
should_scorers,
CombinerT combiner, size_t num_all_scorers) {
- if (should_scorers.empty()) {
- return Ignored {};
- }
-
size_t adjusted_minimum = _minimum_number_should_match > num_all_scorers
? _minimum_number_should_match -
num_all_scorers
: 0;
@@ -109,12 +105,16 @@ std::optional<CombinationMethod>
OccurBooleanWeight<ScoreCombinerPtrT>::build_sh
return std::nullopt;
}
- if (adjusted_minimum == 0) {
+ if (adjusted_minimum == 0 && num_of_should_scorers == 0) {
+ return Ignored {};
+ } else if (adjusted_minimum == 0) {
return Optional {scorer_union(std::move(should_scorers), combiner)};
} else if (adjusted_minimum == 1) {
return Required {scorer_union(std::move(should_scorers), combiner)};
} else if (adjusted_minimum == num_of_should_scorers) {
- must_scorers.swap(should_scorers);
+ for (auto& scorer : should_scorers) {
+ must_scorers.push_back(std::move(scorer));
+ }
return Ignored {};
} else {
return Required {scorer_disjunction(std::move(should_scorers),
combiner, adjusted_minimum)};
@@ -132,43 +132,83 @@ ScorerPtr
OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
return into_box_scorer(std::move(specialized_scorer), do_nothing);
}
+template <typename ScoreCombinerPtrT>
+ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::effective_must_scorer(
+ std::vector<ScorerPtr> must_scorers, size_t must_num_all_scorers) {
+ if (must_scorers.empty()) {
+ if (must_num_all_scorers > 0) {
+ return std::make_shared<AllScorer>(_max_doc);
+ }
+ return nullptr;
+ }
+ return make_intersect_scorers(std::move(must_scorers), _max_doc);
+}
+
+template <typename ScoreCombinerPtrT>
+template <typename CombinerT>
+SpecializedScorer
OccurBooleanWeight<ScoreCombinerPtrT>::effective_should_scorer_for_union(
+ SpecializedScorer should_scorer, size_t should_num_all_scorers,
CombinerT combiner) {
+ if (should_num_all_scorers > 0) {
+ if (_enable_scoring) {
+ std::vector<ScorerPtr> scorers;
+ scorers.push_back(into_box_scorer(std::move(should_scorer),
combiner));
+ scorers.push_back(std::make_shared<AllScorer>(_max_doc));
+ return make_buffered_union(std::move(scorers), combiner);
+ } else {
+ return std::make_shared<AllScorer>(_max_doc);
+ }
+ }
+ return should_scorer;
+}
+
template <typename ScoreCombinerPtrT>
template <typename CombinerT>
SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::build_positive_opt(
CombinationMethod& should_opt, std::vector<ScorerPtr> must_scorers,
CombinerT combiner,
- size_t num_all_scorers) {
- const bool has_must = !must_scorers.empty();
+ const AllAndEmptyScorerCounts& must_special_counts,
+ const AllAndEmptyScorerCounts& should_special_counts) {
+ size_t num_all_scorers =
+ must_special_counts.num_all_scorers +
should_special_counts.num_all_scorers;
if (std::holds_alternative<Ignored>(should_opt)) {
- if (has_must) {
- return make_intersect_scorers(std::move(must_scorers), _max_doc);
- }
- if (num_all_scorers > 0) {
- return std::make_shared<AllScorer>(_max_doc);
+ ScorerPtr must_scorer = effective_must_scorer(std::move(must_scorers),
num_all_scorers);
+ if (must_scorer) {
+ return must_scorer;
}
return std::make_shared<EmptyScorer>();
}
if (std::holds_alternative<Optional>(should_opt)) {
auto& opt = std::get<Optional>(should_opt);
- if (has_must) {
- auto must_scorer = make_intersect_scorers(std::move(must_scorers),
_max_doc);
- if (_enable_scoring) {
- auto should_boxed = into_box_scorer(std::move(opt.scorer),
combiner);
- return make_required_optional_scorer(must_scorer,
should_boxed, combiner);
- } else {
- return must_scorer;
- }
+ ScorerPtr must_scorer =
+ effective_must_scorer(std::move(must_scorers),
must_special_counts.num_all_scorers);
+
+ if (!must_scorer) {
+ return effective_should_scorer_for_union(
+ std::move(opt.scorer),
should_special_counts.num_all_scorers, combiner);
+ }
+
+ if (_enable_scoring) {
+ auto should_boxed = into_box_scorer(std::move(opt.scorer),
combiner);
+ return make_required_optional_scorer(must_scorer, should_boxed,
combiner);
+ } else {
+ return must_scorer;
}
- return opt.scorer;
}
if (std::holds_alternative<Required>(should_opt)) {
auto& req = std::get<Required>(should_opt);
- if (has_must) {
- must_scorers.push_back(into_box_scorer(std::move(req.scorer),
combiner));
- return make_intersect_scorers(std::move(must_scorers), _max_doc);
+ ScorerPtr must_scorer =
+ effective_must_scorer(std::move(must_scorers),
must_special_counts.num_all_scorers);
+
+ if (!must_scorer) {
+ return req.scorer;
}
- return req.scorer;
+
+ auto should_boxed = into_box_scorer(std::move(req.scorer), combiner);
+ std::vector<ScorerPtr> scorers;
+ scorers.push_back(std::move(must_scorer));
+ scorers.push_back(std::move(should_boxed));
+ return make_intersect_scorers(std::move(scorers), _max_doc);
}
return std::make_shared<EmptyScorer>();
@@ -202,10 +242,9 @@ SpecializedScorer
OccurBooleanWeight<ScoreCombinerPtrT>::complex_scorer(
}
ScorerPtr exclude_opt = build_exclude_opt(std::move(must_not_scorers));
- size_t total_all_scorers =
- must_special_counts.num_all_scorers +
should_special_counts.num_all_scorers;
SpecializedScorer positive_opt =
- build_positive_opt(*should_opt, std::move(must_scorers), combiner,
total_all_scorers);
+ build_positive_opt(*should_opt, std::move(must_scorers), combiner,
must_special_counts,
+ should_special_counts);
if (exclude_opt) {
ScorerPtr positive_boxed = into_box_scorer(std::move(positive_opt),
combiner);
return make_exclude(std::move(positive_boxed), std::move(exclude_opt));
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
index b1437777974..0daff6a1117 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
@@ -63,10 +63,20 @@ private:
std::vector<ScorerPtr>
should_scorers,
CombinerT combiner,
size_t num_all_scorers);
ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers);
+
+ ScorerPtr effective_must_scorer(std::vector<ScorerPtr> must_scorers,
+ size_t must_num_all_scorers);
+
+ template <typename CombinerT>
+ SpecializedScorer effective_should_scorer_for_union(SpecializedScorer
should_scorer,
+ size_t
should_num_all_scorers,
+ CombinerT combiner);
+
template <typename CombinerT>
SpecializedScorer build_positive_opt(CombinationMethod& should_opt,
std::vector<ScorerPtr> must_scorers,
CombinerT combiner,
- size_t num_all_scorers = 0);
+ const AllAndEmptyScorerCounts&
must_special_counts,
+ const AllAndEmptyScorerCounts&
should_special_counts);
template <typename CombinerT>
SpecializedScorer scorer_union(std::vector<ScorerPtr> scorers, CombinerT
combiner);
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
index e5075511c67..1101412cb75 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
@@ -32,11 +32,19 @@ public:
~RegexpQuery() override = default;
WeightPtr weight(bool enable_scoring) override {
+ auto pattern = make_exact_match(_pattern);
return std::make_shared<RegexpWeight>(std::move(_context),
std::move(_field),
- std::move(_pattern),
enable_scoring, _nullable);
+ std::move(pattern),
enable_scoring, _nullable);
}
private:
+ static std::string make_exact_match(const std::string& pattern) {
+ if (!pattern.empty() && pattern.front() == '^' && pattern.back() ==
'$') {
+ return pattern;
+ }
+ return "^(" + pattern + ")$";
+ }
+
IndexQueryContextPtr _context;
std::wstring _field;
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
index bcffa0d7082..244ddfb8dcc 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
@@ -25,6 +25,7 @@
#include <set>
#include <vector>
+#include "olap/rowset/segment_v2/inverted_index/query_v2/all_query/all_query.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur.h"
#include
"olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/query.h"
@@ -704,4 +705,348 @@ TEST_F(OccurBooleanQueryTest, OnlyMustNotClausesEmpty) {
EXPECT_EQ(scorer->doc(), TERMINATED);
}
+TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchExceedsShouldClausesReturnsEmpty) {
+ {
+ auto must_docs1 = generate_range_docs(0, 100);
+ auto must_docs2 = generate_range_docs(50, 150);
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs1));
+ clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs2));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+
+ EXPECT_EQ(scorer->doc(), TERMINATED);
+ }
+
+ {
+ auto must_docs = generate_range_docs(0, 100);
+ auto should_docs = generate_range_docs(0, 100);
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+
+ EXPECT_EQ(scorer->doc(), TERMINATED);
+ }
+
+ {
+ auto should_docs1 = generate_range_docs(0, 100);
+ auto should_docs2 = generate_range_docs(50, 150);
+ auto expected = set_intersection(should_docs1, should_docs2);
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs1));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs2));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result.size(), expected.size());
+ EXPECT_EQ(to_set(result), to_set(expected));
+ }
+
+ {
+ auto must_docs = generate_range_docs(0, 100);
+ auto must_not_docs = generate_range_docs(50, 150);
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs));
+ clauses.emplace_back(Occur::MUST_NOT,
std::make_shared<MockQuery>(must_not_docs));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+
+ EXPECT_EQ(scorer->doc(), TERMINATED);
+ }
+}
+
+TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchZeroWithNoShouldClausesReturnsIgnored) {
+ auto must_docs1 = generate_range_docs(0, 100);
+ auto must_docs2 = generate_range_docs(50, 150);
+ auto expected = set_intersection(must_docs1, must_docs2);
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs1));
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs2));
+
+ OccurBooleanQuery query(std::move(clauses), 0);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, MinimumShouldMatchEqualsNumShouldWithMustClause)
{
+ auto must_docs = std::vector<uint32_t> {10, 20};
+ auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+ auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+ auto expected = std::vector<uint32_t> {10, 20};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchEqualsNumShouldWithMultipleMustClauses) {
+ auto must1_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+ auto must2_docs = std::vector<uint32_t> {10, 20, 30, 60, 70};
+ auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+ auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+ auto should3_docs = std::vector<uint32_t> {10, 20, 30, 300};
+ auto expected = std::vector<uint32_t> {10, 20, 30};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must1_docs));
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must2_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should3_docs));
+
+ OccurBooleanQuery query(std::move(clauses), 3);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, MinimumShouldMatchEqualsNumShouldOnlyShould) {
+ auto should1_docs = std::vector<uint32_t> {10, 20, 30, 40};
+ auto should2_docs = std::vector<uint32_t> {20, 30, 40, 50};
+ auto should3_docs = std::vector<uint32_t> {30, 40, 50, 60};
+ auto expected = std::vector<uint32_t> {30, 40};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should3_docs));
+
+ OccurBooleanQuery query(std::move(clauses), 3);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, MinimumShouldMatchEqualsNumShouldWithMustNot) {
+ auto must_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+ auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+ auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+ auto must_not_docs = std::vector<uint32_t> {20, 100, 200};
+ auto expected = std::vector<uint32_t> {10, 30};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
+ clauses.emplace_back(Occur::MUST_NOT,
std::make_shared<MockQuery>(must_not_docs));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithMustClause) {
+ _ctx.segment_num_rows = 100;
+
+ auto must_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+
+ OccurBooleanQuery query(std::move(clauses));
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result, must_docs);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithShouldClause) {
+ _ctx.segment_num_rows = 50;
+
+ auto should_docs = std::vector<uint32_t> {10, 20, 30};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs));
+ clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(50));
+
+ OccurBooleanQuery query(std::move(clauses));
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result.size(), 50);
+ EXPECT_EQ(result.front(), 0);
+ EXPECT_EQ(result.back(), 49);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithMustNotClause) {
+ _ctx.segment_num_rows = 100;
+
+ auto must_not_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+ clauses.emplace_back(Occur::MUST_NOT,
std::make_shared<MockQuery>(must_not_docs));
+
+ OccurBooleanQuery query(std::move(clauses));
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result.size(), 95);
+ for (uint32_t doc : must_not_docs) {
+ EXPECT_TRUE(std::find(result.begin(), result.end(), doc) ==
result.end());
+ }
+}
+
+TEST_F(OccurBooleanQueryTest, MultipleAllQueriesWithMust) {
+ _ctx.segment_num_rows = 100;
+
+ auto must_docs = std::vector<uint32_t> {5, 15, 25, 35, 45};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+
+ OccurBooleanQuery query(std::move(clauses));
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result, must_docs);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryOnlyMust) {
+ _ctx.segment_num_rows = 50;
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(50));
+
+ OccurBooleanQuery query(std::move(clauses));
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result.size(), 50);
+ for (uint32_t i = 0; i < 50; ++i) {
+ EXPECT_EQ(result[i], i);
+ }
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithMustAndShouldMinMatch) {
+ _ctx.segment_num_rows = 100;
+
+ auto must_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+ auto should1_docs = std::vector<uint32_t> {10, 20, 30};
+ auto should2_docs = std::vector<uint32_t> {10, 20, 40};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ auto expected = std::vector<uint32_t> {10, 20};
+ EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, ScoringWithMinimumShouldMatchEqualsNumShould) {
+ auto must_docs = std::vector<uint32_t> {10, 20, 30};
+ auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+ auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs,
1.0F));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs, 2.0F));
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs, 3.0F));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(true);
+ auto scorer = weight->scorer(_ctx);
+
+ std::vector<uint32_t> result;
+ uint32_t doc = scorer->doc();
+ while (doc != TERMINATED) {
+ result.push_back(doc);
+ float s = scorer->score();
+ EXPECT_FLOAT_EQ(s, 6.0F);
+ doc = scorer->advance();
+ }
+
+ auto expected = std::vector<uint32_t> {10, 20, 30};
+ EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, ShouldOnlyWithAllQueryMinShouldMatch) {
+ _ctx.segment_num_rows = 50;
+
+ auto should_docs = std::vector<uint32_t> {10, 20, 30, 40, 45};
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs));
+ clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(50));
+
+ OccurBooleanQuery query(std::move(clauses), 2);
+ auto weight = query.weight(false);
+ auto scorer = weight->scorer(_ctx);
+ auto result = collect_docs(scorer);
+
+ EXPECT_EQ(result.size(), 5);
+ EXPECT_EQ(result, should_docs);
+}
+
+TEST_F(OccurBooleanQueryTest, ShouldOnlyAllQueryScoring) {
+ _ctx.segment_num_rows = 10;
+
+ std::vector<std::pair<Occur, QueryPtr>> clauses;
+ clauses.emplace_back(Occur::SHOULD,
+ std::make_shared<MockQuery>(std::vector<uint32_t> {1,
2}, 2.0F));
+ clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(10));
+
+ OccurBooleanQuery query(std::move(clauses));
+ auto weight = query.weight(true);
+ auto scorer = weight->scorer(_ctx);
+
+ uint32_t doc = scorer->doc();
+ while (doc != TERMINATED) {
+ float s = scorer->score();
+ if (doc == 1 || doc == 2) {
+ EXPECT_FLOAT_EQ(s, 3.0F);
+ } else {
+ EXPECT_FLOAT_EQ(s, 1.0F);
+ }
+ doc = scorer->advance();
+ }
+}
+
} // namespace doris::segment_v2::inverted_index::query_v2
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp
index 3dd818764e0..4e883edff1d 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp
@@ -389,4 +389,179 @@ TEST_F(RegexpQueryV2Test,
test_regexp_query_move_semantics) {
ASSERT_NE(weight2, nullptr);
}
+TEST_F(RegexpQueryV2Test, test_make_exact_match_anchoring) {
+ auto context = std::make_shared<IndexQueryContext>();
+ context->collection_statistics = std::make_shared<CollectionStatistics>();
+ context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+ auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+ auto reader_holder =
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+ ASSERT_TRUE(reader_holder != nullptr);
+
+ std::wstring field = StringHelper::to_wstring("content");
+ std::string pattern = "apple123";
+
+ auto query = std::make_shared<query_v2::RegexpQuery>(context, field,
pattern);
+ auto weight = query->weight(false);
+
+ query_v2::QueryExecutionContext exec_ctx;
+ exec_ctx.segment_num_rows = reader_holder->maxDoc();
+ exec_ctx.readers = {reader_holder};
+ exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+ auto scorer = weight->scorer(exec_ctx);
+ ASSERT_NE(scorer, nullptr);
+
+ roaring::Roaring result;
+ uint32_t doc = scorer->doc();
+ while (doc != query_v2::TERMINATED) {
+ result.add(doc);
+ doc = scorer->advance();
+ }
+
+ EXPECT_EQ(result.cardinality(), 1);
+
+ _CLDECDELETE(dir);
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_already_anchored) {
+ auto context = std::make_shared<IndexQueryContext>();
+ context->collection_statistics = std::make_shared<CollectionStatistics>();
+ context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+ auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+ auto reader_holder =
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+ ASSERT_TRUE(reader_holder != nullptr);
+
+ std::wstring field = StringHelper::to_wstring("content");
+ std::string pattern = "^apple123$";
+
+ auto query = std::make_shared<query_v2::RegexpQuery>(context, field,
pattern);
+ auto weight = query->weight(false);
+
+ query_v2::QueryExecutionContext exec_ctx;
+ exec_ctx.segment_num_rows = reader_holder->maxDoc();
+ exec_ctx.readers = {reader_holder};
+ exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+ auto scorer = weight->scorer(exec_ctx);
+ ASSERT_NE(scorer, nullptr);
+
+ roaring::Roaring result;
+ uint32_t doc = scorer->doc();
+ while (doc != query_v2::TERMINATED) {
+ result.add(doc);
+ doc = scorer->advance();
+ }
+
+ EXPECT_EQ(result.cardinality(), 1);
+
+ _CLDECDELETE(dir);
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_partial_anchor_start) {
+ auto context = std::make_shared<IndexQueryContext>();
+ context->collection_statistics = std::make_shared<CollectionStatistics>();
+ context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+ auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+ auto reader_holder =
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+ ASSERT_TRUE(reader_holder != nullptr);
+
+ std::wstring field = StringHelper::to_wstring("content");
+ std::string pattern = "^apple.*";
+
+ auto query = std::make_shared<query_v2::RegexpQuery>(context, field,
pattern);
+ auto weight = query->weight(false);
+
+ query_v2::QueryExecutionContext exec_ctx;
+ exec_ctx.segment_num_rows = reader_holder->maxDoc();
+ exec_ctx.readers = {reader_holder};
+ exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+ auto scorer = weight->scorer(exec_ctx);
+ ASSERT_NE(scorer, nullptr);
+
+ roaring::Roaring result;
+ uint32_t doc = scorer->doc();
+ while (doc != query_v2::TERMINATED) {
+ result.add(doc);
+ doc = scorer->advance();
+ }
+
+ EXPECT_GT(result.cardinality(), 0);
+
+ _CLDECDELETE(dir);
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_partial_anchor_end) {
+ auto context = std::make_shared<IndexQueryContext>();
+ context->collection_statistics = std::make_shared<CollectionStatistics>();
+ context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+ auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+ auto reader_holder =
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+ ASSERT_TRUE(reader_holder != nullptr);
+
+ std::wstring field = StringHelper::to_wstring("content");
+ std::string pattern = ".*123$";
+
+ auto query = std::make_shared<query_v2::RegexpQuery>(context, field,
pattern);
+ auto weight = query->weight(false);
+
+ query_v2::QueryExecutionContext exec_ctx;
+ exec_ctx.segment_num_rows = reader_holder->maxDoc();
+ exec_ctx.readers = {reader_holder};
+ exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+ auto scorer = weight->scorer(exec_ctx);
+ ASSERT_NE(scorer, nullptr);
+
+ roaring::Roaring result;
+ uint32_t doc = scorer->doc();
+ while (doc != query_v2::TERMINATED) {
+ result.add(doc);
+ doc = scorer->advance();
+ }
+
+ EXPECT_GT(result.cardinality(), 0);
+
+ _CLDECDELETE(dir);
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_wildcard_pattern) {
+ auto context = std::make_shared<IndexQueryContext>();
+ context->collection_statistics = std::make_shared<CollectionStatistics>();
+ context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+ auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+ auto reader_holder =
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+ ASSERT_TRUE(reader_holder != nullptr);
+
+ std::wstring field = StringHelper::to_wstring("content");
+ std::string pattern = ".*";
+
+ auto query = std::make_shared<query_v2::RegexpQuery>(context, field,
pattern);
+ auto weight = query->weight(false);
+
+ query_v2::QueryExecutionContext exec_ctx;
+ exec_ctx.segment_num_rows = reader_holder->maxDoc();
+ exec_ctx.readers = {reader_holder};
+ exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+ auto scorer = weight->scorer(exec_ctx);
+ ASSERT_NE(scorer, nullptr);
+
+ roaring::Roaring result;
+ uint32_t doc = scorer->doc();
+ while (doc != query_v2::TERMINATED) {
+ result.add(doc);
+ doc = scorer->advance();
+ }
+
+ EXPECT_EQ(result.cardinality(), 20);
+
+ _CLDECDELETE(dir);
+}
+
} // namespace doris::segment_v2
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org