This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 82a613a1279 [fix](inverted-index) Fix boolean query AllScorer 
combination handling (#60438)
82a613a1279 is described below

commit 82a613a1279c722e8bd3c2a8e58f98945e7d8e40
Author: zzzxl <[email protected]>
AuthorDate: Tue Feb 3 10:26:25 2026 +0800

    [fix](inverted-index) Fix boolean query AllScorer combination handling 
(#60438)
    
    https://github.com/apache/doris/pull/60150
    https://github.com/apache/doris/pull/60237
---
 .../boolean_query/occur_boolean_weight.cpp         |  97 ++++--
 .../query_v2/boolean_query/occur_boolean_weight.h  |  12 +-
 .../query_v2/regexp_query/regexp_query.h           |  10 +-
 .../query_v2/occur_boolean_query_test.cpp          | 345 +++++++++++++++++++++
 .../inverted_index/query_v2/regexp_query_test.cpp  | 175 +++++++++++
 5 files changed, 608 insertions(+), 31 deletions(-)

diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
index 844d578338c..e92a32fbe94 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
@@ -96,10 +96,6 @@ template <typename CombinerT>
 std::optional<CombinationMethod> 
OccurBooleanWeight<ScoreCombinerPtrT>::build_should_opt(
         std::vector<ScorerPtr>& must_scorers, std::vector<ScorerPtr> 
should_scorers,
         CombinerT combiner, size_t num_all_scorers) {
-    if (should_scorers.empty()) {
-        return Ignored {};
-    }
-
     size_t adjusted_minimum = _minimum_number_should_match > num_all_scorers
                                       ? _minimum_number_should_match - 
num_all_scorers
                                       : 0;
@@ -109,12 +105,16 @@ std::optional<CombinationMethod> 
OccurBooleanWeight<ScoreCombinerPtrT>::build_sh
         return std::nullopt;
     }
 
-    if (adjusted_minimum == 0) {
+    if (adjusted_minimum == 0 && num_of_should_scorers == 0) {
+        return Ignored {};
+    } else if (adjusted_minimum == 0) {
         return Optional {scorer_union(std::move(should_scorers), combiner)};
     } else if (adjusted_minimum == 1) {
         return Required {scorer_union(std::move(should_scorers), combiner)};
     } else if (adjusted_minimum == num_of_should_scorers) {
-        must_scorers.swap(should_scorers);
+        for (auto& scorer : should_scorers) {
+            must_scorers.push_back(std::move(scorer));
+        }
         return Ignored {};
     } else {
         return Required {scorer_disjunction(std::move(should_scorers), 
combiner, adjusted_minimum)};
@@ -132,43 +132,83 @@ ScorerPtr 
OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
     return into_box_scorer(std::move(specialized_scorer), do_nothing);
 }
 
+template <typename ScoreCombinerPtrT> // Collapses the MUST side into a single scorer; returns nullptr when there is nothing to require.
+ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::effective_must_scorer(
+        std::vector<ScorerPtr> must_scorers, size_t must_num_all_scorers) {
+    if (must_scorers.empty()) {
+        if (must_num_all_scorers > 0) { // no concrete MUST scorers, but at least one all-scorer was counted
+            return std::make_shared<AllScorer>(_max_doc);
+        }
+        return nullptr; // no concrete scorers and no counted all-scorers; build_positive_opt tests for null
+    }
+    return make_intersect_scorers(std::move(must_scorers), _max_doc); // the count is not consulted when concrete scorers exist
+}
+
+template <typename ScoreCombinerPtrT> // Adjusts a SHOULD-side scorer when all-scorers were counted among the SHOULD clauses.
+template <typename CombinerT>
+SpecializedScorer 
OccurBooleanWeight<ScoreCombinerPtrT>::effective_should_scorer_for_union(
+        SpecializedScorer should_scorer, size_t should_num_all_scorers, 
CombinerT combiner) {
+    if (should_num_all_scorers > 0) {
+        if (_enable_scoring) { // scoring on: keep should_scorer and union it with an AllScorer
+            std::vector<ScorerPtr> scorers;
+            scorers.push_back(into_box_scorer(std::move(should_scorer), 
combiner));
+            scorers.push_back(std::make_shared<AllScorer>(_max_doc));
+            return make_buffered_union(std::move(scorers), combiner);
+        } else { // scoring off: should_scorer is discarded and the AllScorer alone is returned
+            return std::make_shared<AllScorer>(_max_doc);
+        }
+    }
+    return should_scorer; // no counted all-scorers: pass the input through unchanged
+}
+
 template <typename ScoreCombinerPtrT>
 template <typename CombinerT>
 SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::build_positive_opt(
         CombinationMethod& should_opt, std::vector<ScorerPtr> must_scorers, 
CombinerT combiner,
-        size_t num_all_scorers) {
-    const bool has_must = !must_scorers.empty();
+        const AllAndEmptyScorerCounts& must_special_counts,
+        const AllAndEmptyScorerCounts& should_special_counts) {
+    size_t num_all_scorers =
+            must_special_counts.num_all_scorers + 
should_special_counts.num_all_scorers;
     if (std::holds_alternative<Ignored>(should_opt)) {
-        if (has_must) {
-            return make_intersect_scorers(std::move(must_scorers), _max_doc);
-        }
-        if (num_all_scorers > 0) {
-            return std::make_shared<AllScorer>(_max_doc);
+        ScorerPtr must_scorer = effective_must_scorer(std::move(must_scorers), 
num_all_scorers);
+        if (must_scorer) {
+            return must_scorer;
         }
         return std::make_shared<EmptyScorer>();
     }
 
     if (std::holds_alternative<Optional>(should_opt)) {
         auto& opt = std::get<Optional>(should_opt);
-        if (has_must) {
-            auto must_scorer = make_intersect_scorers(std::move(must_scorers), 
_max_doc);
-            if (_enable_scoring) {
-                auto should_boxed = into_box_scorer(std::move(opt.scorer), 
combiner);
-                return make_required_optional_scorer(must_scorer, 
should_boxed, combiner);
-            } else {
-                return must_scorer;
-            }
+        ScorerPtr must_scorer =
+                effective_must_scorer(std::move(must_scorers), 
must_special_counts.num_all_scorers);
+
+        if (!must_scorer) {
+            return effective_should_scorer_for_union(
+                    std::move(opt.scorer), 
should_special_counts.num_all_scorers, combiner);
+        }
+
+        if (_enable_scoring) {
+            auto should_boxed = into_box_scorer(std::move(opt.scorer), 
combiner);
+            return make_required_optional_scorer(must_scorer, should_boxed, 
combiner);
+        } else {
+            return must_scorer;
         }
-        return opt.scorer;
     }
 
     if (std::holds_alternative<Required>(should_opt)) {
         auto& req = std::get<Required>(should_opt);
-        if (has_must) {
-            must_scorers.push_back(into_box_scorer(std::move(req.scorer), 
combiner));
-            return make_intersect_scorers(std::move(must_scorers), _max_doc);
+        ScorerPtr must_scorer =
+                effective_must_scorer(std::move(must_scorers), 
must_special_counts.num_all_scorers);
+
+        if (!must_scorer) {
+            return req.scorer;
         }
-        return req.scorer;
+
+        auto should_boxed = into_box_scorer(std::move(req.scorer), combiner);
+        std::vector<ScorerPtr> scorers;
+        scorers.push_back(std::move(must_scorer));
+        scorers.push_back(std::move(should_boxed));
+        return make_intersect_scorers(std::move(scorers), _max_doc);
     }
 
     return std::make_shared<EmptyScorer>();
@@ -202,10 +242,9 @@ SpecializedScorer 
OccurBooleanWeight<ScoreCombinerPtrT>::complex_scorer(
     }
 
     ScorerPtr exclude_opt = build_exclude_opt(std::move(must_not_scorers));
-    size_t total_all_scorers =
-            must_special_counts.num_all_scorers + 
should_special_counts.num_all_scorers;
     SpecializedScorer positive_opt =
-            build_positive_opt(*should_opt, std::move(must_scorers), combiner, 
total_all_scorers);
+            build_positive_opt(*should_opt, std::move(must_scorers), combiner, 
must_special_counts,
+                               should_special_counts);
     if (exclude_opt) {
         ScorerPtr positive_boxed = into_box_scorer(std::move(positive_opt), 
combiner);
         return make_exclude(std::move(positive_boxed), std::move(exclude_opt));
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
index b1437777974..0daff6a1117 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
@@ -63,10 +63,20 @@ private:
                                                       std::vector<ScorerPtr> 
should_scorers,
                                                       CombinerT combiner, 
size_t num_all_scorers);
     ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers);
+
+    ScorerPtr effective_must_scorer(std::vector<ScorerPtr> must_scorers,
+                                    size_t must_num_all_scorers);
+
+    template <typename CombinerT>
+    SpecializedScorer effective_should_scorer_for_union(SpecializedScorer 
should_scorer,
+                                                        size_t 
should_num_all_scorers,
+                                                        CombinerT combiner);
+
     template <typename CombinerT>
     SpecializedScorer build_positive_opt(CombinationMethod& should_opt,
                                          std::vector<ScorerPtr> must_scorers, 
CombinerT combiner,
-                                         size_t num_all_scorers = 0);
+                                         const AllAndEmptyScorerCounts& 
must_special_counts,
+                                         const AllAndEmptyScorerCounts& 
should_special_counts);
 
     template <typename CombinerT>
     SpecializedScorer scorer_union(std::vector<ScorerPtr> scorers, CombinerT 
combiner);
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
index e5075511c67..1101412cb75 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
@@ -32,11 +32,19 @@ public:
     ~RegexpQuery() override = default;
 
     WeightPtr weight(bool enable_scoring) override {
+        auto pattern = make_exact_match(_pattern);
         return std::make_shared<RegexpWeight>(std::move(_context), 
std::move(_field),
-                                              std::move(_pattern), 
enable_scoring, _nullable);
+                                              std::move(pattern), 
enable_scoring, _nullable);
     }
 
 private:
+    static std::string make_exact_match(const std::string& pattern) { // Returns the pattern wrapped as "^(...)$" unless it already starts with '^' and ends with '$'.
+        if (!pattern.empty() && pattern.front() == '^' && pattern.back() == 
'$') { // NOTE(review): only the first and last characters are inspected, so e.g. "^a|b$" or a trailing escaped "\$" is returned unwrapped — confirm intended
+            return pattern;
+        }
+        return "^(" + pattern + ")$"; // an empty pattern becomes "^()$"
+    }
+
     IndexQueryContextPtr _context;
 
     std::wstring _field;
diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
 
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
index bcffa0d7082..244ddfb8dcc 100644
--- 
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
+++ 
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
@@ -25,6 +25,7 @@
 #include <set>
 #include <vector>
 
+#include "olap/rowset/segment_v2/inverted_index/query_v2/all_query/all_query.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur.h"
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/query.h"
@@ -704,4 +705,348 @@ TEST_F(OccurBooleanQueryTest, OnlyMustNotClausesEmpty) {
     EXPECT_EQ(scorer->doc(), TERMINATED);
 }
 
+TEST_F(OccurBooleanQueryTest, 
MinimumShouldMatchExceedsShouldClausesReturnsEmpty) { // Four sub-cases, each passing 2 as the second OccurBooleanQuery argument (presumably minimum_number_should_match).
+    { // case 1: two MUST clauses, no SHOULD clause; the scorer is expected to start TERMINATED
+        auto must_docs1 = generate_range_docs(0, 100);
+        auto must_docs2 = generate_range_docs(50, 150);
+        std::vector<std::pair<Occur, QueryPtr>> clauses;
+        clauses.emplace_back(Occur::MUST, 
std::make_shared<MockQuery>(must_docs1));
+        clauses.emplace_back(Occur::MUST, 
std::make_shared<MockQuery>(must_docs2));
+
+        OccurBooleanQuery query(std::move(clauses), 2);
+        auto weight = query.weight(false);
+        auto scorer = weight->scorer(_ctx);
+
+        EXPECT_EQ(scorer->doc(), TERMINATED);
+    }
+
+    { // case 2: one MUST and one SHOULD clause; still expected to be empty
+        auto must_docs = generate_range_docs(0, 100);
+        auto should_docs = generate_range_docs(0, 100);
+        std::vector<std::pair<Occur, QueryPtr>> clauses;
+        clauses.emplace_back(Occur::MUST, 
std::make_shared<MockQuery>(must_docs));
+        clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should_docs));
+
+        OccurBooleanQuery query(std::move(clauses), 2);
+        auto weight = query.weight(false);
+        auto scorer = weight->scorer(_ctx);
+
+        EXPECT_EQ(scorer->doc(), TERMINATED);
+    }
+
+    { // case 3: two SHOULD clauses; unlike the other cases this expects their intersection, not an empty scorer
+        auto should_docs1 = generate_range_docs(0, 100);
+        auto should_docs2 = generate_range_docs(50, 150);
+        auto expected = set_intersection(should_docs1, should_docs2);
+
+        std::vector<std::pair<Occur, QueryPtr>> clauses;
+        clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should_docs1));
+        clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should_docs2));
+
+        OccurBooleanQuery query(std::move(clauses), 2);
+        auto weight = query.weight(false);
+        auto scorer = weight->scorer(_ctx);
+        auto result = collect_docs(scorer);
+
+        EXPECT_EQ(result.size(), expected.size());
+        EXPECT_EQ(to_set(result), to_set(expected));
+    }
+
+    { // case 4: one MUST and one MUST_NOT clause, no SHOULD clause; expected to be empty
+        auto must_docs = generate_range_docs(0, 100);
+        auto must_not_docs = generate_range_docs(50, 150);
+
+        std::vector<std::pair<Occur, QueryPtr>> clauses;
+        clauses.emplace_back(Occur::MUST, 
std::make_shared<MockQuery>(must_docs));
+        clauses.emplace_back(Occur::MUST_NOT, 
std::make_shared<MockQuery>(must_not_docs));
+
+        OccurBooleanQuery query(std::move(clauses), 2);
+        auto weight = query.weight(false);
+        auto scorer = weight->scorer(_ctx);
+
+        EXPECT_EQ(scorer->doc(), TERMINATED);
+    }
+}
+
+TEST_F(OccurBooleanQueryTest, 
MinimumShouldMatchZeroWithNoShouldClausesReturnsIgnored) { // Two MUST clauses, no SHOULD clause, second constructor argument 0.
+    auto must_docs1 = generate_range_docs(0, 100);
+    auto must_docs2 = generate_range_docs(50, 150);
+    auto expected = set_intersection(must_docs1, must_docs2);
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs1));
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs2));
+
+    OccurBooleanQuery query(std::move(clauses), 0);
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result, expected); // the result is the plain intersection of the two MUST doc lists
+}
+
+TEST_F(OccurBooleanQueryTest, MinimumShouldMatchEqualsNumShouldWithMustClause) 
{ // One MUST plus two SHOULD clauses, with 2 as the second constructor argument.
+    auto must_docs = std::vector<uint32_t> {10, 20};
+    auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+    auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+    auto expected = std::vector<uint32_t> {10, 20}; // 30 appears in both SHOULD lists but not in must_docs
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should1_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should2_docs));
+
+    OccurBooleanQuery query(std::move(clauses), 2);
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, 
MinimumShouldMatchEqualsNumShouldWithMultipleMustClauses) { // Two MUST plus three SHOULD clauses, with 3 as the second constructor argument.
+    auto must1_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+    auto must2_docs = std::vector<uint32_t> {10, 20, 30, 60, 70};
+    auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+    auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+    auto should3_docs = std::vector<uint32_t> {10, 20, 30, 300};
+    auto expected = std::vector<uint32_t> {10, 20, 30}; // the docs present in all five lists
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must1_docs));
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must2_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should1_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should2_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should3_docs));
+
+    OccurBooleanQuery query(std::move(clauses), 3);
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, MinimumShouldMatchEqualsNumShouldOnlyShould) { // Three SHOULD clauses only, with 3 as the second constructor argument.
+    auto should1_docs = std::vector<uint32_t> {10, 20, 30, 40};
+    auto should2_docs = std::vector<uint32_t> {20, 30, 40, 50};
+    auto should3_docs = std::vector<uint32_t> {30, 40, 50, 60};
+    auto expected = std::vector<uint32_t> {30, 40}; // the docs present in all three SHOULD lists
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should1_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should2_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should3_docs));
+
+    OccurBooleanQuery query(std::move(clauses), 3);
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, MinimumShouldMatchEqualsNumShouldWithMustNot) { // One MUST, two SHOULD and one MUST_NOT clause, with 2 as the second constructor argument.
+    auto must_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+    auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+    auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+    auto must_not_docs = std::vector<uint32_t> {20, 100, 200};
+    auto expected = std::vector<uint32_t> {10, 30}; // {10, 20, 30} is common to MUST and both SHOULD lists; 20 is in must_not_docs
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should1_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should2_docs));
+    clauses.emplace_back(Occur::MUST_NOT, 
std::make_shared<MockQuery>(must_not_docs));
+
+    OccurBooleanQuery query(std::move(clauses), 2);
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithMustClause) { // A MUST MockQuery combined with a MUST AllQuery should yield exactly the mock's docs.
+    _ctx.segment_num_rows = 100;
+
+    auto must_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+    clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100)); // same value as segment_num_rows above
+
+    OccurBooleanQuery query(std::move(clauses));
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result, must_docs);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithShouldClause) { // A SHOULD MockQuery next to a SHOULD AllQuery, scoring disabled.
+    _ctx.segment_num_rows = 50;
+
+    auto should_docs = std::vector<uint32_t> {10, 20, 30};
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should_docs));
+    clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(50));
+
+    OccurBooleanQuery query(std::move(clauses));
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result.size(), 50); // one result per row of the 50-row segment
+    EXPECT_EQ(result.front(), 0);
+    EXPECT_EQ(result.back(), 49);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithMustNotClause) { // A MUST AllQuery with a MUST_NOT MockQuery: everything except the excluded docs.
+    _ctx.segment_num_rows = 100;
+
+    auto must_not_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+    clauses.emplace_back(Occur::MUST_NOT, 
std::make_shared<MockQuery>(must_not_docs));
+
+    OccurBooleanQuery query(std::move(clauses));
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result.size(), 95); // 100 rows minus the 5 docs in must_not_docs
+    for (uint32_t doc : must_not_docs) { // none of the excluded docs may appear in the result
+        EXPECT_TRUE(std::find(result.begin(), result.end(), doc) == 
result.end());
+    }
+}
+
+TEST_F(OccurBooleanQueryTest, MultipleAllQueriesWithMust) { // Two MUST AllQuery clauses next to one MUST MockQuery should not change the mock's result.
+    _ctx.segment_num_rows = 100;
+
+    auto must_docs = std::vector<uint32_t> {5, 15, 25, 35, 45};
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+    clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+    clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+
+    OccurBooleanQuery query(std::move(clauses));
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result, must_docs);
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryOnlyMust) { // A single MUST AllQuery and nothing else.
+    _ctx.segment_num_rows = 50;
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(50));
+
+    OccurBooleanQuery query(std::move(clauses));
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result.size(), 50);
+    for (uint32_t i = 0; i < 50; ++i) { // expects doc ids 0..49 in ascending order; reads result[i] even if the size check above failed
+        EXPECT_EQ(result[i], i);
+    }
+}
+
+TEST_F(OccurBooleanQueryTest, AllQueryWithMustAndShouldMinMatch) { // MUST MockQuery + MUST AllQuery + two SHOULD clauses, with 2 as the second constructor argument.
+    _ctx.segment_num_rows = 100;
+
+    auto must_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
+    auto should1_docs = std::vector<uint32_t> {10, 20, 30};
+    auto should2_docs = std::vector<uint32_t> {10, 20, 40};
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
+    clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should1_docs));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should2_docs));
+
+    OccurBooleanQuery query(std::move(clauses), 2);
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    auto expected = std::vector<uint32_t> {10, 20}; // the docs present in must_docs and in both SHOULD lists
+    EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, ScoringWithMinimumShouldMatchEqualsNumShould) { // Same shape as the MUST + two SHOULD case, but with scoring enabled via weight(true).
+    auto must_docs = std::vector<uint32_t> {10, 20, 30};
+    auto should1_docs = std::vector<uint32_t> {10, 20, 30, 100};
+    auto should2_docs = std::vector<uint32_t> {10, 20, 30, 200};
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs, 
1.0F));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should1_docs, 2.0F));
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should2_docs, 3.0F));
+
+    OccurBooleanQuery query(std::move(clauses), 2);
+    auto weight = query.weight(true);
+    auto scorer = weight->scorer(_ctx);
+
+    std::vector<uint32_t> result;
+    uint32_t doc = scorer->doc();
+    while (doc != TERMINATED) { // walk the scorer by hand so the score can be checked at every doc
+        result.push_back(doc);
+        float s = scorer->score();
+        EXPECT_FLOAT_EQ(s, 6.0F); // 1.0 + 2.0 + 3.0; presumably the sum of the three MockQuery scores
+        doc = scorer->advance();
+    }
+
+    auto expected = std::vector<uint32_t> {10, 20, 30};
+    EXPECT_EQ(result, expected);
+}
+
+TEST_F(OccurBooleanQueryTest, ShouldOnlyWithAllQueryMinShouldMatch) { // SHOULD MockQuery + SHOULD AllQuery, with 2 as the second constructor argument.
+    _ctx.segment_num_rows = 50;
+
+    auto should_docs = std::vector<uint32_t> {10, 20, 30, 40, 45};
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::SHOULD, 
std::make_shared<MockQuery>(should_docs));
+    clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(50));
+
+    OccurBooleanQuery query(std::move(clauses), 2);
+    auto weight = query.weight(false);
+    auto scorer = weight->scorer(_ctx);
+    auto result = collect_docs(scorer);
+
+    EXPECT_EQ(result.size(), 5);
+    EXPECT_EQ(result, should_docs); // only the mock's docs are expected, not all 50 rows
+}
+
+TEST_F(OccurBooleanQueryTest, ShouldOnlyAllQueryScoring) { // SHOULD MockQuery + SHOULD AllQuery with scoring enabled via weight(true).
+    _ctx.segment_num_rows = 10;
+
+    std::vector<std::pair<Occur, QueryPtr>> clauses;
+    clauses.emplace_back(Occur::SHOULD,
+                         std::make_shared<MockQuery>(std::vector<uint32_t> {1, 
2}, 2.0F));
+    clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(10));
+
+    OccurBooleanQuery query(std::move(clauses));
+    auto weight = query.weight(true);
+    auto scorer = weight->scorer(_ctx);
+
+    uint32_t doc = scorer->doc();
+    while (doc != TERMINATED) { // NOTE(review): nothing asserts the loop body ran; a scorer that starts TERMINATED would pass — consider checking a doc count
+        float s = scorer->score();
+        if (doc == 1 || doc == 2) {
+            EXPECT_FLOAT_EQ(s, 3.0F); // 2.0 from the MockQuery plus presumably 1.0 from the AllQuery — TODO confirm
+        } else {
+            EXPECT_FLOAT_EQ(s, 1.0F); // docs outside the mock list carry only the remaining 1.0
+        }
+        doc = scorer->advance();
+    }
+}
+
 } // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp
index 3dd818764e0..4e883edff1d 100644
--- 
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp
+++ 
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query_test.cpp
@@ -389,4 +389,179 @@ TEST_F(RegexpQueryV2Test, 
test_regexp_query_move_semantics) {
     ASSERT_NE(weight2, nullptr);
 }
 
+TEST_F(RegexpQueryV2Test, test_make_exact_match_anchoring) { // Runs an unanchored literal pattern through RegexpQuery and counts matching docs.
+    auto context = std::make_shared<IndexQueryContext>();
+    context->collection_statistics = std::make_shared<CollectionStatistics>();
+    context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto reader_holder = 
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+    ASSERT_TRUE(reader_holder != nullptr);
+
+    std::wstring field = StringHelper::to_wstring("content");
+    std::string pattern = "apple123"; // no '^' or '$' in the input
+
+    auto query = std::make_shared<query_v2::RegexpQuery>(context, field, 
pattern);
+    auto weight = query->weight(false);
+
+    query_v2::QueryExecutionContext exec_ctx;
+    exec_ctx.segment_num_rows = reader_holder->maxDoc();
+    exec_ctx.readers = {reader_holder};
+    exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+    auto scorer = weight->scorer(exec_ctx);
+    ASSERT_NE(scorer, nullptr);
+
+    roaring::Roaring result;
+    uint32_t doc = scorer->doc();
+    while (doc != query_v2::TERMINATED) { // collect every doc id the scorer yields
+        result.add(doc);
+        doc = scorer->advance();
+    }
+
+    EXPECT_EQ(result.cardinality(), 1); // assumes the fixture index under kTestDir has one matching doc — TODO confirm
+
+    _CLDECDELETE(dir); // NOTE(review): not reached when an ASSERT_* above fails, since those return early
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_already_anchored) { // Same literal as the unanchored test, but already wrapped in '^' and '$'.
+    auto context = std::make_shared<IndexQueryContext>();
+    context->collection_statistics = std::make_shared<CollectionStatistics>();
+    context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto reader_holder = 
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+    ASSERT_TRUE(reader_holder != nullptr);
+
+    std::wstring field = StringHelper::to_wstring("content");
+    std::string pattern = "^apple123$"; // starts with '^' and ends with '$'
+
+    auto query = std::make_shared<query_v2::RegexpQuery>(context, field, 
pattern);
+    auto weight = query->weight(false);
+
+    query_v2::QueryExecutionContext exec_ctx;
+    exec_ctx.segment_num_rows = reader_holder->maxDoc();
+    exec_ctx.readers = {reader_holder};
+    exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+    auto scorer = weight->scorer(exec_ctx);
+    ASSERT_NE(scorer, nullptr);
+
+    roaring::Roaring result;
+    uint32_t doc = scorer->doc();
+    while (doc != query_v2::TERMINATED) { // collect every doc id the scorer yields
+        result.add(doc);
+        doc = scorer->advance();
+    }
+
+    EXPECT_EQ(result.cardinality(), 1); // same expected count as the unanchored "apple123" test
+
+    _CLDECDELETE(dir); // NOTE(review): not reached when an ASSERT_* above fails, since those return early
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_partial_anchor_start) { // Pattern anchored only at the start.
+    auto context = std::make_shared<IndexQueryContext>();
+    context->collection_statistics = std::make_shared<CollectionStatistics>();
+    context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto reader_holder = 
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+    ASSERT_TRUE(reader_holder != nullptr);
+
+    std::wstring field = StringHelper::to_wstring("content");
+    std::string pattern = "^apple.*"; // starts with '^' but does not end with '$'
+
+    auto query = std::make_shared<query_v2::RegexpQuery>(context, field, 
pattern);
+    auto weight = query->weight(false);
+
+    query_v2::QueryExecutionContext exec_ctx;
+    exec_ctx.segment_num_rows = reader_holder->maxDoc();
+    exec_ctx.readers = {reader_holder};
+    exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+    auto scorer = weight->scorer(exec_ctx);
+    ASSERT_NE(scorer, nullptr);
+
+    roaring::Roaring result;
+    uint32_t doc = scorer->doc();
+    while (doc != query_v2::TERMINATED) { // collect every doc id the scorer yields
+        result.add(doc);
+        doc = scorer->advance();
+    }
+
+    EXPECT_GT(result.cardinality(), 0); // only checks that something matched, not an exact count
+
+    _CLDECDELETE(dir); // NOTE(review): not reached when an ASSERT_* above fails, since those return early
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_partial_anchor_end) { // Pattern anchored only at the end.
+    auto context = std::make_shared<IndexQueryContext>();
+    context->collection_statistics = std::make_shared<CollectionStatistics>();
+    context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto reader_holder = 
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+    ASSERT_TRUE(reader_holder != nullptr);
+
+    std::wstring field = StringHelper::to_wstring("content");
+    std::string pattern = ".*123$"; // ends with '$' but does not start with '^'
+
+    auto query = std::make_shared<query_v2::RegexpQuery>(context, field, 
pattern);
+    auto weight = query->weight(false);
+
+    query_v2::QueryExecutionContext exec_ctx;
+    exec_ctx.segment_num_rows = reader_holder->maxDoc();
+    exec_ctx.readers = {reader_holder};
+    exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+    auto scorer = weight->scorer(exec_ctx);
+    ASSERT_NE(scorer, nullptr);
+
+    roaring::Roaring result;
+    uint32_t doc = scorer->doc();
+    while (doc != query_v2::TERMINATED) { // collect every doc id the scorer yields
+        result.add(doc);
+        doc = scorer->advance();
+    }
+
+    EXPECT_GT(result.cardinality(), 0); // only checks that something matched, not an exact count
+
+    _CLDECDELETE(dir); // NOTE(review): not reached when an ASSERT_* above fails, since those return early
+}
+
+TEST_F(RegexpQueryV2Test, test_make_exact_match_wildcard_pattern) { // Match-anything pattern with no anchors.
+    auto context = std::make_shared<IndexQueryContext>();
+    context->collection_statistics = std::make_shared<CollectionStatistics>();
+    context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto reader_holder = 
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+    ASSERT_TRUE(reader_holder != nullptr);
+
+    std::wstring field = StringHelper::to_wstring("content");
+    std::string pattern = ".*";
+
+    auto query = std::make_shared<query_v2::RegexpQuery>(context, field, 
pattern);
+    auto weight = query->weight(false);
+
+    query_v2::QueryExecutionContext exec_ctx;
+    exec_ctx.segment_num_rows = reader_holder->maxDoc();
+    exec_ctx.readers = {reader_holder};
+    exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+    auto scorer = weight->scorer(exec_ctx);
+    ASSERT_NE(scorer, nullptr);
+
+    roaring::Roaring result;
+    uint32_t doc = scorer->doc();
+    while (doc != query_v2::TERMINATED) { // collect every doc id the scorer yields
+        result.add(doc);
+        doc = scorer->advance();
+    }
+
+    EXPECT_EQ(result.cardinality(), 20); // assumes the fixture index under kTestDir holds 20 docs — TODO confirm
+
+    _CLDECDELETE(dir); // NOTE(review): not reached when an ASSERT_* above fails, since those return early
+}
+
 } // namespace doris::segment_v2
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to