This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ac01e67c533 [fix](inverted index) Add CompositeReader to support multi-field boolean queries (#55960)
ac01e67c533 is described below

commit ac01e67c53393e64fbf7ee3ae36a012b428a5acc
Author: zzzxl <[email protected]>
AuthorDate: Sun Sep 14 16:19:48 2025 +0800

    [fix](inverted index) Add CompositeReader to support multi-field boolean queries (#55960)
---
 .../query_v2/boolean_query/boolean_weight.h        |   8 +-
 .../inverted_index/query_v2/composite_reader.h     |  64 ++++++++++
 .../query_v2/term_query/term_weight.h              |   3 +-
 .../segment_v2/inverted_index/query_v2/weight.h    |   8 +-
 .../inverted_index/query_v2/boolean_query_test.cpp | 131 +++++++++++++++++----
 .../query_v2/composite_reader_test.cpp             |  80 +++++++++++++
 6 files changed, 259 insertions(+), 35 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_weight.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_weight.h
index dd5b65837b8..7437757a972 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_weight.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_weight.h
@@ -36,8 +36,8 @@ public:
               _score_combiner(std::move(score_combiner)) {}
     ~BooleanWeight() override = default;
 
-    ScorerPtr scorer(lucene::index::IndexReader* reader) override {
-        std::vector<ScorerPtr> sub_scorers = per_scorers(reader);
+    ScorerPtr scorer(const CompositeReaderPtr& composite_reader) override {
+        std::vector<ScorerPtr> sub_scorers = per_scorers(composite_reader);
         if (_type == OperatorType::OP_AND) {
             return intersection_scorer_build(sub_scorers);
         } else if (_type == OperatorType::OP_OR) {
@@ -47,10 +47,10 @@ public:
     }
 
 private:
-    std::vector<ScorerPtr> per_scorers(lucene::index::IndexReader* reader) {
+    std::vector<ScorerPtr> per_scorers(const CompositeReaderPtr& 
composite_reader) {
         std::vector<ScorerPtr> sub_scorers;
         for (const auto& sub_weight : _sub_weights) {
-            sub_scorers.emplace_back(sub_weight->scorer(reader));
+            sub_scorers.emplace_back(sub_weight->scorer(composite_reader));
         }
         return sub_scorers;
     }
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/composite_reader.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/composite_reader.h
new file mode 100644
index 00000000000..67161d9910b
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/composite_reader.h
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <CLucene.h>
+#include <CLucene/index/IndexReader.h>
+
+#include <ranges>
+
+#include "common/exception.h"
+#include "olap/rowset/segment_v2/inverted_index/util/string_helper.h"
+
+CL_NS_USE(index)
+
+namespace doris::segment_v2::inverted_index::query_v2 {
+
+class CompositeReader {
+public:
+    CompositeReader() = default;
+    ~CompositeReader() = default;
+
+    void set_reader(const std::wstring& field, lucene::index::IndexReader* 
reader) {
+        if (_field_readers.contains(field)) {
+            throw Exception(ErrorCode::INDEX_INVALID_PARAMETERS, "Field {} 
already exists",
+                            StringHelper::to_string(field));
+        }
+        _field_readers[field] = reader;
+    }
+
+    lucene::index::IndexReader* get_reader(const std::wstring& field) {
+        if (!_field_readers.contains(field)) {
+            throw Exception(ErrorCode::NOT_FOUND, "Field {} not found",
+                            StringHelper::to_string(field));
+        }
+        return _field_readers[field];
+    }
+
+    void close() {
+        for (auto* reader : std::views::values(_field_readers)) {
+            reader->close();
+        }
+    }
+
+private:
+    std::unordered_map<std::wstring, lucene::index::IndexReader*> 
_field_readers;
+};
+using CompositeReaderPtr = std::unique_ptr<CompositeReader>;
+
+} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h
index fb1985b7fd8..af899af3f9e 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h
@@ -35,8 +35,9 @@ public:
               _enable_scoring(enable_scoring) {}
     ~TermWeight() override = default;
 
-    ScorerPtr scorer(lucene::index::IndexReader* reader) override {
+    ScorerPtr scorer(const CompositeReaderPtr& composite_reader) override {
         auto t = make_term_ptr(_field.c_str(), _term.c_str());
+        auto* reader = composite_reader->get_reader(_field);
         auto iter = make_term_doc_ptr(reader, t.get(), _enable_scoring, 
_context->io_ctx);
 
         auto make_scorer = [this](auto segment_postings) -> ScorerPtr {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h
index a82bbeb57df..480c95c1a4d 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h
@@ -17,13 +17,9 @@
 
 #pragma once
 
-#include <CLucene.h>
-#include <CLucene/index/IndexReader.h>
-
+#include "olap/rowset/segment_v2/inverted_index/query_v2/composite_reader.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
 
-CL_NS_USE(index)
-
 namespace doris::segment_v2::inverted_index::query_v2 {
 
 class Weight {
@@ -31,7 +27,7 @@ public:
     Weight() = default;
     virtual ~Weight() = default;
 
-    virtual ScorerPtr scorer(lucene::index::IndexReader* reader) = 0;
+    virtual ScorerPtr scorer(const CompositeReaderPtr& composite_reader) = 0;
 };
 using WeightPtr = std::shared_ptr<Weight>;
 
diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp
index 71c79789c1c..33683deaa09 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp
@@ -39,23 +39,34 @@ using namespace inverted_index;
 
 class BooleanQueryTest : public testing::Test {
 public:
-    const std::string kTestDir = "./ut_dir/query_test";
-    std::string field_name = "name";
+    const std::string kTestDir1 = "./ut_dir/query_test1";
+    const std::string kTestDir2 = "./ut_dir/query_test2";
 
     void SetUp() override {
-        auto st = io::global_local_filesystem()->delete_directory(kTestDir);
-        ASSERT_TRUE(st.ok()) << st;
-        st = io::global_local_filesystem()->create_directory(kTestDir);
-        ASSERT_TRUE(st.ok()) << st;
-
-        create_test_index();
+        {
+            auto st = 
io::global_local_filesystem()->delete_directory(kTestDir1);
+            ASSERT_TRUE(st.ok()) << st;
+            st = io::global_local_filesystem()->create_directory(kTestDir1);
+            ASSERT_TRUE(st.ok()) << st;
+            std::string field_name1 = "name1";
+            create_test_index(field_name1, kTestDir1);
+        }
+        {
+            auto st = 
io::global_local_filesystem()->delete_directory(kTestDir2);
+            ASSERT_TRUE(st.ok()) << st;
+            st = io::global_local_filesystem()->create_directory(kTestDir2);
+            ASSERT_TRUE(st.ok()) << st;
+            std::string field_name2 = "name2";
+            create_test_index(field_name2, kTestDir2);
+        }
     }
     void TearDown() override {
-        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok());
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir1).ok());
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir2).ok());
     }
 
 private:
-    void create_test_index() {
+    void create_test_index(const std::string& field_name, const std::string& 
dir) {
         std::vector<std::string> test_data = {"apple banana orange",   "apple 
cherry grape",
                                               "banana cherry kiwi",    "orange 
grape strawberry",
                                               "apple orange kiwi",     "cherry 
banana grape",
@@ -67,7 +78,7 @@ private:
         auto custom_analyzer = 
CustomAnalyzer::build_custom_analyzer(custom_analyzer_config);
 
         auto* indexwriter =
-                _CLNEW lucene::index::IndexWriter(kTestDir.c_str(), 
custom_analyzer.get(), true);
+                _CLNEW lucene::index::IndexWriter(dir.c_str(), 
custom_analyzer.get(), true);
         indexwriter->setMaxBufferedDocs(100);
         indexwriter->setRAMBufferSizeMB(-1);
         indexwriter->setMaxFieldLength(0x7FFFFFFFL);
@@ -109,6 +120,9 @@ static Status boolean_query_search(
         query_v2::OperatorType op, roaring::Roaring& out_bitmap) {
     std::wstring field = StringHelper::to_wstring(name);
 
+    auto composite_reader = std::make_unique<query_v2::CompositeReader>();
+    composite_reader->set_reader(field, reader);
+
     auto context = std::make_shared<IndexQueryContext>();
     context->collection_statistics = std::make_shared<CollectionStatistics>();
     context->collection_similarity = std::make_shared<CollectionSimilarity>();
@@ -136,7 +150,7 @@ static Status boolean_query_search(
     }
     auto boolean_query = builder.build();
     auto weight = boolean_query->weight(false);
-    auto scorer = weight->scorer(reader);
+    auto scorer = weight->scorer(composite_reader);
 
     uint32_t doc = scorer->doc();
     while (doc != query_v2::TERMINATED) {
@@ -173,7 +187,7 @@ TEST_F(BooleanQueryTest, test_boolean_query) {
             {{"cherry"}, {"strawberry"}},
             {{"apple", "banana"}, {"kiwi"}}};
 
-    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto* dir = FSDirectory::getDirectory(kTestDir1.c_str());
     auto* reader = IndexReader::open(dir, true);
 
     ASSERT_TRUE(reader != nullptr) << "Failed to open index reader";
@@ -186,7 +200,7 @@ TEST_F(BooleanQueryTest, test_boolean_query) {
         roaring::Roaring result;
 
         try {
-            Status res = boolean_query_search(field_name, reader, terms,
+            Status res = boolean_query_search("name1", reader, terms,
                                               query_v2::OperatorType::OP_AND, 
result);
             EXPECT_TRUE(res.ok()) << "Boolean query case " << i << " should 
execute successfully";
             EXPECT_EQ(result.cardinality(), expected_cards[i])
@@ -210,7 +224,7 @@ TEST_F(BooleanQueryTest, test_boolean_query_or_operation) {
     std::vector<std::pair<std::vector<std::string>, std::vector<std::string>>> 
test_cases = {
             {{"apple"}, {"banana"}}, {{"nonexistent"}, {"apple"}}};
 
-    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto* dir = FSDirectory::getDirectory(kTestDir1.c_str());
     auto* reader = IndexReader::open(dir, true);
 
     const std::vector<uint32_t> expected_cards = {70, 40};
@@ -220,8 +234,8 @@ TEST_F(BooleanQueryTest, test_boolean_query_or_operation) {
         roaring::Roaring result;
 
         try {
-            Status res = boolean_query_search(field_name, reader, terms,
-                                              query_v2::OperatorType::OP_OR, 
result);
+            Status res = boolean_query_search("name1", reader, terms, 
query_v2::OperatorType::OP_OR,
+                                              result);
             EXPECT_TRUE(res.ok()) << "Boolean OR query case " << i
                                   << " should execute successfully";
             EXPECT_EQ(result.cardinality(), expected_cards[i])
@@ -237,13 +251,13 @@ TEST_F(BooleanQueryTest, test_boolean_query_or_operation) 
{
 }
 
 TEST_F(BooleanQueryTest, test_boolean_query_scoring_or) {
-    std::wstring field = StringHelper::to_wstring(field_name);
+    std::wstring field = StringHelper::to_wstring("name1");
 
     auto context = std::make_shared<IndexQueryContext>();
     context->collection_statistics = std::make_shared<CollectionStatistics>();
     context->collection_similarity = std::make_shared<CollectionSimilarity>();
 
-    std::wstring ws_field = StringHelper::to_wstring(field_name);
+    std::wstring ws_field = StringHelper::to_wstring("name1");
     // 直接访问成员填充统计信息
     context->collection_statistics->_total_num_docs = 80;
     context->collection_statistics->_total_num_tokens[ws_field] = 240; // 80*3
@@ -276,12 +290,15 @@ TEST_F(BooleanQueryTest, test_boolean_query_scoring_or) {
     }
     auto boolean_query = builder.build();
 
-    auto* dir = FSDirectory::getDirectory(kTestDir.c_str());
+    auto* dir = FSDirectory::getDirectory(kTestDir1.c_str());
     auto* reader = IndexReader::open(dir, true);
     ASSERT_TRUE(reader != nullptr);
 
+    auto composite_reader = std::make_unique<query_v2::CompositeReader>();
+    composite_reader->set_reader(field, reader);
+
     auto weight = boolean_query->weight(true);
-    auto scorer = weight->scorer(reader);
+    auto scorer = weight->scorer(composite_reader);
 
     uint32_t doc = scorer->doc();
     uint32_t count = 0;
@@ -300,9 +317,6 @@ TEST_F(BooleanQueryTest, test_boolean_query_scoring_or) {
         doc = scorer->advance();
     }
 
-    std::cout << "count: " << count << std::endl;
-    std::cout << "score_single: " << score_single << std::endl;
-    std::cout << "score_both: " << score_both << std::endl;
     EXPECT_EQ(count, 50);
     EXPECT_GT(score_single, 0.0F);
     EXPECT_GT(score_both, 0.0F);
@@ -313,4 +327,73 @@ TEST_F(BooleanQueryTest, test_boolean_query_scoring_or) {
     _CLDECDELETE(dir);
 }
 
+TEST_F(BooleanQueryTest, 
test_boolean_query_cross_fields_with_composite_reader) {
+    std::string field_name1 = "name1";
+    std::string field_name2 = "name2";
+    std::wstring wfield1 = StringHelper::to_wstring(field_name1);
+    std::wstring wfield2 = StringHelper::to_wstring(field_name2);
+
+    auto* dir1 = FSDirectory::getDirectory(kTestDir1.c_str());
+    auto* dir2 = FSDirectory::getDirectory(kTestDir2.c_str());
+    auto* ir1 = IndexReader::open(dir1, true);
+    auto* ir2 = IndexReader::open(dir2, true);
+    ASSERT_TRUE(ir1 != nullptr);
+    ASSERT_TRUE(ir2 != nullptr);
+    EXPECT_EQ(ir1->numDocs(), 80);
+    EXPECT_EQ(ir2->numDocs(), 80);
+
+    auto composite_reader = std::make_unique<query_v2::CompositeReader>();
+    composite_reader->set_reader(wfield1, ir1);
+    composite_reader->set_reader(wfield2, ir2);
+
+    auto context = std::make_shared<IndexQueryContext>();
+    context->collection_statistics = std::make_shared<CollectionStatistics>();
+    context->collection_similarity = std::make_shared<CollectionSimilarity>();
+
+    {
+        query_v2::BooleanQuery::Builder b(query_v2::OperatorType::OP_AND);
+        b.add(std::make_shared<query_v2::TermQuery>(context, wfield1,
+                                                    
StringHelper::to_wstring("apple")));
+        b.add(std::make_shared<query_v2::TermQuery>(context, wfield2,
+                                                    
StringHelper::to_wstring("banana")));
+        auto q = b.build();
+        auto w = q->weight(false);
+        auto s = w->scorer(composite_reader);
+
+        uint32_t doc = s->doc();
+        uint32_t count = 0;
+        while (doc != query_v2::TERMINATED) {
+            ++count;
+            doc = s->advance();
+        }
+        EXPECT_EQ(count, 10);
+    }
+
+    {
+        query_v2::BooleanQuery::Builder b(query_v2::OperatorType::OP_OR);
+        b.add(std::make_shared<query_v2::TermQuery>(context, wfield1,
+                                                    
StringHelper::to_wstring("apple")));
+        b.add(std::make_shared<query_v2::TermQuery>(context, wfield2,
+                                                    
StringHelper::to_wstring("banana")));
+        auto q = b.build();
+        auto w = q->weight(false);
+        auto s = w->scorer(composite_reader);
+
+        uint32_t doc = s->doc();
+        uint32_t count = 0;
+        while (doc != query_v2::TERMINATED) {
+            ++count;
+            doc = s->advance();
+        }
+        EXPECT_EQ(count, 70);
+    }
+
+    ir1->close();
+    ir2->close();
+    _CLLDELETE(ir1);
+    _CLLDELETE(ir2);
+    _CLDECDELETE(dir1);
+    _CLDECDELETE(dir2);
+}
+
 } // namespace doris::segment_v2
diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/composite_reader_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/composite_reader_test.cpp
new file mode 100644
index 00000000000..653df299aa1
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/composite_reader_test.cpp
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/inverted_index/query_v2/composite_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+#include "common/exception.h"
+#include "olap/rowset/segment_v2/inverted_index/util/string_helper.h"
+
+namespace doris::segment_v2 {
+
+using namespace inverted_index;
+
+TEST(CompositeReaderTest, SetAndGetNullptr) {
+    query_v2::CompositeReader cr;
+
+    std::wstring field = StringHelper::to_wstring("f1");
+    lucene::index::IndexReader* reader = nullptr;
+
+    cr.set_reader(field, reader);
+    auto* got = cr.get_reader(field);
+    EXPECT_EQ(got, reader);
+}
+
+TEST(CompositeReaderTest, GetNonExistingThrowsNotFound) {
+    query_v2::CompositeReader cr;
+
+    std::wstring field = StringHelper::to_wstring("no_such_field");
+
+    try {
+        (void)cr.get_reader(field);
+        FAIL() << "Expected doris::Exception to be thrown";
+    } catch (const doris::Exception& e) {
+        EXPECT_EQ(e.code(), doris::ErrorCode::NOT_FOUND);
+    } catch (...) {
+        FAIL() << "Unexpected exception type";
+    }
+}
+
+TEST(CompositeReaderTest, DuplicateSetThrowsIndexInvalidParameters) {
+    query_v2::CompositeReader cr;
+
+    std::wstring field = StringHelper::to_wstring("dup");
+    lucene::index::IndexReader* reader = nullptr;
+
+    cr.set_reader(field, reader);
+
+    try {
+        cr.set_reader(field, reader);
+        FAIL() << "Expected doris::Exception to be thrown";
+    } catch (const doris::Exception& e) {
+        EXPECT_EQ(e.code(), doris::ErrorCode::INDEX_INVALID_PARAMETERS);
+    } catch (...) {
+        FAIL() << "Unexpected exception type";
+    }
+}
+
+TEST(CompositeReaderTest, CloseOnEmptyDoesNotCrash) {
+    query_v2::CompositeReader cr;
+    cr.close();
+}
+
+} // namespace doris::segment_v2
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to