This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 919c2ef7d45 branch-4.0: [feature](search) add phrase/wildcard/regex 
support for search and refact some code #57372 (#57469)
919c2ef7d45 is described below

commit 919c2ef7d45ac79659ccc411567084f6169130c6
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 31 09:41:58 2025 +0800

    branch-4.0: [feature](search) add phrase/wildcard/regex support for search 
and refact some code #57372 (#57469)
    
    Cherry-picked from #57372
    
    Co-authored-by: Jack <[email protected]>
---
 .../const_score_query/const_score_scorer.h         |  10 +-
 .../inverted_index/query_v2/null_bitmap_fetcher.h  |  82 ++++++++
 .../query_v2/phrase_query/phrase_query.h           |   2 +-
 .../query_v2/phrase_query/phrase_weight.h          |  32 ++-
 .../query_v2/regexp_query/regexp_query.h           |   2 +-
 .../query_v2/regexp_query/regexp_weight.cpp        |  15 +-
 .../query_v2/term_query/term_query.h               |  11 +-
 .../query_v2/term_query/term_scorer.h              |  25 +--
 .../query_v2/term_query/term_weight.h              |  11 +-
 .../segment_v2/inverted_index/query_v2/weight.h    |  31 +++
 .../query_v2/wildcard_query/wildcard_query.h       |   2 +-
 .../query_v2/wildcard_query/wildcard_weight.h      |   5 +-
 be/src/vec/functions/function_search.cpp           |  90 +++++++-
 be/src/vec/functions/function_search.h             |   4 +
 .../search/test_search_default_field_operator.out  |  79 +++++++
 .../data/search/test_search_dsl_syntax.out         |   6 +-
 .../data/search/test_search_function.out           |   3 +
 .../data/search/test_search_null_semantics.out     |  10 +-
 .../test_search_default_field_operator.groovy      | 230 +++++++++++++++++++++
 .../search/test_search_null_semantics.groovy       |   7 +
 20 files changed, 597 insertions(+), 60 deletions(-)

diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/const_score_query/const_score_scorer.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/const_score_query/const_score_scorer.h
index 1ebf47f1d5d..6f313068d4a 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/const_score_query/const_score_scorer.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/const_score_query/const_score_scorer.h
@@ -34,10 +34,18 @@ public:
 
     float score() override { return _score; }
 
+    bool has_null_bitmap(const NullBitmapResolver* resolver = nullptr) 
override {
+        return _scorer && _scorer->has_null_bitmap(resolver);
+    }
+
+    const roaring::Roaring* get_null_bitmap(const NullBitmapResolver* resolver 
= nullptr) override {
+        return _scorer ? _scorer->get_null_bitmap(resolver) : nullptr;
+    }
+
 private:
     ScorerPtrT _scorer;
 
     float _score = 1.0F;
 };
 
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/null_bitmap_fetcher.h 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/null_bitmap_fetcher.h
new file mode 100644
index 00000000000..204533d2ec5
--- /dev/null
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/null_bitmap_fetcher.h
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <glog/logging.h>
+
+#include <memory>
+#include <roaring/roaring.hh>
+#include <string>
+
+#include "olap/rowset/segment_v2/index_iterator.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/weight.h"
+#include "olap/rowset/segment_v2/inverted_index_cache.h"
+
+namespace doris::segment_v2::inverted_index::query_v2 {
+
+// Small helper that centralizes "field NULL bitmap" lookups so weights/scorers
+// don't have to duplicate resolver plumbing.
+class FieldNullBitmapFetcher {
+public:
+    FieldNullBitmapFetcher() = delete;
+
+    static std::shared_ptr<roaring::Roaring> fetch(const 
QueryExecutionContext& context,
+                                                   const std::string& 
logical_field,
+                                                   const Scorer* scorer = 
nullptr) {
+        return fetch(context.null_resolver, logical_field, scorer);
+    }
+
+    static std::shared_ptr<roaring::Roaring> fetch(const NullBitmapResolver* 
resolver,
+                                                   const std::string& 
logical_field,
+                                                   const Scorer* scorer = 
nullptr) {
+        if (resolver == nullptr || logical_field.empty()) {
+            return nullptr;
+        }
+
+        EmptyScorer fallback_scorer;
+        const Scorer* resolver_scorer = scorer != nullptr ? scorer : 
&fallback_scorer;
+
+        auto iterator = resolver->iterator_for(*resolver_scorer, 
logical_field);
+        if (iterator == nullptr) {
+            return nullptr;
+        }
+
+        auto has_null = iterator->has_null();
+        if (!has_null.has_value() || !has_null.value()) {
+            return nullptr;
+        }
+
+        segment_v2::InvertedIndexQueryCacheHandle cache_handle;
+        auto status = iterator->read_null_bitmap(&cache_handle);
+        if (!status.ok()) {
+            LOG(WARNING) << "Failed to read null bitmap for field '" << 
logical_field
+                         << "': " << status.to_string();
+            return nullptr;
+        }
+
+        auto bitmap_ptr = cache_handle.get_bitmap();
+        if (bitmap_ptr == nullptr) {
+            return nullptr;
+        }
+
+        return std::make_shared<roaring::Roaring>(*bitmap_ptr);
+    }
+};
+
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_query.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_query.h
index 133cf71afe4..f46ac1793b7 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_query.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_query.h
@@ -51,4 +51,4 @@ private:
     std::vector<std::wstring> _terms;
 };
 
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_weight.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_weight.h
index 7fe68e33995..339b4fdc99f 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_weight.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_weight.h
@@ -18,6 +18,9 @@
 #pragma once
 
 #include "olap/rowset/segment_v2/index_query_context.h"
+#include 
"olap/rowset/segment_v2/inverted_index/query_v2/bit_set_query/bit_set_scorer.h"
+#include 
"olap/rowset/segment_v2/inverted_index/query_v2/const_score_query/const_score_scorer.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/null_bitmap_fetcher.h"
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_scorer.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/weight.h"
@@ -37,12 +40,29 @@ public:
     ~PhraseWeight() override = default;
 
     ScorerPtr scorer(const QueryExecutionContext& ctx, const std::string& 
binding_key) override {
-        auto scorer = phrase_scorer(ctx, binding_key);
-        if (scorer) {
-            return scorer;
-        } else {
-            return std::make_shared<EmptyScorer>();
+        auto phrase = phrase_scorer(ctx, binding_key);
+        auto logical_field = logical_field_or_fallback(ctx, binding_key, 
_field);
+        auto null_bitmap = FieldNullBitmapFetcher::fetch(ctx, logical_field);
+
+        auto doc_bitset = std::make_shared<roaring::Roaring>();
+        if (phrase) {
+            uint32_t doc = phrase->doc();
+            if (doc == TERMINATED) {
+                doc = phrase->advance();
+            }
+            while (doc != TERMINATED) {
+                doc_bitset->add(doc);
+                doc = phrase->advance();
+            }
+        }
+
+        auto bit_set =
+                std::make_shared<BitSetScorer>(std::move(doc_bitset), 
std::move(null_bitmap));
+        if (!phrase) {
+            return bit_set;
         }
+        // Wrap with const score for consistency with other non-scoring paths
+        return 
std::make_shared<ConstScoreScorer<BitSetScorerPtr>>(std::move(bit_set));
     }
 
 private:
@@ -78,4 +98,4 @@ private:
     bool _enable_scoring = false;
 };
 
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
index 9f1e7491b50..cce83b6e1e7 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h
@@ -43,4 +43,4 @@ private:
     std::string _pattern;
 };
 
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.cpp
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.cpp
index 5404abaddb0..f70b5be77c4 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.cpp
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.cpp
@@ -26,8 +26,10 @@
 
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/bit_set_query/bit_set_scorer.h"
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/const_score_query/const_score_scorer.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/null_bitmap_fetcher.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/segment_postings.h"
 #include "olap/rowset/segment_v2/inverted_index/util/string_helper.h"
+#include "olap/rowset/segment_v2/inverted_index_iterator.h"
 
 CL_NS_USE(index)
 
@@ -44,6 +46,9 @@ RegexpWeight::RegexpWeight(IndexQueryContextPtr context, 
std::wstring field, std
 
 ScorerPtr RegexpWeight::scorer(const QueryExecutionContext& context,
                                const std::string& binding_key) {
+    auto logical_field = logical_field_or_fallback(context, binding_key, 
_field);
+    VLOG_DEBUG << "RegexpWeight::scorer() called - pattern=" << _pattern << ", 
logical_field='"
+               << logical_field << "'";
     auto prefix = get_regex_prefix(_pattern);
 
     hs_database_t* database = nullptr;
@@ -76,7 +81,10 @@ ScorerPtr RegexpWeight::scorer(const QueryExecutionContext& 
context,
     hs_free_database(database);
 
     if (matching_terms.empty()) {
-        return std::make_shared<EmptyScorer>();
+        // Even when there are no matching terms, we must honor NULL semantics 
for the field.
+        auto empty_true = std::make_shared<roaring::Roaring>();
+        auto null_bitmap = FieldNullBitmapFetcher::fetch(context, 
logical_field);
+        return std::make_shared<BitSetScorer>(std::move(empty_true), 
std::move(null_bitmap));
     }
 
     auto doc_bitset = std::make_shared<roaring::Roaring>();
@@ -93,7 +101,8 @@ ScorerPtr RegexpWeight::scorer(const QueryExecutionContext& 
context,
         }
     }
 
-    auto bit_set = std::make_shared<BitSetScorer>(doc_bitset);
+    auto null_bitmap = FieldNullBitmapFetcher::fetch(context, logical_field);
+    auto bit_set = std::make_shared<BitSetScorer>(doc_bitset, null_bitmap);
     auto const_score = 
std::make_shared<ConstScoreScorer<BitSetScorerPtr>>(std::move(bit_set));
     return const_score;
 }
@@ -220,4 +229,4 @@ void RegexpWeight::collect_matching_terms(const 
QueryExecutionContext& context,
     }
 }
 
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h
index e4370a6a14d..7b8fa2eeace 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h
@@ -25,12 +25,8 @@ namespace doris::segment_v2::inverted_index::query_v2 {
 
 class TermQuery : public Query {
 public:
-    TermQuery(IndexQueryContextPtr context, std::wstring field, std::wstring 
term,
-              std::string logical_field = {})
-            : _context(std::move(context)),
-              _field(std::move(field)),
-              _term(std::move(term)),
-              _logical_field(std::move(logical_field)) {}
+    TermQuery(IndexQueryContextPtr context, std::wstring field, std::wstring 
term)
+            : _context(std::move(context)), _field(std::move(field)), 
_term(std::move(term)) {}
     ~TermQuery() override = default;
 
     WeightPtr weight(bool enable_scoring) override {
@@ -43,7 +39,7 @@ public:
         }
         return std::make_shared<TermWeight>(std::move(_context), 
std::move(_field),
                                             std::move(_term), 
std::move(bm25_similarity),
-                                            enable_scoring, _logical_field);
+                                            enable_scoring);
     }
 
 private:
@@ -51,7 +47,6 @@ private:
 
     std::wstring _field;
     std::wstring _term;
-    std::string _logical_field;
 };
 
 } // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_scorer.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_scorer.h
index 77bbc922b1b..9099a71877d 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_scorer.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_scorer.h
@@ -22,6 +22,7 @@
 #include <optional>
 #include <roaring/roaring.hh>
 
+#include "olap/rowset/segment_v2/inverted_index/query_v2/null_bitmap_fetcher.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/segment_postings.h"
 #include "olap/rowset/segment_v2/inverted_index/similarity/similarity.h"
@@ -70,27 +71,9 @@ private:
 
         _null_bitmap_checked = true;
 
-        auto iterator = resolver->iterator_for(*this, _logical_field);
-        if (iterator == nullptr) {
-            return;
-        }
-
-        auto has_null_result = iterator->has_null();
-        if (!has_null_result.has_value() || !has_null_result.value()) {
-            return;
-        }
-
-        segment_v2::InvertedIndexQueryCacheHandle cache_handle;
-        auto status = iterator->read_null_bitmap(&cache_handle);
-        if (!status.ok()) {
-            LOG(WARNING) << "TermScorer failed to read null bitmap for field 
'" << _logical_field
-                         << "': " << status.to_string();
-            return;
-        }
-
-        auto bitmap_ptr = cache_handle.get_bitmap();
-        if (bitmap_ptr != nullptr) {
-            _null_bitmap = *bitmap_ptr;
+        auto bitmap = FieldNullBitmapFetcher::fetch(resolver, _logical_field, 
this);
+        if (bitmap != nullptr) {
+            _null_bitmap = *bitmap;
         }
     }
 
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h
index 7f0e329d88e..d532e9664cb 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_weight.h
@@ -27,23 +27,21 @@ namespace doris::segment_v2::inverted_index::query_v2 {
 class TermWeight : public Weight {
 public:
     TermWeight(IndexQueryContextPtr context, std::wstring field, std::wstring 
term,
-               SimilarityPtr similarity, bool enable_scoring, std::string 
logical_field = {})
+               SimilarityPtr similarity, bool enable_scoring)
             : _context(std::move(context)),
               _field(std::move(field)),
               _term(std::move(term)),
               _similarity(std::move(similarity)),
-              _enable_scoring(enable_scoring),
-              _logical_field(std::move(logical_field)) {}
+              _enable_scoring(enable_scoring) {}
     ~TermWeight() override = default;
 
     ScorerPtr scorer(const QueryExecutionContext& ctx, const std::string& 
binding_key) override {
         auto reader = lookup_reader(_field, ctx, binding_key);
-        auto field_name =
-                _logical_field.empty() ? std::string(_field.begin(), 
_field.end()) : _logical_field;
+        auto logical_field = logical_field_or_fallback(ctx, binding_key, 
_field);
         auto make_scorer = [&](auto segment_postings) -> ScorerPtr {
             using PostingsT = decltype(segment_postings);
             return 
std::make_shared<TermScorer<PostingsT>>(std::move(segment_postings), 
_similarity,
-                                                           field_name);
+                                                           logical_field);
         };
 
         if (!reader) {
@@ -76,7 +74,6 @@ private:
     std::wstring _term;
     SimilarityPtr _similarity;
     bool _enable_scoring = false;
-    std::string _logical_field;
 };
 
 } // namespace doris::segment_v2::inverted_index::query_v2
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h
index c3483128912..17d8d11cbc7 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/weight.h
@@ -30,12 +30,19 @@ class IndexReader;
 
 namespace doris::segment_v2::inverted_index::query_v2 {
 
+struct FieldBindingContext {
+    std::string logical_field_name;
+    std::string stored_field_name;
+    std::wstring stored_field_wstr;
+};
+
 struct QueryExecutionContext {
     uint32_t segment_num_rows = 0;
     std::vector<std::shared_ptr<lucene::index::IndexReader>> readers;
     std::unordered_map<std::string, 
std::shared_ptr<lucene::index::IndexReader>> reader_bindings;
     std::unordered_map<std::wstring, 
std::shared_ptr<lucene::index::IndexReader>>
             field_reader_bindings;
+    std::unordered_map<std::string, FieldBindingContext> binding_fields;
     const NullBitmapResolver* null_resolver = nullptr;
 };
 
@@ -52,6 +59,30 @@ public:
     }
 
 protected:
+    const FieldBindingContext* get_field_binding(const QueryExecutionContext& 
ctx,
+                                                 const std::string& 
binding_key) const {
+        auto it = ctx.binding_fields.find(binding_key);
+        if (it != ctx.binding_fields.end()) {
+            return &it->second;
+        }
+        return nullptr;
+    }
+
+    std::string logical_field_or_fallback(const QueryExecutionContext& ctx,
+                                          const std::string& binding_key,
+                                          const std::wstring& fallback) const {
+        const auto* binding = get_field_binding(ctx, binding_key);
+        if (binding != nullptr) {
+            if (!binding->logical_field_name.empty()) {
+                return binding->logical_field_name;
+            }
+            if (!binding->stored_field_name.empty()) {
+                return binding->stored_field_name;
+            }
+        }
+        return std::string(fallback.begin(), fallback.end());
+    }
+
     std::shared_ptr<lucene::index::IndexReader> lookup_reader(
             const std::wstring& field, const QueryExecutionContext& ctx,
             const std::string& binding_key) const {
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_query.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_query.h
index 8cd92418a00..8b71ab9c0d4 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_query.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_query.h
@@ -45,4 +45,4 @@ private:
     std::string _pattern;
 };
 
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_weight.h
 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_weight.h
index da2de84eae3..b906605db29 100644
--- 
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_weight.h
+++ 
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_weight.h
@@ -47,7 +47,10 @@ public:
 private:
     std::string wildcard_to_regex(const std::string& pattern) {
         std::string escaped = RE2::QuoteMeta(pattern);
+        // Replace wildcard characters with regex equivalents
+        // * -> .* (zero or more of any character)
         escaped = std::regex_replace(escaped, std::regex(R"(\\\*)"), ".*");
+        // ? -> . (exactly one of any character)
         escaped = std::regex_replace(escaped, std::regex(R"(\\\?)"), ".");
         return "^" + escaped + "$";
     }
@@ -59,4 +62,4 @@ private:
     bool _enable_scoring = false;
 };
 
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git a/be/src/vec/functions/function_search.cpp 
b/be/src/vec/functions/function_search.cpp
index 19ec3a33612..95e0f868a48 100644
--- a/be/src/vec/functions/function_search.cpp
+++ b/be/src/vec/functions/function_search.cpp
@@ -38,7 +38,10 @@
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/bit_set_query/bit_set_query.h"
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h"
+#include 
"olap/rowset/segment_v2/inverted_index/query_v2/phrase_query/phrase_query.h"
+#include 
"olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h"
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h"
+#include 
"olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_query.h"
 #include "olap/rowset/segment_v2/inverted_index/util/string_helper.h"
 #include "olap/rowset/segment_v2/inverted_index_iterator.h"
 #include "olap/rowset/segment_v2/inverted_index_reader.h"
@@ -225,6 +228,16 @@ Status 
FunctionSearch::evaluate_inverted_index_with_search_param(
     exec_ctx.readers = resolver.readers();
     exec_ctx.reader_bindings = resolver.reader_bindings();
     exec_ctx.field_reader_bindings = resolver.field_readers();
+    for (const auto& [binding_key, binding] : resolver.binding_cache()) {
+        if (binding_key.empty()) {
+            continue;
+        }
+        query_v2::FieldBindingContext binding_ctx;
+        binding_ctx.logical_field_name = binding.logical_field_name;
+        binding_ctx.stored_field_name = binding.stored_field_name;
+        binding_ctx.stored_field_wstr = binding.stored_field_wstr;
+        exec_ctx.binding_fields.emplace(binding_key, std::move(binding_ctx));
+    }
 
     class ResolverAdapter final : public query_v2::NullBitmapResolver {
     public:
@@ -253,7 +266,7 @@ Status 
FunctionSearch::evaluate_inverted_index_with_search_param(
         return Status::OK();
     }
 
-    auto scorer = weight->scorer(exec_ctx);
+    auto scorer = weight->scorer(exec_ctx, root_binding_key);
     if (!scorer) {
         LOG(WARNING) << "search: Failed to build scorer";
         bitmap_result = 
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
@@ -478,7 +491,7 @@ Status FunctionSearch::build_leaf_query(const 
TSearchClause& clause,
     std::wstring value_wstr = StringHelper::to_wstring(value);
 
     auto make_term_query = [&](const std::wstring& term) -> query_v2::QueryPtr 
{
-        return std::make_shared<query_v2::TermQuery>(context, field_wstr, 
term, field_name);
+        return std::make_shared<query_v2::TermQuery>(context, field_wstr, 
term);
     };
 
     if (clause_type == "TERM") {
@@ -497,7 +510,9 @@ Status FunctionSearch::build_leaf_query(const 
TSearchClause& clause,
                             value, binding.index_properties);
             if (term_infos.empty()) {
                 LOG(WARNING) << "search: No terms found after tokenization for 
TERM query, field="
-                             << field_name << ", value='" << value << "'";
+                             << field_name << ", value='" << value
+                             << "', returning empty BitSetQuery";
+                *out = 
std::make_shared<query_v2::BitSetQuery>(roaring::Roaring());
                 return Status::OK();
             }
 
@@ -523,8 +538,47 @@ Status FunctionSearch::build_leaf_query(const 
TSearchClause& clause,
 
     if (category == FunctionSearch::ClauseTypeCategory::TOKENIZED) {
         if (clause_type == "PHRASE") {
-            VLOG_DEBUG << "search: PHRASE clause not implemented, fallback to 
TERM";
-            *out = make_term_query(value_wstr);
+            bool should_analyze = 
inverted_index::InvertedIndexAnalyzer::should_analyzer(
+                    binding.index_properties);
+            if (!should_analyze) {
+                VLOG_DEBUG << "search: PHRASE on non-tokenized field '" << 
field_name
+                           << "', falling back to TERM";
+                *out = make_term_query(value_wstr);
+                return Status::OK();
+            }
+
+            if (binding.index_properties.empty()) {
+                LOG(WARNING) << "search: analyzer required but index 
properties empty for PHRASE "
+                                "query on field '"
+                             << field_name << "'";
+                *out = make_term_query(value_wstr);
+                return Status::OK();
+            }
+
+            std::vector<TermInfo> term_infos =
+                    inverted_index::InvertedIndexAnalyzer::get_analyse_result(
+                            value, binding.index_properties);
+            if (term_infos.empty()) {
+                LOG(WARNING) << "search: No terms found after tokenization for 
PHRASE query, field="
+                             << field_name << ", value='" << value
+                             << "', returning empty BitSetQuery";
+                *out = 
std::make_shared<query_v2::BitSetQuery>(roaring::Roaring());
+                return Status::OK();
+            }
+
+            if (term_infos.size() == 1) {
+                std::wstring term_wstr = 
StringHelper::to_wstring(term_infos[0].get_single_term());
+                *out = make_term_query(term_wstr);
+                return Status::OK();
+            }
+
+            std::vector<std::wstring> terms;
+            for (const auto& term_info : term_infos) {
+                
terms.push_back(StringHelper::to_wstring(term_info.get_single_term()));
+            }
+            *out = std::make_shared<query_v2::PhraseQuery>(context, 
field_wstr, terms);
+            VLOG_DEBUG << "search: Built PhraseQuery for field=" << field_name 
<< " with "
+                       << terms.size() << " terms";
             return Status::OK();
         }
         if (clause_type == "MATCH") {
@@ -553,7 +607,8 @@ Status FunctionSearch::build_leaf_query(const 
TSearchClause& clause,
                             value, binding.index_properties);
             if (term_infos.empty()) {
                 LOG(WARNING) << "search: tokenization yielded no terms for 
clause '" << clause_type
-                             << "', field=" << field_name;
+                             << "', field=" << field_name << ", returning 
empty BitSetQuery";
+                *out = 
std::make_shared<query_v2::BitSetQuery>(roaring::Roaring());
                 return Status::OK();
             }
 
@@ -593,9 +648,28 @@ Status FunctionSearch::build_leaf_query(const 
TSearchClause& clause,
                        << value << "'";
             return Status::OK();
         }
+        if (clause_type == "PREFIX") {
+            *out = std::make_shared<query_v2::WildcardQuery>(context, 
field_wstr, value);
+            VLOG_DEBUG << "search: PREFIX clause processed, field=" << 
field_name << ", pattern='"
+                       << value << "'";
+            return Status::OK();
+        }
+
+        if (clause_type == "WILDCARD") {
+            *out = std::make_shared<query_v2::WildcardQuery>(context, 
field_wstr, value);
+            VLOG_DEBUG << "search: WILDCARD clause processed, field=" << 
field_name << ", pattern='"
+                       << value << "'";
+            return Status::OK();
+        }
+
+        if (clause_type == "REGEXP") {
+            *out = std::make_shared<query_v2::RegexpQuery>(context, 
field_wstr, value);
+            VLOG_DEBUG << "search: REGEXP clause processed, field=" << 
field_name << ", pattern='"
+                       << value << "'";
+            return Status::OK();
+        }
 
-        if (clause_type == "PREFIX" || clause_type == "WILDCARD" || 
clause_type == "REGEXP" ||
-            clause_type == "RANGE" || clause_type == "LIST") {
+        if (clause_type == "RANGE" || clause_type == "LIST") {
             VLOG_DEBUG << "search: clause type '" << clause_type
                        << "' not implemented, fallback to TERM";
         }
diff --git a/be/src/vec/functions/function_search.h 
b/be/src/vec/functions/function_search.h
index 910a8e25936..96e93220f44 100644
--- a/be/src/vec/functions/function_search.h
+++ b/be/src/vec/functions/function_search.h
@@ -93,6 +93,10 @@ public:
         return _field_readers;
     }
 
+    const std::unordered_map<std::string, FieldReaderBinding>& binding_cache() 
const {
+        return _cache;
+    }
+
     IndexIterator* get_iterator(const std::string& field_name) const {
         auto it = _iterators.find(field_name);
         return (it != _iterators.end()) ? it->second : nullptr;
diff --git a/regression-test/data/search/test_search_default_field_operator.out 
b/regression-test/data/search/test_search_default_field_operator.out
new file mode 100644
index 00000000000..c418209e4ea
--- /dev/null
+++ b/regression-test/data/search/test_search_default_field_operator.out
@@ -0,0 +1,79 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !wildcard_prefix --
+1      Chris
+2      Christopher
+
+-- !multi_term_and --
+1      foo bar
+3      bar foo
+
+-- !multi_term_or --
+1      foo bar
+3      bar foo
+4      foolish bark
+
+-- !wildcard_multi_and --
+1      foo bar
+3      bar foo
+4      foolish bark
+
+-- !explicit_or_override --
+1      foo bar
+3      bar foo
+4      foolish bark
+
+-- !exact_function --
+1      foo bar
+
+-- !traditional_syntax --
+1      Chris
+2      Christopher
+
+-- !single_term --
+1      foo bar
+3      bar foo
+
+-- !wildcard_middle --
+1      Chris
+2      Christopher
+
+-- !case_sensitive --
+
+-- !default_or --
+1      foo bar
+3      bar foo
+4      foolish bark
+
+-- !any_function --
+1      foo bar
+3      bar foo
+4      foolish bark
+
+-- !all_function --
+1      foo bar
+3      bar foo
+
+-- !complex_wildcard --
+3      Kevin
+4      kevin
+
+-- !explicit_and --
+1      foo bar
+3      bar foo
+
+-- !multiple_fields --
+1      Chris   foo bar
+2      Christopher     foobar
+4      kevin   foolish bark
+
+-- !not_operator --
+1      foo bar
+3      bar foo
+4      foolish bark
+
+-- !param_count_mix --
+1
+2
+3
+4
+
diff --git a/regression-test/data/search/test_search_dsl_syntax.out 
b/regression-test/data/search/test_search_dsl_syntax.out
index b1f1ba93d4e..e8f6c627121 100644
--- a/regression-test/data/search/test_search_dsl_syntax.out
+++ b/regression-test/data/search/test_search_dsl_syntax.out
@@ -4,10 +4,15 @@
 -- !sql --
 
 -- !sql --
+2      Advanced Deep Learning
 
 -- !sql --
+4      Data Science with R
+6      Database Design Patterns
 
 -- !sql --
+4      Data Science with R
+6      Database Design Patterns
 
 -- !sql --
 1      Machine Learning Introduction
@@ -175,7 +180,6 @@
 14     Test with null tags
 15     Test with null author
 16     Test with null status
-17     Message about success
 18     Error message details
 19     Warning message content
 20     Regular article without msg
diff --git a/regression-test/data/search/test_search_function.out 
b/regression-test/data/search/test_search_function.out
index f418a8c71eb..b24b8676831 100644
--- a/regression-test/data/search/test_search_function.out
+++ b/regression-test/data/search/test_search_function.out
@@ -22,6 +22,8 @@
 -- !sql --
 
 -- !sql --
+4      Data Science Methods
+9      Database Systems
 
 -- !sql --
 2      Deep Learning Tutorial
@@ -33,6 +35,7 @@
 -- !sql --
 
 -- !sql --
+1      Machine Learning Basics
 
 -- !sql --
 0
diff --git a/regression-test/data/search/test_search_null_semantics.out 
b/regression-test/data/search/test_search_null_semantics.out
index 27eddd437be..237bd14bc1b 100644
--- a/regression-test/data/search/test_search_null_semantics.out
+++ b/regression-test/data/search/test_search_null_semantics.out
@@ -11,6 +11,15 @@
 -- !test_case_2_external_not --
 4
 
+-- !test_case_2_phrase_not --
+1
+2
+3
+5
+7
+9
+10
+
 -- !test_case_3_or_with_null --
 1      Ronald Reagan   President of the United States
 3      \N      Biography of Ronald McDonald
@@ -148,4 +157,3 @@
 
 -- !ternary_7_all_null --
 0
-
diff --git 
a/regression-test/suites/search/test_search_default_field_operator.groovy 
b/regression-test/suites/search/test_search_default_field_operator.groovy
new file mode 100644
index 00000000000..fd5c7ce6198
--- /dev/null
+++ b/regression-test/suites/search/test_search_default_field_operator.groovy
@@ -0,0 +1,230 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_default_field_operator") {
+    def tableName = "search_enhanced_test"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+
+    // Create table with inverted indexes
+    // firstname: lower_case is set but no parser, so wildcard search stays case-sensitive
+    // tags: with parser for tokenized search
+    // tags_exact: without parser specification (default behavior) for exact 
matching
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            firstname VARCHAR(100),
+            tags VARCHAR(200),
+            tags_exact VARCHAR(200),
+            INDEX idx_firstname(firstname) USING INVERTED 
PROPERTIES("lower_case" = "true"),
+            INDEX idx_tags(tags) USING INVERTED PROPERTIES("parser" = 
"english"),
+            INDEX idx_tags_exact(tags_exact) USING INVERTED
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+    """
+
+    // Insert test data covering the wildcard and multi-term cases exercised below
+    sql """INSERT INTO ${tableName} VALUES
+        (1, 'Chris', 'foo bar', 'foo bar'),
+        (2, 'Christopher', 'foobar', 'foobar'),
+        (3, 'Kevin', 'bar foo', 'bar foo'),
+        (4, 'kevin', 'foolish bark', 'foolish bark')
+    """
+
+    // Wait for index building
+    Thread.sleep(3000)
+
+    // ============ Test 1: Wildcard Prefix with Default Field ============
+    // Requirement: firstname EQ Chris*
+    // SQL: search('Chris*', 'firstname')
+    // Expected: Chris (1), Christopher (2)
+    // Note: Without parser, inverted index is case-sensitive
+    qt_wildcard_prefix """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, firstname
+        FROM ${tableName}
+        WHERE search('Chris*', 'firstname')
+        ORDER BY id
+    """
+
+    // ============ Test 2: Multi-term AND with Default Operator ============
+    // Requirement: tags EQ foo bar (with AND semantics)
+    // SQL: search('foo bar', 'tags', 'and')
+    // Expected: 'foo bar' (1), 'bar foo' (3)
+    qt_multi_term_and """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('foo bar', 'tags', 'and')
+        ORDER BY id
+    """
+
+    // ============ Test 3: Multi-term OR with Default Operator ============
+    // Requirement: tags EQ foo OR bark (with OR semantics)
+    // SQL: search('foo bark', 'tags', 'or')
+    // Expected: 'foo bar' (1), 'bar foo' (3), 'foolish bark' (4)
+    qt_multi_term_or """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('foo bark', 'tags', 'or')
+        ORDER BY id
+    """
+
+    // ============ Test 4: Multi-wildcard AND ============
+    // Requirement: tags EQ foo* bar* (with AND semantics)
+    // SQL: search('foo* bar*', 'tags', 'and')
+    // Expands to: tags:foo* AND tags:bar*
+    // Expected: rows with tokens matching foo* AND tokens matching bar*
+    // - 'foo bar' (1): tokens=['foo','bar'] - matches foo* ✓ and bar* ✓
+    // - 'foobar' (2): tokens=['foobar'] - matches foo* ✓ but NOT bar* ✗ 
(excluded)
+    // - 'bar foo' (3): tokens=['bar','foo'] - matches foo* ✓ and bar* ✓
+    // - 'foolish bark' (4): tokens=['foolish','bark'] - matches foo* ✓ and 
bar* ✓
+    qt_wildcard_multi_and """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('foo* bar*', 'tags', 'and')
+        ORDER BY id
+    """
+
+    // ============ Test 5: Explicit OR operator overrides default ============
+    // SQL: search('foo OR bark', 'tags', 'and')
+    // The explicit OR in DSL should override the default 'and' operator
+    // Expected: 'foo bar' (1), 'bar foo' (3), 'foolish bark' (4)
+    qt_explicit_or_override """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('foo OR bark', 'tags', 'and')
+        ORDER BY id
+    """
+
+    // ============ Test 6: EXACT function with default field ============
+    // Requirement: EXACT(foo bar) on tags_exact field (no tokenization)
+    // SQL: search('EXACT(foo bar)', 'tags_exact')
+    // Expected: 'foo bar' (1) only - exact string match
+    qt_exact_function """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags_exact
+        FROM ${tableName}
+        WHERE search('EXACT(foo bar)', 'tags_exact')
+        ORDER BY id
+    """
+
+    // ============ Test 7: Traditional syntax still works ============
+    // Ensure backward compatibility - original syntax unchanged
+    qt_traditional_syntax """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, firstname
+        FROM ${tableName}
+        WHERE search('firstname:Chris*')
+        ORDER BY id
+    """
+
+    // ============ Test 8: Single term with default field ============
+    qt_single_term """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('bar', 'tags')
+        ORDER BY id
+    """
+
+    // ============ Test 9: Wildcard in middle ============
+    qt_wildcard_middle """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, firstname
+        FROM ${tableName}
+        WHERE search('*ris*', 'firstname')
+        ORDER BY id
+    """
+
+    // ============ Test 10: Case sensitivity for wildcard ============
+    // Without parser, wildcard queries are case-sensitive (matches Lucene 
behavior)
+    // CHRIS* won't match Chris/Christopher
+    qt_case_sensitive """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, firstname
+        FROM ${tableName}
+        WHERE search('CHRIS*', 'firstname')
+        ORDER BY id
+    """
+
+    // ============ Test 11: Default operator is OR when not specified 
============
+    qt_default_or """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('foo bark', 'tags')
+        ORDER BY id
+    """
+
+    // ============ Test 12: ANY function with default field ============
+    qt_any_function """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('ANY(foo bark)', 'tags')
+        ORDER BY id
+    """
+
+    // ============ Test 13: ALL function with default field ============
+    qt_all_function """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('ALL(foo bar)', 'tags')
+        ORDER BY id
+    """
+
+    // ============ Test 14: Complex wildcard pattern ============
+    qt_complex_wildcard """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, firstname
+        FROM ${tableName}
+        WHERE search('?evin', 'firstname')
+        ORDER BY id
+    """
+
+    // ============ Test 15: Default field with explicit AND ============
+    qt_explicit_and """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('foo AND bar', 'tags')
+        ORDER BY id
+    """
+
+    // ============ Test 16: Multiple fields still work ============
+    qt_multiple_fields """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, firstname, 
tags
+        FROM ${tableName}
+        WHERE search('firstname:Chris* OR tags:bark')
+        ORDER BY id
+    """
+
+    // ============ Test 17: NOT operator with default field ============
+    qt_not_operator """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, tags
+        FROM ${tableName}
+        WHERE search('NOT foobar', 'tags')
+        ORDER BY id
+    """
+
+    // ============ Test 18: Combining different parameter counts ============
+    // Tests mixing 1-param and 3-param search() calls in the same query
+    // - search('firstname:Chris*'): 1-param, traditional syntax → matches id 
1,2
+    // - search('foo*', 'tags', 'or'): 3-param with wildcard → matches id 1,2,3,4
+    // - OR combination → matches id 1,2,3,4 (all rows)
+    qt_param_count_mix """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id
+        FROM ${tableName}
+        WHERE search('firstname:Chris*') OR search('foo*', 'tags', 'or')
+        ORDER BY id
+    """
+
+    // Cleanup
+    sql "DROP TABLE IF EXISTS ${tableName}"
+}
diff --git a/regression-test/suites/search/test_search_null_semantics.groovy 
b/regression-test/suites/search/test_search_null_semantics.groovy
index 269a27056cf..c7d97c18bdc 100644
--- a/regression-test/suites/search/test_search_null_semantics.groovy
+++ b/regression-test/suites/search/test_search_null_semantics.groovy
@@ -81,6 +81,13 @@ suite("test_search_null_semantics") {
         WHERE not search('content:Round')
     """
 
+    // Test Case 2b: Phrase NOT queries must treat NULL rows as UNKNOWN
+    qt_test_case_2_phrase_not """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM 
${tableName}
+        WHERE NOT search('content:"Selma Blair"')
+        ORDER BY id
+    """
+
     // Test Case 3: NULL handling in OR queries
     // Verify that NULL OR TRUE = TRUE logic works
     qt_test_case_3_or_with_null """


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to