airborne12 commented on code in PR #20156:
URL: https://github.com/apache/doris/pull/20156#discussion_r1209605689


##########
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:
##########
@@ -222,101 +223,148 @@ Status FullTextIndexReader::query(OlapReaderStatistics* 
stats, const std::string
             return Status::Error<ErrorCode::INVERTED_INDEX_NO_TERMS>();
         }
 
+        auto index_search = [&](bool& null_bitmap_already_read,
+                                std::shared_ptr<roaring::Roaring>& 
term_match_bitmap,
+                                InvertedIndexQueryCache* cache,
+                                InvertedIndexQueryCache::CacheKey& cache_key,
+                                InvertedIndexQueryCacheHandle& cache_handle) {
+            // check index file existence
+            if (!indexExists(index_file_path)) {
+                LOG(WARNING) << "inverted index path: " << 
index_file_path.string()
+                            << " not exist.";
+                return 
Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
+            }
+
+            InvertedIndexCacheHandle inverted_index_cache_handle;
+            InvertedIndexSearcherCache::instance()->get_index_searcher(
+                    _fs, index_dir.c_str(), index_file_name, 
&inverted_index_cache_handle,
+                    stats);
+            auto index_searcher = 
inverted_index_cache_handle.get_index_searcher();
+
+            // try to reuse index_searcher's directory to read null_bitmap to 
cache
+            // to avoid open directory additionally for null_bitmap
+            if (!null_bitmap_already_read) {
+                InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
+                read_null_bitmap(&null_bitmap_cache_handle,
+                                index_searcher->getReader()->directory());
+                null_bitmap_already_read = true;
+            }
+
+            try {
+                SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
+                index_searcher->_search(
+                        query.get(),
+                        [&term_match_bitmap](const int32_t docid, const 
float_t /*score*/) {
+                            // docid equal to rowid in segment
+                            term_match_bitmap->add(docid);
+                        });
+            } catch (const CLuceneError& e) {
+                LOG(WARNING) << "CLuceneError occured: " << e.what();
+                return 
Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+            }
+
+            {
+                // add to cache
+                term_match_bitmap->runOptimize();
+                cache->insert(cache_key, term_match_bitmap, &cache_handle);
+            }
+            return Status::OK();
+        };
+
         roaring::Roaring query_match_bitmap;
-        bool first = true;
         bool null_bitmap_already_read = false;
-        for (auto token_ws : analyse_result) {
-            std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
+        if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
+            std::wstring str_tokens;
+            auto* phrase_query = new lucene::search::PhraseQuery();
+            int32_t slop = phrase_query->getSlop();
+            for (auto& token : analyse_result) {
+                auto* term = _CLNEW lucene::index::Term(field_ws.c_str(), 
token.c_str());
+                phrase_query->add(term);
+                _CLDECDELETE(term);
+                str_tokens += token;
+                str_tokens += std::to_wstring(slop);
+            }
 
-            // try to get term bitmap match result from cache to avoid query 
index on cache hit
             auto cache = InvertedIndexQueryCache::instance();
-            // use EQUAL_QUERY type here since cache is for each term/token
-            InvertedIndexQueryCache::CacheKey cache_key {
-                    index_file_path, column_name, 
InvertedIndexQueryType::EQUAL_QUERY, token_ws};
+            InvertedIndexQueryCache::CacheKey cache_key;
+            cache_key.index_path = index_file_path;
+            cache_key.column_name = column_name;
+            cache_key.query_type = InvertedIndexQueryType::MATCH_PHRASE_QUERY;
+            cache_key.value.swap(str_tokens);
             InvertedIndexQueryCacheHandle cache_handle;
+            std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
             if (cache->lookup(cache_key, &cache_handle)) {
                 stats->inverted_index_query_cache_hit++;
                 term_match_bitmap = cache_handle.get_bitmap();
             } else {
                 stats->inverted_index_query_cache_miss++;
 
-                // check index file existence
-                if (!indexExists(index_file_path)) {
-                    LOG(WARNING) << "inverted index path: " << 
index_file_path.string()
-                                 << " not exist.";
-                    return 
Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
-                }
-
                 term_match_bitmap = std::make_shared<roaring::Roaring>();
-                // unique_ptr with custom deleter
-                std::unique_ptr<lucene::index::Term, void 
(*)(lucene::index::Term*)> term {
-                        _CLNEW lucene::index::Term(field_ws.c_str(), 
token_ws.c_str()),
-                        [](lucene::index::Term* term) { _CLDECDELETE(term); }};
-                query.reset(new lucene::search::TermQuery(term.get()));
-
-                InvertedIndexCacheHandle inverted_index_cache_handle;
-                InvertedIndexSearcherCache::instance()->get_index_searcher(
-                        _fs, index_dir.c_str(), index_file_name, 
&inverted_index_cache_handle,
-                        stats);
-                auto index_searcher = 
inverted_index_cache_handle.get_index_searcher();
-
-                // try to reuse index_searcher's directory to read null_bitmap 
to cache
-                // to avoid open directory additionally for null_bitmap
-                if (!null_bitmap_already_read) {
-                    InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
-                    read_null_bitmap(&null_bitmap_cache_handle,
-                                     index_searcher->getReader()->directory());
-                    null_bitmap_already_read = true;
-                }
+                query.reset(phrase_query);
 
-                try {
-                    
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
-                    index_searcher->_search(
-                            query.get(),
-                            [&term_match_bitmap](const int32_t docid, const 
float_t /*score*/) {
-                                // docid equal to rowid in segment
-                                term_match_bitmap->add(docid);
-                            });
-                } catch (const CLuceneError& e) {
-                    LOG(WARNING) << "CLuceneError occured: " << e.what();
-                    return 
Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+                Status res = index_search(null_bitmap_already_read, 
term_match_bitmap, cache,
+                                          cache_key, cache_handle);
+                if (!res.ok()) return res;
+            }
+            query_match_bitmap = *term_match_bitmap;
+        } else {
+            bool first = true;
+            for (auto token_ws : analyse_result) {
+                std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
+
+                // try to get term bitmap match result from cache to avoid 
query index on cache hit
+                auto cache = InvertedIndexQueryCache::instance();
+                // use EQUAL_QUERY type here since cache is for each term/token
+                InvertedIndexQueryCache::CacheKey cache_key {
+                        index_file_path, column_name, 
InvertedIndexQueryType::EQUAL_QUERY, token_ws};
+                InvertedIndexQueryCacheHandle cache_handle;
+                if (cache->lookup(cache_key, &cache_handle)) {
+                    stats->inverted_index_query_cache_hit++;
+                    term_match_bitmap = cache_handle.get_bitmap();
+                } else {
+                    stats->inverted_index_query_cache_miss++;
+
+                    term_match_bitmap = std::make_shared<roaring::Roaring>();
+                    // unique_ptr with custom deleter
+                    std::unique_ptr<lucene::index::Term, void 
(*)(lucene::index::Term*)> term {
+                            _CLNEW lucene::index::Term(field_ws.c_str(), 
token_ws.c_str()),
+                            [](lucene::index::Term* term) { 
_CLDECDELETE(term); }};
+                    query.reset(new lucene::search::TermQuery(term.get()));
+
+                    Status res = index_search(null_bitmap_already_read, 
term_match_bitmap, cache,
+                                              cache_key, cache_handle);
+                    if (!res.ok()) return res;
                 }
 
-                {
-                    // add to cache
-                    term_match_bitmap->runOptimize();
-                    cache->insert(cache_key, term_match_bitmap, &cache_handle);
+                // add to query_match_bitmap
+                if (first) {
+                    
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
+                    query_match_bitmap = *term_match_bitmap;
+                    first = false;
+                    continue;
                 }
-            }
 
-            // add to query_match_bitmap
-            if (first) {
-                
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
-                query_match_bitmap = *term_match_bitmap;
-                first = false;
-                continue;
-            }
-
-            switch (query_type) {
-            case InvertedIndexQueryType::MATCH_ANY_QUERY: {
-                SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_op_timer);
-                query_match_bitmap |= *term_match_bitmap;
-                break;
-            }
-            case InvertedIndexQueryType::EQUAL_QUERY:
-            case InvertedIndexQueryType::MATCH_ALL_QUERY: {
-                SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_op_timer);
-                query_match_bitmap &= *term_match_bitmap;
-                break;
-            }
-            case InvertedIndexQueryType::MATCH_PHRASE_QUERY: {
-                return 
Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
-                break;
-            }
-            default: {
-                LOG(ERROR) << "fulltext query do not support query type other 
than match.";
-                return 
Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
-            }
+                switch (query_type) {
+                case InvertedIndexQueryType::MATCH_ANY_QUERY: {
+                    
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_op_timer);
+                    query_match_bitmap |= *term_match_bitmap;
+                    break;
+                }
+                case InvertedIndexQueryType::EQUAL_QUERY:
+                case InvertedIndexQueryType::MATCH_ALL_QUERY: {
+                    
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_op_timer);
+                    query_match_bitmap &= *term_match_bitmap;
+                    break;
+                }
+                case InvertedIndexQueryType::MATCH_PHRASE_QUERY: {
+                    return 
Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();

Review Comment:
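   Need to fix this: `MATCH_PHRASE_QUERY` is now handled by the dedicated `if` branch above, so this `case` inside the `else` branch's `switch` can no longer be reached, yet it still returns `INVERTED_INDEX_NOT_SUPPORTED`.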



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to