This is an automated email from the ASF dual-hosted git repository. jianliangqi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new f313f1f8c1d [opt](inverted index) Optimize sequential phrase query logic (#41432) f313f1f8c1d is described below commit f313f1f8c1d9ea18626a39c8c472bceccab27b9a Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com> AuthorDate: Thu Oct 10 14:17:57 2024 +0800 [opt](inverted index) Optimize sequential phrase query logic (#41432) 1. Set enable_phrase_query_sequential_opt = true to optimize conjunction matching in sequential phrase queries. 2. For example, match_phrase "赵丽颖 中国 ~20+" ensures that "赵丽颖" appears consecutively, and "中国" also appears consecutively, while maintaining the semantics of sequential phrase queries. --- .../inverted_index/analyzer/analyzer.cpp | 17 +++++ .../segment_v2/inverted_index/analyzer/analyzer.h | 6 ++ .../inverted_index/query/phrase_query.cpp | 87 +++++++++++++++------- .../segment_v2/inverted_index/query/phrase_query.h | 14 +++- .../rowset/segment_v2/inverted_index/query/query.h | 10 +++ .../rowset/segment_v2/inverted_index_reader.cpp | 23 ++---- .../inverted_index/query/phrase_query_test.cpp | 83 +++++++++++++++++++++ .../java/org/apache/doris/qe/SessionVariable.java | 9 +++ gensrc/thrift/PaloInternalService.thrift | 2 + .../test_index_match_phrase_ordered.out | 6 ++ .../test_index_match_phrase_ordered.groovy | 64 +++++++++++++++- 11 files changed, 273 insertions(+), 48 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp index 8ad1abb322f..94ba8fce0bc 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp @@ -115,4 +115,21 @@ std::vector<std::string> InvertedIndexAnalyzer::get_analyse_result( return analyse_result; } +std::vector<std::string> InvertedIndexAnalyzer::get_analyse_result( + const std::string& search_str, const std::string& field_name, + InvertedIndexQueryType query_type, const std::map<std::string, std::string>& properties) { + InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>( + get_inverted_index_parser_type_from_string( + get_parser_string_from_properties(properties)), + get_parser_mode_string_from_properties(properties), + get_parser_char_filter_map_from_properties(properties), + get_parser_lowercase_from_properties(properties), + get_parser_stopwords_from_properties(properties)); + auto analyzer = create_analyzer(inverted_index_ctx.get()); + inverted_index_ctx->analyzer = analyzer.get(); + auto reader = create_reader(inverted_index_ctx->char_filter_map); + reader->init(search_str.data(), search_str.size(), true); + return get_analyse_result(reader.get(), analyzer.get(), field_name, query_type); +} + } // namespace doris::segment_v2::inverted_index diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h index ad5d71a5364..6f369d504b2 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h @@ -32,6 +32,7 @@ class Analyzer; } // namespace lucene namespace doris::segment_v2::inverted_index { + class InvertedIndexAnalyzer { public: static std::unique_ptr<lucene::util::Reader> create_reader(CharFilterMap& char_filter_map); @@ -44,5 +45,10 @@ public: const std::string& field_name, InvertedIndexQueryType query_type, bool drop_duplicates = true); + + static std::vector<std::string> get_analyse_result( + const std::string& search_str, const std::string& field_name, + InvertedIndexQueryType query_type, + const std::map<std::string, std::string>& properties); }; } // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp index 0ca2dce94e3..9a3ecc68f89 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp @@ -17,8 +17,13 @@ #include "phrase_query.h" +#include <boost/algorithm/string.hpp> +#include <boost/algorithm/string/split.hpp> #include <charconv> +#include "CLucene/index/Terms.h" +#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h" + namespace doris::segment_v2 { template <typename Derived> @@ -141,19 +146,21 @@ void PhraseQuery::add(const InvertedIndexQueryInfo& query_info) { _slop = query_info.slop; if (_slop == 0 || query_info.ordered) { + if (query_info.ordered) { + _additional_terms = query_info.additional_terms; + } // Logic for no slop query and ordered phrase query add(query_info.field_name, query_info.terms); } else { // Simple slop query follows the default phrase query algorithm - auto query = std::make_unique<CL_NS(search)::PhraseQuery>(); + _phrase_query = std::make_unique<CL_NS(search)::PhraseQuery>(); for (const auto& term : query_info.terms) { std::wstring ws_term = StringUtil::string_to_wstring(term); auto* t = _CLNEW lucene::index::Term(query_info.field_name.c_str(), ws_term.c_str()); - query->add(t); + _phrase_query->add(t); _CLDECDELETE(t); } - query->setSlop(_slop); - _matcher = std::move(query); + _phrase_query->setSlop(_slop); } } @@ -173,13 +180,16 @@ void PhraseQuery::add(const std::wstring& field_name, const std::vector<std::str } std::vector<TermIterator> iterators; - auto ensureTermPosition = [this, &iterators, &field_name](const std::string& term) { + auto ensureTermPosition = [this, &iterators, &field_name](const std::string& term, + bool is_save_iter = true) { std::wstring ws_term = StringUtil::string_to_wstring(term); Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str()); _terms.push_back(t); TermPositions* term_pos = _searcher->getReader()->termPositions(t); _term_docs.push_back(term_pos); - iterators.emplace_back(term_pos); + if (is_save_iter) { + iterators.emplace_back(term_pos); + } return term_pos; }; @@ -190,16 +200,29 @@ void PhraseQuery::add(const std::wstring& field_name, const std::vector<std::str auto* term_pos = ensureTermPosition(term); matcher._postings.emplace_back(term_pos, i); } - _matcher = matcher; + _matchers.emplace_back(matcher); } else { - OrderedSloppyPhraseMatcher matcher; - for (size_t i = 0; i < terms.size(); i++) { - const auto& term = terms[i]; - auto* term_pos = ensureTermPosition(term); - matcher._postings.emplace_back(term_pos, i); + { + OrderedSloppyPhraseMatcher single_matcher; + for (size_t i = 0; i < terms.size(); i++) { + const auto& term = terms[i]; + auto* term_pos = ensureTermPosition(term); + single_matcher._postings.emplace_back(term_pos, i); + } + single_matcher._allowed_slop = _slop; + _matchers.emplace_back(single_matcher); + } + { + for (auto& terms : _additional_terms) { + ExactPhraseMatcher single_matcher; + for (size_t i = 0; i < terms.size(); i++) { + const auto& term = terms[i]; + auto* term_pos = ensureTermPosition(term, false); + single_matcher._postings.emplace_back(term_pos, i); + } + _matchers.emplace_back(std::move(single_matcher)); + } } - matcher._allowed_slop = _slop; - _matcher = matcher; } std::sort(iterators.begin(), iterators.end(), [](const TermIterator& a, const TermIterator& b) { @@ -214,9 +237,9 @@ void PhraseQuery::add(const std::wstring& field_name, const std::vector<std::str } void PhraseQuery::search(roaring::Roaring& roaring) { - if (std::holds_alternative<PhraseQueryPtr>(_matcher)) { + if (_phrase_query) { _searcher->_search( - std::get<PhraseQueryPtr>(_matcher).get(), + _phrase_query.get(), [&roaring](const int32_t docid, const float_t /*score*/) { roaring.add(docid); }); } else { if (_lead1.isEmpty()) { @@ -288,17 +311,9 @@ int32_t PhraseQuery::do_next(int32_t doc) { } bool PhraseQuery::matches(int32_t doc) { - return std::visit( - [&doc](auto&& m) -> bool { - using T = std::decay_t<decltype(m)>; - if constexpr (std::is_same_v<T, PhraseQueryPtr>) { - _CLTHROWA(CL_ERR_IllegalArgument, - "PhraseQueryPtr does not support matches function"); - } else { - return m.matches(doc); - } - }, - _matcher); + return std::ranges::all_of(_matchers, [&doc](auto&& matcher) { + return std::visit([&doc](auto&& m) -> bool { return m.matches(doc); }, matcher); + }); } void PhraseQuery::parser_slop(std::string& query, InvertedIndexQueryInfo& query_info) { @@ -343,6 +358,24 @@ void PhraseQuery::parser_slop(std::string& query, InvertedIndexQueryInfo& query_ } } +void PhraseQuery::parser_info(std::string& query, const std::string& field_name, + InvertedIndexQueryType query_type, + const std::map<std::string, std::string>& properties, + InvertedIndexQueryInfo& query_info, bool sequential_opt) { + parser_slop(query, query_info); + query_info.terms = inverted_index::InvertedIndexAnalyzer::get_analyse_result( + query, field_name, query_type, properties); + if (sequential_opt && query_info.ordered) { + std::vector<std::string> t_querys; + boost::split(t_querys, query, boost::algorithm::is_any_of(" ")); + for (auto& t_query : t_querys) { + auto terms = inverted_index::InvertedIndexAnalyzer::get_analyse_result( + t_query, field_name, query_type, properties); + query_info.additional_terms.emplace_back(std::move(terms)); + } + } +} + template class PhraseMatcherBase<ExactPhraseMatcher>; template class PhraseMatcherBase<OrderedSloppyPhraseMatcher>; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h index 253ba782b78..35a479ff7f9 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h @@ -24,6 +24,8 @@ #include <variant> +#include "olap/rowset/segment_v2/inverted_index_query_type.h" + CL_NS_USE(index) CL_NS_USE(search) @@ -76,11 +78,11 @@ private: int32_t _match_width = -1; }; -using PhraseQueryPtr = std::unique_ptr<CL_NS(search)::PhraseQuery>; // ExactPhraseMatcher: x match_phrase 'aaa bbb' // PhraseQueryPtr: x match_phrase 'aaa bbb ~2', support slop // OrderedSloppyPhraseMatcher: x match_phrase 'aaa bbb ~2+', ensuring that the words appear in the specified order. -using Matcher = std::variant<ExactPhraseMatcher, OrderedSloppyPhraseMatcher, PhraseQueryPtr>; +using PhraseQueryPtr = std::unique_ptr<CL_NS(search)::PhraseQuery>; +using Matcher = std::variant<ExactPhraseMatcher, OrderedSloppyPhraseMatcher>; class PhraseQuery : public Query { public: @@ -103,6 +105,10 @@ private: public: static void parser_slop(std::string& query, InvertedIndexQueryInfo& query_info); + static void parser_info(std::string& query, const std::string& field_name, + InvertedIndexQueryType query_type, + const std::map<std::string, std::string>& properties, + InvertedIndexQueryInfo& query_info, bool sequential_opt); private: std::shared_ptr<lucene::search::IndexSearcher> _searcher; @@ -117,7 +123,9 @@ private: std::vector<TermDocs*> _term_docs; int32_t _slop = 0; - Matcher _matcher; + std::vector<std::vector<std::string>> _additional_terms; + PhraseQueryPtr _phrase_query = nullptr; + std::vector<Matcher> _matchers; }; } // namespace doris::segment_v2 \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/query.h index cef7fd51f72..c295765ec63 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/query.h @@ -38,8 +38,18 @@ namespace doris::segment_v2 { struct InvertedIndexQueryInfo { std::wstring field_name; std::vector<std::string> terms; + std::vector<std::vector<std::string>> additional_terms; int32_t slop = 0; bool ordered = false; + + std::string to_string() { + std::string s; + s += std::to_string(terms.size()) + ", "; + s += std::to_string(additional_terms.size()) + ", "; + s += std::to_string(slop) + ", "; + s += std::to_string(ordered); + return s; + } }; class Query { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 7b8504322d2..b7cfe7dfaff 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -266,24 +266,13 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run query_info.terms.emplace_back(search_str); } else { if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) { - PhraseQuery::parser_slop(search_str, query_info); + PhraseQuery::parser_info( + search_str, column_name, query_type, _index_meta.properties(), query_info, + runtime_state->query_options().enable_phrase_query_sequential_opt); + } else { + query_info.terms = inverted_index::InvertedIndexAnalyzer::get_analyse_result( + search_str, column_name, query_type, _index_meta.properties()); } - - InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>( - get_inverted_index_parser_type_from_string( - get_parser_string_from_properties(_index_meta.properties())), - get_parser_mode_string_from_properties(_index_meta.properties()), - get_parser_char_filter_map_from_properties(_index_meta.properties()), - get_parser_lowercase_from_properties(_index_meta.properties()), - get_parser_stopwords_from_properties(_index_meta.properties())); - auto analyzer = inverted_index::InvertedIndexAnalyzer::create_analyzer( - inverted_index_ctx.get()); - inverted_index_ctx->analyzer = analyzer.get(); - auto reader = inverted_index::InvertedIndexAnalyzer::create_reader( - inverted_index_ctx->char_filter_map); - reader->init(search_str.data(), search_str.size(), true); - query_info.terms = inverted_index::InvertedIndexAnalyzer::get_analyse_result( - reader.get(), analyzer.get(), column_name, query_type); } if (query_info.terms.empty()) { auto msg = fmt::format( diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query/phrase_query_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query/phrase_query_test.cpp new file mode 100644 index 00000000000..f3fb9763c9b --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/query/phrase_query_test.cpp @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index/query/phrase_query.h" + +#include <gtest/gtest.h> + +#include "io/fs/local_file_system.h" + +namespace doris::segment_v2 { + +class PhraseQueryTest : public testing::Test { +public: + const std::string kTestDir = "./ut_dir/phrase_query_test"; + + void SetUp() override { + auto st = io::global_local_filesystem()->delete_directory(kTestDir); + ASSERT_TRUE(st.ok()) << st; + st = io::global_local_filesystem()->create_directory(kTestDir); + ASSERT_TRUE(st.ok()) << st; + } + void TearDown() override { + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); + } + + PhraseQueryTest() = default; + ~PhraseQueryTest() override = default; +}; + +TEST_F(PhraseQueryTest, test_parser_info) { + std::map<std::string, std::string> properties; + properties.insert({"parser", "english"}); + properties.insert({"support_phrase", "true"}); + properties.insert({"lower_case", "true"}); + + auto parser_info = [&properties](std::string& search_str, InvertedIndexQueryInfo& query_info, + bool sequential_opt) { + PhraseQuery::parser_info(search_str, "name", InvertedIndexQueryType::MATCH_REGEXP_QUERY, + properties, query_info, sequential_opt); + }; + + auto parser = [&parser_info](std::string search_str, std::string res1, size_t res2, + int32_t res3, bool res4, size_t res5) { + InvertedIndexQueryInfo query_info; + parser_info(search_str, query_info, true); + EXPECT_EQ(search_str, res1); + EXPECT_EQ(query_info.terms.size(), res2); + EXPECT_EQ(query_info.slop, res3); + EXPECT_EQ(query_info.ordered, res4); + EXPECT_EQ(query_info.additional_terms.size(), res5); + std::cout << "--- 1 ---: " << query_info.to_string() << std::endl; + }; + + // "english/history off.gif ~20+" sequential_opt = true + parser("", "", 0, 0, false, 0); + parser("english", "english", 1, 0, false, 0); + parser("english/history", "english/history", 2, 0, false, 0); + parser("english/history off", "english/history off", 3, 0, false, 0); + parser("english/history off.gif", "english/history off.gif", 4, 0, false, 0); + parser("english/history off.gif ", "english/history off.gif ", 4, 0, false, 0); + parser("english/history off.gif ~", "english/history off.gif ~", 4, 0, false, 0); + parser("english/history off.gif ~2", "english/history off.gif", 4, 2, false, 0); + parser("english/history off.gif ~20", "english/history off.gif", 4, 20, false, 0); + parser("english/history off.gif ~20+", "english/history off.gif", 4, 20, true, 2); + parser("english/history off.gif ~20+ ", "english/history off.gif ~20+ ", 5, 0, false, 0); + parser("english/history off.gif ~20+x", "english/history off.gif ~20+x", 6, 0, false, 0); +} + +} // namespace doris::segment_v2 \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 638ea712ce9..60d1b914c95 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -663,6 +663,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ADAPTIVE_PIPELINE_TASK_SERIAL_READ_ON_LIMIT = "adaptive_pipeline_task_serial_read_on_limit"; + public static final String ENABLE_PHRASE_QUERY_SEQUENYIAL_OPT = "enable_phrase_query_sequential_opt"; + /** * If set false, user couldn't submit analyze SQL and FE won't allocate any related resources. */ @@ -2169,6 +2171,12 @@ public class SessionVariable implements Serializable, Writable { }) public int adaptivePipelineTaskSerialReadOnLimit = 10000; + @VariableMgr.VarAttr(name = ENABLE_PHRASE_QUERY_SEQUENYIAL_OPT, needForward = true, description = { + "开启顺序短语查询对连词的优化", + "enable optimization for conjunctions in sequential phrase queries" + }) + public boolean enablePhraseQuerySequentialOpt = true; + public void setEnableEsParallelScroll(boolean enableESParallelScroll) { this.enableESParallelScroll = enableESParallelScroll; } @@ -3770,6 +3778,7 @@ public class SessionVariable implements Serializable, Writable { tResult.setEnableAdaptivePipelineTaskSerialReadOnLimit(enableAdaptivePipelineTaskSerialReadOnLimit); tResult.setAdaptivePipelineTaskSerialReadOnLimit(adaptivePipelineTaskSerialReadOnLimit); tResult.setInListValueCountThreshold(inListValueCountThreshold); + tResult.setEnablePhraseQuerySequentialOpt(enablePhraseQuerySequentialOpt); return tResult; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 332b6c6e425..b560059819f 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -349,6 +349,8 @@ struct TQueryOptions { 135: optional bool enable_parallel_outfile = false; + 136: optional bool enable_phrase_query_sequential_opt = true; + // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query. diff --git a/regression-test/data/inverted_index_p0/test_index_match_phrase_ordered.out b/regression-test/data/inverted_index_p0/test_index_match_phrase_ordered.out index d1e04ececd5..8636b6af5ac 100644 --- a/regression-test/data/inverted_index_p0/test_index_match_phrase_ordered.out +++ b/regression-test/data/inverted_index_p0/test_index_match_phrase_ordered.out @@ -65,3 +65,9 @@ -- !sql -- 7 +-- !sql -- +25 + +-- !sql -- +87 + diff --git a/regression-test/suites/inverted_index_p0/test_index_match_phrase_ordered.groovy b/regression-test/suites/inverted_index_p0/test_index_match_phrase_ordered.groovy index a65811d4f65..0f563835e86 100644 --- a/regression-test/suites/inverted_index_p0/test_index_match_phrase_ordered.groovy +++ b/regression-test/suites/inverted_index_p0/test_index_match_phrase_ordered.groovy @@ -17,9 +17,11 @@ suite("test_index_match_phrase_ordered", "nonConcurrent"){ - def indexTbName1 = "test_index_match_phrase_ordered" + def indexTbName1 = "test_index_match_phrase_ordered_1" + def indexTbName2 = "test_index_match_phrase_ordered_2" sql "DROP TABLE IF EXISTS ${indexTbName1}" + sql "DROP TABLE IF EXISTS ${indexTbName2}" sql """ CREATE TABLE ${indexTbName1} ( @@ -35,6 +37,61 @@ suite("test_index_match_phrase_ordered", "nonConcurrent"){ ); """ + sql """ + CREATE TABLE ${indexTbName2} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, + expected_succ_rows = -1, load_to_single_tablet = 'true' -> + + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'label', label + "_" + UUID.randomUUID().toString() + set 'read_json_by_line', read_flag + set 'format', format_flag + file file_name // import json file + time 10000 // limit inflight 10s + if (expected_succ_rows >= 0) { + set 'max_filter_ratio', '1' + } + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (ignore_failure && expected_succ_rows < 0) { return } + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + if (expected_succ_rows >= 0) { + assertEquals(json.NumberLoadedRows, expected_succ_rows) + } else { + assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + sql """ INSERT INTO ${indexTbName1} VALUES (1, "the quick brown fox jumped over the lazy dog"); """ sql """ INSERT INTO ${indexTbName1} VALUES (2, "the quick brown fox jumped over the lazy dog over"); """ sql """ INSERT INTO ${indexTbName1} VALUES (3, "the quick brown fox jumped over the lazy dog jumped"); """ @@ -48,6 +105,8 @@ suite("test_index_match_phrase_ordered", "nonConcurrent"){ sql """ INSERT INTO ${indexTbName1} VALUES (11, "quick brown fox jumped over the lazy dog quick"); """ try { + load_httplogs_data.call(indexTbName2, 'test_index_match_phrase_ordered_2', 'true', 'json', 'documents-1000.json') + sql "sync" sql """ set enable_common_expr_pushdown = true; """ GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute") @@ -83,6 +142,9 @@ suite("test_index_match_phrase_ordered", "nonConcurrent"){ qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the quick ~6'; """ qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the quick ~6+'; """ + + qt_sql """ select count() from ${indexTbName2} where request match_phrase 'english/history off.gif ~20+'; """ + qt_sql """ select count() from ${indexTbName2} where request match_phrase 'english/images off.gif ~20+'; """ } finally { GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute") } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org