This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b7f1198a1ff [fix](chinese) Fix the issue where the BE crashes due to
the missing Chinese dict (#30712)
b7f1198a1ff is described below
commit b7f1198a1ff116dd802086d669cb2caef1f75a20
Author: zzzxl <[email protected]>
AuthorDate: Mon Feb 5 14:07:34 2024 +0800
[fix](chinese) Fix the issue where the BE crashes due to the missing
Chinese dict (#30712)
---
be/src/clucene | 2 +-
be/src/common/status.h | 1 +
.../rowset/segment_v2/inverted_index_writer.cpp | 37 ++++++++++++----------
be/src/vec/functions/function_tokenize.cpp | 12 +++++--
4 files changed, 33 insertions(+), 19 deletions(-)
diff --git a/be/src/clucene b/be/src/clucene
index f4829cc50f3..63ae98a8bc2 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit f4829cc50f32723366026c401fdb0111f15ee537
+Subproject commit 63ae98a8bc280dc4728dca744c3fe06e7a38caf1
diff --git a/be/src/common/status.h b/be/src/common/status.h
index f8b07c58380..e5dcfe4fe81 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -273,6 +273,7 @@ namespace ErrorCode {
E(INVERTED_INDEX_BUILD_WAITTING, -6008, false); \
E(INVERTED_INDEX_NOT_IMPLEMENTED, -6009, false); \
E(INVERTED_INDEX_COMPACTION_ERROR, -6010, false); \
+ E(INVERTED_INDEX_ANALYZER_ERROR, -6011, false); \
E(KEY_NOT_FOUND, -7000, false); \
E(KEY_ALREADY_EXISTS, -7001, false); \
E(ENTRY_NOT_FOUND, -7002, false); \
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 477d52d471f..07bea0c83f3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -244,23 +244,28 @@ public:
}
Status create_analyzer(std::unique_ptr<lucene::analysis::Analyzer>&
analyzer) {
- switch (_parser_type) {
- case InvertedIndexParserType::PARSER_STANDARD:
- case InvertedIndexParserType::PARSER_UNICODE:
- analyzer =
std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
- break;
- case InvertedIndexParserType::PARSER_ENGLISH:
- analyzer =
std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
- break;
- case InvertedIndexParserType::PARSER_CHINESE:
- analyzer = create_chinese_analyzer();
- break;
- default:
- analyzer =
std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
- break;
+ try {
+ switch (_parser_type) {
+ case InvertedIndexParserType::PARSER_STANDARD:
+ case InvertedIndexParserType::PARSER_UNICODE:
+ analyzer =
std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
+ break;
+ case InvertedIndexParserType::PARSER_ENGLISH:
+ analyzer =
std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+ break;
+ case InvertedIndexParserType::PARSER_CHINESE:
+ analyzer = create_chinese_analyzer();
+ break;
+ default:
+ analyzer =
std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+ break;
+ }
+ setup_analyzer_lowercase(analyzer);
+ return Status::OK();
+ } catch (CLuceneError& e) {
+ return
Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+ "inverted index create analyzer failed: {}", e.what());
}
- setup_analyzer_lowercase(analyzer);
- return Status::OK();
}
void setup_analyzer_lowercase(std::unique_ptr<lucene::analysis::Analyzer>&
analyzer) {
diff --git a/be/src/vec/functions/function_tokenize.cpp
b/be/src/vec/functions/function_tokenize.cpp
index 2ecd164a598..1d9edbd7dbd 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -142,8 +142,16 @@ Status FunctionTokenize::execute_impl(FunctionContext*
/*context*/, Block& block
inverted_index_ctx.parser_mode =
get_parser_mode_string_from_properties(properties);
inverted_index_ctx.char_filter_map =
get_parser_char_filter_map_from_properties(properties);
- auto analyzer =
-
doris::segment_v2::InvertedIndexReader::create_analyzer(&inverted_index_ctx);
+
+ std::unique_ptr<lucene::analysis::Analyzer> analyzer;
+ try {
+ analyzer =
doris::segment_v2::InvertedIndexReader::create_analyzer(
+ &inverted_index_ctx);
+ } catch (CLuceneError& e) {
+ return
Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+ "inverted index create analyzer failed: {}", e.what());
+ }
+
inverted_index_ctx.analyzer = analyzer.get();
_do_tokenize(*col_left, inverted_index_ctx, *dest_nested_column,
dest_offsets,
dest_nested_null_map);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]