This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 676ce907f93 branch-3.0: [fix](inverted index) Content Check for
Tokenize Function Parser #44465 (#44554)
676ce907f93 is described below
commit 676ce907f935b96ff70a1f220420c6f65092fb5a
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Nov 27 11:52:50 2024 +0800
branch-3.0: [fix](inverted index) Content Check for Tokenize Function
Parser #44465 (#44554)
Cherry-picked from #44465
Co-authored-by: zzzxl <[email protected]>
---
be/src/vec/functions/function_tokenize.cpp | 6 ++++++
regression-test/suites/inverted_index_p0/test_tokenize.groovy | 11 +++++++++++
2 files changed, 17 insertions(+)
diff --git a/be/src/vec/functions/function_tokenize.cpp
b/be/src/vec/functions/function_tokenize.cpp
index be0eb5dddc9..5c19ff1283a 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -142,6 +142,12 @@ Status FunctionTokenize::execute_impl(FunctionContext*
/*context*/, Block& block
}
inverted_index_ctx.parser_type =
get_inverted_index_parser_type_from_string(
get_parser_string_from_properties(properties));
+ if (inverted_index_ctx.parser_type ==
InvertedIndexParserType::PARSER_UNKNOWN) {
+ return
Status::Error<doris::ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
+ "unsupported parser type. currently, only 'english',
'chinese', and "
+ "'unicode' analyzers are supported.");
+ }
+
inverted_index_ctx.parser_mode =
get_parser_mode_string_from_properties(properties);
inverted_index_ctx.char_filter_map =
get_parser_char_filter_map_from_properties(properties);
diff --git a/regression-test/suites/inverted_index_p0/test_tokenize.groovy
b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
index 8d7e2dac42e..4672a39cedb 100644
--- a/regression-test/suites/inverted_index_p0/test_tokenize.groovy
+++ b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+import java.sql.SQLException
suite("test_tokenize"){
// prepare test table
@@ -98,4 +99,14 @@ suite("test_tokenize"){
qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode"');"""
qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A',
'"parser"="unicode","stopwords" = "none"');"""
+
+ try {
+ sql """ SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="eng"'); """
+ } catch (SQLException e) {
+ if (e.message.contains("E-6000")) {
+ log.info("e message: {}", e.message)
+ } else {
+ throw e
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org