This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 66cb95ffed7 branch-2.1: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44555)
66cb95ffed7 is described below
commit 66cb95ffed7c503ab2f8485d08a4139669b53d21
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Nov 27 11:53:36 2024 +0800
branch-2.1: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44555)
Cherry-picked from #44465
Co-authored-by: zzzxl <[email protected]>
---
be/src/vec/functions/function_tokenize.cpp | 6 ++++++
regression-test/suites/inverted_index_p0/test_tokenize.groovy | 11 +++++++++++
2 files changed, 17 insertions(+)
diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp
index e7dc2debe62..b1ec177d444 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -140,6 +140,12 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block
}
inverted_index_ctx.parser_type =
get_inverted_index_parser_type_from_string(
get_parser_string_from_properties(properties));
+ if (inverted_index_ctx.parser_type ==
InvertedIndexParserType::PARSER_UNKNOWN) {
+ return
Status::Error<doris::ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
+ "unsupported parser type. currently, only 'english',
'chinese', and "
+ "'unicode' analyzers are supported.");
+ }
+
inverted_index_ctx.parser_mode =
get_parser_mode_string_from_properties(properties);
inverted_index_ctx.char_filter_map =
get_parser_char_filter_map_from_properties(properties);
diff --git a/regression-test/suites/inverted_index_p0/test_tokenize.groovy b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
index 8d7e2dac42e..4672a39cedb 100644
--- a/regression-test/suites/inverted_index_p0/test_tokenize.groovy
+++ b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+import java.sql.SQLException
suite("test_tokenize"){
// prepare test table
@@ -98,4 +99,14 @@ suite("test_tokenize"){
qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode"');"""
qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A',
'"parser"="unicode","stopwords" = "none"');"""
+
+ try {
+ sql """ SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="eng"'); """
+ } catch (SQLException e) {
+ if (e.message.contains("E-6000")) {
+ log.info("e message: {}", e.message)
+ } else {
+ throw e
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]