This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 129c07de7cf [Enhancement](inverted index) strictly checkout inverted
index properties (#29421) (#29748)
129c07de7cf is described below
commit 129c07de7cfa1110f09e283c9511b93492143f41
Author: Kang <[email protected]>
AuthorDate: Wed Jan 10 00:33:42 2024 +0800
[Enhancement](inverted index) strictly checkout inverted index properties
(#29421) (#29748)
---
.../apache/doris/analysis/InvertedIndexUtil.java | 99 +++++++--
.../inverted_index_p0/test_properties.groovy | 233 +++++++++++++++++++++
2 files changed, 311 insertions(+), 21 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
index 172af8c07dc..8ce732185d1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
@@ -20,8 +20,11 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
+import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Set;
public class InvertedIndexUtil {
@@ -43,9 +46,11 @@ public class InvertedIndexUtil {
public static String INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE =
"char_replace";
- public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE = "ignore_above";
+ public static String INVERTED_INDEX_SUPPORT_PHRASE_KEY = "support_phrase";
- public static String INVERTED_INDEX_PARSER_LOWERCASE = "lower_case";
+ public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY =
"ignore_above";
+
+ public static String INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case";
public static String getInvertedIndexParser(Map<String, String>
properties) {
String parser = properties == null ? null :
properties.get(INVERTED_INDEX_PARSER_KEY);
@@ -100,26 +105,9 @@ public class InvertedIndexUtil {
if (properties != null) {
parser = properties.get(INVERTED_INDEX_PARSER_KEY);
if (parser == null && !properties.isEmpty()) {
- throw new AnalysisException("invalid index properties, please
check the properties");
- }
- String ignoreAbove =
properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE);
- if (ignoreAbove != null) {
- try {
- int ignoreAboveValue = Integer.parseInt(ignoreAbove);
- if (ignoreAboveValue <= 0) {
- throw new AnalysisException("invalid index properties,
ignore_above must be positive");
- }
- } catch (NumberFormatException e) {
- throw new AnalysisException("invalid index properties,
ignore_above must be integer");
- }
+ throw new AnalysisException("Invalid index properties, parser
must not be none");
}
- String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE);
- if (lowerCase != null) {
- if (!"true".equals(lowerCase) && !"false".equals(lowerCase)) {
- throw new AnalysisException("invalid index properties,
lowercase must be true or false");
- }
- }
-
+ checkInvertedIndexProperties(properties);
}
// default is "none" if not set
@@ -141,4 +129,73 @@ public class InvertedIndexUtil {
+ " is not supported for column: " + indexColName + " of type
" + colType);
}
}
+
+ public static void checkInvertedIndexProperties(Map<String, String>
properties) throws AnalysisException {
+ Set<String> allowedKeys = new HashSet<>(Arrays.asList(
+ INVERTED_INDEX_PARSER_KEY,
+ INVERTED_INDEX_PARSER_MODE_KEY,
+ INVERTED_INDEX_SUPPORT_PHRASE_KEY,
+ INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE,
+ INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN,
+ INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT,
+ INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY,
+ INVERTED_INDEX_PARSER_LOWERCASE_KEY
+ ));
+
+ for (String key : properties.keySet()) {
+ if (!allowedKeys.contains(key)) {
+ throw new AnalysisException("Invalid inverted index property
key: " + key);
+ }
+ }
+
+ String parser = properties.get(INVERTED_INDEX_PARSER_KEY);
+ String parserMode = properties.get(INVERTED_INDEX_PARSER_MODE_KEY);
+ String supportPhrase =
properties.get(INVERTED_INDEX_SUPPORT_PHRASE_KEY);
+ String charFilterType =
properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE);
+ String charFilterPattern =
properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN);
+ String ignoreAbove =
properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY);
+ String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
+
+ if (parser != null &&
!parser.matches("none|english|unicode|chinese|standard")) {
+ throw new AnalysisException("Invalid inverted index 'parser'
value: " + parser
+ + ", parser must be none, english, unicode or chinese");
+ }
+
+ if (!"chinese".equals(parser) && parserMode != null) {
+ throw new AnalysisException("parser_mode is only available for
chinese parser");
+ }
+
+ if ("chinese".equals(parser) && (parserMode != null &&
!parserMode.matches("fine_grained|coarse_grained"))) {
+ throw new AnalysisException("Invalid inverted index 'parser_mode'
value: " + parserMode
+ + ", parser_mode must be fine_grained or coarse_grained");
+ }
+
+ if (supportPhrase != null && !supportPhrase.matches("true|false")) {
+ throw new AnalysisException("Invalid inverted index
'support_phrase' value: " + supportPhrase
+ + ", support_phrase must be true or false");
+ }
+
+ if (INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE.equals(charFilterType) &&
(charFilterPattern == null
+ || charFilterPattern.isEmpty())) {
+ throw new AnalysisException("Missing 'char_filter_pattern' for
'char_replace' filter type");
+ }
+
+ if (ignoreAbove != null) {
+ try {
+ int ignoreAboveValue = Integer.parseInt(ignoreAbove);
+ if (ignoreAboveValue <= 0) {
+ throw new AnalysisException("Invalid inverted index
'ignore_above' value: " + ignoreAboveValue
+ + ", ignore_above must be positive");
+ }
+ } catch (NumberFormatException e) {
+ throw new AnalysisException(
+ "Invalid inverted index 'ignore_above' value,
ignore_above must be integer");
+ }
+ }
+
+ if (lowerCase != null && !lowerCase.matches("true|false")) {
+ throw new AnalysisException(
+ "Invalid inverted index 'lower_case' value: " + lowerCase
+ ", lower_case must be true or false");
+ }
+ }
}
diff --git a/regression-test/suites/inverted_index_p0/test_properties.groovy
b/regression-test/suites/inverted_index_p0/test_properties.groovy
new file mode 100644
index 00000000000..84b0aef8f3e
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_properties.groovy
@@ -0,0 +1,233 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_properties", "p0"){
+ // prepare test table
+ def indexTblName = "test_properties"
+
+ sql "DROP TABLE IF EXISTS ${indexTblName}"
+ def success = false;
+
+ def create_table_with_inverted_index_properties = { create_sql, error_msg->
+ try {
+ sql create_sql
+ success = true;
+ } catch(Throwable ex) {
+ logger.info("create_table_with_inverted_index_properties result: "
+ ex)
+ if (ex != null) {
+ def msg = ex.toString()
+ assertTrue(msg != null && msg.contains(error_msg), "Expect
exception msg contains '${error_msg}', but meet '${msg}'")
+ }
+ success = false;
+ } finally {
+ sql "DROP TABLE IF EXISTS ${indexTblName}"
+ }
+ }
+
+ def empty_parser = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id`int(11)NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="") COMMENT
''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(empty_parser, "Invalid
inverted index 'parser' value")
+ assertEquals(success, false)
+
+ def wrong_parser = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id`int(11)NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="german")
COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(wrong_parser, "Invalid
inverted index 'parser' value")
+ assertEquals(success, false)
+
+ def wrong_parser_mode = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"parser_mode"="fine_grained") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(wrong_parser_mode,
"parser_mode is only available for chinese parser")
+ assertEquals(success, false)
+
+ def valid_parser_and_mode = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="chinese",
"parser_mode"="fine_grained") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(valid_parser_and_mode, "")
+ assertEquals(success, true)
+
+ def missing_char_filter_pattern = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"char_filter_type"="char_replace") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(missing_char_filter_pattern,
"Missing 'char_filter_pattern' for 'char_replace' filter type")
+ assertEquals(success, false)
+
+ def invalid_property_key = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("invalid_key"="value")
COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(invalid_property_key, "Invalid
index properties, parser must not be none")
+ assertEquals(success, false)
+
+ def invalid_property_key2 = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"invalid_key"="value") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(invalid_property_key2,
"Invalid inverted index property key:")
+ assertEquals(success, false)
+
+ def invalid_ignore_above = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"ignore_above"="-1") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(invalid_ignore_above, "Invalid
inverted index 'ignore_above' value")
+ assertEquals(success, false)
+
+ def non_numeric_ignore_above = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"ignore_above"="non_numeric") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(non_numeric_ignore_above,
"ignore_above must be integer")
+ assertEquals(success, false)
+
+ def invalid_lower_case = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"lower_case"="invalid") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(invalid_lower_case, "Invalid
inverted index 'lower_case' value")
+ assertEquals(success, false)
+
+ def invalid_support_phrase = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"support_phrase"="invalid") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(invalid_support_phrase,
"Invalid inverted index 'support_phrase' value")
+ assertEquals(success, false)
+
+ def invalid_support_phrase2 = """
+ CREATE TABLE IF NOT EXISTS ${indexTblName}(
+ `id` int(11) NULL,
+ `c` text NULL,
+ INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english",
"support_phase"="true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ create_table_with_inverted_index_properties(invalid_support_phrase2,
"Invalid inverted index property key: support_phase")
+ assertEquals(success, false)
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]