This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 129c07de7cf [Enhancement](inverted index) strictly checkout inverted 
index properties (#29421) (#29748)
129c07de7cf is described below

commit 129c07de7cfa1110f09e283c9511b93492143f41
Author: Kang <[email protected]>
AuthorDate: Wed Jan 10 00:33:42 2024 +0800

    [Enhancement](inverted index) strictly checkout inverted index properties 
(#29421) (#29748)
---
 .../apache/doris/analysis/InvertedIndexUtil.java   |  99 +++++++--
 .../inverted_index_p0/test_properties.groovy       | 233 +++++++++++++++++++++
 2 files changed, 311 insertions(+), 21 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
index 172af8c07dc..8ce732185d1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java
@@ -20,8 +20,11 @@ package org.apache.doris.analysis;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.common.AnalysisException;
 
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 public class InvertedIndexUtil {
 
@@ -43,9 +46,11 @@ public class InvertedIndexUtil {
 
     public static String INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = 
"char_replace";
 
-    public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE = "ignore_above";
+    public static String INVERTED_INDEX_SUPPORT_PHRASE_KEY = "support_phrase";
 
-    public static String INVERTED_INDEX_PARSER_LOWERCASE = "lower_case";
+    public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY = 
"ignore_above";
+
+    public static String INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case";
 
     public static String getInvertedIndexParser(Map<String, String> 
properties) {
         String parser = properties == null ? null : 
properties.get(INVERTED_INDEX_PARSER_KEY);
@@ -100,26 +105,9 @@ public class InvertedIndexUtil {
         if (properties != null) {
             parser = properties.get(INVERTED_INDEX_PARSER_KEY);
             if (parser == null && !properties.isEmpty()) {
-                throw new AnalysisException("invalid index properties, please 
check the properties");
-            }
-            String ignoreAbove = 
properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE);
-            if (ignoreAbove != null) {
-                try {
-                    int ignoreAboveValue = Integer.parseInt(ignoreAbove);
-                    if (ignoreAboveValue <= 0) {
-                        throw new AnalysisException("invalid index properties, 
ignore_above must be positive");
-                    }
-                } catch (NumberFormatException e) {
-                    throw new AnalysisException("invalid index properties, 
ignore_above must be integer");
-                }
+                throw new AnalysisException("Invalid index properties, parser 
must not be none");
             }
-            String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE);
-            if (lowerCase != null) {
-                if (!"true".equals(lowerCase) && !"false".equals(lowerCase)) {
-                    throw new AnalysisException("invalid index properties, 
lowercase must be true or false");
-                }
-            }
-
+            checkInvertedIndexProperties(properties);
         }
 
         // default is "none" if not set
@@ -141,4 +129,73 @@ public class InvertedIndexUtil {
                 + " is not supported for column: " + indexColName + " of type 
" + colType);
         }
     }
+
+    /**
+     * Checks inverted index properties: rejects unknown keys and illegal values for known keys.
+     * Throws AnalysisException on the first violation found; properties must not be null.
+     */
+    public static void checkInvertedIndexProperties(Map<String, String> properties) throws AnalysisException {
+        Set<String> allowedKeys = new HashSet<>(Arrays.asList(
+                INVERTED_INDEX_PARSER_KEY, INVERTED_INDEX_PARSER_MODE_KEY,
+                INVERTED_INDEX_SUPPORT_PHRASE_KEY, INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE,
+                INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN, INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT,
+                INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY, INVERTED_INDEX_PARSER_LOWERCASE_KEY
+        ));
+        // Unknown keys are rejected up front, before any value is inspected.
+        for (String key : properties.keySet()) {
+            if (!allowedKeys.contains(key)) {
+                throw new AnalysisException("Invalid inverted index property key: " + key);
+            }
+        }
+
+        String parser = properties.get(INVERTED_INDEX_PARSER_KEY);
+        String parserMode = properties.get(INVERTED_INDEX_PARSER_MODE_KEY);
+        String supportPhrase = properties.get(INVERTED_INDEX_SUPPORT_PHRASE_KEY);
+        String charFilterType = properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE);
+        String charFilterPattern = properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN);
+        String ignoreAbove = properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY);
+        String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
+
+        if (parser != null && !parser.matches("none|english|unicode|chinese|standard")) {
+            throw new AnalysisException("Invalid inverted index 'parser' value: " + parser
+                    + ", parser must be none, english, unicode, chinese or standard");
+        }
+        // parser_mode is only accepted together with parser=chinese (a missing parser is rejected too).
+        if (!"chinese".equals(parser) && parserMode != null) {
+            throw new AnalysisException("parser_mode is only available for chinese parser");
+        }
+
+        if ("chinese".equals(parser) && (parserMode != null && !parserMode.matches("fine_grained|coarse_grained"))) {
+            throw new AnalysisException("Invalid inverted index 'parser_mode' value: " + parserMode
+                    + ", parser_mode must be fine_grained or coarse_grained");
+        }
+
+        if (supportPhrase != null && !supportPhrase.matches("true|false")) {
+            throw new AnalysisException("Invalid inverted index 'support_phrase' value: " + supportPhrase
+                    + ", support_phrase must be true or false");
+        }
+
+        if (INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE.equals(charFilterType) && (charFilterPattern == null
+                || charFilterPattern.isEmpty())) {
+            throw new AnalysisException("Missing 'char_filter_pattern' for 'char_replace' filter type");
+        }
+        // ignore_above must parse as an int and be strictly positive.
+        if (ignoreAbove != null) {
+            try {
+                int ignoreAboveValue = Integer.parseInt(ignoreAbove);
+                if (ignoreAboveValue <= 0) {
+                    throw new AnalysisException("Invalid inverted index 'ignore_above' value: " + ignoreAboveValue
+                            + ", ignore_above must be positive");
+                }
+            } catch (NumberFormatException e) {
+                throw new AnalysisException(
+                        "Invalid inverted index 'ignore_above' value, ignore_above must be integer");
+            }
+        }
+
+        if (lowerCase != null && !lowerCase.matches("true|false")) {
+            throw new AnalysisException(
+                    "Invalid inverted index 'lower_case' value: " + lowerCase + ", lower_case must be true or false");
+        }
+    }
 }
diff --git a/regression-test/suites/inverted_index_p0/test_properties.groovy 
b/regression-test/suites/inverted_index_p0/test_properties.groovy
new file mode 100644
index 00000000000..84b0aef8f3e
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_properties.groovy
@@ -0,0 +1,233 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_properties", "p0"){
+    // prepare test table
+    def indexTblName = "test_properties"
+
+    sql "DROP TABLE IF EXISTS ${indexTblName}"
+    def success = false;
+
+    // Runs create_sql, records the outcome in `success`, and always drops the table afterwards.
+    // When the statement fails, the exception text must contain error_msg.
+    def create_table_with_inverted_index_properties = { create_sql, error_msg ->
+        try {
+            sql create_sql
+            success = true
+        } catch (Throwable failure) {
+            logger.info("create_table_with_inverted_index_properties result: " + failure)
+            def text = failure.toString()
+            assertTrue(text != null && text.contains(error_msg), "Expect exception msg contains '${error_msg}', but meet '${text}'")
+            success = false
+        } finally {
+            sql "DROP TABLE IF EXISTS ${indexTblName}"
+        }
+    }
+
+    def empty_parser = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+               `id`int(11)NULL,
+               `c` text NULL,
+               INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="") COMMENT 
''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+               "replication_allocation" = "tag.location.default: 1"
+        );
+        """
+    create_table_with_inverted_index_properties(empty_parser, "Invalid 
inverted index 'parser' value")
+    assertEquals(success, false)
+
+    def wrong_parser = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+               `id`int(11)NULL,
+               `c` text NULL,
+               INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="german") 
COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+               "replication_allocation" = "tag.location.default: 1"
+        );
+        """
+    create_table_with_inverted_index_properties(wrong_parser, "Invalid 
inverted index 'parser' value")
+    assertEquals(success, false)
+
+    def wrong_parser_mode = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"parser_mode"="fine_grained") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(wrong_parser_mode, 
"parser_mode is only available for chinese parser")
+    assertEquals(success, false)
+
+    def valid_parser_and_mode = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="chinese", 
"parser_mode"="fine_grained") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(valid_parser_and_mode, "")
+    assertEquals(success, true)
+
+    def missing_char_filter_pattern = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"char_filter_type"="char_replace") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(missing_char_filter_pattern, 
"Missing 'char_filter_pattern' for 'char_replace' filter type")
+    assertEquals(success, false)
+
+    def invalid_property_key = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("invalid_key"="value") 
COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(invalid_property_key, "Invalid 
index properties, parser must not be none")
+    assertEquals(success, false)
+
+    def invalid_property_key2 = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"invalid_key"="value") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(invalid_property_key2, 
"Invalid inverted index property key:")
+    assertEquals(success, false)
+
+    def invalid_ignore_above = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"ignore_above"="-1") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(invalid_ignore_above, "Invalid 
inverted index 'ignore_above' value")
+    assertEquals(success, false)
+
+    def non_numeric_ignore_above = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"ignore_above"="non_numeric") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(non_numeric_ignore_above, 
"ignore_above must be integer")
+    assertEquals(success, false)
+
+    def invalid_lower_case = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"lower_case"="invalid") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(invalid_lower_case, "Invalid 
inverted index 'lower_case' value")
+    assertEquals(success, false)
+
+    def invalid_support_phrase = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"support_phrase"="invalid") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(invalid_support_phrase, 
"Invalid inverted index 'support_phrase' value")
+    assertEquals(success, false)
+
+    def invalid_support_phrase2 = """
+        CREATE TABLE IF NOT EXISTS ${indexTblName}(
+            `id` int(11) NULL,
+            `c` text NULL,
+            INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", 
"support_phase"="true") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    create_table_with_inverted_index_properties(invalid_support_phrase2, 
"Invalid inverted index property key: support_phase")
+    assertEquals(success, false)
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to