This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new d992f08c54a Fix unrecognized column name delete handler (#32404)
d992f08c54a is described below

commit d992f08c54a374ca6027433d52ca416b301f3180
Author: Gavin Chou <[email protected]>
AuthorDate: Tue Mar 19 15:26:13 2024 +0800

    Fix unrecognized column name delete handler (#32404)
---
 be/src/olap/delete_handler.cpp                     | 77 +++++++++++---------
 be/src/olap/delete_handler.h                       | 12 +++-
 be/test/olap/delete_handler_test.cpp               | 40 +++++++++++
 .../java/org/apache/doris/common/FeNameFormat.java |  4 +-
 .../suites/delete_p0/test_delete_handler.groovy    | 84 ++++++++++++++++++++++
 5 files changed, 180 insertions(+), 37 deletions(-)

diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 4d8385fe9f2..684fdc5c2a5 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -77,6 +77,13 @@ Status DeleteHandler::generate_delete_predicate(const 
TabletSchema& schema,
                       << "condition size=" << in_pred->values().size();
         } else {
             string condition_str = construct_sub_predicates(condition);
+            if (TCondition tmp; 
!DeleteHandler::parse_delete_condition(condition_str, &tmp)) {
+                LOG(WARNING) << "failed to parse condition_str, condtion="
+                             << ThriftDebugString(condition);
+                return Status::Error<DELETE_INVALID_CONDITION>(
+                        "failed to parse condition_str, condtion={}", 
ThriftDebugString(condition));
+            }
+            VLOG_NOTICE << __PRETTY_FUNCTION__ << " condition_str: " << 
condition_str;
             del_pred->add_sub_predicates(condition_str);
             LOG(INFO) << "store one sub-delete condition. condition=" << 
condition_str;
         }
@@ -95,8 +102,9 @@ std::string DeleteHandler::construct_sub_predicates(const 
TCondition& condition)
     }
     string condition_str;
     if ("IS" == op) {
-        condition_str = condition.column_name + " " + op + " " + 
condition.condition_values[0];
-    } else {
+        // ATTN: tricky! Surround IS with spaces to make it "special"
+        condition_str = condition.column_name + " IS " + 
condition.condition_values[0];
+    } else { // multi-elements IN expr has been processed with InPredicatePB
         if (op == "*=") {
             op = "=";
         } else if (op == "!*=") {
@@ -195,43 +203,48 @@ Status DeleteHandler::check_condition_valid(const 
TabletSchema& schema, const TC
     return Status::OK();
 }
 
-bool DeleteHandler::_parse_condition(const std::string& condition_str, 
TCondition* condition) {
-    bool matched = true;
+// clang-format off
+// Condition string format, the format is (column_name)(op)(value)
+// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
+// column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX
+//              and compactible with any the lagacy
+// operator: matches "="
+// value: matches "1597751948193618247  and length(source)<1;\n;\n"
+//
+// For more info, see DeleteHandler::construct_sub_predicates
+// FIXME(gavin): support unicode. And this is a tricky implementation, it 
should
+//               not be the final resolution, refactor it.
+const char* const CONDITION_STR_PATTERN =
+    // .----------------- column-name ----------------.   
.----------------------- operator ------------------------.   .------------ 
value ----------.
+    
R"(([_a-zA-Z@0-9\s/][.a-zA-Z0-9_+-/?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:
 IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
+    // '----------------- group 1 --------------------'   
'--------------------- group 2 ---------------------------'   | '-- group 4--'  
            |
+    //                                                         match any of: = 
!= >> << >= <= *= " IS "                 '----------- group 3 ---------'
+    //                                                                         
                                          match **ANY THING** without(4)
+    //                                                                         
                                          or with(3) single quote
+boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
+// clang-format on
+
+bool DeleteHandler::parse_delete_condition(const std::string& condition_str,
+                                           TCondition* condition) {
+    bool matched = false;
     boost::smatch what;
-
     try {
-        // Condition string format, the format is (column_name)(op)(value)
-        // eg:  condition_str="c1 = 1597751948193618247  and 
length(source)<1;\n;\n"
-        //  group1:  (\w+) matches "c1"
-        //  group2:  ((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS)) 
matches  "="
-        //  group3:  ((?:[\s\S]+)?) matches "1597751948193618247  and 
length(source)<1;\n;\n"
-        const char* const CONDITION_STR_PATTERN =
-                
R"(([\w$#%]+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
-        boost::regex ex(CONDITION_STR_PATTERN);
-        if (boost::regex_match(condition_str, what, ex)) {
-            if (condition_str.size() != what[0].str().size()) {
-                matched = false;
-            }
-        } else {
-            matched = false;
-        }
+        VLOG_NOTICE << "condition_str: " << condition_str;
+        matched = boost::regex_match(condition_str, what, 
DELETE_HANDLER_REGEX) &&
+                  condition_str.size() == what[0].str().size(); // exact match
     } catch (boost::regex_error& e) {
         VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; 
error=" << e.what()
                     << "]";
-        matched = false;
     }
+    if (!matched) return false;
 
-    if (!matched) {
-        return false;
-    }
     condition->column_name = what[1].str();
-    condition->condition_op = what[2].str();
-    if (what[4].matched) { // match string with single quotes, eg. a = 'b'
-        condition->condition_values.push_back(what[4].str());
-    } else { // match string without quote, compat with old conditions, eg. a 
= b
-        condition->condition_values.push_back(what[3].str());
-    }
-
+    condition->condition_op = what[2].str() == " IS " ? "IS" : what[2].str();
+    // match string with single quotes, a = b  or a = 'b'
+    condition->condition_values.push_back(what[3 + !!what[4].matched].str());
+    VLOG_NOTICE << "parsed condition_str: col_name={" << 
condition->column_name << "} op={"
+                << condition->condition_op << "} val={" << 
condition->condition_values.back()
+                << "}";
     return true;
 }
 
@@ -253,7 +266,7 @@ Status DeleteHandler::init(TabletSchemaSPtr tablet_schema,
         temp.filter_version = delete_pred->version().first;
         for (const auto& sub_predicate : delete_condition.sub_predicates()) {
             TCondition condition;
-            if (!_parse_condition(sub_predicate, &condition)) {
+            if (!parse_delete_condition(sub_predicate, &condition)) {
                 return Status::Error<DELETE_INVALID_PARAMETERS>(
                         "fail to parse condition. condition={}", 
sub_predicate);
             }
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index 7239f4326b7..535cf6d332f 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -65,6 +65,15 @@ public:
     // construct sub condition from TCondition
     static std::string construct_sub_predicates(const TCondition& condition);
 
+    /**
+     * Use regular expression to extract 'column_name', 'op' and 'operands'
+     *
+     * @param condition_str input predicate string in form of `X OP Y`
+     * @param condition output param
+     * @return true if matched and extracted correctly otherwise false
+     */
+    static bool parse_delete_condition(const std::string& condition_str, 
TCondition* condition);
+
 private:
     // Validate the condition on the schema.
     static Status check_condition_valid(const TabletSchema& tablet_schema, 
const TCondition& cond);
@@ -106,9 +115,6 @@ public:
                     del_predicates_for_zone_map) const;
 
 private:
-    // Use regular expression to extract 'column_name', 'op' and 'operands'
-    bool _parse_condition(const std::string& condition_str, TCondition* 
condition);
-
     bool _is_inited = false;
     // DeleteConditions in _del_conds are in 'OR' relationship
     std::vector<DeleteConditions> _del_conds;
diff --git a/be/test/olap/delete_handler_test.cpp 
b/be/test/olap/delete_handler_test.cpp
index 3029220903f..8fb26565ccb 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -1232,4 +1232,44 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
     _delete_handler.finalize();
 }
 
+// clang-format off
+TEST_F(TestDeleteHandler, TestParseDeleteCondition) {
+    auto test = [](const std::tuple<std::string, bool, TCondition>& in) {
+        auto& [cond_str, exp_succ, exp_cond] = in;
+        TCondition parsed_cond;
+        EXPECT_EQ(DeleteHandler::parse_delete_condition(cond_str, 
&parsed_cond), exp_succ) << " unexpected result, cond_str: " << cond_str;
+        if (exp_succ) EXPECT_EQ(parsed_cond, exp_cond) << " unexpected result, 
cond_str: " << cond_str;
+    };
+
+    auto gen_cond = [](const std::string& col, const std::string& op, const 
std::string& val) {
+        TCondition cond;
+        cond.__set_column_name(col);
+        cond.__set_condition_op(op);
+        cond.__set_condition_values(std::vector<std::string>{val});
+        return cond;
+    };
+
+    // <cond_str, parsed, expect_value>>
+    std::vector<std::tuple<std::string, bool, TCondition>> test_input {
+        {R"(abc=b)"             , true,  gen_cond(R"(abc)"   , "=" , R"(b)"    
     )}, // normal case
+        {R"(abc!=b)"            , true,  gen_cond(R"(abc)"   , "!=", R"(b)"    
     )}, // normal case
+        {R"(abc<=b)"            , true,  gen_cond(R"(abc)"   , "<=", R"(b)"    
     )}, // normal case
+        {R"(abc>=b)"            , true,  gen_cond(R"(abc)"   , ">=", R"(b)"    
     )}, // normal case
+        {R"(abc>>b)"            , true,  gen_cond(R"(abc)"   , ">>", R"(b)"    
     )}, // normal case
+        {R"(abc<<b)"            , true,  gen_cond(R"(abc)"   , "<<", R"(b)"    
     )}, // normal case
+        {R"(abc!='b')"          , true,  gen_cond(R"(abc)"   , "!=", R"(b)"    
     )}, // value surrounded by '
+        {R"(abc=)"              , true, gen_cond(R"(abc)"    , "=" , R"()"     
     )}, // missing value, it means not to be parsed succefully, how every it's 
a ignorable bug
+        {R"(@a*<<10086)"        , true,  gen_cond(R"(@a*)"   , "<<", 
R"(10086)"     )}, // column ends with *
+        {R"(*a=b)"              , false, gen_cond(R"(*a)"    , "=" , R"(b)"    
     )}, // starts with *
+        {R"(a*a>>WTF(10086))"   , true,  gen_cond(R"(a*a)"   , ">>", 
R"(WTF(10086))")}, // function
+        {R"(a-b IS NULL)"       , true,  gen_cond(R"(a-b)"   , "IS", R"(NULL)" 
     )}, // - in col name and test IS NULL
+        {R"(@a*-b IS NOT NULL)" , true,  gen_cond(R"(@a*-b)" , "IS", R"(NOT 
NULL)"  )}, // test IS NOT NULL
+        {R"(a IS b IS NOT NULL)", true,  gen_cond(R"(a IS b)", "IS", R"(NOT 
NULL)"  )}, // test " IS " in column name
+        {R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:=hell)", true, 
gen_cond(R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:)", "=", R"(hell)")}, // 
hellbound column name
+        {R"(this is a col very 
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
 colum name=long)", true,  gen_cond(R"(this is a col very 
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
 colum name)", "=", R"(long)")}, // test " IS " in column name
+    };
+    for (auto& i : test_input) { test(i); }
+}
+// clang-format on
+
 } // namespace doris
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
index 3a77bde84b6..5a10bce7954 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
@@ -33,7 +33,7 @@ public class FeNameFormat {
     private static final String UNDERSCORE_COMMON_NAME_REGEX = 
"^[_a-zA-Z][a-zA-Z0-9-_]{0,63}$";
     private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
     private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
-    private static final String COLUMN_NAME_REGEX = 
"^[_a-zA-Z@0-9\\s<>/][.a-zA-Z0-9_+-/><?@#$%^&*\"\\s,:]{0,255}$";
+    private static final String COLUMN_NAME_REGEX = 
"^[_a-zA-Z@0-9\\s/][.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,255}$";
 
     private static final String UNICODE_LABEL_REGEX = 
"^[-_A-Za-z0-9:\\p{L}]{1,128}$";
     private static final String UNICODE_COMMON_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
@@ -41,7 +41,7 @@ public class FeNameFormat {
     private static final String UNICODE_TABLE_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
     private static final String UNICODE_USER_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
     private static final String UNICODE_COLUMN_NAME_REGEX
-            = "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/><?@#$%^&*\\p{L}]{0,255}$";
+            = "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/?@#$%^&*\\p{L}]{0,255}$";
 
     public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";
 
diff --git a/regression-test/suites/delete_p0/test_delete_handler.groovy 
b/regression-test/suites/delete_p0/test_delete_handler.groovy
new file mode 100644
index 00000000000..8c9ac33edc9
--- /dev/null
+++ b/regression-test/suites/delete_p0/test_delete_handler.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/**
+ * Test delete with strange name
+ */
+suite("test_delete_handler") {
+    // test condition operator
+    sql """drop table if exists td1;"""
+    sql """
+    CREATE TABLE `td1` (
+      `id` int(11) NULL,
+      `name` varchar(255) NULL,
+      `score` int(11) NULL
+    ) ENGINE=OLAP
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES ( "replication_num" = "1" );
+    """
+
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+
+    sql """delete from td1 where `score` = 4 and `score` = 3 and `score` = 
1;"""
+    sql """delete from td1 where `score` is null;"""
+    sql """delete from td1 where `score` is not null;"""
+    sql """delete from td1 where `score` in (1,2);"""
+    sql """delete from td1 where `score` not in (3,4);"""
+    sql """select * from td1;"""
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """select * from td1;"""
+
+
+    // test column name
+    sql """drop table if exists td2;"""
+    sql """
+    CREATE TABLE `td2` (
+      `id` int(11) NULL,
+      `name` varchar(255) NULL,
+      `@score` int(11) NULL,
+      `scoreIS` int(11) NULL,
+      `sc ore` int(11) NULL,
+      `score IS score` int(11) NULL,
+      `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` int(11) NULL
+    ) ENGINE=OLAP
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES ( "replication_num" = "1" );
+    """
+
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+
+    sql """select * from td2;"""
+    sql """delete from td2 where `@score` = 88;"""
+    sql """delete from td2 where `scoreIS` is null;"""
+    sql """delete from td2 where `score IS score` is null;"""
+    sql """delete from td2 where `sc ore` is null;"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is null;"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is not null;"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` in (1,2,3);"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` not in 
(1,2,3);"""
+    sql """select * from td2;"""
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """select * from td2;"""
+
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to