This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new d992f08c54a Fix unrecognized column name delete handler (#32404)
d992f08c54a is described below
commit d992f08c54a374ca6027433d52ca416b301f3180
Author: Gavin Chou <[email protected]>
AuthorDate: Tue Mar 19 15:26:13 2024 +0800
Fix unrecognized column name delete handler (#32404)
---
be/src/olap/delete_handler.cpp | 77 +++++++++++---------
be/src/olap/delete_handler.h | 12 +++-
be/test/olap/delete_handler_test.cpp | 40 +++++++++++
.../java/org/apache/doris/common/FeNameFormat.java | 4 +-
.../suites/delete_p0/test_delete_handler.groovy | 84 ++++++++++++++++++++++
5 files changed, 180 insertions(+), 37 deletions(-)
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 4d8385fe9f2..684fdc5c2a5 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -77,6 +77,13 @@ Status DeleteHandler::generate_delete_predicate(const
TabletSchema& schema,
<< "condition size=" << in_pred->values().size();
} else {
string condition_str = construct_sub_predicates(condition);
+ if (TCondition tmp;
!DeleteHandler::parse_delete_condition(condition_str, &tmp)) {
+ LOG(WARNING) << "failed to parse condition_str, condtion="
+ << ThriftDebugString(condition);
+ return Status::Error<DELETE_INVALID_CONDITION>(
+ "failed to parse condition_str, condtion={}",
ThriftDebugString(condition));
+ }
+ VLOG_NOTICE << __PRETTY_FUNCTION__ << " condition_str: " <<
condition_str;
del_pred->add_sub_predicates(condition_str);
LOG(INFO) << "store one sub-delete condition. condition=" <<
condition_str;
}
@@ -95,8 +102,9 @@ std::string DeleteHandler::construct_sub_predicates(const
TCondition& condition)
}
string condition_str;
if ("IS" == op) {
- condition_str = condition.column_name + " " + op + " " +
condition.condition_values[0];
- } else {
+ // ATTN: tricky! Surround IS with spaces to make it "special"
+ condition_str = condition.column_name + " IS " +
condition.condition_values[0];
+ } else { // multi-elements IN expr has been processed with InPredicatePB
if (op == "*=") {
op = "=";
} else if (op == "!*=") {
@@ -195,43 +203,48 @@ Status DeleteHandler::check_condition_valid(const
TabletSchema& schema, const TC
return Status::OK();
}
-bool DeleteHandler::_parse_condition(const std::string& condition_str,
TCondition* condition) {
- bool matched = true;
+// clang-format off
+// Condition string format, the format is (column_name)(op)(value)
+// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
+// column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX
+// and compactible with any the lagacy
+// operator: matches "="
+// value: matches "1597751948193618247 and length(source)<1;\n;\n"
+//
+// For more info, see DeleteHandler::construct_sub_predicates
+// FIXME(gavin): support unicode. And this is a tricky implementation, it
should
+// not be the final resolution, refactor it.
+const char* const CONDITION_STR_PATTERN =
+ // .----------------- column-name ----------------.
.----------------------- operator ------------------------. .------------
value ----------.
+
R"(([_a-zA-Z@0-9\s/][.a-zA-Z0-9_+-/?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:
IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
+ // '----------------- group 1 --------------------'
'--------------------- group 2 ---------------------------' | '-- group 4--'
|
+ // match any of: =
!= >> << >= <= *= " IS " '----------- group 3 ---------'
+ //
match **ANY THING** without(4)
+ //
or with(3) single quote
+boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
+// clang-format on
+
+bool DeleteHandler::parse_delete_condition(const std::string& condition_str,
+ TCondition* condition) {
+ bool matched = false;
boost::smatch what;
-
try {
- // Condition string format, the format is (column_name)(op)(value)
- // eg: condition_str="c1 = 1597751948193618247 and
length(source)<1;\n;\n"
- // group1: (\w+) matches "c1"
- // group2: ((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))
matches "="
- // group3: ((?:[\s\S]+)?) matches "1597751948193618247 and
length(source)<1;\n;\n"
- const char* const CONDITION_STR_PATTERN =
-
R"(([\w$#%]+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
- boost::regex ex(CONDITION_STR_PATTERN);
- if (boost::regex_match(condition_str, what, ex)) {
- if (condition_str.size() != what[0].str().size()) {
- matched = false;
- }
- } else {
- matched = false;
- }
+ VLOG_NOTICE << "condition_str: " << condition_str;
+ matched = boost::regex_match(condition_str, what,
DELETE_HANDLER_REGEX) &&
+ condition_str.size() == what[0].str().size(); // exact match
} catch (boost::regex_error& e) {
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << ";
error=" << e.what()
<< "]";
- matched = false;
}
+ if (!matched) return false;
- if (!matched) {
- return false;
- }
condition->column_name = what[1].str();
- condition->condition_op = what[2].str();
- if (what[4].matched) { // match string with single quotes, eg. a = 'b'
- condition->condition_values.push_back(what[4].str());
- } else { // match string without quote, compat with old conditions, eg. a
= b
- condition->condition_values.push_back(what[3].str());
- }
-
+ condition->condition_op = what[2].str() == " IS " ? "IS" : what[2].str();
+ // match string with single quotes, a = b or a = 'b'
+ condition->condition_values.push_back(what[3 + !!what[4].matched].str());
+ VLOG_NOTICE << "parsed condition_str: col_name={" <<
condition->column_name << "} op={"
+ << condition->condition_op << "} val={" <<
condition->condition_values.back()
+ << "}";
return true;
}
@@ -253,7 +266,7 @@ Status DeleteHandler::init(TabletSchemaSPtr tablet_schema,
temp.filter_version = delete_pred->version().first;
for (const auto& sub_predicate : delete_condition.sub_predicates()) {
TCondition condition;
- if (!_parse_condition(sub_predicate, &condition)) {
+ if (!parse_delete_condition(sub_predicate, &condition)) {
return Status::Error<DELETE_INVALID_PARAMETERS>(
"fail to parse condition. condition={}",
sub_predicate);
}
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index 7239f4326b7..535cf6d332f 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -65,6 +65,15 @@ public:
// construct sub condition from TCondition
static std::string construct_sub_predicates(const TCondition& condition);
+ /**
+ * Use regular expression to extract 'column_name', 'op' and 'operands'
+ *
+ * @param condition_str input predicate string in form of `X OP Y`
+ * @param condition output param
+ * @return true if matched and extracted correctly otherwise false
+ */
+ static bool parse_delete_condition(const std::string& condition_str,
TCondition* condition);
+
private:
// Validate the condition on the schema.
static Status check_condition_valid(const TabletSchema& tablet_schema,
const TCondition& cond);
@@ -106,9 +115,6 @@ public:
del_predicates_for_zone_map) const;
private:
- // Use regular expression to extract 'column_name', 'op' and 'operands'
- bool _parse_condition(const std::string& condition_str, TCondition*
condition);
-
bool _is_inited = false;
// DeleteConditions in _del_conds are in 'OR' relationship
std::vector<DeleteConditions> _del_conds;
diff --git a/be/test/olap/delete_handler_test.cpp
b/be/test/olap/delete_handler_test.cpp
index 3029220903f..8fb26565ccb 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -1232,4 +1232,44 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
_delete_handler.finalize();
}
+// clang-format off
+TEST_F(TestDeleteHandler, TestParseDeleteCondition) {
+ auto test = [](const std::tuple<std::string, bool, TCondition>& in) {
+ auto& [cond_str, exp_succ, exp_cond] = in;
+ TCondition parsed_cond;
+ EXPECT_EQ(DeleteHandler::parse_delete_condition(cond_str,
&parsed_cond), exp_succ) << " unexpected result, cond_str: " << cond_str;
+ if (exp_succ) EXPECT_EQ(parsed_cond, exp_cond) << " unexpected result,
cond_str: " << cond_str;
+ };
+
+ auto gen_cond = [](const std::string& col, const std::string& op, const
std::string& val) {
+ TCondition cond;
+ cond.__set_column_name(col);
+ cond.__set_condition_op(op);
+ cond.__set_condition_values(std::vector<std::string>{val});
+ return cond;
+ };
+
+ // <cond_str, parsed, expect_value>>
+ std::vector<std::tuple<std::string, bool, TCondition>> test_input {
+ {R"(abc=b)" , true, gen_cond(R"(abc)" , "=" , R"(b)"
)}, // normal case
+ {R"(abc!=b)" , true, gen_cond(R"(abc)" , "!=", R"(b)"
)}, // normal case
+ {R"(abc<=b)" , true, gen_cond(R"(abc)" , "<=", R"(b)"
)}, // normal case
+ {R"(abc>=b)" , true, gen_cond(R"(abc)" , ">=", R"(b)"
)}, // normal case
+ {R"(abc>>b)" , true, gen_cond(R"(abc)" , ">>", R"(b)"
)}, // normal case
+ {R"(abc<<b)" , true, gen_cond(R"(abc)" , "<<", R"(b)"
)}, // normal case
+ {R"(abc!='b')" , true, gen_cond(R"(abc)" , "!=", R"(b)"
)}, // value surrounded by '
+ {R"(abc=)" , true, gen_cond(R"(abc)" , "=" , R"()"
)}, // missing value, it means not to be parsed succefully, how every it's
a ignorable bug
+ {R"(@a*<<10086)" , true, gen_cond(R"(@a*)" , "<<",
R"(10086)" )}, // column ends with *
+ {R"(*a=b)" , false, gen_cond(R"(*a)" , "=" , R"(b)"
)}, // starts with *
+ {R"(a*a>>WTF(10086))" , true, gen_cond(R"(a*a)" , ">>",
R"(WTF(10086))")}, // function
+ {R"(a-b IS NULL)" , true, gen_cond(R"(a-b)" , "IS", R"(NULL)"
)}, // - in col name and test IS NULL
+ {R"(@a*-b IS NOT NULL)" , true, gen_cond(R"(@a*-b)" , "IS", R"(NOT
NULL)" )}, // test IS NOT NULL
+ {R"(a IS b IS NOT NULL)", true, gen_cond(R"(a IS b)", "IS", R"(NOT
NULL)" )}, // test " IS " in column name
+ {R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:=hell)", true,
gen_cond(R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:)", "=", R"(hell)")}, //
hellbound column name
+ {R"(this is a col very
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
colum name=long)", true, gen_cond(R"(this is a col very
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
colum name)", "=", R"(long)")}, // test " IS " in column name
+ };
+ for (auto& i : test_input) { test(i); }
+}
+// clang-format on
+
} // namespace doris
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
index 3a77bde84b6..5a10bce7954 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
@@ -33,7 +33,7 @@ public class FeNameFormat {
private static final String UNDERSCORE_COMMON_NAME_REGEX =
"^[_a-zA-Z][a-zA-Z0-9-_]{0,63}$";
private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
- private static final String COLUMN_NAME_REGEX =
"^[_a-zA-Z@0-9\\s<>/][.a-zA-Z0-9_+-/><?@#$%^&*\"\\s,:]{0,255}$";
+ private static final String COLUMN_NAME_REGEX =
"^[_a-zA-Z@0-9\\s/][.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,255}$";
private static final String UNICODE_LABEL_REGEX =
"^[-_A-Za-z0-9:\\p{L}]{1,128}$";
private static final String UNICODE_COMMON_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
@@ -41,7 +41,7 @@ public class FeNameFormat {
private static final String UNICODE_TABLE_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
private static final String UNICODE_USER_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
private static final String UNICODE_COLUMN_NAME_REGEX
- = "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/><?@#$%^&*\\p{L}]{0,255}$";
+ = "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/?@#$%^&*\\p{L}]{0,255}$";
public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";
diff --git a/regression-test/suites/delete_p0/test_delete_handler.groovy
b/regression-test/suites/delete_p0/test_delete_handler.groovy
new file mode 100644
index 00000000000..8c9ac33edc9
--- /dev/null
+++ b/regression-test/suites/delete_p0/test_delete_handler.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/**
+ * Test delete with strange name
+ */
+suite("test_delete_handler") {
+ // test condition operator
+ sql """drop table if exists td1;"""
+ sql """
+ CREATE TABLE `td1` (
+ `id` int(11) NULL,
+ `name` varchar(255) NULL,
+ `score` int(11) NULL
+ ) ENGINE=OLAP
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """insert into td1(id,name,`score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+ sql """insert into td1(id,name,`score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+ sql """insert into td1(id,name,`score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+
+ sql """delete from td1 where `score` = 4 and `score` = 3 and `score` =
1;"""
+ sql """delete from td1 where `score` is null;"""
+ sql """delete from td1 where `score` is not null;"""
+ sql """delete from td1 where `score` in (1,2);"""
+ sql """delete from td1 where `score` not in (3,4);"""
+ sql """select * from td1;"""
+ sql """insert into td1(id,name,`score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+ sql """select * from td1;"""
+
+
+ // test column name
+ sql """drop table if exists td2;"""
+ sql """
+ CREATE TABLE `td2` (
+ `id` int(11) NULL,
+ `name` varchar(255) NULL,
+ `@score` int(11) NULL,
+ `scoreIS` int(11) NULL,
+ `sc ore` int(11) NULL,
+ `score IS score` int(11) NULL,
+ `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` int(11) NULL
+ ) ENGINE=OLAP
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """insert into td2(id,name,`@score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+ sql """insert into td2(id,name,`@score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+ sql """insert into td2(id,name,`@score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+
+ sql """select * from td2;"""
+ sql """delete from td2 where `@score` = 88;"""
+ sql """delete from td2 where `scoreIS` is null;"""
+ sql """delete from td2 where `score IS score` is null;"""
+ sql """delete from td2 where `sc ore` is null;"""
+ sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is null;"""
+ sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is not null;"""
+ sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` in (1,2,3);"""
+ sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` not in
(1,2,3);"""
+ sql """select * from td2;"""
+ sql """insert into td2(id,name,`@score`)
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+ sql """select * from td2;"""
+
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]