This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 4edfac51205 [Feature](function) support like with escape clause
(#52146) (#52540)
4edfac51205 is described below
commit 4edfac512057a503925c79a183139f128bb9225d
Author: Pxl <[email protected]>
AuthorDate: Mon Jul 7 19:32:54 2025 +0800
[Feature](function) support like with escape clause (#52146) (#52540)
pick from #52146
---
be/src/vec/functions/like.cpp | 35 +++++++++-
be/src/vec/functions/like.h | 30 ++++++++-
.../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 1 +
.../antlr4/org/apache/doris/nereids/DorisParser.g4 | 4 +-
.../doris/nereids/parser/LogicalPlanBuilder.java | 14 ++--
.../rules/expression/rules/LikeToEqualRewrite.java | 4 ++
.../doris/nereids/trees/expressions/Like.java | 55 +++++++++++++++-
.../string_functions/test_like_escape.out | Bin 0 -> 333 bytes
.../string_functions/test_like_escape.groovy | 71 +++++++++++++++++++++
9 files changed, 204 insertions(+), 10 deletions(-)
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index dcb4fc6dd0d..4ed14280e4c 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -669,16 +669,30 @@ VPatternSearchStateSPtr
FunctionLikeBase::pattern_type_recognition(const ColumnS
Status FunctionLikeBase::vector_non_const(const ColumnString& values, const
ColumnString& patterns,
ColumnUInt8::Container& result,
LikeState* state,
size_t input_rows_count) const {
+ ColumnString::MutablePtr replaced_patterns;
VPatternSearchStateSPtr vector_search_state;
if (state->is_like_pattern) {
- vector_search_state = pattern_type_recognition<true>(patterns);
+ if (state->has_custom_escape) {
+ replaced_patterns = ColumnString::create();
+ for (int i = 0; i < input_rows_count; ++i) {
+ std::string val =
+ replace_pattern_by_escape(patterns.get_data_at(i),
state->escape_char);
+ replaced_patterns->insert_data(val.c_str(), val.size());
+ }
+ vector_search_state =
pattern_type_recognition<true>(*replaced_patterns);
+ } else {
+ vector_search_state = pattern_type_recognition<true>(patterns);
+ }
} else {
vector_search_state = pattern_type_recognition<false>(patterns);
}
+
+ const ColumnString& real_pattern = state->has_custom_escape ?
*replaced_patterns : patterns;
+
if (vector_search_state == nullptr) {
// pattern type recognition failed, use default case
for (int i = 0; i < input_rows_count; ++i) {
- const auto pattern_val = patterns.get_data_at(i);
+ const auto pattern_val = real_pattern.get_data_at(i);
const auto value_val = values.get_data_at(i);
RETURN_IF_ERROR((state->scalar_function)(&state->search_state,
value_val, pattern_val,
&result[i]));
@@ -815,7 +829,12 @@ void verbose_log_match(const std::string& str, const
std::string& pattern_name,
Status FunctionLike::construct_like_const_state(FunctionContext* context,
const StringRef& pattern,
std::shared_ptr<LikeState>&
state,
bool try_hyperscan) {
- std::string pattern_str = pattern.to_string();
+ std::string pattern_str;
+ if (state->has_custom_escape) {
+ pattern_str = replace_pattern_by_escape(pattern, state->escape_char);
+ } else {
+ pattern_str = pattern.to_string();
+ }
state->search_state.pattern_str = pattern_str;
std::string search_string;
@@ -920,6 +939,16 @@ Status FunctionLike::open(FunctionContext* context,
FunctionContext::FunctionSta
state->is_like_pattern = true;
state->function = like_fn;
state->scalar_function = like_fn_scalar;
+ if (context->is_col_constant(2)) {
+ state->has_custom_escape = true;
+ const auto escape_col = context->get_constant_col(2)->column_ptr;
+ const auto& escape = escape_col->get_data_at(0);
+ if (escape.size != 1) {
+ return Status::InternalError("Escape character must be a single
character, got: {}",
+ escape.to_string());
+ }
+ state->escape_char = escape.data[0];
+ }
if (context->is_col_constant(1)) {
const auto pattern_col = context->get_constant_col(1)->column_ptr;
const auto& pattern = pattern_col->get_data_at(0);
diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h
index 435e2742788..d9f95123cea 100644
--- a/be/src/vec/functions/like.h
+++ b/be/src/vec/functions/like.h
@@ -51,6 +51,31 @@ class Block;
namespace doris::vectorized {
+/// Rewrites a LIKE pattern written with a custom escape character into the
+/// equivalent pattern that uses the default backslash escape.
+///
+///   <esc>%      ->  \%      literal percent sign
+///   <esc>_      ->  \_      literal underscore
+///   <esc><esc>  ->  <esc>   literal escape character (written as \\ when the
+///                           escape character is itself a backslash)
+///   \           ->  \\      any other backslash is an ordinary character
+///
+/// An escape character that is not followed by '%', '_' or itself is not part
+/// of an escape sequence and is treated like any other byte of the pattern.
+///
+/// @param pattern      raw pattern bytes as supplied by the query
+/// @param escape_char  single-byte escape character from the ESCAPE clause
+/// @return the pattern re-expressed with backslash escapes
+inline std::string replace_pattern_by_escape(const StringRef& pattern, char escape_char) {
+    std::string result;
+    result.reserve(pattern.size);
+    for (size_t i = 0; i < pattern.size; ++i) {
+        const char cur = pattern.data[i];
+        if (cur == escape_char && i + 1 < pattern.size) {
+            const char next = pattern.data[i + 1];
+            if (next == '%' || next == '_') {
+                // Escaped wildcard: re-escape it with a backslash.
+                result.push_back('\\');
+                result.push_back(next);
+                ++i; // skip next char
+                continue;
+            }
+            if (next == escape_char) {
+                // Doubled escape character stands for one literal character.
+                // A literal backslash must stay escaped in the output, otherwise
+                // it would escape whatever follows it in the rewritten pattern.
+                if (escape_char == '\\') {
+                    result.append("\\\\");
+                } else {
+                    result.push_back(escape_char);
+                }
+                ++i; // skip next char
+                continue;
+            }
+        }
+        if (cur == '\\') {
+            // A backslash outside the sequences above is plain data.
+            result.append("\\\\");
+        } else {
+            result.push_back(cur);
+        }
+    }
+    return result;
+}
+
// TODO: replace with std::string_view when
`LikeSearchState.substring_pattern` can
// construct from std::string_view.
struct LikeSearchState {
@@ -123,6 +148,8 @@ using VectorLikeFn = std::function<doris::Status(const
ColumnString&, const Colu
struct LikeState {
bool is_like_pattern;
+ bool has_custom_escape = false;
+ char escape_char = {};
LikeSearchState search_state;
LikeFn function;
ScalarLikeFn scalar_function;
@@ -150,7 +177,8 @@ using VPatternSearchStateSPtr =
std::shared_ptr<VectorPatternSearchState>;
class FunctionLikeBase : public IFunction {
public:
- size_t get_number_of_arguments() const override { return 2; }
+ size_t get_number_of_arguments() const override { return 0; }
+ bool is_variadic() const override { return true; }
DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const
override {
return std::make_shared<DataTypeUInt8>();
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index 47a45b67aa7..0fe651d9675 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -235,6 +235,7 @@ ENGINE: 'ENGINE';
ENGINES: 'ENGINES';
ENTER: 'ENTER';
ERRORS: 'ERRORS';
+ESCAPE: 'ESCAPE';
EVENTS: 'EVENTS';
EVERY: 'EVERY';
EXCEPT: 'EXCEPT';
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index 7b3ecbafa7e..bc205f6686f 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -1455,7 +1455,8 @@ rowConstructorItem
predicate
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
- | NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression
+ | NOT? kind=(REGEXP | RLIKE) pattern=valueExpression
+ | NOT? kind=LIKE pattern=valueExpression (ESCAPE escape=valueExpression)?
| NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE |
MATCH_PHRASE_PREFIX | MATCH_REGEXP | MATCH_PHRASE_EDGE) pattern=valueExpression
| NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
| NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
@@ -1893,6 +1894,7 @@ nonReserved
| ENGINE
| ENGINES
| ERRORS
+ | ESCAPE
| EVENTS
| EVERY
| EXCLUDE
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 6d21441c95f..39dc52a03fd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -3505,10 +3505,16 @@ public class LogicalPlanBuilder extends
DorisParserBaseVisitor<Object> {
}
break;
case DorisParser.LIKE:
- outExpression = new Like(
- valueExpression,
- getExpression(ctx.pattern)
- );
+ if (ctx.ESCAPE() == null) {
+ outExpression = new Like(
+ valueExpression,
+ getExpression(ctx.pattern));
+ } else {
+ outExpression = new Like(
+ valueExpression,
+ getExpression(ctx.pattern),
+ getExpression(ctx.escape));
+ }
break;
case DorisParser.RLIKE:
case DorisParser.REGEXP:
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
index e2836204cdc..e532deb3901 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
@@ -51,6 +51,10 @@ public class LikeToEqualRewrite implements
ExpressionPatternRuleFactory {
StringBuilder sb = new StringBuilder();
int len = str.length();
char escapeChar = '\\';
+ if (like.arity() == 3) {
+ escapeChar = ((VarcharLiteral) like.child(2)).value.charAt(0);
+ }
+
for (int i = 0; i < len;) {
char c = str.charAt(i);
if (c == escapeChar && (i + 1) < len
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
index 84b6ffa984f..10f25fb0ebc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
@@ -17,7 +17,12 @@
package org.apache.doris.nereids.trees.expressions;
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.VarcharType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@@ -28,10 +33,20 @@ import java.util.List;
* like expression: a like 'xxx%'.
*/
public class Like extends StringRegexPredicate {
+
+ private static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT,
VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT,
VarcharType.SYSTEM_DEFAULT,
+ VarcharType.SYSTEM_DEFAULT));
+
public Like(Expression left, Expression right) {
this(ImmutableList.of(left, right));
}
+ /**
+ * Builds a LIKE predicate with an ESCAPE clause; the children are stored in the
+ * order (left, right, escape).
+ */
+ public Like(Expression left, Expression right, Expression escape) {
+ this(ImmutableList.of(left, right, escape));
+ }
+
private Like(List<Expression> children) {
this(children, false);
}
@@ -40,9 +55,32 @@ public class Like extends StringRegexPredicate {
super("like", children, inferred);
}
+ /** Returns the two supported signatures: with and without the escape argument. */
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+    /**
+     * Renders the predicate as SQL. The two-argument form uses the inherited rendering;
+     * the three-argument form appends the escape expression after the pattern.
+     */
+    @Override
+    public String computeToSql() {
+        if (arity() != 2) {
+            StringBuilder sql = new StringBuilder();
+            sql.append('(').append(left().toSql());
+            sql.append(' ').append(getName());
+            sql.append(' ').append(right().toSql());
+            sql.append(" escape ").append(child(2).toSql());
+            sql.append(')');
+            return sql.toString();
+        }
+        return super.computeToSql();
+    }
+
+    /**
+     * String form of the predicate. The two-argument form keeps the inherited
+     * representation; the three-argument form appends the escape expression.
+     */
+    @Override
+    public String toString() {
+        if (arity() == 2) {
+            // Delegate to the parent's toString(), not computeToSql(), so that a plain
+            // LIKE keeps the same string form it had before this override existed.
+            return super.toString();
+        }
+        return "(" + left() + " " + getName() + " " + right() + " escape " + child(2) + ")";
+    }
+
@Override
public Like withChildren(List<Expression> children) {
- Preconditions.checkArgument(children.size() == 2);
+ Preconditions.checkArgument(children.size() == 2 || children.size() ==
3);
return new Like(children);
}
@@ -54,4 +92,19 @@ public class Like extends StringRegexPredicate {
public Expression withInferred(boolean inferred) {
return new Like(this.children, inferred);
}
+
+    /**
+     * Validates the optional escape argument before type coercion.
+     *
+     * <p>Nothing is checked for the two-argument form. In the three-argument form the
+     * escape argument must be a string literal whose UTF-8 encoding is exactly one byte,
+     * i.e. a single ASCII character.
+     *
+     * @throws AnalysisException if the escape argument is not a string literal, or is not
+     *         a single ASCII character
+     */
+    @Override
+    public void checkLegalityBeforeTypeCoercion() {
+        if (arity() != 3) {
+            return;
+        }
+        if (!(child(2) instanceof StringLikeLiteral)) {
+            throw new AnalysisException("like escape character must be a string literal: " + this.toSql());
+        }
+        String escapeChar = ((StringLikeLiteral) child(2)).getStringValue();
+        // Count bytes in UTF-8 explicitly: the no-argument getBytes() uses the platform
+        // default charset, under which a non-ASCII character may also encode to one byte.
+        if (escapeChar.getBytes(java.nio.charset.StandardCharsets.UTF_8).length != 1) {
+            throw new AnalysisException(
+                    "like escape character must be a single ascii character: " + escapeChar);
+        }
+    }
}
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
new file mode 100644
index 00000000000..59f0007906e
Binary files /dev/null and
b/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
new file mode 100644
index 00000000000..106d2709a85
--- /dev/null
+++
b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression cases for `LIKE ... ESCAPE`. The comments describe how each pattern is
+// composed; the expected results live in the accompanying .out file.
+suite("test_like_escapes") {
+ // Escaped '%' followed by an unescaped '_'.
+ qt_test """
+ select "%a" like "a%_" ESCAPE "a";
+ """
+ qt_test """
+ select "%_" like "a%_" ESCAPE "a";
+ """
+ // Pattern that consists only of the escape character.
+ qt_test """
+ select "a" like "a" ESCAPE "a";
+ """
+ // Doubled escape character.
+ qt_test """
+ select "a" like "aa" ESCAPE "a";
+ """
+ // Escaped '%' followed by a trailing escape character.
+ qt_test """
+ select "%a" like "a%a" ESCAPE "a";
+ """
+ qt_test """
+ select "%_" like "a%a" ESCAPE "a";
+ """
+ // Escaped '%' followed by an escaped '_'.
+ qt_test """
+ select "%a" like "a%a_" ESCAPE "a";
+ """
+ qt_test """
+ select "%_" like "a%a_" ESCAPE "a";
+ """
+
+ // Invalid escape arguments: a non-ASCII character, a two-character string,
+ // and a non-string literal must each be rejected with an error.
+ test {
+ sql """select "啊啊" like "啊啊" ESCAPE "啊";"""
+ exception "like escape character must be a single ascii character"
+ }
+ test {
+ sql """select "a" like "aa" ESCAPE "aa";"""
+ exception "like escape character must be a single ascii character"
+ }
+ test {
+ sql """select "a" like "aa" ESCAPE 1;"""
+ exception "like escape character must be a string literal"
+ }
+ // Non-ASCII text in the value and pattern with an ASCII escape character.
+ qt_test """
+ select "啊%a" like "啊a%_" ESCAPE "a";
+ """
+ // Escape character "A" differs in case from the "a" that appears in the pattern.
+ qt_test """
+ select "%a" like "a%_" ESCAPE "A";
+ """
+ // Backslashes in the value and pattern combined with a custom escape character.
+ qt_test """
+ select "\\\\" like "\\\\%" ESCAPE "A";
+ """
+ qt_test """
+ select "\\\\" like "\\\\A%" ESCAPE "A";
+ """
+ qt_test """
+ select "\\\\%" like "\\\\A%" ESCAPE "A";
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]