This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 4edfac51205 [Feature](function) support like with escape clause (#52146) (#52540)
4edfac51205 is described below

commit 4edfac512057a503925c79a183139f128bb9225d
Author: Pxl <[email protected]>
AuthorDate: Mon Jul 7 19:32:54 2025 +0800

    [Feature](function) support like with escape clause (#52146) (#52540)
    
    pick from #52146
---
 be/src/vec/functions/like.cpp                      |  35 +++++++++-
 be/src/vec/functions/like.h                        |  30 ++++++++-
 .../antlr4/org/apache/doris/nereids/DorisLexer.g4  |   1 +
 .../antlr4/org/apache/doris/nereids/DorisParser.g4 |   4 +-
 .../doris/nereids/parser/LogicalPlanBuilder.java   |  14 ++--
 .../rules/expression/rules/LikeToEqualRewrite.java |   4 ++
 .../doris/nereids/trees/expressions/Like.java      |  55 +++++++++++++++-
 .../string_functions/test_like_escape.out          | Bin 0 -> 333 bytes
 .../string_functions/test_like_escape.groovy       |  71 +++++++++++++++++++++
 9 files changed, 204 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index dcb4fc6dd0d..4ed14280e4c 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -669,16 +669,30 @@ VPatternSearchStateSPtr 
FunctionLikeBase::pattern_type_recognition(const ColumnS
 Status FunctionLikeBase::vector_non_const(const ColumnString& values, const 
ColumnString& patterns,
                                           ColumnUInt8::Container& result, 
LikeState* state,
                                           size_t input_rows_count) const {
+    ColumnString::MutablePtr replaced_patterns;
     VPatternSearchStateSPtr vector_search_state;
     if (state->is_like_pattern) {
-        vector_search_state = pattern_type_recognition<true>(patterns);
+        if (state->has_custom_escape) {
+            replaced_patterns = ColumnString::create();
+            for (int i = 0; i < input_rows_count; ++i) {
+                std::string val =
+                        replace_pattern_by_escape(patterns.get_data_at(i), 
state->escape_char);
+                replaced_patterns->insert_data(val.c_str(), val.size());
+            }
+            vector_search_state = 
pattern_type_recognition<true>(*replaced_patterns);
+        } else {
+            vector_search_state = pattern_type_recognition<true>(patterns);
+        }
     } else {
         vector_search_state = pattern_type_recognition<false>(patterns);
     }
+
+    const ColumnString& real_pattern = state->has_custom_escape ? 
*replaced_patterns : patterns;
+
     if (vector_search_state == nullptr) {
         // pattern type recognition failed, use default case
         for (int i = 0; i < input_rows_count; ++i) {
-            const auto pattern_val = patterns.get_data_at(i);
+            const auto pattern_val = real_pattern.get_data_at(i);
             const auto value_val = values.get_data_at(i);
             RETURN_IF_ERROR((state->scalar_function)(&state->search_state, 
value_val, pattern_val,
                                                      &result[i]));
@@ -815,7 +829,12 @@ void verbose_log_match(const std::string& str, const 
std::string& pattern_name,
 Status FunctionLike::construct_like_const_state(FunctionContext* context, 
const StringRef& pattern,
                                                 std::shared_ptr<LikeState>& 
state,
                                                 bool try_hyperscan) {
-    std::string pattern_str = pattern.to_string();
+    std::string pattern_str;
+    if (state->has_custom_escape) {
+        pattern_str = replace_pattern_by_escape(pattern, state->escape_char);
+    } else {
+        pattern_str = pattern.to_string();
+    }
     state->search_state.pattern_str = pattern_str;
     std::string search_string;
 
@@ -920,6 +939,16 @@ Status FunctionLike::open(FunctionContext* context, 
FunctionContext::FunctionSta
     state->is_like_pattern = true;
     state->function = like_fn;
     state->scalar_function = like_fn_scalar;
+    if (context->is_col_constant(2)) {
+        state->has_custom_escape = true;
+        const auto escape_col = context->get_constant_col(2)->column_ptr;
+        const auto& escape = escape_col->get_data_at(0);
+        if (escape.size != 1) {
+            return Status::InternalError("Escape character must be a single 
character, got: {}",
+                                         escape.to_string());
+        }
+        state->escape_char = escape.data[0];
+    }
     if (context->is_col_constant(1)) {
         const auto pattern_col = context->get_constant_col(1)->column_ptr;
         const auto& pattern = pattern_col->get_data_at(0);
diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h
index 435e2742788..d9f95123cea 100644
--- a/be/src/vec/functions/like.h
+++ b/be/src/vec/functions/like.h
@@ -51,6 +51,31 @@ class Block;
 
 namespace doris::vectorized {
 
+inline std::string replace_pattern_by_escape(const StringRef& pattern, char 
escape_char) {
+    std::string result;
+    result.reserve(pattern.size);
+    for (size_t i = 0; i < pattern.size; ++i) {
+        if (i + 1 < pattern.size && pattern.data[i] == escape_char &&
+            (pattern.data[i + 1] == escape_char || pattern.data[i + 1] == '%' 
||
+             pattern.data[i + 1] == '_')) {
+            // "^^" -> "^"
+            // "^%" -> "\%"
+            // "^_" -> "\_"
+            if ((pattern.data[i + 1] == '%' || pattern.data[i + 1] == '_')) {
+                result.push_back('\\');
+            }
+            result.push_back(pattern.data[i + 1]);
+            ++i; // skip next char
+        } else if (pattern.data[i] == '\\') {
+            // "\" -> "\\"
+            result.append("\\\\");
+        } else {
+            result.push_back(pattern.data[i]);
+        }
+    }
+    return result;
+}
+
 // TODO: replace with std::string_view when 
`LikeSearchState.substring_pattern` can
 // construct from std::string_view.
 struct LikeSearchState {
@@ -123,6 +148,8 @@ using VectorLikeFn = std::function<doris::Status(const 
ColumnString&, const Colu
 
 struct LikeState {
     bool is_like_pattern;
+    bool has_custom_escape = false;
+    char escape_char = {};
     LikeSearchState search_state;
     LikeFn function;
     ScalarLikeFn scalar_function;
@@ -150,7 +177,8 @@ using VPatternSearchStateSPtr = 
std::shared_ptr<VectorPatternSearchState>;
 
 class FunctionLikeBase : public IFunction {
 public:
-    size_t get_number_of_arguments() const override { return 2; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
 
     DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const 
override {
         return std::make_shared<DataTypeUInt8>();
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index 47a45b67aa7..0fe651d9675 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -235,6 +235,7 @@ ENGINE: 'ENGINE';
 ENGINES: 'ENGINES';
 ENTER: 'ENTER';
 ERRORS: 'ERRORS';
+ESCAPE: 'ESCAPE';
 EVENTS: 'EVENTS';
 EVERY: 'EVERY';
 EXCEPT: 'EXCEPT';
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index 7b3ecbafa7e..bc205f6686f 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -1455,7 +1455,8 @@ rowConstructorItem
 
 predicate
     : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
-    | NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression
+    | NOT? kind=(REGEXP | RLIKE) pattern=valueExpression
+    | NOT? kind=LIKE pattern=valueExpression (ESCAPE escape=valueExpression)?
     | NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE | 
MATCH_PHRASE_PREFIX | MATCH_REGEXP | MATCH_PHRASE_EDGE) pattern=valueExpression
     | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
     | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
@@ -1893,6 +1894,7 @@ nonReserved
     | ENGINE
     | ENGINES
     | ERRORS
+    | ESCAPE
     | EVENTS
     | EVERY
     | EXCLUDE
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 6d21441c95f..39dc52a03fd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -3505,10 +3505,16 @@ public class LogicalPlanBuilder extends 
DorisParserBaseVisitor<Object> {
                     }
                     break;
                 case DorisParser.LIKE:
-                    outExpression = new Like(
-                        valueExpression,
-                        getExpression(ctx.pattern)
-                    );
+                    if (ctx.ESCAPE() == null) {
+                        outExpression = new Like(
+                                valueExpression,
+                                getExpression(ctx.pattern));
+                    } else {
+                        outExpression = new Like(
+                                valueExpression,
+                                getExpression(ctx.pattern),
+                                getExpression(ctx.escape));
+                    }
                     break;
                 case DorisParser.RLIKE:
                 case DorisParser.REGEXP:
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
index e2836204cdc..e532deb3901 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
@@ -51,6 +51,10 @@ public class LikeToEqualRewrite implements 
ExpressionPatternRuleFactory {
         StringBuilder sb = new StringBuilder();
         int len = str.length();
         char escapeChar = '\\';
+        if (like.arity() == 3) {
+            escapeChar = ((VarcharLiteral) like.child(2)).value.charAt(0);
+        }
+
         for (int i = 0; i < len;) {
             char c = str.charAt(i);
             if (c == escapeChar && (i + 1) < len
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
index 84b6ffa984f..10f25fb0ebc 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
@@ -17,7 +17,12 @@
 
 package org.apache.doris.nereids.trees.expressions;
 
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.VarcharType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -28,10 +33,20 @@ import java.util.List;
  * like expression: a like 'xxx%'.
  */
 public class Like extends StringRegexPredicate {
+
+    private static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT),
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT,
+                    VarcharType.SYSTEM_DEFAULT));
+
     public Like(Expression left, Expression right) {
         this(ImmutableList.of(left, right));
     }
 
+    public Like(Expression left, Expression right, Expression escape) {
+        this(ImmutableList.of(left, right, escape));
+    }
+
     private Like(List<Expression> children) {
         this(children, false);
     }
@@ -40,9 +55,32 @@ public class Like extends StringRegexPredicate {
         super("like", children, inferred);
     }
 
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public String computeToSql() {
+        if (arity() == 2) {
+            return super.computeToSql();
+        }
+        return '(' + left().toSql() + ' ' + getName() + ' ' + right().toSql() 
+ " escape " + child(2).toSql()
+                + ')';
+    }
+
+    @Override
+    public String toString() {
+        if (arity() == 2) {
+            return super.computeToSql();
+        }
+        return "(" + left() + " " + getName() + " " + right() + " escape " + 
child(2)
+                + ")";
+    }
+
     @Override
     public Like withChildren(List<Expression> children) {
-        Preconditions.checkArgument(children.size() == 2);
+        Preconditions.checkArgument(children.size() == 2 || children.size() == 
3);
         return new Like(children);
     }
 
@@ -54,4 +92,19 @@ public class Like extends StringRegexPredicate {
     public Expression withInferred(boolean inferred) {
         return new Like(this.children, inferred);
     }
+
+    @Override
+    public void checkLegalityBeforeTypeCoercion() {
+        if (arity() == 3) {
+            if (child(2) instanceof StringLikeLiteral) {
+                String escapeChar = ((StringLikeLiteral) 
child(2)).getStringValue();
+                if (escapeChar.getBytes().length != 1) {
+                    throw new AnalysisException(
+                            "like escape character must be a single ascii 
character: " + escapeChar);
+                }
+            } else {
+                throw new AnalysisException("like escape character must be a 
string literal: " + this.toSql());
+            }
+        }
+    }
 }
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
new file mode 100644
index 00000000000..59f0007906e
Binary files /dev/null and 
b/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
 differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
new file mode 100644
index 00000000000..106d2709a85
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_like_escapes") {
+    qt_test """
+    select "%a" like "a%_" ESCAPE "a";
+    """
+    qt_test """
+    select "%_" like "a%_" ESCAPE "a";
+    """
+    qt_test """
+    select "a" like "a" ESCAPE "a";
+    """
+    qt_test """
+    select "a" like "aa" ESCAPE "a";
+    """
+    qt_test """
+    select "%a" like "a%a" ESCAPE "a";
+    """
+    qt_test """
+    select "%_" like "a%a" ESCAPE "a";
+    """
+    qt_test """
+    select "%a" like "a%a_" ESCAPE "a";
+    """
+    qt_test """
+    select "%_" like "a%a_" ESCAPE "a";
+    """
+
+    test {
+        sql """select "啊啊" like "啊啊" ESCAPE "啊";"""
+        exception "like escape character must be a single ascii character"
+    }
+    test {
+        sql """select "a" like "aa" ESCAPE "aa";"""
+        exception "like escape character must be a single ascii character"
+    }
+    test {
+        sql """select "a" like "aa" ESCAPE 1;"""
+        exception "like escape character must be a string literal"
+    }
+    qt_test """
+    select "啊%a" like "啊a%_" ESCAPE "a";
+    """
+    qt_test """
+    select "%a" like "a%_" ESCAPE "A";
+    """
+    qt_test """
+    select "\\\\" like "\\\\%" ESCAPE "A";
+    """
+    qt_test """
+    select "\\\\" like "\\\\A%" ESCAPE "A";
+    """
+    qt_test """
+    select "\\\\%" like "\\\\A%" ESCAPE "A";
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to