This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new dd8f7aade0e branch-3.1: [improve](function) support regexp_replace 
function with ignore flag options #50245 (#52116)
dd8f7aade0e is described below

commit dd8f7aade0eababa10af00da0524cee6a2195a19
Author: zhangstar333 <[email protected]>
AuthorDate: Mon Jun 23 19:30:06 2025 +0800

    branch-3.1: [improve](function) support regexp_replace function with ignore 
flag options #50245 (#52116)
    
    Cherry-pick from #50245
---
 be/src/exprs/string_functions.cpp                  |  16 +-
 be/src/exprs/string_functions.h                    |   3 +-
 be/src/vec/functions/function_regexp.cpp           | 176 +++++++++++++++++----
 .../functions/scalar/RegexpReplace.java            |  45 +++++-
 .../functions/scalar/RegexpReplaceOne.java         |  45 +++++-
 5 files changed, 247 insertions(+), 38 deletions(-)

diff --git a/be/src/exprs/string_functions.cpp 
b/be/src/exprs/string_functions.cpp
index ce5738dba95..e21f9e365ed 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -25,6 +25,8 @@
 
 #include <sstream>
 
+#include "util/string_util.h"
+
 // NOTE: be careful not to use string::append.  It is not performant.
 namespace doris {
 
@@ -61,7 +63,7 @@ bool StringFunctions::set_re2_options(const StringRef& 
match_parameter, std::str
 // The caller owns the returned regex. Returns nullptr if the pattern could 
not be compiled.
 bool StringFunctions::compile_regex(const StringRef& pattern, std::string* 
error_str,
                                     const StringRef& match_parameter,
-                                    std::unique_ptr<re2::RE2>& re) {
+                                    const StringRef& options_value, 
std::unique_ptr<re2::RE2>& re) {
     re2::StringPiece pattern_sp(pattern.data, pattern.size);
     re2::RE2::Options options;
     // Disable error logging in case e.g. every row causes an error
@@ -70,6 +72,18 @@ bool StringFunctions::compile_regex(const StringRef& 
pattern, std::string* error
     // Return the leftmost longest match (rather than the first match).
     // options.set_longest_match(true);
     options.set_dot_nl(true);
+
+    if ((options_value.data != nullptr) && (options_value.size > 0)) {
+        auto options_split = split(options_value.to_string(), ",");
+        for (const auto& option : options_split) {
+            if (iequal("ignore_invalid_escape", option)) {
+                options.set_ignore_replace_escape(true);
+            } else {
+                // "none" does nothing; more options can be added here for future 
extensibility.
+            }
+        }
+    }
+
     if (match_parameter.size > 0 &&
         !StringFunctions::set_re2_options(match_parameter, error_str, 
&options)) {
         return false;
diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h
index 8ed917b82d2..ab7079c846e 100644
--- a/be/src/exprs/string_functions.h
+++ b/be/src/exprs/string_functions.h
@@ -36,6 +36,7 @@ public:
 
     // The caller owns the returned regex. Returns nullptr if the pattern 
could not be compiled.
     static bool compile_regex(const StringRef& pattern, std::string* error_str,
-                              const StringRef& match_parameter, 
std::unique_ptr<re2::RE2>& re);
+                              const StringRef& match_parameter, const 
StringRef& options_value,
+                              std::unique_ptr<re2::RE2>& re);
 };
 } // namespace doris
diff --git a/be/src/vec/functions/function_regexp.cpp 
b/be/src/vec/functions/function_regexp.cpp
index f03ae176c20..77ffd6286ae 100644
--- a/be/src/vec/functions/function_regexp.cpp
+++ b/be/src/vec/functions/function_regexp.cpp
@@ -20,7 +20,6 @@
 #include <re2/stringpiece.h>
 #include <stddef.h>
 
-#include <algorithm>
 #include <memory>
 #include <string>
 #include <string_view>
@@ -51,12 +50,124 @@
 
 namespace doris::vectorized {
 
+struct ThreeParamTypes {
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeString>(), 
std::make_shared<DataTypeString>(),
+                std::make_shared<DataTypeString>()};
+    }
+};
+
+struct FourParamTypes {
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeString>(), 
std::make_shared<DataTypeString>(),
+                std::make_shared<DataTypeString>(), 
std::make_shared<DataTypeString>()};
+    }
+};
+
+// template FunctionRegexpReplace is used for 
regexp_replace/regexp_replace_one
+template <typename Impl, typename ParamTypes>
+class FunctionRegexpReplace : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+
+    static FunctionPtr create() { return 
std::make_shared<FunctionRegexpReplace>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override {
+        return get_variadic_argument_types_impl().size();
+    }
+
+    bool is_variadic() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        return ParamTypes::get_variadic_argument_types();
+    }
+
+    Status open(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL) {
+            if (context->is_col_constant(1)) {
+                DCHECK(!context->get_function_state(scope));
+                const auto pattern_col = 
context->get_constant_col(1)->column_ptr;
+                const auto& pattern = pattern_col->get_data_at(0);
+                if (pattern.size == 0) {
+                    return Status::OK();
+                }
+
+                std::string error_str;
+                std::unique_ptr<re2::RE2> scoped_re;
+                StringRef options_value;
+                if (context->get_num_args() == 4) {
+                    DCHECK(context->is_col_constant(3));
+                    const auto options_col = 
context->get_constant_col(3)->column_ptr;
+                    options_value = options_col->get_data_at(0);
+                }
+
+                bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(),
+                                                         options_value, 
scoped_re);
+                if (!st) {
+                    context->set_error(error_str.c_str());
+                    return Status::InvalidArgument(error_str);
+                }
+                std::shared_ptr<re2::RE2> re(scoped_re.release());
+                context->set_function_state(scope, re);
+            }
+        }
+        return Status::OK();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        size_t argument_size = arguments.size();
+
+        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
+        auto result_data_column = ColumnString::create();
+        auto& result_data = result_data_column->get_chars();
+        auto& result_offset = result_data_column->get_offsets();
+        result_offset.resize(input_rows_count);
+
+        bool col_const[3];
+        ColumnPtr argument_columns[3];
+        for (int i = 0; i < 3; ++i) {
+            col_const[i] = 
is_column_const(*block.get_by_position(arguments[i]).column);
+        }
+        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
+                                                     
*block.get_by_position(arguments[0]).column)
+                                                     .convert_to_full_column()
+                                           : 
block.get_by_position(arguments[0]).column;
+
+        default_preprocess_parameter_columns(argument_columns, col_const, {1, 
2}, block, arguments);
+
+        StringRef options_value;
+        if (col_const[1] && col_const[2]) {
+            Impl::execute_impl_const_args(context, argument_columns, 
options_value,
+                                          input_rows_count, result_data, 
result_offset,
+                                          result_null_map->get_data());
+        } else {
+            // the options argument is checked in FE, so it is always const; read 
index 0
+            if (argument_size == 4) {
+                options_value = 
block.get_by_position(arguments[3]).column->get_data_at(0);
+            }
+            Impl::execute_impl(context, argument_columns, options_value, 
input_rows_count,
+                               result_data, result_offset, 
result_null_map->get_data());
+        }
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(result_data_column), 
std::move(result_null_map));
+        return Status::OK();
+    }
+};
+
 struct RegexpReplaceImpl {
     static constexpr auto name = "regexp_replace";
-    // 3 args
     static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
-                             size_t input_rows_count, ColumnString::Chars& 
result_data,
-                             ColumnString::Offsets& result_offset, NullMap& 
null_map) {
+                             const StringRef& options_value, size_t 
input_rows_count,
+                             ColumnString::Chars& result_data, 
ColumnString::Offsets& result_offset,
+                             NullMap& null_map) {
         const auto* str_col = 
check_and_get_column<ColumnString>(argument_columns[0].get());
         const auto* pattern_col = 
check_and_get_column<ColumnString>(argument_columns[1].get());
         const auto* replace_col = 
check_and_get_column<ColumnString>(argument_columns[2].get());
@@ -66,12 +177,13 @@ struct RegexpReplaceImpl {
                 StringOP::push_null_string(i, result_data, result_offset, 
null_map);
                 continue;
             }
-            _execute_inner_loop<false>(context, str_col, pattern_col, 
replace_col, result_data,
-                                       result_offset, null_map, i);
+            _execute_inner_loop<false>(context, str_col, pattern_col, 
replace_col, options_value,
+                                       result_data, result_offset, null_map, 
i);
         }
     }
     static void execute_impl_const_args(FunctionContext* context, ColumnPtr 
argument_columns[],
-                                        size_t input_rows_count, 
ColumnString::Chars& result_data,
+                                        const StringRef& options_value, size_t 
input_rows_count,
+                                        ColumnString::Chars& result_data,
                                         ColumnString::Offsets& result_offset, 
NullMap& null_map) {
         const auto* str_col = 
check_and_get_column<ColumnString>(argument_columns[0].get());
         const auto* pattern_col = 
check_and_get_column<ColumnString>(argument_columns[1].get());
@@ -82,14 +194,14 @@ struct RegexpReplaceImpl {
                 StringOP::push_null_string(i, result_data, result_offset, 
null_map);
                 continue;
             }
-            _execute_inner_loop<true>(context, str_col, pattern_col, 
replace_col, result_data,
-                                      result_offset, null_map, i);
+            _execute_inner_loop<true>(context, str_col, pattern_col, 
replace_col, options_value,
+                                      result_data, result_offset, null_map, i);
         }
     }
     template <bool Const>
     static void _execute_inner_loop(FunctionContext* context, const 
ColumnString* str_col,
                                     const ColumnString* pattern_col,
-                                    const ColumnString* replace_col,
+                                    const ColumnString* replace_col, const 
StringRef& options_value,
                                     ColumnString::Chars& result_data,
                                     ColumnString::Offsets& result_offset, 
NullMap& null_map,
                                     const size_t index_now) {
@@ -99,7 +211,8 @@ struct RegexpReplaceImpl {
         if (re == nullptr) {
             std::string error_str;
             const auto& pattern = 
pattern_col->get_data_at(index_check_const(index_now, Const));
-            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), scoped_re);
+            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(),
+                                                     options_value, scoped_re);
             if (!st) {
                 context->add_warning(error_str.c_str());
                 StringOP::push_null_string(index_now, result_data, 
result_offset, null_map);
@@ -121,8 +234,9 @@ struct RegexpReplaceOneImpl {
     static constexpr auto name = "regexp_replace_one";
 
     static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
-                             size_t input_rows_count, ColumnString::Chars& 
result_data,
-                             ColumnString::Offsets& result_offset, NullMap& 
null_map) {
+                             const StringRef& options_value, size_t 
input_rows_count,
+                             ColumnString::Chars& result_data, 
ColumnString::Offsets& result_offset,
+                             NullMap& null_map) {
         const auto* str_col = 
check_and_get_column<ColumnString>(argument_columns[0].get());
         const auto* pattern_col = 
check_and_get_column<ColumnString>(argument_columns[1].get());
         const auto* replace_col = 
check_and_get_column<ColumnString>(argument_columns[2].get());
@@ -132,13 +246,14 @@ struct RegexpReplaceOneImpl {
                 StringOP::push_null_string(i, result_data, result_offset, 
null_map);
                 continue;
             }
-            _execute_inner_loop<false>(context, str_col, pattern_col, 
replace_col, result_data,
-                                       result_offset, null_map, i);
+            _execute_inner_loop<false>(context, str_col, pattern_col, 
replace_col, options_value,
+                                       result_data, result_offset, null_map, 
i);
         }
     }
 
     static void execute_impl_const_args(FunctionContext* context, ColumnPtr 
argument_columns[],
-                                        size_t input_rows_count, 
ColumnString::Chars& result_data,
+                                        const StringRef& options_value, size_t 
input_rows_count,
+                                        ColumnString::Chars& result_data,
                                         ColumnString::Offsets& result_offset, 
NullMap& null_map) {
         const auto* str_col = 
check_and_get_column<ColumnString>(argument_columns[0].get());
         const auto* pattern_col = 
check_and_get_column<ColumnString>(argument_columns[1].get());
@@ -149,14 +264,14 @@ struct RegexpReplaceOneImpl {
                 StringOP::push_null_string(i, result_data, result_offset, 
null_map);
                 continue;
             }
-            _execute_inner_loop<true>(context, str_col, pattern_col, 
replace_col, result_data,
-                                      result_offset, null_map, i);
+            _execute_inner_loop<true>(context, str_col, pattern_col, 
replace_col, options_value,
+                                      result_data, result_offset, null_map, i);
         }
     }
     template <bool Const>
     static void _execute_inner_loop(FunctionContext* context, const 
ColumnString* str_col,
                                     const ColumnString* pattern_col,
-                                    const ColumnString* replace_col,
+                                    const ColumnString* replace_col, const 
StringRef& options_value,
                                     ColumnString::Chars& result_data,
                                     ColumnString::Offsets& result_offset, 
NullMap& null_map,
                                     const size_t index_now) {
@@ -166,7 +281,8 @@ struct RegexpReplaceOneImpl {
         if (re == nullptr) {
             std::string error_str;
             const auto& pattern = 
pattern_col->get_data_at(index_check_const(index_now, Const));
-            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), scoped_re);
+            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(),
+                                                     options_value, scoped_re);
             if (!st) {
                 context->add_warning(error_str.c_str());
                 StringOP::push_null_string(index_now, result_data, 
result_offset, null_map);
@@ -250,7 +366,8 @@ struct RegexpExtractImpl {
         if (re == nullptr) {
             std::string error_str;
             const auto& pattern = 
pattern_col->get_data_at(index_check_const(index_now, Const));
-            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), scoped_re);
+            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), StringRef(),
+                                                     scoped_re);
             if (!st) {
                 context->add_warning(error_str.c_str());
                 StringOP::push_null_string(index_now, result_data, 
result_offset, null_map);
@@ -328,7 +445,8 @@ struct RegexpExtractAllImpl {
         if (re == nullptr) {
             std::string error_str;
             const auto& pattern = 
pattern_col->get_data_at(index_check_const(index_now, Const));
-            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), scoped_re);
+            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), StringRef(),
+                                                     scoped_re);
             if (!st) {
                 context->add_warning(error_str.c_str());
                 StringOP::push_null_string(index_now, result_data, 
result_offset, null_map);
@@ -417,8 +535,8 @@ public:
 
                 std::string error_str;
                 std::unique_ptr<re2::RE2> scoped_re;
-                bool st =
-                        StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), scoped_re);
+                bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(),
+                                                         StringRef(), 
scoped_re);
                 if (!st) {
                     context->set_error(error_str.c_str());
                     return Status::InvalidArgument(error_str);
@@ -484,17 +602,15 @@ public:
                 ColumnNullable::create(std::move(result_data_column), 
std::move(result_null_map));
         return Status::OK();
     }
-
-    Status close(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override {
-        return Status::OK();
-    }
 };
 
 void register_function_regexp_extract(SimpleFunctionFactory& factory) {
-    
factory.register_function<FunctionRegexpFunctionality<RegexpReplaceImpl>>();
+    factory.register_function<FunctionRegexpReplace<RegexpReplaceImpl, 
ThreeParamTypes>>();
+    factory.register_function<FunctionRegexpReplace<RegexpReplaceImpl, 
FourParamTypes>>();
+    factory.register_function<FunctionRegexpReplace<RegexpReplaceOneImpl, 
ThreeParamTypes>>();
+    factory.register_function<FunctionRegexpReplace<RegexpReplaceOneImpl, 
FourParamTypes>>();
     
factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<true>>>();
     
factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<false>>>();
-    
factory.register_function<FunctionRegexpFunctionality<RegexpReplaceOneImpl>>();
     
factory.register_function<FunctionRegexpFunctionality<RegexpExtractAllImpl>>();
 }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplace.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplace.java
index 8a12b8d7205..46c7285bd8b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplace.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplace.java
@@ -18,12 +18,15 @@
 package org.apache.doris.nereids.trees.expressions.functions.scalar;
 
 import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
 import 
org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.types.StringType;
 import org.apache.doris.nereids.types.VarcharType;
 
@@ -42,7 +45,12 @@ public class RegexpReplace extends ScalarFunction
             FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
                     .args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT),
             FunctionSignature.ret(StringType.INSTANCE)
-                    .args(StringType.INSTANCE, StringType.INSTANCE, 
StringType.INSTANCE)
+                    .args(StringType.INSTANCE, StringType.INSTANCE, 
StringType.INSTANCE),
+            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
+                    .args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT,
+                            VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(StringType.INSTANCE)
+                    .args(StringType.INSTANCE, StringType.INSTANCE, 
StringType.INSTANCE, StringType.INSTANCE)
     );
 
     /**
@@ -52,13 +60,44 @@ public class RegexpReplace extends ScalarFunction
         super("regexp_replace", arg0, arg1, arg2);
     }
 
+    /**
+     * constructor with 4 arguments.
+     */
+    public RegexpReplace(Expression arg0, Expression arg1, Expression arg2, 
Expression arg3) {
+        super("regexp_replace", arg0, arg1, arg2, arg3);
+    }
+
     /**
      * withChildren.
      */
     @Override
     public RegexpReplace withChildren(List<Expression> children) {
-        Preconditions.checkArgument(children.size() == 3);
-        return new RegexpReplace(children.get(0), children.get(1), 
children.get(2));
+        Preconditions.checkArgument(children.size() == 3 || children.size() == 
4,
+                "RegexpReplace should have 3 or 4 children, but got: " + 
children.size());
+        if (children.size() == 3) {
+            return new RegexpReplace(children.get(0), children.get(1), 
children.get(2));
+        } else {
+            return new RegexpReplace(children.get(0), children.get(1), 
children.get(2), children.get(3));
+        }
+    }
+
+    @Override
+    public void checkLegalityBeforeTypeCoercion() {
+        if (children().size() == 3) {
+            return;
+        }
+        if (children().size() == 4) {
+            Expression value = child(3);
+            DataType type = value.getDataType();
+            if (!type.isStringLikeType()) {
+                throw new AnalysisException(
+                        "The fourth param of regexp_replace must be a string 
type: " + this.toSql());
+            }
+            if (!(value instanceof Literal)) {
+                throw new AnalysisException(
+                        "The fourth param of regexp_replace must be a constant 
value: " + this.toSql());
+            }
+        }
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplaceOne.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplaceOne.java
index f31cf84cfa1..552d92d63a5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplaceOne.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpReplaceOne.java
@@ -18,12 +18,15 @@
 package org.apache.doris.nereids.trees.expressions.functions.scalar;
 
 import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
 import 
org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.types.StringType;
 import org.apache.doris.nereids.types.VarcharType;
 
@@ -42,7 +45,12 @@ public class RegexpReplaceOne extends ScalarFunction
             FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
                     .args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT),
             FunctionSignature.ret(StringType.INSTANCE)
-                    .args(StringType.INSTANCE, StringType.INSTANCE, 
StringType.INSTANCE)
+                    .args(StringType.INSTANCE, StringType.INSTANCE, 
StringType.INSTANCE),
+            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
+                    .args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT,
+                            VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(StringType.INSTANCE)
+                    .args(StringType.INSTANCE, StringType.INSTANCE, 
StringType.INSTANCE, StringType.INSTANCE)
     );
 
     /**
@@ -52,13 +60,44 @@ public class RegexpReplaceOne extends ScalarFunction
         super("regexp_replace_one", arg0, arg1, arg2);
     }
 
+    /**
+     * constructor with 4 arguments.
+     */
+    public RegexpReplaceOne(Expression arg0, Expression arg1, Expression arg2, 
Expression arg3) {
+        super("regexp_replace_one", arg0, arg1, arg2, arg3);
+    }
+
     /**
      * withChildren.
      */
     @Override
     public RegexpReplaceOne withChildren(List<Expression> children) {
-        Preconditions.checkArgument(children.size() == 3);
-        return new RegexpReplaceOne(children.get(0), children.get(1), 
children.get(2));
+        Preconditions.checkArgument(children.size() == 3 || children.size() == 
4,
+                "RegexpReplaceOne should have 3 or 4 children");
+        if (children.size() == 3) {
+            return new RegexpReplaceOne(children.get(0), children.get(1), 
children.get(2));
+        } else {
+            return new RegexpReplaceOne(children.get(0), children.get(1), 
children.get(2), children.get(3));
+        }
+    }
+
+    @Override
+    public void checkLegalityBeforeTypeCoercion() {
+        if (children().size() == 3) {
+            return;
+        }
+        if (children().size() == 4) {
+            Expression value = child(3);
+            DataType type = value.getDataType();
+            if (!type.isStringLikeType()) {
+                throw new AnalysisException(
+                        "The fourth param of regexp_replace_one must be a 
string type: " + this.toSql());
+            }
+            if (!(value instanceof Literal)) {
+                throw new AnalysisException(
+                        "The fourth param of regexp_replace_one must be a 
constant value: " + this.toSql());
+            }
+        }
     }
 
     @Override


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to