This is an automated email from the ASF dual-hosted git repository.

zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 97eded58bfb [Feature](func) Support function soundex (#55731)
97eded58bfb is described below

commit 97eded58bfb9376aea1dedf80df99de0b6c42a1f
Author: linrrarity <[email protected]>
AuthorDate: Tue Sep 9 18:16:16 2025 +0800

    [Feature](func) Support function soundex (#55731)
    
    The `SOUNDEX` function computes the [American
    Soundex](https://en.wikipedia.org/wiki/Soundex) value, which consists of
    the first letter followed by a three-digit sound code that represents
    the English pronunciation of the input string.
    
    - Example:
    ```sql
    SELECT SOUNDEX('Doris');
    ```
    ```text
    +------------------+
    | SOUNDEX('Doris') |
    +------------------+
    | D620             |
    +------------------+
    ```
---
 be/src/vec/functions/function_soundex.cpp          | 131 +++++++++++++++++++++
 be/src/vec/functions/simple_function_factory.h     |   2 +
 be/test/vec/function/function_string_test.cpp      |  69 +++++++++++
 .../doris/catalog/BuiltinScalarFunctions.java      |   2 +
 .../functions/executable/StringArithmetic.java     |  55 +++++++++
 .../expressions/functions/scalar/Soundex.java      |  69 +++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |   5 +
 .../nereids/rules/expression/FoldConstantTest.java |  26 ++++
 .../string_functions/test_string_function.out      | Bin 3233 -> 4097 bytes
 .../fold_constant_string_arithmatic.groovy         |  43 +++++++
 .../string_functions/test_string_function.groovy   |  53 +++++++++
 11 files changed, 455 insertions(+)

diff --git a/be/src/vec/functions/function_soundex.cpp 
b/be/src/vec/functions/function_soundex.cpp
new file mode 100644
index 00000000000..80e775dec36
--- /dev/null
+++ b/be/src/vec/functions/function_soundex.cpp
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cctype>
+
+#include "common/status.h"
+#include "vec/columns/column_string.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+class FunctionSoundex : public IFunction {
+public:
+    static constexpr auto name = "soundex";
+
+    static FunctionPtr create() { return std::make_shared<FunctionSoundex>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        uint32_t result, size_t input_rows_count) const 
override {
+        const ColumnPtr col_ptr = block.get_by_position(arguments[0]).column;
+
+        auto res_column = ColumnString::create();
+        res_column->reserve(input_rows_count);
+        auto& res_data = res_column->get_chars();
+        auto& res_offsets = res_column->get_offsets();
+        res_data.reserve(input_rows_count * CODE_SIZE);
+        res_offsets.resize(input_rows_count);
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            StringRef ref = col_ptr->get_data_at(i);
+            RETURN_IF_ERROR(calculate_soundex_and_insert(ref, res_data, 
res_offsets, i));
+        }
+
+        block.replace_by_position(result, std::move(res_column));
+        return Status::OK();
+    }
+
+private:
+    Status calculate_soundex_and_insert(const StringRef& ref, 
ColumnString::Chars& chars,
+                                        ColumnString::Offsets& offsets, const 
size_t row) const {
+        uint32_t row_start = (row == 0) ? 0 : offsets[row - 1];
+        uint32_t expect_end = row_start + CODE_SIZE;
+
+        if (ref.size == 0) {
+            offsets[row] = row_start;
+            return Status::OK();
+        }
+
+        char pre_code = '\0';
+        for (size_t i = 0; i < ref.size; ++i) {
+            auto c = static_cast<unsigned char>(ref.data[i]);
+
+            if (c > 0x7f) {
+                return Status::InvalidArgument("soundex only supports ASCII, 
but got: {}",
+                                               ref.data[i]);
+            }
+            if (!std::isalpha(c)) {
+                continue;
+            }
+
+            c = static_cast<char>(std::toupper(c));
+            if (chars.size() == row_start) {
+                chars.push_back(c);
+                pre_code = (SOUNDEX_TABLE[c - 'A'] == 'N') ? '\0' : 
SOUNDEX_TABLE[c - 'A'];
+            } else if (char code = SOUNDEX_TABLE[c - 'A']; code != 'N') {
+                if (code != 'V' && code != pre_code) {
+                    chars.push_back(code);
+                    if (chars.size() == expect_end) {
+                        offsets[row] = 
static_cast<ColumnString::Offset>(chars.size());
+                        return Status::OK();
+                    }
+                }
+
+                pre_code = code;
+            }
+        }
+
+        while (chars.size() != row_start && chars.size() < expect_end) {
+            chars.push_back('0');
+        }
+        offsets[row] = static_cast<ColumnString::Offset>(chars.size());
+
+        return Status::OK();
+    }
+
+    /** 1. If a vowel (A, E, I, O, U) separates two consonants that have the 
same soundex code,
+     *  the consonant to the right of the vowel is coded. Here we use 'V' to 
represent vowels (the table below also marks Y as 'V').
+     *  e.g. **Tymczak** is coded as T-522 (T, 5 for the M, 2 for the C, Z 
ignored, 2 for the K).
+     *  Since the vowel "A" separates the Z and K, the K is coded.
+     *
+     *  2. If "H" or "W" separate two consonants that have the same soundex 
code, the consonant to the right of the "H" or "W" is NOT coded.
+     *  Here we use 'N' to represent these two characters.
+     *  eg : **Ashcraft** is coded A-261 (A, 2 for the S, C ignored, 6 for the 
R, 1 for the F). It is not coded A-226.
+     */
+    static constexpr char SOUNDEX_TABLE[26] = {'V', '1', '2', '3', 'V', '1', 
'2', 'N', 'V',
+                                               '2', '2', '4', '5', '5', 'V', 
'1', '2', '6',
+                                               '2', '3', 'V', '1', 'N', '2', 
'V', '2'};
+
+    static constexpr uint8_t CODE_SIZE = 4;
+};
+
+void register_function_soundex(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionSoundex>();
+}
+
+#include "common/compile_check_end.h"
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/simple_function_factory.h 
b/be/src/vec/functions/simple_function_factory.h
index 906a294d3db..b4d119496df 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -120,6 +120,7 @@ void register_function_dict_get_many(SimpleFunctionFactory& 
factory);
 void register_function_ai(SimpleFunctionFactory& factory);
 void register_function_score(SimpleFunctionFactory& factory);
 void register_function_variant_type(SimpleFunctionFactory& factory);
+void register_function_soundex(SimpleFunctionFactory& factory);
 
 #if defined(BE_TEST) && !defined(BE_BENCHMARK)
 void register_function_throw_exception(SimpleFunctionFactory& factory);
@@ -338,6 +339,7 @@ public:
             register_function_dict_get_many(instance);
             register_function_ai(instance);
             register_function_score(instance);
+            register_function_soundex(instance);
 #if defined(BE_TEST) && !defined(BE_BENCHMARK)
             register_function_throw_exception(instance);
 #endif
diff --git a/be/test/vec/function/function_string_test.cpp 
b/be/test/vec/function/function_string_test.cpp
index 4ee28fc8de0..d4c06c5b860 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -3607,4 +3607,73 @@ TEST(function_string_test, 
function_count_substring_test) {
         check_function_all_arg_comb<DataTypeInt32, true>(func_name, 
input_types, data_set);
     }
 }
+
+TEST(function_string_test, soundex_test) {
+    std::string func_name = "soundex";
+
+    {
+        InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR};
+
+        DataSet data_set = {
+                {{std::string("Doris")}, std::string("D620")},
+                {{std::string("ApacheDoris中文测试")}, std::string("A123")},
+                {{std::string("Robert")}, std::string("R163")},
+                {{std::string("Rupert")}, std::string("R163")},
+                {{std::string("Smith")}, std::string("S530")},
+                {{std::string("Smyth")}, std::string("S530")},
+                {{std::string("Johnson")}, std::string("J525")},
+                {{std::string("Jackson")}, std::string("J250")},
+                {{std::string("Ashcraft")}, std::string("A261")},
+                {{std::string("Ashcroft")}, std::string("A261")},
+                {{std::string("Washington")}, std::string("W252")},
+                {{std::string("Lee")}, std::string("L000")},
+                {{std::string("Gutierrez")}, std::string("G362")},
+                {{std::string("Pfister")}, std::string("P236")},
+                {{std::string("Honeyman")}, std::string("H555")},
+                {{std::string("Lloyd")}, std::string("L300")},
+                {{std::string("Tymczak")}, std::string("T522")},
+
+                {{std::string("A")}, std::string("A000")},
+                {{std::string("B")}, std::string("B000")},
+                {{std::string("Z")}, std::string("Z000")},
+
+                {{std::string("robert")}, std::string("R163")},
+                {{std::string("ROBERT")}, std::string("R163")},
+                {{std::string("RoBerT")}, std::string("R163")},
+
+                {{std::string("R@bert")}, std::string("R163")},
+                {{std::string("Rob3rt")}, std::string("R163")},
+                {{std::string("Rob-ert")}, std::string("R163")},
+                {{std::string("123Robert")}, std::string("R163")},
+                {{std::string("123")}, std::string("")},
+                {{std::string("@#$")}, std::string("")},
+                {{std::string("   ")}, std::string("")},
+                {{std::string("")}, std::string("")},
+                {{std::string("Ab_+ %*^cdefghijklmnopqrstuvwxyz")}, 
std::string("A123")},
+
+                {{std::string("Euler")}, std::string("E460")},
+                {{std::string("Gauss")}, std::string("G200")},
+                {{std::string("Hilbert")}, std::string("H416")},
+                {{std::string("Knuth")}, std::string("K530")},
+                {{std::string("Lloyd")}, std::string("L300")},
+                {{std::string("Lukasiewicz")}, std::string("L222")},
+
+                {{std::string("Huang")}, std::string("H520")},
+                {{std::string("Zhang")}, std::string("Z520")},
+                {{std::string("Wang")}, std::string("W520")}};
+
+        static_cast<void>(check_function<DataTypeString, true>(func_name, 
input_types, data_set));
+    }
+
+    {
+        InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR};
+
+        DataSet data_set = {{{std::string("中文测试")}, std::string("")},
+                            {{std::string("abc 你好")}, std::string("")}};
+
+        static_cast<void>(check_function<DataTypeString, true>(func_name, 
input_types, data_set, -1,
+                                                               -1, true));
+    }
+}
+
 } // namespace doris::vectorized
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index dbad709fff0..278d5c42252 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -415,6 +415,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3sum;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Decrypt;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Encrypt;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Soundex;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Space;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByChar;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByRegexp;
@@ -919,6 +920,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(Sm3sum.class, "sm3sum"),
             scalar(Sm4Decrypt.class, "sm4_decrypt"),
             scalar(Sm4Encrypt.class, "sm4_encrypt"),
+            scalar(Soundex.class, "soundex"),
             scalar(Space.class, "space"),
             scalar(SplitByChar.class, "split_by_char"),
             scalar(SplitByRegexp.class, "split_by_regexp"),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index a592e71a9a1..afedc7a3fd1 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -18,6 +18,7 @@
 package org.apache.doris.nereids.trees.expressions.functions.executable;
 
 import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.exceptions.NotSupportedException;
 import org.apache.doris.nereids.trees.expressions.ExecFunction;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.literal.ArrayLiteral;
@@ -1059,4 +1060,58 @@ public class StringArithmetic {
         }
         return castStringLikeLiteral(first, 
first.getValue().replace(second.getValue(), third.getValue()));
     }
+
+    /**
+     * Executable string function soundex: first ASCII letter (upper-cased) plus three Soundex digits, padded with '0'.
+     */
+    @ExecFunction(name = "soundex")
+    public static Expression soundex(StringLikeLiteral first) {
+        char[] soundexTable = {
+            'V', '1', '2', '3', 'V', '1', '2', 'N', 'V',
+            '2', '2', '4', '5', '5', 'V', '1', '2', '6',
+            '2', '3', 'V', '1', 'N', '2', 'V', '2'
+        };
+
+        String result = "";
+        if (!first.getValue().isEmpty()) {
+            char preCode = '\0';
+
+            for (int i = 0; i < first.getValue().length(); i++) {
+                char c = first.getValue().charAt(i);
+
+                if (c > 0x7f) {
+                    throw new NotSupportedException("soundex only supports 
ASCII, but got: " + c);
+                }
+
+                if (!Character.isLetter(c)) {
+                    continue;
+                }
+
+                c = Character.toUpperCase(c);
+                if (result.isEmpty()) {
+                    result += c;
+                    preCode = (soundexTable[c - 'A'] == 'N') ? '\0' : 
soundexTable[c - 'A'];
+                } else {
+                    char code = soundexTable[c - 'A'];
+                    if (code != 'N') {
+                        if (code != 'V' && code != preCode) {
+                            result += code;
+                            if (result.length() == 4) {
+                                break;
+                            }
+                        }
+                        preCode = code;
+                    }
+                }
+            }
+
+            if (result.length() > 0) {
+                while (result.length() < 4) {
+                    result += '0';
+                }
+            }
+        }
+
+        return castStringLikeLiteral(first, result);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Soundex.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Soundex.java
new file mode 100644
index 00000000000..2e7b3bfa678
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Soundex.java
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Scalar function 'Soundex'
+ */
+public class Soundex extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT)
+    );
+
+    /**
+     * constructor with 1 argument.
+     */
+    public Soundex(Expression arg) {
+        super("soundex", arg);
+    }
+
+    /** constructor for withChildren and reuse signature */
+    private Soundex(ScalarFunctionParams functionParams) {
+        super(functionParams);
+    }
+
+    @Override
+    public Soundex withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new Soundex(getFunctionParams(children));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitSoundex(this, context);
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index d78d5bb7ff8..b52c8bda9da 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -417,6 +417,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3sum;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Decrypt;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Encrypt;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Soundex;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Space;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByChar;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByRegexp;
@@ -2031,6 +2032,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(sm4Encrypt, context);
     }
 
+    default R visitSoundex(Soundex soundex, C context) {
+        return visitScalarFunction(soundex, context);
+    }
+
     default R visitSpace(Space space, C context) {
         return visitScalarFunction(space, context);
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
index 90465f67717..af299a36c0a 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.rules.expression;
 import org.apache.doris.analysis.ArithmeticExpr.Operator;
 import org.apache.doris.nereids.analyzer.UnboundRelation;
 import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.exceptions.NotSupportedException;
 import org.apache.doris.nereids.parser.NereidsParser;
 import org.apache.doris.nereids.rules.analysis.ExpressionAnalyzer;
 import org.apache.doris.nereids.rules.expression.rules.FoldConstantRule;
@@ -91,6 +92,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sign;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sin;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sinh;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Soundex;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Sqrt;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
@@ -582,6 +584,30 @@ class FoldConstantTest extends ExpressionRewriteTestHelper 
{
         );
         rewritten = executor.rewrite(replace, context);
         Assertions.assertEquals(new StringLiteral("default"), rewritten);
+
+        Soundex soundex = new Soundex(StringLiteral.of("Ashcraft"));
+        rewritten = executor.rewrite(soundex, context);
+        Assertions.assertEquals(new StringLiteral("A261"), rewritten);
+        soundex = new Soundex(StringLiteral.of("Robert"));
+        rewritten = executor.rewrite(soundex, context);
+        Assertions.assertEquals(new StringLiteral("R163"), rewritten);
+        soundex = new Soundex(StringLiteral.of("R@bert"));
+        rewritten = executor.rewrite(soundex, context);
+        Assertions.assertEquals(new StringLiteral("R163"), rewritten);
+        soundex = new Soundex(StringLiteral.of("Honeyman"));
+        rewritten = executor.rewrite(soundex, context);
+        Assertions.assertEquals(new StringLiteral("H555"), rewritten);
+        soundex = new Soundex(StringLiteral.of("Apache Doris你好"));
+        rewritten = executor.rewrite(soundex, context);
+        Assertions.assertEquals(new StringLiteral("A123"), rewritten);
+        soundex = new Soundex(StringLiteral.of(""));
+        rewritten = executor.rewrite(soundex, context);
+        Assertions.assertEquals(new StringLiteral(""), rewritten);
+
+        Assertions.assertThrows(NotSupportedException.class, () -> {
+            Soundex soundexThrow = new Soundex(new StringLiteral("Doris你好"));
+            executor.rewrite(soundexThrow, context);
+        }, "soundex only supports ASCII");
     }
 
     @Test
diff --git 
a/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
 
b/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
index e3d169bd04f..51798c69629 100644
Binary files 
a/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
 and 
b/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
 differ
diff --git 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index 374f69bb58b..68db295f66c 100644
--- 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++ 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -1851,5 +1851,48 @@ suite("fold_constant_string_arithmatic") {
     testFoldConst("select cast(cast('inf' as double) as string)")
     testFoldConst("select cast(cast('-inf' as double) as string)")
     */
+
+    // SOUNDEX
+    testFoldConst("SELECT SOUNDEX('Doris')")
+    testFoldConst("SELECT SOUNDEX('Robert')")
+    testFoldConst("SELECT SOUNDEX('Rupert')")
+    testFoldConst("SELECT SOUNDEX('Smith')")
+    testFoldConst("SELECT SOUNDEX('Smyth')")
+    testFoldConst("SELECT SOUNDEX('Johnson')")
+    testFoldConst("SELECT SOUNDEX('Jackson')")
+    testFoldConst("SELECT SOUNDEX('Ashcraft')")
+    testFoldConst("SELECT SOUNDEX('Ashcroft')")
+    testFoldConst("SELECT SOUNDEX('Washington')")
+    testFoldConst("SELECT SOUNDEX('Lee')")
+    testFoldConst("SELECT SOUNDEX('Gutierrez')")
+    testFoldConst("SELECT SOUNDEX('Pfister')")
+    testFoldConst("SELECT SOUNDEX('Honeyman')")
+    testFoldConst("SELECT SOUNDEX('Lloyd')")
+    testFoldConst("SELECT SOUNDEX('Tymczak')")
+    testFoldConst("SELECT SOUNDEX('A')")
+    testFoldConst("SELECT SOUNDEX('B')")
+    testFoldConst("SELECT SOUNDEX('Z')")
+    testFoldConst("SELECT SOUNDEX('robert')")
+    testFoldConst("SELECT SOUNDEX('ROBERT')")
+    testFoldConst("SELECT SOUNDEX('RoBerT')")
+    testFoldConst("SELECT SOUNDEX('R@bert')")
+    testFoldConst("SELECT SOUNDEX('Rob3rt')")
+    testFoldConst("SELECT SOUNDEX('Rob-ert')")
+    testFoldConst("SELECT SOUNDEX('123Robert')")
+    testFoldConst("SELECT SOUNDEX('123')")
+    testFoldConst("SELECT SOUNDEX('~!@#%^&*-+')")
+    testFoldConst("SELECT SOUNDEX('   ')")
+    testFoldConst("SELECT SOUNDEX('')")
+    testFoldConst("SELECT SOUNDEX('Ab_+ %*^cdefghijklmnopqrstuvwxyz')")
+    testFoldConst("SELECT SOUNDEX('Euler')")
+    testFoldConst("SELECT SOUNDEX('Gauss')")
+    testFoldConst("SELECT SOUNDEX('Hilbert')")
+    testFoldConst("SELECT SOUNDEX('Knuth')")
+    testFoldConst("SELECT SOUNDEX('Lloyd')")
+    testFoldConst("SELECT SOUNDEX('Lukasiewicz')")
+    testFoldConst("SELECT SOUNDEX('Huang')")
+    testFoldConst("SELECT SOUNDEX('Zhang')")
+    testFoldConst("SELECT SOUNDEX('Wang')")
+    testFoldConst("SELECT SOUNDEX(NULL)")
 }
 
diff --git 
a/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
 
b/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
index 8b4fc9d2338..ee0210ce022 100644
--- 
a/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
+++ 
b/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
@@ -214,6 +214,59 @@ suite("test_string_function") {
     qt_sub_replace_utf8_sql9 " select sub_replace('你好世界','大家',4);"
     qt_sub_replace_utf8_sql10 " select sub_replace('你好世界','大家',-1);"
 
+    qt_soundex """SELECT SOUNDEX('Doris');"""
+    qt_soundex """SELECT SOUNDEX('Robert');"""
+    qt_soundex """SELECT SOUNDEX('Rupert');"""
+    qt_soundex """SELECT SOUNDEX('Smith');"""
+    qt_soundex """SELECT SOUNDEX('Smyth');"""
+    qt_soundex """SELECT SOUNDEX('Johnson');"""
+    qt_soundex """SELECT SOUNDEX('Jackson');"""
+    qt_soundex """SELECT SOUNDEX('Ashcraft');"""
+    qt_soundex """SELECT SOUNDEX('Ashcroft');"""
+    qt_soundex """SELECT SOUNDEX('Washington');"""
+    qt_soundex """SELECT SOUNDEX('Lee');"""
+    qt_soundex """SELECT SOUNDEX('Gutierrez');"""
+    qt_soundex """SELECT SOUNDEX('Pfister');"""
+    qt_soundex """SELECT SOUNDEX('Honeyman');"""
+    qt_soundex """SELECT SOUNDEX('Lloyd');"""
+    qt_soundex """SELECT SOUNDEX('Tymczak');"""
+    qt_soundex """SELECT SOUNDEX('A');"""
+    qt_soundex """SELECT SOUNDEX('B');"""
+    qt_soundex """SELECT SOUNDEX('Z');"""
+    qt_soundex """SELECT SOUNDEX('robert');"""
+    qt_soundex """SELECT SOUNDEX('ROBERT');"""
+    qt_soundex """SELECT SOUNDEX('RoBerT');"""
+    qt_soundex """SELECT SOUNDEX('R@bert');"""
+    qt_soundex """SELECT SOUNDEX('Rob3rt');"""
+    qt_soundex """SELECT SOUNDEX('Rob-ert');"""
+    qt_soundex """SELECT SOUNDEX('123Robert');"""
+    qt_soundex """SELECT SOUNDEX('123');"""
+    qt_soundex """SELECT SOUNDEX('~!@#%^&*-+');"""
+    qt_soundex """SELECT SOUNDEX('   ');"""
+    qt_soundex """SELECT SOUNDEX('');"""
+    qt_soundex """SELECT SOUNDEX('Ab_+ %*^cdefghijklmnopqrstuvwxyz');"""
+    qt_soundex """SELECT SOUNDEX('Euler');"""
+    qt_soundex """SELECT SOUNDEX('Gauss');"""
+    qt_soundex """SELECT SOUNDEX('Hilbert');"""
+    qt_soundex """SELECT SOUNDEX('Knuth');"""
+    qt_soundex """SELECT SOUNDEX('Lloyd');"""
+    qt_soundex """SELECT SOUNDEX('Lukasiewicz');"""
+    qt_soundex """SELECT SOUNDEX('Huang');"""
+    qt_soundex """SELECT SOUNDEX('Zhang');"""
+    qt_soundex """SELECT SOUNDEX('Wang');"""
+    qt_soundex """SELECT SOUNDEX(NULL);"""
+
+    // non-ASCII test for soundex
+    qt_soundex """SELECT SOUNDEX('ApacheDoris非 ASCII 测试');"""
+    test{
+        sql """SELECT SOUNDEX('非 ASCII 测试');"""
+        exception "soundex only supports ASCII"
+    }
+    test{
+        sql """SELECT SOUNDEX('Doris中文测试');"""
+        exception "soundex only supports ASCII"
+    }
+
 
 
     sql """


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to