This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 97eded58bfb [Feature](func) Support function soundex (#55731)
97eded58bfb is described below
commit 97eded58bfb9376aea1dedf80df99de0b6c42a1f
Author: linrrarity <[email protected]>
AuthorDate: Tue Sep 9 18:16:16 2025 +0800
[Feature](func) Support function soundex (#55731)
The `SOUNDEX` function computes the [American
Soundex](https://en.wikipedia.org/wiki/Soundex) value, which consists of
the first letter followed by a three-digit sound code that represents
the English pronunciation of the input string.
- Example:
```sql
SELECT SOUNDEX('Doris');
```
```text
+------------------+
| SOUNDEX('Doris') |
+------------------+
| D620 |
+------------------+
```
---
be/src/vec/functions/function_soundex.cpp | 131 +++++++++++++++++++++
be/src/vec/functions/simple_function_factory.h | 2 +
be/test/vec/function/function_string_test.cpp | 69 +++++++++++
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../functions/executable/StringArithmetic.java | 55 +++++++++
.../expressions/functions/scalar/Soundex.java | 69 +++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
.../nereids/rules/expression/FoldConstantTest.java | 26 ++++
.../string_functions/test_string_function.out | Bin 3233 -> 4097 bytes
.../fold_constant_string_arithmatic.groovy | 43 +++++++
.../string_functions/test_string_function.groovy | 53 +++++++++
11 files changed, 455 insertions(+)
diff --git a/be/src/vec/functions/function_soundex.cpp
b/be/src/vec/functions/function_soundex.cpp
new file mode 100644
index 00000000000..80e775dec36
--- /dev/null
+++ b/be/src/vec/functions/function_soundex.cpp
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cctype>
+
+#include "common/status.h"
+#include "vec/columns/column_string.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+class FunctionSoundex : public IFunction {
+public:
+ static constexpr auto name = "soundex";
+
+ static FunctionPtr create() { return std::make_shared<FunctionSoundex>(); }
+
+ String get_name() const override { return name; }
+
+ size_t get_number_of_arguments() const override { return 1; }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return std::make_shared<DataTypeString>();
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ uint32_t result, size_t input_rows_count) const
override {
+ const ColumnPtr col_ptr = block.get_by_position(arguments[0]).column;
+
+ auto res_column = ColumnString::create();
+ res_column->reserve(input_rows_count);
+ auto& res_data = res_column->get_chars();
+ auto& res_offsets = res_column->get_offsets();
+ res_data.reserve(input_rows_count * CODE_SIZE);
+ res_offsets.resize(input_rows_count);
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ StringRef ref = col_ptr->get_data_at(i);
+ RETURN_IF_ERROR(calculate_soundex_and_insert(ref, res_data,
res_offsets, i));
+ }
+
+ block.replace_by_position(result, std::move(res_column));
+ return Status::OK();
+ }
+
+private:
+ Status calculate_soundex_and_insert(const StringRef& ref,
ColumnString::Chars& chars,
+ ColumnString::Offsets& offsets, const
size_t row) const {
+ uint32_t row_start = (row == 0) ? 0 : offsets[row - 1];
+ uint32_t expect_end = row_start + CODE_SIZE;
+
+ if (ref.size == 0) {
+ offsets[row] = row_start;
+ return Status::OK();
+ }
+
+ char pre_code = '\0';
+ for (size_t i = 0; i < ref.size; ++i) {
+ auto c = static_cast<unsigned char>(ref.data[i]);
+
+ if (c > 0x7f) {
+ return Status::InvalidArgument("soundex only supports ASCII,
but got: {}",
+ ref.data[i]);
+ }
+ if (!std::isalpha(c)) {
+ continue;
+ }
+
+ c = static_cast<char>(std::toupper(c));
+ if (chars.size() == row_start) {
+ chars.push_back(c);
+ pre_code = (SOUNDEX_TABLE[c - 'A'] == 'N') ? '\0' :
SOUNDEX_TABLE[c - 'A'];
+ } else if (char code = SOUNDEX_TABLE[c - 'A']; code != 'N') {
+ if (code != 'V' && code != pre_code) {
+ chars.push_back(code);
+ if (chars.size() == expect_end) {
+ offsets[row] =
static_cast<ColumnString::Offset>(chars.size());
+ return Status::OK();
+ }
+ }
+
+ pre_code = code;
+ }
+ }
+
+ while (chars.size() != row_start && chars.size() < expect_end) {
+ chars.push_back('0');
+ }
+ offsets[row] = static_cast<ColumnString::Offset>(chars.size());
+
+ return Status::OK();
+ }
+
+ /** 1. If a vowel (A, E, I, O, U) separates two consonants that have the
same soundex code
+ * the consonant to the right of the vowel is coded. Here we use 'V' to
represent vowels.
+ * eg : **Tymczak** is coded as T-522 (T, 5 for the M, 2 for the C, Z
ignored , 2 for the K).
+ * Since the vowel "A" separates the Z and K, the K is coded.
+ *
+ * 2. If "H" or "W" separate two consonants that have the same soundex
code, the consonant to the right of the vowel is NOT coded.
+ * Here we use 'N' to represent these two characters.
+ * eg : **Ashcraft** is coded A-261 (A, 2 for the S, C ignored, 6 for the
R, 1 for the F). It is not coded A-226.
+ */
+ static constexpr char SOUNDEX_TABLE[26] = {'V', '1', '2', '3', 'V', '1',
'2', 'N', 'V',
+ '2', '2', '4', '5', '5', 'V',
'1', '2', '6',
+ '2', '3', 'V', '1', 'N', '2',
'V', '2'};
+
+ static constexpr uint8_t CODE_SIZE = 4;
+};
+
+void register_function_soundex(SimpleFunctionFactory& factory) {
+ factory.register_function<FunctionSoundex>();
+}
+
+#include "common/compile_check_end.h"
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/simple_function_factory.h
b/be/src/vec/functions/simple_function_factory.h
index 906a294d3db..b4d119496df 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -120,6 +120,7 @@ void register_function_dict_get_many(SimpleFunctionFactory&
factory);
void register_function_ai(SimpleFunctionFactory& factory);
void register_function_score(SimpleFunctionFactory& factory);
void register_function_variant_type(SimpleFunctionFactory& factory);
+void register_function_soundex(SimpleFunctionFactory& factory);
#if defined(BE_TEST) && !defined(BE_BENCHMARK)
void register_function_throw_exception(SimpleFunctionFactory& factory);
@@ -338,6 +339,7 @@ public:
register_function_dict_get_many(instance);
register_function_ai(instance);
register_function_score(instance);
+ register_function_soundex(instance);
#if defined(BE_TEST) && !defined(BE_BENCHMARK)
register_function_throw_exception(instance);
#endif
diff --git a/be/test/vec/function/function_string_test.cpp
b/be/test/vec/function/function_string_test.cpp
index 4ee28fc8de0..d4c06c5b860 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -3607,4 +3607,73 @@ TEST(function_string_test,
function_count_substring_test) {
check_function_all_arg_comb<DataTypeInt32, true>(func_name,
input_types, data_set);
}
}
+
+TEST(function_string_test, soundex_test) {
+ std::string func_name = "soundex";
+
+ {
+ InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR};
+
+ DataSet data_set = {
+ {{std::string("Doris")}, std::string("D620")},
+ {{std::string("ApacheDoris中文测试")}, std::string("A123")},
+ {{std::string("Robert")}, std::string("R163")},
+ {{std::string("Rupert")}, std::string("R163")},
+ {{std::string("Smith")}, std::string("S530")},
+ {{std::string("Smyth")}, std::string("S530")},
+ {{std::string("Johnson")}, std::string("J525")},
+ {{std::string("Jackson")}, std::string("J250")},
+ {{std::string("Ashcraft")}, std::string("A261")},
+ {{std::string("Ashcroft")}, std::string("A261")},
+ {{std::string("Washington")}, std::string("W252")},
+ {{std::string("Lee")}, std::string("L000")},
+ {{std::string("Gutierrez")}, std::string("G362")},
+ {{std::string("Pfister")}, std::string("P236")},
+ {{std::string("Honeyman")}, std::string("H555")},
+ {{std::string("Lloyd")}, std::string("L300")},
+ {{std::string("Tymczak")}, std::string("T522")},
+
+ {{std::string("A")}, std::string("A000")},
+ {{std::string("B")}, std::string("B000")},
+ {{std::string("Z")}, std::string("Z000")},
+
+ {{std::string("robert")}, std::string("R163")},
+ {{std::string("ROBERT")}, std::string("R163")},
+ {{std::string("RoBerT")}, std::string("R163")},
+
+ {{std::string("R@bert")}, std::string("R163")},
+ {{std::string("Rob3rt")}, std::string("R163")},
+ {{std::string("Rob-ert")}, std::string("R163")},
+ {{std::string("123Robert")}, std::string("R163")},
+ {{std::string("123")}, std::string("")},
+ {{std::string("@#$")}, std::string("")},
+ {{std::string(" ")}, std::string("")},
+ {{std::string("")}, std::string("")},
+ {{std::string("Ab_+ %*^cdefghijklmnopqrstuvwxyz")},
std::string("A123")},
+
+ {{std::string("Euler")}, std::string("E460")},
+ {{std::string("Gauss")}, std::string("G200")},
+ {{std::string("Hilbert")}, std::string("H416")},
+ {{std::string("Knuth")}, std::string("K530")},
+ {{std::string("Lloyd")}, std::string("L300")},
+ {{std::string("Lukasiewicz")}, std::string("L222")},
+
+ {{std::string("Huang")}, std::string("H520")},
+ {{std::string("Zhang")}, std::string("Z520")},
+ {{std::string("Wang")}, std::string("W520")}};
+
+ static_cast<void>(check_function<DataTypeString, true>(func_name,
input_types, data_set));
+ }
+
+ {
+ InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR};
+
+ DataSet data_set = {{{std::string("中文测试")}, std::string("")},
+ {{std::string("abc 你好")}, std::string("")}};
+
+ static_cast<void>(check_function<DataTypeString, true>(func_name,
input_types, data_set, -1,
+ -1, true));
+ }
+}
+
} // namespace doris::vectorized
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index dbad709fff0..278d5c42252 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -415,6 +415,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3sum;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Decrypt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Encrypt;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Soundex;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Space;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByChar;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByRegexp;
@@ -919,6 +920,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(Sm3sum.class, "sm3sum"),
scalar(Sm4Decrypt.class, "sm4_decrypt"),
scalar(Sm4Encrypt.class, "sm4_encrypt"),
+ scalar(Soundex.class, "soundex"),
scalar(Space.class, "space"),
scalar(SplitByChar.class, "split_by_char"),
scalar(SplitByRegexp.class, "split_by_regexp"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index a592e71a9a1..afedc7a3fd1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -18,6 +18,7 @@
package org.apache.doris.nereids.trees.expressions.functions.executable;
import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.exceptions.NotSupportedException;
import org.apache.doris.nereids.trees.expressions.ExecFunction;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.literal.ArrayLiteral;
@@ -1059,4 +1060,58 @@ public class StringArithmetic {
}
return castStringLikeLiteral(first,
first.getValue().replace(second.getValue(), third.getValue()));
}
+
+ /**
+ * Executable arithmetic functions soundex
+ */
+ @ExecFunction(name = "soundex")
+ public static Expression soundex(StringLikeLiteral first) {
+ char[] soundexTable = {
+ 'V', '1', '2', '3', 'V', '1', '2', 'N', 'V',
+ '2', '2', '4', '5', '5', 'V', '1', '2', '6',
+ '2', '3', 'V', '1', 'N', '2', 'V', '2'
+ };
+
+ String result = "";
+ if (!first.getValue().isEmpty()) {
+ char preCode = '\0';
+
+ for (int i = 0; i < first.getValue().length(); i++) {
+ char c = first.getValue().charAt(i);
+
+ if (c > 0x7f) {
+ throw new NotSupportedException("soundex only supports
ASCII, but got: " + c);
+ }
+
+ if (!Character.isLetter(c)) {
+ continue;
+ }
+
+ c = Character.toUpperCase(c);
+ if (result.isEmpty()) {
+ result += c;
+ preCode = (soundexTable[c - 'A'] == 'N') ? '\0' :
soundexTable[c - 'A'];
+ } else {
+ char code = soundexTable[c - 'A'];
+ if (code != 'N') {
+ if (code != 'V' && code != preCode) {
+ result += code;
+ if (result.length() == 4) {
+ break;
+ }
+ }
+ preCode = code;
+ }
+ }
+ }
+
+ if (result.length() > 0) {
+ while (result.length() < 4) {
+ result += '0';
+ }
+ }
+ }
+
+ return castStringLikeLiteral(first, result);
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Soundex.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Soundex.java
new file mode 100644
index 00000000000..2e7b3bfa678
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Soundex.java
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Scalar function 'Soundex'
+ */
+public class Soundex extends ScalarFunction
+ implements UnaryExpression, ExplicitlyCastableSignature,
PropagateNullable {
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT)
+ );
+
+ /**
+ * constructor with 1 argument.
+ */
+ public Soundex(Expression arg) {
+ super("soundex", arg);
+ }
+
+ /** constructor for withChildren and reuse signature */
+ private Soundex(ScalarFunctionParams functionParams) {
+ super(functionParams);
+ }
+
+ @Override
+ public Soundex withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 1);
+ return new Soundex(getFunctionParams(children));
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitSoundex(this, context);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index d78d5bb7ff8..b52c8bda9da 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -417,6 +417,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm3sum;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Decrypt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sm4Encrypt;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Soundex;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Space;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByChar;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SplitByRegexp;
@@ -2031,6 +2032,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(sm4Encrypt, context);
}
+ default R visitSoundex(Soundex soundex, C context) {
+ return visitScalarFunction(soundex, context);
+ }
+
default R visitSpace(Space space, C context) {
return visitScalarFunction(space, context);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
index 90465f67717..af299a36c0a 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.rules.expression;
import org.apache.doris.analysis.ArithmeticExpr.Operator;
import org.apache.doris.nereids.analyzer.UnboundRelation;
import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.exceptions.NotSupportedException;
import org.apache.doris.nereids.parser.NereidsParser;
import org.apache.doris.nereids.rules.analysis.ExpressionAnalyzer;
import org.apache.doris.nereids.rules.expression.rules.FoldConstantRule;
@@ -91,6 +92,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sign;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sin;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sinh;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Soundex;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sqrt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
@@ -582,6 +584,30 @@ class FoldConstantTest extends ExpressionRewriteTestHelper
{
);
rewritten = executor.rewrite(replace, context);
Assertions.assertEquals(new StringLiteral("default"), rewritten);
+
+ Soundex soundex = new Soundex(StringLiteral.of("Ashcraft"));
+ rewritten = executor.rewrite(soundex, context);
+ Assertions.assertEquals(new StringLiteral("A261"), rewritten);
+ soundex = new Soundex(StringLiteral.of("Robert"));
+ rewritten = executor.rewrite(soundex, context);
+ Assertions.assertEquals(new StringLiteral("R163"), rewritten);
+ soundex = new Soundex(StringLiteral.of("R@bert"));
+ rewritten = executor.rewrite(soundex, context);
+ Assertions.assertEquals(new StringLiteral("R163"), rewritten);
+ soundex = new Soundex(StringLiteral.of("Honeyman"));
+ rewritten = executor.rewrite(soundex, context);
+ Assertions.assertEquals(new StringLiteral("H555"), rewritten);
+ soundex = new Soundex(StringLiteral.of("Apache Doris你好"));
+ rewritten = executor.rewrite(soundex, context);
+ Assertions.assertEquals(new StringLiteral("A123"), rewritten);
+ soundex = new Soundex(StringLiteral.of(""));
+ rewritten = executor.rewrite(soundex, context);
+ Assertions.assertEquals(new StringLiteral(""), rewritten);
+
+ Assertions.assertThrows(NotSupportedException.class, () -> {
+ Soundex soundexThrow = new Soundex(new StringLiteral("Doris你好"));
+ executor.rewrite(soundexThrow, context);
+ }, "soundex only supports ASCII");
}
@Test
diff --git
a/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
b/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
index e3d169bd04f..51798c69629 100644
Binary files
a/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
and
b/regression-test/data/nereids_p0/sql_functions/string_functions/test_string_function.out
differ
diff --git
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index 374f69bb58b..68db295f66c 100644
---
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -1851,5 +1851,48 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select cast(cast('inf' as double) as string)")
testFoldConst("select cast(cast('-inf' as double) as string)")
*/
+
+ // SOUNDEX
+ testFoldConst("SELECT SOUNDEX('Doris')")
+ testFoldConst("SELECT SOUNDEX('Robert')")
+ testFoldConst("SELECT SOUNDEX('Rupert')")
+ testFoldConst("SELECT SOUNDEX('Smith')")
+ testFoldConst("SELECT SOUNDEX('Smyth')")
+ testFoldConst("SELECT SOUNDEX('Johnson')")
+ testFoldConst("SELECT SOUNDEX('Jackson')")
+ testFoldConst("SELECT SOUNDEX('Ashcraft')")
+ testFoldConst("SELECT SOUNDEX('Ashcroft')")
+ testFoldConst("SELECT SOUNDEX('Washington')")
+ testFoldConst("SELECT SOUNDEX('Lee')")
+ testFoldConst("SELECT SOUNDEX('Gutierrez')")
+ testFoldConst("SELECT SOUNDEX('Pfister')")
+ testFoldConst("SELECT SOUNDEX('Honeyman')")
+ testFoldConst("SELECT SOUNDEX('Lloyd')")
+ testFoldConst("SELECT SOUNDEX('Tymczak')")
+ testFoldConst("SELECT SOUNDEX('A')")
+ testFoldConst("SELECT SOUNDEX('B')")
+ testFoldConst("SELECT SOUNDEX('Z')")
+ testFoldConst("SELECT SOUNDEX('robert')")
+ testFoldConst("SELECT SOUNDEX('ROBERT')")
+ testFoldConst("SELECT SOUNDEX('RoBerT')")
+ testFoldConst("SELECT SOUNDEX('R@bert')")
+ testFoldConst("SELECT SOUNDEX('Rob3rt')")
+ testFoldConst("SELECT SOUNDEX('Rob-ert')")
+ testFoldConst("SELECT SOUNDEX('123Robert')")
+ testFoldConst("SELECT SOUNDEX('123')")
+ testFoldConst("SELECT SOUNDEX('~!@#%^&*-+')")
+ testFoldConst("SELECT SOUNDEX(' ')")
+ testFoldConst("SELECT SOUNDEX('')")
+ testFoldConst("SELECT SOUNDEX('Ab_+ %*^cdefghijklmnopqrstuvwxyz')")
+ testFoldConst("SELECT SOUNDEX('Euler')")
+ testFoldConst("SELECT SOUNDEX('Gauss')")
+ testFoldConst("SELECT SOUNDEX('Hilbert')")
+ testFoldConst("SELECT SOUNDEX('Knuth')")
+ testFoldConst("SELECT SOUNDEX('Lloyd')")
+ testFoldConst("SELECT SOUNDEX('Lukasiewicz')")
+ testFoldConst("SELECT SOUNDEX('Huang')")
+ testFoldConst("SELECT SOUNDEX('Zhang')")
+ testFoldConst("SELECT SOUNDEX('Wang')")
+ testFoldConst("SELECT SOUNDEX(NULL)")
}
diff --git
a/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
b/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
index 8b4fc9d2338..ee0210ce022 100644
---
a/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
+++
b/regression-test/suites/nereids_p0/sql_functions/string_functions/test_string_function.groovy
@@ -214,6 +214,59 @@ suite("test_string_function") {
qt_sub_replace_utf8_sql9 " select sub_replace('你好世界','大家',4);"
qt_sub_replace_utf8_sql10 " select sub_replace('你好世界','大家',-1);"
+ qt_soundex """SELECT SOUNDEX('Doris');"""
+ qt_soundex """SELECT SOUNDEX('Robert');"""
+ qt_soundex """SELECT SOUNDEX('Rupert');"""
+ qt_soundex """SELECT SOUNDEX('Smith');"""
+ qt_soundex """SELECT SOUNDEX('Smyth');"""
+ qt_soundex """SELECT SOUNDEX('Johnson');"""
+ qt_soundex """SELECT SOUNDEX('Jackson');"""
+ qt_soundex """SELECT SOUNDEX('Ashcraft');"""
+ qt_soundex """SELECT SOUNDEX('Ashcroft');"""
+ qt_soundex """SELECT SOUNDEX('Washington');"""
+ qt_soundex """SELECT SOUNDEX('Lee');"""
+ qt_soundex """SELECT SOUNDEX('Gutierrez');"""
+ qt_soundex """SELECT SOUNDEX('Pfister');"""
+ qt_soundex """SELECT SOUNDEX('Honeyman');"""
+ qt_soundex """SELECT SOUNDEX('Lloyd');"""
+ qt_soundex """SELECT SOUNDEX('Tymczak');"""
+ qt_soundex """SELECT SOUNDEX('A');"""
+ qt_soundex """SELECT SOUNDEX('B');"""
+ qt_soundex """SELECT SOUNDEX('Z');"""
+ qt_soundex """SELECT SOUNDEX('robert');"""
+ qt_soundex """SELECT SOUNDEX('ROBERT');"""
+ qt_soundex """SELECT SOUNDEX('RoBerT');"""
+ qt_soundex """SELECT SOUNDEX('R@bert');"""
+ qt_soundex """SELECT SOUNDEX('Rob3rt');"""
+ qt_soundex """SELECT SOUNDEX('Rob-ert');"""
+ qt_soundex """SELECT SOUNDEX('123Robert');"""
+ qt_soundex """SELECT SOUNDEX('123');"""
+ qt_soundex """SELECT SOUNDEX('~!@#%^&*-+');"""
+ qt_soundex """SELECT SOUNDEX(' ');"""
+ qt_soundex """SELECT SOUNDEX('');"""
+ qt_soundex """SELECT SOUNDEX('Ab_+ %*^cdefghijklmnopqrstuvwxyz');"""
+ qt_soundex """SELECT SOUNDEX('Euler');"""
+ qt_soundex """SELECT SOUNDEX('Gauss');"""
+ qt_soundex """SELECT SOUNDEX('Hilbert');"""
+ qt_soundex """SELECT SOUNDEX('Knuth');"""
+ qt_soundex """SELECT SOUNDEX('Lloyd');"""
+ qt_soundex """SELECT SOUNDEX('Lukasiewicz');"""
+ qt_soundex """SELECT SOUNDEX('Huang');"""
+ qt_soundex """SELECT SOUNDEX('Zhang');"""
+ qt_soundex """SELECT SOUNDEX('Wang');"""
+ qt_soundex """SELECT SOUNDEX(NULL);"""
+
+ // non-ASCII test for soundex
+ qt_soundex """SELECT SOUNDEX('ApacheDoris非 ASCII 测试');"""
+ test{
+ sql """SELECT SOUNDEX('非 ASCII 测试');"""
+ exception "soundex only supports ASCII"
+ }
+ test{
+ sql """SELECT SOUNDEX('Doris中文测试');"""
+ exception "soundex only supports ASCII"
+ }
+
sql """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]