This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 593f749a75e [feature](function) add is_uuid function (#57325)
593f749a75e is described below
commit 593f749a75eb1c60d91712e3aaf07997c3c78ee1
Author: admiring_xm <[email protected]>
AuthorDate: Wed Oct 29 13:12:02 2025 +0800
[feature](function) add is_uuid function (#57325)
add is_uuid
---
be/src/vec/common/string_utils/string_utils.h | 4 ++
be/src/vec/functions/uuid.cpp | 67 ++++++++++++++++++++
be/test/vec/function/function_uuid_test.cpp | 49 +++++++++++++++
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../functions/executable/StringArithmetic.java | 55 +++++++++++++++++
.../trees/expressions/functions/scalar/IsUuid.java | 71 ++++++++++++++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 ++
.../data/nereids_function_p0/scalar_function/I.out | 58 ++++++++++++++++++
.../nereids_function_p0/scalar_function/I.groovy | 4 ++
.../fold_constant_string_arithmatic.groovy | 15 +++++
10 files changed, 330 insertions(+)
diff --git a/be/src/vec/common/string_utils/string_utils.h
b/be/src/vec/common/string_utils/string_utils.h
index 58da68db5fb..0f4220489b4 100644
--- a/be/src/vec/common/string_utils/string_utils.h
+++ b/be/src/vec/common/string_utils/string_utils.h
@@ -76,3 +76,7 @@ inline bool is_whitespace_ascii(char c) {
inline bool is_not_whitespace_ascii(char c) {
return !is_whitespace_ascii(c);
}
+
+// Tells whether `c` is an ASCII hexadecimal digit: a letter in a-f or A-F, or any
+// character accepted by is_numeric_ascii().
+inline bool is_hex_ascii(char c) {
+    if (c >= 'a' && c <= 'f') {
+        return true;
+    }
+    if (c >= 'A' && c <= 'F') {
+        return true;
+    }
+    return is_numeric_ascii(c);
+}
\ No newline at end of file
diff --git a/be/src/vec/functions/uuid.cpp b/be/src/vec/functions/uuid.cpp
index fd9b0427882..f3e5b43b35d 100644
--- a/be/src/vec/functions/uuid.cpp
+++ b/be/src/vec/functions/uuid.cpp
@@ -27,11 +27,13 @@
#include "common/status.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column_string.h"
+#include "vec/common/string_utils/string_utils.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_string.h"
#include "vec/functions/function.h"
+#include "vec/functions/function_totype.h"
#include "vec/functions/simple_function_factory.h"
namespace doris {
@@ -76,8 +78,73 @@ public:
}
};
+struct NameIsUuid { // name tag: second template argument of FunctionUnaryToType in the FunctionIsUuid alias
+ static constexpr auto name = "is_uuid"; // function name; the FE side registers the same spelling, "is_uuid"
+};
+
+// Row-wise implementation of is_uuid(): writes 1 for every input string that is a textual
+// UUID and 0 otherwise.
+//
+// Three spellings are accepted, where h is one ASCII hex digit (either case):
+//   1. 32 hex digits, no separators:        hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+//   2. 36 chars in 8-4-4-4-12 form:         hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh
+//   3. spelling 2 wrapped in braces (38):   {hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh}
+// Every other length or character layout is rejected.
+struct IsUuidImpl {
+    using ReturnType = DataTypeBool;
+    using ReturnColumnType = ColumnUInt8;
+    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
+    static constexpr size_t uuid_without_dash_length = 32;
+    static constexpr size_t uuid_with_dash_length = 36;
+    static constexpr size_t uuid_with_braces_and_dash_length = 38;
+    // Zero-based indexes of the four '-' separators inside the 36-char spelling.
+    static constexpr size_t dash_positions[4] = {8, 13, 18, 23};
+
+    // Returns true when `pos` is one of the four separator indexes.
+    static bool is_dash_position(size_t pos) {
+        for (size_t dash_pos : dash_positions) {
+            if (pos == dash_pos) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // Returns true when every character in [src, end) is an ASCII hex digit.
+    static bool is_all_hex(const char* src, const char* end) {
+        for (; src != end; ++src) {
+            if (!is_hex_ascii(*src)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Checks that [src, end) is exactly the 36-char 8-4-4-4-12 spelling.
+    //
+    // Each index is classified first: separator indexes must hold '-', all other indexes
+    // must hold a hex digit. The previous version only looked at the index when the
+    // character was not a hex digit, so a hex digit sitting on a separator index slipped
+    // through and any 36-char all-hex string was reported as a UUID.
+    static bool is_uuid_with_dash(const char* src, const char* end) {
+        const size_t str_size = static_cast<size_t>(end - src);
+        if (str_size != uuid_with_dash_length) {
+            return false;
+        }
+        for (size_t i = 0; i < str_size; ++i) {
+            const bool char_ok = is_dash_position(i) ? src[i] == '-' : is_hex_ascii(src[i]);
+            if (!char_ok) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Fills `res` with one 0/1 flag per row of the string column described by
+    // `data` (concatenated bytes) and `offsets` (end offset of each row).
+    // Always returns Status::OK().
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         PaddedPODArray<UInt8>& res) {
+        const size_t rows_count = offsets.size();
+        res.resize(rows_count);
+        for (size_t i = 0; i < rows_count; ++i) {
+            // NOTE(review): for i == 0 this reads offsets[i - 1], unchanged from the previous
+            // version; presumably the offsets array is readable at index -1 and yields 0 there.
+            // Confirm against PaddedPODArray.
+            const char* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            const size_t str_size = offsets[i] - offsets[i - 1];
+
+            bool is_valid = false;
+            if (str_size == uuid_without_dash_length) {
+                is_valid = is_all_hex(source, source + str_size);
+            } else if (str_size == uuid_with_dash_length) {
+                is_valid = is_uuid_with_dash(source, source + str_size);
+            } else if (str_size == uuid_with_braces_and_dash_length) {
+                // Strip the braces and validate the 36 characters between them.
+                is_valid = source[0] == '{' && source[str_size - 1] == '}' &&
+                           is_uuid_with_dash(source + 1, source + str_size - 1);
+            }
+            res[i] = is_valid;
+        }
+        return Status::OK();
+    }
+};
+
+using FunctionIsUuid = FunctionUnaryToType<IsUuidImpl, NameIsUuid>;
+
void register_function_uuid(SimpleFunctionFactory& factory) {
factory.register_function<Uuid>();
+ factory.register_function<FunctionIsUuid>(); // registers FunctionIsUuid (the is_uuid function) with the factory
}
} // namespace doris::vectorized
diff --git a/be/test/vec/function/function_uuid_test.cpp
b/be/test/vec/function/function_uuid_test.cpp
new file mode 100644
index 00000000000..1f64554dadd
--- /dev/null
+++ b/be/test/vec/function/function_uuid_test.cpp
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "function_test_util.h"
+#include "vec/functions/uuid.cpp"
+
+namespace doris::vectorized {
+
+using namespace ut_type;
+
+TEST(function_uuid_test, function_is_uuid_test) { // runs is_uuid over the table below via check_function_all_arg_comb
+ std::string func_name = "is_uuid";
+ InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR};
+ DataSet data_set = { // each row: {input} -> expected result. NOTE(review): no row is a 36-char input made only of hex digits
+ {{STRING("6ccd780c-baba-1026-9564-5b8c656024db")}, BOOLEAN(1)}, // 36 chars, 8-4-4-4-12 layout
+ {{STRING("6ccd780c-baba-1026-9564-5b8c656024dbaaaa")}, BOOLEAN(0)}, // 40 chars: unsupported length
+ {{STRING("6ccd780c-baba-1026-9564-5b8c656024gg")}, BOOLEAN(0)}, // 'g' is not a hex digit
+ {{STRING("6ccd780-cbaba-1026-9564-5b8c656024db")}, BOOLEAN(0)}, // first dash at index 7 instead of 8
+ {{STRING("6ccd780-cbaba-1026-95645-b8c656024db")}, BOOLEAN(0)}, // first and last dash misplaced
+ {{STRING("6ccd780-cbaba-1026-95645-b8c65602")}, BOOLEAN(0)}, // 33 chars: unsupported length
+ {{STRING("{6ccd780c-baba-1026-9564-5b8c656024db}")}, BOOLEAN(1)}, // 38 chars: braces around the dashed layout
+ {{STRING("{6ccd780c-baba-1026-95645b8c656024db}")}, BOOLEAN(0)}, // 37 chars: last dash missing
+ {{STRING("{6ccd780c-baba-1026-95645-b8c656024db}")}, BOOLEAN(0)}, // 38 chars, last dash one index late
+ {{STRING("6ccd780c-baba-1026-95645-b8c656024db}")}, BOOLEAN(0)}, // 37 chars: closing brace without opening brace
+ {{STRING("6ccd780cbaba102695645b8c656024db")}, BOOLEAN(1)}, // 32 hex digits, no dashes
+ {{STRING("6ccd780cbaba102695645b8c656024dz")}, BOOLEAN(0)}, // 32 chars but 'z' is not a hex digit
+ {{STRING("6ccd780cbaba102")}, BOOLEAN(0)}, // 15 chars: unsupported length
+ {{STRING("{6ccd780cbaba102}")}, BOOLEAN(0)}, // 17 chars: unsupported length
+ {{Null()}, Null()}, // NULL input is expected to produce NULL
+ };
+
+ check_function_all_arg_comb<DataTypeBool, true>(func_name, input_types,
data_set);
+}
+
+} // namespace doris::vectorized
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 448d7bc3fb6..ead9c00c9fe 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -263,6 +263,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4Mapped;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4String;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv6String;
import org.apache.doris.nereids.trees.expressions.functions.scalar.IsNan;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.IsUuid;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArray;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArrayIgnoreNull;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
@@ -786,6 +787,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(IsIpv6String.class, "is_ipv6_string", "is_ipv6"),
scalar(IsIpAddressInRange.class, "is_ip_address_in_range"),
scalar(IsNan.class, "isnan"),
+ scalar(IsUuid.class, "is_uuid"),
scalar(IsInf.class, "isinf"),
scalar(Ipv4CIDRToRange.class, "ipv4_cidr_to_range"),
scalar(Ipv6CIDRToRange.class, "ipv6_cidr_to_range"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index ae0f29806cc..0172c3b4339 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -1242,4 +1242,59 @@ public class StringArithmetic {
return new VarcharLiteral(result.toString());
}
+
+    /**
+     * Constant-folding entry point for is_uuid: evaluates the check on a string literal.
+     *
+     * @param first the literal argument of is_uuid
+     * @return the expression produced by {@link #isUuidImpl(String)} for the literal's value
+     */
+    @ExecFunction(name = "is_uuid")
+    public static Expression isUuid(StringLikeLiteral first) {
+        return isUuidImpl(first.getValue());
+    }
+
+    /** Returns true only for the ASCII characters 0-9, a-f and A-F. */
+    private static boolean isHexChar(char c) {
+        if (c >= '0' && c <= '9') {
+            return true;
+        }
+        if (c >= 'a' && c <= 'f') {
+            return true;
+        }
+        return c >= 'A' && c <= 'F';
+    }
+
+    /**
+     * Decides whether {@code uuid} is written in one of three accepted layouts:
+     * 32 hex digits, the 36-character 8-4-4-4-12 dashed layout, or that dashed layout
+     * wrapped in braces (38 characters).
+     *
+     * @param uuid the text to check
+     * @return BooleanLiteral.of(true) when the text matches one layout, BooleanLiteral.of(false) otherwise
+     */
+    private static Expression isUuidImpl(String uuid) {
+        final int plainLength = 32;
+        final int dashedLength = 36;
+        final int bracedLength = 38;
+        int len = uuid.length();
+
+        if (len == plainLength) {
+            for (int i = 0; i < len; i++) {
+                if (!isHexChar(uuid.charAt(i))) {
+                    return BooleanLiteral.of(false);
+                }
+            }
+            return BooleanLiteral.of(true);
+        }
+
+        // Index of the first character of the dashed part inside uuid.
+        int start;
+        if (len == bracedLength) {
+            if (uuid.charAt(0) != '{' || uuid.charAt(len - 1) != '}') {
+                return BooleanLiteral.of(false);
+            }
+            start = 1;
+        } else if (len == dashedLength) {
+            start = 0;
+        } else {
+            return BooleanLiteral.of(false);
+        }
+
+        for (int offset = 0; offset < dashedLength; offset++) {
+            char c = uuid.charAt(start + offset);
+            boolean dashSlot = offset == 8 || offset == 13 || offset == 18 || offset == 23;
+            boolean charOk = dashSlot ? c == '-' : isHexChar(c);
+            if (!charOk) {
+                return BooleanLiteral.of(false);
+            }
+        }
+        return BooleanLiteral.of(true);
+    }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsUuid.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsUuid.java
new file mode 100644
index 00000000000..a7c882d31e3
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsUuid.java
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'is_uuid'.
+ *
+ * Takes exactly one argument and returns BOOLEAN; the accepted argument types are
+ * listed in {@link #SIGNATURES} (VARCHAR and STRING).
+ * NOTE(review): the class implements PropagateNullable, so presumably a NULL argument
+ * yields NULL - confirm against the PropagateNullable contract.
+ */
+public class IsUuid extends ScalarFunction
+ implements UnaryExpression, ExplicitlyCastableSignature,
PropagateNullable {
+
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(BooleanType.INSTANCE).args(StringType.INSTANCE));
+
+ /**
+ * constructor with 1 argument.
+ *
+ * @param arg the expression to test; passed to the superclass together with the
+ *            function name "is_uuid"
+ */
+ public IsUuid(Expression arg) {
+ super("is_uuid", arg);
+ }
+
+ /**
+ * withChildren.
+ *
+ * Builds a new IsUuid from the first element of {@code children}.
+ *
+ * @param children must contain exactly one expression, otherwise
+ *                 Preconditions.checkArgument fails
+ */
+ @Override
+ public IsUuid withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 1);
+ return new IsUuid(children.get(0));
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitIsUuid(this, context);
+ }
+
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index c37c73028d6..aa7ff7ac4bc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -268,6 +268,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4Mapped;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4String;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv6String;
import org.apache.doris.nereids.trees.expressions.functions.scalar.IsNan;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.IsUuid;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArray;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArrayIgnoreNull;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
@@ -1491,6 +1492,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(isNan, context);
}
+ default R visitIsUuid(IsUuid isUuid, C context) { // default: delegate to visitScalarFunction
+ return visitScalarFunction(isUuid, context);
+ }
+
default R visitIsInf(IsInf isInf, C context) {
return visitScalarFunction(isInf, context);
}
diff --git a/regression-test/data/nereids_function_p0/scalar_function/I.out
b/regression-test/data/nereids_function_p0/scalar_function/I.out
index b0061f3e678..1fcfdf60f99 100644
--- a/regression-test/data/nereids_function_p0/scalar_function/I.out
+++ b/regression-test/data/nereids_function_p0/scalar_function/I.out
@@ -86,3 +86,61 @@ Varchar13
1
1
+-- !sql_is_uuid_Varchar --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !sql_is_uuid_Varchar_notnull --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !sql_is_uuid_String --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !sql_is_uuid_String_notnull --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
diff --git
a/regression-test/suites/nereids_function_p0/scalar_function/I.groovy
b/regression-test/suites/nereids_function_p0/scalar_function/I.groovy
index 40da7069f2a..9c111d54a64 100644
--- a/regression-test/suites/nereids_function_p0/scalar_function/I.groovy
+++ b/regression-test/suites/nereids_function_p0/scalar_function/I.groovy
@@ -25,4 +25,8 @@ suite("nereids_scalar_fn_I") {
qt_sql_instr_Varchar_Varchar_notnull "select instr(kvchrs1, kvchrs1)
from fn_test_not_nullable order by kvchrs1, kvchrs1"
qt_sql_instr_String_String "select instr(kstr, kstr) from fn_test order
by kstr, kstr"
qt_sql_instr_String_String_notnull "select instr(kstr, kstr) from
fn_test_not_nullable order by kstr, kstr"
+ qt_sql_is_uuid_Varchar "select is_uuid(kvchrs1) from fn_test order by
kvchrs1"
+ qt_sql_is_uuid_Varchar_notnull "select is_uuid(kvchrs1) from
fn_test_not_nullable order by kvchrs1"
+ qt_sql_is_uuid_String "select is_uuid(kstr) from fn_test order by kstr"
+ qt_sql_is_uuid_String_notnull "select is_uuid(kstr) from
fn_test_not_nullable order by kstr"
}
\ No newline at end of file
diff --git
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index 6e68587f8fe..88863da910b 100644
---
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -2034,5 +2034,20 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("SELECT SOUNDEX('Zhang')")
testFoldConst("SELECT SOUNDEX('Wang')")
testFoldConst("SELECT SOUNDEX(NULL)")
+
+    // IS_UUID: same inputs as the BE unit test (function_is_uuid_test), plus NULL.
+    // Accepted layouts: 8-4-4-4-12 with dashes (36 chars), the same wrapped in braces (38), or 32 hex digits.
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-9564-5b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-9564-5b8c656024dbaaaa')")
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-9564-5b8c656024gg')")
+    testFoldConst("SELECT IS_UUID('6ccd780-cbaba-1026-9564-5b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780-cbaba-1026-95645-b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780-cbaba-1026-95645-b8c65602')")
+    testFoldConst("SELECT IS_UUID('{6ccd780c-baba-1026-9564-5b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('{6ccd780c-baba-1026-95645b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('{6ccd780c-baba-1026-95645-b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-95645-b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('6ccd780cbaba102695645b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780cbaba102695645b8c656024dz')")
+    testFoldConst("SELECT IS_UUID('6ccd780cbaba102')")
+    // Braced but too short: present in the BE unit test, previously missing here.
+    testFoldConst("SELECT IS_UUID('{6ccd780cbaba102}')")
+    testFoldConst("SELECT IS_UUID(NULL)")
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]