This is an automated email from the ASF dual-hosted git repository.

zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 593f749a75e [feature](function) add is_uuid function (#57325)
593f749a75e is described below

commit 593f749a75eb1c60d91712e3aaf07997c3c78ee1
Author: admiring_xm <[email protected]>
AuthorDate: Wed Oct 29 13:12:02 2025 +0800

    [feature](function) add is_uuid function (#57325)
    
    add is_uuid
---
 be/src/vec/common/string_utils/string_utils.h      |  4 ++
 be/src/vec/functions/uuid.cpp                      | 67 ++++++++++++++++++++
 be/test/vec/function/function_uuid_test.cpp        | 49 +++++++++++++++
 .../doris/catalog/BuiltinScalarFunctions.java      |  2 +
 .../functions/executable/StringArithmetic.java     | 55 +++++++++++++++++
 .../trees/expressions/functions/scalar/IsUuid.java | 71 ++++++++++++++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |  5 ++
 .../data/nereids_function_p0/scalar_function/I.out | 58 ++++++++++++++++++
 .../nereids_function_p0/scalar_function/I.groovy   |  4 ++
 .../fold_constant_string_arithmatic.groovy         | 15 +++++
 10 files changed, 330 insertions(+)

diff --git a/be/src/vec/common/string_utils/string_utils.h 
b/be/src/vec/common/string_utils/string_utils.h
index 58da68db5fb..0f4220489b4 100644
--- a/be/src/vec/common/string_utils/string_utils.h
+++ b/be/src/vec/common/string_utils/string_utils.h
@@ -76,3 +76,7 @@ inline bool is_whitespace_ascii(char c) {
 inline bool is_not_whitespace_ascii(char c) {
     return !is_whitespace_ascii(c);
 }
+
+// Reports whether `c` is an ASCII hexadecimal digit: a letter in a-f or A-F,
+// or any character accepted by is_numeric_ascii().
+inline bool is_hex_ascii(char c) {
+    if (is_numeric_ascii(c)) {
+        return true;
+    }
+    const bool lower_hex = c >= 'a' && c <= 'f';
+    const bool upper_hex = c >= 'A' && c <= 'F';
+    return lower_hex || upper_hex;
+}
\ No newline at end of file
diff --git a/be/src/vec/functions/uuid.cpp b/be/src/vec/functions/uuid.cpp
index fd9b0427882..f3e5b43b35d 100644
--- a/be/src/vec/functions/uuid.cpp
+++ b/be/src/vec/functions/uuid.cpp
@@ -27,11 +27,13 @@
 #include "common/status.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/columns/column_string.h"
+#include "vec/common/string_utils/string_utils.h"
 #include "vec/core/block.h"
 #include "vec/core/column_numbers.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type_string.h"
 #include "vec/functions/function.h"
+#include "vec/functions/function_totype.h"
 #include "vec/functions/simple_function_factory.h"
 
 namespace doris {
@@ -76,8 +78,73 @@ public:
     }
 };
 
+// Name tag for the is_uuid function; it is the second template argument of
+// FunctionUnaryToType in the FunctionIsUuid alias.
+struct NameIsUuid {
+    static constexpr auto name = "is_uuid";
+};
+
+// Row-wise implementation of is_uuid(). A string is accepted when it has one of
+// three shapes:
+//   - 32 hex digits:                        aaaaaaaabbbbccccddddeeeeeeeeeeee
+//   - 36 chars with '-' at 8/13/18/23:      aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
+//   - the 36-char form wrapped in braces:   {aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee}
+// Hex digits are whatever is_hex_ascii() accepts. Every other input yields 0.
+struct IsUuidImpl {
+    using ReturnType = DataTypeBool;
+    using ReturnColumnType = ColumnUInt8;
+    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
+    static constexpr size_t uuid_without_dash_length = 32;
+    static constexpr size_t uuid_with_dash_length = 36;
+    static constexpr size_t uuid_with_braces_and_dash_length = 38;
+    // Zero-based offsets of the four '-' separators in the 36-character form.
+    static constexpr size_t dash_positions[4] = {8, 13, 18, 23};
+
+    // True when `pos` is one of the offsets listed in dash_positions.
+    static bool is_dash_position(size_t pos) {
+        for (const size_t dash_pos : dash_positions) {
+            if (pos == dash_pos) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // True when every character in [src, end) is a hex digit.
+    static bool is_all_hex(const char* src, const char* end) {
+        for (; src != end; ++src) {
+            if (!is_hex_ascii(*src)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Validates the 36-character dashed form held in [src, end).
+    // Every dash offset must hold '-' and every other offset a hex digit. The
+    // position is examined before the character class so that a hex digit sitting
+    // where a separator belongs (e.g. 36 hex digits with no dashes at all) is
+    // rejected instead of slipping through.
+    static bool is_uuid_with_dash(const char* src, const char* end) {
+        const size_t str_size = end - src;
+        if (str_size != uuid_with_dash_length) {
+            return false;
+        }
+        for (size_t i = 0; i < str_size; ++i) {
+            const bool ok = is_dash_position(i) ? src[i] == '-' : is_hex_ascii(src[i]);
+            if (!ok) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Writes one result per row into `res`: 1 when the row is a UUID in one of the
+    // accepted shapes, 0 otherwise. Row i occupies the bytes of `data` between
+    // offsets[i - 1] and offsets[i]. Always returns Status::OK().
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         PaddedPODArray<UInt8>& res) {
+        const size_t rows_count = offsets.size();
+        res.resize(rows_count);
+        for (size_t i = 0; i < rows_count; ++i) {
+            // NOTE(review): for i == 0 this reads offsets[-1]; presumably the offsets
+            // container guarantees that read yields 0 -- confirm.
+            const size_t row_begin = offsets[i - 1];
+            const size_t str_size = offsets[i] - row_begin;
+            const char* source = reinterpret_cast<const char*>(&data[row_begin]);
+            const char* end = source + str_size;
+
+            bool is_valid = false;
+            if (str_size == uuid_without_dash_length) {
+                is_valid = is_all_hex(source, end);
+            } else if (str_size == uuid_with_dash_length) {
+                is_valid = is_uuid_with_dash(source, end);
+            } else if (str_size == uuid_with_braces_and_dash_length) {
+                // Strip the braces and validate the inner 36 characters.
+                is_valid = source[0] == '{' && end[-1] == '}' &&
+                           is_uuid_with_dash(source + 1, end - 1);
+            }
+            res[i] = is_valid;
+        }
+        return Status::OK();
+    }
+};
+
+// is_uuid as a unary function: IsUuidImpl supplies the per-row logic and
+// NameIsUuid the function name.
+using FunctionIsUuid = FunctionUnaryToType<IsUuidImpl, NameIsUuid>;
+
+// Registers Uuid and FunctionIsUuid with the given factory.
 void register_function_uuid(SimpleFunctionFactory& factory) {
     factory.register_function<Uuid>();
+    factory.register_function<FunctionIsUuid>();
 }
 
 } // namespace doris::vectorized
diff --git a/be/test/vec/function/function_uuid_test.cpp 
b/be/test/vec/function/function_uuid_test.cpp
new file mode 100644
index 00000000000..1f64554dadd
--- /dev/null
+++ b/be/test/vec/function/function_uuid_test.cpp
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "function_test_util.h"
+#include "vec/functions/uuid.cpp"
+
+namespace doris::vectorized {
+
+using namespace ut_type;
+
+// Checks is_uuid over the dashed, braced and undashed forms plus malformed inputs.
+TEST(function_uuid_test, function_is_uuid_test) {
+    std::string func_name = "is_uuid";
+    InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR};
+    DataSet data_set = {
+            // 36-character dashed form: valid, too long, non-hex tail.
+            {{STRING("6ccd780c-baba-1026-9564-5b8c656024db")}, BOOLEAN(1)},
+            {{STRING("6ccd780c-baba-1026-9564-5b8c656024dbaaaa")}, BOOLEAN(0)},
+            {{STRING("6ccd780c-baba-1026-9564-5b8c656024gg")}, BOOLEAN(0)},
+            // Dashes at the wrong offsets, and a wrong overall length.
+            {{STRING("6ccd780-cbaba-1026-9564-5b8c656024db")}, BOOLEAN(0)},
+            {{STRING("6ccd780-cbaba-1026-95645-b8c656024db")}, BOOLEAN(0)},
+            {{STRING("6ccd780-cbaba-1026-95645-b8c65602")}, BOOLEAN(0)},
+            // Braced form: valid, missing dash, misplaced dash, missing opening brace.
+            {{STRING("{6ccd780c-baba-1026-9564-5b8c656024db}")}, BOOLEAN(1)},
+            {{STRING("{6ccd780c-baba-1026-95645b8c656024db}")}, BOOLEAN(0)},
+            {{STRING("{6ccd780c-baba-1026-95645-b8c656024db}")}, BOOLEAN(0)},
+            {{STRING("6ccd780c-baba-1026-95645-b8c656024db}")}, BOOLEAN(0)},
+            // 32-character undashed form: valid, non-hex tail, too short.
+            {{STRING("6ccd780cbaba102695645b8c656024db")}, BOOLEAN(1)},
+            {{STRING("6ccd780cbaba102695645b8c656024dz")}, BOOLEAN(0)},
+            {{STRING("6ccd780cbaba102")}, BOOLEAN(0)},
+            {{STRING("{6ccd780cbaba102}")}, BOOLEAN(0)},
+            // NULL input is expected to produce NULL.
+            {{Null()}, Null()},
+    };
+
+    check_function_all_arg_comb<DataTypeBool, true>(func_name, input_types, 
data_set);
+}
+
+} // namespace doris::vectorized
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 448d7bc3fb6..ead9c00c9fe 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -263,6 +263,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4Mapped;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4String;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv6String;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.IsNan;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.IsUuid;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArray;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArrayIgnoreNull;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
@@ -786,6 +787,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(IsIpv6String.class, "is_ipv6_string", "is_ipv6"),
             scalar(IsIpAddressInRange.class, "is_ip_address_in_range"),
             scalar(IsNan.class, "isnan"),
+            scalar(IsUuid.class, "is_uuid"),
             scalar(IsInf.class, "isinf"),
             scalar(Ipv4CIDRToRange.class, "ipv4_cidr_to_range"),
             scalar(Ipv6CIDRToRange.class, "ipv6_cidr_to_range"),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index ae0f29806cc..0172c3b4339 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -1242,4 +1242,59 @@ public class StringArithmetic {
 
         return new VarcharLiteral(result.toString());
     }
+
+    /**
+     * Executable function is_uuid: evaluates the UUID format check on a string literal.
+     */
+    @ExecFunction(name = "is_uuid")
+    public static Expression isUuid(StringLikeLiteral first) {
+        return isUuidImpl(first.getValue());
+    }
+
+    // True for ASCII '0'-'9', 'a'-'f' and 'A'-'F'; false for every other char.
+    private static boolean isHexChar(char c) {
+        if (c >= '0' && c <= '9') {
+            return true;
+        }
+        boolean lowerHex = c >= 'a' && c <= 'f';
+        boolean upperHex = c >= 'A' && c <= 'F';
+        return lowerHex || upperHex;
+    }
+
+    // Returns a BooleanLiteral telling whether `uuid` is 32 hex digits, the
+    // 36-character form with '-' at offsets 8/13/18/23, or that 36-character
+    // form wrapped in '{' and '}'.
+    private static Expression isUuidImpl(String uuid) {
+        final int plainLength = 32;
+        final int dashedLength = 36;
+        final int bracedLength = 38;
+        final int len = uuid.length();
+
+        if (len == plainLength) {
+            for (int i = 0; i < len; i++) {
+                if (!isHexChar(uuid.charAt(i))) {
+                    return BooleanLiteral.of(false);
+                }
+            }
+            return BooleanLiteral.of(true);
+        }
+
+        // Index of the first character of the 36-character dashed body.
+        int bodyStart;
+        if (len == dashedLength) {
+            bodyStart = 0;
+        } else if (len == bracedLength) {
+            if (uuid.charAt(0) != '{' || uuid.charAt(len - 1) != '}') {
+                return BooleanLiteral.of(false);
+            }
+            bodyStart = 1;
+        } else {
+            return BooleanLiteral.of(false);
+        }
+
+        for (int k = 0; k < dashedLength; k++) {
+            char c = uuid.charAt(bodyStart + k);
+            boolean dashSlot = k == 8 || k == 13 || k == 18 || k == 23;
+            boolean ok = dashSlot ? c == '-' : isHexChar(c);
+            if (!ok) {
+                return BooleanLiteral.of(false);
+            }
+        }
+        return BooleanLiteral.of(true);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsUuid.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsUuid.java
new file mode 100644
index 00000000000..a7c882d31e3
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsUuid.java
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'is_uuid'.
+ * Takes a single VARCHAR or STRING argument and returns BOOLEAN, as declared in
+ * {@link #SIGNATURES}.
+ */
+public class IsUuid extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
+
+    // Accepted signatures: BOOLEAN is_uuid(VARCHAR) and BOOLEAN is_uuid(STRING).
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT),
+            
FunctionSignature.ret(BooleanType.INSTANCE).args(StringType.INSTANCE));
+
+    /**
+     * constructor with 1 argument.
+     *
+     * @param arg the expression to be checked
+     */
+    public IsUuid(Expression arg) {
+        super("is_uuid", arg);
+    }
+
+    /**
+     * withChildren.
+     * Builds a new IsUuid over the single child in {@code children}; fails the
+     * precondition check when the list does not hold exactly one element.
+     */
+    @Override
+    public IsUuid withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new IsUuid(children.get(0));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    // Dispatches to the visitor's visitIsUuid hook.
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitIsUuid(this, context);
+    }
+
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index c37c73028d6..aa7ff7ac4bc 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -268,6 +268,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4Mapped;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv4String;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv6String;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.IsNan;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.IsUuid;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArray;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArrayIgnoreNull;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
@@ -1491,6 +1492,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(isNan, context);
     }
 
+    // Visitor hook for IsUuid; by default it delegates to visitScalarFunction.
+    default R visitIsUuid(IsUuid isUuid, C context) {
+        return visitScalarFunction(isUuid, context);
+    }
+
     default R visitIsInf(IsInf isInf, C context) {
         return visitScalarFunction(isInf, context);
     }
diff --git a/regression-test/data/nereids_function_p0/scalar_function/I.out 
b/regression-test/data/nereids_function_p0/scalar_function/I.out
index b0061f3e678..1fcfdf60f99 100644
--- a/regression-test/data/nereids_function_p0/scalar_function/I.out
+++ b/regression-test/data/nereids_function_p0/scalar_function/I.out
@@ -86,3 +86,61 @@ Varchar13
 1
 1
 
+-- !sql_is_uuid_Varchar --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !sql_is_uuid_Varchar_notnull --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !sql_is_uuid_String --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !sql_is_uuid_String_notnull --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
diff --git 
a/regression-test/suites/nereids_function_p0/scalar_function/I.groovy 
b/regression-test/suites/nereids_function_p0/scalar_function/I.groovy
index 40da7069f2a..9c111d54a64 100644
--- a/regression-test/suites/nereids_function_p0/scalar_function/I.groovy
+++ b/regression-test/suites/nereids_function_p0/scalar_function/I.groovy
@@ -25,4 +25,8 @@ suite("nereids_scalar_fn_I") {
        qt_sql_instr_Varchar_Varchar_notnull "select instr(kvchrs1, kvchrs1) 
from fn_test_not_nullable order by kvchrs1, kvchrs1"
        qt_sql_instr_String_String "select instr(kstr, kstr) from fn_test order 
by kstr, kstr"
        qt_sql_instr_String_String_notnull "select instr(kstr, kstr) from 
fn_test_not_nullable order by kstr, kstr"
+       qt_sql_is_uuid_Varchar "select is_uuid(kvchrs1) from fn_test order by 
kvchrs1"
+       qt_sql_is_uuid_Varchar_notnull "select is_uuid(kvchrs1) from 
fn_test_not_nullable order by kvchrs1"
+       qt_sql_is_uuid_String "select is_uuid(kstr) from fn_test order by kstr"
+       qt_sql_is_uuid_String_notnull "select is_uuid(kstr) from 
fn_test_not_nullable order by kstr"
 }
\ No newline at end of file
diff --git 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index 6e68587f8fe..88863da910b 100644
--- 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++ 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -2034,5 +2034,20 @@ suite("fold_constant_string_arithmatic") {
     testFoldConst("SELECT SOUNDEX('Zhang')")
     testFoldConst("SELECT SOUNDEX('Wang')")
     testFoldConst("SELECT SOUNDEX(NULL)")
+
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-9564-5b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-9564-5b8c656024dbaaaa')")
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-9564-5b8c656024gg')")
+    testFoldConst("SELECT IS_UUID('6ccd780-cbaba-1026-9564-5b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780-cbaba-1026-95645-b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780-cbaba-1026-95645-b8c65602')")
+    testFoldConst("SELECT IS_UUID('{6ccd780c-baba-1026-9564-5b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('{6ccd780c-baba-1026-95645b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('{6ccd780c-baba-1026-95645-b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('6ccd780c-baba-1026-95645-b8c656024db}')")
+    testFoldConst("SELECT IS_UUID('6ccd780cbaba102695645b8c656024db')")
+    testFoldConst("SELECT IS_UUID('6ccd780cbaba102695645b8c656024dz')")
+    testFoldConst("SELECT IS_UUID('6ccd780cbaba102')")
+    testFoldConst("SELECT IS_UUID(NULL)")
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to