This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 990d6c02ec [Feature](new function) Add a uuid-numeric function,
returns uuid in largerint type, 20x faster than uuid (#24395)
990d6c02ec is described below
commit 990d6c02ec37ba4c57e15d598242c5669419db62
Author: zhiqqqq <[email protected]>
AuthorDate: Sat Sep 16 18:26:13 2023 +0800
[Feature](new function) Add a uuid-numeric function, returns uuid in
largerint type, 20x faster than uuid (#24395)
---
be/src/vec/functions/simple_function_factory.h | 2 +
be/src/vec/functions/uuid_numeric.cpp | 158 +++++++++++++++++++++
.../numeric-functions/uuid_numeric.md | 51 +++++++
.../numeric-functions/uuid_numeric.md | 49 +++++++
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../expressions/functions/Nondeterministic.java | 1 -
.../expressions/functions/scalar/UuidNumeric.java | 58 ++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
gensrc/script/doris_builtins_functions.py | 3 +-
9 files changed, 327 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/functions/simple_function_factory.h
b/be/src/vec/functions/simple_function_factory.h
index b5e72bdde6..e0962b6718 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -77,6 +77,7 @@ void register_function_like(SimpleFunctionFactory& factory);
void register_function_regexp(SimpleFunctionFactory& factory);
void register_function_random(SimpleFunctionFactory& factory);
void register_function_uuid(SimpleFunctionFactory& factory);
+void register_function_uuid_numeric(SimpleFunctionFactory& factory);
void register_function_coalesce(SimpleFunctionFactory& factory);
void register_function_grouping(SimpleFunctionFactory& factory);
void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory);
@@ -253,6 +254,7 @@ public:
register_function_regexp(instance);
register_function_random(instance);
register_function_uuid(instance);
+ register_function_uuid_numeric(instance);
register_function_coalesce(instance);
register_function_grouping(instance);
register_function_datetime_floor_ceil(instance);
diff --git a/be/src/vec/functions/uuid_numeric.cpp
b/be/src/vec/functions/uuid_numeric.cpp
new file mode 100644
index 0000000000..d41912b20e
--- /dev/null
+++ b/be/src/vec/functions/uuid_numeric.cpp
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <pthread.h>
+#include <time.h>
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "common/status.h"
+#include "runtime/large_int_value.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/hash_table/hash.h"
+#include "vec/common/sip_hash.h"
+#include "vec/common/uint128.h"
+#include "vec/core/block.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris {
+class FunctionContext;
+} // namespace doris
+
+namespace doris::vectorized {
+
+// NOTE:
+// The implementation of the random generator is inspired by RandImpl::execute of ClickHouse.
+// ClickHouse's RandImpl::execute provided valuable insights and ideas during development.
+
+/// Minimal linear congruential generator.
+///
+/// The state advances as `current = current * a + c` in UInt64 arithmetic, and every
+/// step yields the state shifted right by 16 bits, truncated to UInt32.
+struct LinearCongruentialGenerator {
+    /// Constants from `man lrand48_r`.
+    static constexpr UInt64 a = 0x5DEECE66D;
+    static constexpr UInt64 c = 0xB;
+
+    /// Default state; this is from `head -c8 /dev/urandom | xxd -p`.
+    /// Written with the ULL suffix because the value does not fit in a signed 64-bit integer.
+    UInt64 current = 0xbcabbed75dfe77cdULL;
+
+    /// Replaces the generator state with `value`.
+    void seed(UInt64 value) { current = value; }
+
+    /// Advances the state by one step and returns 32 bits of it (bits 16 and up).
+    UInt32 next() {
+        current = current * a + c;
+        return static_cast<UInt32>(current >> 16);
+    }
+};
+
+/// Combines a base seed with an extra value: hashes `additional_seed`, XORs the result
+/// into `rand_seed`, and hashes once more.
+/// File-local helper, so it has internal linkage to avoid clashing with other translation units.
+static UInt64 calcSeed(UInt64 rand_seed, UInt64 additional_seed) {
+    return int_hash64(rand_seed ^ int_hash64(additional_seed));
+}
+
+/// Re-seeds `generator` with calcSeed(rand_seed, additional_seed).
+/// `additional_seed` arrives as intptr_t and is converted to UInt64 when passed to calcSeed.
+/// File-local helper, so it has internal linkage to avoid clashing with other translation units.
+static void seed(LinearCongruentialGenerator& generator, UInt64 rand_seed,
+                 intptr_t additional_seed) {
+    generator.seed(calcSeed(rand_seed, additional_seed));
+}
+
+/// Fixed table of 32 UInt64 constants, taken from 'head -c8 /dev/urandom | xxd -p'.
+/// Can be used for creating seeds for random generators: UuidNumeric::GenerateUUIDs adds
+/// entries [0..3] to the output address so that its four generators get different seeds.
+constexpr std::array<UInt64, 32> random_numbers = {
+ 0x62224b4e764e1560ULL, 0xa79ec6fdbb2ef873ULL, 0xe2862f147d1c0649ULL,
0xc8d47f9a38554cb2ULL,
+ 0x62b0dd532dcd8a43ULL, 0xef3128a01e7a28bcULL, 0x32e4eb5461fc0f6ULL,
0xd3377ce32d3d9579ULL,
+ 0x6f129aa32529a57cULL, 0x98dd0ba25301a5a3ULL, 0x457bd29769afabf1ULL,
0x3bb886ea86263d9dULL,
+ 0xec3e9514dc0bb543ULL, 0x84282031a89ce23eULL, 0x55212b07d1a9a765ULL,
0xe9de69f882aa48afULL,
+ 0x13a71c9baa9babbbULL, 0x3b7be8b0dd9cb586ULL, 0x1375e8cb773f3e35ULL,
0x9f841693b13e615fULL,
+ 0xab62458b90fd9aefULL, 0xa9d9fdd187f8e941ULL, 0xca1851150f831eeaULL,
0xa43f586f9078e918ULL,
+ 0xe336c2883038a257ULL, 0xfebaffc035561545ULL, 0x27c2436d2607840eULL,
0x21bab1489b0ff552ULL,
+ 0x22ca273c2756bb6cULL, 0x4b6260e129af35f1ULL, 0xeb42b6c0d4322c6fULL,
0xfea0f49cc4e68339ULL,
+};
+
+/// Scalar function `uuid_numeric()`: takes no arguments and fills an Int128 result column
+/// with pseudo-random 128-bit values, one per input row.
+///
+/// Each value is assembled from four 32-bit outputs of four independently seeded
+/// LinearCongruentialGenerator instances. The seed is derived from the monotonic clock,
+/// the calling thread and the output buffer address; it is NOT cryptographically secure.
+class UuidNumeric : public IFunction {
+public:
+    static constexpr auto name = "uuid_numeric";
+    static constexpr size_t uuid_length = 16; // Int128
+
+    static FunctionPtr create() { return std::make_shared<UuidNumeric>(); }
+
+    String get_name() const override { return name; }
+
+    // Opts out of the framework's default handling of constants.
+    // NOTE(review): presumably so that every row gets its own value - confirm against IFunction.
+    bool use_default_implementation_for_constants() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    bool is_variadic() const override { return false; }
+
+    // uuid numeric is an Int128 (maybe UInt128 would be better, but we do not support it now).
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeInt128>();
+    }
+
+    // TODO(zhiqiang): May be override open function?
+
+    /// Creates an Int128 column with `input_rows_count` rows, fills its raw storage with
+    /// generated values and stores the column at position `result` of `block`.
+    Status execute_impl(FunctionContext* /*context*/, Block& block,
+                        const ColumnNumbers& /*arguments*/, size_t result,
+                        size_t input_rows_count) override {
+        auto col_res = ColumnInt128::create();
+        col_res->resize(input_rows_count);
+
+        GenerateUUIDs(reinterpret_cast<char*>(col_res->get_data().data()),
+                      uuid_length * input_rows_count);
+
+        block.replace_by_position(result, std::move(col_res));
+        return Status::OK();
+    }
+
+private:
+    /// Writes pseudo-random bytes to `output` in steps of uuid_length (16) bytes until
+    /// `size` bytes are covered. execute_impl always passes a multiple of uuid_length.
+    void GenerateUUIDs(char* output, size_t size) {
+        LinearCongruentialGenerator generator0;
+        LinearCongruentialGenerator generator1;
+        LinearCongruentialGenerator generator2;
+        LinearCongruentialGenerator generator3;
+
+        UInt64 rand_seed = randomSeed();
+
+        // Mix a different table constant and the buffer address into each generator's
+        // seed so that the four streams differ from one another.
+        seed(generator0, rand_seed, random_numbers[0] + reinterpret_cast<intptr_t>(output));
+        seed(generator1, rand_seed, random_numbers[1] + reinterpret_cast<intptr_t>(output));
+        seed(generator2, rand_seed, random_numbers[2] + reinterpret_cast<intptr_t>(output));
+        seed(generator3, rand_seed, random_numbers[3] + reinterpret_cast<intptr_t>(output));
+
+        for (const char* end = output + size; output < end; output += uuid_length) {
+            unaligned_store<UInt32>(output, generator0.next());
+            unaligned_store<UInt32>(output + 4, generator1.next());
+            unaligned_store<UInt32>(output + 8, generator2.next());
+            unaligned_store<UInt32>(output + 12, generator3.next());
+        }
+        /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
+    }
+
+    /// Builds a 64-bit seed by hashing the current monotonic time and the calling thread.
+    UInt64 randomSeed() {
+        // Zero-initialize and then actually read the clock. Previously `times` was hashed
+        // without ever being filled in, which read indeterminate stack memory.
+        struct timespec times {};
+        clock_gettime(CLOCK_MONOTONIC, &times);
+
+        /// Not cryptographically secure as time, pid and stack address can be predictable.
+
+        SipHash hash;
+        hash.update(times.tv_nsec);
+        hash.update(times.tv_sec);
+        // C-style cast on purpose: pthread_t may be an integer or a pointer type.
+        hash.update((uintptr_t)pthread_self());
+
+        return hash.get64();
+    }
+};
+
+void register_function_uuid_numeric(SimpleFunctionFactory& factory) {
+ factory.register_function<UuidNumeric>();
+}
+
+} // namespace doris::vectorized
diff --git
a/docs/en/docs/sql-manual/sql-functions/numeric-functions/uuid_numeric.md
b/docs/en/docs/sql-manual/sql-functions/numeric-functions/uuid_numeric.md
new file mode 100644
index 0000000000..07d7ec3f9c
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/numeric-functions/uuid_numeric.md
@@ -0,0 +1,51 @@
+---
+{
+ "title": "uuid_numeric",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## uuid_numeric
+### description
+#### Syntax
+
+`LARGEINT uuid_numeric()`
+
+Returns a UUID as a `LARGEINT` value.
+
+Note that `LARGEINT` is a signed 128-bit integer (Int128), so `uuid_numeric()` may return a negative number.
+
+### example
+
+```
+
+mysql> select uuid_numeric();
++----------------------------------------+
+| uuid_numeric() |
++----------------------------------------+
+| 82218484683747862468445277894131281464 |
++----------------------------------------+
+```
+
+### keywords
+
+ UUID UUID-NUMERIC
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/numeric-functions/uuid_numeric.md
b/docs/zh-CN/docs/sql-manual/sql-functions/numeric-functions/uuid_numeric.md
new file mode 100644
index 0000000000..227bd6b7d3
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/numeric-functions/uuid_numeric.md
@@ -0,0 +1,49 @@
+---
+{
+ "title": "uuid_numeric",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## uuid_numeric
+### description
+#### Syntax
+
+`LARGEINT uuid_numeric()`
+
+返回一个 `LARGEINT` 类型的 `uuid`。注意 `LARGEINT` 是一个 Int128,所以 `uuid_numeric()`
可能会得到负值。
+
+### example
+
+```
+
+mysql> select uuid_numeric();
++----------------------------------------+
+| uuid_numeric() |
++----------------------------------------+
+| 82218484683747862468445277894131281464 |
++----------------------------------------+
+```
+
+### keywords
+
+ UUID UUID-NUMERIC
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index d65f84d876..1a2d064d2b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -345,6 +345,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Upper;
import org.apache.doris.nereids.trees.expressions.functions.scalar.User;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.UtcTimestamp;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Uuid;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.UuidNumeric;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Version;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Week;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WeekCeil;
@@ -703,6 +704,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(User.class, "user"),
scalar(UtcTimestamp.class, "utc_timestamp"),
scalar(Uuid.class, "uuid"),
+ scalar(UuidNumeric.class, "uuid_numeric"),
scalar(Version.class, "version"),
scalar(Week.class, "week"),
scalar(WeekCeil.class, "week_ceil"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Nondeterministic.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Nondeterministic.java
index 8fd6335740..88955c0c4d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Nondeterministic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Nondeterministic.java
@@ -24,7 +24,6 @@ import org.apache.doris.qe.ConnectContext;
*
* e.g. 'rand()', 'random()'.
*
- * note: no 'uuid' function currently.
*/
public interface Nondeterministic extends ExpressionTrait {
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UuidNumeric.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UuidNumeric.java
new file mode 100644
index 0000000000..3e2267b437
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UuidNumeric.java
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.Nondeterministic;
+import org.apache.doris.nereids.trees.expressions.shape.LeafExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.LargeIntType;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'uuid_numeric'. This class is generated by GenerateFunction.
+ *
+ * <p>The function takes no arguments and its only signature returns LARGEINT.
+ * It is declared as a leaf expression that is nondeterministic and always not nullable.
+ */
+public class UuidNumeric extends ScalarFunction
+ implements LeafExpression, ExplicitlyCastableSignature,
Nondeterministic, AlwaysNotNullable {
+
+ // The single supported signature: no arguments, LARGEINT result.
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+ FunctionSignature.ret(LargeIntType.INSTANCE).args()
+ );
+
+ /**
+ * constructor with 0 argument.
+ */
+ public UuidNumeric() {
+ super("uuid_numeric");
+ }
+
+ // Dispatches to the visitor's visitUuidNumeric callback.
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitUuidNumeric(this, context);
+ }
+
+ // Returns the shared, immutable SIGNATURES list.
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 7f3d50322b..75c6644a4d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -341,6 +341,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Upper;
import org.apache.doris.nereids.trees.expressions.functions.scalar.User;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.UtcTimestamp;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Uuid;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.UuidNumeric;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Version;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Week;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WeekCeil;
@@ -1626,6 +1627,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(uuid, context);
}
+ // Visitor hook for UuidNumeric; by default it delegates to visitScalarFunction.
+ default R visitUuidNumeric(UuidNumeric uuidNumeric, C context) {
+ return visitScalarFunction(uuidNumeric, context);
+ }
+
default R visitVersion(Version version, C context) {
return visitScalarFunction(version, context);
}
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 0760064087..bde00145af 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1977,7 +1977,8 @@ visible_functions = {
],
"UUID": [
- [['uuid'], 'VARCHAR', [], 'ALWAYS_NOT_NULLABLE']
+ [['uuid'], 'VARCHAR', [], 'ALWAYS_NOT_NULLABLE'],
+ [['uuid_numeric'], 'LARGEINT', [], 'ALWAYS_NOT_NULLABLE']
],
#ip functions
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]