This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 41b9f6bc921 [Refactor](type) use std::bit_cast to replace union-based
type punning (#53135)
41b9f6bc921 is described below
commit 41b9f6bc921929e14f46fe48bdefb14086a72a2d
Author: zclllyybb <[email protected]>
AuthorDate: Mon Jul 14 11:53:57 2025 +0800
[Refactor](type) use std::bit_cast to replace union-based type punning
(#53135)
```
Run on (96 X 3100.01 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x48)
L1 Instruction 32 KiB (x48)
L2 Unified 1024 KiB (x48)
L3 Unified 36608 KiB (x2)
Load Average: 6.58, 7.45, 10.99
-------------------------------------------------------------------------------------------------------------------------------------
Benchmark
Time CPU Iterations UserCounters...
-------------------------------------------------------------------------------------------------------------------------------------
BM_BinaryCast_UI64_to_DateTimeV2/1000000/min_time:2.000/iterations:100
534352472 us 866 us 100 items_per_second=1.15495G/s
BM_BinaryCast_UI64_to_DateTimeV2/10000000/min_time:2.000/iterations:100
534360412 us 8806 us 100 items_per_second=1.1356G/s
BM_OldBinaryCast_UI64_to_DateTimeV2/1000000/min_time:2.000/iterations:100
534361880 us 10274 us 100 items_per_second=97.3361M/s
BM_OldBinaryCast_UI64_to_DateTimeV2/10000000/min_time:2.000/iterations:100
534369846 us 18239 us 100 items_per_second=548.273M/s
BM_BinaryCast_DateTimeV2_to_UI64/1000000/min_time:2.000/iterations:100
534370998 us 19392 us 100 items_per_second=51.568M/s
BM_BinaryCast_DateTimeV2_to_UI64/10000000/min_time:2.000/iterations:100
534379002 us 27395 us 100 items_per_second=365.027M/s
BM_OldBinaryCast_DateTimeV2_to_UI64/1000000/min_time:2.000/iterations:100
534380603 us 28996 us 100 items_per_second=34.4872M/s
BM_OldBinaryCast_DateTimeV2_to_UI64/10000000/min_time:2.000/iterations:100
534388536 us 36929 us 100 items_per_second=270.786M/s
```
The benchmark results above actually mean that both approaches have no
side effects and can be optimized out.
---
be/CMakeLists.txt | 3 +
...nchmark_bit_pack.cpp => benchmark_bit_pack.hpp} | 5 +-
be/benchmark/benchmark_main.cpp | 8 +-
be/benchmark/binary_cast_benchmark.hpp | 254 +++++++++++++++++++++
be/src/util/binary_cast.hpp | 52 +----
be/src/vec/functions/simple_function_factory.h | 4 +-
6 files changed, 269 insertions(+), 57 deletions(-)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index cd66506ef36..c48bad430ce 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -835,6 +835,9 @@ foreach(dir ${dirs})
endforeach()
if (BUILD_BENCHMARK)
+ if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "RELEASE")
+ message(FATAL_ERROR "Benchmark should be built with RELEASE build
type, current build type is ${CMAKE_BUILD_TYPE}")
+ endif()
add_executable(benchmark_test ${BASE_DIR}/benchmark/benchmark_main.cpp)
set_target_properties(benchmark_test PROPERTIES COMPILE_FLAGS
"-fno-access-control")
target_link_libraries(benchmark_test ${DORIS_LINK_LIBS})
diff --git a/be/benchmark/benchmark_bit_pack.cpp
b/be/benchmark/benchmark_bit_pack.hpp
similarity index 94%
rename from be/benchmark/benchmark_bit_pack.cpp
rename to be/benchmark/benchmark_bit_pack.hpp
index ef2b9e04f47..a4f269dd471 100644
--- a/be/benchmark/benchmark_bit_pack.cpp
+++ b/be/benchmark/benchmark_bit_pack.hpp
@@ -100,4 +100,7 @@ static void BM_BitPackOptimized(benchmark::State& state) {
state.SetBytesProcessed(int64_t(state.iterations()) * size);
}
-} // namespace doris
\ No newline at end of file
+
+BENCHMARK(BM_BitPack)->DenseRange(1, 127, 16)->Unit(benchmark::kNanosecond);
+BENCHMARK(BM_BitPackOptimized)->DenseRange(1, 127,
16)->Unit(benchmark::kNanosecond);
+} // namespace doris
diff --git a/be/benchmark/benchmark_main.cpp b/be/benchmark/benchmark_main.cpp
index 336b42a4bde..6e50f2af6eb 100644
--- a/be/benchmark/benchmark_main.cpp
+++ b/be/benchmark/benchmark_main.cpp
@@ -17,10 +17,8 @@
#include <benchmark/benchmark.h>
-#include <string>
-
-#include "benchmark_bit_pack.cpp"
-#include "vec/columns/column_string.h"
+#include "benchmark_bit_pack.hpp"
+#include "binary_cast_benchmark.hpp"
#include "vec/core/block.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_string.h"
@@ -47,8 +45,6 @@ static void Example1(benchmark::State& state) {
}
// could BENCHMARK many functions to compare them together.
BENCHMARK(Example1);
-BENCHMARK(BM_BitPack)->DenseRange(1, 127)->Unit(benchmark::kNanosecond);
-BENCHMARK(BM_BitPackOptimized)->DenseRange(1,
127)->Unit(benchmark::kNanosecond);
} // namespace doris::vectorized
BENCHMARK_MAIN();
diff --git a/be/benchmark/binary_cast_benchmark.hpp
b/be/benchmark/binary_cast_benchmark.hpp
new file mode 100644
index 00000000000..cc5874a82ca
--- /dev/null
+++ b/be/benchmark/binary_cast_benchmark.hpp
@@ -0,0 +1,254 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <benchmark/benchmark.h>
+
+#include <cstring>
+#include <random>
+#include <vector>
+
+#include "util/binary_cast.hpp"
+
+namespace doris {
+
+// similar to reinterpret_cast but won't break strict-aliasing rules
+template <typename From, typename To>
+To old_binary_cast(From from) {
+ constexpr bool from_u64_to_db = match_v<From, uint64_t, To, double>;
+ constexpr bool from_i64_to_db = match_v<From, int64_t, To, double>;
+ constexpr bool from_db_to_i64 = match_v<From, double, To, int64_t>;
+ constexpr bool from_db_to_u64 = match_v<From, double, To, uint64_t>;
+ constexpr bool from_i64_to_vec_dt = match_v<From, __int64_t, To,
doris::VecDateTimeValue>;
+ constexpr bool from_vec_dt_to_i64 = match_v<From, doris::VecDateTimeValue,
To, __int64_t>;
+ constexpr bool from_i128_to_decv2 = match_v<From, __int128_t, To,
DecimalV2Value>;
+ constexpr bool from_decv2_to_i128 = match_v<From, DecimalV2Value, To,
__int128_t>;
+ constexpr bool from_decv2_to_i256 = match_v<From, DecimalV2Value, To,
wide::Int256>;
+
+ constexpr bool from_ui32_to_date_v2 = match_v<From, uint32_t, To,
DateV2Value<DateV2ValueType>>;
+
+ constexpr bool from_date_v2_to_ui32 = match_v<From,
DateV2Value<DateV2ValueType>, To, uint32_t>;
+
+ constexpr bool from_ui64_to_datetime_v2 =
+ match_v<From, uint64_t, To, DateV2Value<DateTimeV2ValueType>>;
+
+ constexpr bool from_datetime_v2_to_ui64 =
+ match_v<From, DateV2Value<DateTimeV2ValueType>, To, uint64_t>;
+
+ static_assert(from_u64_to_db || from_i64_to_db || from_db_to_i64 ||
from_db_to_u64 ||
+ from_i64_to_vec_dt || from_vec_dt_to_i64 ||
from_i128_to_decv2 ||
+ from_decv2_to_i128 || from_decv2_to_i256 ||
from_ui32_to_date_v2 ||
+ from_date_v2_to_ui32 || from_ui64_to_datetime_v2 ||
from_datetime_v2_to_ui64);
+
+ if constexpr (from_u64_to_db) {
+ TypeConverter conv;
+ conv.u64 = from;
+ return conv.dbl;
+ } else if constexpr (from_i64_to_db) {
+ TypeConverter conv;
+ conv.i64 = from;
+ return conv.dbl;
+ } else if constexpr (from_db_to_i64) {
+ TypeConverter conv;
+ conv.dbl = from;
+ return conv.i64;
+ } else if constexpr (from_db_to_u64) {
+ TypeConverter conv;
+ conv.dbl = from;
+ return conv.u64;
+ } else if constexpr (from_i64_to_vec_dt) {
+ VecDateTimeInt64Union conv = {.i64 = from};
+ return conv.dt;
+ } else if constexpr (from_ui32_to_date_v2) {
+ DateV2UInt32Union conv = {.ui32 = from};
+ return conv.dt;
+ } else if constexpr (from_date_v2_to_ui32) {
+ DateV2UInt32Union conv = {.dt = from};
+ return conv.ui32;
+ } else if constexpr (from_ui64_to_datetime_v2) {
+ DateTimeV2UInt64Union conv = {.ui64 = from};
+ return conv.dt;
+ } else if constexpr (from_datetime_v2_to_ui64) {
+ DateTimeV2UInt64Union conv = {.dt = from};
+ return conv.ui64;
+ } else if constexpr (from_vec_dt_to_i64) {
+ VecDateTimeInt64Union conv = {.dt = from};
+ return conv.i64;
+ } else if constexpr (from_i128_to_decv2) {
+ DecimalInt128Union conv;
+ conv.i128 = from;
+ return conv.decimal;
+ } else if constexpr (from_decv2_to_i128) {
+ DecimalInt128Union conv;
+ conv.decimal = from;
+ return conv.i128;
+ } else {
+ throw Exception(Status::FatalError("__builtin_unreachable"));
+ }
+}
+
+// Generate random datetime values in uint64_t format for testing
+std::vector<uint64_t> generate_datetime_v2_ui64_data(size_t count) {
+ std::vector<uint64_t> data;
+ data.reserve(count);
+
+ static std::random_device rd;
+ static std::mt19937_64 gen(rd());
+
+ std::uniform_int_distribution<uint64_t> dis(MIN_DATETIME_V2,
MAX_DATETIME_V2);
+
+ for (size_t i = 0; i < count; ++i) {
+ data.push_back(dis(gen));
+ }
+
+ return data;
+}
+
+// Generate DateTimeV2Value objects from uint64_t data
+std::vector<DateV2Value<DateTimeV2ValueType>> convert_u_int64_to_date_time_v2(
+ const std::vector<uint64_t>& ui64_data) {
+ std::vector<DateV2Value<DateTimeV2ValueType>> result;
+ result.reserve(ui64_data.size());
+
+ for (const auto& ui64_val : ui64_data) {
+ result.push_back(binary_cast<uint64_t,
DateV2Value<DateTimeV2ValueType>>(ui64_val));
+ }
+
+ return result;
+}
+
+// Benchmark binary_cast from uint64_t to DateTimeV2Value
+static void BM_BinaryCast_UI64_to_DateTimeV2(benchmark::State& state) {
+ state.PauseTiming();
+ const size_t data_size = state.range(0);
+ auto test_data = generate_datetime_v2_ui64_data(data_size);
+ state.ResumeTiming();
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(test_data.data());
+
+ for (size_t i = 0; i < data_size; ++i) {
+ auto result = binary_cast<uint64_t,
DateV2Value<DateTimeV2ValueType>>(test_data[i]);
+ benchmark::DoNotOptimize(result);
+ }
+
+ benchmark::ClobberMemory();
+ }
+
+ // Set the number of items processed per second
+ state.SetItemsProcessed(int64_t(state.iterations()) * data_size);
+}
+
+// Benchmark old_binary_cast from uint64_t to DateTimeV2Value
+static void BM_OldBinaryCast_UI64_to_DateTimeV2(benchmark::State& state) {
+ state.PauseTiming();
+ const size_t data_size = state.range(0);
+ auto test_data = generate_datetime_v2_ui64_data(data_size);
+ state.ResumeTiming();
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(test_data.data());
+
+ for (size_t i = 0; i < data_size; ++i) {
+ auto result = old_binary_cast<uint64_t,
DateV2Value<DateTimeV2ValueType>>(test_data[i]);
+ benchmark::DoNotOptimize(result);
+ }
+
+ benchmark::ClobberMemory();
+ }
+
+ // Set the number of items processed per second
+ state.SetItemsProcessed(int64_t(state.iterations()) * data_size);
+}
+
+// Benchmark binary_cast from DateTimeV2Value to uint64_t
+static void BM_BinaryCast_DateTimeV2_to_UI64(benchmark::State& state) {
+ state.PauseTiming();
+ const size_t data_size = state.range(0);
+ auto ui64_data = generate_datetime_v2_ui64_data(data_size);
+ auto test_data = convert_u_int64_to_date_time_v2(ui64_data);
+ state.ResumeTiming();
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(test_data.data());
+
+ for (size_t i = 0; i < data_size; ++i) {
+ auto result = binary_cast<DateV2Value<DateTimeV2ValueType>,
uint64_t>(test_data[i]);
+ benchmark::DoNotOptimize(result);
+ }
+
+ benchmark::ClobberMemory();
+ }
+
+ // Set the number of items processed per second
+ state.SetItemsProcessed(int64_t(state.iterations()) * data_size);
+}
+
+// Benchmark old_binary_cast from DateTimeV2Value to uint64_t
+static void BM_OldBinaryCast_DateTimeV2_to_UI64(benchmark::State& state) {
+ state.PauseTiming();
+ const size_t data_size = state.range(0);
+ auto ui64_data = generate_datetime_v2_ui64_data(data_size);
+ auto test_data = convert_u_int64_to_date_time_v2(ui64_data);
+ state.ResumeTiming();
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(test_data.data());
+
+ for (size_t i = 0; i < data_size; ++i) {
+ auto result = old_binary_cast<DateV2Value<DateTimeV2ValueType>,
uint64_t>(test_data[i]);
+ benchmark::DoNotOptimize(result);
+ }
+
+ benchmark::ClobberMemory();
+ }
+
+ // Set the number of items processed per second
+ state.SetItemsProcessed(int64_t(state.iterations()) * data_size);
+}
+
+// Register benchmarks with only large data sizes
+// Use fixed larger sizes and more iterations for more reliable comparisons
+BENCHMARK(BM_BinaryCast_UI64_to_DateTimeV2)
+ ->Arg(1000000)
+ ->Arg(10000000)
+ ->Iterations(100)
+ ->MinTime(2.0) // Run each benchmark for at least 2 seconds
+ ->Unit(benchmark::
+ kMicrosecond); // Use microseconds for more readable
results with large datasets
+
+BENCHMARK(BM_OldBinaryCast_UI64_to_DateTimeV2)
+ ->Arg(1000000)
+ ->Arg(10000000)
+ ->Iterations(100)
+ ->MinTime(2.0)
+ ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_BinaryCast_DateTimeV2_to_UI64)
+ ->Arg(1000000)
+ ->Arg(10000000)
+ ->Iterations(100)
+ ->MinTime(2.0)
+ ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_OldBinaryCast_DateTimeV2_to_UI64)
+ ->Arg(1000000)
+ ->Arg(10000000)
+ ->Iterations(100)
+ ->MinTime(2.0)
+ ->Unit(benchmark::kMicrosecond);
+
+} // namespace doris
diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp
index e7c62ad45ac..16bdbfeaab5 100644
--- a/be/src/util/binary_cast.hpp
+++ b/be/src/util/binary_cast.hpp
@@ -17,7 +17,6 @@
#pragma once
-#include <cstddef>
#include <cstdint>
#include <type_traits>
@@ -25,6 +24,7 @@
#include "util/types.h"
#include "vec/core/wide_integer.h"
#include "vec/runtime/vdatetime_value.h"
+
namespace doris {
union TypeConverter {
uint64_t u64;
@@ -66,9 +66,9 @@ union DateTimeV2UInt64Union {
~DateTimeV2UInt64Union() {}
};
-// similar to reinterpret_cast but won't break strict-aliasing rules
+// similar to reinterpret_cast but won't break strict-aliasing rules. you can
treat it as std::bit_cast with type checking
template <typename From, typename To>
-To binary_cast(From from) {
+constexpr PURE To binary_cast(const From& from) {
constexpr bool from_u64_to_db = match_v<From, uint64_t, To, double>;
constexpr bool from_i64_to_db = match_v<From, int64_t, To, double>;
constexpr bool from_db_to_i64 = match_v<From, double, To, int64_t>;
@@ -94,51 +94,7 @@ To binary_cast(From from) {
from_decv2_to_i128 || from_decv2_to_i256 ||
from_ui32_to_date_v2 ||
from_date_v2_to_ui32 || from_ui64_to_datetime_v2 ||
from_datetime_v2_to_ui64);
- if constexpr (from_u64_to_db) {
- TypeConverter conv;
- conv.u64 = from;
- return conv.dbl;
- } else if constexpr (from_i64_to_db) {
- TypeConverter conv;
- conv.i64 = from;
- return conv.dbl;
- } else if constexpr (from_db_to_i64) {
- TypeConverter conv;
- conv.dbl = from;
- return conv.i64;
- } else if constexpr (from_db_to_u64) {
- TypeConverter conv;
- conv.dbl = from;
- return conv.u64;
- } else if constexpr (from_i64_to_vec_dt) {
- VecDateTimeInt64Union conv = {.i64 = from};
- return conv.dt;
- } else if constexpr (from_ui32_to_date_v2) {
- DateV2UInt32Union conv = {.ui32 = from};
- return conv.dt;
- } else if constexpr (from_date_v2_to_ui32) {
- DateV2UInt32Union conv = {.dt = from};
- return conv.ui32;
- } else if constexpr (from_ui64_to_datetime_v2) {
- DateTimeV2UInt64Union conv = {.ui64 = from};
- return conv.dt;
- } else if constexpr (from_datetime_v2_to_ui64) {
- DateTimeV2UInt64Union conv = {.dt = from};
- return conv.ui64;
- } else if constexpr (from_vec_dt_to_i64) {
- VecDateTimeInt64Union conv = {.dt = from};
- return conv.i64;
- } else if constexpr (from_i128_to_decv2) {
- DecimalInt128Union conv;
- conv.i128 = from;
- return conv.decimal;
- } else if constexpr (from_decv2_to_i128) {
- DecimalInt128Union conv;
- conv.decimal = from;
- return conv.i128;
- } else {
- throw Exception(Status::FatalError("__builtin_unreachable"));
- }
+ return std::bit_cast<To>(from);
}
} // namespace doris
diff --git a/be/src/vec/functions/simple_function_factory.h
b/be/src/vec/functions/simple_function_factory.h
index 224e1c1ee60..e6bb0e00758 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -116,7 +116,7 @@ void register_function_bit_test(SimpleFunctionFactory&
factory);
void register_function_dict_get(SimpleFunctionFactory& factory);
void register_function_dict_get_many(SimpleFunctionFactory& factory);
-#ifdef BE_TEST
+#if defined(BE_TEST) && !defined(BE_BENCHMARK)
void register_function_throw_exception(SimpleFunctionFactory& factory);
#endif
@@ -324,7 +324,7 @@ public:
register_function_compress(instance);
register_function_dict_get(instance);
register_function_dict_get_many(instance);
-#ifdef BE_TEST
+#if defined(BE_TEST) && !defined(BE_BENCHMARK)
register_function_throw_exception(instance);
#endif
});
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]