This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2f172a943dd [refine](code) Add comments and benchmarks for ColumnView. (#61678)
2f172a943dd is described below
commit 2f172a943ddd8d488e9fdb736848bd0806bbc39c
Author: Mryange <[email protected]>
AuthorDate: Wed Apr 8 18:21:29 2026 +0800
[refine](code) Add comments and benchmarks for ColumnView. (#61678)
```
----------------------------------------------------------------------------------
Benchmark                                             Time         CPU  Iterations
----------------------------------------------------------------------------------
Handwritten_Unary_Plain                             326 ns      326 ns     2151459
ColumnView_Unary_Plain                              326 ns      326 ns     2146584
Handwritten_Unary_Nullable                         2067 ns     2067 ns      342110
ColumnView_Unary_Nullable                          2061 ns     2061 ns      341236
Handwritten_Binary_Plain_Plain                      680 ns      680 ns     1028990
ColumnView_Binary_Plain_Plain                       679 ns      679 ns     1025809
Handwritten_Binary_Plain_Const                      277 ns      277 ns     2534313
ColumnView_Binary_Plain_Const                       282 ns      282 ns     2484547
Handwritten_Binary_Plain_Nullable                   776 ns      776 ns      881182
ColumnView_Binary_Plain_Nullable                    779 ns      779 ns      897644
Handwritten_Binary_Nullable_Nullable               3233 ns     3233 ns      217793
ColumnView_Binary_Nullable_Nullable                4469 ns     4469 ns      157379
Handwritten_Ternary_Plain_Plain_Plain              1016 ns     1016 ns      688153
ColumnView_Ternary_Plain_Plain_Plain               1017 ns     1017 ns      685327
Handwritten_Ternary_Const_Const_Plain               278 ns      278 ns     2506171
ColumnView_Ternary_Const_Const_Plain                285 ns      285 ns     2456870
Handwritten_Ternary_Plain_Const_Plain               678 ns      678 ns     1026683
ColumnView_Ternary_Plain_Const_Plain                681 ns      681 ns     1027665
Handwritten_Ternary_Nullable_Nullable_Nullable     4729 ns     4729 ns      149026
ColumnView_Ternary_Nullable_Nullable_Nullable      8608 ns     8608 ns       82746
```
Performance guide: when to use ColumnView

1. Expensive per-element operations (e.g. geo functions, complex string ops):
   Use ColumnView freely — its overhead is negligible relative to the work.
2. Cheap per-element operations that the compiler can inline (e.g. simple arithmetic):
   a) Inputs are NOT nullable (e.g. the function framework has already stripped the nullable wrapper):
      Safe to use. The compiler optimizes the is_const branch into code equivalent to hand-written direct array access (verified via assembly and benchmarks).
   b) Inputs involve nullable columns:
      - Unary operations: safe to use; the compiler still optimizes effectively.
      - Binary / ternary operations: the combined is_null_at checks across multiple columns inhibit compiler vectorization and branch optimization, causing a significant regression (roughly 1.4x slower for binary and 1.8x slower for ternary in the benchmarks above). In this case, hand-written column access is recommended for best performance.
In summary, ColumnView is designed to eliminate the combinatorial explosion of handling the 4 column forms (Plain, Const, Nullable, and Const(Nullable)). It is suitable for the vast majority of use cases. Only the specific combination of "cheap computation + nullable + multi-column" requires weighing whether to hand-write the access code.
---
be/benchmark/benchmark_column_view.hpp | 484 +++++++++++++++++++++++++++++++
be/benchmark/benchmark_fastunion.hpp | 2 +-
be/benchmark/benchmark_hll_merge.hpp | 2 +-
be/benchmark/benchmark_main.cpp | 7 +-
be/benchmark/benchmark_string.hpp | 4 +-
be/benchmark/binary_cast_benchmark.hpp | 2 +-
be/src/core/column/column_execute_util.h | 24 ++
7 files changed, 517 insertions(+), 8 deletions(-)
diff --git a/be/benchmark/benchmark_column_view.hpp
b/be/benchmark/benchmark_column_view.hpp
new file mode 100644
index 00000000000..02ce3294b5b
--- /dev/null
+++ b/be/benchmark/benchmark_column_view.hpp
@@ -0,0 +1,484 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// ============================================================
+// Benchmark: ColumnView vs hand-written column access (Int64)
+//
+// ColumnView (see column_execute_util.h) provides a unified interface
+// to read column values regardless of whether the underlying column is
+// Plain, ColumnConst, ColumnNullable, or Const(Nullable).
+//
+// This benchmark measures whether ColumnView introduces measurable
+// overhead compared to hand-written (direct) column access code.
+// ============================================================
+
+#include <benchmark/benchmark.h>
+
+#include <cstdint>
+
+#include "core/assert_cast.h"
+#include "core/column/column_const.h"
+#include "core/column/column_execute_util.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/primitive_type.h"
+
+namespace doris {
+
+static constexpr size_t NUM_ROWS = 4096;
+
+// ============================================================
+// Column factory helpers
+// ============================================================
+
+static ColumnPtr make_plain_column() {
+ auto col = ColumnInt64::create();
+ col->reserve(NUM_ROWS);
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ col->insert_value(static_cast<int64_t>(i + 1));
+ }
+ return col;
+}
+
+static ColumnPtr make_const_column() {
+ auto inner = ColumnInt64::create();
+ inner->insert_value(42);
+ return ColumnConst::create(std::move(inner), NUM_ROWS);
+}
+
+static ColumnPtr make_nullable_column() {
+ return ColumnNullable::create(make_plain_column()->assume_mutable(),
+ ColumnUInt8::create(NUM_ROWS, 0));
+}
+
+// ============================================================
+// Helper: extract Int64 data from various column forms
+// ============================================================
+
+struct PlainAccessor {
+ const ColumnInt64::Container& data;
+
+ explicit PlainAccessor(const ColumnPtr& col)
+ : data(assert_cast<const ColumnInt64&>(*col).get_data()) {}
+
+ int64_t get(size_t i) const { return data[i]; }
+};
+
+struct ConstAccessor {
+ const int64_t value;
+
+ explicit ConstAccessor(const ColumnPtr& col)
+ : value(assert_cast<const ColumnInt64&>(
+ assert_cast<const
ColumnConst&>(*col).get_data_column())
+ .get_data()[0]) {}
+
+ int64_t get(size_t /*i*/) const { return value; }
+};
+
+struct NullableAccessor {
+ const ColumnInt64::Container& data;
+ const NullMap& null_map;
+
+ explicit NullableAccessor(const ColumnPtr& col)
+ : data(assert_cast<const ColumnInt64&>(
+ assert_cast<const
ColumnNullable&>(*col).get_nested_column())
+ .get_data()),
+ null_map(assert_cast<const
ColumnNullable&>(*col).get_null_map_data()) {}
+
+ int64_t get(size_t i) const { return data[i]; }
+ bool is_null(size_t i) const { return null_map[i]; }
+};
+
+struct ConstNullableAccessor {
+ const int64_t value;
+ const bool is_null_value;
+
+ explicit ConstNullableAccessor(const ColumnPtr& col)
+ : value(assert_cast<const ColumnInt64&>(
+ assert_cast<const ColumnNullable&>(
+ assert_cast<const
ColumnConst&>(*col).get_data_column())
+ .get_nested_column())
+ .get_data()[0]),
+ is_null_value(assert_cast<const ColumnNullable&>(
+ assert_cast<const
ColumnConst&>(*col).get_data_column())
+ .get_null_map_data()[0]) {}
+
+ int64_t get(size_t /*i*/) const { return value; }
+ bool is_null(size_t /*i*/) const { return is_null_value; }
+};
+
+// ============================================================
+// Unary benchmarks: sum = Σ a[i]
+// ============================================================
+
+// ---- Unary: Plain ----
+
+static void Handwritten_Unary_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ PlainAccessor a(col_a);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += a.get(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Unary_Plain)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Unary_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += view_a.value_at(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Unary_Plain)->Unit(benchmark::kNanosecond);
+
+// ---- Unary: Nullable ----
+
+static void Handwritten_Unary_Nullable(benchmark::State& state) {
+ const auto col_a = make_nullable_column();
+ NullableAccessor a(col_a);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ if (!a.is_null(i)) {
+ sum += a.get(i);
+ }
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Unary_Nullable)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Unary_Nullable(benchmark::State& state) {
+ const auto col_a = make_nullable_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ if (!view_a.is_null_at(i)) {
+ sum += view_a.value_at(i);
+ }
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Unary_Nullable)->Unit(benchmark::kNanosecond);
+
+// ============================================================
+// Binary benchmarks: sum = Σ (a[i] + b[i])
+// ============================================================
+
+// ---- Binary: (Plain, Plain) ----
+
+static void Handwritten_Binary_Plain_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_plain_column();
+ PlainAccessor a(col_a);
+ PlainAccessor b(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += a.get(i) + b.get(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Binary_Plain_Plain)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Binary_Plain_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_plain_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += view_a.value_at(i) + view_b.value_at(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Binary_Plain_Plain)->Unit(benchmark::kNanosecond);
+
+// ---- Binary: (Plain, Const) ----
+
+static void Handwritten_Binary_Plain_Const(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_const_column();
+ PlainAccessor a(col_a);
+ ConstAccessor b(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += a.get(i) + b.get(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Binary_Plain_Const)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Binary_Plain_Const(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_const_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += view_a.value_at(i) + view_b.value_at(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Binary_Plain_Const)->Unit(benchmark::kNanosecond);
+
+// ---- Binary: (Plain, Nullable) ----
+
+static void Handwritten_Binary_Plain_Nullable(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_nullable_column();
+ PlainAccessor a(col_a);
+ NullableAccessor b(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ int64_t val = a.get(i);
+ if (!b.is_null(i)) {
+ val += b.get(i);
+ }
+ sum += val;
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Binary_Plain_Nullable)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Binary_Plain_Nullable(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_nullable_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ int64_t val = view_a.value_at(i);
+ if (!view_b.is_null_at(i)) {
+ val += view_b.value_at(i);
+ }
+ sum += val;
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Binary_Plain_Nullable)->Unit(benchmark::kNanosecond);
+
+// ---- Binary: (Nullable, Nullable) ----
+
+static void Handwritten_Binary_Nullable_Nullable(benchmark::State& state) {
+ const auto col_a = make_nullable_column();
+ const auto col_b = make_nullable_column();
+ NullableAccessor a(col_a);
+ NullableAccessor b(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ if (!a.is_null(i) && !b.is_null(i)) {
+ sum += a.get(i) + b.get(i);
+ }
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Binary_Nullable_Nullable)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Binary_Nullable_Nullable(benchmark::State& state) {
+ const auto col_a = make_nullable_column();
+ const auto col_b = make_nullable_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ if (!view_a.is_null_at(i) && !view_b.is_null_at(i)) {
+ sum += view_a.value_at(i) + view_b.value_at(i);
+ }
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Binary_Nullable_Nullable)->Unit(benchmark::kNanosecond);
+
+// ============================================================
+// Ternary benchmarks: sum = Σ (a[i] + b[i] + c[i])
+// ============================================================
+
+// ---- Ternary: (Plain, Plain, Plain) ----
+
+static void Handwritten_Ternary_Plain_Plain_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_plain_column();
+ const auto col_c = make_plain_column();
+ PlainAccessor a(col_a);
+ PlainAccessor b(col_b);
+ PlainAccessor c(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += a.get(i) + b.get(i) + c.get(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Ternary_Plain_Plain_Plain)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Ternary_Plain_Plain_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_plain_column();
+ const auto col_c = make_plain_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ const auto view_c = ColumnView<TYPE_BIGINT>::create(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += view_a.value_at(i) + view_b.value_at(i) +
view_c.value_at(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Ternary_Plain_Plain_Plain)->Unit(benchmark::kNanosecond);
+
+// ---- Ternary: (Const, Const, Plain) ----
+
+static void Handwritten_Ternary_Const_Const_Plain(benchmark::State& state) {
+ const auto col_a = make_const_column();
+ const auto col_b = make_const_column();
+ const auto col_c = make_plain_column();
+ ConstAccessor a(col_a);
+ ConstAccessor b(col_b);
+ PlainAccessor c(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += a.get(i) + b.get(i) + c.get(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Ternary_Const_Const_Plain)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Ternary_Const_Const_Plain(benchmark::State& state) {
+ const auto col_a = make_const_column();
+ const auto col_b = make_const_column();
+ const auto col_c = make_plain_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ const auto view_c = ColumnView<TYPE_BIGINT>::create(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += view_a.value_at(i) + view_b.value_at(i) +
view_c.value_at(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Ternary_Const_Const_Plain)->Unit(benchmark::kNanosecond);
+
+// ---- Ternary: (Plain, Const, Plain) ----
+
+static void Handwritten_Ternary_Plain_Const_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_const_column();
+ const auto col_c = make_plain_column();
+ PlainAccessor a(col_a);
+ ConstAccessor b(col_b);
+ PlainAccessor c(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += a.get(i) + b.get(i) + c.get(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Ternary_Plain_Const_Plain)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Ternary_Plain_Const_Plain(benchmark::State& state) {
+ const auto col_a = make_plain_column();
+ const auto col_b = make_const_column();
+ const auto col_c = make_plain_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ const auto view_c = ColumnView<TYPE_BIGINT>::create(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ sum += view_a.value_at(i) + view_b.value_at(i) +
view_c.value_at(i);
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Ternary_Plain_Const_Plain)->Unit(benchmark::kNanosecond);
+
+// ---- Ternary: (Nullable, Nullable, Nullable) ----
+
+static void Handwritten_Ternary_Nullable_Nullable_Nullable(benchmark::State&
state) {
+ const auto col_a = make_nullable_column();
+ const auto col_b = make_nullable_column();
+ const auto col_c = make_nullable_column();
+ NullableAccessor a(col_a);
+ NullableAccessor b(col_b);
+ NullableAccessor c(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ if (!a.is_null(i) && !b.is_null(i) && !c.is_null(i)) {
+ sum += a.get(i) + b.get(i) + c.get(i);
+ }
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(Handwritten_Ternary_Nullable_Nullable_Nullable)->Unit(benchmark::kNanosecond);
+
+static void ColumnView_Ternary_Nullable_Nullable_Nullable(benchmark::State&
state) {
+ const auto col_a = make_nullable_column();
+ const auto col_b = make_nullable_column();
+ const auto col_c = make_nullable_column();
+ const auto view_a = ColumnView<TYPE_BIGINT>::create(col_a);
+ const auto view_b = ColumnView<TYPE_BIGINT>::create(col_b);
+ const auto view_c = ColumnView<TYPE_BIGINT>::create(col_c);
+ for (auto _ : state) {
+ int64_t sum = 0;
+ for (size_t i = 0; i < NUM_ROWS; ++i) {
+ if (!view_a.is_null_at(i) && !view_b.is_null_at(i) &&
!view_c.is_null_at(i)) {
+ sum += view_a.value_at(i) + view_b.value_at(i) +
view_c.value_at(i);
+ }
+ }
+ benchmark::DoNotOptimize(sum);
+ }
+}
+BENCHMARK(ColumnView_Ternary_Nullable_Nullable_Nullable)->Unit(benchmark::kNanosecond);
+
+} // namespace doris
diff --git a/be/benchmark/benchmark_fastunion.hpp
b/be/benchmark/benchmark_fastunion.hpp
index ba469b75fa6..ae574321aec 100644
--- a/be/benchmark/benchmark_fastunion.hpp
+++ b/be/benchmark/benchmark_fastunion.hpp
@@ -19,7 +19,7 @@
#include <string>
-#include "util/bitmap_value.h"
+#include "core/value/bitmap_value.h"
using Roaring64Map = doris::detail::Roaring64Map;
diff --git a/be/benchmark/benchmark_hll_merge.hpp
b/be/benchmark/benchmark_hll_merge.hpp
index d923d208fe4..2fc6c47eaa6 100644
--- a/be/benchmark/benchmark_hll_merge.hpp
+++ b/be/benchmark/benchmark_hll_merge.hpp
@@ -17,7 +17,7 @@
#include <benchmark/benchmark.h>
-#include "olap/hll.h"
+#include "core/value/hll.h"
#include "util/hash_util.hpp"
namespace doris {
diff --git a/be/benchmark/benchmark_main.cpp b/be/benchmark/benchmark_main.cpp
index 5c516440a0c..f76dbb370e5 100644
--- a/be/benchmark/benchmark_main.cpp
+++ b/be/benchmark/benchmark_main.cpp
@@ -20,14 +20,15 @@
#include "benchmark_bit_pack.hpp"
#include "benchmark_bits.hpp"
#include "benchmark_block_bloom_filter.hpp"
+#include "benchmark_column_view.hpp"
#include "benchmark_fastunion.hpp"
#include "benchmark_hll_merge.hpp"
#include "benchmark_string.hpp"
#include "binary_cast_benchmark.hpp"
#include "core/block/block.h"
-#include "vec/columns/column_string.h"
-#include "vec/data_types/data_type.h"
-#include "vec/data_types/data_type_string.h"
+#include "core/column/column_string.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_string.h"
namespace doris { // change if need
diff --git a/be/benchmark/benchmark_string.hpp
b/be/benchmark/benchmark_string.hpp
index fc993e75744..bab869b693d 100644
--- a/be/benchmark/benchmark_string.hpp
+++ b/be/benchmark/benchmark_string.hpp
@@ -20,8 +20,8 @@
#include <random>
#include <vector>
-#include "vec/functions/function_string.cpp"
-#include "vec/functions/string_hex_util.h"
+#include "exprs/function/function_string.cpp"
+#include "exprs/function/string_hex_util.h"
namespace doris {
diff --git a/be/benchmark/binary_cast_benchmark.hpp
b/be/benchmark/binary_cast_benchmark.hpp
index cc5874a82ca..fec9ddb949a 100644
--- a/be/benchmark/binary_cast_benchmark.hpp
+++ b/be/benchmark/binary_cast_benchmark.hpp
@@ -21,7 +21,7 @@
#include <random>
#include <vector>
-#include "util/binary_cast.hpp"
+#include "core/binary_cast.hpp"
namespace doris {
diff --git a/be/src/core/column/column_execute_util.h
b/be/src/core/column/column_execute_util.h
index d8f33782c48..187f439d2f7 100644
--- a/be/src/core/column/column_execute_util.h
+++ b/be/src/core/column/column_execute_util.h
@@ -61,6 +61,30 @@ struct ColumnElementView<TYPE_STRING> {
// 3. Nullable(ColumnInt32)
// 4. Const(Nullable(ColumnInt32)) (although this case is rare, it can still occur; many of our previous code did not consider this)
// You can use is_null_at and value_at to get the data at the corresponding position
+//
+// ====== Performance Guide: When to Use ColumnView ======
+//
+// 1. Expensive per-element operations (e.g. geo functions, complex string ops):
+// Use ColumnView freely — its overhead is negligible relative to the work.
+//
+// 2. Cheap per-element operations that the compiler can inline (e.g. simple arithmetic):
+//
+// a) Inputs are NOT nullable (e.g. the function framework already strips nullable):
+// Safe to use. The compiler optimizes the is_const branch into code equivalent
+// to hand-written direct array access (verified via assembly and benchmarks).
+//
+// b) Inputs involve nullable columns:
+// - Unary operations: safe to use, the compiler still optimizes effectively.
+// - Binary / ternary operations: the combined is_null_at checks across multiple
+// columns inhibit compiler vectorization and branch optimization, causing
+// significant regression (~1.4x for binary, ~1.8x for ternary in benchmarks).
+// In this case, hand-written column access is recommended for best performance.
+//
+// In summary, ColumnView is designed to eliminate the combinatorial explosion of
+// handling 4 column forms. It is suitable for the vast majority of use cases.
+// Only the specific combination of "cheap computation + nullable + multi-column"
+// requires weighing whether to hand-write the access code.
+// ====== End of Performance Guide ======
template <PrimitiveType PType>
struct ColumnView {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]