This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 63fc74083fe branch-4.0: [check](column)Columns must be created only
via their corresponding create methods. #59087 (#59114)
63fc74083fe is described below
commit 63fc74083fe1310dec12c7303bddafd239cf5800
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Dec 18 12:20:50 2025 +0800
branch-4.0: [check](column)Columns must be created only via their
corresponding create methods. #59087 (#59114)
Cherry-picked from #59087
Co-authored-by: Mryange <[email protected]>
---
be/src/olap/rowset/segment_v2/column_writer.h | 3 ++-
.../variant/variant_column_writer_impl.cpp | 26 +++++++++-------------
.../variant/variant_column_writer_impl.h | 4 ++--
.../pipeline/exec/join/process_hash_table_probe.h | 1 +
be/src/vec/columns/column_varbinary.h | 4 ++--
be/src/vec/columns/column_variant.h | 3 +++
be/src/vec/columns/column_vector.h | 10 +++++++++
.../function_date_or_datetime_computation.h | 16 ++++++-------
8 files changed, 39 insertions(+), 28 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h
b/be/src/olap/rowset/segment_v2/column_writer.h
index 9ba8b286387..c3dcd503ca0 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -36,6 +36,7 @@
#include "olap/rowset/segment_v2/inverted_index_writer.h"
#include "util/bitmap.h" // for BitmapChange
#include "util/slice.h" // for OwnedSlice
+#include "vec/columns/column_variant.h"
namespace doris {
@@ -502,7 +503,7 @@ private:
bool _is_finalized = false;
ordinal_t _next_rowid = 0;
size_t none_null_size = 0;
- vectorized::MutableColumnPtr _column;
+ vectorized::ColumnVariant::MutablePtr _column;
const TabletColumn* _tablet_column = nullptr;
ColumnWriterOptions _opts;
std::unique_ptr<ColumnWriter> _writer;
diff --git
a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
index 155180411e7..7203b5171ab 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
@@ -370,6 +370,7 @@ VariantColumnWriterImpl::VariantColumnWriterImpl(const
ColumnWriterOptions& opts
const TabletColumn* column) {
_opts = opts;
_tablet_column = column;
+ _null_column = vectorized::ColumnUInt8::create();
}
Status VariantColumnWriterImpl::init() {
@@ -379,8 +380,7 @@ Status VariantColumnWriterImpl::init() {
if (_opts.rowset_ctx->write_type == DataWriteType::TYPE_DIRECT) {
count = 0;
}
- auto col = vectorized::ColumnVariant::create(count);
- _column = std::move(col);
+ _column = vectorized::ColumnVariant::create(count);
return Status::OK();
}
@@ -409,8 +409,8 @@ Status
VariantColumnWriterImpl::_process_root_column(vectorized::ColumnVariant*
if (_tablet_column->is_nullable()) {
// use outer null column as final null column
root_column = vectorized::ColumnNullable::create(
- root_column->get_ptr(),
vectorized::ColumnUInt8::create(_null_column));
- nullmap = _null_column.get_data().data();
+ root_column->get_ptr(),
vectorized::ColumnUInt8::create(*_null_column));
+ nullmap = _null_column->get_data().data();
} else {
// Otherwise setting to all not null.
root_column = vectorized::ColumnNullable::create(
@@ -544,7 +544,7 @@ Status VariantColumnWriterImpl::_process_sparse_column(
}
Status VariantColumnWriterImpl::finalize() {
- auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
+ auto* ptr = _column.get();
ptr->set_max_subcolumns_count(_tablet_column->variant_max_subcolumns_count());
ptr->finalize(vectorized::ColumnVariant::FinalizeMode::WRITE_MODE);
// convert each subcolumns to storage format and add data to sub columns
writers buffer
@@ -604,8 +604,7 @@ Status VariantColumnWriterImpl::finalize() {
}
bool VariantColumnWriterImpl::is_finalized() const {
- const auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
- return ptr->is_finalized() && _is_finalized;
+ return _column->is_finalized() && _is_finalized;
}
Status VariantColumnWriterImpl::append_data(const uint8_t** ptr, size_t
num_rows) {
@@ -613,9 +612,8 @@ Status VariantColumnWriterImpl::append_data(const uint8_t**
ptr, size_t num_rows
const auto* column = reinterpret_cast<const
vectorized::VariantColumnData*>(*ptr);
const auto& src = *reinterpret_cast<const
vectorized::ColumnVariant*>(column->column_data);
RETURN_IF_ERROR(src.sanitize());
- auto* dst_ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
// TODO: if direct write we could avoid copy
- dst_ptr->insert_range_from(src, column->row_pos, num_rows);
+ _column->insert_range_from(src, column->row_pos, num_rows);
return Status::OK();
}
@@ -698,7 +696,7 @@ Status VariantColumnWriterImpl::write_bloom_filter_index() {
Status VariantColumnWriterImpl::append_nullable(const uint8_t* null_map, const
uint8_t** ptr,
size_t num_rows) {
if (null_map != nullptr) {
- _null_column.insert_many_raw_data((const char*)null_map, num_rows);
+ _null_column->insert_many_raw_data((const char*)null_map, num_rows);
}
RETURN_IF_ERROR(append_data(ptr, num_rows));
return Status::OK();
@@ -721,9 +719,8 @@ Status VariantSubcolumnWriter::init() {
Status VariantSubcolumnWriter::append_data(const uint8_t** ptr, size_t
num_rows) {
const auto* column = reinterpret_cast<const
vectorized::VariantColumnData*>(*ptr);
const auto& src = *reinterpret_cast<const
vectorized::ColumnVariant*>(column->column_data);
- auto* dst_ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
// TODO: if direct write we could avoid copy
- dst_ptr->insert_range_from(src, column->row_pos, num_rows);
+ _column->insert_range_from(src, column->row_pos, num_rows);
return Status::OK();
}
@@ -732,12 +729,11 @@ uint64_t VariantSubcolumnWriter::estimate_buffer_size() {
}
bool VariantSubcolumnWriter::is_finalized() const {
- const auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
- return ptr->is_finalized() && _is_finalized;
+ return _column->is_finalized() && _is_finalized;
}
Status VariantSubcolumnWriter::finalize() {
- auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
+ auto* ptr = _column.get();
ptr->finalize();
DCHECK(ptr->is_finalized());
diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
index c2aa7bd9a05..32ff8301e3a 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
@@ -137,8 +137,8 @@ private:
vectorized::OlapBlockDataConvertor* converter,
size_t num_rows,
int& column_id);
// prepare a column for finalize
- doris::vectorized::MutableColumnPtr _column;
- doris::vectorized::ColumnUInt8 _null_column;
+ doris::vectorized::ColumnVariant::MutablePtr _column;
+ doris::vectorized::ColumnUInt8::MutablePtr _null_column;
ColumnWriterOptions _opts;
const TabletColumn* _tablet_column = nullptr;
bool _is_finalized = false;
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe.h
b/be/src/pipeline/exec/join/process_hash_table_probe.h
index b83ac8709e2..100a11fd2c7 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe.h
@@ -20,6 +20,7 @@
#include <vector>
#include "vec/columns/column.h"
+#include "vec/columns/column_vector.h"
#include "vec/common/arena.h"
#include "vec/common/custom_allocator.h"
diff --git a/be/src/vec/columns/column_varbinary.h
b/be/src/vec/columns/column_varbinary.h
index 4097b3dbbaf..e9e900954c6 100644
--- a/be/src/vec/columns/column_varbinary.h
+++ b/be/src/vec/columns/column_varbinary.h
@@ -41,10 +41,10 @@ private:
public:
using value_type = typename
PrimitiveTypeTraits<TYPE_VARBINARY>::ColumnItemType;
using Container = PaddedPODArray<doris::StringView>;
- ColumnVarbinary() = default;
- ColumnVarbinary(const size_t n) : _data(n) {}
private:
+ ColumnVarbinary() = default;
+ ColumnVarbinary(const size_t n) : _data(n) {}
ColumnVarbinary(const ColumnVarbinary& src) : _data(src._data.begin(),
src._data.end()) {}
public:
diff --git a/be/src/vec/columns/column_variant.h
b/be/src/vec/columns/column_variant.h
index e25f0af9b96..2ca8a46b11e 100644
--- a/be/src/vec/columns/column_variant.h
+++ b/be/src/vec/columns/column_variant.h
@@ -291,6 +291,8 @@ private:
public:
static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
+private:
+ friend class COWHelper<IColumn, ColumnVariant>;
// always create root: data type nothing
explicit ColumnVariant(int32_t max_subcolumns_count);
@@ -302,6 +304,7 @@ public:
explicit ColumnVariant(int32_t max_subcolumns_count, Subcolumns&&
subcolumns_);
+public:
~ColumnVariant() override = default;
/// Checks that all subcolumns have consistent sizes.
diff --git a/be/src/vec/columns/column_vector.h
b/be/src/vec/columns/column_vector.h
index 1f118a2a866..54c225fab51 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -52,6 +52,11 @@
class SipHash;
+namespace doris::pipeline {
+template <int JoinOpType>
+struct ProcessHashTableProbe;
+}
+
namespace doris::vectorized {
class Arena;
class ColumnSorter;
@@ -71,6 +76,9 @@ private:
using Self = ColumnVector;
friend class COWHelper<IColumn, Self>;
+ template <int JoinOpType>
+ friend struct doris::pipeline::ProcessHashTableProbe;
+
struct less;
struct greater;
@@ -78,6 +86,7 @@ public:
using value_type = typename PrimitiveTypeTraits<T>::ColumnItemType;
using Container = PaddedPODArray<value_type>;
+private:
ColumnVector() = default;
explicit ColumnVector(const size_t n) : data(n) {}
explicit ColumnVector(const size_t n, const value_type x) : data(n, x) {}
@@ -86,6 +95,7 @@ public:
/// Sugar constructor.
ColumnVector(std::initializer_list<value_type> il) : data {il} {}
+public:
size_t size() const override { return data.size(); }
StringRef get_data_at(size_t n) const override {
diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h
b/be/src/vec/functions/function_date_or_datetime_computation.h
index dfc57f1694e..6efa38a24b5 100644
--- a/be/src/vec/functions/function_date_or_datetime_computation.h
+++ b/be/src/vec/functions/function_date_or_datetime_computation.h
@@ -618,13 +618,13 @@ public:
// vector-const
if (const auto* nest_col1_const =
check_and_get_column<ColumnConst>(*nest_col1)) {
rconst = true;
- const auto col1_inside_const =
+ const auto& col1_inside_const =
assert_cast<const ColumnVector<Transform::ArgPType>&>(
nest_col1_const->get_data_column());
Op::vector_constant(sources->get_data(), res_col->get_data(),
col1_inside_const.get_data()[0], nullmap0,
nullmap1);
} else { // vector-vector
- const auto concrete_col1 =
+ const auto& concrete_col1 =
assert_cast<const
ColumnVector<Transform::ArgPType>&>(*nest_col1);
Op::vector_vector(sources->get_data(),
concrete_col1.get_data(),
res_col->get_data(), nullmap0, nullmap1);
@@ -649,10 +649,10 @@ public:
check_and_get_column_const<ColumnVector<Transform::ArgPType>>(
src_nested_col.get())) {
// const-vector
- const auto col0_inside_const = assert_cast<const
ColumnVector<Transform::ArgPType>&>(
+ const auto& col0_inside_const = assert_cast<const
ColumnVector<Transform::ArgPType>&>(
sources_const->get_data_column());
const ColumnPtr nested_col1 = remove_nullable(col1);
- const auto concrete_col1 =
+ const auto& concrete_col1 =
assert_cast<const
ColumnVector<Transform::ArgPType>&>(*nested_col1);
Op::constant_vector(col0_inside_const.get_data()[0],
res_col->get_data(),
concrete_col1.get_data(), nullmap0, nullmap1);
@@ -747,14 +747,14 @@ public:
nest_col1_const->get_data_at(0).to_string(), nullmap0,
nullmap1);
} else {
- const auto col1_inside_const = assert_cast<const
IntervalColumnType&>(
+ const auto& col1_inside_const = assert_cast<const
IntervalColumnType&>(
nest_col1_const->get_data_column());
Op::vector_constant(sources->get_data(),
res_col->get_data(),
col1_inside_const.get_data()[0],
nullmap0, nullmap1);
}
} else { // vector-vector
if constexpr (Transform::IntervalPType != TYPE_STRING) {
- const auto concrete_col1 = assert_cast<const
IntervalColumnType&>(*nest_col1);
+ const auto& concrete_col1 = assert_cast<const
IntervalColumnType&>(*nest_col1);
Op::vector_vector(sources->get_data(),
concrete_col1.get_data(),
res_col->get_data(), nullmap0, nullmap1);
} else {
@@ -790,11 +790,11 @@ public:
src_nested_col.get())) {
if constexpr (Transform::IntervalPType != TYPE_STRING) {
// const-vector
- const auto col0_inside_const =
+ const auto& col0_inside_const =
assert_cast<const ColumnVector<Transform::ArgPType>&>(
sources_const->get_data_column());
const ColumnPtr nested_col1 = remove_nullable(col1);
- const auto concrete_col1 = assert_cast<const
IntervalColumnType&>(*nested_col1);
+ const auto& concrete_col1 = assert_cast<const
IntervalColumnType&>(*nested_col1);
Op::constant_vector(col0_inside_const.get_data()[0],
res_col->get_data(),
concrete_col1.get_data(), nullmap0,
nullmap1);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]