This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 63fc74083fe branch-4.0: [check](column)Columns must be created only via their corresponding create methods. #59087 (#59114)
63fc74083fe is described below

commit 63fc74083fe1310dec12c7303bddafd239cf5800
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Dec 18 12:20:50 2025 +0800

    branch-4.0: [check](column)Columns must be created only via their corresponding create methods. #59087 (#59114)
    
    Cherry-picked from #59087
    
    Co-authored-by: Mryange <[email protected]>
---
 be/src/olap/rowset/segment_v2/column_writer.h      |  3 ++-
 .../variant/variant_column_writer_impl.cpp         | 26 +++++++++-------------
 .../variant/variant_column_writer_impl.h           |  4 ++--
 .../pipeline/exec/join/process_hash_table_probe.h  |  1 +
 be/src/vec/columns/column_varbinary.h              |  4 ++--
 be/src/vec/columns/column_variant.h                |  3 +++
 be/src/vec/columns/column_vector.h                 | 10 +++++++++
 .../function_date_or_datetime_computation.h        | 16 ++++++-------
 8 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_writer.h 
b/be/src/olap/rowset/segment_v2/column_writer.h
index 9ba8b286387..c3dcd503ca0 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -36,6 +36,7 @@
 #include "olap/rowset/segment_v2/inverted_index_writer.h"
 #include "util/bitmap.h" // for BitmapChange
 #include "util/slice.h"  // for OwnedSlice
+#include "vec/columns/column_variant.h"
 
 namespace doris {
 
@@ -502,7 +503,7 @@ private:
     bool _is_finalized = false;
     ordinal_t _next_rowid = 0;
     size_t none_null_size = 0;
-    vectorized::MutableColumnPtr _column;
+    vectorized::ColumnVariant::MutablePtr _column;
     const TabletColumn* _tablet_column = nullptr;
     ColumnWriterOptions _opts;
     std::unique_ptr<ColumnWriter> _writer;
diff --git 
a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp 
b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
index 155180411e7..7203b5171ab 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
@@ -370,6 +370,7 @@ VariantColumnWriterImpl::VariantColumnWriterImpl(const 
ColumnWriterOptions& opts
                                                  const TabletColumn* column) {
     _opts = opts;
     _tablet_column = column;
+    _null_column = vectorized::ColumnUInt8::create();
 }
 
 Status VariantColumnWriterImpl::init() {
@@ -379,8 +380,7 @@ Status VariantColumnWriterImpl::init() {
     if (_opts.rowset_ctx->write_type == DataWriteType::TYPE_DIRECT) {
         count = 0;
     }
-    auto col = vectorized::ColumnVariant::create(count);
-    _column = std::move(col);
+    _column = vectorized::ColumnVariant::create(count);
     return Status::OK();
 }
 
@@ -409,8 +409,8 @@ Status 
VariantColumnWriterImpl::_process_root_column(vectorized::ColumnVariant*
     if (_tablet_column->is_nullable()) {
         // use outer null column as final null column
         root_column = vectorized::ColumnNullable::create(
-                root_column->get_ptr(), 
vectorized::ColumnUInt8::create(_null_column));
-        nullmap = _null_column.get_data().data();
+                root_column->get_ptr(), 
vectorized::ColumnUInt8::create(*_null_column));
+        nullmap = _null_column->get_data().data();
     } else {
         // Otherwise setting to all not null.
         root_column = vectorized::ColumnNullable::create(
@@ -544,7 +544,7 @@ Status VariantColumnWriterImpl::_process_sparse_column(
 }
 
 Status VariantColumnWriterImpl::finalize() {
-    auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
+    auto* ptr = _column.get();
     ptr->set_max_subcolumns_count(_tablet_column->variant_max_subcolumns_count());
     ptr->finalize(vectorized::ColumnVariant::FinalizeMode::WRITE_MODE);
     // convert each subcolumns to storage format and add data to sub columns writers buffer
@@ -604,8 +604,7 @@ Status VariantColumnWriterImpl::finalize() {
 }
 
 bool VariantColumnWriterImpl::is_finalized() const {
-    const auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
-    return ptr->is_finalized() && _is_finalized;
+    return _column->is_finalized() && _is_finalized;
 }
 
 Status VariantColumnWriterImpl::append_data(const uint8_t** ptr, size_t 
num_rows) {
@@ -613,9 +612,8 @@ Status VariantColumnWriterImpl::append_data(const uint8_t** 
ptr, size_t num_rows
     const auto* column = reinterpret_cast<const 
vectorized::VariantColumnData*>(*ptr);
     const auto& src = *reinterpret_cast<const 
vectorized::ColumnVariant*>(column->column_data);
     RETURN_IF_ERROR(src.sanitize());
-    auto* dst_ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
     // TODO: if direct write we could avoid copy
-    dst_ptr->insert_range_from(src, column->row_pos, num_rows);
+    _column->insert_range_from(src, column->row_pos, num_rows);
     return Status::OK();
 }
 
@@ -698,7 +696,7 @@ Status VariantColumnWriterImpl::write_bloom_filter_index() {
 Status VariantColumnWriterImpl::append_nullable(const uint8_t* null_map, const 
uint8_t** ptr,
                                                 size_t num_rows) {
     if (null_map != nullptr) {
-        _null_column.insert_many_raw_data((const char*)null_map, num_rows);
+        _null_column->insert_many_raw_data((const char*)null_map, num_rows);
     }
     RETURN_IF_ERROR(append_data(ptr, num_rows));
     return Status::OK();
@@ -721,9 +719,8 @@ Status VariantSubcolumnWriter::init() {
 Status VariantSubcolumnWriter::append_data(const uint8_t** ptr, size_t 
num_rows) {
     const auto* column = reinterpret_cast<const 
vectorized::VariantColumnData*>(*ptr);
     const auto& src = *reinterpret_cast<const 
vectorized::ColumnVariant*>(column->column_data);
-    auto* dst_ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
     // TODO: if direct write we could avoid copy
-    dst_ptr->insert_range_from(src, column->row_pos, num_rows);
+    _column->insert_range_from(src, column->row_pos, num_rows);
     return Status::OK();
 }
 
@@ -732,12 +729,11 @@ uint64_t VariantSubcolumnWriter::estimate_buffer_size() {
 }
 
 bool VariantSubcolumnWriter::is_finalized() const {
-    const auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
-    return ptr->is_finalized() && _is_finalized;
+    return _column->is_finalized() && _is_finalized;
 }
 
 Status VariantSubcolumnWriter::finalize() {
-    auto* ptr = assert_cast<vectorized::ColumnVariant*>(_column.get());
+    auto* ptr = _column.get();
     ptr->finalize();
 
     DCHECK(ptr->is_finalized());
diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h 
b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
index c2aa7bd9a05..32ff8301e3a 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
@@ -137,8 +137,8 @@ private:
                                vectorized::OlapBlockDataConvertor* converter, 
size_t num_rows,
                                int& column_id);
     // prepare a column for finalize
-    doris::vectorized::MutableColumnPtr _column;
-    doris::vectorized::ColumnUInt8 _null_column;
+    doris::vectorized::ColumnVariant::MutablePtr _column;
+    doris::vectorized::ColumnUInt8::MutablePtr _null_column;
     ColumnWriterOptions _opts;
     const TabletColumn* _tablet_column = nullptr;
     bool _is_finalized = false;
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe.h 
b/be/src/pipeline/exec/join/process_hash_table_probe.h
index b83ac8709e2..100a11fd2c7 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "vec/columns/column.h"
+#include "vec/columns/column_vector.h"
 #include "vec/common/arena.h"
 #include "vec/common/custom_allocator.h"
 
diff --git a/be/src/vec/columns/column_varbinary.h 
b/be/src/vec/columns/column_varbinary.h
index 4097b3dbbaf..e9e900954c6 100644
--- a/be/src/vec/columns/column_varbinary.h
+++ b/be/src/vec/columns/column_varbinary.h
@@ -41,10 +41,10 @@ private:
 public:
     using value_type = typename 
PrimitiveTypeTraits<TYPE_VARBINARY>::ColumnItemType;
     using Container = PaddedPODArray<doris::StringView>;
-    ColumnVarbinary() = default;
-    ColumnVarbinary(const size_t n) : _data(n) {}
 
 private:
+    ColumnVarbinary() = default;
+    ColumnVarbinary(const size_t n) : _data(n) {}
     ColumnVarbinary(const ColumnVarbinary& src) : _data(src._data.begin(), 
src._data.end()) {}
 
 public:
diff --git a/be/src/vec/columns/column_variant.h 
b/be/src/vec/columns/column_variant.h
index e25f0af9b96..2ca8a46b11e 100644
--- a/be/src/vec/columns/column_variant.h
+++ b/be/src/vec/columns/column_variant.h
@@ -291,6 +291,8 @@ private:
 public:
     static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
 
+private:
+    friend class COWHelper<IColumn, ColumnVariant>;
     // always create root: data type nothing
     explicit ColumnVariant(int32_t max_subcolumns_count);
 
@@ -302,6 +304,7 @@ public:
 
     explicit ColumnVariant(int32_t max_subcolumns_count, Subcolumns&& 
subcolumns_);
 
+public:
     ~ColumnVariant() override = default;
 
     /// Checks that all subcolumns have consistent sizes.
diff --git a/be/src/vec/columns/column_vector.h 
b/be/src/vec/columns/column_vector.h
index 1f118a2a866..54c225fab51 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -52,6 +52,11 @@
 
 class SipHash;
 
+namespace doris::pipeline {
+template <int JoinOpType>
+struct ProcessHashTableProbe;
+}
+
 namespace doris::vectorized {
 class Arena;
 class ColumnSorter;
@@ -71,6 +76,9 @@ private:
     using Self = ColumnVector;
     friend class COWHelper<IColumn, Self>;
 
+    template <int JoinOpType>
+    friend struct doris::pipeline::ProcessHashTableProbe;
+
     struct less;
     struct greater;
 
@@ -78,6 +86,7 @@ public:
     using value_type = typename PrimitiveTypeTraits<T>::ColumnItemType;
     using Container = PaddedPODArray<value_type>;
 
+private:
     ColumnVector() = default;
     explicit ColumnVector(const size_t n) : data(n) {}
     explicit ColumnVector(const size_t n, const value_type x) : data(n, x) {}
@@ -86,6 +95,7 @@ public:
     /// Sugar constructor.
     ColumnVector(std::initializer_list<value_type> il) : data {il} {}
 
+public:
     size_t size() const override { return data.size(); }
 
     StringRef get_data_at(size_t n) const override {
diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h 
b/be/src/vec/functions/function_date_or_datetime_computation.h
index dfc57f1694e..6efa38a24b5 100644
--- a/be/src/vec/functions/function_date_or_datetime_computation.h
+++ b/be/src/vec/functions/function_date_or_datetime_computation.h
@@ -618,13 +618,13 @@ public:
             // vector-const
             if (const auto* nest_col1_const = 
check_and_get_column<ColumnConst>(*nest_col1)) {
                 rconst = true;
-                const auto col1_inside_const =
+                const auto& col1_inside_const =
                         assert_cast<const ColumnVector<Transform::ArgPType>&>(
                                 nest_col1_const->get_data_column());
                 Op::vector_constant(sources->get_data(), res_col->get_data(),
                                     col1_inside_const.get_data()[0], nullmap0, 
nullmap1);
             } else { // vector-vector
-                const auto concrete_col1 =
+                const auto& concrete_col1 =
                         assert_cast<const 
ColumnVector<Transform::ArgPType>&>(*nest_col1);
                 Op::vector_vector(sources->get_data(), 
concrete_col1.get_data(),
                                   res_col->get_data(), nullmap0, nullmap1);
@@ -649,10 +649,10 @@ public:
                            
check_and_get_column_const<ColumnVector<Transform::ArgPType>>(
                                    src_nested_col.get())) {
             // const-vector
-            const auto col0_inside_const = assert_cast<const 
ColumnVector<Transform::ArgPType>&>(
+            const auto& col0_inside_const = assert_cast<const 
ColumnVector<Transform::ArgPType>&>(
                     sources_const->get_data_column());
             const ColumnPtr nested_col1 = remove_nullable(col1);
-            const auto concrete_col1 =
+            const auto& concrete_col1 =
                     assert_cast<const 
ColumnVector<Transform::ArgPType>&>(*nested_col1);
             Op::constant_vector(col0_inside_const.get_data()[0], 
res_col->get_data(),
                                 concrete_col1.get_data(), nullmap0, nullmap1);
@@ -747,14 +747,14 @@ public:
                                         
nest_col1_const->get_data_at(0).to_string(), nullmap0,
                                         nullmap1);
                 } else {
-                    const auto col1_inside_const = assert_cast<const 
IntervalColumnType&>(
+                    const auto& col1_inside_const = assert_cast<const 
IntervalColumnType&>(
                             nest_col1_const->get_data_column());
                     Op::vector_constant(sources->get_data(), 
res_col->get_data(),
                                         col1_inside_const.get_data()[0], 
nullmap0, nullmap1);
                 }
             } else { // vector-vector
                 if constexpr (Transform::IntervalPType != TYPE_STRING) {
-                    const auto concrete_col1 = assert_cast<const 
IntervalColumnType&>(*nest_col1);
+                    const auto& concrete_col1 = assert_cast<const 
IntervalColumnType&>(*nest_col1);
                     Op::vector_vector(sources->get_data(), 
concrete_col1.get_data(),
                                       res_col->get_data(), nullmap0, nullmap1);
                 } else {
@@ -790,11 +790,11 @@ public:
                                    src_nested_col.get())) {
             if constexpr (Transform::IntervalPType != TYPE_STRING) {
                 // const-vector
-                const auto col0_inside_const =
+                const auto& col0_inside_const =
                         assert_cast<const ColumnVector<Transform::ArgPType>&>(
                                 sources_const->get_data_column());
                 const ColumnPtr nested_col1 = remove_nullable(col1);
-                const auto concrete_col1 = assert_cast<const 
IntervalColumnType&>(*nested_col1);
+                const auto& concrete_col1 = assert_cast<const 
IntervalColumnType&>(*nested_col1);
                 Op::constant_vector(col0_inside_const.get_data()[0], 
res_col->get_data(),
                                     concrete_col1.get_data(), nullmap0, 
nullmap1);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to