github-actions[bot] commented on code in PR #26749:
URL: https://github.com/apache/doris/pull/26749#discussion_r1401715620


##########
be/src/olap/rowset/segment_creator.cpp:
##########
@@ -40,31 +49,177 @@ SegmentFlusher::SegmentFlusher() = default;
 
 SegmentFlusher::~SegmentFlusher() = default;
 
-Status SegmentFlusher::init(const RowsetWriterContext& rowset_writer_context) {
-    _context = rowset_writer_context;
+Status SegmentFlusher::init(RowsetWriterContext& rowset_writer_context) {
+    _context = &rowset_writer_context;
     return Status::OK();
 }
 
 Status SegmentFlusher::flush_single_block(const vectorized::Block* block, int32_t segment_id,
-                                          int64_t* flush_size, 
TabletSchemaSPtr flush_schema) {
+                                          int64_t* flush_size) {
     if (block->rows() == 0) {
         return Status::OK();
     }
-    bool no_compression = block->bytes() <= config::segment_compression_threshold_kb * 1024;
+    TabletSchemaSPtr flush_schema = nullptr;
+    // Expand variant columns
+    vectorized::Block flush_block(*block);
+    if (_context->write_type != DataWriteType::TYPE_COMPACTION &&
+        _context->tablet_schema->num_variant_columns() > 0) {
+        RETURN_IF_ERROR(_expand_variant_to_subcolumns(flush_block, flush_schema));
+    }
+    bool no_compression = flush_block.bytes() <= config::segment_compression_threshold_kb * 1024;
     if (config::enable_vertical_segment_writer) {
         std::unique_ptr<segment_v2::VerticalSegmentWriter> writer;
         RETURN_IF_ERROR(_create_segment_writer(writer, segment_id, 
no_compression, flush_schema));
-        RETURN_IF_ERROR(_add_rows(writer, block, 0, block->rows()));
+        RETURN_IF_ERROR(_add_rows(writer, &flush_block, 0, 
flush_block.rows()));
         RETURN_IF_ERROR(_flush_segment_writer(writer, flush_size));
     } else {
         std::unique_ptr<segment_v2::SegmentWriter> writer;
         RETURN_IF_ERROR(_create_segment_writer(writer, segment_id, 
no_compression, flush_schema));
-        RETURN_IF_ERROR(_add_rows(writer, block, 0, block->rows()));
+        RETURN_IF_ERROR(_add_rows(writer, &flush_block, 0, 
flush_block.rows()));
         RETURN_IF_ERROR(_flush_segment_writer(writer, flush_size));
     }
     return Status::OK();
 }
 
+Status SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block,

Review Comment:
   warning: function '_expand_variant_to_subcolumns' exceeds recommended size/complexity thresholds [readability-function-size]
   ```cpp
   Status SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block,
                          ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_creator.cpp:83:** 136 lines including whitespace and comments (threshold 80)
   ```cpp
   Status SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block,
                          ^
   ```
   
   </details>
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.h:
##########
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "io/io_common.h"
+#include "olap/field.h"
+#include "olap/iterators.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/schema.h"
+#include "olap/tablet_schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_object.h"
+#include "vec/columns/subcolumn_tree.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_object.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+struct StreamReader {
+    vectorized::MutableColumnPtr column;
+    std::unique_ptr<ColumnIterator> iterator;
+    std::shared_ptr<const vectorized::IDataType> type;
+    bool inited = false;
+    size_t rows_read = 0;
+    StreamReader() = default;
+    StreamReader(vectorized::MutableColumnPtr&& col, 
std::unique_ptr<ColumnIterator>&& it,
+                 std::shared_ptr<const vectorized::IDataType> t)
+            : column(std::move(col)), iterator(std::move(it)), type(t) {}
+};
+
+// path -> StreamReader
+using SubstreamReaderTree = vectorized::SubcolumnsTree<StreamReader>;
+
+// path -> SubcolumnReader
+struct SubcolumnReader {
+    std::unique_ptr<ColumnReader> reader;
+    std::shared_ptr<const vectorized::IDataType> file_column_type;
+};
+using SubcolumnColumnReaders = vectorized::SubcolumnsTree<SubcolumnReader>;
+
+// Reader for hierarchical data for variant, merge with root(sparse encoded columns)
+class HierarchicalDataReader : public ColumnIterator {
+public:
+    HierarchicalDataReader(const vectorized::PathInData& path, bool 
output_as_raw_json = false)
+            : _path(path), _output_as_raw_json(output_as_raw_json) {}
+
+    static Status create(std::unique_ptr<ColumnIterator>* reader,
+                         const SubcolumnColumnReaders::Node* target_node,
+                         const SubcolumnColumnReaders::Node* root, bool 
output_as_raw_json = false);
+
+    Status init(const ColumnIteratorOptions& opts) override;
+
+    Status seek_to_first() override;
+
+    Status seek_to_ordinal(ordinal_t ord) override;
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override;
+
+    Status read_by_rowids(const rowid_t* rowids, const size_t count,
+                          vectorized::MutableColumnPtr& dst) override;
+
+    ordinal_t get_current_ordinal() const override;
+
+    Status add_stream(const SubcolumnColumnReaders::Node* node);
+
+    void set_root(std::unique_ptr<StreamReader>&& root) { _root_reader = 
std::move(root); }
+
+private:
+    SubstreamReaderTree _substream_reader;
+    std::unique_ptr<StreamReader> _root_reader;
+    size_t _rows_read = 0;
+    vectorized::PathInData _path;
+    bool _output_as_raw_json = false;
+
+    template <typename NodeFunction>
+    Status tranverse(NodeFunction&& node_func) {
+        for (auto& entry : _substream_reader) {
+            RETURN_IF_ERROR(node_func(*entry));
+        }
+        return Status::OK();
+    }
+    // process read
+    template <typename ReadFunction>
+    Status process_read(ReadFunction&& read_func, 
vectorized::MutableColumnPtr& dst, size_t nrows) {

Review Comment:
   warning: function 'process_read' exceeds recommended size/complexity thresholds [readability-function-size]
   ```cpp
       Status process_read(ReadFunction&& read_func, vectorized::MutableColumnPtr& dst, size_t nrows) {
              ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/hierarchical_data_reader.h:105:** 99 lines including whitespace and comments (threshold 80)
   ```cpp
       Status process_read(ReadFunction&& read_func, vectorized::MutableColumnPtr& dst, size_t nrows) {
              ^
   ```
   
   </details>
   



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1954,6 +2021,49 @@ Status SegmentIterator::next_batch(vectorized::Block* 
block) {
     return status;
 }
 
+Status SegmentIterator::_convert_to_expected_type(const std::vector<ColumnId>& col_ids) {
+    for (ColumnId i : col_ids) {
+        if (_current_return_columns[i] == nullptr || _converted_column_ids[i] 
||
+            _is_pred_column[i]) {
+            continue;
+        }
+        if (!_segment->same_with_storage_type(
+                    i, *_schema, _opts.io_ctx.reader_type != 
ReaderType::READER_QUERY)) {
+            const Field* field_type = _schema->column(i);
+            vectorized::DataTypePtr expected_type = 
Schema::get_data_type_ptr(*field_type);
+            vectorized::DataTypePtr file_column_type = 
_storage_name_and_type[i].second;
+            vectorized::ColumnPtr expected;
+            vectorized::ColumnPtr original =
+                    _current_return_columns[i]->assume_mutable()->get_ptr();
+            RETURN_IF_ERROR(vectorized::schema_util::cast_column({original, 
file_column_type, ""},
+                                                                 
expected_type, &expected));
+            _current_return_columns[i] = expected->assume_mutable();
+            _converted_column_ids[i] = 1;
+            VLOG_DEBUG << fmt::format("Convert {} fom file column type {} to 
{}, num_rows {}",
+                                      field_type->path().get_path(), 
file_column_type->get_name(),
+                                      expected_type->get_name(),
+                                      _current_return_columns[i]->size());
+        }
+    }
+    return Status::OK();
+}
+
+Status SegmentIterator::copy_column_data_by_selector(vectorized::IColumn* 
input_col_ptr,
+                                                     
vectorized::MutableColumnPtr& output_col,
+                                                     uint16_t* sel_rowid_idx, 
uint16_t select_size,
+                                                     size_t batch_size) {
+    output_col->reserve(batch_size);
+
+    // adapt for outer join change column to nullable
+    if (output_col->is_nullable() && !input_col_ptr->is_nullable()) {
+        auto col_ptr_nullable = 
reinterpret_cast<vectorized::ColumnNullable*>(output_col.get());
+        
col_ptr_nullable->get_null_map_column().insert_many_defaults(select_size);
+        output_col = col_ptr_nullable->get_nested_column_ptr();
+    }
+
+    return input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, 
output_col);
+}
+
 Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {

Review Comment:
   warning: function '_next_batch_internal' has cognitive complexity of 207 (threshold 50) [readability-function-cognitive-complexity]
   ```cpp
   Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
                           ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2071:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (UNLIKELY(!_lazy_inited)) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2072:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_lazy_init());
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2072:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_lazy_init());
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2074:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (_lazy_materialization_read || _opts.record_rowids || 
_is_need_expr_eval) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2114:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (_wait_times_estimate_row_size > 0) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2121:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       RETURN_IF_ERROR(_read_columns_by_index(
       ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2121:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
       RETURN_IF_ERROR(_read_columns_by_index(
       ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2124:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (std::find(_first_read_column_ids.begin(), 
_first_read_column_ids.end(),
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2132:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (_current_batch_rows_read == 0) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2134:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_schema->column_ids()));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2134:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_schema->column_ids()));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2135:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (int i = 0; i < block->columns(); i++) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2138:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (!_is_pred_column[cid]) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2146:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2146:** +1
   ```cpp
       if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
                                                      ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2147:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2147:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2148:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2148:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2151:** +1, nesting 
level increased to 1
   ```cpp
       } else {
         ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2155:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (_is_need_vec_eval || _is_need_short_eval) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2155:** +1
   ```cpp
           if (_is_need_vec_eval || _is_need_short_eval) {
                                 ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2167:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (selected_size > 0) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2172:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   RETURN_IF_ERROR(_output_column_by_sel_idx(block, 
_first_read_column_ids,
                   ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2172:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                   RETURN_IF_ERROR(_output_column_by_sel_idx(block, 
_first_read_column_ids,
                   ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2176:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   if (_is_need_expr_eval) {
                   ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2178:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                       if (!_second_read_column_ids.empty()) {
                       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2180:** +6, including 
nesting penalty of 5, nesting level increased to 6
   ```cpp
                           RETURN_IF_ERROR(_read_columns_by_rowids(
                           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2180:** +7, including 
nesting penalty of 6, nesting level increased to 7
   ```cpp
                           RETURN_IF_ERROR(_read_columns_by_rowids(
                           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2183:** +6, including 
nesting penalty of 5, nesting level increased to 6
   ```cpp
                           if (std::find(_second_read_column_ids.begin(),
                           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2188:** +6, including 
nesting penalty of 5, nesting level increased to 6
   ```cpp
                           
RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids));
                           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2188:** +7, including 
nesting penalty of 6, nesting level increased to 7
   ```cpp
                           
RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids));
                           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2199:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                       if (block->rows() == 0) {
                       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2209:** +6, including 
nesting penalty of 5, nesting level increased to 6
   ```cpp
                           RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2209:** +7, including 
nesting penalty of 6, nesting level increased to 7
   ```cpp
                           RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2211:** +1, nesting 
level increased to 5
   ```cpp
                       } else {
                         ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2214:** +6, including 
nesting penalty of 5, nesting level increased to 6
   ```cpp
                           RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2214:** +7, including 
nesting penalty of 6, nesting level increased to 7
   ```cpp
                           RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2217:** +1, nesting 
level increased to 3
   ```cpp
               } else if (_is_need_expr_eval) {
                      ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2218:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   
RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids));
                   ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2218:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                   
RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids));
                   ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2224:** +1, nesting 
level increased to 2
   ```cpp
           } else if (_is_need_expr_eval) {
                  ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2226:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               
RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids));
               ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2226:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
               
RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids));
               ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2232:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               for (uint32_t i = 0; i < selected_size; ++i) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2236:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (block->rows() == 0) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2246:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                   ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2246:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                   RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                   ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2248:** +1, nesting 
level increased to 3
   ```cpp
               } else {
                 ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2251:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                   ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2251:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                   RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
                   ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2255:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (UNLIKELY(_opts.record_rowids)) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2258:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               for (auto i = 0; i < _selected_size; i++) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2263:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (_non_predicate_columns.empty()) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2267:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2267:** +1
   ```cpp
               if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
                                                ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2273:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (selected_size > 0) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2274:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(_read_columns_by_rowids(_non_predicate_columns, 
_block_rowids,
               ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2274:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(_read_columns_by_rowids(_non_predicate_columns, 
_block_rowids,
               ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2277:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (std::find(_non_predicate_columns.begin(), 
_non_predicate_columns.end(),
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2283:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2283:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2287:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (!_is_need_expr_eval) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2305:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2305:** +1
   ```cpp
       if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
                                        ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2310:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (_opts.read_orderby_key_reverse) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2312:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (num_rows == 0) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2317:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (size_t i = 0; i < num_rows; ++i) 
permutation.emplace_back(num_rows - 1 - i);
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2319:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (size_t i = 0; i < num_columns; ++i)
           ^
   ```
   
   </details>
   



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -341,6 +379,115 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
                                              _file_reader, &reader));
         _column_readers.emplace(column.unique_id(), std::move(reader));
     }
+
+    // init by column path
+    for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) {
+        auto& column = _tablet_schema->column(ordinal);
+        auto iter = column_path_to_footer_ordinal.find(column.path_info());
+        if (iter == column_path_to_footer_ordinal.end()) {
+            continue;
+        }
+        ColumnReaderOptions opts;
+        opts.kept_in_memory = _tablet_schema->is_in_memory();
+        std::unique_ptr<ColumnReader> reader;
+        RETURN_IF_ERROR(ColumnReader::create(opts, 
footer.columns(iter->second), footer.num_rows(),
+                                             _file_reader, &reader));
+        _sub_column_tree.add(
+                iter->first,
+                SubcolumnReader {std::move(reader),
+                                 
vectorized::DataTypeFactory::instance().create_data_type(
+                                         footer.columns(iter->second))});
+    }
+    return Status::OK();
+}
+
+static Status new_default_iterator(const TabletColumn& tablet_column,
+                                   std::unique_ptr<ColumnIterator>* iter) {
+    if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) {
+        return Status::InternalError("invalid nonexistent column without default value.");
+    }
+    auto type_info = get_type_info(&tablet_column);
+    std::unique_ptr<DefaultValueColumnIterator> default_value_iter(new 
DefaultValueColumnIterator(
+            tablet_column.has_default_value(), tablet_column.default_value(),
+            tablet_column.is_nullable(), std::move(type_info), 
tablet_column.precision(),
+            tablet_column.frac()));
+    ColumnIteratorOptions iter_opts;
+
+    RETURN_IF_ERROR(default_value_iter->init(iter_opts));
+    *iter = std::move(default_value_iter);
+    return Status::OK();
+}
+
+Status Segment::new_column_iterator_with_path(const TabletColumn& 
tablet_column,

Review Comment:
   warning: function 'new_column_iterator_with_path' has cognitive complexity of 61 (threshold 50) [readability-function-cognitive-complexity]
   ```cpp
   Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column,
                   ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/segment.cpp:424:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
       if (tablet_column.path_info().empty()) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:427:** +1, nesting level 
increased to 1
   ```cpp
       } else {
         ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:432:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
       if (opt->io_ctx.reader_type == ReaderType::READER_ALTER_TABLE) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:434:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           if (node == nullptr) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:436:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
               ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:436:** +4, including nesting 
penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
               ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:443:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root, output_as_raw_json));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:443:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root, output_as_raw_json));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:447:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
       if (opt->io_ctx.reader_type != ReaderType::READER_QUERY) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:450:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           if (!node) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:451:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
               ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:451:** +4, including nesting 
penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
               ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:455:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:455:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:465:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
       if (node != nullptr && node->is_scalar() && node->children.empty()) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:465:** +1
   ```cpp
       if (node != nullptr && node->is_scalar() && node->children.empty()) {
                                                ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:469:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:469:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:471:** +1, nesting level 
increased to 1
   ```cpp
       } else if (node != nullptr && !node->children.empty()) {
              ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:471:** +1
   ```cpp
       } else if (node != nullptr && !node->children.empty()) {
                                  ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:473:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:473:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:474:** +1, nesting level 
increased to 1
   ```cpp
       } else {
         ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:476:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           if (root == nullptr) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:478:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
               ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:478:** +4, including nesting 
penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
               ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:482:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
           ^
   ```
   **be/src/common/status.h:523:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment.cpp:482:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
           ^
   ```
   **be/src/common/status.h:525:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   
   </details>
   



##########
be/src/olap/schema.cpp:
##########
@@ -130,10 +131,11 @@ vectorized::IColumn::MutablePtr 
Schema::get_column_by_field(const Field& field)
     return get_data_type_ptr(field)->create_column();
 }
 
-vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& 
field,
+vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const 
FieldType& type,

Review Comment:
   warning: function 'get_predicate_column_ptr' exceeds recommended size/complexity thresholds [readability-function-size]
   ```cpp
   vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const FieldType& type,
                                           ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/schema.cpp:133:** 87 lines including whitespace and comments 
(threshold 80)
   ```cpp
   vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const 
FieldType& type,
                                           ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to