BiteTheDDDDt commented on code in PR #56063:
URL: https://github.com/apache/doris/pull/56063#discussion_r2354984292


##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1977,10 +2041,10 @@ void 
SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
  * This approach optimizes reading performance by leveraging batch processing 
for continuous
  * rowid sequences and handling discontinuities gracefully in smaller chunks.
  */
-Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, 
uint32_t& nrows_read) {
+Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, 
uint16_t& nrows_read) {
     SCOPED_RAW_TIMER(&_opts.stats->predicate_column_read_ns);
 
-    nrows_read = _range_iter->read_batch_rowids(_block_rowids.data(), 
nrows_read_limit);
+    nrows_read = 
(uint16_t)_range_iter->read_batch_rowids(_block_rowids.data(), 
nrows_read_limit);

Review Comment:
   应该不会,这里下游处理都是uint16的



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2654,24 +2552,74 @@ Status 
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
             }
             std::string vir_cid_to_idx_in_block_msg =
                     fmt::format("_vir_cid_to_idx_in_block:[{}]", 
fmt::join(vcid_to_idx, ","));
-            throw doris::Exception(
-                    ErrorCode::INTERNAL_ERROR,
+            return Status::InternalError(
                     "Column in idx {} is nothing, block columns {}, 
normal_columns {}, "
                     "vir_cid_to_idx_in_block_msg {}",
                     idx, block->columns(), _schema->num_column_ids(), 
vir_cid_to_idx_in_block_msg);
         } else if (entry.column->size() != rows) {
-            throw doris::Exception(
-                    ErrorCode::INTERNAL_ERROR,
-                    "Unmatched size {}, expected {}, column: {}, type: {}, 
idx_in_block: {}",
+            return Status::InternalError(
+                    "Unmatched size {}, expected {}, column: {}, type: {}, 
idx_in_block: {}, "
+                    "block: {}",
                     entry.column->size(), rows, entry.column->get_name(), 
entry.type->get_name(),
-                    idx);
+                    idx, block->dump_structure());
         }
         idx++;
     }
 #endif
     return Status::OK();
 }
 
+Status SegmentIterator::_process_column_predicate() {
+    return Status::OK();
+}
+
+Status SegmentIterator::_process_eof(vectorized::Block* block) {
+    // Convert all columns in _current_return_columns to schema column
+    RETURN_IF_ERROR(_convert_to_expected_type(_schema->column_ids()));
+    for (int i = 0; i < block->columns(); i++) {
+        auto cid = _schema->column_id(i);
+        if (!_is_pred_column[cid]) {
+            block->replace_by_position(i, 
std::move(_current_return_columns[cid]));
+        }
+    }
+    block->clear_column_data();
+    // clear and release iterators memory footprint in advance
+    _column_iterators.clear();
+    _bitmap_index_iterators.clear();
+    _index_iterators.clear();
+    return Status::EndOfFile("no more data in segment");
+}
+
+Status SegmentIterator::_process_common_expr(uint16_t* sel_rowid_idx, 
uint16_t& selected_size,
+                                             vectorized::Block* block) {
+    // Here we just use col0 as row_number indicator. when reach here, we will 
calculate the predicates first.
+    //  then use the result to reduce our data read(that is, expr push down). 
there's no row in block means the first
+    //  column is not in common expr. so it's safe to replace it temporarily 
to provide correct `selected_size`.
+    VLOG_DEBUG << fmt::format("Execute common expr. block rows {}, selected 
size {}", block->rows(),
+                              _selected_size);
+
+    bool need_mock_col = block->rows() != selected_size;

Review Comment:
   这里是为了Block size正确所以要做一个mock的column



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to