zhiqiang-hhhh commented on code in PR #56063:
URL: https://github.com/apache/doris/pull/56063#discussion_r2354829078
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1977,10 +2041,10 @@ void
SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
* This approach optimizes reading performance by leveraging batch processing
for continuous
* rowid sequences and handling discontinuities gracefully in smaller chunks.
*/
-Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit,
uint32_t& nrows_read) {
+Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit,
uint16_t& nrows_read) {
SCOPED_RAW_TIMER(&_opts.stats->predicate_column_read_ns);
- nrows_read = _range_iter->read_batch_rowids(_block_rowids.data(),
nrows_read_limit);
+ nrows_read =
(uint16_t)_range_iter->read_batch_rowids(_block_rowids.data(),
nrows_read_limit);
Review Comment:
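Is there any possibility of overflow here? `nrows_read_limit` is a `uint32_t`, but the result of `read_batch_rowids` is now cast to `uint16_t`.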
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2654,24 +2552,74 @@ Status
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
}
std::string vir_cid_to_idx_in_block_msg =
fmt::format("_vir_cid_to_idx_in_block:[{}]",
fmt::join(vcid_to_idx, ","));
- throw doris::Exception(
- ErrorCode::INTERNAL_ERROR,
+ return Status::InternalError(
"Column in idx {} is nothing, block columns {},
normal_columns {}, "
"vir_cid_to_idx_in_block_msg {}",
idx, block->columns(), _schema->num_column_ids(),
vir_cid_to_idx_in_block_msg);
} else if (entry.column->size() != rows) {
- throw doris::Exception(
- ErrorCode::INTERNAL_ERROR,
- "Unmatched size {}, expected {}, column: {}, type: {},
idx_in_block: {}",
+ return Status::InternalError(
+ "Unmatched size {}, expected {}, column: {}, type: {},
idx_in_block: {}, "
+ "block: {}",
entry.column->size(), rows, entry.column->get_name(),
entry.type->get_name(),
- idx);
+ idx, block->dump_structure());
}
idx++;
}
#endif
return Status::OK();
}
+Status SegmentIterator::_process_column_predicate() {
+ return Status::OK();
+}
+
+Status SegmentIterator::_process_eof(vectorized::Block* block) {
+ // Convert all columns in _current_return_columns to schema column
+ RETURN_IF_ERROR(_convert_to_expected_type(_schema->column_ids()));
+ for (int i = 0; i < block->columns(); i++) {
+ auto cid = _schema->column_id(i);
+ if (!_is_pred_column[cid]) {
+ block->replace_by_position(i,
std::move(_current_return_columns[cid]));
+ }
+ }
+ block->clear_column_data();
+ // clear and release iterators memory footprint in advance
+ _column_iterators.clear();
+ _bitmap_index_iterators.clear();
+ _index_iterators.clear();
+ return Status::EndOfFile("no more data in segment");
+}
+
+Status SegmentIterator::_process_common_expr(uint16_t* sel_rowid_idx,
uint16_t& selected_size,
+ vectorized::Block* block) {
+ // Here we just use col0 as row_number indicator. when reach here, we will
calculate the predicates first.
+ // then use the result to reduce our data read(that is, expr push down).
there's now row in block means the first
+ // column is not in common expr. so it's safe to replace it temporarily
to provide correct `selected_size`.
+ VLOG_DEBUG << fmt::format("Execute common expr. block rows {}, selected
size {}", block->rows(),
+ _selected_size);
+
+ bool need_mock_col = block->rows() != selected_size;
Review Comment:
What does "mock_col" mean in `need_mock_col`? Would "move_col" be a clearer name?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]