Copilot commented on code in PR #64010:
URL: https://github.com/apache/doris/pull/64010#discussion_r3339638205
##########
be/src/storage/segment/segment_iterator.cpp:
##########
@@ -586,6 +470,55 @@ Status SegmentIterator::_init_impl(const
StorageReadOptions& opts) {
return Status::OK();
}
+Status SegmentIterator::_init_project_schema() {
+ _schema_block_id_map.assign(_schema->columns().size(), -1);
+ for (int i = 0; i < _schema->num_column_ids(); i++) {
+ auto cid = _schema->column_id(i);
+ _schema_block_id_map[cid] = i;
+ }
+
+ _project_schema = _opts.project_columns != nullptr
+ ? std::make_shared<Schema>(_schema->columns(),
*_opts.project_columns)
+ : _schema;
+ return Status::OK();
+}
+
+Status SegmentIterator::_build_project_block(Block* block, uint16_t
selected_size,
+ Block* project_block) {
+ project_block->clear();
+ DORIS_CHECK(_project_schema != nullptr);
+ for (auto cid : _project_schema->column_ids()) {
+ auto loc = _schema_block_id_map[cid];
+ auto& output_column = block->get_by_position(loc);
+ auto type = output_column.type;
+ auto column = output_column.column;
+ auto virtual_it = _vir_cid_to_idx_in_block.find(cid);
+ if (virtual_it != _vir_cid_to_idx_in_block.end()) {
+ auto type_it = _opts.vir_col_idx_to_type.find(virtual_it->second);
+ DORIS_CHECK(type_it != _opts.vir_col_idx_to_type.end());
+ type = type_it->second;
+ if (!column || check_and_get_column<const
ColumnNothing>(column.get()) ||
+ column->size() != selected_size) {
+ column = ColumnNothing::create(selected_size);
+ }
+ } else {
+ if (!type) {
+ type = Schema::get_data_type_ptr(*_schema->column(cid));
+ }
+ if (!column) {
+ return Status::InternalError(
+ "project column {} is not materialized before project
block build", cid);
+ }
+ if (column->size() != selected_size) {
+ return Status::InternalError("project column {} has {} rows,
expected {}", cid,
+ column->size(), selected_size);
+ }
+ }
+ project_block->insert({std::move(column), type,
_schema->column(cid)->name()});
+ }
+ return Status::OK();
+}
+
void SegmentIterator::_initialize_predicate_results() {
// Initialize from _col_predicates
for (auto pred : _col_predicates) {
Review Comment:
_build_project_block() currently returns an InternalError when a non-virtual
projected column is either not yet materialized (nullptr) or has a size other
than selected_size. During lazy materialization, many projected columns are
intentionally empty (size 0) at the time common expr pushdown runs, so this
check can fail even though the expr does not reference those columns. Consider
filling non-materialized columns with a typed placeholder (e.g., a const
default column) so that the project block preserves slot ordinals without
forcing early materialization.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]