github-actions[bot] commented on code in PR #41784:
URL: https://github.com/apache/doris/pull/41784#discussion_r1799201965


##########
be/src/olap/partial_update_info.cpp:
##########
@@ -152,4 +263,394 @@ void 
PartialUpdateInfo::_generate_default_values_for_missing_cids(
     }
     CHECK_EQ(missing_cids.size(), default_values.size());
 }
+
+void FixedReadPlan::prepare_to_read(const RowLocation& row_location, size_t 
pos) {
+    
plan[row_location.rowset_id][row_location.segment_id].emplace_back(row_location.row_id,
 pos);
+}
+
+// read columns by read plan
+// read_index: ori_pos-> block_idx
+Status FixedReadPlan::read_columns_by_plan(
+        const TabletSchema& tablet_schema, const std::vector<uint32_t> 
cids_to_read,
+        const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset, 
vectorized::Block& block,
+        std::map<uint32_t, uint32_t>* read_index,
+        const signed char* __restrict delete_signs) const {
+    bool has_row_column = tablet_schema.store_row_column();
+    auto mutable_columns = block.mutate_columns();
+    size_t read_idx = 0;
+    for (const auto& [rowset_id, segment_row_mappings] : plan) {
+        for (const auto& [segment_id, mappings] : segment_row_mappings) {
+            auto rowset_iter = rsid_to_rowset.find(rowset_id);
+            CHECK(rowset_iter != rsid_to_rowset.end());
+            std::vector<uint32_t> rids;
+            for (auto [rid, pos] : mappings) {
+                if (delete_signs && delete_signs[pos]) {
+                    continue;
+                }
+                rids.emplace_back(rid);
+                (*read_index)[pos] = read_idx++;
+            }
+            if (has_row_column) {
+                auto st = doris::Tablet::fetch_value_through_row_column(
+                        rowset_iter->second, tablet_schema, segment_id, rids, 
cids_to_read, block);
+                if (!st.ok()) {
+                    LOG(WARNING) << "failed to fetch value through row column";
+                    return st;
+                }
+                continue;
+            }
+            for (size_t cid = 0; cid < mutable_columns.size(); ++cid) {
+                TabletColumn tablet_column = 
tablet_schema.column(cids_to_read[cid]);
+                auto st = Tablet::fetch_value_by_rowids(rowset_iter->second, 
segment_id, rids,
+                                                        tablet_column, 
mutable_columns[cid]);
+                // set read value to output block
+                if (!st.ok()) {
+                    LOG(WARNING) << "failed to fetch value";
+                    return st;
+                }
+            }
+        }
+    }
+    block.set_columns(std::move(mutable_columns));
+    return Status::OK();
+}
+
+Status FixedReadPlan::fill_missing_columns(
+        RowsetWriterContext* rowset_ctx, const std::map<RowsetId, 
RowsetSharedPtr>& rsid_to_rowset,
+        const TabletSchema& tablet_schema, vectorized::Block& full_block,
+        const std::vector<bool>& use_default_or_null_flag, bool 
has_default_or_nullable,
+        const size_t& segment_start_pos, const vectorized::Block* block) const 
{
+    auto mutable_full_columns = full_block.mutate_columns();
+    // create old value columns
+    const auto& missing_cids = rowset_ctx->partial_update_info->missing_cids;
+    auto old_value_block = tablet_schema.create_block_by_cids(missing_cids);
+    CHECK_EQ(missing_cids.size(), old_value_block.columns());
+
+    // segment pos to write -> rowid to read in old_value_block
+    std::map<uint32_t, uint32_t> read_index;
+    RETURN_IF_ERROR(read_columns_by_plan(tablet_schema, missing_cids, 
rsid_to_rowset,
+                                         old_value_block, &read_index, 
nullptr));
+
+    const auto* delete_sign_column_data = 
Tablet::get_delete_sign_column_data(old_value_block);
+
+    // build default value columns
+    auto default_value_block = old_value_block.clone_empty();
+    if (has_default_or_nullable || delete_sign_column_data != nullptr) {
+        RETURN_IF_ERROR(Tablet::generate_default_value_block(
+                tablet_schema, missing_cids, 
rowset_ctx->partial_update_info->default_values,
+                old_value_block, default_value_block));
+    }
+    auto mutable_default_value_columns = default_value_block.mutate_columns();
+
+    // fill all missing value from mutable_old_columns, need to consider 
default value and null value
+    for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) {
+        // `use_default_or_null_flag[idx] == false` doesn't mean that we 
should read values from the old row
+        // for the missing columns. For example, if a table has sequence 
column, the rows with DELETE_SIGN column
+        // marked will not be marked in delete bitmap(see 
https://github.com/apache/doris/pull/24011), so it will
+        // be found in Tablet::lookup_row_key() and 
`use_default_or_null_flag[idx]` will be false. But we should not
+        // read values from old rows for missing values in this occasion. So 
we should read the DELETE_SIGN column
+        // to check if a row REALLY exists in the table.
+        auto segment_pos = idx + segment_start_pos;
+        auto pos_in_old_block = read_index[segment_pos];
+        if (use_default_or_null_flag[idx] || (delete_sign_column_data != 
nullptr &&
+                                              
delete_sign_column_data[pos_in_old_block] != 0)) {
+            for (auto i = 0; i < missing_cids.size(); ++i) {
+                // if the column has default value, fill it with default value
+                // otherwise, if the column is nullable, fill it with null 
value
+                const auto& tablet_column = 
tablet_schema.column(missing_cids[i]);
+                auto& missing_col = mutable_full_columns[missing_cids[i]];
+                if (tablet_column.has_default_value()) {
+                    
missing_col->insert_from(*mutable_default_value_columns[i], 0);
+                } else if (tablet_column.is_nullable()) {
+                    auto* nullable_column =
+                            
assert_cast<vectorized::ColumnNullable*>(missing_col.get());
+                    nullable_column->insert_null_elements(1);
+                } else if (tablet_schema.auto_increment_column() == 
tablet_column.name()) {
+                    const auto& column = 
rowset_ctx->tablet_schema->column(tablet_column.name());
+                    DCHECK(column.type() == FieldType::OLAP_FIELD_TYPE_BIGINT);
+                    auto* auto_inc_column =
+                            
assert_cast<vectorized::ColumnInt64*>(missing_col.get());
+                    auto_inc_column->insert(
+                            (assert_cast<const vectorized::ColumnInt64*>(
+                                     
block->get_by_name("__PARTIAL_UPDATE_AUTO_INC_COLUMN__")
+                                             .column.get()))
+                                    ->get_element(idx));
+                } else {
+                    // If the control flow reaches this branch, the column 
neither has default value
+                    // nor is nullable. It means that the row's delete sign is 
marked, and the value
+                    // columns are useless and won't be read. So we can just 
put arbitary values in the cells
+                    missing_col->insert_default();
+                }
+            }
+            continue;
+        }
+        for (auto i = 0; i < missing_cids.size(); ++i) {
+            mutable_full_columns[missing_cids[i]]->insert_from(
+                    *old_value_block.get_by_position(i).column, 
pos_in_old_block);
+        }
+    }
+    full_block.set_columns(std::move(mutable_full_columns));
+    return Status::OK();
+}
+
+void FlexibleReadPlan::prepare_to_read(const RowLocation& row_location, size_t 
pos,

Review Comment:
   warning: method 'prepare_to_read' can be made const 
[readability-make-member-function-const]
   
   be/src/olap/partial_update_info.cpp:396:
   ```diff
   -                                        const BitmapValue& skip_bitmap) {
   +                                        const BitmapValue& skip_bitmap) 
const {
   ```
   
   be/src/olap/partial_update_info.h:133:
   ```diff
   -                          const BitmapValue& skip_bitmap);
   +                          const BitmapValue& skip_bitmap) const;
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to