yixiutt commented on code in PR #17542:
URL: https://github.com/apache/doris/pull/17542#discussion_r1178576093
##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -297,11 +306,235 @@ void SegmentWriter::_maybe_invalid_row_cache(const std::string& key) {
}
}
+void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) {
+ if (block.rows() == 0) {
+ return;
+ }
+ MonotonicStopWatch watch;
+ watch.start();
+ // find row column id
+ int row_column_id = 0;
+ for (int i = 0; i < _tablet_schema->num_columns(); ++i) {
+ if (_tablet_schema->column(i).is_row_store_column()) {
+ row_column_id = i;
+ break;
+ }
+ }
+ vectorized::ColumnString* row_store_column =
+
static_cast<vectorized::ColumnString*>(block.get_by_position(row_column_id)
+
.column->assume_mutable_ref()
+ .assume_mutable()
+ .get());
+ row_store_column->clear();
+ vectorized::JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block,
*row_store_column,
+
_tablet_schema->num_columns());
+ VLOG_DEBUG << "serialize , num_rows:" << block.rows() << ",
row_column_id:" << row_column_id
+ << ", total_byte_size:" << block.allocated_bytes() << ",
serialize_cost(us)"
+ << watch.elapsed_time() / 1000;
+}
+
+// for partial update, we should do following steps to fill content of block:
+// 1. set block data to data convertor, and get all key_column's converted slice
+// 2. get pk of input block, and read missing columns
+// 2.1 first find key location{rowset_id, segment_id, row_id}
+// 2.2 build read plan to read by batch
+// 2.3 fill block
+// 3. set columns to data convertor and then write all columns
+Status SegmentWriter::append_block_with_partial_content(const
vectorized::Block* block,
+ size_t row_pos, size_t
num_rows) {
+ CHECK(block->columns() > _tablet_schema->num_key_columns() &&
+ block->columns() < _tablet_schema->num_columns());
+ CHECK(_tablet_schema->keys_type() == UNIQUE_KEYS &&
_opts.enable_unique_key_merge_on_write);
+
+ // find missing column cids
+ std::vector<uint32_t> missing_cids;
+ std::vector<uint32_t> including_cids;
+ for (uint32_t i = 0; i < _tablet_schema->num_columns(); ++i) {
+ if (_tablet_schema->is_column_missing(i)) {
+ missing_cids.push_back(i);
+ } else {
+ including_cids.push_back(i);
+ }
+ }
+ // create full block and fill with input columns
+ auto full_block = _tablet_schema->create_block();
+ size_t input_id = 0;
+ for (auto i : including_cids) {
+ full_block.replace_by_position(i,
block->get_by_position(input_id++).column);
+ }
+
_olap_data_convertor->set_source_content_with_specifid_columns(&full_block,
row_pos, num_rows,
+
including_cids);
+
+ // write including columns
+ std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
+ vectorized::IOlapColumnDataAccessor* seq_column = nullptr;
+ for (auto cid : including_cids) {
+ // olap data convertor alway start from id = 0
+ auto converted_result = _olap_data_convertor->convert_column_data(cid);
+ if (converted_result.first != Status::OK()) {
+ return converted_result.first;
+ }
+ if (cid < _num_key_columns) {
+ key_columns.push_back(converted_result.second);
+ } else if (cid == _tablet_schema->sequence_col_idx()) {
+ seq_column = converted_result.second;
+ }
+
RETURN_IF_ERROR(_column_writers[cid]->append(converted_result.second->get_nullmap(),
+
converted_result.second->get_data(),
+ num_rows));
+ }
+
+ bool has_default = false;
+ std::vector<bool> use_default_flag;
+ use_default_flag.reserve(num_rows);
+ for (size_t pos = 0; pos < num_rows; pos++) {
+ std::string key = _full_encode_keys(key_columns, pos);
+ if (_tablet_schema->has_sequence_col()) {
+ _encode_seq_column(seq_column, pos, &key);
+ }
+ RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
+ _maybe_invalid_row_cache(key);
+
+ RowLocation loc;
+ // save rowset shared ptr so this rowset wouldn't delete
+ RowsetSharedPtr rowset;
+ auto st = _tablet->lookup_row_key(key, &_mow_context->rowset_ids, &loc,
+ _mow_context->max_version, &rowset);
+ if (st.is<NOT_FOUND>()) {
+ if (!_tablet_schema->allow_key_not_exist_in_partial_update()) {
+ return Status::InternalError("partial update key not exist
before");
+ }
+ has_default = true;
+ use_default_flag.emplace_back(true);
+ continue;
+ }
+ if (!st.ok()) {
Review Comment:
这里应该不需要考虑,这里只是填充值,sequence列的处理逻辑应该跟之前是一样的。
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]