This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c98a0bf803 [Enchancement](merge-on-write) check the correctness of
rowid conversion after compaction (#16689)
c98a0bf803 is described below
commit c98a0bf80321be998b626880483d7efd2c750328
Author: Xin Liao <[email protected]>
AuthorDate: Mon Feb 20 16:27:18 2023 +0800
[Enchancement](merge-on-write) check the correctness of rowid conversion
after compaction (#16689)
For merge-on-write (MoW) tables, compaction uses rowid conversion to update the
delete bitmap of data that was imported while the compaction was running. The
resulting delete bitmap is only correct if the rowid conversion is correct, so
this commit adds a check of the rowid conversion result.
---
be/src/olap/compaction.cpp | 12 ++++--
be/src/olap/primary_key_index.cpp | 2 +-
be/src/olap/rowset/segment_v2/segment.cpp | 16 ++++++++
be/src/olap/rowset/segment_v2/segment.h | 2 +
be/src/olap/tablet.cpp | 65 ++++++++++++++++++++++++++++++-
be/src/olap/tablet.h | 5 +++
6 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 49526b6382..d585516954 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -418,22 +418,26 @@ Status Compaction::modify_rowsets() {
_tablet->enable_unique_key_merge_on_write()) {
Version version = _tablet->max_version();
DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>> location_map;
// Convert the delete bitmap of the input rowsets to output rowset.
// New loads are not blocked, so some keys of input rowsets might
// be deleted during the time. We need to deal with delete bitmap
// of incremental data later.
_tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets,
_rowid_conversion, 0,
- version.second +
1,
+ version.second +
1, &location_map,
&output_rowset_delete_bitmap);
+ RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset,
location_map));
+ location_map.clear();
{
std::lock_guard<std::mutex>
wrlock_(_tablet->get_rowset_update_lock());
std::lock_guard<std::shared_mutex>
wrlock(_tablet->get_header_lock());
// Convert the delete bitmap of the input rowsets to output rowset
for
// incremental data.
-
_tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets,
_rowid_conversion,
-
version.second, UINT64_MAX,
-
&output_rowset_delete_bitmap);
+ _tablet->calc_compaction_output_rowset_delete_bitmap(
+ _input_rowsets, _rowid_conversion, version.second,
UINT64_MAX, &location_map,
+ &output_rowset_delete_bitmap);
+ RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset,
location_map));
_tablet->merge_delete_bitmap(output_rowset_delete_bitmap);
RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets,
_input_rowsets, true));
diff --git a/be/src/olap/primary_key_index.cpp
b/be/src/olap/primary_key_index.cpp
index 6e2c3d954d..7c2c5fe16a 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -27,7 +27,7 @@ Status PrimaryKeyIndexBuilder::init() {
// TODO(liaoxin) using the column type directly if there's only one column
in unique key columns
const auto* type_info = get_scalar_type_info<OLAP_FIELD_TYPE_VARCHAR>();
segment_v2::IndexedColumnWriterOptions options;
- options.write_ordinal_index = false;
+ options.write_ordinal_index = true;
options.write_value_index = true;
options.encoding =
segment_v2::EncodingInfo::get_default_encoding(type_info, true);
// TODO(liaoxin) test to confirm whether it needs to be compressed
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 77246995c5..09e294496c 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -366,5 +366,21 @@ Status Segment::lookup_row_key(const Slice& key,
RowLocation* row_location) {
return Status::OK();
}
+// Reads the primary key stored at ordinal `row_id` of this segment's primary
+// key index and copies it into `*key`.
+//
+// Any error from loading the index, creating the iterator, seeking to
+// `row_id`, or reading the batch is returned to the caller. If the read
+// succeeds but does not yield exactly one row, an InternalError is returned
+// rather than aborting the process, so callers can handle it like any other
+// failed lookup.
+Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) {
+    RETURN_IF_ERROR(load_pk_index_and_bf());
+    std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
+    RETURN_IF_ERROR(_pk_index_reader->new_iterator(&iter));
+
+    // Single-row column with the same type as the primary key index, used as
+    // the destination of the one-row read below.
+    auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
+            _pk_index_reader->type_info()->type(), 1, 0);
+    auto index_column = index_type->create_column();
+    RETURN_IF_ERROR(iter->seek_to_ordinal(row_id));
+    size_t num_read = 1;
+    RETURN_IF_ERROR(iter->next_batch(&num_read, index_column));
+    if (num_read != 1) {
+        // next_batch reports how many rows it actually produced; anything
+        // other than one row means there is no key to hand back.
+        return Status::InternalError("failed to read primary key by rowid");
+    }
+    *key = index_column->get_data_at(0).to_string();
+    return Status::OK();
+}
+
} // namespace segment_v2
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 078105eefb..47e5042f7b 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -96,6 +96,8 @@ public:
Status lookup_row_key(const Slice& key, RowLocation* row_location);
+ Status read_key_by_rowid(uint32_t row_id, std::string* key);
+
// only used by UT
const SegmentFooterPB& footer() const { return _footer; }
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 2a2ab4acf9..01087a2a01 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2561,7 +2561,9 @@ Status Tablet::update_delete_bitmap(const
RowsetSharedPtr& rowset, DeleteBitmapP
void Tablet::calc_compaction_output_rowset_delete_bitmap(
const std::vector<RowsetSharedPtr>& input_rowsets, const
RowIdConversion& rowid_conversion,
- uint64_t start_version, uint64_t end_version, DeleteBitmap*
output_rowset_delete_bitmap) {
+ uint64_t start_version, uint64_t end_version,
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>>* location_map,
+ DeleteBitmap* output_rowset_delete_bitmap) {
RowLocation src;
RowLocation dst;
for (auto& rowset : input_rowsets) {
@@ -2590,6 +2592,7 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap(
<< " src location: |" << src.rowset_id << "|"
<< src.segment_id
<< "|" << src.row_id << " start version: " <<
start_version
<< "end version" << end_version;
+ (*location_map)[rowset].emplace_back(src, dst);
output_rowset_delete_bitmap->add({dst.rowset_id,
dst.segment_id, cur_version},
dst.row_id);
}
@@ -2602,6 +2605,66 @@ void Tablet::merge_delete_bitmap(const DeleteBitmap&
delete_bitmap) {
_tablet_meta->delete_bitmap().merge(delete_bitmap);
}
+// Verifies the rowid conversion recorded in `location_map`.
+//
+// `location_map` maps each input rowset to a list of (src, dst) row locations.
+// For every pair, the primary key read at `src` from the input rowset must be
+// byte-identical to the primary key read at `dst` from `dst_rowset`.
+//
+// Returns OK when `location_map` is empty or every pair matches. Returns the
+// underlying error when a key cannot be read, and InternalError when a segment
+// id is out of range or the two keys differ. When reading the source key
+// reports NOT_IMPLEMENTED_ERROR, the remaining locations of that input rowset
+// are skipped and the check continues with the next rowset.
+Status Tablet::check_rowid_conversion(
+        RowsetSharedPtr dst_rowset,
+        const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>&
+                location_map) {
+    if (location_map.empty()) {
+        VLOG_DEBUG << "check_rowid_conversion, location_map is empty";
+        return Status::OK();
+    }
+    std::vector<segment_v2::SegmentSharedPtr> dst_segments;
+    _load_rowset_segments(dst_rowset, &dst_segments);
+    std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>, HashOfRowsetId>
+            input_rowsets_segment;
+
+    VLOG_DEBUG << "check_rowid_conversion, dst_segments size: " << dst_segments.size();
+    // Bind by const reference: binding by value would copy the rowset pointer
+    // and the whole location list once per input rowset.
+    for (const auto& [src_rowset, locations] : location_map) {
+        std::vector<segment_v2::SegmentSharedPtr>& segments =
+                input_rowsets_segment[src_rowset->rowset_id()];
+        if (segments.empty()) {
+            _load_rowset_segments(src_rowset, &segments);
+        }
+        for (const auto& [src, dst] : locations) {
+            // Guard the segment lookups: the loaded segment vectors are not
+            // validated above, so an unchecked index could read out of bounds.
+            if (UNLIKELY(src.segment_id >= segments.size())) {
+                LOG(WARNING) << "src segment id out of range: |" << src.rowset_id << "|"
+                             << src.segment_id << "|" << src.row_id
+                             << " loaded segments: " << segments.size();
+                DCHECK(false);
+                return Status::InternalError("src segment id out of range");
+            }
+            if (UNLIKELY(dst.segment_id >= dst_segments.size())) {
+                LOG(WARNING) << "dst segment id out of range: |" << dst.rowset_id << "|"
+                             << dst.segment_id << "|" << dst.row_id
+                             << " loaded segments: " << dst_segments.size();
+                DCHECK(false);
+                return Status::InternalError("dst segment id out of range");
+            }
+
+            std::string src_key;
+            std::string dst_key;
+            Status s = segments[src.segment_id]->read_key_by_rowid(src.row_id, &src_key);
+            if (UNLIKELY(s.is<NOT_IMPLEMENTED_ERROR>())) {
+                // This rowset cannot be verified; stop checking its locations
+                // but keep going with the other input rowsets.
+                LOG(INFO) << "primary key index of old version does not "
+                             "support reading key by rowid";
+                break;
+            }
+            if (UNLIKELY(!s)) {
+                LOG(WARNING) << "failed to get src key: |" << src.rowset_id << "|" << src.segment_id
+                             << "|" << src.row_id << " status: " << s;
+                DCHECK(false);
+                return s;
+            }
+
+            s = dst_segments[dst.segment_id]->read_key_by_rowid(dst.row_id, &dst_key);
+            if (UNLIKELY(!s)) {
+                LOG(WARNING) << "failed to get dst key: |" << dst.rowset_id << "|" << dst.segment_id
+                             << "|" << dst.row_id << " status: " << s;
+                DCHECK(false);
+                return s;
+            }
+
+            VLOG_DEBUG << "check_rowid_conversion, src: |" << src.rowset_id << "|" << src.segment_id
+                       << "|" << src.row_id << "|" << src_key << " dst: |" << dst.rowset_id << "|"
+                       << dst.segment_id << "|" << dst.row_id << "|" << dst_key;
+            if (UNLIKELY(src_key.compare(dst_key) != 0)) {
+                LOG(WARNING) << "failed to check key, src key: |" << src.rowset_id << "|"
+                             << src.segment_id << "|" << src.row_id << "|" << src_key
+                             << " dst key: |" << dst.rowset_id << "|" << dst.segment_id << "|"
+                             << dst.row_id << "|" << dst_key;
+                DCHECK(false);
+                return Status::InternalError("failed to check rowid conversion");
+            }
+        }
+    }
+    return Status::OK();
+}
+
RowsetIdUnorderedSet Tablet::all_rs_id(int64_t max_version) const {
RowsetIdUnorderedSet rowset_ids;
for (const auto& rs_it : _rs_version_map) {
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index adcc6368f8..34fe9c32fe 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -366,8 +366,13 @@ public:
void calc_compaction_output_rowset_delete_bitmap(
const std::vector<RowsetSharedPtr>& input_rowsets,
const RowIdConversion& rowid_conversion, uint64_t start_version,
uint64_t end_version,
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>>* location_map,
DeleteBitmap* output_rowset_delete_bitmap);
void merge_delete_bitmap(const DeleteBitmap& delete_bitmap);
+ Status check_rowid_conversion(
+ RowsetSharedPtr dst_rowset,
+ const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>>&
+ location_map);
RowsetIdUnorderedSet all_rs_id(int64_t max_version) const;
bool check_all_rowset_segment();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]