This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 60fddd56e7 [feature-wip](unique-key-merge-on-write) opt lock and only 
save valid delete_bitmap (#11953)
60fddd56e7 is described below

commit 60fddd56e7c19c6f24adf8f85ca032e7ef55cbfe
Author: yixiutt <[email protected]>
AuthorDate: Tue Aug 23 14:43:40 2022 +0800

    [feature-wip](unique-key-merge-on-write) opt lock and only save valid 
delete_bitmap (#11953)
    
    1. use rlock in most logic instead of wrlock
    2. filter stale rowset's delete bitmap in save meta
    3. add a delete_bitmap lock to handle compaction and publish_txn confict
    
    Co-authored-by: yixiutt <[email protected]>
---
 be/src/olap/compaction.cpp                       | 24 ++++++++++++--------
 be/src/olap/delta_writer.cpp                     |  2 +-
 be/src/olap/memtable.cpp                         | 27 +++++++++++-----------
 be/src/olap/tablet.cpp                           |  7 ++++--
 be/src/olap/tablet.h                             |  8 +++++++
 be/src/olap/tablet_meta.cpp                      | 12 +++++++---
 be/src/olap/tablet_meta.h                        |  2 +-
 be/src/olap/task/engine_publish_version_task.cpp | 12 +++++-----
 be/src/olap/txn_manager.cpp                      | 29 ++++++++++++------------
 be/test/olap/test_data/header_without_inc_rs.txt |  1 -
 10 files changed, 73 insertions(+), 51 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index fdf2ed36ec..bdeb7ab1e7 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -256,17 +256,23 @@ Status Compaction::construct_input_rowset_readers() {
 Status Compaction::modify_rowsets() {
     std::vector<RowsetSharedPtr> output_rowsets;
     output_rowsets.push_back(_output_rowset);
-    std::lock_guard<std::shared_mutex> wrlock(_tablet->get_header_lock());
+    {
+        std::lock_guard<std::mutex> wrlock_(_tablet->get_rowset_update_lock());
+        std::lock_guard<std::shared_mutex> wrlock(_tablet->get_header_lock());
+
+        // update dst rowset delete bitmap
+        if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
+            _tablet->enable_unique_key_merge_on_write()) {
+            _tablet->tablet_meta()->update_delete_bitmap(
+                    _input_rowsets, _output_rs_writer->version(), 
_rowid_conversion);
+        }
 
-    // update dst rowset delete bitmap
-    if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
-        _tablet->enable_unique_key_merge_on_write()) {
-        _tablet->tablet_meta()->update_delete_bitmap(_input_rowsets, 
_output_rs_writer->version(),
-                                                     _rowid_conversion);
+        RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets, _input_rowsets, 
true));
+    }
+    {
+        std::shared_lock rlock(_tablet->get_header_lock());
+        _tablet->save_meta();
     }
-
-    RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets, _input_rowsets, 
true));
-    _tablet->save_meta();
     return Status::OK();
 }
 
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 7e34771c94..0f2e776919 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -282,7 +282,7 @@ Status DeltaWriter::wait_flush() {
 
 void DeltaWriter::_reset_mem_table() {
     if (_tablet->enable_unique_key_merge_on_write()) {
-        _delete_bitmap.reset(new DeleteBitmap(-1));
+        _delete_bitmap.reset(new DeleteBitmap(_tablet->tablet_id()));
     }
     _mem_table.reset(new MemTable(_tablet, _schema.get(), 
_tablet_schema.get(), _req.slots,
                                   _req.tuple_desc, _rowset_writer.get(), 
_delete_bitmap,
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 95b9277be3..629c9eb863 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -400,20 +400,21 @@ bool MemTable::need_to_agg() {
 
 Status MemTable::_generate_delete_bitmap() {
     // generate delete bitmap, build a tmp rowset and load recent segment
-    if (_tablet->enable_unique_key_merge_on_write()) {
-        auto rowset = _rowset_writer->build_tmp();
-        auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset.get());
-        std::vector<segment_v2::SegmentSharedPtr> segments;
-        segment_v2::SegmentSharedPtr segment;
-        if (beta_rowset->num_segments() == 0) {
-            return Status::OK();
-        }
-        RETURN_IF_ERROR(beta_rowset->load_segment(beta_rowset->num_segments() 
- 1, &segment));
-        segments.push_back(segment);
-        std::lock_guard<std::shared_mutex> 
meta_wrlock(_tablet->get_header_lock());
-        RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), 
segments,
-                                                    &_rowset_ids, 
_delete_bitmap));
+    if (!_tablet->enable_unique_key_merge_on_write()) {
+        return Status::OK();
+    }
+    auto rowset = _rowset_writer->build_tmp();
+    auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset.get());
+    std::vector<segment_v2::SegmentSharedPtr> segments;
+    segment_v2::SegmentSharedPtr segment;
+    if (beta_rowset->num_segments() == 0) {
+        return Status::OK();
     }
+    RETURN_IF_ERROR(beta_rowset->load_segment(beta_rowset->num_segments() - 1, 
&segment));
+    segments.push_back(segment);
+    std::shared_lock meta_rlock(_tablet->get_header_lock());
+    RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), 
segments, &_rowset_ids,
+                                                _delete_bitmap));
     return Status::OK();
 }
 
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 2a99ea8a57..d3971937b3 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2064,7 +2064,8 @@ Status Tablet::update_delete_bitmap(const 
RowsetSharedPtr& rowset, DeleteBitmapP
     std::vector<segment_v2::SegmentSharedPtr> segments;
     _load_rowset_segments(rowset, &segments);
 
-    std::lock_guard<std::shared_mutex> meta_wrlock(_meta_lock);
+    std::lock_guard<std::mutex> rwlock(_rowset_update_lock);
+    std::shared_lock meta_rlock(_meta_lock);
     cur_rowset_ids = all_rs_id();
     _rowset_ids_difference(cur_rowset_ids, pre_rowset_ids, &rowset_ids_to_add, 
&rowset_ids_to_del);
     if (!rowset_ids_to_add.empty() || !rowset_ids_to_del.empty()) {
@@ -2079,7 +2080,9 @@ Status Tablet::update_delete_bitmap(const 
RowsetSharedPtr& rowset, DeleteBitmapP
                                            delete_bitmap, true));
     }
 
-    // update version
+    // update version without write lock, compaction and publish_txn
+    // will update delete bitmap, handle compaction with _delete_bitmap_lock
+    // and publish_txn runs sequencial so no need to lock here
     for (auto iter = delete_bitmap->delete_bitmap.begin();
          iter != delete_bitmap->delete_bitmap.end(); ++iter) {
         int ret = _tablet_meta->delete_bitmap().set(
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index c0c8cab7d4..51cf455c58 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -165,6 +165,7 @@ public:
 
     // meta lock
     std::shared_mutex& get_header_lock() { return _meta_lock; }
+    std::mutex& get_rowset_update_lock() { return _rowset_update_lock; }
     std::mutex& get_push_lock() { return _ingest_lock; }
     std::mutex& get_base_compaction_lock() { return _base_compaction_lock; }
     std::mutex& get_cumulative_compaction_lock() { return 
_cumulative_compaction_lock; }
@@ -392,6 +393,13 @@ private:
     // TODO(lingbin): There is a _meta_lock TabletMeta too, there should be a 
comment to
     // explain how these two locks work together.
     mutable std::shared_mutex _meta_lock;
+
+    // In unique key table with MoW, we should guarantee that only one
+    // writer can update rowset and delete bitmap at the same time.
+    // We use a separate lock rather than _meta_lock, to avoid blocking read 
queries
+    // during publish_txn, which might take hundreds of milliseconds
+    mutable std::mutex _rowset_update_lock;
+
     // After version 0.13, all newly created rowsets are saved in 
_rs_version_map.
     // And if rowset being compacted, the old rowsetis will be saved in 
_stale_rs_version_map;
     std::unordered_map<Version, RowsetSharedPtr, HashOfVersion> 
_rs_version_map;
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 786fe2674e..1689c0a356 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -536,11 +536,17 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) 
{
     tablet_meta_pb->set_storage_policy(_storage_policy);
     
tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write);
 
-    {
-        std::shared_lock l(delete_bitmap().lock);
+    if (_enable_unique_key_merge_on_write) {
+        std::set<RowsetId> rs_ids;
+        for (const auto& rowset : _rs_metas) {
+            rs_ids.insert(rowset->rowset_id());
+        }
         DeleteBitmapPB* delete_bitmap_pb = 
tablet_meta_pb->mutable_delete_bitmap();
-        for (auto& [id, bitmap] : delete_bitmap().delete_bitmap) {
+        for (auto& [id, bitmap] : delete_bitmap().snapshot().delete_bitmap) {
             auto& [rowset_id, segment_id, ver] = id;
+            if (rs_ids.count(rowset_id) == 0) {
+                continue;
+            }
             delete_bitmap_pb->add_rowset_ids(rowset_id.to_string());
             delete_bitmap_pb->add_segment_ids(segment_id);
             delete_bitmap_pb->add_versions(ver);
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 1bf4da2cbd..9cea34b248 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -404,7 +404,7 @@ public:
             static std::once_flag once;
             std::call_once(once, [size_in_bytes] {
                 auto tmp = new ShardedLRUCache("DeleteBitmap AggCache", 
size_in_bytes,
-                                               LRUCacheType::SIZE, 2048);
+                                               LRUCacheType::SIZE, 256);
                 AggCache::s_repr.store(tmp, std::memory_order_release);
             });
 
diff --git a/be/src/olap/task/engine_publish_version_task.cpp 
b/be/src/olap/task/engine_publish_version_task.cpp
index be4a2e6abc..4c05b8c59d 100644
--- a/be/src/olap/task/engine_publish_version_task.cpp
+++ b/be/src/olap/task/engine_publish_version_task.cpp
@@ -126,11 +126,11 @@ Status EnginePublishVersionTask::finish() {
                     max_version = tablet->max_version();
                 }
                 if (version.first != max_version.second + 1) {
-                    LOG(INFO) << "uniq key with merge-on-write version not 
continuous, current "
-                                 "max "
-                                 "version="
-                              << max_version.second << ", publish_version=" << 
version.first
-                              << " tablet_id=" << tablet->tablet_id();
+                    VLOG_NOTICE << "uniq key with merge-on-write version not 
continuous, current "
+                                   "max "
+                                   "version="
+                                << max_version.second << ", publish_version=" 
<< version.first
+                                << " tablet_id=" << tablet->tablet_id();
                     meet_version_not_continuous = true;
                     res = 
Status::OLAPInternalError(OLAP_ERR_PUBLISH_VERSION_NOT_CONTINUOUS);
                     continue;
@@ -180,7 +180,7 @@ Status EnginePublishVersionTask::finish() {
 
     LOG(INFO) << "finish to publish version on transaction."
               << "transaction_id=" << transaction_id
-              << ", error_tablet_size=" << _error_tablet_ids->size();
+              << ", error_tablet_size=" << _error_tablet_ids->size() << ", 
res=" << res.to_string();
     return res;
 }
 
diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp
index efb4185f46..3ef4f40084 100644
--- a/be/src/olap/txn_manager.cpp
+++ b/be/src/olap/txn_manager.cpp
@@ -309,6 +309,20 @@ Status TxnManager::publish_txn(OlapMeta* meta, 
TPartitionId partition_id,
         // save meta need access disk, it maybe very slow, so that it is not 
in global txn lock
         // it is under a single txn lock
         if (rowset_ptr != nullptr) {
+            // update delete_bitmap
+            {
+                if (load_info != nullptr && 
load_info->unique_key_merge_on_write) {
+                    auto tablet =
+                            
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
+                    if (tablet == nullptr) {
+                        return Status::OK();
+                    }
+                    RETURN_IF_ERROR(tablet->update_delete_bitmap(
+                            rowset_ptr, load_info->delete_bitmap, 
load_info->rowset_ids));
+                    std::shared_lock rlock(tablet->get_header_lock());
+                    tablet->save_meta();
+                }
+            }
             // TODO(ygl): rowset is already set version here, memory is 
changed, if save failed
             // it maybe a fatal error
             rowset_ptr->make_visible(version);
@@ -325,21 +339,6 @@ Status TxnManager::publish_txn(OlapMeta* meta, 
TPartitionId partition_id,
             return Status::OLAPInternalError(OLAP_ERR_TRANSACTION_NOT_EXIST);
         }
     }
-    // update delete_bitmap
-    {
-        auto tablet = 
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
-#ifdef BE_TEST
-        if (tablet == nullptr) {
-            return Status::OK();
-        }
-#endif
-        if (load_info != nullptr && load_info->unique_key_merge_on_write) {
-            RETURN_IF_ERROR(tablet->update_delete_bitmap(rowset_ptr, 
load_info->delete_bitmap,
-                                                         
load_info->rowset_ids));
-            std::lock_guard<std::shared_mutex> 
wrlock(tablet->get_header_lock());
-            tablet->save_meta();
-        }
-    }
     {
         std::unique_lock<std::mutex> txn_lock(_get_txn_lock(transaction_id));
         std::lock_guard<std::shared_mutex> 
wrlock(_get_txn_map_lock(transaction_id));
diff --git a/be/test/olap/test_data/header_without_inc_rs.txt 
b/be/test/olap/test_data/header_without_inc_rs.txt
index fc2a5cb1e6..44c191d3fc 100644
--- a/be/test/olap/test_data/header_without_inc_rs.txt
+++ b/be/test/olap/test_data/header_without_inc_rs.txt
@@ -147,6 +147,5 @@
     "tablet_type": "TABLET_TYPE_DISK",
     "replica_id": 0,
     "storage_policy": "",
-    "delete_bitmap": {},
     "enable_unique_key_merge_on_write": false
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to