This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 415aa23e25e branch-3.0: [fix](cloud-mow) Fix the issue of inaccurate estimation of txn size when updating delete bitmap #46969 (#47046)
415aa23e25e is described below

commit 415aa23e25e65e477c43c00f99f2ed4b62ac331b
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 17 15:12:17 2025 +0800

    branch-3.0: [fix](cloud-mow) Fix the issue of inaccurate estimation of txn size when updating delete bitmap #46969 (#47046)
    
    Cherry-picked from #46969
    
    Co-authored-by: huanghaibin <[email protected]>
    Co-authored-by: Calvin Kirs <[email protected]>
---
 cloud/src/common/config.h               |  9 ++++++
 cloud/src/meta-service/meta_service.cpp | 53 ++++++++++++++++++++++---------
 cloud/test/meta_service_test.cpp        | 55 +++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+), 14 deletions(-)

diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index a5cfef70ff5..ac4064c8d92 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -232,6 +232,15 @@ CONF_mInt64(max_num_aborted_txn, "100");
 // Max byte getting delete bitmap can return, default is 1GB
 CONF_mInt64(max_get_delete_bitmap_byte, "1073741824");
 
+// Max bytes of a single txn commit when updating delete bitmap, default is 7MB.
+// The size of one fdb transaction can't exceed 10MB, and fdb does not
+// provide an accurate way to estimate the size of a txn.
+// In testing, when txn->approximate_bytes() is bigger than 8MB,
+// the commit may hit a "Transaction exceeds byte limit" error. We reserve
+// a further 1MB of buffer, so setting the default value to 7MB is
+// more reasonable.
+CONF_mInt64(max_txn_commit_byte, "7340032");
+
 CONF_Bool(enable_cloud_txn_lazy_commit, "true");
 CONF_Int32(txn_lazy_commit_rowsets_thresold, "1000");
 CONF_Int32(txn_lazy_commit_num_threads, "8");
diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp
index 2ca8379f0ba..70fb53b7fb3 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -1851,25 +1851,40 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
     }
 
     // 4. Update delete bitmap for curent txn
-    size_t total_key = 0;
-    size_t total_size = 0;
+    size_t current_key_count = 0;
+    size_t current_value_count = 0;
+    size_t total_key_count = 0;
+    size_t total_value_count = 0;
+    size_t total_txn_put_keys = 0;
+    size_t total_txn_put_bytes = 0;
+    size_t total_txn_size = 0;
     for (size_t i = 0; i < request->rowset_ids_size(); ++i) {
         auto& key = delete_bitmap_keys.delete_bitmap_keys(i);
         auto& val = request->segment_delete_bitmaps(i);
 
         // Split into multiple fdb transactions, because the size of one fdb
         // transaction can't exceed 10MB.
-        if (fdb_txn_size + key.size() + val.size() > 9 * 1024 * 1024) {
-            LOG(INFO) << "fdb txn size more than 9MB, current size: " << 
fdb_txn_size
-                      << " lock_id=" << request->lock_id();
+        if (txn->approximate_bytes() + key.size() * 3 + val.size() > 
config::max_txn_commit_byte) {
+            LOG(INFO) << "fdb txn size more than " << 
config::max_txn_commit_byte
+                      << ", current size: " << txn->approximate_bytes()
+                      << " lock_id=" << request->lock_id() << ", need to 
commit";
             err = txn->commit();
+            total_txn_put_keys += txn->num_put_keys();
+            total_txn_put_bytes += txn->put_bytes();
+            total_txn_size += txn->approximate_bytes();
             if (err != TxnErrorCode::TXN_OK) {
                 code = cast_as<ErrCategory::COMMIT>(err);
-                ss << "failed to update delete bitmap, err=" << err;
+                ss << "failed to update delete bitmap, err=" << err << " 
tablet_id=" << tablet_id
+                   << " lock_id=" << request->lock_id()
+                   << " delete_bitmap_key=" << current_key_count
+                   << " delete_bitmap_value=" << current_value_count
+                   << " put_size=" << txn->put_bytes() << " num_put_keys=" << 
txn->num_put_keys()
+                   << " txn_size=" << txn->approximate_bytes();
                 msg = ss.str();
                 return;
             }
-            fdb_txn_size = 0;
+            current_key_count = 0;
+            current_value_count = 0;
             TxnErrorCode err = txn_kv_->create_txn(&txn);
             if (err != TxnErrorCode::TXN_OK) {
                 code = cast_as<ErrCategory::CREATE>(err);
@@ -1888,24 +1903,34 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
         }
         // splitting large values (>90*1000) into multiple KVs
         cloud::put(txn.get(), key, val, 0);
-        fdb_txn_size = fdb_txn_size + key.size() + val.size();
-        total_key++;
-        total_size += key.size() + val.size();
+        current_key_count++;
+        current_value_count += val.size();
+        total_key_count++;
+        total_value_count += val.size();
         VLOG_DEBUG << "xxx update delete bitmap put delete_bitmap_key=" << 
hex(key)
                    << " lock_id=" << request->lock_id() << " key_size: " << 
key.size()
                    << " value_size: " << val.size();
     }
-
     err = txn->commit();
+    total_txn_put_keys += txn->num_put_keys();
+    total_txn_put_bytes += txn->put_bytes();
+    total_txn_size += txn->approximate_bytes();
     if (err != TxnErrorCode::TXN_OK) {
         code = cast_as<ErrCategory::COMMIT>(err);
-        ss << "failed to update delete bitmap, err=" << err;
+        ss << "failed to update delete bitmap, err=" << err << " tablet_id=" 
<< tablet_id
+           << " lock_id=" << request->lock_id() << " delete_bitmap_key=" << 
current_key_count
+           << " delete_bitmap_value=" << current_value_count << " put_size=" 
<< txn->put_bytes()
+           << " num_put_keys=" << txn->num_put_keys() << " txn_size=" << 
txn->approximate_bytes();
         msg = ss.str();
         return;
     }
     LOG(INFO) << "update_delete_bitmap tablet_id=" << tablet_id << " lock_id=" 
<< request->lock_id()
-              << " rowset_num=" << request->rowset_ids_size() << " total_key=" 
<< total_key
-              << " total_size=" << total_size << " unlock=" << unlock;
+              << " rowset_num=" << request->rowset_ids_size()
+              << " total_key_count=" << total_key_count
+              << " total_value_count=" << total_value_count << " unlock=" << 
unlock
+              << " total_txn_put_keys=" << total_txn_put_keys
+              << " total_txn_put_bytes=" << total_txn_put_bytes
+              << " total_txn_size=" << total_txn_size;
 }
 
 void MetaServiceImpl::get_delete_bitmap(google::protobuf::RpcController* 
controller,
diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp
index b7004716035..fb17c29629b 100644
--- a/cloud/test/meta_service_test.cpp
+++ b/cloud/test/meta_service_test.cpp
@@ -104,6 +104,24 @@ std::unique_ptr<MetaServiceProxy> get_meta_service() {
     return get_meta_service(true);
 }
 
+std::unique_ptr<MetaServiceProxy> get_fdb_meta_service() {
+    config::fdb_cluster_file_path = "fdb.cluster";
+    static auto txn_kv = 
std::dynamic_pointer_cast<TxnKv>(std::make_shared<FdbTxnKv>());
+    static std::atomic<bool> init {false};
+    bool tmp = false;
+    if (init.compare_exchange_strong(tmp, true)) {
+        int ret = txn_kv->init();
+        [&] {
+            ASSERT_EQ(ret, 0);
+            ASSERT_NE(txn_kv.get(), nullptr);
+        }();
+    }
+    auto rs = std::make_shared<MockResourceManager>(txn_kv);
+    auto rl = std::make_shared<RateLimiter>();
+    auto meta_service = std::make_unique<MetaServiceImpl>(txn_kv, rs, rl);
+    return std::make_unique<MetaServiceProxy>(std::move(meta_service));
+}
+
 static std::string next_rowset_id() {
     static int cnt = 0;
     return std::to_string(++cnt);
@@ -4857,6 +4875,43 @@ static std::string generate_random_string(int length) {
     return randomString;
 }
 
+TEST(MetaServiceTest, UpdateDeleteBitmapWithBigKeys) {
+    auto meta_service = get_fdb_meta_service();
+    // get delete bitmap update lock
+    brpc::Controller cntl;
+    GetDeleteBitmapUpdateLockRequest get_lock_req;
+    GetDeleteBitmapUpdateLockResponse get_lock_res;
+    get_lock_req.set_cloud_unique_id("test_cloud_unique_id");
+    get_lock_req.set_table_id(1999);
+    get_lock_req.add_partition_ids(123);
+    get_lock_req.set_expiration(5);
+    get_lock_req.set_lock_id(-1);
+    get_lock_req.set_initiator(100);
+    meta_service->get_delete_bitmap_update_lock(
+            reinterpret_cast<::google::protobuf::RpcController*>(&cntl), 
&get_lock_req,
+            &get_lock_res, nullptr);
+    ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK);
+    UpdateDeleteBitmapRequest update_delete_bitmap_req;
+    UpdateDeleteBitmapResponse update_delete_bitmap_res;
+    update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
+    update_delete_bitmap_req.set_table_id(1999);
+    update_delete_bitmap_req.set_partition_id(123);
+    update_delete_bitmap_req.set_lock_id(-1);
+    update_delete_bitmap_req.set_initiator(100);
+    update_delete_bitmap_req.set_tablet_id(333);
+    std::string large_value = generate_random_string(300 * 1000 * 3);
+    for (int i = 0; i < 100000; i++) {
+        
update_delete_bitmap_req.add_rowset_ids("0200000003ea308a3647dbea83220ed4b8897f2288244a91");
+        update_delete_bitmap_req.add_segment_ids(0);
+        update_delete_bitmap_req.add_versions(i);
+        update_delete_bitmap_req.add_segment_delete_bitmaps("1");
+    }
+    
meta_service->update_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
+                                       &update_delete_bitmap_req, 
&update_delete_bitmap_res,
+                                       nullptr);
+    ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);
+}
+
 TEST(MetaServiceTest, UpdateDeleteBitmap) {
     auto meta_service = get_meta_service();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to