This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new b7809ebe355 branch-3.1: [chore](cloud) Optimize RPC log #53341 (#54212)
b7809ebe355 is described below

commit b7809ebe355e0bd5a4a01d9d3a3bc47d765a83e7
Author: Gavin Chou <[email protected]>
AuthorDate: Mon Aug 4 10:00:42 2025 +0800

    branch-3.1: [chore](cloud) Optimize RPC log #53341 (#54212)
    
    pick #53341
---
 be/src/cloud/cloud_meta_mgr.cpp              |  38 ++++-----
 cloud/src/meta-service/meta_service.cpp      |  20 +++--
 cloud/src/meta-service/meta_service_helper.h | 113 +++++++++++++--------------
 cloud/test/meta_service_test.cpp             |   8 +-
 4 files changed, 92 insertions(+), 87 deletions(-)

diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index f478a833a07..e0440991727 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -493,6 +493,9 @@ Status 
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
 
     MetaServiceProxy* proxy;
     RETURN_IF_ERROR(MetaServiceProxy::get_proxy(&proxy));
+    std::string tablet_info =
+            fmt::format("tablet_id={} table_id={} index_id={} 
partition_id={}", tablet->tablet_id(),
+                        tablet->table_id(), tablet->index_id(), 
tablet->partition_id());
     int tried = 0;
     while (true) {
         std::shared_ptr<MetaService_Stub> stub;
@@ -542,12 +545,8 @@ Status 
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
                 std::uniform_int_distribution<uint32_t> u1(500, 1000);
                 uint32_t duration_ms = tried >= 100 ? u(rng) : u1(rng);
                 bthread_usleep(duration_ms * 1000);
-                LOG_INFO("failed to get rowset meta")
+                LOG_INFO("failed to get rowset meta, " + tablet_info)
                         .tag("reason", cntl.ErrorText())
-                        .tag("tablet_id", tablet_id)
-                        .tag("table_id", table_id)
-                        .tag("index_id", index_id)
-                        .tag("partition_id", tablet->partition_id())
                         .tag("tried", tried)
                         .tag("sleep", duration_ms);
                 continue;
@@ -555,18 +554,26 @@ Status 
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
             return Status::RpcError("failed to get rowset meta: {}", 
cntl.ErrorText());
         }
         if (resp.status().code() == MetaServiceCode::TABLET_NOT_FOUND) {
-            return Status::NotFound("failed to get rowset meta: {}", 
resp.status().msg());
+            LOG(WARNING) << "failed to get rowset meta, err=" << 
resp.status().msg() << " "
+                         << tablet_info;
+            return Status::NotFound("failed to get rowset meta: {}, {}", 
resp.status().msg(),
+                                    tablet_info);
         }
         if (resp.status().code() != MetaServiceCode::OK) {
-            return Status::InternalError("failed to get rowset meta: {}", 
resp.status().msg());
+            LOG(WARNING) << " failed to get rowset meta, err=" << 
resp.status().msg() << " "
+                         << tablet_info;
+            return Status::InternalError("failed to get rowset meta: {}, {}", 
resp.status().msg(),
+                                         tablet_info);
         }
         if (latency > 100 * 1000) { // 100ms
             LOG(INFO) << "finish get_rowset rpc. rowset_meta.size()=" << 
resp.rowset_meta().size()
-                      << ", latency=" << latency << "us";
+                      << ", latency=" << latency << "us"
+                      << " " << tablet_info;
         } else {
             LOG_EVERY_N(INFO, 100)
                     << "finish get_rowset rpc. rowset_meta.size()=" << 
resp.rowset_meta().size()
-                    << ", latency=" << latency << "us";
+                    << ", latency=" << latency << "us"
+                    << " " << tablet_info;
         }
 
         int64_t now = 
duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
@@ -590,16 +597,13 @@ Status 
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
                                                 resp.stats(), req.idx(), 
&delete_bitmap, full_sync,
                                                 sync_stats);
             if (st.is<ErrorCode::ROWSETS_EXPIRED>() && tried++ < retry_times) {
-                LOG_INFO("rowset meta is expired, need to retry")
-                        .tag("tablet", tablet->tablet_id())
+                LOG_INFO("rowset meta is expired, need to retry, " + 
tablet_info)
                         .tag("tried", tried)
                         .error(st);
                 continue;
             }
             if (!st.ok()) {
-                LOG_WARNING("failed to get delete bitmap")
-                        .tag("tablet", tablet->tablet_id())
-                        .error(st);
+                LOG_WARNING("failed to get delete bitmap, " + 
tablet_info).error(st);
                 return st;
             }
             tablet->tablet_meta()->delete_bitmap()->merge(delete_bitmap);
@@ -657,9 +661,7 @@ Status 
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
                     }
                 }
 
-                LOG_INFO("[verbose] sync tablet delete bitmap")
-                        .tag("tablet_id", tablet->tablet_id())
-                        .tag("table_id", tablet->table_id())
+                LOG_INFO("[verbose] sync tablet delete bitmap " + tablet_info)
                         .tag("full_sync", full_sync)
                         .tag("old_max_version", old_max_version)
                         .tag("new_max_version", new_max_version)
@@ -707,7 +709,7 @@ Status 
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
                 stats.cumulative_compaction_cnt() < 
tablet->cumulative_compaction_cnt())
                     [[unlikely]] {
                 // stale request, ignore
-                LOG_WARNING("stale get rowset meta request")
+                LOG_WARNING("stale get rowset meta request " + tablet_info)
                         .tag("resp_base_compaction_cnt", 
stats.base_compaction_cnt())
                         .tag("base_compaction_cnt", 
tablet->base_compaction_cnt())
                         .tag("resp_cumulative_compaction_cnt", 
stats.cumulative_compaction_cnt())
diff --git a/cloud/src/meta-service/meta_service.cpp 
b/cloud/src/meta-service/meta_service.cpp
index b1ec995d4bf..dec48f9a758 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -2169,7 +2169,8 @@ void 
MetaServiceImpl::update_tmp_rowset(::google::protobuf::RpcController* contr
 void internal_get_rowset(Transaction* txn, int64_t start, int64_t end,
                          const std::string& instance_id, int64_t tablet_id, 
MetaServiceCode& code,
                          std::string& msg, GetRowsetResponse* response) {
-    LOG(INFO) << "get_rowset start=" << start << ", end=" << end;
+    LOG(INFO) << "get_rowset start=" << start << ", end="
+              << " tablet_id=" << tablet_id;
     MetaRowsetKeyInfo key_info0 {instance_id, tablet_id, start};
     MetaRowsetKeyInfo key_info1 {instance_id, tablet_id, end + 1};
     std::string key0;
@@ -2181,7 +2182,8 @@ void internal_get_rowset(Transaction* txn, int64_t start, 
int64_t end,
     int num_rowsets = 0;
     DORIS_CLOUD_DEFER_COPY(key0, key1) {
         LOG(INFO) << "get rowset meta, num_rowsets=" << num_rowsets << " 
range=[" << hex(key0)
-                  << "," << hex(key1) << "]";
+                  << "," << hex(key1) << "]"
+                  << " tablet_id=" << tablet_id;
     };
 
     std::stringstream ss;
@@ -2189,7 +2191,7 @@ void internal_get_rowset(Transaction* txn, int64_t start, 
int64_t end,
         TxnErrorCode err = txn->get(key0, key1, &it);
         if (err != TxnErrorCode::TXN_OK) {
             code = cast_as<ErrCategory::READ>(err);
-            ss << "internal error, failed to get rowset, err=" << err;
+            ss << "internal error, failed to get rowset, err=" << err << " 
tablet_id=" << tablet_id;
             msg = ss.str();
             LOG(WARNING) << msg;
             return;
@@ -2203,14 +2205,16 @@ void internal_get_rowset(Transaction* txn, int64_t 
start, int64_t end,
             if (byte_size + v.size() > std::numeric_limits<int32_t>::max()) {
                 code = MetaServiceCode::PROTOBUF_PARSE_ERR;
                 msg = fmt::format(
-                        "rowset meta exceeded 2G, unable to serialize, key={}. 
byte_size={}",
-                        hex(k), byte_size);
+                        "rowset meta exceeded 2G, unable to serialize, key={}. 
byte_size={} "
+                        "tablet_id={}",
+                        hex(k), byte_size, tablet_id);
                 LOG(WARNING) << msg;
                 return;
             }
             if (!rs->ParseFromArray(v.data(), v.size())) {
                 code = MetaServiceCode::PROTOBUF_PARSE_ERR;
-                msg = "malformed rowset meta, unable to serialize";
+                msg = "malformed rowset meta, unable to serialize, tablet_id=" 
+
+                      std::to_string(tablet_id);
                 LOG(WARNING) << msg << " key=" << hex(k);
                 return;
             }
@@ -2339,7 +2343,9 @@ void 
MetaServiceImpl::get_rowset(::google::protobuf::RpcController* controller,
     if (!request->has_base_compaction_cnt() || 
!request->has_cumulative_compaction_cnt() ||
         !request->has_cumulative_point()) {
         code = MetaServiceCode::INVALID_ARGUMENT;
-        msg = "no valid compaction_cnt or cumulative_point given";
+        msg = "no valid compaction_cnt or cumulative_point given, tablet_id=" +
+              std::to_string(tablet_id);
+        LOG(WARNING) << msg;
         return;
     }
     int64_t req_bc_cnt = request->base_compaction_cnt();
diff --git a/cloud/src/meta-service/meta_service_helper.h 
b/cloud/src/meta-service/meta_service_helper.h
index 321b0ceb3a6..c35873ed949 100644
--- a/cloud/src/meta-service/meta_service_helper.h
+++ b/cloud/src/meta-service/meta_service_helper.h
@@ -94,107 +94,104 @@ inline std::string encryt_sk(std::string debug_string) {
     return debug_string;
 }
 
-template <class Request>
-void begin_rpc(std::string_view func_name, brpc::Controller* ctrl, const 
Request* req) {
+template <class Request, class Response>
+void begin_rpc(std::string_view func_name, brpc::Controller* ctrl, const 
Request* req,
+               Response* res) {
+    res->Clear(); // clear response in case of this is call is a local retry 
in MS
     if constexpr (std::is_same_v<Request, CreateRowsetRequest>) {
-        LOG(INFO) << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " original client ip: " << req->request_ip();
+        LOG(INFO) << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip();
     } else if constexpr (std::is_same_v<Request, CreateTabletsRequest>) {
-        LOG(INFO) << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " original client ip: " << req->request_ip();
+        LOG(INFO) << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip();
     } else if constexpr (std::is_same_v<Request, UpdateDeleteBitmapRequest>) {
-        LOG(INFO) << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " original client ip: " << req->request_ip() << " 
table_id=" << req->table_id()
+        LOG(INFO) << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip() << " 
table_id=" << req->table_id()
                   << " tablet_id=" << req->tablet_id() << " lock_id=" << 
req->lock_id()
                   << " initiator=" << req->initiator()
                   << " delete_bitmap_size=" << 
req->segment_delete_bitmaps_size();
     } else if constexpr (std::is_same_v<Request, GetDeleteBitmapRequest>) {
-        LOG(INFO) << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " original client ip: " << req->request_ip()
+        LOG(INFO) << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip()
                   << " tablet_id=" << req->tablet_id() << " rowset_size=" << 
req->rowset_ids_size();
     } else if constexpr (std::is_same_v<Request, GetTabletStatsRequest>) {
-        VLOG_DEBUG << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                   << " original client ip: " << req->request_ip()
-                   << " tablet size: " << req->tablet_idx().size();
+        VLOG_DEBUG << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                   << " original_client_ip=" << req->request_ip()
+                   << " num_tablets: " << req->tablet_idx().size();
     } else if constexpr (std::is_same_v<Request, GetVersionRequest> ||
                          std::is_same_v<Request, GetRowsetRequest> ||
                          std::is_same_v<Request, GetTabletRequest>) {
-        VLOG_DEBUG << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                   << " original client ip: " << req->request_ip()
+        VLOG_DEBUG << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                   << " original_client_ip=" << req->request_ip()
                    << " request=" << req->ShortDebugString();
     } else if constexpr (std::is_same_v<Request, RemoveDeleteBitmapRequest>) {
-        LOG(INFO) << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " original client ip: " << req->request_ip()
+        LOG(INFO) << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip()
                   << " tablet_id=" << req->tablet_id() << " rowset_size=" << 
req->rowset_ids_size();
     } else if constexpr (std::is_same_v<Request, 
GetDeleteBitmapUpdateLockRequest>) {
-        LOG(INFO) << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " original client ip: " << req->request_ip() << " 
table_id=" << req->table_id()
+        LOG(INFO) << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip() << " 
table_id=" << req->table_id()
                   << " lock_id=" << req->lock_id() << " initiator=" << 
req->initiator()
                   << " expiration=" << req->expiration()
                   << " require_compaction_stats=" << 
req->require_compaction_stats();
     } else {
-        LOG(INFO) << "begin " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " original client ip: " << req->request_ip()
+        LOG(INFO) << "begin " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip()
                   << " request=" << req->ShortDebugString();
     }
 }
 
-template <class Response>
-void finish_rpc(std::string_view func_name, brpc::Controller* ctrl, Response* 
res) {
+template <class Request, class Response>
+void finish_rpc(std::string_view func_name, brpc::Controller* ctrl, const 
Request* req,
+                Response* res) {
     if constexpr (std::is_same_v<Response, CommitTxnResponse>) {
-        if (res->status().code() != MetaServiceCode::OK) {
-            res->clear_table_ids();
-            res->clear_partition_ids();
-            res->clear_versions();
-        }
-        LOG(INFO) << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
+        LOG(INFO) << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip()
                   << " response=" << res->ShortDebugString();
     } else if constexpr (std::is_same_v<Response, GetRowsetResponse>) {
-        if (res->status().code() != MetaServiceCode::OK) {
-            res->clear_rowset_meta();
-        }
-        VLOG_DEBUG << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
-                   << " status=" << res->status().ShortDebugString();
+        LOG_IF(INFO, res->status().code() != MetaServiceCode::OK)
+                << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                << " original_client_ip=" << req->request_ip()
+                << " request=" << req->ShortDebugString()
+                << " status=" << res->status().ShortDebugString();
     } else if constexpr (std::is_same_v<Response, GetTabletStatsResponse>) {
-        VLOG_DEBUG << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
-                   << " status=" << res->status().ShortDebugString()
-                   << " tablet size: " << res->tablet_stats().size();
+        LOG_IF(INFO, res->status().code() != MetaServiceCode::OK)
+                << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                << " original_client_ip=" << req->request_ip()
+                << " request=" << req->ShortDebugString()
+                << " status=" << res->status().ShortDebugString()
+                << " num_tablets: " << res->tablet_stats().size();
     } else if constexpr (std::is_same_v<Response, GetVersionResponse> ||
                          std::is_same_v<Response, GetTabletResponse>) {
-        VLOG_DEBUG << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
-                   << " response=" << res->ShortDebugString();
+        LOG_IF(INFO, res->status().code() != MetaServiceCode::OK)
+                << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                << " request=" << req->ShortDebugString()
+                << " original_client_ip=" << req->request_ip()
+                << " response=" << res->ShortDebugString();
     } else if constexpr (std::is_same_v<Response, GetDeleteBitmapResponse>) {
-        if (res->status().code() != MetaServiceCode::OK) {
-            res->clear_rowset_ids();
-            res->clear_segment_ids();
-            res->clear_versions();
-            res->clear_segment_delete_bitmaps();
-        }
-        LOG(INFO) << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
+        LOG(INFO) << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip()
                   << " status=" << res->status().ShortDebugString()
                   << " tablet=" << res->tablet_id()
                   << " delete_bitmap_count=" << 
res->segment_delete_bitmaps_size();
     } else if constexpr (std::is_same_v<Response, 
GetDeleteBitmapUpdateLockResponse>) {
-        if (res->status().code() != MetaServiceCode::OK) {
-            res->clear_base_compaction_cnts();
-            res->clear_cumulative_compaction_cnts();
-            res->clear_cumulative_points();
-        }
-        LOG(INFO) << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
+        LOG(INFO) << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip()
                   << " status=" << res->status().ShortDebugString();
     } else if constexpr (std::is_same_v<Response, GetObjStoreInfoResponse> ||
                          std::is_same_v<Response, GetStageResponse>) {
         std::string debug_string = encryt_sk(res->DebugString());
         TEST_SYNC_POINT_CALLBACK("sk_finish_rpc", &debug_string);
-        LOG(INFO) << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
-                  << " response=" << debug_string;
+        LOG(INFO) << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip() << " 
response=" << debug_string;
     } else {
-        LOG(INFO) << "finish " << func_name << " remote caller: " << 
ctrl->remote_side()
+        LOG(INFO) << "finish " << func_name << " remote_caller=" << 
ctrl->remote_side()
+                  << " original_client_ip=" << req->request_ip()
                   << " response=" << res->ShortDebugString();
     }
 }
 
-enum ErrCategory { CREATE, READ, COMMIT };
+enum class ErrCategory { CREATE, READ, COMMIT };
 
 template <ErrCategory category>
 inline MetaServiceCode cast_as(TxnErrorCode code) {
@@ -262,7 +259,7 @@ inline MetaServiceCode cast_as(TxnErrorCode code) {
 #define RPC_PREPROCESS(func_name, ...)                                         
               \
     StopWatch sw;                                                              
               \
     auto ctrl = static_cast<brpc::Controller*>(controller);                    
               \
-    begin_rpc(#func_name, ctrl, request);                                      
               \
+    begin_rpc(#func_name, ctrl, request, response);                            
               \
     brpc::ClosureGuard closure_guard(done);                                    
               \
     [[maybe_unused]] std::stringstream ss;                                     
               \
     [[maybe_unused]] MetaServiceCode code = MetaServiceCode::OK;               
               \
@@ -274,7 +271,7 @@ inline MetaServiceCode cast_as(TxnErrorCode code) {
     DORIS_CLOUD_DEFER {                                                        
               \
         response->mutable_status()->set_code(code);                            
               \
         response->mutable_status()->set_msg(msg);                              
               \
-        finish_rpc(#func_name, ctrl, response);                                
               \
+        finish_rpc(#func_name, ctrl, request, response);                       
               \
         closure_guard.reset(nullptr);                                          
               \
         if (txn != nullptr) {                                                  
               \
             stats.get_bytes += txn->get_bytes();                               
               \
diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp
index d30e0f05ae6..4f6bce457ca 100644
--- a/cloud/test/meta_service_test.cpp
+++ b/cloud/test/meta_service_test.cpp
@@ -5068,10 +5068,10 @@ TEST(MetaServiceTest, 
GetDeleteBitmapUpdateLockTabletStatsLockExpired) {
         GetDeleteBitmapUpdateLockResponse res;
         get_delete_bitmap_update_lock(meta_service.get(), res, db_id, 
table_id, index_id,
                                       tablet_idxes, 5, 999999, -1, true);
-        ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED);
-        ASSERT_EQ(res.base_compaction_cnts().size(), 0);
-        ASSERT_EQ(res.cumulative_compaction_cnts().size(), 0);
-        ASSERT_EQ(res.cumulative_points().size(), 0);
+        EXPECT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED);
+        EXPECT_EQ(res.base_compaction_cnts().size(), 3);
+        EXPECT_EQ(res.cumulative_compaction_cnts().size(), 3);
+        EXPECT_EQ(res.cumulative_points().size(), 3);
     }
 
     for (bool val : enable_batch_get_mow_tablet_stats_and_meta_vals) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to