This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new b7809ebe355 branch-3.1: [chore](cloud) Optimize RPC log #53341 (#54212)
b7809ebe355 is described below
commit b7809ebe355e0bd5a4a01d9d3a3bc47d765a83e7
Author: Gavin Chou <[email protected]>
AuthorDate: Mon Aug 4 10:00:42 2025 +0800
branch-3.1: [chore](cloud) Optimize RPC log #53341 (#54212)
pick #53341
---
be/src/cloud/cloud_meta_mgr.cpp | 38 ++++-----
cloud/src/meta-service/meta_service.cpp | 20 +++--
cloud/src/meta-service/meta_service_helper.h | 113 +++++++++++++--------------
cloud/test/meta_service_test.cpp | 8 +-
4 files changed, 92 insertions(+), 87 deletions(-)
diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index f478a833a07..e0440991727 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -493,6 +493,9 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
MetaServiceProxy* proxy;
RETURN_IF_ERROR(MetaServiceProxy::get_proxy(&proxy));
+ std::string tablet_info =
+ fmt::format("tablet_id={} table_id={} index_id={}
partition_id={}", tablet->tablet_id(),
+ tablet->table_id(), tablet->index_id(),
tablet->partition_id());
int tried = 0;
while (true) {
std::shared_ptr<MetaService_Stub> stub;
@@ -542,12 +545,8 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
std::uniform_int_distribution<uint32_t> u1(500, 1000);
uint32_t duration_ms = tried >= 100 ? u(rng) : u1(rng);
bthread_usleep(duration_ms * 1000);
- LOG_INFO("failed to get rowset meta")
+ LOG_INFO("failed to get rowset meta, " + tablet_info)
.tag("reason", cntl.ErrorText())
- .tag("tablet_id", tablet_id)
- .tag("table_id", table_id)
- .tag("index_id", index_id)
- .tag("partition_id", tablet->partition_id())
.tag("tried", tried)
.tag("sleep", duration_ms);
continue;
@@ -555,18 +554,26 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
return Status::RpcError("failed to get rowset meta: {}",
cntl.ErrorText());
}
if (resp.status().code() == MetaServiceCode::TABLET_NOT_FOUND) {
- return Status::NotFound("failed to get rowset meta: {}",
resp.status().msg());
+ LOG(WARNING) << "failed to get rowset meta, err=" <<
resp.status().msg() << " "
+ << tablet_info;
+ return Status::NotFound("failed to get rowset meta: {}, {}",
resp.status().msg(),
+ tablet_info);
}
if (resp.status().code() != MetaServiceCode::OK) {
- return Status::InternalError("failed to get rowset meta: {}",
resp.status().msg());
+ LOG(WARNING) << " failed to get rowset meta, err=" <<
resp.status().msg() << " "
+ << tablet_info;
+ return Status::InternalError("failed to get rowset meta: {}, {}",
resp.status().msg(),
+ tablet_info);
}
if (latency > 100 * 1000) { // 100ms
LOG(INFO) << "finish get_rowset rpc. rowset_meta.size()=" <<
resp.rowset_meta().size()
- << ", latency=" << latency << "us";
+ << ", latency=" << latency << "us"
+ << " " << tablet_info;
} else {
LOG_EVERY_N(INFO, 100)
<< "finish get_rowset rpc. rowset_meta.size()=" <<
resp.rowset_meta().size()
- << ", latency=" << latency << "us";
+ << ", latency=" << latency << "us"
+ << " " << tablet_info;
}
int64_t now =
duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
@@ -590,16 +597,13 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
resp.stats(), req.idx(),
&delete_bitmap, full_sync,
sync_stats);
if (st.is<ErrorCode::ROWSETS_EXPIRED>() && tried++ < retry_times) {
- LOG_INFO("rowset meta is expired, need to retry")
- .tag("tablet", tablet->tablet_id())
+ LOG_INFO("rowset meta is expired, need to retry, " +
tablet_info)
.tag("tried", tried)
.error(st);
continue;
}
if (!st.ok()) {
- LOG_WARNING("failed to get delete bitmap")
- .tag("tablet", tablet->tablet_id())
- .error(st);
+ LOG_WARNING("failed to get delete bitmap, " +
tablet_info).error(st);
return st;
}
tablet->tablet_meta()->delete_bitmap()->merge(delete_bitmap);
@@ -657,9 +661,7 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
}
}
- LOG_INFO("[verbose] sync tablet delete bitmap")
- .tag("tablet_id", tablet->tablet_id())
- .tag("table_id", tablet->table_id())
+ LOG_INFO("[verbose] sync tablet delete bitmap " + tablet_info)
.tag("full_sync", full_sync)
.tag("old_max_version", old_max_version)
.tag("new_max_version", new_max_version)
@@ -707,7 +709,7 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
stats.cumulative_compaction_cnt() <
tablet->cumulative_compaction_cnt())
[[unlikely]] {
// stale request, ignore
- LOG_WARNING("stale get rowset meta request")
+ LOG_WARNING("stale get rowset meta request " + tablet_info)
.tag("resp_base_compaction_cnt",
stats.base_compaction_cnt())
.tag("base_compaction_cnt",
tablet->base_compaction_cnt())
.tag("resp_cumulative_compaction_cnt",
stats.cumulative_compaction_cnt())
diff --git a/cloud/src/meta-service/meta_service.cpp
b/cloud/src/meta-service/meta_service.cpp
index b1ec995d4bf..dec48f9a758 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -2169,7 +2169,8 @@ void
MetaServiceImpl::update_tmp_rowset(::google::protobuf::RpcController* contr
void internal_get_rowset(Transaction* txn, int64_t start, int64_t end,
const std::string& instance_id, int64_t tablet_id,
MetaServiceCode& code,
std::string& msg, GetRowsetResponse* response) {
- LOG(INFO) << "get_rowset start=" << start << ", end=" << end;
+ LOG(INFO) << "get_rowset start=" << start << ", end="
+ << " tablet_id=" << tablet_id;
MetaRowsetKeyInfo key_info0 {instance_id, tablet_id, start};
MetaRowsetKeyInfo key_info1 {instance_id, tablet_id, end + 1};
std::string key0;
@@ -2181,7 +2182,8 @@ void internal_get_rowset(Transaction* txn, int64_t start,
int64_t end,
int num_rowsets = 0;
DORIS_CLOUD_DEFER_COPY(key0, key1) {
LOG(INFO) << "get rowset meta, num_rowsets=" << num_rowsets << "
range=[" << hex(key0)
- << "," << hex(key1) << "]";
+ << "," << hex(key1) << "]"
+ << " tablet_id=" << tablet_id;
};
std::stringstream ss;
@@ -2189,7 +2191,7 @@ void internal_get_rowset(Transaction* txn, int64_t start,
int64_t end,
TxnErrorCode err = txn->get(key0, key1, &it);
if (err != TxnErrorCode::TXN_OK) {
code = cast_as<ErrCategory::READ>(err);
- ss << "internal error, failed to get rowset, err=" << err;
+ ss << "internal error, failed to get rowset, err=" << err << "
tablet_id=" << tablet_id;
msg = ss.str();
LOG(WARNING) << msg;
return;
@@ -2203,14 +2205,16 @@ void internal_get_rowset(Transaction* txn, int64_t
start, int64_t end,
if (byte_size + v.size() > std::numeric_limits<int32_t>::max()) {
code = MetaServiceCode::PROTOBUF_PARSE_ERR;
msg = fmt::format(
- "rowset meta exceeded 2G, unable to serialize, key={}.
byte_size={}",
- hex(k), byte_size);
+ "rowset meta exceeded 2G, unable to serialize, key={}.
byte_size={} "
+ "tablet_id={}",
+ hex(k), byte_size, tablet_id);
LOG(WARNING) << msg;
return;
}
if (!rs->ParseFromArray(v.data(), v.size())) {
code = MetaServiceCode::PROTOBUF_PARSE_ERR;
- msg = "malformed rowset meta, unable to serialize";
+ msg = "malformed rowset meta, unable to serialize, tablet_id="
+
+ std::to_string(tablet_id);
LOG(WARNING) << msg << " key=" << hex(k);
return;
}
@@ -2339,7 +2343,9 @@ void
MetaServiceImpl::get_rowset(::google::protobuf::RpcController* controller,
if (!request->has_base_compaction_cnt() ||
!request->has_cumulative_compaction_cnt() ||
!request->has_cumulative_point()) {
code = MetaServiceCode::INVALID_ARGUMENT;
- msg = "no valid compaction_cnt or cumulative_point given";
+ msg = "no valid compaction_cnt or cumulative_point given, tablet_id=" +
+ std::to_string(tablet_id);
+ LOG(WARNING) << msg;
return;
}
int64_t req_bc_cnt = request->base_compaction_cnt();
diff --git a/cloud/src/meta-service/meta_service_helper.h
b/cloud/src/meta-service/meta_service_helper.h
index 321b0ceb3a6..c35873ed949 100644
--- a/cloud/src/meta-service/meta_service_helper.h
+++ b/cloud/src/meta-service/meta_service_helper.h
@@ -94,107 +94,104 @@ inline std::string encryt_sk(std::string debug_string) {
return debug_string;
}
-template <class Request>
-void begin_rpc(std::string_view func_name, brpc::Controller* ctrl, const
Request* req) {
+template <class Request, class Response>
+void begin_rpc(std::string_view func_name, brpc::Controller* ctrl, const
Request* req,
+ Response* res) {
+ res->Clear(); // clear response in case of this is call is a local retry
in MS
if constexpr (std::is_same_v<Request, CreateRowsetRequest>) {
- LOG(INFO) << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip();
+ LOG(INFO) << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip();
} else if constexpr (std::is_same_v<Request, CreateTabletsRequest>) {
- LOG(INFO) << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip();
+ LOG(INFO) << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip();
} else if constexpr (std::is_same_v<Request, UpdateDeleteBitmapRequest>) {
- LOG(INFO) << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip() << "
table_id=" << req->table_id()
+ LOG(INFO) << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip() << "
table_id=" << req->table_id()
<< " tablet_id=" << req->tablet_id() << " lock_id=" <<
req->lock_id()
<< " initiator=" << req->initiator()
<< " delete_bitmap_size=" <<
req->segment_delete_bitmaps_size();
} else if constexpr (std::is_same_v<Request, GetDeleteBitmapRequest>) {
- LOG(INFO) << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip()
+ LOG(INFO) << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " tablet_id=" << req->tablet_id() << " rowset_size=" <<
req->rowset_ids_size();
} else if constexpr (std::is_same_v<Request, GetTabletStatsRequest>) {
- VLOG_DEBUG << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip()
- << " tablet size: " << req->tablet_idx().size();
+ VLOG_DEBUG << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
+ << " num_tablets: " << req->tablet_idx().size();
} else if constexpr (std::is_same_v<Request, GetVersionRequest> ||
std::is_same_v<Request, GetRowsetRequest> ||
std::is_same_v<Request, GetTabletRequest>) {
- VLOG_DEBUG << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip()
+ VLOG_DEBUG << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " request=" << req->ShortDebugString();
} else if constexpr (std::is_same_v<Request, RemoveDeleteBitmapRequest>) {
- LOG(INFO) << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip()
+ LOG(INFO) << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " tablet_id=" << req->tablet_id() << " rowset_size=" <<
req->rowset_ids_size();
} else if constexpr (std::is_same_v<Request,
GetDeleteBitmapUpdateLockRequest>) {
- LOG(INFO) << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip() << "
table_id=" << req->table_id()
+ LOG(INFO) << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip() << "
table_id=" << req->table_id()
<< " lock_id=" << req->lock_id() << " initiator=" <<
req->initiator()
<< " expiration=" << req->expiration()
<< " require_compaction_stats=" <<
req->require_compaction_stats();
} else {
- LOG(INFO) << "begin " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " original client ip: " << req->request_ip()
+ LOG(INFO) << "begin " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " request=" << req->ShortDebugString();
}
}
-template <class Response>
-void finish_rpc(std::string_view func_name, brpc::Controller* ctrl, Response*
res) {
+template <class Request, class Response>
+void finish_rpc(std::string_view func_name, brpc::Controller* ctrl, const
Request* req,
+ Response* res) {
if constexpr (std::is_same_v<Response, CommitTxnResponse>) {
- if (res->status().code() != MetaServiceCode::OK) {
- res->clear_table_ids();
- res->clear_partition_ids();
- res->clear_versions();
- }
- LOG(INFO) << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
+ LOG(INFO) << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " response=" << res->ShortDebugString();
} else if constexpr (std::is_same_v<Response, GetRowsetResponse>) {
- if (res->status().code() != MetaServiceCode::OK) {
- res->clear_rowset_meta();
- }
- VLOG_DEBUG << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " status=" << res->status().ShortDebugString();
+ LOG_IF(INFO, res->status().code() != MetaServiceCode::OK)
+ << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
+ << " request=" << req->ShortDebugString()
+ << " status=" << res->status().ShortDebugString();
} else if constexpr (std::is_same_v<Response, GetTabletStatsResponse>) {
- VLOG_DEBUG << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " status=" << res->status().ShortDebugString()
- << " tablet size: " << res->tablet_stats().size();
+ LOG_IF(INFO, res->status().code() != MetaServiceCode::OK)
+ << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
+ << " request=" << req->ShortDebugString()
+ << " status=" << res->status().ShortDebugString()
+ << " num_tablets: " << res->tablet_stats().size();
} else if constexpr (std::is_same_v<Response, GetVersionResponse> ||
std::is_same_v<Response, GetTabletResponse>) {
- VLOG_DEBUG << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " response=" << res->ShortDebugString();
+ LOG_IF(INFO, res->status().code() != MetaServiceCode::OK)
+ << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " request=" << req->ShortDebugString()
+ << " original_client_ip=" << req->request_ip()
+ << " response=" << res->ShortDebugString();
} else if constexpr (std::is_same_v<Response, GetDeleteBitmapResponse>) {
- if (res->status().code() != MetaServiceCode::OK) {
- res->clear_rowset_ids();
- res->clear_segment_ids();
- res->clear_versions();
- res->clear_segment_delete_bitmaps();
- }
- LOG(INFO) << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
+ LOG(INFO) << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " status=" << res->status().ShortDebugString()
<< " tablet=" << res->tablet_id()
<< " delete_bitmap_count=" <<
res->segment_delete_bitmaps_size();
} else if constexpr (std::is_same_v<Response,
GetDeleteBitmapUpdateLockResponse>) {
- if (res->status().code() != MetaServiceCode::OK) {
- res->clear_base_compaction_cnts();
- res->clear_cumulative_compaction_cnts();
- res->clear_cumulative_points();
- }
- LOG(INFO) << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
+ LOG(INFO) << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " status=" << res->status().ShortDebugString();
} else if constexpr (std::is_same_v<Response, GetObjStoreInfoResponse> ||
std::is_same_v<Response, GetStageResponse>) {
std::string debug_string = encryt_sk(res->DebugString());
TEST_SYNC_POINT_CALLBACK("sk_finish_rpc", &debug_string);
- LOG(INFO) << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
- << " response=" << debug_string;
+ LOG(INFO) << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip() << "
response=" << debug_string;
} else {
- LOG(INFO) << "finish " << func_name << " remote caller: " <<
ctrl->remote_side()
+ LOG(INFO) << "finish " << func_name << " remote_caller=" <<
ctrl->remote_side()
+ << " original_client_ip=" << req->request_ip()
<< " response=" << res->ShortDebugString();
}
}
-enum ErrCategory { CREATE, READ, COMMIT };
+enum class ErrCategory { CREATE, READ, COMMIT };
template <ErrCategory category>
inline MetaServiceCode cast_as(TxnErrorCode code) {
@@ -262,7 +259,7 @@ inline MetaServiceCode cast_as(TxnErrorCode code) {
#define RPC_PREPROCESS(func_name, ...)
\
StopWatch sw;
\
auto ctrl = static_cast<brpc::Controller*>(controller);
\
- begin_rpc(#func_name, ctrl, request);
\
+ begin_rpc(#func_name, ctrl, request, response);
\
brpc::ClosureGuard closure_guard(done);
\
[[maybe_unused]] std::stringstream ss;
\
[[maybe_unused]] MetaServiceCode code = MetaServiceCode::OK;
\
@@ -274,7 +271,7 @@ inline MetaServiceCode cast_as(TxnErrorCode code) {
DORIS_CLOUD_DEFER {
\
response->mutable_status()->set_code(code);
\
response->mutable_status()->set_msg(msg);
\
- finish_rpc(#func_name, ctrl, response);
\
+ finish_rpc(#func_name, ctrl, request, response);
\
closure_guard.reset(nullptr);
\
if (txn != nullptr) {
\
stats.get_bytes += txn->get_bytes();
\
diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp
index d30e0f05ae6..4f6bce457ca 100644
--- a/cloud/test/meta_service_test.cpp
+++ b/cloud/test/meta_service_test.cpp
@@ -5068,10 +5068,10 @@ TEST(MetaServiceTest,
GetDeleteBitmapUpdateLockTabletStatsLockExpired) {
GetDeleteBitmapUpdateLockResponse res;
get_delete_bitmap_update_lock(meta_service.get(), res, db_id,
table_id, index_id,
tablet_idxes, 5, 999999, -1, true);
- ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED);
- ASSERT_EQ(res.base_compaction_cnts().size(), 0);
- ASSERT_EQ(res.cumulative_compaction_cnts().size(), 0);
- ASSERT_EQ(res.cumulative_points().size(), 0);
+ EXPECT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED);
+ EXPECT_EQ(res.base_compaction_cnts().size(), 3);
+ EXPECT_EQ(res.cumulative_compaction_cnts().size(), 3);
+ EXPECT_EQ(res.cumulative_points().size(), 3);
}
for (bool val : enable_batch_get_mow_tablet_stats_and_meta_vals) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]