This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 44d687c [client] add logging on slow metacache operations
44d687c is described below
commit 44d687caf3633d75b0cc64f1a25f444484ce43ad
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue Mar 23 14:08:46 2021 -0700
[client] add logging on slow metacache operations
This patch adds 50ms threshold slow logging for metacache operations:
* ProcessGetTableLocationsResponse
* ProcessGetTabletLocationsResponse
* LookupEntryByIdFastPath
* LookupEntryByKeyFastPath
Change-Id: I5eb0593794f9a971f3daea7d346f70b0dc32bfa7
Reviewed-on: http://gerrit.cloudera.org:8080/17221
Tested-by: Kudu Jenkins
Reviewed-by: Grant Henke <[email protected]>
---
src/kudu/client/meta_cache.cc | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index 7bb692b..131795e 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -56,6 +56,7 @@
#include "kudu/util/net/sockaddr.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/scoped_cleanup.h"
+#include "kudu/util/stopwatch.h"
using kudu::consensus::RaftPeerPB;
using kudu::master::ANY_REPLICA;
@@ -284,8 +285,9 @@ void RemoteTablet::MarkReplicaFailed(RemoteTabletServer *ts,
std::lock_guard<simple_spinlock> l(lock_);
VLOG(2) << "Tablet " << tablet_id_ << ": Current remote replicas in meta
cache: "
<< ReplicasAsStringUnlocked();
- KLOG_EVERY_N_SECS(WARNING, 1) << "Tablet " << tablet_id_ << ": Replica " <<
ts->ToString()
- << " has failed: " << status.ToString();
+ KLOG_EVERY_N_SECS(WARNING, 1) << Substitute(
+ "tablet $0: replica $1 has failed: $2",
+ tablet_id_, ts->ToString(),
status.ToString());
for (RemoteReplica& rep : replicas_) {
if (rep.ts == ts) {
rep.failed = true;
@@ -965,6 +967,7 @@ Status MetaCache::ProcessLookupResponse(const LookupRpc&
rpc,
Status MetaCache::ProcessGetTabletLocationsResponse(const string& tablet_id,
const
GetTabletLocationsResponsePB& resp,
MetaCacheEntry*
cache_entry) {
+ SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "processing tablet locations
response");
MonoTime expiration_time = MonoTime::Now() +
MonoDelta::FromMilliseconds(FLAGS_client_tablet_locations_by_id_ttl_ms);
const auto& tablet_locations = resp.tablet_locations();
@@ -1020,6 +1023,7 @@ Status MetaCache::ProcessGetTableLocationsResponse(const
KuduTable* table,
const
GetTableLocationsResponsePB& resp,
MetaCacheEntry* cache_entry,
int max_returned_locations)
{
+ SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "processing table locations
response");
MonoTime expiration_time = MonoTime::Now() +
MonoDelta::FromMilliseconds(resp.ttl_millis());
@@ -1177,6 +1181,7 @@ Status MetaCache::ProcessGetTableLocationsResponse(const
KuduTable* table,
bool MetaCache::LookupEntryByKeyFastPath(const KuduTable* table,
const string& partition_key,
MetaCacheEntry* entry) {
+ SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "slow lookup of entry by key");
shared_lock<rw_spinlock> l(lock_.get_lock());
const TabletMap* tablets = FindOrNull(tablets_by_table_and_key_,
table->id());
if (PREDICT_FALSE(!tablets)) {
@@ -1229,6 +1234,7 @@ Status MetaCache::DoFastPathLookup(const KuduTable* table,
bool MetaCache::LookupEntryByIdFastPath(const string& tablet_id,
MetaCacheEntry* entry) {
+ SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "slow lookup of entry by ID");
shared_lock<rw_spinlock> l(lock_.get_lock());
const auto* cache_entry = FindOrNull(entry_by_tablet_id_, tablet_id);
if (PREDICT_FALSE(!cache_entry)) {
@@ -1328,11 +1334,12 @@ void MetaCache::LookupTabletById(KuduClient* client,
void MetaCache::MarkTSFailed(RemoteTabletServer* ts,
const Status& status) {
- LOG(INFO) << "Marking tablet server " << ts->ToString() << " as failed.";
- Status ts_status = status.CloneAndPrepend("TS failed");
+ LOG(INFO) << Substitute("marking tablet server $0 as failed",
ts->ToString());
+ SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "marking tablet server as failed");
+ const auto ts_status = status.CloneAndPrepend("TS failed");
shared_lock<rw_spinlock> l(lock_.get_lock());
- // TODO: replace with a ts->tablet multimap for faster lookup?
+ // TODO(adar): replace with a ts->tablet multimap for faster lookup?
for (const auto& tablet : tablets_by_id_) {
// We just loop on all tablets; if a tablet does not have a replica on this
// TS, MarkReplicaFailed() returns false and we ignore the return value.