This is an automated email from the ASF dual-hosted git repository.
achennaka pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 1214a38e5 [log] use THROTTLE_MSG for KLOG_EVERY_N_SECS when necessary
1214a38e5 is described below
commit 1214a38e55231ff3aad342ef9efbc326f7a6e659
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Dec 18 17:55:08 2023 -0800
[log] use THROTTLE_MSG for KLOG_EVERY_N_SECS when necessary
As a follow-up to e8f459284 and 8bc0be58e, this patch updates call sites
of KLOG_EVERY_N_SECS w.r.t. using THROTTLE_MSG to report on the number
of suppressed messages. The rationale behind this update is to provide
extra information that is useful for troubleshooting, according to my
findings while troubleshooting a few recent issues.
In addition, I added one micro-test for KLOG_EVERY_N_SECS when used
without THROTTLE_MSG and removed useless logging in another test: it
seems the logging was added there just for debugging while implementing
the test. I also took the liberty of updating corresponding messages
and the logging frequency to be more appropriate.
Otherwise, this patch doesn't contain any functional modifications.
Change-Id: I1ef2952b86b15f15cd8cd806619e5113ad1c758f
Reviewed-on: http://gerrit.cloudera.org:8080/20855
Tested-by: Kudu Jenkins
Reviewed-by: Abhishek Chennaka <[email protected]>
---
src/kudu/master/master_service.cc | 6 ++++--
src/kudu/master/ts_manager.cc | 5 +++--
src/kudu/rpc/proxy.cc | 2 +-
src/kudu/rpc/server_negotiation.cc | 19 +++++++++++++------
src/kudu/security/token-test.cc | 8 +-------
src/kudu/security/token_verifier.cc | 8 ++++++--
src/kudu/tablet/rowset_info.cc | 4 ++--
src/kudu/tablet/tablet_replica.cc | 11 +++++++----
src/kudu/thrift/sasl_client_transport.cc | 5 +++--
src/kudu/util/logging-test.cc | 24 ++++++++++++++++++++++++
src/kudu/util/maintenance_manager.cc | 6 +++---
11 files changed, 67 insertions(+), 31 deletions(-)
diff --git a/src/kudu/master/master_service.cc
b/src/kudu/master/master_service.cc
index b4bb01c99..54d5c21f7 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -881,8 +881,10 @@ void MasterServiceImpl::ConnectToMaster(const
ConnectToMasterRequestPB* /*req*/,
resp->mutable_error()->mutable_status());
resp->mutable_error()->set_code(MasterErrorPB::UNKNOWN_ERROR);
rpc->RespondSuccess();
- KLOG_EVERY_N_SECS(WARNING, 60) << Substitute("invalid request from $0:
$1",
- rpc->requestor_string(),
kErrMsg);
+ KLOG_EVERY_N_SECS(WARNING, 60)
+ << Substitute("invalid request from $0: $1",
+ rpc->requestor_string(), kErrMsg)
+ << THROTTLE_MSG;
return;
}
diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc
index 13a81d289..aede186cb 100644
--- a/src/kudu/master/ts_manager.cc
+++ b/src/kudu/master/ts_manager.cc
@@ -25,6 +25,7 @@
#include <mutex>
#include <optional>
#include <ostream>
+#include <type_traits>
#include <utility>
#include <gflags/gflags.h>
@@ -155,7 +156,7 @@ Status TSManager::RegisterTS(const NodeInstancePB& instance,
? uuid : registration.rpc_addresses(0).host();
TRACE(Substitute("tablet server $0: assigning location", uuid));
string location_str;
- const auto s = location_cache_->GetLocation(cmd_arg, &location_str);
+ auto s = location_cache_->GetLocation(cmd_arg, &location_str);
TRACE(Substitute(
"tablet server $0: assigned location '$1'", uuid, location_str));
@@ -166,7 +167,7 @@ Status TSManager::RegisterTS(const NodeInstancePB& instance,
KLOG_EVERY_N_SECS(ERROR, 60) << Substitute(
"Unable to assign location to tablet server $0: $1",
Substitute("$0 ($1:$2)", uuid, addr.host(), addr.port()),
- s.ToString());
+ s.ToString()) << THROTTLE_MSG;
return s;
}
location.emplace(std::move(location_str));
diff --git a/src/kudu/rpc/proxy.cc b/src/kudu/rpc/proxy.cc
index b6b355960..2ec1244f0 100644
--- a/src/kudu/rpc/proxy.cc
+++ b/src/kudu/rpc/proxy.cc
@@ -238,7 +238,7 @@ void Proxy::AsyncRequest(const string& method,
if (PREDICT_FALSE(!controller->status().ok())) {
KLOG_EVERY_N_SECS(WARNING, 5)
<< Substitute("Call had error, refreshing address and retrying: $0",
- controller->status().ToString());
+ controller->status().ToString()) << THROTTLE_MSG;
auto req_payload = controller->ReleaseRequestPayload();
controller->Reset();
RefreshDnsAndEnqueueRequest(method, std::move(req_payload), response,
controller, callback);
diff --git a/src/kudu/rpc/server_negotiation.cc
b/src/kudu/rpc/server_negotiation.cc
index 2f4481ee4..188cd0b35 100644
--- a/src/kudu/rpc/server_negotiation.cc
+++ b/src/kudu/rpc/server_negotiation.cc
@@ -74,6 +74,7 @@ using std::set;
using std::string;
using std::unique_ptr;
using std::vector;
+using strings::Substitute;
// Fault injection flags.
DEFINE_double(rpc_inject_invalid_authn_token_ratio, 0,
@@ -504,12 +505,18 @@ Status ServerNegotiation::HandleNegotiate(const
NegotiatePB& request) {
authn_types.insert(AuthenticationType::JWT);
}
break;
- case AuthenticationTypePB::TYPE_NOT_SET: {
+ case AuthenticationTypePB::TYPE_NOT_SET:
+ default: {
Sockaddr addr;
- RETURN_NOT_OK(socket_->GetPeerAddress(&addr));
+ const auto s = socket_->GetPeerAddress(&addr);
+ WARN_NOT_OK(s, "unable to get peer address");
+ constexpr const char* const kFormat =
+ "client at $0 supports unknown authentication type $1, consider
updating server";
KLOG_EVERY_N_SECS(WARNING, 60)
- << "client supports unknown authentication type, consider
updating server, address: "
- << addr.ToString();
+ << Substitute(kFormat,
+ s.ok() ? addr.ToString() : "<unknown address>",
+ static_cast<uint32_t>(type.type_case()))
+ << THROTTLE_MSG;
break;
}
}
@@ -699,8 +706,8 @@ Status ServerNegotiation::AuthenticateBySasl(faststring*
recv_buf) {
// locally for the purposes of group mapping, ACLs, etc.
string local_name;
RETURN_NOT_OK_PREPEND(security::MapPrincipalToLocalName(principal,
&local_name),
- strings::Substitute("could not map krb5 principal
'$0' to username",
- principal));
+ Substitute("could not map krb5 principal '$0' to
username",
+ principal));
authenticated_user_.SetAuthenticatedByKerberos(std::move(local_name),
std::move(principal));
} else {
authenticated_user_.SetUnauthenticated(c_username);
diff --git a/src/kudu/security/token-test.cc b/src/kudu/security/token-test.cc
index 158cb91f5..34a6c1d87 100644
--- a/src/kudu/security/token-test.cc
+++ b/src/kudu/security/token-test.cc
@@ -21,6 +21,7 @@
#include <ostream>
#include <string>
#include <thread>
+#include <type_traits>
#include <utility>
#include <vector>
@@ -35,7 +36,6 @@
#include "kudu/security/token_signing_key.h"
#include "kudu/security/token_verifier.h"
#include "kudu/util/countdown_latch.h"
-#include "kudu/util/logging.h"
#include "kudu/util/monotime.h"
#include "kudu/util/openssl_util.h"
#include "kudu/util/pb_util.h"
@@ -80,8 +80,6 @@ Status SignUntilRotatePast(TokenSigner* signer,
TokenGenerator generate_token,
auto cur_seq_num = seq_num;
while (cur_seq_num == seq_num) {
SleepFor(MonoDelta::FromMilliseconds(50));
- KLOG_EVERY_N_SECS(INFO, 1) <<
- Substitute("Generating $0 token for activity interval $1", token_type,
seq_num);
RETURN_NOT_OK_PREPEND(signer->TryRotateKey(), "Failed to attempt to rotate
key");
SignedTokenPB signed_token;
RETURN_NOT_OK_PREPEND(generate_token(&signed_token),
@@ -816,18 +814,14 @@ TEST_F(TokenTest, TestKeyValidity) {
const double key_validity_seconds = signer.key_validity_seconds_;
threads.emplace_back([&first_tsk_validity_latch, key_validity_seconds] {
SleepFor(MonoDelta::FromSeconds(key_validity_seconds));
- LOG(INFO) << Substitute("First TSK's validity interval of $0 secs has
finished!",
- key_validity_seconds);
first_tsk_validity_latch.CountDown();
});
// Set up a second TSK so our threads can rotate TSKs when the time comes.
while (true) {
- KLOG_EVERY_N_SECS(INFO, 1) << "Waiting for a second key...";
unique_ptr<TokenSigningPrivateKey> tsk;
ASSERT_OK(signer.CheckNeedKey(&tsk));
if (tsk) {
- LOG(INFO) << "Added second key!";
ASSERT_OK(signer.AddKey(std::move(tsk)));
break;
}
diff --git a/src/kudu/security/token_verifier.cc
b/src/kudu/security/token_verifier.cc
index 373c27258..155ecfa1c 100644
--- a/src/kudu/security/token_verifier.cc
+++ b/src/kudu/security/token_verifier.cc
@@ -28,6 +28,7 @@
#include <glog/logging.h>
#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/strings/substitute.h"
#include "kudu/gutil/walltime.h"
#include "kudu/security/token.pb.h"
#include "kudu/security/token_signing_key.h"
@@ -40,6 +41,7 @@ using std::string;
using std::transform;
using std::unique_ptr;
using std::vector;
+using strings::Substitute;
namespace kudu {
namespace security {
@@ -123,8 +125,10 @@ TokenVerificationResult
TokenVerifier::VerifyTokenSignature(
for (auto flag : token->incompatible_features()) {
if (!TokenPB::Feature_IsValid(flag)) {
- KLOG_EVERY_N_SECS(WARNING, 60) << "received token with unknown feature; "
- "server needs to be updated";
+ constexpr const char* const kFormat =
+ "received token with unknown feature $0; consider updating server";
+ KLOG_EVERY_N_SECS(WARNING, 60) << Substitute(
+ kFormat, static_cast<uint32_t>(flag)) << THROTTLE_MSG;
return TokenVerificationResult::INCOMPATIBLE_FEATURE;
}
}
diff --git a/src/kudu/tablet/rowset_info.cc b/src/kudu/tablet/rowset_info.cc
index 3abbfc208..770295401 100644
--- a/src/kudu/tablet/rowset_info.cc
+++ b/src/kudu/tablet/rowset_info.cc
@@ -298,9 +298,9 @@ void RowSetInfo::ComputeCdfAndCollectOrdered(
if (rs->IsAvailableForCompaction()) {
if (is_on_memory_budget && !(*is_on_memory_budget)(rs.get())) {
// Skip rowsets filtered out by the memory budgeting.
- KLOG_EVERY_N_SECS(INFO, 600) << Substitute(
+ KLOG_EVERY_N_SECS(INFO, 60) << Substitute(
"$0 removed from compaction input due to memory constraints",
- rs->ToString());
+ rs->ToString()) << THROTTLE_MSG;
continue;
}
available_rowsets.push_back(rs);
diff --git a/src/kudu/tablet/tablet_replica.cc
b/src/kudu/tablet/tablet_replica.cc
index 0536580c0..ee2d99343 100644
--- a/src/kudu/tablet/tablet_replica.cc
+++ b/src/kudu/tablet/tablet_replica.cc
@@ -1184,8 +1184,10 @@ Status TabletReplica::TxnOpDispatcher::Dispatch(
const auto txn_id = op->request()->txn_id();
std::lock_guard<simple_spinlock> guard(lock_);
if (PREDICT_FALSE(unregistered_)) {
- KLOG_EVERY_N_SECS(WARNING, 10) << Substitute(
- "received request for unregistered TxnOpDispatcher (txn ID $0)",
txn_id);
+ KLOG_EVERY_N_SECS(WARNING, 10)
+ << Substitute("received request for unregistered TxnOpDispatcher (txn
ID $0)",
+ txn_id)
+ << THROTTLE_MSG;
// TODO(aserbin): Status::ServiceUnavailable() is more appropriate here?
return Status::IllegalState(
"tablet replica could not accept txn write operation");
@@ -1293,8 +1295,9 @@ Status TabletReplica::TxnOpDispatcher::Submit() {
void TabletReplica::TxnOpDispatcher::Cancel(const Status& status,
TabletServerErrorPB::Code code) {
CHECK(!status.ok());
- KLOG_EVERY_N_SECS(WARNING, 1) << Substitute("$0: cancelling pending write
operations",
- status.ToString());
+ KLOG_EVERY_N_SECS(WARNING, 1)
+ << Substitute("$0: cancelling pending write operations",
+ status.ToString()) << THROTTLE_MSG;
decltype(ops_queue_) ops;
{
std::lock_guard<simple_spinlock> guard(lock_);
diff --git a/src/kudu/thrift/sasl_client_transport.cc
b/src/kudu/thrift/sasl_client_transport.cc
index a1766c38b..e0d5b4052 100644
--- a/src/kudu/thrift/sasl_client_transport.cc
+++ b/src/kudu/thrift/sasl_client_transport.cc
@@ -24,9 +24,9 @@
#include <memory>
#include <ostream>
#include <string>
+#include <type_traits>
#include <glog/logging.h>
-#include <thrift/transport/TTransport.h>
#include "kudu/gutil/endian.h"
#include "kudu/gutil/port.h"
@@ -159,7 +159,8 @@ void SaslClientTransport::ReadFrame() {
if (payload_len > 1024 * 1024) {
KLOG_EVERY_N_SECS(WARNING, 60) << "Received large Thrift SASL frame: "
- <<
HumanReadableNumBytes::ToString(payload_len);
+ <<
HumanReadableNumBytes::ToString(payload_len)
+ << THROTTLE_MSG;
if (payload_len > max_recv_buf_size_) {
throw TTransportException(Substitute("Thrift SASL frame is too long:
$0/$1",
HumanReadableNumBytes::ToString(payload_len),
diff --git a/src/kudu/util/logging-test.cc b/src/kudu/util/logging-test.cc
index 6855a6d70..78994faf8 100644
--- a/src/kudu/util/logging-test.cc
+++ b/src/kudu/util/logging-test.cc
@@ -64,6 +64,30 @@ TEST(LoggingTest, TestThrottledLogging) {
EXPECT_THAT(msgs[1], testing::ContainsRegex("\\[suppressed [0-9]{3,} similar
messages\\]"));
}
+TEST(LoggingTest, ThrottledLoggingNoThrottleMsg) {
+ SKIP_IF_SLOW_NOT_ALLOWED();
+ StringVectorSink sink;
+ ScopedRegisterSink srs(&sink);
+
+ for (int i = 0; i < 10000; i++) {
+ KLOG_EVERY_N_SECS(INFO, 1) << "test";
+ SleepFor(MonoDelta::FromMilliseconds(1));
+ if (sink.logged_msgs().size() >= 2) {
+ break;
+ }
+ }
+ const vector<string>& msgs = sink.logged_msgs();
+ ASSERT_GE(msgs.size(), 2);
+
+ for (const auto& m: msgs) {
+ // All the lines should contain the message logged.
+ ASSERT_THAT(m, testing::ContainsRegex("test$"));
+ // Since the special THROTTLE_MSG isn't used, there isn't any report on
+ // suppressed messages.
+ ASSERT_STR_NOT_CONTAINS(m, "suppressed");
+ }
+}
+
// Test the KLOG_EVERY_N_SECS(...) macro with slow-paced messages, making sure
// no messages are lost or suppressed if they come staggered by more than
// the suppression time interval.
diff --git a/src/kudu/util/maintenance_manager.cc
b/src/kudu/util/maintenance_manager.cc
index 898fb0e3f..5145cd929 100644
--- a/src/kudu/util/maintenance_manager.cc
+++ b/src/kudu/util/maintenance_manager.cc
@@ -335,15 +335,15 @@ void MaintenanceManager::RunSchedulerThread() {
bool prev_iter_found_no_work = false;
while (true) {
- if (!FLAGS_enable_maintenance_manager) {
+ if (PREDICT_FALSE(!FLAGS_enable_maintenance_manager)) {
{
- std::unique_lock<Mutex> guard(lock_);
+ std::lock_guard<Mutex> guard(lock_);
if (shutdown_) {
VLOG_AND_TRACE_WITH_PREFIX("maintenance", 1) << "Shutting down
maintenance manager.";
return;
}
}
- KLOG_EVERY_N_SECS(INFO, 1200)
+ KLOG_EVERY_N_SECS(INFO, 300)
<< "Maintenance manager is disabled (check
--enable_maintenance_manager).";
SleepFor(polling_interval_);
continue;