This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit a9afbce14454f9b03b9cc5a699a09f4f9935da58 Author: Alexey Serbin <[email protected]> AuthorDate: Tue Feb 25 18:48:05 2020 -0800 [ksck] report on misconfiguration for flag categories This patch adds functionality to report on misconfiguration of flags in pre-defined flag categories. Here, 'misconfiguration' means different values for the same flags within each group of masters/tablet servers in a cluster (i.e. intra-group differences), and also differences between masters' and tablet servers' flags (i.e. cross-group differences). With this patch, two categories are defined and ready for use: * 'time_source' includes the following flags ** --builtin_ntp_servers ** --time_source * 'unusual' includes all experimental, hidden, and unsafe flags The newly introduced functionality is enabled by default for the 'time_source' flags category. The list of flag categories to check can be customized by adding --flags_categories_to_check=<comma-separated-list-of-flags-categories> to the 'kudu cluster ksck' invocation. Use an empty string to specify an empty list of flags categories to check. For testing, I ran a small Kudu cluster and verified that ksck reports on misconfiguration as expected: * if tablet servers are run with different flags * if masters are run with flag settings different from those used for tablet servers This patch also contains assorted refactoring in related areas of code. I'm planning to add automated tests to cover the newly introduced functionality in a separate patch. 
Change-Id: I2afbfac9327f85b212ecf8d8f43a2139f90db6bb Reviewed-on: http://gerrit.cloudera.org:8080/15298 Tested-by: Alexey Serbin <[email protected]> Reviewed-by: Adar Dembo <[email protected]> --- src/kudu/rebalance/cluster_status.cc | 6 +- src/kudu/rebalance/cluster_status.h | 6 +- src/kudu/tools/ksck-test.cc | 47 +++-- src/kudu/tools/ksck.cc | 348 ++++++++++++++++++++++++++++------ src/kudu/tools/ksck.h | 144 ++++++++++---- src/kudu/tools/ksck_remote.cc | 71 ++++--- src/kudu/tools/ksck_remote.h | 4 +- src/kudu/tools/ksck_results.cc | 100 ++++++++-- src/kudu/tools/ksck_results.h | 29 ++- src/kudu/tools/tool_action_cluster.cc | 1 + 10 files changed, 582 insertions(+), 174 deletions(-) diff --git a/src/kudu/rebalance/cluster_status.cc b/src/kudu/rebalance/cluster_status.cc index 685b059..38b4da6 100644 --- a/src/kudu/rebalance/cluster_status.cc +++ b/src/kudu/rebalance/cluster_status.cc @@ -25,7 +25,7 @@ namespace kudu { namespace cluster_summary { -const char* const HealthCheckResultToString(HealthCheckResult cr) { +const char* HealthCheckResultToString(HealthCheckResult cr) { switch (cr) { case HealthCheckResult::HEALTHY: return "HEALTHY"; @@ -43,7 +43,7 @@ const char* const HealthCheckResultToString(HealthCheckResult cr) { } // Return a string representation of 'sh'. -const char* const ServerHealthToString(ServerHealth sh) { +const char* ServerHealthToString(ServerHealth sh) { switch (sh) { case ServerHealth::HEALTHY: return "HEALTHY"; @@ -59,7 +59,7 @@ const char* const ServerHealthToString(ServerHealth sh) { } // Return a string representation of 'type'. 
-const char* const ServerTypeToString(ServerType type) { +const char* ServerTypeToString(ServerType type) { switch (type) { case ServerType::MASTER: return "Master"; diff --git a/src/kudu/rebalance/cluster_status.h b/src/kudu/rebalance/cluster_status.h index 8d6ea8c..cc5dcbf 100644 --- a/src/kudu/rebalance/cluster_status.h +++ b/src/kudu/rebalance/cluster_status.h @@ -56,7 +56,7 @@ enum class HealthCheckResult { CONSENSUS_MISMATCH, }; -const char* const HealthCheckResultToString(HealthCheckResult cr); +const char* HealthCheckResultToString(HealthCheckResult cr); // Possible types of consensus configs. enum class ConsensusConfigType { @@ -127,7 +127,7 @@ enum class ServerHealth { }; // Return a string representation of 'sh'. -const char* const ServerHealthToString(ServerHealth sh); +const char* ServerHealthToString(ServerHealth sh); // Quiescing-related info. struct QuiescingInfo { @@ -193,7 +193,7 @@ enum class ServerType { }; // Return a string representation of 'type'. -const char* const ServerTypeToString(ServerType type); +const char* ServerTypeToString(ServerType type); // A summary of the state of a tablet replica. 
struct ReplicaSummary { diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc index 13b669d..45cfa3f 100644 --- a/src/kudu/tools/ksck-test.cc +++ b/src/kudu/tools/ksck-test.cc @@ -19,6 +19,7 @@ #include <algorithm> #include <atomic> +#include <cstddef> #include <cstdint> #include <initializer_list> #include <map> @@ -92,7 +93,9 @@ class MockKsckMaster : public KsckMaster { uuid_ = uuid; version_ = "mock-version"; if (is_get_flags_available_) { - unusual_flags_.emplace(); + for (size_t cat = FlagsCategory::MIN; cat < FlagsCategory::ARRAY_SIZE; ++cat) { + flags_by_category_[cat].flags.emplace(); + } } } @@ -113,13 +116,16 @@ class MockKsckMaster : public KsckMaster { return fetch_cstate_status_; } - Status FetchUnusualFlags() override { - if (is_get_flags_available_) { - unusual_flags_state_ = KsckFetchState::FETCHED; - return Status::OK(); + Status FetchFlags(const std::vector<FlagsCategory>& categories) override { + for (const auto cat : categories) { + if (is_get_flags_available_) { + flags_by_category_[cat].state = KsckFetchState::FETCHED; + } else { + flags_by_category_[cat].state = KsckFetchState::FETCH_FAILED; + } } - unusual_flags_state_ = KsckFetchState::FETCH_FAILED; - return Status::RemoteError("GetFlags not available"); + return is_get_flags_available_ + ? Status::OK() : Status::RemoteError("GetFlags not available"); } // Public because the unit tests mutate these variables directly. 
@@ -127,7 +133,7 @@ class MockKsckMaster : public KsckMaster { Status fetch_cstate_status_; using KsckMaster::uuid_; using KsckMaster::cstate_; - using KsckMaster::unusual_flags_; + using KsckMaster::flags_by_category_; using KsckMaster::version_; private: const bool is_get_flags_available_; @@ -143,7 +149,9 @@ class MockKsckTabletServer : public KsckTabletServer { is_get_flags_available_(is_get_flags_available) { version_ = "mock-version"; if (is_get_flags_available_) { - unusual_flags_.emplace(); + for (size_t cat = FlagsCategory::MIN; cat < FlagsCategory::ARRAY_SIZE; ++cat) { + flags_by_category_[cat].flags.emplace(); + } } } @@ -163,13 +171,16 @@ class MockKsckTabletServer : public KsckTabletServer { return Status::OK(); } - Status FetchUnusualFlags() override { - if (is_get_flags_available_) { - unusual_flags_state_ = KsckFetchState::FETCHED; - return Status::OK(); + Status FetchFlags(const std::vector<FlagsCategory>& categories) override { + for (const auto cat : categories) { + if (is_get_flags_available_) { + flags_by_category_[cat].state = KsckFetchState::FETCHED; + } else { + flags_by_category_[cat].state = KsckFetchState::FETCH_FAILED; + } } - unusual_flags_state_ = KsckFetchState::FETCH_FAILED; - return Status::RemoteError("GetFlags not available"); + return is_get_flags_available_ + ? Status::OK() : Status::RemoteError("GetFlags not available"); } void FetchCurrentTimestampAsync() override {} @@ -203,7 +214,7 @@ class MockKsckTabletServer : public KsckTabletServer { // The fake progress amount for this mock server, used to mock checksum // progress for this server. 
int64_t checksum_progress_ = 10; - using KsckTabletServer::unusual_flags_; + using KsckTabletServer::flags_by_category_; using KsckTabletServer::location_; using KsckTabletServer::version_; @@ -1100,7 +1111,7 @@ TEST_F(KsckTest, TestMasterFlagCheck) { } shared_ptr<MockKsckMaster> master = std::static_pointer_cast<MockKsckMaster>(cluster_->masters_.at(i)); - master->unusual_flags_ = std::move(flags); + master->flags_by_category_[FlagsCategory::UNUSUAL].flags = std::move(flags); } ASSERT_OK(ksck_->CheckMasterHealth()); ASSERT_OK(ksck_->CheckMasterUnusualFlags()); @@ -1233,7 +1244,7 @@ TEST_F(KsckTest, TestTserverFlagCheck) { } shared_ptr<MockKsckTabletServer> ts = std::static_pointer_cast<MockKsckTabletServer>(entry.second); - ts->unusual_flags_ = std::move(flags); + ts->flags_by_category_[FlagsCategory::UNUSUAL].flags = std::move(flags); i++; } ASSERT_OK(ksck_->FetchInfoFromTabletServers()); diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc index 0582de9..310007d 100644 --- a/src/kudu/tools/ksck.cc +++ b/src/kudu/tools/ksck.cc @@ -38,6 +38,7 @@ #include "kudu/gutil/map-util.h" #include "kudu/gutil/port.h" #include "kudu/gutil/strings/join.h" +#include "kudu/gutil/strings/split.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/tablet/tablet.pb.h" #include "kudu/tools/color.h" @@ -53,10 +54,19 @@ } \ } while (0) +#define STR_FLAGS_CATEGORY_TIME_SOURCE "time_source" +#define STR_FLAGS_CATEGORY_UNUSUAL "unusual" + DEFINE_bool(checksum_scan, false, "Perform a checksum scan on data in the cluster."); DEFINE_int32(fetch_info_concurrency, 20, "Number of threads to fetch info concurrently."); +DEFINE_string(flags_categories_to_check, STR_FLAGS_CATEGORY_TIME_SOURCE, + "Comma-separated list of flag categories to check for divergence " + "across the cluster; default is " + STR_FLAGS_CATEGORY_TIME_SOURCE "; available categories are " + STR_FLAGS_CATEGORY_TIME_SOURCE "," + STR_FLAGS_CATEGORY_UNUSUAL "."); DEFINE_string(ksck_format, "plain_concise", "Output 
format for ksck. Available options are 'plain_concise', " @@ -79,11 +89,14 @@ using kudu::cluster_summary::ServerHealth; using kudu::cluster_summary::ServerHealthSummary; using kudu::cluster_summary::TableSummary; using kudu::cluster_summary::TabletSummary; +using kudu::server::GetFlagsResponsePB; using std::atomic; using std::cout; using std::ostream; using std::ostringstream; +using std::pair; +using std::set; using std::shared_ptr; using std::string; using std::vector; @@ -92,6 +105,23 @@ using strings::Substitute; namespace kudu { namespace tools { +DEFINE_validator(flags_categories_to_check, + [](const char* /* flag_name */, const string& value) { + vector<string> categories; + SplitStringUsing(value, ",", &categories); + for (const auto& cat : categories) { + if (cat.empty() || StringToFlagsCategory(cat, nullptr).ok()) { + continue; + } + LOG(ERROR) << Substitute("unknown flag category: '$0' " + "(expecting comma-separated list built out of " + STR_FLAGS_CATEGORY_TIME_SOURCE ", " + STR_FLAGS_CATEGORY_UNUSUAL ")", cat); + return false; + } + return true; +}); + namespace { void BuildConsensusStateForConfigMember(const consensus::ConsensusStatePB& cstate, ConsensusState* ksck_cstate) { @@ -159,6 +189,70 @@ std::ostream& operator<<(std::ostream& lhs, KsckFetchState state) { return lhs; } +const FlagsFetchFilter& GetFlagsCategoryFilter(FlagsCategory category) { + // NOTE: using double braces for std::array aggregate initialization. 
+ static const std::array<FlagsFetchFilter, FlagsCategory::ARRAY_SIZE> kFilters { { + { + // FlagsCategory::TIME_SOURCE + { "time_source", "builtin_ntp_servers", }, + {} + }, + { + // FlagsCategory::UNUSUAL + {}, + { "experimental", "hidden", "unsafe" } + }, + } }; + DCHECK_GE(category, FlagsCategory::MIN); + DCHECK_LE(category, FlagsCategory::MAX); + return kFilters[category]; +} + +const char* FlagsCategoryToString(FlagsCategory category) { + static constexpr const char* const kCategoryTimeSource = + STR_FLAGS_CATEGORY_TIME_SOURCE; + static constexpr const char* const kCategoryUnusual = + STR_FLAGS_CATEGORY_UNUSUAL; + switch (category) { + case FlagsCategory::TIME_SOURCE: + return kCategoryTimeSource; + case FlagsCategory::UNUSUAL: + return kCategoryUnusual; + } + return "unknown"; +} + +Status StringToFlagsCategory(const string& str, FlagsCategory* category) { + if (boost::iequals(str, STR_FLAGS_CATEGORY_TIME_SOURCE)) { + if (category) { + *category = FlagsCategory::TIME_SOURCE; + } + return Status::OK(); + } + if (boost::iequals(str, STR_FLAGS_CATEGORY_UNUSUAL)) { + if (category) { + *category = FlagsCategory::UNUSUAL; + } + return Status::OK(); + } + return Status::InvalidArgument(Substitute("$0: unknown flags category", str)); +} + +Status StringToFlagsCategories(const std::string& str, + vector<FlagsCategory>* categories) { + DCHECK(categories); + vector<string> categories_str(strings::Split(str, ",", strings::SkipEmpty())); + for (const auto& str : categories_str) { + FlagsCategory cat; + RETURN_NOT_OK(StringToFlagsCategory(str, &cat)); + categories->push_back(cat); + } + std::sort(categories->begin(), categories->end()); + categories->erase(std::unique(categories->begin(), categories->end()), + categories->end()); + return Status::OK(); +} + Ksck::Ksck(shared_ptr<KsckCluster> cluster, ostream* out) : cluster_(std::move(cluster)), out_(out == nullptr ? 
&std::cout : out) { @@ -180,35 +274,38 @@ Status Ksck::CheckMasterHealth() { vector<ServerHealthSummary> master_summaries; simple_spinlock master_summaries_lock; + vector<FlagsCategory> flags_categories_to_fetch = { FlagsCategory::UNUSUAL }; + RETURN_NOT_OK(StringToFlagsCategories(FLAGS_flags_categories_to_check, + &flags_categories_to_fetch)); for (const auto& master : cluster_->masters()) { RETURN_NOT_OK(pool_->SubmitFunc([&]() { - ServerHealthSummary sh; - Status s = master->FetchInfo().AndThen([&]() { - return master->FetchConsensusState(); - }); - sh.uuid = master->uuid(); - sh.address = master->address(); - sh.version = master->version(); - sh.status = s; - if (!s.ok()) { - if (IsNotAuthorizedMethodAccess(s)) { - sh.health = ServerHealth::UNAUTHORIZED; - ++unauthorized_masters; - } else { - sh.health = ServerHealth::UNAVAILABLE; - } - ++bad_masters; + ServerHealthSummary sh; + Status s = master->FetchInfo().AndThen([&]() { + return master->FetchConsensusState(); + }); + sh.uuid = master->uuid(); + sh.address = master->address(); + sh.version = master->version(); + sh.status = s; + if (!s.ok()) { + if (IsNotAuthorizedMethodAccess(s)) { + sh.health = ServerHealth::UNAUTHORIZED; + ++unauthorized_masters; + } else { + sh.health = ServerHealth::UNAVAILABLE; } + ++bad_masters; + } - { - std::lock_guard<simple_spinlock> lock(master_summaries_lock); - master_summaries.emplace_back(std::move(sh)); - } + { + std::lock_guard<simple_spinlock> lock(master_summaries_lock); + master_summaries.emplace_back(std::move(sh)); + } - // Fetch the flags information. - // Flag retrieval is not supported by older versions; failure is tracked in - // CheckMasterUnusualFlags(). - ignore_result(master->FetchUnusualFlags()); + // Fetch the flags information in every requested category. + // Flag retrieval is not supported by older versions; failure is tracked + // in CheckTabletServer{Unusual,Diverged}Flags(). 
+ ignore_result(master->FetchFlags(flags_categories_to_fetch)); })); } pool_->Wait(); @@ -273,44 +370,83 @@ Status Ksck::CheckMasterConsensus() { return Status::OK(); } -void Ksck::AddFlagsToFlagMaps(const server::GetFlagsResponsePB& flags, +void Ksck::AddFlagsToFlagMaps(const GetFlagsResponsePB& flags, const string& server_address, KsckFlagToServersMap* flags_to_servers_map, KsckFlagTagsMap* flag_tags_map) { - CHECK(flags_to_servers_map); - CHECK(flag_tags_map); + DCHECK(flags_to_servers_map); for (const auto& f : flags.flags()) { - const std::pair<string, string> key(f.name(), f.value()); + const pair<string, string> key(f.name(), f.value()); if (!InsertIfNotPresent(flags_to_servers_map, key, { server_address })) { FindOrDieNoPrint(*flags_to_servers_map, key).push_back(server_address); } - InsertIfNotPresent(flag_tags_map, f.name(), JoinStrings(f.tags(), ",")); + if (flag_tags_map != nullptr) { + InsertIfNotPresent(flag_tags_map, f.name(), JoinStrings(f.tags(), ",")); + } } } Status Ksck::CheckMasterUnusualFlags() { - int bad_masters = 0; - Status last_error = Status::OK(); + size_t bad_servers = 0; for (const auto& master : cluster_->masters()) { - if (!master->unusual_flags()) { - bad_masters++; + const auto& unusual_flags = master->flags(FlagsCategory::UNUSUAL); + if (!unusual_flags) { + ++bad_servers; continue; } - AddFlagsToFlagMaps(*master->unusual_flags(), + AddFlagsToFlagMaps(*unusual_flags, master->address(), - &results_.master_flag_to_servers_map, - &results_.master_flag_tags_map); + &results_.master_unusual_flag_to_servers_map, + &results_.master_unusual_flag_tags_map); } - if (!results_.master_flag_to_servers_map.empty()) { + if (!results_.master_unusual_flag_to_servers_map.empty()) { results_.warning_messages.push_back(Status::ConfigurationError( "Some masters have unsafe, experimental, or hidden flags set")); } - if (bad_masters > 0) { - return Status::Incomplete( - Substitute("$0 of $1 masters' flags were not available", - bad_masters, 
cluster_->masters().size())); + if (bad_servers > 0) { + return Status::Incomplete(Substitute( + "$0 of $1 masters were not available to retrieve unusual flags", + bad_servers, cluster_->masters().size())); + } + return Status::OK(); +} + +Status Ksck::CheckMasterDivergedFlags() { + vector<FlagsCategory> flags_categories; + RETURN_NOT_OK(StringToFlagsCategories(FLAGS_flags_categories_to_check, + &flags_categories)); + for (const auto cat : flags_categories) { + KsckFlagToServersMap servers_by_flag; + size_t bad_servers = 0; + for (const auto& master : cluster_->masters()) { + const auto& flags = master->flags(cat); + if (!flags) { + ++bad_servers; + continue; + } + AddFlagsToFlagMaps(*flags, master->address(), &servers_by_flag); + AddFlagsToFlagMaps(*flags, + master->address(), + &results_.master_checked_flag_to_servers_map); + } + + for (const auto& e : servers_by_flag) { + if (e.second.size() + bad_servers == cluster_->masters().size()) { + continue; + } + results_.warning_messages.push_back(Status::ConfigurationError( + Substitute("Different masters have different settings for same " + "flags of checked category '$0'", + FlagsCategoryToString(cat)))); + break; + } + if (bad_servers > 0) { + return Status::Incomplete(Substitute( + "$0 of $1 masters were not available to retrieve $2 category flags", + bad_servers, cluster_->masters().size(), FlagsCategoryToString(cat))); + } } return Status::OK(); } @@ -339,6 +475,9 @@ Status Ksck::FetchInfoFromTabletServers() { vector<ServerHealthSummary> tablet_server_summaries; simple_spinlock tablet_server_summaries_lock; + vector<FlagsCategory> flags_categories_to_fetch = { FlagsCategory::UNUSUAL }; + RETURN_NOT_OK(StringToFlagsCategories(FLAGS_flags_categories_to_check, + &flags_categories_to_fetch)); for (const auto& entry : cluster_->tablet_servers()) { const auto& ts = entry.second; RETURN_NOT_OK(pool_->SubmitFunc([&]() { @@ -371,10 +510,10 @@ Status Ksck::FetchInfoFromTabletServers() { 
tablet_server_summaries.push_back(std::move(summary)); } - // Fetch the flags information. - // Flag retrieval is not supported by older versions; failure is tracked in - // CheckTabletServerUnusualFlags(). - ignore_result(ts->FetchUnusualFlags()); + // Fetch the flags information in every requested category. + // Flag retrieval is not supported by older versions; failure is tracked + // in CheckTabletServer{Unusual,Diverged}Flags(). + ignore_result(ts->FetchFlags(flags_categories_to_fetch)); })); } pool_->Wait(); @@ -439,7 +578,9 @@ Status Ksck::Run() { PUSH_PREPEND_NOT_OK(CheckMasterConsensus(), results_.error_messages, "master consensus error"); PUSH_PREPEND_NOT_OK(CheckMasterUnusualFlags(), results_.warning_messages, - "master flag check error"); + "master unusual flags check error"); + PUSH_PREPEND_NOT_OK(CheckMasterDivergedFlags(), results_.warning_messages, + "master diverged flags check error"); // CheckClusterRunning and FetchTableAndTabletInfo must succeed for // subsequent checks to be runnable. 
@@ -460,10 +601,15 @@ Status Ksck::Run() { PUSH_PREPEND_NOT_OK(FetchInfoFromTabletServers(), results_.error_messages, "error fetching info from tablet servers"); PUSH_PREPEND_NOT_OK(CheckTabletServerUnusualFlags(), results_.warning_messages, - "tserver flag check error"); + "tserver unusual flags check error"); + PUSH_PREPEND_NOT_OK(CheckTabletServerDivergedFlags(), results_.warning_messages, + "tserver diverged flags check error"); PUSH_PREPEND_NOT_OK(CheckServerVersions(), results_.warning_messages, "version check error"); + PUSH_PREPEND_NOT_OK(CheckDivergedFlags(), results_.warning_messages, + "diverged flags (both masters and tservers) check error"); + PUSH_PREPEND_NOT_OK(CheckTablesConsistency(), results_.error_messages, "table consistency check error"); @@ -492,28 +638,112 @@ Status Ksck::Run() { } Status Ksck::CheckTabletServerUnusualFlags() { - int bad_tservers = 0; + int bad_servers = 0; for (const auto& uuid_and_ts : cluster_->tablet_servers()) { const auto& tserver = uuid_and_ts.second; - if (!tserver->unusual_flags()) { - bad_tservers++; + const auto& unusual_flags = tserver->flags(FlagsCategory::UNUSUAL); + if (!unusual_flags) { + ++bad_servers; continue; } - AddFlagsToFlagMaps(*tserver->unusual_flags(), + AddFlagsToFlagMaps(*unusual_flags, tserver->address(), - &results_.tserver_flag_to_servers_map, - &results_.tserver_flag_tags_map); + &results_.tserver_unusual_flag_to_servers_map, + &results_.tserver_unusual_flag_tags_map); } - if (!results_.tserver_flag_to_servers_map.empty()) { + if (!results_.tserver_unusual_flag_to_servers_map.empty()) { results_.warning_messages.push_back(Status::ConfigurationError( "Some tablet servers have unsafe, experimental, or hidden flags set")); } - if (bad_tservers > 0) { - return Status::Incomplete( - Substitute("$0 of $1 tservers' flags were not available", - bad_tservers, cluster_->tablet_servers().size())); + if (bad_servers > 0) { + return Status::Incomplete(Substitute( + "$0 of $1 tservers were not available to 
retrieve unusual flags", + bad_servers, cluster_->tablet_servers().size())); + } + return Status::OK(); +} + +Status Ksck::CheckTabletServerDivergedFlags() { + vector<FlagsCategory> flags_categories; + RETURN_NOT_OK(StringToFlagsCategories(FLAGS_flags_categories_to_check, + &flags_categories)); + for (const auto cat : flags_categories) { + KsckFlagToServersMap servers_by_flag; + size_t bad_servers = 0; + for (const auto& uuid_and_ts : cluster_->tablet_servers()) { + const auto& tserver = uuid_and_ts.second; + const auto& flags = tserver->flags(cat); + if (!flags) { + ++bad_servers; + continue; + } + AddFlagsToFlagMaps(*flags, tserver->address(), &servers_by_flag); + AddFlagsToFlagMaps(*flags, + tserver->address(), + &results_.tserver_checked_flag_to_servers_map); + } + for (const auto& e : servers_by_flag) { + if (e.second.size() + bad_servers == cluster_->tablet_servers().size()) { + continue; + } + results_.warning_messages.push_back(Status::ConfigurationError( + Substitute("Different tservers have different settings for same " + "flags of checked category '$0'", + FlagsCategoryToString(cat)))); + break; + } + if (bad_servers > 0) { + return Status::Incomplete(Substitute( + "$0 of $1 tservers were not available to retrieve $2 category flags", + bad_servers, + cluster_->tablet_servers().size(), + FlagsCategoryToString(cat))); + } + } + return Status::OK(); +} + +Status Ksck::CheckDivergedFlags() { + set<KsckFlag> masters_flags; + for (const auto& elem : results_.master_checked_flag_to_servers_map) { + InsertOrDieNoPrint(&masters_flags, elem.first); + } + set<KsckFlag> tservers_flags; + for (const auto& elem : results_.tserver_checked_flag_to_servers_map) { + InsertOrDieNoPrint(&tservers_flags, elem.first); + } + + vector<KsckFlag> symm_diff; + std::set_symmetric_difference(masters_flags.begin(), + masters_flags.end(), + tservers_flags.begin(), + tservers_flags.end(), + back_inserter(symm_diff)); + if (!symm_diff.empty()) { + for (const auto& f : symm_diff) { + { + 
const auto* e = FindOrNull(results_.master_checked_flag_to_servers_map, f); + if (e) { + InsertOrDieNoPrint(&results_.master_diverged_flag_to_servers_map, f, *e); + continue; + } + } + { + const auto* e = FindOrNull(results_.tserver_checked_flag_to_servers_map, f); + if (e) { + InsertOrDieNoPrint(&results_.tserver_diverged_flag_to_servers_map, f, *e); + continue; + } + } + // The flag/value pair must be either of masters' or tservers'. + LOG(DFATAL) << "found neither masters' or tservers' flag: " << f.first; + } + + results_.warning_messages.push_back(Status::ConfigurationError( + "Same flags have different values between masters and tservers " + "for at least one checked flag category")); } return Status::OK(); } @@ -686,7 +916,7 @@ HealthCheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, } // Organize consensus info for each replica. - std::pair<string, string> tablet_key = std::make_pair(ts->uuid(), tablet->id()); + pair<string, string> tablet_key = std::make_pair(ts->uuid(), tablet->id()); if (ContainsKey(ts->tablet_consensus_state_map(), tablet_key)) { const auto& cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map(), tablet_key); ConsensusState ksck_cstate; diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h index 2c3b63e..5dee873 100644 --- a/src/kudu/tools/ksck.h +++ b/src/kudu/tools/ksck.h @@ -18,6 +18,7 @@ // Ksck, a tool to run a Kudu System Check. #pragma once +#include <array> #include <atomic> #include <cstdint> #include <iosfwd> @@ -174,6 +175,57 @@ enum class KsckFetchState { // Required for logging in case of CHECK failures. std::ostream& operator<<(std::ostream& lhs, KsckFetchState state); +// Enum representing logical categories of flags used in a Kudu cluster. +enum FlagsCategory { + // Flags specific to the time source, clock, and alike. + TIME_SOURCE = 0, + + // Flags tagged hidden, experimental, or unsafe. + UNUSUAL = 1, + + // Utility members used for range-related expressions. 
+ // NOTE: update the MAX member upon updates + MIN = 0, + MAX = UNUSUAL, + ARRAY_SIZE = MAX + 1, +}; + +// Information on flags fetched using GetFlags() RPC. Consists of the status of +// the GetFlags() operation and the result flags wrapped into boost::optional, +// where the latter is boost::none unless the RPC was successful. +struct FetchedFlags { + KsckFetchState state = KsckFetchState::UNINITIALIZED; + boost::optional<server::GetFlagsResponsePB> flags; +}; + +// Flags retrieved using GetFlags(), indexed by FlagsCategory. +typedef std::array<FetchedFlags, FlagsCategory::ARRAY_SIZE> FetchedFlagsByCategory; + +// Structure to represent a filter to build appropriate request for GetFlags() +// RPC. +struct FlagsFetchFilter { + const std::vector<std::string> flags; + const std::vector<std::string> tags; +}; + +// Get filter to build GetFlags() RPC for the specified flags category. +const FlagsFetchFilter& GetFlagsCategoryFilter(FlagsCategory category); + +// Get string representation for the specified flag category. +const char* FlagsCategoryToString(FlagsCategory category); + +// Convert string representation of flag category into corresponding enum field. +// On success, returns Status::OK() and outputs the result category into the +// 'category' output parameter. On failure returns Status::InvalidParameter(). +Status StringToFlagsCategory(const std::string& str, + FlagsCategory* category); + +// Convert comma-separated string into vector of flag categories. The result +// is appended to the 'categories' output parameter. In addition, this function +// removes duplicates in resulting 'categories'. +Status StringToFlagsCategories(const std::string& str, + std::vector<FlagsCategory>* categories); + // The following three classes must be extended in order to communicate with their respective // components. The two main use cases envisioned for this are: // - To be able to mock a cluster to more easily test the ksck checks. 
@@ -183,8 +235,8 @@ std::ostream& operator<<(std::ostream& lhs, KsckFetchState state); class KsckMaster { public: explicit KsckMaster(std::string address) : - address_(std::move(address)), - uuid_(strings::Substitute("$0 ($1)", kDummyUuid, address_)) {} + address_(std::move(address)), + uuid_(strings::Substitute("$0 ($1)", kDummyUuid, address_)) {} virtual ~KsckMaster() = default; @@ -196,9 +248,8 @@ class KsckMaster { // Connects to the master and populates the consensus map. virtual Status FetchConsensusState() = 0; - // Retrieves "unusual" flags from the KsckMaster. - // "Unusual" flags ares ones tagged hidden, experimental, or unsafe. - virtual Status FetchUnusualFlags() = 0; + // Fetch flags for the requested categories. + virtual Status FetchFlags(const std::vector<FlagsCategory>& categories) = 0; // Since masters are provided by address, FetchInfo() must be called before // calling this method. @@ -221,9 +272,12 @@ class KsckMaster { return cstate_; } - virtual const boost::optional<server::GetFlagsResponsePB>& unusual_flags() const { - CHECK_NE(KsckFetchState::UNINITIALIZED, unusual_flags_state_); - return unusual_flags_; + virtual const boost::optional<server::GetFlagsResponsePB>& flags( + FlagsCategory category) const { + CHECK_GE(category, FlagsCategory::MIN); + CHECK_LE(category, FlagsCategory::MAX); + CHECK_NE(KsckFetchState::UNINITIALIZED, flags_by_category_[category].state); + return flags_by_category_[category].flags; } std::string ToString() const { @@ -252,15 +306,12 @@ class KsckMaster { // May be none if fetching info from the master fails. boost::optional<std::string> version_; + // Fetched flags indexed by category. + FetchedFlagsByCategory flags_by_category_; + // May be none if consensus state fetch fails. boost::optional<consensus::ConsensusStatePB> cstate_; - // unusual_flags_state_ reflects whether the fetch of the non-critical flags - // info has been done, and if it succeeded or failed. 
- KsckFetchState unusual_flags_state_ = KsckFetchState::UNINITIALIZED; - // May be none if flag fetch fails. - boost::optional<server::GetFlagsResponsePB> unusual_flags_; - private: DISALLOW_COPY_AND_ASSIGN(KsckMaster); }; @@ -272,14 +323,15 @@ class KsckTabletServer { typedef std::unordered_map<std::string, tablet::TabletStatusPB> TabletStatusMap; // Map from (tserver id, tablet id) to tablet consensus information. - typedef std::map - <std::pair<std::string, std::string>, consensus::ConsensusStatePB> TabletConsensusStateMap; + typedef std::map<std::pair<std::string, std::string>, + consensus::ConsensusStatePB> + TabletConsensusStateMap; explicit KsckTabletServer(std::string uuid, std::string location = "") : uuid_(std::move(uuid)), location_(std::move(location)) {} - virtual ~KsckTabletServer() { } + virtual ~KsckTabletServer() {} // Connects to the configured tablet server and populates the fields of this class. 'health' must // not be nullptr. @@ -296,9 +348,8 @@ class KsckTabletServer { // Otherwise 'health' will be UNAVAILABLE virtual Status FetchConsensusState(cluster_summary::ServerHealth* health) = 0; - // Retrieves "unusual" flags from the KsckTabletServer. - // "Unusual" flags ares ones tagged hidden, experimental, or unsafe. - virtual Status FetchUnusualFlags() = 0; + // Fetches flags for the requested categories. + virtual Status FetchFlags(const std::vector<FlagsCategory>& categories) = 0; // Fetches and updates the current timestamp from the tablet server. 
virtual void FetchCurrentTimestampAsync() = 0; @@ -354,9 +405,12 @@ class KsckTabletServer { return version_; } - virtual const boost::optional<server::GetFlagsResponsePB>& unusual_flags() const { - CHECK_NE(KsckFetchState::UNINITIALIZED, unusual_flags_state_); - return unusual_flags_; + virtual const boost::optional<server::GetFlagsResponsePB>& flags( + FlagsCategory category) const { + DCHECK_GE(category, FlagsCategory::MIN); + DCHECK_LE(category, FlagsCategory::MAX); + CHECK_NE(KsckFetchState::UNINITIALIZED, flags_by_category_[category].state); + return flags_by_category_[category].flags; } virtual const boost::optional<cluster_summary::QuiescingInfo>& quiescing_info() const { @@ -382,25 +436,21 @@ class KsckTabletServer { // it succeeded or failed. KsckFetchState state_ = KsckFetchState::UNINITIALIZED; - TabletStatusMap tablet_status_map_; - TabletConsensusStateMap tablet_consensus_state_map_; - // May be none if fetching info from the tablet server fails. boost::optional<std::string> version_; - // unusual_flags_state_ reflects whether the fetch of the non-critical flags - // info has been done, and if it succeeded or failed. - KsckFetchState unusual_flags_state_ = KsckFetchState::UNINITIALIZED; + // Fetched flags indexed by category. + FetchedFlagsByCategory flags_by_category_; - // May be none if flag fetch fails. - boost::optional<server::GetFlagsResponsePB> unusual_flags_; + TabletStatusMap tablet_status_map_; + TabletConsensusStateMap tablet_consensus_state_map_; // May be none if the quiescing request fails. boost::optional<cluster_summary::QuiescingInfo> quiescing_info_; - std::atomic<uint64_t> timestamp_; const std::string uuid_; std::string location_; + std::atomic<uint64_t> timestamp_; private: DISALLOW_COPY_AND_ASSIGN(KsckTabletServer); @@ -562,6 +612,10 @@ class Ksck { // Must first call CheckMasterHealth(). Status CheckMasterUnusualFlags(); + // Check for the consistency of flag values across all masters in the cluster. 
+ // Must first call CheckMasterHealth(). + Status CheckMasterDivergedFlags(); + // Verifies that it can connect to the cluster, i.e. that it can contact a // leader master. Status CheckClusterRunning(); @@ -581,6 +635,17 @@ class Ksck { // Must first call FetchInfoFromTabletServers(). Status CheckTabletServerUnusualFlags(); + // Check for the consistency of flag values across all tablet servers + // in the cluster. + // Must first call FetchInfoFromTabletServers(). + Status CheckTabletServerDivergedFlags(); + + // Check for the consistency of flag values across all tablet servers and + // masters in the cluster. + // Must first call CheckMasterDivergedFlags() and + // CheckTabletServerDivergedFlags(). + Status CheckDivergedFlags(); + // Check for version inconsistencies among all servers. Status CheckServerVersions(); @@ -610,13 +675,14 @@ class Ksck { private: friend class KsckTest; - // Accumulate information about flags from a server into a FlagToServersMap and - // a FlagTagsMap. - // 'flags_to_server_map' and 'flag_tags_map' must not be null. - void AddFlagsToFlagMaps(const server::GetFlagsResponsePB& flags, - const std::string& server_address, - KsckFlagToServersMap* flags_to_servers_map, - KsckFlagTagsMap* flag_tags_map); + // Accumulate information about flags from a server into a FlagToServersMap + // and, optionally, flag's tags into a FlagTagsMap. 'flags_to_server_map' must + // not be null; 'flag_tags_map' may be null: in such case no information on + // flag's tags is accumulated. 
+ static void AddFlagsToFlagMaps(const server::GetFlagsResponsePB& flags, + const std::string& server_address, + KsckFlagToServersMap* flags_to_servers_map, + KsckFlagTagsMap* flag_tags_map = nullptr); bool VerifyTable(const std::shared_ptr<KsckTable>& table); diff --git a/src/kudu/tools/ksck_remote.cc b/src/kudu/tools/ksck_remote.cc index 1afd91a..2804eca 100644 --- a/src/kudu/tools/ksck_remote.cc +++ b/src/kudu/tools/ksck_remote.cc @@ -20,7 +20,6 @@ #include <atomic> #include <cstdint> #include <functional> -#include <initializer_list> #include <map> #include <mutex> #include <ostream> @@ -95,6 +94,9 @@ using kudu::master::TServerStatePB; using kudu::rpc::Messenger; using kudu::rpc::MessengerBuilder; using kudu::rpc::RpcController; +using kudu::server::GenericServiceProxy; +using kudu::server::GetFlagsRequestPB; +using kudu::server::GetFlagsResponsePB; using std::shared_ptr; using std::string; using std::unique_ptr; @@ -111,17 +113,24 @@ MonoDelta GetDefaultTimeout() { return MonoDelta::FromMilliseconds(FLAGS_timeout_ms); } -// Common flag-fetching routine for masters and tablet servers. -Status FetchUnusualFlagsCommon(const shared_ptr<server::GenericServiceProxy>& proxy, - server::GetFlagsResponsePB* resp) { - server::GetFlagsRequestPB req; - RpcController rpc; - rpc.set_timeout(GetDefaultTimeout()); - for (const string& tag : { "experimental", "hidden", "unsafe" }) { +// Common flag-fetching routine. Fetches flags for the specified category, +// given service proxy object. 
+Status FetchCategoryFlags(FlagsCategory category, + const shared_ptr<GenericServiceProxy>& proxy, + GetFlagsResponsePB* resp) { + const auto& filter = GetFlagsCategoryFilter(category); + GetFlagsRequestPB req; + for (const auto& flag : filter.flags) { + req.add_flags(flag); + } + for (const auto& tag : filter.tags) { req.add_tags(tag); } - return proxy->GetFlags(req, resp, &rpc); + RpcController ctl; + ctl.set_timeout(GetDefaultTimeout()); + return proxy->GetFlags(req, resp, &ctl); } + } // anonymous namespace Status RemoteKsckMaster::Init() { @@ -174,16 +183,20 @@ Status RemoteKsckMaster::FetchConsensusState() { return Status::OK(); } -Status RemoteKsckMaster::FetchUnusualFlags() { - server::GetFlagsResponsePB resp; - Status s = FetchUnusualFlagsCommon(generic_proxy_, &resp); - if (!s.ok()) { - unusual_flags_state_ = KsckFetchState::FETCH_FAILED; - } else { - unusual_flags_state_ = KsckFetchState::FETCHED; - unusual_flags_ = std::move(resp); +Status RemoteKsckMaster::FetchFlags(const vector<FlagsCategory>& categories) { + Status result; + for (auto cat : categories) { + GetFlagsResponsePB resp; + const auto s = FetchCategoryFlags(cat, generic_proxy_, &resp); + if (!s.ok()) { + flags_by_category_[cat].state = KsckFetchState::FETCH_FAILED; + result = result.ok() ? 
s : result.CloneAndAppend(s.message()); + } else { + flags_by_category_[cat].state = KsckFetchState::FETCHED; + flags_by_category_[cat].flags = std::move(resp); + } } - return s; + return result; } Status RemoteKsckTabletServer::Init() { @@ -323,16 +336,20 @@ Status RemoteKsckTabletServer::FetchConsensusState(ServerHealth* health) { return Status::OK(); } -Status RemoteKsckTabletServer::FetchUnusualFlags() { - server::GetFlagsResponsePB resp; - Status s = FetchUnusualFlagsCommon(generic_proxy_, &resp); - if (!s.ok()) { - unusual_flags_state_ = KsckFetchState::FETCH_FAILED; - } else { - unusual_flags_state_ = KsckFetchState::FETCHED; - unusual_flags_ = std::move(resp); +Status RemoteKsckTabletServer::FetchFlags(const vector<FlagsCategory>& categories) { + Status result; + for (auto cat : categories) { + GetFlagsResponsePB resp; + const auto s = FetchCategoryFlags(cat, generic_proxy_, &resp); + if (!s.ok()) { + flags_by_category_[cat].state = KsckFetchState::FETCH_FAILED; + result = result.ok() ? s : result.CloneAndAppend(s.message()); + } else { + flags_by_category_[cat].state = KsckFetchState::FETCHED; + flags_by_category_[cat].flags = std::move(resp); + } } - return s; + return result; } class ChecksumStepper; diff --git a/src/kudu/tools/ksck_remote.h b/src/kudu/tools/ksck_remote.h index c1dc4ab..b82e6f4 100644 --- a/src/kudu/tools/ksck_remote.h +++ b/src/kudu/tools/ksck_remote.h @@ -82,7 +82,7 @@ class RemoteKsckMaster : public KsckMaster { // Gathers consensus state for the master tablet. 
Status FetchConsensusState() override; - Status FetchUnusualFlags() override; + Status FetchFlags(const std::vector<FlagsCategory>& categories) override; private: std::shared_ptr<rpc::Messenger> messenger_; @@ -111,7 +111,7 @@ class RemoteKsckTabletServer : public KsckTabletServer, Status FetchConsensusState(cluster_summary::ServerHealth* health) override; - Status FetchUnusualFlags() override; + Status FetchFlags(const std::vector<FlagsCategory>& categories) override; void FetchCurrentTimestampAsync() override; Status FetchCurrentTimestamp() override; diff --git a/src/kudu/tools/ksck_results.cc b/src/kudu/tools/ksck_results.cc index 16a9506..eebd5e7 100644 --- a/src/kudu/tools/ksck_results.cc +++ b/src/kudu/tools/ksck_results.cc @@ -172,6 +172,12 @@ string ServerCsv(int server_count, const vector<string>& servers) { } // anonymous namespace Status KsckResults::PrintTo(PrintMode mode, int sections, ostream& out) { + static const char* const kMsgCheckedFlags = + "Flags of checked categories for $0:"; + static const char* const kMsgUnusualFlags = "Unusual flags for $0:"; + static const char* const kMsgDivergedFlags = + "Flags of checked categories for $0 diverging from $1 flags:"; + if (mode == PrintMode::JSON_PRETTY || mode == PrintMode::JSON_COMPACT) { return PrintJsonTo(mode, sections, out); } @@ -192,12 +198,35 @@ Status KsckResults::PrintTo(PrintMode mode, int sections, ostream& out) { } out << endl; - RETURN_NOT_OK(PrintFlagTable(ServerType::MASTER, - cluster_status.master_summaries.size(), - master_flag_to_servers_map, - master_flag_tags_map, - out)); - if (!master_flag_to_servers_map.empty()) { + if (!master_unusual_flag_to_servers_map.empty()) { + out << Substitute(kMsgUnusualFlags, + ServerTypeToString(ServerType::MASTER)) << endl; + RETURN_NOT_OK(PrintTaggedFlagTable(ServerType::MASTER, + cluster_status.master_summaries.size(), + master_unusual_flag_to_servers_map, + master_unusual_flag_tags_map, + out)); + out << endl; + } + + if 
(!master_checked_flag_to_servers_map.empty()) { + out << Substitute(kMsgCheckedFlags, + ServerTypeToString(ServerType::MASTER)) << endl; + RETURN_NOT_OK(PrintFlagTable(ServerType::MASTER, + cluster_status.master_summaries.size(), + master_checked_flag_to_servers_map, + out)); + out << endl; + } + + if (!master_diverged_flag_to_servers_map.empty()) { + out << Substitute(kMsgDivergedFlags, + ServerTypeToString(ServerType::MASTER), + ServerTypeToString(ServerType::TABLET_SERVER)) << endl; + RETURN_NOT_OK(PrintFlagTable(ServerType::MASTER, + cluster_status.master_summaries.size(), + master_diverged_flag_to_servers_map, + out)); out << endl; } } @@ -221,12 +250,35 @@ Status KsckResults::PrintTo(PrintMode mode, int sections, ostream& out) { out << endl; } - RETURN_NOT_OK(PrintFlagTable(ServerType::TABLET_SERVER, - cluster_status.tserver_summaries.size(), - tserver_flag_to_servers_map, - tserver_flag_tags_map, - out)); - if (!tserver_flag_to_servers_map.empty()) { + if (!tserver_unusual_flag_to_servers_map.empty()) { + out << Substitute(kMsgUnusualFlags, + ServerTypeToString(ServerType::TABLET_SERVER)) << endl; + RETURN_NOT_OK(PrintTaggedFlagTable(ServerType::TABLET_SERVER, + cluster_status.tserver_summaries.size(), + tserver_unusual_flag_to_servers_map, + tserver_unusual_flag_tags_map, + out)); + out << endl; + } + + if (!tserver_checked_flag_to_servers_map.empty()) { + out << Substitute(kMsgCheckedFlags, + ServerTypeToString(ServerType::TABLET_SERVER)) << endl; + RETURN_NOT_OK(PrintFlagTable(ServerType::TABLET_SERVER, + cluster_status.tserver_summaries.size(), + tserver_checked_flag_to_servers_map, + out)); + out << endl; + } + + if (!tserver_diverged_flag_to_servers_map.empty()) { + out << Substitute(kMsgDivergedFlags, + ServerTypeToString(ServerType::TABLET_SERVER), + ServerTypeToString(ServerType::MASTER)) << endl; + RETURN_NOT_OK(PrintFlagTable(ServerType::TABLET_SERVER, + cluster_status.tserver_summaries.size(), + tserver_diverged_flag_to_servers_map, + out)); out 
<< endl; } } @@ -234,8 +286,8 @@ Status KsckResults::PrintTo(PrintMode mode, int sections, ostream& out) { // Finally, in the "server section", print the version summary. if (sections & PrintSections::VERSION_SUMMARIES) { RETURN_NOT_OK(PrintVersionTable(version_summaries, - cluster_status.master_summaries.size() - + cluster_status.tserver_summaries.size(), + cluster_status.master_summaries.size() + + cluster_status.tserver_summaries.size(), out)); out << endl; } @@ -432,11 +484,29 @@ Status PrintServerHealthSummaries(ServerType type, Status PrintFlagTable(ServerType type, int num_servers, const KsckFlagToServersMap& flag_to_servers_map, - const KsckFlagTagsMap& flag_tags_map, ostream& out) { if (flag_to_servers_map.empty()) { return Status::OK(); } + DataTable flags_table({"Flag", "Value", ServerTypeToString(type)}); + for (const auto& flag : flag_to_servers_map) { + const string& name = flag.first.first; + const string& value = flag.first.second; + flags_table.AddRow({name, + value, + ServerCsv(num_servers, flag.second)}); + } + return flags_table.PrintTo(out); +} + +Status PrintTaggedFlagTable(ServerType type, + int num_servers, + const KsckFlagToServersMap& flag_to_servers_map, + const KsckFlagTagsMap& flag_tags_map, + ostream& out) { + if (flag_to_servers_map.empty()) { + return Status::OK(); + } DataTable flags_table({"Flag", "Value", "Tags", ServerTypeToString(type)}); for (const auto& flag : flag_to_servers_map) { const string& name = flag.first.first; diff --git a/src/kudu/tools/ksck_results.h b/src/kudu/tools/ksck_results.h index ec69b6b..4d68086 100644 --- a/src/kudu/tools/ksck_results.h +++ b/src/kudu/tools/ksck_results.h @@ -126,10 +126,16 @@ struct KsckResults { KsckVersionToServersMap version_summaries; // Information about the flags of masters and tablet servers. 
- KsckFlagToServersMap master_flag_to_servers_map; - KsckFlagTagsMap master_flag_tags_map; - KsckFlagToServersMap tserver_flag_to_servers_map; - KsckFlagTagsMap tserver_flag_tags_map; + KsckFlagToServersMap master_checked_flag_to_servers_map; + KsckFlagToServersMap master_unusual_flag_to_servers_map; + KsckFlagTagsMap master_unusual_flag_tags_map; + KsckFlagToServersMap tserver_checked_flag_to_servers_map; + KsckFlagToServersMap tserver_unusual_flag_to_servers_map; + KsckFlagTagsMap tserver_unusual_flag_tags_map; + + // Information on flags diverged between masters and tablet servers. + KsckFlagToServersMap master_diverged_flag_to_servers_map; + KsckFlagToServersMap tserver_diverged_flag_to_servers_map; // Any special states that the tablet servers may be in. KsckTServerStateMap ts_states; @@ -156,15 +162,22 @@ Status PrintServerHealthSummaries( const std::vector<cluster_summary::ServerHealthSummary>& summaries, std::ostream& out); -// Print a formatted summary of the flags in 'flag_to_servers_map', indicating -// which servers have which (flag, value) pairs set. -// Flag tag information is sourced from 'flag_tags_map'. +// Print a formatted summary of the flags in 'flag_to_servers_map', +// indicating which servers have which (flag, value) pairs set. Status PrintFlagTable(cluster_summary::ServerType type, int num_servers, const KsckFlagToServersMap& flag_to_servers_map, - const KsckFlagTagsMap& flag_tags_map, std::ostream& out); +// Similar to PrintFlagTable(), but also output information on tags for +// flags in 'flag_to_servers_map'. Flag tag information is sourced from +// 'flag_tags_map'. 
+Status PrintTaggedFlagTable(cluster_summary::ServerType type, + int num_servers, + const KsckFlagToServersMap& flag_to_servers_map, + const KsckFlagTagsMap& flag_tags_map, + std::ostream& out); + Status PrintTServerStatesTable(const KsckTServerStateMap& ts_states, std::ostream& out); diff --git a/src/kudu/tools/tool_action_cluster.cc b/src/kudu/tools/tool_action_cluster.cc index 4f8f4b9..81316b2 100644 --- a/src/kudu/tools/tool_action_cluster.cc +++ b/src/kudu/tools/tool_action_cluster.cc @@ -386,6 +386,7 @@ unique_ptr<Mode> BuildClusterMode() { .AddOptionalParameter("color") .AddOptionalParameter("consensus") .AddOptionalParameter("fetch_info_concurrency") + .AddOptionalParameter("flags_categories_to_check") .AddOptionalParameter("ksck_format") .AddOptionalParameter("quiescing_info") .AddOptionalParameter("sections")
