This is an automated email from the ASF dual-hosted git repository. awong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 59070bf5bd5924c6e4deb68434744cac3b062dcc Author: Abhishek Chennaka <[email protected]> AuthorDate: Mon Oct 11 11:36:55 2021 -0700 KUDU-1959 - Implement aggregate startup progress metrics We expose the below metrics as a part of this commit: * startup_progress_steps_remaining : count of server startup steps which are yet to be completed. This value is in the range [0,4]. * startup_progress_time_elapsed : the time elapsed so far for the server to start up. If the startup is completed, this is the total time taken for the startup. This is in milliseconds. These metrics are primarily expected to be used by third-party monitoring tools to see how long the server has historically taken to start up, for any sort of trend analysis. The startup_progress_time_elapsed metric can also be used to check the previous startup time as an alternative to the startup page in the WebUI. Change-Id: I0a508c3baf0a0d77baf75f36f7bb305a6ad821e1 Reviewed-on: http://gerrit.cloudera.org:8080/17903 Tested-by: Kudu Jenkins Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/server/server_base.cc | 2 +- src/kudu/server/startup_path_handler.cc | 50 ++++++++++++++++++++++++++++++++- src/kudu/server/startup_path_handler.h | 11 +++++++- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/kudu/server/server_base.cc b/src/kudu/server/server_base.cc index bbbcefa..ec44729 100644 --- a/src/kudu/server/server_base.cc +++ b/src/kudu/server/server_base.cc @@ -495,7 +495,7 @@ ServerBase::ServerBase(string name, const ServerBaseOptions& options, file_cache_(new FileCache("file cache", options.env, GetFileCacheCapacity(options.env), metric_entity_)), rpc_server_(new RpcServer(options.rpc_opts)), - startup_path_handler_(new StartupPathHandler), + startup_path_handler_(new StartupPathHandler(metric_entity_)), result_tracker_(new rpc::ResultTracker(shared_ptr<MemTracker>( MemTracker::CreateTracker(-1, "result-tracker", mem_tracker_)))), is_first_run_(false), diff --git 
a/src/kudu/server/startup_path_handler.cc b/src/kudu/server/startup_path_handler.cc index 93f839e..6e1948f 100644 --- a/src/kudu/server/startup_path_handler.cc +++ b/src/kudu/server/startup_path_handler.cc @@ -24,10 +24,24 @@ #include "kudu/gutil/strings/human_readable.h" #include "kudu/server/webserver.h" #include "kudu/util/easy_json.h" +#include "kudu/util/metrics.h" #include "kudu/util/monotime.h" #include "kudu/util/timer.h" #include "kudu/util/web_callback_registry.h" +METRIC_DEFINE_gauge_int32(server, startup_progress_steps_remaining, + "Server Startup Steps Remaining", + kudu::MetricUnit::kUnits, + "Server startup progress steps remaining ", + kudu::MetricLevel::kWarn); + +METRIC_DEFINE_gauge_int64(server, startup_progress_time_elapsed, + "Server Startup Progress Time Elapsed", + kudu::MetricUnit::kMilliseconds, + "Time taken by the server to complete the startup or " + "time elapsed so far for the server to startup", + kudu::MetricLevel::kInfo); + using std::ifstream; using std::ostringstream; using std::string; @@ -43,13 +57,19 @@ void SetWebResponse(EasyJson* output, const string& step, (startup_step.TimeElapsed()).ToSeconds())); } -StartupPathHandler::StartupPathHandler(): +StartupPathHandler::StartupPathHandler(const scoped_refptr<MetricEntity>& entity): tablets_processed_(0), tablets_total_(0), containers_processed_(0), containers_total_(0), is_tablet_server_(false), is_using_lbm_(true) { + METRIC_startup_progress_steps_remaining.InstantiateFunctionGauge(entity, + [this]() {return StartupProgressStepsRemainingMetric();}) + ->AutoDetachToLastValue(&metric_detacher_); + METRIC_startup_progress_time_elapsed.InstantiateFunctionGauge(entity, + [this]() {return StartupProgressTimeElapsedMetric().ToMilliseconds();}) + ->AutoDetachToLastValue(&metric_detacher_); } void StartupPathHandler::Startup(const Webserver::WebRequest& /*req*/, @@ -116,5 +136,33 @@ void StartupPathHandler::set_is_tablet_server(bool is_tablet_server) { void
StartupPathHandler::set_is_using_lbm(bool is_using_lbm) { is_using_lbm_ = is_using_lbm; } + +int StartupPathHandler::StartupProgressStepsRemainingMetric() { + int counter = 0; + counter += (init_progress_.IsStopped() ? 0 : 1); + counter += (read_filesystem_progress_.IsStopped() ? 0 : 1); + // Count exactly one role-specific step so the total stays within [0,4]. + if (is_tablet_server_) { + counter += start_tablets_progress_.IsStopped() ? 0 : 1; + } else { + counter += initialize_master_catalog_progress_.IsStopped() ? 0 : 1; + } + counter += (start_rpc_server_progress_.IsStopped() ? 0 : 1); + return counter; +} + +MonoDelta StartupPathHandler::StartupProgressTimeElapsedMetric() { + MonoDelta time_elapsed; + time_elapsed = init_progress_.TimeElapsed(); + time_elapsed += read_filesystem_progress_.TimeElapsed(); + if (is_tablet_server_) { + time_elapsed += start_tablets_progress_.TimeElapsed(); + } else { + time_elapsed += initialize_master_catalog_progress_.TimeElapsed(); + } + time_elapsed += start_rpc_server_progress_.TimeElapsed(); + return time_elapsed; +} + } // namespace server } // namespace kudu diff --git a/src/kudu/server/startup_path_handler.h b/src/kudu/server/startup_path_handler.h index 1a32281..33ef1f4 100644 --- a/src/kudu/server/startup_path_handler.h +++ b/src/kudu/server/startup_path_handler.h @@ -18,7 +18,10 @@ #include <atomic> +#include "kudu/gutil/ref_counted.h" #include "kudu/server/webserver.h" +#include "kudu/util/metrics.h" +#include "kudu/util/monotime.h" #include "kudu/util/timer.h" namespace kudu { @@ -28,7 +31,7 @@ namespace server { class StartupPathHandler { public: - StartupPathHandler(); + explicit StartupPathHandler(const scoped_refptr<MetricEntity>& entity); // Populate the response output with the current information void Startup(const Webserver::WebRequest &req, Webserver::WebResponse *resp); @@ -50,6 +53,10 @@ public: void set_is_tablet_server(bool is_tablet_server); void set_is_using_lbm(bool is_using_lbm); + // Call
back functions for the aggregate steps-remaining and time-elapsed metrics + int StartupProgressStepsRemainingMetric(); + MonoDelta StartupProgressTimeElapsedMetric(); + private: // Hold the initialization step progress information like the status, start and end time. Timer init_progress_; @@ -91,6 +98,8 @@ private: // We do not open containers if file block manager is being used and hence display different // webpage contents if file block manager is being used. bool is_using_lbm_; + + FunctionGaugeDetacher metric_detacher_; }; } // namespace server
