This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d9d700  [clock] KUDU-3048 add builtin-NTP-specific metrics
7d9d700 is described below

commit 7d9d7009ac288327a54ea694d171f8c2ad23165d
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Feb 19 10:57:06 2020 -0800

    [clock] KUDU-3048 add builtin-NTP-specific metrics
    
    Added metrics specific to the built-in NTP client:
      * info : difference between tracked true time and local wallclock
      * debug: most recently computed true time
      * debug: histogram on the maximum error of the computed true time
    
    For testing, I manually ran kudu-master and verified the newly
    introduced metrics are present and their values make sense.
    
    I also re-visited the level of recently introduced metrics in the
    HybridClock class.
    
    Change-Id: Ia0a13d58c6e5da50ef7a98ae73de267ec4eab491
    Reviewed-on: http://gerrit.cloudera.org:8080/15249
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <[email protected]>
---
 src/kudu/clock/builtin_ntp.cc  | 76 +++++++++++++++++++++++++++++++++++++++---
 src/kudu/clock/builtin_ntp.h   | 31 ++++++++++++++---
 src/kudu/clock/hybrid_clock.cc | 24 ++++++++-----
 src/kudu/clock/hybrid_clock.h  |  5 ++-
 src/kudu/clock/ntp-test.cc     | 24 +++++++++----
 5 files changed, 135 insertions(+), 25 deletions(-)

diff --git a/src/kudu/clock/builtin_ntp.cc b/src/kudu/clock/builtin_ntp.cc
index ee2893f..c9ed6aa 100644
--- a/src/kudu/clock/builtin_ntp.cc
+++ b/src/kudu/clock/builtin_ntp.cc
@@ -27,6 +27,7 @@
 #include <cstdint>
 #include <cstring>
 #include <deque>
+#include <limits>
 #include <memory>
 #include <mutex>
 #include <ostream>
@@ -38,6 +39,8 @@
 #include <glog/logging.h>
 
 #include "kudu/clock/builtin_ntp-internal.h"
+#include "kudu/gutil/bind.h"
+#include "kudu/gutil/bind_helpers.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/strcat.h"
@@ -47,6 +50,7 @@
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/logging.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/net/sockaddr.h"
@@ -105,6 +109,29 @@ 
DEFINE_uint32(builtin_ntp_true_time_refresh_max_interval_s, 3600,
 TAG_FLAG(builtin_ntp_true_time_refresh_max_interval_s, experimental);
 TAG_FLAG(builtin_ntp_true_time_refresh_max_interval_s, runtime);
 
+METRIC_DEFINE_gauge_int64(server, builtin_ntp_local_clock_delta,
+                          "Local Clock vs Built-In NTP True Time Delta",
+                          kudu::MetricUnit::kMilliseconds,
+                          "Delta between local clock and true time "
+                          "tracked by built-in NTP client; set to "
+                          "2^63-1 when true time is not tracked",
+                          kudu::MetricLevel::kInfo);
+
+METRIC_DEFINE_gauge_int64(server, builtin_ntp_walltime,
+                          "Built-in NTP Wall-Clock Time",
+                          kudu::MetricUnit::kMicroseconds,
+                          "Latest wall-clock time as tracked by "
+                          "built-in NTP client",
+                          kudu::MetricLevel::kDebug);
+
+METRIC_DEFINE_histogram(server, builtin_ntp_max_errors,
+                        "Built-In NTP Maximum Errors",
+                        kudu::MetricUnit::kMilliseconds,
+                        "Statistics on the maximum true time error computed by 
"
+                        "built-in NTP client",
+                         kudu::MetricLevel::kDebug,
+                        20000, 3);
+
 using kudu::clock::internal::Interval;
 using kudu::clock::internal::kIntervalNone;
 using kudu::clock::internal::RecordedResponse;
@@ -445,7 +472,6 @@ class BuiltInNtp::ServerState {
   }
 
  private:
-
   // Lock to protect internal state below from concurrent access.
   mutable rw_spinlock lock_;
 
@@ -475,13 +501,16 @@ class BuiltInNtp::ServerState {
   size_t o_pkt_total_num_;    // number of NTP requests sent
 };
 
-BuiltInNtp::BuiltInNtp()
+BuiltInNtp::BuiltInNtp(const scoped_refptr<MetricEntity>& metric_entity)
     : rng_(GetRandomSeed32()) {
+  RegisterMetrics(metric_entity);
 }
 
-BuiltInNtp::BuiltInNtp(vector<HostPort> servers)
+BuiltInNtp::BuiltInNtp(vector<HostPort> servers,
+                       const scoped_refptr<MetricEntity>& metric_entity)
     : rng_(GetRandomSeed32()) {
   CHECK_OK(PopulateServers(std::move(servers)));
+  RegisterMetrics(metric_entity);
 }
 
 BuiltInNtp::~BuiltInNtp() {
@@ -1019,8 +1048,8 @@ Status BuiltInNtp::CombineClocks() {
                           falsetickers_num, all_sources_num);
   }
 
-  int64_t compute_wall = (best_interval.first + best_interval.second) / 2;
-  int64_t compute_error = (best_interval.second - best_interval.first) / 2;
+  const int64_t compute_wall = (best_interval.first + best_interval.second) / 
2;
+  const int64_t compute_error = (best_interval.second - best_interval.first) / 
2;
   {
     // Extra sanity check to make sure walltime doesn't go back.
     std::lock_guard<rw_spinlock> l(last_computed_lock_);
@@ -1036,6 +1065,9 @@ Status BuiltInNtp::CombineClocks() {
     last_computed_.mono = now;
     last_computed_.wall = compute_wall;
     last_computed_.error = compute_error;
+
+    // Update stats on the computed error.
+    max_errors_histogram_->Increment(compute_error / 1000);
   }
   VLOG(2) << Substitute("combined clocks: $0 $1 $2",
                         now, compute_wall, compute_error);
@@ -1051,5 +1083,39 @@ Status BuiltInNtp::CombineClocks() {
   return Status::OK();
 }
 
+void BuiltInNtp::RegisterMetrics(const scoped_refptr<MetricEntity>& entity) {
+  METRIC_builtin_ntp_local_clock_delta.InstantiateFunctionGauge(
+      entity,
+      Bind(&BuiltInNtp::LocalClockDeltaForMetrics, Unretained(this)))->
+          AutoDetachToLastValue(&metric_detacher_);
+  METRIC_builtin_ntp_walltime.InstantiateFunctionGauge(
+      entity,
+      Bind(&BuiltInNtp::WalltimeForMetrics, Unretained(this)))->
+          AutoDetachToLastValue(&metric_detacher_);
+  max_errors_histogram_ =
+      METRIC_builtin_ntp_max_errors.Instantiate(entity);
+}
+
+int64_t BuiltInNtp::LocalClockDeltaForMetrics() {
+  uint64_t now = 0; // avoid clang-tidy warnings
+  uint64_t err_ignored;
+  auto s = WalltimeWithError(&now, &err_ignored);
+  const int64_t now_local = GetCurrentTimeMicros();
+  if (!s.ok()) {
+    return std::numeric_limits<int64_t>::max();
+  }
+
+  // Here we don't care much about scheduling anomalies to take into account
+  // the timing of the clock sampling above. There might be some delay between
+  // calls to WalltimeWithError() and GetCurrentTimeMicros(), but the metric
+  // is designed to spot relatively big offsets like few seconds and more.
+  return (now_local - static_cast<int64_t>(now)) / 1000;
+}
+
+int64_t BuiltInNtp::WalltimeForMetrics() {
+  shared_lock<rw_spinlock> l(last_computed_lock_);
+  return last_computed_.wall;
+}
+
 } // namespace clock
 } // namespace kudu
diff --git a/src/kudu/clock/builtin_ntp.h b/src/kudu/clock/builtin_ntp.h
index ffd8b9f..7b0dc8f 100644
--- a/src/kudu/clock/builtin_ntp.h
+++ b/src/kudu/clock/builtin_ntp.h
@@ -26,6 +26,7 @@
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/util/locks.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/mutex.h"
 #include "kudu/util/net/socket.h"
 #include "kudu/util/random.h"
@@ -67,12 +68,15 @@ constexpr const int kStandardNtpPort = 123;
 class BuiltInNtp : public TimeService {
  public:
   // Create an instance using the servers specified in --builtin_ntp_servers
-  // as NTP sources.
-  BuiltInNtp();
+  // as NTP sources. The 'metric_entity' is used to register metrics specific 
to
+  // the built-in NTP client.
+  explicit BuiltInNtp(const scoped_refptr<MetricEntity>& metric_entity);
 
   // Create an instance using the specified servers as NTP sources. The set
-  // of source NTP servers must not be empty.
-  explicit BuiltInNtp(std::vector<HostPort> servers);
+  // of source NTP servers must not be empty. The 'metric_entity' is used
+  // to register metrics specific to the built-in NTP client.
+  BuiltInNtp(std::vector<HostPort> servers,
+             const scoped_refptr<MetricEntity>& metric_entity);
 
   ~BuiltInNtp() override;
 
@@ -161,6 +165,16 @@ class BuiltInNtp : public TimeService {
   // based on responses received so far from configured NTP servers.
   Status CombineClocks();
 
+  // Register all metrics tracked by the built-in NTP client.
+  void RegisterMetrics(const scoped_refptr<MetricEntity>& entity);
+
+  // Get difference between the local clock and the tracked true time,
+  // in milliseconds.
+  int64_t LocalClockDeltaForMetrics();
+
+  // Get the latest computed wall-clock time, in microseconds.
+  int64_t WalltimeForMetrics();
+
   Random rng_;
   Socket socket_;
 
@@ -186,6 +200,15 @@ class BuiltInNtp : public TimeService {
   // updating the maintained walltime based on the NTP responses received.
   scoped_refptr<Thread> thread_;
 
+  // Stats on the maximum error is sampled every when wall-clock time
+  // is calculated upon receiving NTP server responses.
+  scoped_refptr<Histogram> max_errors_histogram_;
+
+  // Clock metrics are set to detach to their last value. This means
+  // that, during our destructor, we'll need to access other class members
+  // declared above this. Hence, this member must be declared last.
+  FunctionGaugeDetacher metric_detacher_;
+
   DISALLOW_COPY_AND_ASSIGN(BuiltInNtp);
 };
 
diff --git a/src/kudu/clock/hybrid_clock.cc b/src/kudu/clock/hybrid_clock.cc
index fbb97d0..03f3f27 100644
--- a/src/kudu/clock/hybrid_clock.cc
+++ b/src/kudu/clock/hybrid_clock.cc
@@ -161,23 +161,27 @@ METRIC_DEFINE_gauge_bool(server, 
hybrid_clock_extrapolating,
                          "Whether HybridClock timestamps are extrapolated "
                          "because of inability to read the underlying clock",
                          kudu::MetricLevel::kWarn);
+
 METRIC_DEFINE_gauge_uint64(server, hybrid_clock_error,
                            "Hybrid Clock Error",
                            kudu::MetricUnit::kMicroseconds,
                            "Server clock maximum error",
                            kudu::MetricLevel::kInfo);
+
 METRIC_DEFINE_gauge_uint64(server, hybrid_clock_timestamp,
                            "Hybrid Clock Timestamp",
                            kudu::MetricUnit::kMicroseconds,
                            "Hybrid clock timestamp",
                            kudu::MetricLevel::kInfo);
+
 METRIC_DEFINE_histogram(server, hybrid_clock_max_errors,
                         "Hybrid Clock Maximum Errors",
                         kudu::MetricUnit::kMilliseconds,
                         "The statistics on the maximum error of the underlying 
"
                         "clock",
-                         kudu::MetricLevel::kInfo,
+                         kudu::MetricLevel::kDebug,
                         20000, 3);
+
 METRIC_DEFINE_histogram(server, hybrid_clock_extrapolation_intervals,
                         "Intervals of Hybrid Clock Extrapolation",
                         kudu::MetricUnit::kSeconds,
@@ -210,20 +214,21 @@ Status CheckDeadlineNotWithinMicros(const MonoTime& 
deadline, int64_t wait_for_u
 
 HybridClock::HybridClock(const scoped_refptr<MetricEntity>& metric_entity)
     : next_timestamp_(0),
-      state_(kNotInitialized) {
+      state_(kNotInitialized),
+      metric_entity_(metric_entity) {
   DCHECK(metric_entity);
   max_errors_histogram_ =
-      METRIC_hybrid_clock_max_errors.Instantiate(metric_entity);
+      METRIC_hybrid_clock_max_errors.Instantiate(metric_entity_);
   extrapolation_intervals_histogram_ =
-      METRIC_hybrid_clock_extrapolation_intervals.Instantiate(metric_entity);
-  extrapolating_ = metric_entity->FindOrCreateGauge(
+      METRIC_hybrid_clock_extrapolation_intervals.Instantiate(metric_entity_);
+  extrapolating_ = metric_entity_->FindOrCreateGauge(
       &METRIC_hybrid_clock_extrapolating, false, MergeType::kMax);
   METRIC_hybrid_clock_timestamp.InstantiateFunctionGauge(
-      metric_entity,
+      metric_entity_,
       Bind(&HybridClock::NowForMetrics, Unretained(this)))->
           AutoDetachToLastValue(&metric_detacher_);
   METRIC_hybrid_clock_error.InstantiateFunctionGauge(
-      metric_entity,
+      metric_entity_,
       Bind(&HybridClock::ErrorForMetrics, Unretained(this)))->
           AutoDetachToLastValue(&metric_detacher_);
 }
@@ -487,8 +492,9 @@ Status HybridClock::InitWithTimeSource(TimeSource 
time_source,
   switch (time_source) {
     case TimeSource::NTP_SYNC_BUILTIN:
       time_service_.reset(builtin_ntp_servers.empty()
-                          ? new clock::BuiltInNtp
-                          : new 
clock::BuiltInNtp(std::move(builtin_ntp_servers)));
+                          ? new clock::BuiltInNtp(metric_entity_)
+                          : new 
clock::BuiltInNtp(std::move(builtin_ntp_servers),
+                                                  metric_entity_));
       break;
 #if defined(KUDU_HAS_SYSTEM_TIME_SOURCE)
     case TimeSource::NTP_SYNC_SYSTEM:
diff --git a/src/kudu/clock/hybrid_clock.h b/src/kudu/clock/hybrid_clock.h
index d36d5e2..545ba0a 100644
--- a/src/kudu/clock/hybrid_clock.h
+++ b/src/kudu/clock/hybrid_clock.h
@@ -258,6 +258,9 @@ class HybridClock : public Clock {
   };
   State state_;
 
+  // Metric entity.
+  scoped_refptr<MetricEntity> metric_entity_;
+
   // Whether the hybrid clock is extrapolating the readings of the underlying
   // clock instead of using the real ones. It's important to know whether
   // the extrapolation is happening, but 'extrapolation_intervals_histogram_'
@@ -265,7 +268,7 @@ class HybridClock : public Clock {
   // extrapolation interval.
   scoped_refptr<AtomicGauge<bool>> extrapolating_;
 
-  // Stats on the underlying clock's 'maximum clock' metric sampled every
+  // Stats on the underlying clock's 'maximum error' metric sampled every
   // NowWithError() call (essentially, every call when requesting a hybrid 
clock
   // timestamp).
   scoped_refptr<Histogram> max_errors_histogram_;
diff --git a/src/kudu/clock/ntp-test.cc b/src/kudu/clock/ntp-test.cc
index da558b2..5403144 100644
--- a/src/kudu/clock/ntp-test.cc
+++ b/src/kudu/clock/ntp-test.cc
@@ -40,10 +40,12 @@
 #include "kudu/clock/test/mini_chronyd.h"
 #include "kudu/clock/test/mini_chronyd_test_util.h"
 #include "kudu/gutil/integral_types.h"
+#include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/cloud/instance_detector.h"
 #include "kudu/util/cloud/instance_metadata.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/status.h"
@@ -56,6 +58,8 @@ DECLARE_string(builtin_ntp_servers);
 DECLARE_uint32(builtin_ntp_poll_interval_ms);
 DECLARE_uint32(cloud_metadata_server_request_timeout_ms);
 
+METRIC_DECLARE_entity(server);
+
 using kudu::clock::internal::FindIntersection;
 using kudu::clock::internal::Interval;
 using kudu::clock::internal::RecordedResponse;
@@ -435,8 +439,13 @@ TEST(TimeIntervalsTest, ThreeResponses) {
 //
 // TODO(aserbin): fix the described chrony's bug, at least in thirdparty
 class BuiltinNtpWithMiniChronydTest: public KuduTest {
+ public:
+  BuiltinNtpWithMiniChronydTest()
+      : metric_entity_(METRIC_ENTITY_server.Instantiate(&metric_registry_,
+                                                        "ntp-test")) {
+  }
  protected:
-  void CheckInitUnsync(BuiltInNtp* ntp_client) {
+  static void CheckInitUnsync(BuiltInNtp* ntp_client) {
     auto deadline = MonoTime::Now() +
         MonoDelta::FromSeconds(FLAGS_ntp_initial_sync_wait_secs);
     ASSERT_OK(ntp_client->Init());
@@ -469,6 +478,9 @@ class BuiltinNtpWithMiniChronydTest: public KuduTest {
     ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
     ASSERT_STR_CONTAINS(s.ToString(), "No suitable source for 
synchronisation");
   }
+
+  MetricRegistry metric_registry_;
+  scoped_refptr<MetricEntity> metric_entity_;
 };
 
 // This is a basic scenario to verify that built-in NTP client is able
@@ -493,7 +505,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest, Basic) {
 
   // chronyd supports very short polling intervals (down to 1/64 second).
   FLAGS_builtin_ntp_poll_interval_ms = 50;
-  BuiltInNtp c(servers_endpoints);
+  BuiltInNtp c(servers_endpoints, metric_entity_);
   ASSERT_OK(c.Init());
   ASSERT_EVENTUALLY([&] {
     uint64_t now_us;
@@ -543,7 +555,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest, 
NoIntersectionIntervalAtStartup) {
   // chronyd supports very short polling intervals (down to 1/64 second).
   FLAGS_builtin_ntp_poll_interval_ms = 50;
 
-  BuiltInNtp c(servers_endpoints);
+  BuiltInNtp c(servers_endpoints, metric_entity_);
   NO_FATALS(CheckInitUnsync(&c));
   NO_FATALS(CheckWallTimeUnavailable(&c));
 }
@@ -636,7 +648,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest, 
SyncAndUnsyncReferenceServers) {
     // Verify chronyd's client itself does not accept the set of of NTP 
sources.
     NO_FATALS(CheckNoNtpSource(refs));
 
-    BuiltInNtp c(refs);
+    BuiltInNtp c(refs, metric_entity_);
     NO_FATALS(CheckInitUnsync(&c));
     NO_FATALS(CheckWallTimeUnavailable(&c));
   };
@@ -678,7 +690,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest, 
SyncAndUnsyncReferenceServers) {
     // Verify chronyd's client itself accepts the set of of NTP sources.
     ASSERT_OK(MiniChronyd::CheckNtpSource(refs));
 
-    BuiltInNtp c(refs);
+    BuiltInNtp c(refs, metric_entity_);
     ASSERT_OK(c.Init());
     ASSERT_EVENTUALLY([&] {
       uint64_t now_us;
@@ -731,7 +743,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest, 
CloudInstanceNtpServer) {
   ASSERT_OK(MiniChronyd::CheckNtpSource(servers_endpoints));
 
   FLAGS_builtin_ntp_poll_interval_ms = 500;
-  BuiltInNtp c(servers_endpoints);
+  BuiltInNtp c(servers_endpoints, metric_entity_);
   ASSERT_OK(c.Init());
   ASSERT_EVENTUALLY([&] {
     uint64_t now_us;

Reply via email to