This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 6371ffdb54fa09f891e01a120e74993f38286c9f
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue Feb 11 20:56:05 2020 -0800

    [hybrid clock] KUDU-3048 introduce new clock metrics
    
    Introduced additional metrics for the hybrid clock:
      * whether hybrid clock is using extrapolated readings for the
        underlying clock instead of actual readings
      * histogram for the duration of intervals when the underlying clock
        was extrapolated
      * histogram for the maximum errors reported by the underlying clock
    
    I ran a small Kudu cluster to manually verify the behavior of the
    newly introduced metrics: I'm not sure it's worth adding automated
    tests for this given the already existing 'hybrid_clock_error' metric
    didn't have any test coverage.
    
    Change-Id: I8575ba7d8baed78b13351e8cebf1a74f44b31b82
    Reviewed-on: http://gerrit.cloudera.org:8080/15212
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/clock/hybrid_clock.cc | 60 +++++++++++++++++++++++++++++++++++-------
 src/kudu/clock/hybrid_clock.h  | 28 +++++++++++++++++---
 src/kudu/server/server_base.cc |  2 --
 src/kudu/util/metrics.cc       |  2 +-
 4 files changed, 75 insertions(+), 17 deletions(-)

diff --git a/src/kudu/clock/hybrid_clock.cc b/src/kudu/clock/hybrid_clock.cc
index 4280eca..d2a94fc 100644
--- a/src/kudu/clock/hybrid_clock.cc
+++ b/src/kudu/clock/hybrid_clock.cc
@@ -117,16 +117,37 @@ bool ValidateTimeSource() {
 }
 GROUP_FLAG_VALIDATOR(time_source_guardrail, ValidateTimeSource);
 
-METRIC_DEFINE_gauge_uint64(server, hybrid_clock_timestamp,
-                           "Hybrid Clock Timestamp",
-                           kudu::MetricUnit::kMicroseconds,
-                           "Hybrid clock timestamp.",
-                           kudu::MetricLevel::kInfo);
+METRIC_DEFINE_gauge_bool(server, hybrid_clock_extrapolating,
+                         "Hybrid Clock Is Being Extrapolated",
+                         kudu::MetricUnit::kState,
+                         "Whether HybridClock timestamps are extrapolated "
+                         "because of inability to read the underlying clock",
+                         kudu::MetricLevel::kWarn);
 METRIC_DEFINE_gauge_uint64(server, hybrid_clock_error,
                            "Hybrid Clock Error",
                            kudu::MetricUnit::kMicroseconds,
-                           "Server clock maximum error.",
+                           "Server clock maximum error",
+                           kudu::MetricLevel::kInfo);
+METRIC_DEFINE_gauge_uint64(server, hybrid_clock_timestamp,
+                           "Hybrid Clock Timestamp",
+                           kudu::MetricUnit::kMicroseconds,
+                           "Hybrid clock timestamp",
                            kudu::MetricLevel::kInfo);
+METRIC_DEFINE_histogram(server, hybrid_clock_max_errors,
+                        "Hybrid Clock Maximum Errors",
+                        kudu::MetricUnit::kMilliseconds,
+                        "The statistics on the maximum error of the underlying 
"
+                        "clock",
+                         kudu::MetricLevel::kInfo,
+                        20000, 3);
+METRIC_DEFINE_histogram(server, hybrid_clock_extrapolation_intervals,
+                        "Intervals of Hybrid Clock Extrapolation",
+                        kudu::MetricUnit::kSeconds,
+                        "The statistics on the duration of intervals when the "
+                        "underlying clock was extrapolated instead of using "
+                        "the direct readings",
+                        kudu::MetricLevel::kWarn,
+                        10000, 3);
 
 namespace kudu {
 namespace clock {
@@ -153,6 +174,12 @@ HybridClock::HybridClock(const 
scoped_refptr<MetricEntity>& metric_entity)
     : next_timestamp_(0),
       state_(kNotInitialized) {
   DCHECK(metric_entity);
+  max_errors_histogram_ =
+      METRIC_hybrid_clock_max_errors.Instantiate(metric_entity);
+  extrapolation_intervals_histogram_ =
+      METRIC_hybrid_clock_extrapolation_intervals.Instantiate(metric_entity);
+  extrapolating_ = metric_entity->FindOrCreateGauge(
+      &METRIC_hybrid_clock_extrapolating, false, MergeType::kMax);
   METRIC_hybrid_clock_timestamp.InstantiateFunctionGauge(
       metric_entity,
       Bind(&HybridClock::NowForMetrics, Unretained(this)))->
@@ -418,12 +445,13 @@ void HybridClock::NowWithErrorUnlocked(Timestamp 
*timestamp,
 }
 
 Status HybridClock::WalltimeWithError(uint64_t* now_usec, uint64_t* 
error_usec) {
-  bool is_extrapolated = false;
   auto read_time_before = MonoTime::Now();
   Status s = time_service_->WalltimeWithError(now_usec, error_usec);
   auto read_time_after = MonoTime::Now();
 
+  bool is_extrapolated = false;
   if (PREDICT_TRUE(s.ok())) {
+    max_errors_histogram_->Increment(*error_usec / 1000);
     // We got a good clock read. Remember this in case the clock later becomes
     // unsynchronized and we need to extrapolate from here.
     //
@@ -448,6 +476,13 @@ Status HybridClock::WalltimeWithError(uint64_t* now_usec, 
uint64_t* error_usec)
         MonoDelta::FromMicroseconds(read_time_error_us);
 
     std::unique_lock<decltype(last_clock_read_lock_)> l(last_clock_read_lock_);
+    if (is_extrapolating_) {
+      is_extrapolating_ = false;
+      extrapolating_->set_value(is_extrapolating_);
+      uint64_t duration_us = (*now_usec > last_clock_read_physical_)
+          ? (*now_usec - last_clock_read_physical_) : 0;
+      extrapolation_intervals_histogram_->Increment(duration_us / 1000000);
+    }
     if (!last_clock_read_time_.Initialized() ||
         last_clock_read_time_ < read_time_max_likelihood) {
       last_clock_read_time_ = read_time_max_likelihood;
@@ -458,6 +493,10 @@ Status HybridClock::WalltimeWithError(uint64_t* now_usec, 
uint64_t* error_usec)
     // We failed to read the clock. Extrapolate the new time based on our
     // last successful read.
     std::unique_lock<decltype(last_clock_read_lock_)> l(last_clock_read_lock_);
+    if (!is_extrapolating_) {
+      is_extrapolating_ = true;
+      extrapolating_->set_value(is_extrapolating_);
+    }
     if (!last_clock_read_time_.Initialized()) {
       RETURN_NOT_OK_PREPEND(s, "could not read system time source");
     }
@@ -468,10 +507,11 @@ Status HybridClock::WalltimeWithError(uint64_t* now_usec, 
uint64_t* error_usec)
     *error_usec = last_clock_read_error_ + accum_error_us;
     is_extrapolated = true;
     l.unlock();
-    // Log after unlocking to minimize the lock hold time.
-    KLOG_EVERY_N_SECS(ERROR, 1) << "Unable to read clock for last "
-                                << time_since_last_read.ToString() << ": " << 
s.ToString();
 
+    // Log after unlocking to minimize the lock hold time.
+    KLOG_EVERY_N_SECS(ERROR, 1) << Substitute(
+        "unable to read clock for last $0: $1",
+        time_since_last_read.ToString(), s.ToString());
   }
 
   // If the clock is synchronized but has max_error beyond 
max_clock_sync_error_usec
diff --git a/src/kudu/clock/hybrid_clock.h b/src/kudu/clock/hybrid_clock.h
index 1589937..a00335e 100644
--- a/src/kudu/clock/hybrid_clock.h
+++ b/src/kudu/clock/hybrid_clock.h
@@ -197,16 +197,19 @@ class HybridClock : public Clock {
   simple_spinlock lock_;
 
   // The next timestamp to be generated from this clock, assuming that
-  // the physical clock hasn't advanced beyond the value stored here. Guarded
-  // by 'lock_'.
+  // the physical clock hasn't advanced beyond the value stored here.
+  // Protected by 'lock_'.
   uint64_t next_timestamp_;
 
   // The last valid clock reading we got from the time source, along
-  // with the monotime that we took that reading.
-  simple_spinlock last_clock_read_lock_;
+  // with the monotime that we took that reading. The 'is_extrapolating' field
+  // tracks whether extrapolated or real readings of the underlying clock are
+  // used to generate hybrid timestamps.
+  simple_spinlock last_clock_read_lock_;  // protects four fields below
   MonoTime last_clock_read_time_;
   uint64_t last_clock_read_physical_;
   uint64_t last_clock_read_error_;
+  bool is_extrapolating_ = false;
 
   // The state of the object. Guarded by 'lock_'.
   enum State {
@@ -215,6 +218,23 @@ class HybridClock : public Clock {
   };
   State state_;
 
+  // Whether the hybrid clock is extrapolating the readings of the underlying
+  // clock instead of using the real ones. It's important to know whether
+  // the extrapolation is happening, but 'extrapolation_intervals_histogram_'
+  // metric doesn't allow for exposing this fact prior to the end of current
+  // extrapolation interval.
+  scoped_refptr<AtomicGauge<bool>> extrapolating_;
+
+  // Stats on the underlying clock's 'maximum clock' metric sampled every
+  // NowWithError() call (essentially, every call when requesting a hybrid 
clock
+  // timestamp).
+  scoped_refptr<Histogram> max_errors_histogram_;
+
+  // Stats on time intervals when the underlying clock was extrapolated
+  // instead of using the actual readings. Extrapolation happens when an 
attempt
+  // to read the clock yields an error (clock might be unsynchronized, etc.).
+  scoped_refptr<Histogram> extrapolation_intervals_histogram_;
+
   // Clock metrics are set to detach to their last value. This means
   // that, during our destructor, we'll need to access other class members
   // declared above this. Hence, this member must be declared last.
diff --git a/src/kudu/server/server_base.cc b/src/kudu/server/server_base.cc
index a5bf5ad..36b5093 100644
--- a/src/kudu/server/server_base.cc
+++ b/src/kudu/server/server_base.cc
@@ -492,9 +492,7 @@ Status ServerBase::Init() {
   // so we're less likely to get into a partially initialized state on disk 
during startup
   // if we're having clock problems.
   RETURN_NOT_OK_PREPEND(clock_->Init(), "Cannot initialize clock");
-
   RETURN_NOT_OK(security::InitKerberosForServer(FLAGS_principal, 
FLAGS_keytab_file));
-
   RETURN_NOT_OK(file_cache_->Init());
 
   fs::FsReport report;
diff --git a/src/kudu/util/metrics.cc b/src/kudu/util/metrics.cc
index 2ad5eed..d250387 100644
--- a/src/kudu/util/metrics.cc
+++ b/src/kudu/util/metrics.cc
@@ -897,7 +897,7 @@ HistogramPrototype::HistogramPrototype(const 
MetricPrototype::CtorArgs& args,
   : MetricPrototype(args),
     max_trackable_value_(max_trackable_value),
     num_sig_digits_(num_sig_digits) {
-  // Better to crash at definition time that at instantiation time.
+  // Better to crash at definition time than at instantiation time.
   CHECK(HdrHistogram::IsValidHighestTrackableValue(max_trackable_value))
       << Substitute("Invalid max trackable value on histogram $0: $1",
                     args.name_, max_trackable_value);

Reply via email to