This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new fc4daf6 [clock] change clock source selection for 'auto'
fc4daf6 is described below
commit fc4daf6ee16b1939397718cc5f1cfc9d1ca4a31a
Author: Alexey Serbin <[email protected]>
AuthorDate: Thu Jun 10 23:02:37 2021 -0700
[clock] change clock source selection for 'auto'
This patch changes the logic to select particular time source when
the pseudo-source 'auto' used for --time_source.
Before this patch, the 'system' time source would be auto-selected if
a Kudu server with --time_source=auto is run in an environment where the
instance detector isn't aware of a dedicated NTP server (those are
usually available via the host-only interface, at least for EC2 and
GCE instances).
After this patch, the 'builtin' time source would be auto-selected
if a Kudu server runs with --time_source=auto in environment where the
instance detector isn't aware of dedicated NTP servers AND the
--builtin_ntp_servers flag is set to a valid value. Otherwise, if
--builtin_ntp_servers flag is set to an empty/invalid value, 'system'
becomes the auto-selected time source for platforms supporting
get_ntptime() API, otherwise the catch-all case is used to auto-select
'system_unsync' as the time source.
I also added a new test scenario to cover the updated functionality.
Change-Id: I72cb54c488b2aa2c73acd6e6e5f6e50dd5811175
Reviewed-on: http://gerrit.cloudera.org:8080/17582
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <[email protected]>
---
src/kudu/clock/builtin_ntp.cc | 42 +++++++++++++++++-------
src/kudu/clock/builtin_ntp.h | 6 ----
src/kudu/clock/hybrid_clock-test.cc | 56 +++++++++++++++++++++++++++++++-
src/kudu/clock/hybrid_clock.cc | 48 ++++++++++++++-------------
src/kudu/clock/hybrid_clock.h | 23 +++++++------
src/kudu/clock/ntp-test.cc | 17 +++++++---
src/kudu/server/default_path_handlers.cc | 14 ++++++--
7 files changed, 146 insertions(+), 60 deletions(-)
diff --git a/src/kudu/clock/builtin_ntp.cc b/src/kudu/clock/builtin_ntp.cc
index 52688cb..f5f416e 100644
--- a/src/kudu/clock/builtin_ntp.cc
+++ b/src/kudu/clock/builtin_ntp.cc
@@ -46,6 +46,7 @@
#include "kudu/gutil/walltime.h"
#include "kudu/util/errno.h"
#include "kudu/util/flag_tags.h"
+#include "kudu/util/flag_validators.h"
#include "kudu/util/locks.h"
#include "kudu/util/logging.h"
#include "kudu/util/metrics.h"
@@ -68,9 +69,13 @@ DEFINE_string(builtin_ntp_servers,
"1.pool.ntp.org,"
"2.pool.ntp.org,"
"3.pool.ntp.org",
- "The NTP servers used by the built-in NTP client, in format "
- "<FQDN|IP>[:PORT]. These will only be used if the built-in NTP "
- "client is enabled (i.e. if setting --time_source=builtin).");
+ "Comma-separated list of NTP servers for the built-in NTP "
+ "client, each in format <FQDN|IP>[:PORT]. This list will be used
"
+ "in one of the following cases: (A) the built-in NTP client is "
+ "explicitly set as the time source (i.e. --time_source=builtin) "
+ "(B) the 'auto' pseudo-source for time is used and the cloud "
+ "instance detector isn't aware about any preferred NTP servers "
+ "provided by the environment.");
TAG_FLAG(builtin_ntp_servers, stable);
// In the 'Best practices' section, RFC 4330 states that 15 seconds is the
@@ -108,6 +113,8 @@ DEFINE_uint32(builtin_ntp_true_time_refresh_max_interval_s,
3600,
TAG_FLAG(builtin_ntp_true_time_refresh_max_interval_s, experimental);
TAG_FLAG(builtin_ntp_true_time_refresh_max_interval_s, runtime);
+DECLARE_string(time_source);
+
METRIC_DEFINE_gauge_int64(server, builtin_ntp_local_clock_delta,
"Local Clock vs Built-In NTP True Time Delta",
kudu::MetricUnit::kMilliseconds,
@@ -164,6 +171,26 @@ constexpr uint8_t kMaxStratum = 16;
// Maximum allowed dispersion (in microseconds) per RFC 5905.
constexpr uint64_t kMaxDispersionUs = 16000000; // 16 seconds
+static bool ValidateBuiltinNtpServers() {
+ if (FLAGS_time_source == "builtin") {
+ vector<HostPort> hps;
+ Status s = HostPort::ParseStrings(
+ FLAGS_builtin_ntp_servers, kStandardNtpPort, &hps);
+ if (!s.ok()) {
+ LOG(ERROR) << Substitute("could not parse --builtin_ntp_servers flag:
$0",
+ s.message().ToString());
+ return false;
+ }
+ }
+ // In case of --time_source=auto, setting --bultin_ntp_servers to an empty
+ // string makes the time source auto-selection skip the 'builtin' and start
+ // using 'system' on supported platforms. See HybridClock::SelectTimeSource()
+ // for details.
+ return true;
+}
+GROUP_FLAG_VALIDATOR(timesource_and_builtin_ntp_client,
+ ValidateBuiltinNtpServers);
+
DEFINE_validator(builtin_ntp_servers,
[](const char* name, const string& val) {
vector<HostPort> hps;
@@ -513,13 +540,6 @@ BuiltInNtp::BuiltInNtp(const scoped_refptr<MetricEntity>&
metric_entity)
RegisterMetrics(metric_entity);
}
-BuiltInNtp::BuiltInNtp(vector<HostPort> servers,
- const scoped_refptr<MetricEntity>& metric_entity)
- : rng_(GetRandomSeed32()) {
- CHECK_OK(PopulateServers(std::move(servers)));
- RegisterMetrics(metric_entity);
-}
-
BuiltInNtp::~BuiltInNtp() {
Shutdown();
}
@@ -796,7 +816,7 @@ bool BuiltInNtp::TryReceivePacket() {
return true;
}
- // TODO(KUDU-2938): handle "kiss-of-death" (RFC 5905 section 7.4)
+ // TODO(KUDU-2938): handle "kiss-of-death" packets (RFC 5905 section 7.4)
// TODO(aserbin): add more validators regarding consistency of measurements
// (reported precision and measured intervals, delays, etc.)
diff --git a/src/kudu/clock/builtin_ntp.h b/src/kudu/clock/builtin_ntp.h
index 7667115..2c61b9a 100644
--- a/src/kudu/clock/builtin_ntp.h
+++ b/src/kudu/clock/builtin_ntp.h
@@ -72,12 +72,6 @@ class BuiltInNtp : public TimeService {
// the built-in NTP client.
explicit BuiltInNtp(const scoped_refptr<MetricEntity>& metric_entity);
- // Create an instance using the specified servers as NTP sources. The set
- // of source NTP servers must not be empty. The 'metric_entity' is used
- // to register metrics specific to the built-in NTP client.
- BuiltInNtp(std::vector<HostPort> servers,
- const scoped_refptr<MetricEntity>& metric_entity);
-
~BuiltInNtp() override;
Status Init() override;
diff --git a/src/kudu/clock/hybrid_clock-test.cc
b/src/kudu/clock/hybrid_clock-test.cc
index 50e6a26..c3e8996 100644
--- a/src/kudu/clock/hybrid_clock-test.cc
+++ b/src/kudu/clock/hybrid_clock-test.cc
@@ -19,6 +19,8 @@
#include <algorithm>
#include <cstdint>
+#include <initializer_list>
+#include <memory>
#include <ostream>
#include <string>
#include <thread>
@@ -39,6 +41,8 @@
#include "kudu/mini-cluster/external_mini_cluster.h"
#include "kudu/mini-cluster/webui_checker.h"
#include "kudu/util/atomic.h"
+#include "kudu/util/cloud/instance_detector.h"
+#include "kudu/util/cloud/instance_metadata.h"
#include "kudu/util/curl_util.h"
#include "kudu/util/faststring.h"
#include "kudu/util/metrics.h"
@@ -52,9 +56,12 @@
#include "kudu/util/test_util.h"
DECLARE_bool(inject_unsync_time_errors);
+DECLARE_string(builtin_ntp_servers);
DECLARE_string(time_source);
+DECLARE_uint32(cloud_metadata_server_request_timeout_ms);
METRIC_DECLARE_entity(server);
+using kudu::cloud::InstanceDetector;
using kudu::cluster::ExternalMiniCluster;
using kudu::cluster::ExternalMiniClusterOptions;
using std::string;
@@ -486,7 +493,7 @@ TEST_F(HybridClockTest, TimeSourceAutoSelection) {
}
if (!preconditions_satisfied) {
LOG(WARNING) << "preconditions are not satisfied, skipping the test";
- return;
+ GTEST_SKIP();
}
ASSERT_OK(s);
@@ -497,6 +504,53 @@ TEST_F(HybridClockTest, TimeSourceAutoSelection) {
ASSERT_LE(timestamp[0].value(), timestamp[1].value());
}
+// This test scenario verifies that if the environment doesn't provide a
+// dedicated NTP server when Kudu is running with --time_source=auto, the
+// auto-selected time source is 'builtin' with the set of reference servers for
+// the built-in NTP client as configured per --builtin_ntp_servers, or
+// 'system' ('system_unsync' for platforms not supporting get_ntptime() API)
+// when --builtin_ntp_servers is empty or unparsable.
+TEST_F(HybridClockTest, AutoTimeSourceNoDedicatedNtpServer) {
+ // Set very short interval for the instance detection timeout.
+ FLAGS_cloud_metadata_server_request_timeout_ms = 1;
+
+ {
+ // Check for the pre-condition: the auto-detection fails as expected due
+ // to the super-short timeout setting for the cloud metadata requests and
+ // phony DNS server to be used by libcurl.
+ InstanceDetector detector;
+ std::unique_ptr<cloud::InstanceMetadata> md;
+ const auto s = detector.Detect(&md);
+ ASSERT_TRUE(s.IsNotFound()) << s.ToString();
+ }
+
+ // With --builtin_ntp_servers set to a valid sequence of NTP server
addresses,
+ // the auto-selected time source should result in 'builtin'.
+ {
+ FLAGS_builtin_ntp_servers = "127.0.0.1";
+ HybridClock::TimeSource time_source;
+ ASSERT_OK(HybridClock::SelectTimeSource("auto", &time_source));
+ ASSERT_EQ(HybridClock::TimeSource::NTP_SYNC_BUILTIN, time_source)
+ << HybridClock::TimeSourceToString(time_source);
+ }
+
+ // With an empty or invalid value for --builtin_ntp_servers the auto-selected
+ // time source should be 'system' for the platforms which support it and
+ // 'system_unsync' otherwise.
+ for (const auto ntp_servers : { "", "," }) {
+ FLAGS_builtin_ntp_servers = ntp_servers;
+ HybridClock::TimeSource time_source;
+ ASSERT_OK(HybridClock::SelectTimeSource("auto", &time_source));
+#if defined(KUDU_HAS_SYSTEM_TIME_SOURCE)
+ ASSERT_EQ(HybridClock::TimeSource::NTP_SYNC_SYSTEM, time_source)
+ << HybridClock::TimeSourceToString(time_source);
+#else
+ ASSERT_EQ(HybridClock::TimeSource::UNSYNC_SYSTEM, time_source)
+ << HybridClock::TimeSourceToString(time_source);
+#endif // #if defined(KUDU_HAS_SYSTEM_TIME_SOURCE) ... #else ...
+ }
+}
+
#if defined(KUDU_HAS_SYSTEM_TIME_SOURCE)
TEST_F(HybridClockTest, TestNtpDiagnostics) {
FLAGS_time_source = "system";
diff --git a/src/kudu/clock/hybrid_clock.cc b/src/kudu/clock/hybrid_clock.cc
index ff4fc45..e88744a 100644
--- a/src/kudu/clock/hybrid_clock.cc
+++ b/src/kudu/clock/hybrid_clock.cc
@@ -48,6 +48,7 @@
#include "kudu/util/status.h"
#include "kudu/util/string_case.h"
+using google::GetCommandLineOption;
using google::SetCommandLineOptionWithMode;
using google::FlagSettingMode;
using kudu::clock::BuiltInNtp;
@@ -238,12 +239,10 @@ HybridClock::HybridClock(const
scoped_refptr<MetricEntity>& metric_entity)
Status HybridClock::Init() {
TimeSource time_source = TimeSource::UNKNOWN;
- vector<HostPort> builtin_ntp_servers;
- RETURN_NOT_OK(SelectTimeSource(
- FLAGS_time_source, &time_source, &builtin_ntp_servers));
+ RETURN_NOT_OK(SelectTimeSource(FLAGS_time_source, &time_source));
LOG(INFO) << Substitute("auto-selected time source: $0",
TimeSourceToString(time_source));
- return InitWithTimeSource(time_source, std::move(builtin_ntp_servers));
+ return InitWithTimeSource(time_source);
}
Timestamp HybridClock::Now() {
@@ -449,10 +448,10 @@ void HybridClock::NowWithErrorOrDie(Timestamp* timestamp,
}
Status HybridClock::SelectTimeSource(const string& time_source_str,
- TimeSource* time_source,
- vector<HostPort>* builtin_ntp_servers) {
+ TimeSource* time_source) {
+ constexpr const char* const BUILTIN_NTP_SERVERS = "builtin_ntp_servers";
+
TimeSource result_time_source = TimeSource::UNKNOWN;
- vector<HostPort> result_builtin_ntp_servers;
if (iequals(time_source_str, TIME_SOURCE_AUTO)) {
InstanceDetector detector;
unique_ptr<InstanceMetadata> md;
@@ -463,8 +462,6 @@ Status HybridClock::SelectTimeSource(const string&
time_source_str,
// built-in NTP client enabled, use the dedicated NTP server provided
// by the hosting environment.
DCHECK(!ntp_server.empty());
- result_builtin_ntp_servers.emplace_back(
- ntp_server, clock::kStandardNtpPort);
result_time_source = TimeSource::NTP_SYNC_BUILTIN;
// NOTE: setting the default value for the --builtin_ntp_servers flag
@@ -472,16 +469,28 @@ Status HybridClock::SelectTimeSource(const string&
time_source_str,
// built-in NTP client if time source auto-selection has run. This
// convention is used by the embedded Web server to show the
corresponding
// information at the '/config' page.
- SetCommandLineOptionWithMode("builtin_ntp_servers",
+ SetCommandLineOptionWithMode(BUILTIN_NTP_SERVERS,
ntp_server.c_str(),
FlagSettingMode::SET_FLAGS_DEFAULT);
-#if defined(KUDU_HAS_SYSTEM_TIME_SOURCE)
} else {
- // For the 'auto' time source, in case of environment that's known not
- // to provide a dedicated NTP server, select TimeSource::NTP_SYNC_SYSTEM,
- // if supported.
- result_time_source = TimeSource::NTP_SYNC_SYSTEM;
+ // Switch to the built-in NTP client unless the set of reference servers
+ // for the built-in client is empty/invalid.
+ string ntp_servers;
+ vector<HostPort> hps;
+ if (GetCommandLineOption(BUILTIN_NTP_SERVERS, &ntp_servers) &&
+ HostPort::ParseStrings(ntp_servers, kStandardNtpPort, &hps).ok() &&
+ !hps.empty()) {
+ result_time_source = TimeSource::NTP_SYNC_BUILTIN;
+#if defined(KUDU_HAS_SYSTEM_TIME_SOURCE)
+ } else {
+ // For the 'auto' time source, in case of environment that's known not
+ // to provide a dedicated NTP server and the set of reference servers
+ // for the built-in NTP client is empty, select
+ // TimeSource::NTP_SYNC_SYSTEM (if supported by the platform this code
+ // has been compiled for).
+ result_time_source = TimeSource::NTP_SYNC_SYSTEM;
#endif
+ }
}
// Finally, fallback to TimeSource::SYSTEM_UNSYNC. This is an option only
@@ -515,20 +524,15 @@ Status HybridClock::SelectTimeSource(const string&
time_source_str,
}
*time_source = result_time_source;
- *builtin_ntp_servers = std::move(result_builtin_ntp_servers);
return Status::OK();
}
-Status HybridClock::InitWithTimeSource(TimeSource time_source,
- vector<HostPort> builtin_ntp_servers) {
+Status HybridClock::InitWithTimeSource(TimeSource time_source) {
DCHECK_EQ(kNotInitialized, state_);
switch (time_source) {
case TimeSource::NTP_SYNC_BUILTIN:
- time_service_.reset(builtin_ntp_servers.empty()
- ? new clock::BuiltInNtp(metric_entity_)
- : new
clock::BuiltInNtp(std::move(builtin_ntp_servers),
- metric_entity_));
+ time_service_.reset(new clock::BuiltInNtp(metric_entity_));
break;
#if defined(KUDU_HAS_SYSTEM_TIME_SOURCE)
case TimeSource::NTP_SYNC_SYSTEM:
diff --git a/src/kudu/clock/hybrid_clock.h b/src/kudu/clock/hybrid_clock.h
index beeb365..42c6100 100644
--- a/src/kudu/clock/hybrid_clock.h
+++ b/src/kudu/clock/hybrid_clock.h
@@ -19,7 +19,8 @@
#include <cstdint>
#include <memory>
#include <string>
-#include <vector>
+
+#include <gtest/gtest_prod.h>
#include "kudu/clock/clock.h"
#include "kudu/clock/time_service.h"
@@ -33,9 +34,6 @@
#include "kudu/util/status.h"
namespace kudu {
-
-class HostPort;
-
namespace clock {
// The HybridTime clock.
@@ -168,6 +166,8 @@ class HybridClock : public Clock {
}
private:
+ FRIEND_TEST(HybridClockTest, AutoTimeSourceNoDedicatedNtpServer);
+
enum class TimeSource {
// Internal Kudu clock synchronized by built-in NTP client.
NTP_SYNC_BUILTIN,
@@ -200,17 +200,16 @@ class HybridClock : public Clock {
// Select particular time source for the hybrid clock given the
// 'time_source_str' parameter which can be a pseudo-source such as 'auto'.
// On success, the 'time_source' output parameter contains time source that
- // determines particular time service to use, and the 'builtin_ntp_servers'
- // contains NTP servers for the built-in NTP client if the 'builtin' time
- // source is selected.
+ // determines particular time service to use. If the 'builtin' time source is
+ // selected, the --builtin_ntp_servers flag's value is used to build the set
+ // of reference servers for the built-in NTP client.
static Status SelectTimeSource(const std::string& time_source_str,
- TimeSource* time_source,
- std::vector<HostPort>* builtin_ntp_servers);
+ TimeSource* time_source);
// Initialize hybrid clock with the specified time source.
- // The 'builtin_ntp_servers' is used in case of TimeSource::BUILTIN_NTP_SYNC.
- Status InitWithTimeSource(TimeSource time_source,
- std::vector<HostPort> builtin_ntp_servers);
+ // If 'time_source' is TimeSource::BUILTIN_NTP_SYNC, the set of reference
+ // servers for the built-in NTP client is sourced from --builtin_ntp_servers.
+ Status InitWithTimeSource(TimeSource time_source);
// Variant of NowWithError() that requires 'lock_' to be held already.
Status NowWithErrorUnlocked(Timestamp* timestamp, uint64_t* max_error_usec);
diff --git a/src/kudu/clock/ntp-test.cc b/src/kudu/clock/ntp-test.cc
index 167c788..dc3c713 100644
--- a/src/kudu/clock/ntp-test.cc
+++ b/src/kudu/clock/ntp-test.cc
@@ -506,7 +506,9 @@ TEST_F(BuiltinNtpWithMiniChronydTest, Basic) {
// chronyd supports very short polling intervals (down to 1/64 second).
FLAGS_builtin_ntp_poll_interval_ms = 50;
- BuiltInNtp c(servers_endpoints, metric_entity_);
+ FLAGS_builtin_ntp_servers =
HostPort::ToCommaSeparatedString(servers_endpoints);
+
+ BuiltInNtp c(metric_entity_);
ASSERT_OK(c.Init());
ASSERT_EVENTUALLY([&] {
uint64_t now_us;
@@ -555,8 +557,9 @@ TEST_F(BuiltinNtpWithMiniChronydTest,
NoIntersectionIntervalAtStartup) {
FLAGS_ntp_initial_sync_wait_secs = 2;
// chronyd supports very short polling intervals (down to 1/64 second).
FLAGS_builtin_ntp_poll_interval_ms = 50;
+ FLAGS_builtin_ntp_servers =
HostPort::ToCommaSeparatedString(servers_endpoints);
- BuiltInNtp c(servers_endpoints, metric_entity_);
+ BuiltInNtp c(metric_entity_);
NO_FATALS(CheckInitUnsync(&c));
NO_FATALS(CheckWallTimeUnavailable(&c));
}
@@ -646,10 +649,11 @@ TEST_F(BuiltinNtpWithMiniChronydTest,
SyncAndUnsyncReferenceServers) {
// * Make sure the built-in NTP client doesn't "latch" with the specified
// not-so-good NTP source.
auto check_not_a_good_clock_source = [&](const vector<HostPort>& refs) {
+ FLAGS_builtin_ntp_servers = HostPort::ToCommaSeparatedString(refs);
// Verify chronyd's client itself does not accept the set of of NTP
sources.
NO_FATALS(CheckNoNtpSource(refs));
- BuiltInNtp c(refs, metric_entity_);
+ BuiltInNtp c(metric_entity_);
NO_FATALS(CheckInitUnsync(&c));
NO_FATALS(CheckWallTimeUnavailable(&c));
};
@@ -691,7 +695,8 @@ TEST_F(BuiltinNtpWithMiniChronydTest,
SyncAndUnsyncReferenceServers) {
// Verify chronyd's client itself accepts the set of of NTP sources.
ASSERT_OK(MiniChronyd::CheckNtpSource(refs));
- BuiltInNtp c(refs, metric_entity_);
+ FLAGS_builtin_ntp_servers = HostPort::ToCommaSeparatedString(refs);
+ BuiltInNtp c(metric_entity_);
ASSERT_OK(c.Init());
ASSERT_EVENTUALLY([&] {
uint64_t now_us;
@@ -713,6 +718,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest,
CloudInstanceNtpServer) {
// than the regular version.
FLAGS_cloud_metadata_server_request_timeout_ms = 10000;
#endif
+
InstanceDetector detector;
unique_ptr<cloud::InstanceMetadata> md;
string ntp_server;
@@ -744,7 +750,8 @@ TEST_F(BuiltinNtpWithMiniChronydTest,
CloudInstanceNtpServer) {
ASSERT_OK(MiniChronyd::CheckNtpSource(servers_endpoints));
FLAGS_builtin_ntp_poll_interval_ms = 500;
- BuiltInNtp c(servers_endpoints, metric_entity_);
+ FLAGS_builtin_ntp_servers =
HostPort::ToCommaSeparatedString(servers_endpoints);
+ BuiltInNtp c(metric_entity_);
ASSERT_OK(c.Init());
ASSERT_EVENTUALLY([&] {
uint64_t now_us;
diff --git a/src/kudu/server/default_path_handlers.cc
b/src/kudu/server/default_path_handlers.cc
index 460c957..b167017 100644
--- a/src/kudu/server/default_path_handlers.cc
+++ b/src/kudu/server/default_path_handlers.cc
@@ -387,9 +387,17 @@ static void FillTimeSourceConfigs(EasyJson* output) {
"Effective list of NTP servers used by the built-in NTP client. "
"Configurable via --builtin_ntp_servers. If Kudu is configured with "
"--time_source=auto and the Effective Time Source is auto-selected "
- "to be 'builtin', Kudu uses dedicated NTP servers provided by the "
- "hosting environment, overriding the list of NTP servers configured "
- "via --builtin_ntp_servers.";
+ "to be 'builtin', Kudu tries to use dedicated NTP servers provided by "
+ "the hosting environment known to Kudu, overriding the list of servers
"
+ "configured via --builtin_ntp_servers. If Kudu cannot recognize the "
+ "hosting environment it runs with --time_source=auto, the Effective "
+ "Time Source is auto-selected to be 'builtin' with the set of "
+ "reference servers configured per --builtin_ntp_servers flag's value, "
+ "unless it's empty or otherwise unparsable. The last resort for a "
+ "cluster-wide synchronized clock is to auto-select the 'system' Time "
+ "Source if the platform supports get_ntptime() API. The catch-all case
"
+ "is 'system_unsync' Time Source which is for development-only "
+ "platforms or single-node-runs-it-all proof-of-concept Kudu clusters.";
}
}