This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 4aa0c7c0bc7d91af8be9a837b64f2a53fe31dd44
Author: Alexey Serbin <[email protected]>
AuthorDate: Fri Feb 14 14:50:21 2020 -0800

    [tests] add time anomalies test
    
    This patch adds a new test to exercise the behavior of Kudu masters
    and tablet servers in case of various time anomalies.
    
    As of now, a single scenario is present: starting up tablet servers
    in case if current time is far behind timestamps in replica's WAL.
    
    This patch also contains a small update on MiniChronyd::SetTime()
    to make it more robust if calling the method multiple times
    consecutively.
    
    Change-Id: I369f0d78997824324426943a75c126d35d81e264
    Reviewed-on: http://gerrit.cloudera.org:8080/15239
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <[email protected]>
---
 src/kudu/clock/test/mini_chronyd.cc                |  28 ++++-
 src/kudu/integration-tests/CMakeLists.txt          |   3 +
 src/kudu/integration-tests/time_anomalies-itest.cc | 126 +++++++++++++++++++++
 src/kudu/tablet/tablet_bootstrap.cc                |   4 +-
 4 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/src/kudu/clock/test/mini_chronyd.cc 
b/src/kudu/clock/test/mini_chronyd.cc
index 1d86a40..22616db 100644
--- a/src/kudu/clock/test/mini_chronyd.cc
+++ b/src/kudu/clock/test/mini_chronyd.cc
@@ -292,7 +292,20 @@ Status MiniChronyd::GetServerStats(ServerStats* stats) 
const {
 Status MiniChronyd::SetTime(time_t time) {
   char buf[kFastToBufferSize];
   char* time_to_set = FastTimeToBuffer(time, buf);
-  return RunChronyCmd({ "settime", time_to_set });
+  string out;
+  string err;
+  // The first command is 'manual reset' to remove samples accumulated prior
+  // to the run of this 'settime' command. Those are not needed and could
+  // confuse chrony when SetTime() is called next time with a close enough
+  // timestamp (in the chrony's source, see functions handle_settime() from
+  // cmdmon.c and MNL_AcceptTimestamp() from manual.c).
+  // The second command is 'settime <time>' which sets the time as requested.
+  auto s = RunChronyCmd(
+      { "manual reset", Substitute("settime $0", time_to_set) }, &out, &err);
+  if (!s.ok()) {
+    s = s.CloneAndAppend(Substitute("stdout: $0 stderr: $1", out, err));
+  }
+  return s;
 }
 
 // Find absolute path to chronyc (chrony's CLI tool),
@@ -444,7 +457,18 @@ Status MiniChronyd::RunChronyCmd(const vector<string>& 
args,
                                  string* out_stderr) const {
   string chronyc_bin;
   RETURN_NOT_OK(GetChronycPath(&chronyc_bin));
-  vector<string> cmd_and_args = { chronyc_bin, "-h", cmdaddress(), };
+  // Use '-m' switch to allow for multiple commands to be sent at once: each
+  // argument is interpreted as a separate command.
+  // Use '-n' switch to avoid resolving IP addresses into DNS names in case if
+  // chronyc outputs anything related to to IP addresses/hostnames (e.g.,
+  // list of source NTP servers or list of NTP clients server by chronyd, 
etc.).
+  // Since non-default command address is used for chronyd (the 
'bindcmdaddress'
+  // option in 'chrony.conf'), it's necessary to specify the address using the
+  // '-h' switch (it's a path of the Unix domain socket in case of 
MiniChronyd).
+  // See 'man chronyc' for more details on the switches used for 'chronyc'
+  // invocation.
+  vector<string> cmd_and_args = {
+      chronyc_bin, "-n", "-m", "-h", cmdaddress(), };
   std::copy(args.begin(), args.end(), std::back_inserter(cmd_and_args));
   return Subprocess::Call(cmd_and_args, "", out_stdout, out_stderr);
 }
diff --git a/src/kudu/integration-tests/CMakeLists.txt 
b/src/kudu/integration-tests/CMakeLists.txt
index dbbd80c..4663f0d 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -122,6 +122,9 @@ ADD_KUDU_TEST(tablet_copy_client_session-itest)
 ADD_KUDU_TEST(tablet_history_gc-itest)
 ADD_KUDU_TEST(tablet_replacement-itest)
 ADD_KUDU_TEST(tablet_server_quiescing-itest)
+if (NOT NO_CHRONY)
+  ADD_KUDU_TEST(time_anomalies-itest)
+endif()
 ADD_KUDU_TEST(timestamp_advancement-itest)
 ADD_KUDU_TEST(tombstoned_voting-imc-itest)
 ADD_KUDU_TEST(tombstoned_voting-itest)
diff --git a/src/kudu/integration-tests/time_anomalies-itest.cc 
b/src/kudu/integration-tests/time_anomalies-itest.cc
new file mode 100644
index 0000000..d0d45bc
--- /dev/null
+++ b/src/kudu/integration-tests/time_anomalies-itest.cc
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <ctime>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <gflags/gflags_declare.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/clock/test/mini_chronyd.h"
+#include "kudu/integration-tests/cluster_verifier.h"
+#include "kudu/integration-tests/external_mini_cluster-itest-base.h"
+#include "kudu/integration-tests/test_workload.h"
+#include "kudu/mini-cluster/external_mini_cluster.h"
+#include "kudu/mini-cluster/mini_cluster.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+DECLARE_int32(max_clock_sync_error_usec);
+
+using kudu::cluster::ExternalMiniClusterOptions;
+using kudu::cluster::BuiltinNtpConfigMode;
+using kudu::cluster::ClusterNodes;
+using std::string;
+using std::vector;
+
+namespace kudu {
+
+class TimeAnomaliesITest : public ExternalMiniClusterITestBase {
+};
+
+// A scenario to assert on the expected behavior of Kudu servers if true time
+// is far behind of HybridClock timestamps recorded in tablet replicas' WALs.
+TEST_F(TimeAnomaliesITest, CrashOnBootstrapIfClockFarAhead) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  const auto kTServersNum = 3;
+
+  ExternalMiniClusterOptions cluster_opts;
+  cluster_opts.num_tablet_servers = kTServersNum;
+  cluster_opts.num_ntp_servers = cluster_opts.num_tablet_servers;
+  cluster_opts.ntp_config_mode = 
BuiltinNtpConfigMode::ROUND_ROBIN_SINGLE_SERVER;
+  NO_FATALS(StartClusterWithOpts(std::move(cluster_opts)));
+
+  // Set NTP time in the future, so new timestamps are ahead of the ones 
present
+  // in WAL records. Restart tablet servers to be sure their built-in NTP
+  // clients immediately have new times.
+  cluster_->ShutdownNodes(ClusterNodes::TS_ONLY);
+  {
+    vector<clock::MiniChronyd*> ntp_servers = cluster_->ntp_servers();
+    const time_t t = time(nullptr) +
+        5 * FLAGS_max_clock_sync_error_usec / (1000 * 1000);
+    for (auto* s : ntp_servers) {
+      ASSERT_OK(s->SetTime(t));
+    }
+  }
+  ASSERT_OK(cluster_->Restart());
+
+  // Create a table with RF equal to the number of tablet servers in the
+  // cluster to make sure every tablet servers contains a repica of a tablet.
+  // Populate the tablet with some data.
+  TestWorkload workload(cluster_.get());
+  workload.set_num_replicas(kTServersNum);
+  workload.Setup();
+  workload.Start();
+  while (workload.rows_inserted() < 10) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  workload.StopAndJoin();
+
+  NO_FATALS(cluster_->AssertNoCrashes());
+  ClusterVerifier v(cluster_.get());
+  NO_FATALS(v.CheckCluster());
+  NO_FATALS(v.CheckRowCount(workload.table_name(),
+                            ClusterVerifier::EXACTLY,
+                            workload.rows_inserted() - 
workload.rows_deleted()));
+
+  // Return the time back to 'normal': the timestamps of transactions in WAL
+  // will become far ahead of timestamps output by the built-in NTP client.
+  cluster_->ShutdownNodes(ClusterNodes::TS_ONLY);
+  {
+    vector<clock::MiniChronyd*> ntp_servers = cluster_->ntp_servers();
+    const time_t t = time(nullptr);
+    for (auto* s : ntp_servers) {
+      ASSERT_OK(s->SetTime(t));
+    }
+  }
+
+  for (auto idx = 0; idx < cluster_->num_tablet_servers(); ++idx) {
+    auto* srv = cluster_->tablet_server(idx);
+    // Inject a bit of delay before bootstrapping tablets: this is to allow for
+    // Status::OK() returned upon starting tablet servers.
+    
srv->mutable_flags()->emplace_back("--tablet_bootstrap_inject_latency_ms=3000");
+    ASSERT_OK(srv->Restart());
+  }
+
+  // All tablet servers should crash on attempt to load data timestamped with
+  // offset more than FLAGS_max_clock_sync_error_usec in the future from 
current
+  // clock timestamps.
+  const MonoDelta kTimeout = MonoDelta::FromSeconds(30);
+  for (auto idx = 0; idx < cluster_->num_tablet_servers(); ++idx) {
+    auto* srv = cluster_->tablet_server(idx);
+    ASSERT_OK(srv->WaitForFatal(kTimeout));
+  }
+}
+
+} // namespace kudu
diff --git a/src/kudu/tablet/tablet_bootstrap.cc 
b/src/kudu/tablet/tablet_bootstrap.cc
index 992601d..f016cfb 100644
--- a/src/kudu/tablet/tablet_bootstrap.cc
+++ b/src/kudu/tablet/tablet_bootstrap.cc
@@ -1663,9 +1663,7 @@ Status TabletBootstrap::FilterOperation(const 
OperationResultPB& op_result,
 }
 
 Status TabletBootstrap::UpdateClock(uint64_t timestamp) {
-  Timestamp ts(timestamp);
-  RETURN_NOT_OK(clock_->Update(ts));
-  return Status::OK();
+  return clock_->Update(Timestamp(timestamp));
 }
 
 string TabletBootstrap::LogPrefix() const {

Reply via email to