This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 79a856aa2500061573096b155c17d01c0a47fcb2
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Feb 5 16:35:39 2020 -0800

    [util] remove timeout parameter for cloud::InstanceDetector
    
    With this patch, the detector relies solely on the timeouts reported
    by metadata fetchers and doesn't set the timeout on the auto-detection
    process itself.  This change simplifies the semantics of the cloud
    instance detection.
    
    In addition, and this patch fixes intermittent failures recently seen
    in the following test scenarios when running on non-cloud hosts:
      * InstanceDetectorTest.Basic
      * BuiltinNtpWithMiniChronydTest.CloudInstanceNtpServer
    
    Change-Id: I6f7cac9273e884abda22b9e9a7d37fa3e307c2d4
    Reviewed-on: http://gerrit.cloudera.org:8080/15168
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <[email protected]>
---
 src/kudu/clock/ntp-test.cc                    |  4 +--
 src/kudu/util/cloud/instance_detector-test.cc | 30 +++++++---------
 src/kudu/util/cloud/instance_detector.cc      | 21 ++++--------
 src/kudu/util/cloud/instance_detector.h       | 35 ++++++++-----------
 src/kudu/util/cloud/instance_metadata.cc      | 49 ++++++++++++++++-----------
 src/kudu/util/cloud/instance_metadata.h       | 38 +++++++--------------
 6 files changed, 79 insertions(+), 98 deletions(-)

diff --git a/src/kudu/clock/ntp-test.cc b/src/kudu/clock/ntp-test.cc
index 93f17ed..da558b2 100644
--- a/src/kudu/clock/ntp-test.cc
+++ b/src/kudu/clock/ntp-test.cc
@@ -700,8 +700,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest, 
CloudInstanceNtpServer) {
   // than the regular version.
   FLAGS_cloud_metadata_server_request_timeout_ms = 10000;
 #endif
-  InstanceDetector detector(MonoDelta::FromMilliseconds(
-      FLAGS_cloud_metadata_server_request_timeout_ms));
+  InstanceDetector detector;
   unique_ptr<cloud::InstanceMetadata> md;
   string ntp_server;
   {
@@ -713,6 +712,7 @@ TEST_F(BuiltinNtpWithMiniChronydTest, 
CloudInstanceNtpServer) {
     }
   }
   {
+    ASSERT_NE(nullptr, md.get());
     auto s = md->GetNtpServer(&ntp_server);
     if (!s.ok()) {
       // The only expected error in this case is Status::NotSupported().
diff --git a/src/kudu/util/cloud/instance_detector-test.cc 
b/src/kudu/util/cloud/instance_detector-test.cc
index 6f3b2fa..8ad4229 100644
--- a/src/kudu/util/cloud/instance_detector-test.cc
+++ b/src/kudu/util/cloud/instance_detector-test.cc
@@ -4,7 +4,6 @@
 
 #include "kudu/util/cloud/instance_detector.h"
 
-#include <initializer_list>
 #include <memory>
 #include <ostream>
 #include <string>
@@ -15,7 +14,6 @@
 
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/cloud/instance_metadata.h"
-#include "kudu/util/monotime.h"
 #include "kudu/util/status.h"
 
 DECLARE_uint32(cloud_metadata_server_request_timeout_ms);
@@ -43,8 +41,7 @@ TEST(InstanceDetectorTest, Basic) {
   // error even if the server responds in a timely manner.
   FLAGS_cloud_metadata_server_request_timeout_ms = 10000;
 #endif
-  InstanceDetector detector(MonoDelta::FromMilliseconds(
-      FLAGS_cloud_metadata_server_request_timeout_ms));
+  InstanceDetector detector;
   unique_ptr<InstanceMetadata> metadata;
   const auto s = detector.Detect(&metadata);
 
@@ -55,8 +52,8 @@ TEST(InstanceDetectorTest, Basic) {
   if (s_aws.ok() || s_azure.ok() || s_gce.ok()) {
     ASSERT_TRUE(s.ok()) << s.ToString();
     ASSERT_NE(nullptr, metadata.get());
-    LOG(INFO) << Substitute("detected $0 environment, VM id: $1",
-                            TypeToString(metadata->type()), metadata->id());
+    LOG(INFO) << Substitute("detected $0 environment",
+                            TypeToString(metadata->type()));
   } else {
     ASSERT_TRUE(s.IsNotFound()) << s.ToString();
     ASSERT_EQ(nullptr, metadata.get());
@@ -79,18 +76,17 @@ TEST(InstanceDetectorTest, Basic) {
 }
 
 // If the timeout for auto-detection is set too low, the detector should return
-// Status::TimedOut().
+// Status::NotFound(). Even if the timeout is set too low to get a response
+// from the metadata servers, the detector has no other choice but to rely
+// on the results from the metadata fetchers.
 TEST(InstanceDetectorTest, Timeout) {
-  // Try both no-time and very short interval.
-  for (const auto& interval : { MonoDelta::FromNanoseconds(0),
-                                MonoDelta::FromNanoseconds(1), } ) {
-    SCOPED_TRACE(Substitute("timeout interval '$0'", interval.ToString()));
-    InstanceDetector detector(interval);
-    unique_ptr<InstanceMetadata> metadata;
-    const auto s = detector.Detect(&metadata);
-    ASSERT_TRUE(s.IsTimedOut()) << s.ToString();
-    ASSERT_EQ(nullptr, metadata.get());
-  }
+  // Set very short interval for the timeout.
+  FLAGS_cloud_metadata_server_request_timeout_ms = 1;
+  InstanceDetector detector;
+  unique_ptr<InstanceMetadata> metadata;
+  const auto s = detector.Detect(&metadata);
+  ASSERT_TRUE(s.IsNotFound()) << s.ToString();
+  ASSERT_EQ(nullptr, metadata.get());
 }
 
 }  // namespace cloud
diff --git a/src/kudu/util/cloud/instance_detector.cc 
b/src/kudu/util/cloud/instance_detector.cc
index 2142bd3..4dd7321 100644
--- a/src/kudu/util/cloud/instance_detector.cc
+++ b/src/kudu/util/cloud/instance_detector.cc
@@ -26,7 +26,6 @@
 
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/cloud/instance_metadata.h"
-#include "kudu/util/monotime.h"
 #include "kudu/util/thread.h"
 
 using std::unique_ptr;
@@ -37,17 +36,16 @@ namespace cloud {
 
 const size_t InstanceDetector::kNoIdx = std::numeric_limits<size_t>::max();
 
-InstanceDetector::InstanceDetector(MonoDelta timeout)
-    : timeout_(timeout),
-      cv_(&mutex_),
+InstanceDetector::InstanceDetector()
+    : cv_(&mutex_),
       num_running_detectors_(0),
       result_detector_idx_(kNoIdx) {
   detectors_.push_back(
-    { unique_ptr<InstanceMetadata>(new AwsInstanceMetadata), nullptr });
+      { unique_ptr<InstanceMetadata>(new AwsInstanceMetadata), nullptr });
   detectors_.push_back(
-    { unique_ptr<InstanceMetadata>(new AzureInstanceMetadata), nullptr });
+      { unique_ptr<InstanceMetadata>(new AzureInstanceMetadata), nullptr });
   detectors_.push_back(
-    { unique_ptr<InstanceMetadata>(new GceInstanceMetadata), nullptr });
+      { unique_ptr<InstanceMetadata>(new GceInstanceMetadata), nullptr });
 }
 
 InstanceDetector::~InstanceDetector() {
@@ -59,7 +57,6 @@ InstanceDetector::~InstanceDetector() {
 }
 
 Status InstanceDetector::Detect(unique_ptr<InstanceMetadata>* metadata) {
-  const auto deadline = MonoTime::Now() + timeout_;
   {
     // An extra sanity check.
     MutexLock lock(mutex_);
@@ -86,13 +83,7 @@ Status 
InstanceDetector::Detect(unique_ptr<InstanceMetadata>* metadata) {
   {
     MutexLock lock(mutex_);
     while (result_detector_idx_ == kNoIdx && num_running_detectors_ > 0) {
-      if (!cv_.WaitUntil(deadline)) {
-        break;
-      }
-    }
-    if (deadline < MonoTime::Now()) {
-      return Status::TimedOut(
-          "could not retrieve instance metadata before the deadline");
+      cv_.Wait();
     }
     if (result_detector_idx_ != kNoIdx) {
       CHECK_LT(result_detector_idx_, detectors_.size());
diff --git a/src/kudu/util/cloud/instance_detector.h 
b/src/kudu/util/cloud/instance_detector.h
index d5a38ac..14a8ebd 100644
--- a/src/kudu/util/cloud/instance_detector.h
+++ b/src/kudu/util/cloud/instance_detector.h
@@ -25,7 +25,6 @@
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/util/cloud/instance_metadata.h"
 #include "kudu/util/condition_variable.h"
-#include "kudu/util/monotime.h"
 #include "kudu/util/mutex.h"
 #include "kudu/util/status.h"
 #include "kudu/util/thread.h"
@@ -38,28 +37,26 @@ namespace cloud {
 // machines/instances run by public cloud vendors.
 class InstanceDetector {
  public:
-  // Instantiate the detector using the specified timeout for auto-detection of
-  // the type of the cloud environment it's run at.
-  explicit InstanceDetector(MonoDelta timeout = MonoDelta::FromSeconds(1));
+  // Instantiate the detector for all known types of cloud instances.
+  InstanceDetector();
 
   // The destructor awaits for the detector threads to join (if any spawn).
   virtual ~InstanceDetector();
 
   // Perform the auto-detection of a cloud instance this object is running at.
-  // This method must not be invoked more than once.
-  // It's a synchronous call and it can take some time to complete (see
-  // the parameters of the constructor to specify timeout for the operation).
-  // On success, returns Status::OK() and provides the instance's metadata
-  // object via the 'metadata' output parameter. In case of a failure, no
-  // valid metadata is provided in the 'metadata' output parameter, and this
-  // method returns
-  //   * Status::NotFound() if the auto-detection didn't recognize any known
-  //                        instance types: the auto-detection was run in a
-  //                        non-cloud environment (most likely) or at a VM
-  //                        of unknown/not-yet-supported cloud type
-  //   * Status::TimedOut() if the specified auto-detection timeout was too
-  //                        short to identify at least one known cloud type;
-  //                        the auto-detection results should not be trusted
+  // This method must not be invoked more than once. It's a synchronous call
+  // and it can take some time to complete. On success, returns Status::OK()
+  // and provides the instance's metadata object via the 'metadata' output
+  // parameter. In case of a failure, no valid metadata is provided in the
+  // 'metadata' output parameter, and this method returns
+  //
+  //   * Status::NotFound()   if the auto-detection didn't recognize any known
+  //                          instance types: most likely, the auto-detection
+  //                          was run in a non-cloud environment or at a VM of
+  //                          unknown/not-yet-supported cloud type
+  //
+  //   * other non-OK status  an unexpected error happened while trying to
+  //                          determine the type of the environment
   //
   // TODO(aserbin): do we need async version of this method?
   Status Detect(std::unique_ptr<InstanceMetadata>* metadata) 
WARN_UNUSED_RESULT;
@@ -71,8 +68,6 @@ class InstanceDetector {
   // specified index 'idx' in 'result_detector_idx_' field.
   void GetInstanceInfo(InstanceMetadata* imd, size_t idx);
 
-  const MonoDelta timeout_;
-
   // Mutex and associated condition variable.
   Mutex mutex_;
   ConditionVariable cv_;
diff --git a/src/kudu/util/cloud/instance_metadata.cc 
b/src/kudu/util/cloud/instance_metadata.cc
index b895181..b47fe3f 100644
--- a/src/kudu/util/cloud/instance_metadata.cc
+++ b/src/kudu/util/cloud/instance_metadata.cc
@@ -26,6 +26,7 @@
 #include <glog/logging.h>
 
 #include "kudu/gutil/macros.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/curl_util.h"
 #include "kudu/util/faststring.h"
 #include "kudu/util/flag_tags.h"
@@ -34,9 +35,9 @@
 
 // The timeout should be high enough to work effectively, but as low as 
possible
 // to avoid slowing down detection of running in non-cloud environments. As of
-// now, the metadata servers of major public cloud providers is robust enough
+// now, the metadata servers of major public cloud providers are robust enough
 // to send the response in a fraction of a second.
-DEFINE_uint32(cloud_metadata_server_request_timeout_ms, 500,
+DEFINE_uint32(cloud_metadata_server_request_timeout_ms, 1000,
               "Timeout for HTTP/HTTPS requests to the instance metadata server 
"
               "(in milliseconds)");
 TAG_FLAG(cloud_metadata_server_request_timeout_ms, advanced);
@@ -90,6 +91,17 @@ DEFINE_string(cloud_gce_instance_id_url,
 TAG_FLAG(cloud_gce_instance_id_url, advanced);
 TAG_FLAG(cloud_gce_instance_id_url, runtime);
 
+DEFINE_validator(cloud_metadata_server_request_timeout_ms,
+                 [](const char* name, const uint32_t val) {
+  if (val == 0) {
+    LOG(ERROR) << strings::Substitute(
+        "unlimited timeout for metadata requests (value $0 for flag --$1) "
+        "is not allowed", val, name);
+    return false;
+  }
+  return true;
+});
+
 using std::string;
 using std::vector;
 
@@ -113,44 +125,43 @@ const char* TypeToString(CloudType type) {
   }
 }
 
-InstanceMetadataBase::InstanceMetadataBase()
+InstanceMetadata::InstanceMetadata()
     : is_initialized_(false) {
 }
 
-Status InstanceMetadataBase::Init() {
+Status InstanceMetadata::Init() {
   // As of now, fetching the instance identifier from metadata service is
   // the criterion for successful initialization of the instance metadata.
   DCHECK(!is_initialized_);
-  RETURN_NOT_OK(FetchInstanceId(&id_));
-  DCHECK(!id_.empty());
+  RETURN_NOT_OK(FetchInstanceId(nullptr));
   is_initialized_ = true;
   return Status::OK();
 }
 
-const string& InstanceMetadataBase::id() const {
-  CHECK(is_initialized_);
-  return id_;
-}
-
-MonoDelta InstanceMetadataBase::request_timeout() const {
+MonoDelta InstanceMetadata::request_timeout() const {
   return MonoDelta::FromMilliseconds(
       FLAGS_cloud_metadata_server_request_timeout_ms);
 }
 
-Status InstanceMetadataBase::Fetch(const string& url,
-                                   MonoDelta timeout,
-                                   const vector<string>& headers,
-                                   string* out) {
-  DCHECK(out);
+Status InstanceMetadata::Fetch(const string& url,
+                               MonoDelta timeout,
+                               const vector<string>& headers,
+                               string* out) {
+  if (timeout.ToMilliseconds() == 0) {
+    return Status::NotSupported(
+        "unlimited timeout is not supported when retrieving instance 
metadata");
+  }
   EasyCurl curl;
   curl.set_timeout(timeout);
   faststring resp;
   RETURN_NOT_OK(curl.FetchURL(url, &resp, headers));
-  *out = resp.ToString();
+  if (out) {
+    *out = resp.ToString();
+  }
   return Status::OK();
 }
 
-Status InstanceMetadataBase::FetchInstanceId(string* id) {
+Status InstanceMetadata::FetchInstanceId(string* id) {
   return Fetch(instance_id_url(), request_timeout(), request_headers(), id);
 }
 
diff --git a/src/kudu/util/cloud/instance_metadata.h 
b/src/kudu/util/cloud/instance_metadata.h
index 55a4300..213f11a 100644
--- a/src/kudu/util/cloud/instance_metadata.h
+++ b/src/kudu/util/cloud/instance_metadata.h
@@ -38,11 +38,16 @@ enum class CloudType {
 
 const char* TypeToString(CloudType type);
 
-// Generic interface to collect and access metadata of a public cloud instance.
+// A class to collect metadata of a public cloud instance. It includes a 
generic
+// interface and common base stuff to work with HTTP-based metadata server.
+// That's the ubiquitous way of accessing metadata from within a cloud instance
+// such as AWS, GCE, DigitalOcean.
+//
 // Concrete classes implementing this interface use stable APIs to retrieve
 // corresponding information (published by corresponding cloud providers).
 class InstanceMetadata {
  public:
+  InstanceMetadata();
   virtual ~InstanceMetadata() {}
 
   // Initialize the object, collecting information about a cloud instance.
@@ -50,14 +55,11 @@ class InstanceMetadata {
   // If the basic information has been retrieved successfully, returns
   // Status::OK(), otherwise returns non-OK status to reflect the error
   // encountered.
-  virtual Status Init() WARN_UNUSED_RESULT = 0;
+  virtual Status Init() WARN_UNUSED_RESULT;
 
   // Get the type of the cloud instance.
   virtual CloudType type() const = 0;
 
-  // Get identifier of the cloud instance.
-  virtual const std::string& id() const = 0;
-
   // Get the internal NTP server accessible from within the instance.
   // On success, returns Status::OK() and populates the output parameter
   // 'server' with IP address or FQDN of the NTP server available from within
@@ -67,22 +69,11 @@ class InstanceMetadata {
   //   * Status::IllegalState() if the metadata object requires initialization,
   //                            but it hasn't been initialized yet
   virtual Status GetNtpServer(std::string* server) const WARN_UNUSED_RESULT = 
0;
-};
-
-// The common base class to work with the instance's metadata using the 
metadata
-// server HTTP-based API. That's the ubiquitous way of accessing metadata from
-// within a cloud instance (e.g., exists in AWS, GCE, DigitalOcean).
-class InstanceMetadataBase : public InstanceMetadata {
- public:
-  InstanceMetadataBase();
-  ~InstanceMetadataBase() = default;
-
-  Status Init() override WARN_UNUSED_RESULT;
-  const std::string& id() const override;
 
  protected:
-  // Fetch data from specified URL. Targeted for fetching information from
-  // the instance's metadata server.
+  // Fetch data from specified URL and output into the 'out' parameter. This
+  // method is targeted for fetching information from the instance's metadata
+  // server. The 'out' output parameter can be null.
   static Status Fetch(const std::string& url,
                       MonoDelta timeout,
                       const std::vector<std::string>& headers,
@@ -108,9 +99,6 @@ class InstanceMetadataBase : public InstanceMetadata {
   // the identifier of the instance. Returns non-OK status in case of errors.
   Status FetchInstanceId(std::string* id);
 
-  // Instance identifier; valid only after successful initialization.
-  std::string id_;
-
   // Whether this object has been initialized.
   bool is_initialized_;
 };
@@ -118,7 +106,7 @@ class InstanceMetadataBase : public InstanceMetadata {
 // More information on the metadata server for EC2 cloud instances:
 //   https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ \
 //     ec2-instance-metadata.html
-class AwsInstanceMetadata : public InstanceMetadataBase {
+class AwsInstanceMetadata : public InstanceMetadata {
  public:
   AwsInstanceMetadata() = default;
   ~AwsInstanceMetadata() = default;
@@ -134,7 +122,7 @@ class AwsInstanceMetadata : public InstanceMetadataBase {
 // More information on the metadata server for Azure cloud instances:
 //   https://docs.microsoft.com/en-us/azure/virtual-machines/linux/ \
 //     instance-metadata-service
-class AzureInstanceMetadata : public InstanceMetadataBase {
+class AzureInstanceMetadata : public InstanceMetadata {
  public:
   AzureInstanceMetadata() = default;
   ~AzureInstanceMetadata() = default;
@@ -149,7 +137,7 @@ class AzureInstanceMetadata : public InstanceMetadataBase {
 
 // More information on the metadata server for GCE cloud instances:
 //   https://cloud.google.com/compute/docs/storing-retrieving-metadata
-class GceInstanceMetadata : public InstanceMetadataBase {
+class GceInstanceMetadata : public InstanceMetadata {
  public:
   GceInstanceMetadata() = default;
   ~GceInstanceMetadata() = default;

Reply via email to