This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 53533bbc88183710329285789db5572e173a9abb
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue Nov 26 18:33:50 2019 -0800

    [utility] auto-detection of cloud VM instance type
    
    Added a utility to auto-detect the type of VM instance for
    AWS, Azure and GCE public clouds.
    
    Follow-up changelists will make use of this functionality if
    auto-configuring the built-in NTP client upon startup of kudu-master
    and kudu-tserver.
    
    Change-Id: I083f4803decaef5d6e7d44184bbd98b074d2578b
    Reviewed-on: http://gerrit.cloudera.org:8080/14866
    Reviewed-by: Adar Dembo <[email protected]>
    Tested-by: Kudu Jenkins
---
 src/kudu/util/CMakeLists.txt                  |  42 ++++--
 src/kudu/util/cloud/instance_detector-test.cc |  97 ++++++++++++
 src/kudu/util/cloud/instance_detector.cc      | 127 ++++++++++++++++
 src/kudu/util/cloud/instance_detector.h       |  95 ++++++++++++
 src/kudu/util/cloud/instance_metadata.cc      | 205 ++++++++++++++++++++++++++
 src/kudu/util/cloud/instance_metadata.h       | 166 +++++++++++++++++++++
 6 files changed, 719 insertions(+), 13 deletions(-)

diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt
index f92af7b..4c576bb 100644
--- a/src/kudu/util/CMakeLists.txt
+++ b/src/kudu/util/CMakeLists.txt
@@ -322,6 +322,26 @@ ADD_EXPORTABLE_LIBRARY(kudu_util_compression
 target_compile_definitions(kudu_util_compression PUBLIC 
LZ4_DISABLE_DEPRECATE_WARNINGS)
 target_compile_definitions(kudu_util_compression_exported PUBLIC 
LZ4_DISABLE_DEPRECATE_WARNINGS)
 
+#######################################
+# kudu_curl_util
+#######################################
+add_library(kudu_curl_util
+  curl_util.cc)
+target_link_libraries(kudu_curl_util
+  security
+  ${CURL_LIBRARIES}
+  glog
+  gutil)
+
+#######################################
+# kudu_cloud_util
+#######################################
+add_library(kudu_cloud_util
+  cloud/instance_detector.cc
+  cloud/instance_metadata.cc)
+target_link_libraries(kudu_cloud_util
+  kudu_curl_util)
+
 # See the comment in sanitizer_options.cc for details on this library's usage.
 # The top-level CMakeLists sets a ${SANITIZER_OPTIONS_OVERRIDE} variable which
 # should be linked first into all Kudu binaries.
@@ -359,19 +379,6 @@ target_link_libraries(kudu_test_util
   kudu_util)
 
 #######################################
-# kudu_curl_util
-#######################################
-if(NOT NO_TESTS)
-  add_library(kudu_curl_util
-    curl_util.cc)
-  target_link_libraries(kudu_curl_util
-    security
-    ${CURL_LIBRARIES}
-    glog
-    gutil)
-endif()
-
-#######################################
 # kudu_test_main
 #######################################
 if(NOT NO_TESTS)
@@ -563,3 +570,12 @@ if(NOT NO_TESTS)
   target_link_libraries(curl_util-test
     kudu_curl_util)
 endif()
+
+#######################################
+# instance_detector-test
+#######################################
+ADD_KUDU_TEST(cloud/instance_detector-test)
+if(NOT NO_TESTS)
+  target_link_libraries(instance_detector-test
+    kudu_cloud_util)
+endif()
diff --git a/src/kudu/util/cloud/instance_detector-test.cc 
b/src/kudu/util/cloud/instance_detector-test.cc
new file mode 100644
index 0000000..6f3b2fa
--- /dev/null
+++ b/src/kudu/util/cloud/instance_detector-test.cc
@@ -0,0 +1,97 @@
+// Some portions Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "kudu/util/cloud/instance_detector.h"
+
+#include <initializer_list>
+#include <memory>
+#include <ostream>
+#include <string>
+
+#include <gflags/gflags_declare.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/cloud/instance_metadata.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/status.h"
+
+DECLARE_uint32(cloud_metadata_server_request_timeout_ms);
+
+using std::string;
+using std::unique_ptr;
+using strings::Substitute;
+
+namespace kudu {
+namespace cloud {
+
+// Construct and destroy a detector without ever calling Detect() on it.
+TEST(InstanceDetectorTest, NoDetectionRun) {
+  InstanceDetector detector;
+}
+
+// Basic scenario to verify the functionality of the InstanceDetector::Detect()
+// method: the outcome of the auto-detection is cross-checked against the
+// results of initializing the metadata object of every cloud type directly.
+TEST(InstanceDetectorTest, Basic) {
+#ifdef THREAD_SANITIZER
+  // In TSAN builds it takes longer to spawn threads and, overall, to work
+  // with the sanitized version of libcurl while there is a lot of concurrent
+  // activity. That doesn't make the metadata server take more time to respond,
+  // but processing a response takes longer: a request often ends up with
+  // a time-out error even if the server responds in a timely manner.
+  FLAGS_cloud_metadata_server_request_timeout_ms = 10000;
+#endif
+  InstanceDetector detector(MonoDelta::FromMilliseconds(
+      FLAGS_cloud_metadata_server_request_timeout_ms));
+  unique_ptr<InstanceMetadata> metadata;
+  const auto s = detector.Detect(&metadata);
+
+  // Query each type of metadata server directly to get reference results.
+  const auto s_aws = AwsInstanceMetadata().Init();
+  const auto s_azure = AzureInstanceMetadata().Init();
+  const auto s_gce = GceInstanceMetadata().Init();
+
+  if (s_aws.ok() || s_azure.ok() || s_gce.ok()) {
+    // At least one metadata server responded: the detector must succeed, too.
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_NE(nullptr, metadata.get());
+    LOG(INFO) << Substitute("detected $0 environment, VM id: $1",
+                            TypeToString(metadata->type()), metadata->id());
+  } else {
+    // No metadata server responded: the detector must report NotFound.
+    ASSERT_TRUE(s.IsNotFound()) << s.ToString();
+    ASSERT_EQ(nullptr, metadata.get());
+    LOG(INFO) << "detected non-cloud environment";
+  }
+
+  // An instance cannot be of several cloud types at once, and the detected
+  // type must match the metadata server that responded.
+  if (s_aws.ok()) {
+    ASSERT_FALSE(s_azure.ok());
+    ASSERT_FALSE(s_gce.ok());
+    ASSERT_EQ(CloudType::AWS, metadata->type());
+  } else if (s_azure.ok()) {
+    ASSERT_FALSE(s_aws.ok());
+    ASSERT_FALSE(s_gce.ok());
+    ASSERT_EQ(CloudType::AZURE, metadata->type());
+  } else if (s_gce.ok()) {
+    ASSERT_FALSE(s_aws.ok());
+    ASSERT_FALSE(s_azure.ok());
+    ASSERT_EQ(CloudType::GCE, metadata->type());
+  }
+}
+
+// With an auto-detection timeout that is too short, Detect() is expected to
+// return Status::TimedOut() and to leave the output parameter unset.
+TEST(InstanceDetectorTest, Timeout) {
+  // Cover both a zero-length and a very short non-zero timeout.
+  const MonoDelta kTimeouts[] = {
+    MonoDelta::FromNanoseconds(0),
+    MonoDelta::FromNanoseconds(1),
+  };
+  for (const auto& timeout : kTimeouts) {
+    SCOPED_TRACE(Substitute("timeout interval '$0'", timeout.ToString()));
+    InstanceDetector detector(timeout);
+    unique_ptr<InstanceMetadata> result;
+    const Status status = detector.Detect(&result);
+    ASSERT_TRUE(status.IsTimedOut()) << status.ToString();
+    ASSERT_EQ(nullptr, result.get());
+  }
+}
+
+}  // namespace cloud
+}  // namespace kudu
diff --git a/src/kudu/util/cloud/instance_detector.cc 
b/src/kudu/util/cloud/instance_detector.cc
new file mode 100644
index 0000000..2142bd3
--- /dev/null
+++ b/src/kudu/util/cloud/instance_detector.cc
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/cloud/instance_detector.h"
+
+#include <algorithm>
+#include <limits>
+#include <ostream>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/cloud/instance_metadata.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/thread.h"
+
+using std::unique_ptr;
+using strings::Substitute;
+
+namespace kudu {
+namespace cloud {
+
+const size_t InstanceDetector::kNoIdx = std::numeric_limits<size_t>::max();
+
+// Set up the detector with the given overall timeout and register one
+// metadata object per supported cloud type. No threads are started here:
+// the 'runner' of every entry stays null.
+InstanceDetector::InstanceDetector(MonoDelta timeout)
+    : timeout_(timeout),
+      cv_(&mutex_),
+      num_running_detectors_(0),
+      result_detector_idx_(kNoIdx) {
+  const auto register_detector = [this](unique_ptr<InstanceMetadata> imd) {
+    detectors_.push_back({ std::move(imd), nullptr });
+  };
+  register_detector(unique_ptr<InstanceMetadata>(new AwsInstanceMetadata));
+  register_detector(unique_ptr<InstanceMetadata>(new AzureInstanceMetadata));
+  register_detector(unique_ptr<InstanceMetadata>(new GceInstanceMetadata));
+}
+
+// Join every detector thread that has been spawned; entries without
+// a runner thread are skipped.
+InstanceDetector::~InstanceDetector() {
+  for (auto& info : detectors_) {
+    if (!info.runner) {
+      continue;
+    }
+    CHECK_OK(ThreadJoiner(info.runner.get()).Join());
+  }
+}
+
+// Run all the registered detectors concurrently and wait for the outcome.
+//
+// Returns:
+//   * Status::OK() and moves the matched metadata object into 'metadata'
+//     if one of the detectors succeeded before the deadline
+//   * Status::TimedOut() if the deadline has passed
+//   * Status::NotFound() if all the detectors completed without success
+//   * the error returned by Thread::Create() if a detector thread could not
+//     be spawned
+Status InstanceDetector::Detect(unique_ptr<InstanceMetadata>* metadata) {
+  const auto deadline = MonoTime::Now() + timeout_;
+  {
+    // An extra sanity check: this method must not be called more than once.
+    MutexLock lock(mutex_);
+    CHECK_EQ(0, num_running_detectors_);
+    CHECK_EQ(kNoIdx, result_detector_idx_);
+    num_running_detectors_ = detectors_.size();
+  }
+
+  // Spawn multiple threads: one thread per known type of cloud instance.
+  // The index is of size_t type to match both detectors_.size() and the
+  // signature of GetInstanceInfo().
+  for (size_t idx = 0; idx < detectors_.size(); ++idx) {
+    auto& d = detectors_[idx];
+    CHECK(d.metadata);
+    CHECK(!d.runner);
+    scoped_refptr<Thread> runner;
+    RETURN_NOT_OK(Thread::Create(
+        "cloud detector", TypeToString(d.metadata->type()),
+        &InstanceDetector::GetInstanceInfo, this, d.metadata.get(), idx,
+        &runner));
+    d.runner = std::move(runner);
+  }
+
+  // A cloud instance cannot be of many types: the first detector to update
+  // the 'result_detector_idx_' field wins the race and breaks the loop.
+  // Spurious wakeups are ignored by the virtue of checking the value of the
+  // 'result_detector_idx_' field.
+  {
+    MutexLock lock(mutex_);
+    while (result_detector_idx_ == kNoIdx && num_running_detectors_ > 0) {
+      if (!cv_.WaitUntil(deadline)) {
+        break;
+      }
+    }
+    if (deadline < MonoTime::Now()) {
+      return Status::TimedOut(
+          "could not retrieve instance metadata before the deadline");
+    }
+    if (result_detector_idx_ != kNoIdx) {
+      CHECK_LT(result_detector_idx_, detectors_.size());
+      // Hand the ownership of the matched metadata object over to the caller.
+      *metadata = std::move(detectors_[result_detector_idx_].metadata);
+      return Status::OK();
+    }
+  }
+
+  return Status::NotFound("could not retrieve instance metadata");
+}
+
+// The body of a detector thread: initialize the specified metadata object,
+// record the outcome under the lock, and signal the condition variable.
+void InstanceDetector::GetInstanceInfo(InstanceMetadata* imd, size_t idx) {
+  DCHECK(imd);
+  const Status init_status = imd->Init();
+  const bool is_detected = init_status.ok();
+  {
+    MutexLock guard(mutex_);
+    --num_running_detectors_;
+    if (is_detected) {
+      // Only one detector is expected to succeed.
+      CHECK_EQ(kNoIdx, result_detector_idx_)
+          << "conflicting cloud instance types";
+      result_detector_idx_ = idx;
+    }
+  }
+  cv_.Signal();
+  if (is_detected) {
+    return;
+  }
+  LOG(WARNING) << Substitute("could not retrieve $0 instance metadata: $1",
+                             TypeToString(imd->type()),
+                             init_status.ToString());
+}
+
+} // namespace cloud
+} // namespace kudu
diff --git a/src/kudu/util/cloud/instance_detector.h 
b/src/kudu/util/cloud/instance_detector.h
new file mode 100644
index 0000000..d5a38ac
--- /dev/null
+++ b/src/kudu/util/cloud/instance_detector.h
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "kudu/gutil/port.h"
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/util/cloud/instance_metadata.h"
+#include "kudu/util/condition_variable.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/mutex.h"
+#include "kudu/util/status.h"
+#include "kudu/util/thread.h"
+
+namespace kudu {
+
+namespace cloud {
+
+// Cloud instance detector. Provides an interface to retrieve metadata about
+// machines/instances run by public cloud vendors.
+class InstanceDetector {
+ public:
+  // Instantiate the detector using the specified timeout for auto-detection of
+  // the type of the cloud environment it's run at.
+  explicit InstanceDetector(MonoDelta timeout = MonoDelta::FromSeconds(1));
+
+  // The destructor waits for the detector threads to join (if any were
+  // spawned).
+  virtual ~InstanceDetector();
+
+  // Perform the auto-detection of a cloud instance this object is running at.
+  // This method must not be invoked more than once.
+  // It's a synchronous call and it can take some time to complete (see
+  // the parameters of the constructor to specify timeout for the operation).
+  // On success, returns Status::OK() and provides the instance's metadata
+  // object via the 'metadata' output parameter. In case of a failure, no
+  // valid metadata is provided in the 'metadata' output parameter, and this
+  // method returns
+  //   * Status::NotFound() if the auto-detection didn't recognize any known
+  //                        instance types: the auto-detection was run in a
+  //                        non-cloud environment (most likely) or at a VM
+  //                        of unknown/not-yet-supported cloud type
+  //   * Status::TimedOut() if the specified auto-detection timeout was too
+  //                        short to identify at least one known cloud type;
+  //                        the auto-detection results should not be trusted
+  //
+  // TODO(aserbin): do we need async version of this method?
+  Status Detect(std::unique_ptr<InstanceMetadata>* metadata) WARN_UNUSED_RESULT;
+
+ private:
+  // The value of 'result_detector_idx_' meaning "no detector has matched".
+  static const size_t kNoIdx;
+
+  // Get metadata for the specified instance. In case of success, store the
+  // specified index 'idx' in 'result_detector_idx_' field.
+  void GetInstanceInfo(InstanceMetadata* imd, size_t idx);
+
+  // The overall timeout for the auto-detection.
+  const MonoDelta timeout_;
+
+  // Mutex and associated condition variable.
+  Mutex mutex_;
+  ConditionVariable cv_;
+
+  // A metadata object and the thread running the detection for it.
+  struct DetectorInfo {
+    std::unique_ptr<InstanceMetadata> metadata;
+    scoped_refptr<Thread> runner;
+  };
+  std::vector<DetectorInfo> detectors_;
+
+  // Number of detector threads starting/running;
+  // access to the field is guarded by the 'mutex_'.
+  size_t num_running_detectors_;
+
+  // The index of the matched detector thread in the detectors_ container;
+  // access to the field is guarded by the 'mutex_'.
+  size_t result_detector_idx_;
+};
+
+} // namespace cloud
+} // namespace kudu
diff --git a/src/kudu/util/cloud/instance_metadata.cc 
b/src/kudu/util/cloud/instance_metadata.cc
new file mode 100644
index 0000000..b895181
--- /dev/null
+++ b/src/kudu/util/cloud/instance_metadata.cc
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/cloud/instance_metadata.h"
+
+#include <cstdint>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "kudu/gutil/macros.h"
+#include "kudu/util/curl_util.h"
+#include "kudu/util/faststring.h"
+#include "kudu/util/flag_tags.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/status.h"
+
+// The timeout should be high enough to work effectively, but as low as possible
+// to avoid slowing down detection of running in non-cloud environments. As of
+// now, the metadata servers of major public cloud providers are robust enough
+// to send the response in a fraction of a second.
+DEFINE_uint32(cloud_metadata_server_request_timeout_ms, 500,
+              "Timeout for HTTP/HTTPS requests to the instance metadata server "
+              "(in milliseconds)");
+TAG_FLAG(cloud_metadata_server_request_timeout_ms, advanced);
+TAG_FLAG(cloud_metadata_server_request_timeout_ms, runtime);
+
+// The flags below are very unlikely to be customized since they are a part
+// of the public API provided by the cloud providers. They are here for
+// the peace of mind to be able to adapt for the changes in the cloud
+// environment without the need to recompile the binaries.
+
+// See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/\
+//   ec2-instance-metadata.html#instancedata-data-retrieval
+// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/\
+//   ec2-instance-metadata.html#instancedata-data-categories
+DEFINE_string(cloud_aws_instance_id_url,
+              "http://169.254.169.254/latest/meta-data/instance-id",
+              "The URL to fetch the identifier of an AWS instance");
+TAG_FLAG(cloud_aws_instance_id_url, advanced);
+TAG_FLAG(cloud_aws_instance_id_url, runtime);
+
+// See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/set-time.html
+// for details.
+DEFINE_string(cloud_aws_ntp_server, "169.254.169.123",
+              "IP address/FQDN of the internal NTP server to use from within "
+              "an AWS instance");
+TAG_FLAG(cloud_aws_ntp_server, advanced);
+TAG_FLAG(cloud_aws_ntp_server, runtime);
+
+// See https://docs.microsoft.com/en-us/azure/virtual-machines/linux/ \
+//   instance-metadata-service#retrieving-metadata for details.
+DEFINE_string(cloud_azure_instance_id_url,
+              "http://169.254.169.254/metadata/instance/compute/vmId?"
+              "api-version=2018-10-01&format=text",
+              "The URL to fetch the identifier of an Azure instance");
+TAG_FLAG(cloud_azure_instance_id_url, advanced);
+TAG_FLAG(cloud_azure_instance_id_url, runtime);
+
+// See https://cloud.google.com/compute/docs/instances/managing-instances# \
+//   configure_ntp_for_your_instances for details.
+DEFINE_string(cloud_gce_ntp_server, "metadata.google.internal",
+              "IP address/FQDN of the internal NTP server to use from within "
+              "a GCE instance");
+TAG_FLAG(cloud_gce_ntp_server, advanced);
+TAG_FLAG(cloud_gce_ntp_server, runtime);
+
+// See https://cloud.google.com/compute/docs/storing-retrieving-metadata
+// for details.
+DEFINE_string(cloud_gce_instance_id_url,
+              "http://metadata.google.internal/computeMetadata/v1/instance/id",
+              "The URL to fetch the identifier of a GCE instance");
+TAG_FLAG(cloud_gce_instance_id_url, advanced);
+TAG_FLAG(cloud_gce_instance_id_url, runtime);
+
+using std::string;
+using std::vector;
+
+namespace kudu {
+namespace cloud {
+
+// Return the human-readable name for the specified cloud type. A value
+// which doesn't match any of the known enumerators results in LOG(FATAL).
+const char* TypeToString(CloudType type) {
+  switch (type) {
+    case CloudType::AWS:
+      return "AWS";
+    case CloudType::AZURE:
+      return "Azure";
+    case CloudType::GCE:
+      return "GCE";
+    default:
+      LOG(FATAL) << static_cast<uint16_t>(type) << ": unknown cloud type";
+      break;  // unreachable
+  }
+}
+
+InstanceMetadataBase::InstanceMetadataBase()
+    : is_initialized_(false) {
+}
+
+// Initialize the object by fetching the instance identifier from the metadata
+// server: as of now, that's the only criterion for successful initialization.
+// Returns the error from FetchInstanceId() as-is in case of a failure.
+Status InstanceMetadataBase::Init() {
+  DCHECK(!is_initialized_);
+  const Status fetch_status = FetchInstanceId(&id_);
+  if (!fetch_status.ok()) {
+    return fetch_status;
+  }
+  DCHECK(!id_.empty());
+  is_initialized_ = true;
+  return Status::OK();
+}
+
+const string& InstanceMetadataBase::id() const {
+  CHECK(is_initialized_);
+  return id_;
+}
+
+MonoDelta InstanceMetadataBase::request_timeout() const {
+  return MonoDelta::FromMilliseconds(
+      FLAGS_cloud_metadata_server_request_timeout_ms);
+}
+
+// Fetch the contents at the specified URL, supplying the specified headers
+// with the request and using the specified timeout. On success, the body of
+// the response is stored in the 'out' output parameter.
+Status InstanceMetadataBase::Fetch(const string& url,
+                                   MonoDelta timeout,
+                                   const vector<string>& headers,
+                                   string* out) {
+  DCHECK(out);
+  EasyCurl client;
+  client.set_timeout(timeout);
+  faststring body;
+  const Status fetch_status = client.FetchURL(url, &body, headers);
+  if (!fetch_status.ok()) {
+    return fetch_status;
+  }
+  *out = body.ToString();
+  return Status::OK();
+}
+
+Status InstanceMetadataBase::FetchInstanceId(string* id) {
+  return Fetch(instance_id_url(), request_timeout(), request_headers(), id);
+}
+
+Status AwsInstanceMetadata::GetNtpServer(string* server) const {
+  DCHECK(server);
+  *server = FLAGS_cloud_aws_ntp_server;
+  return Status::OK();
+}
+
+const vector<string>& AwsInstanceMetadata::request_headers() const {
+  // EC2 doesn't require any specific headers supplied with a generic query
+  // to the metadata server.
+  static const vector<string> kRequestHeaders = {};
+  return kRequestHeaders;
+}
+
+const string& AwsInstanceMetadata::instance_id_url() const {
+  return FLAGS_cloud_aws_instance_id_url;
+}
+
+Status AzureInstanceMetadata::GetNtpServer(string* /* server */) const {
+  // An Azure instance doesn't have access to dedicated NTP servers: Azure
+  // doesn't provide such a service.
+  return Status::NotSupported("Azure doesn't provide a dedicated NTP server");
+}
+
+const vector<string>& AzureInstanceMetadata::request_headers() const {
+  static const vector<string> kRequestHeaders = { "Metadata:true" };
+  return kRequestHeaders;
+}
+
+const string& AzureInstanceMetadata::instance_id_url() const {
+  return FLAGS_cloud_azure_instance_id_url;
+}
+
+Status GceInstanceMetadata::GetNtpServer(string* server) const {
+  DCHECK(server);
+  *server = FLAGS_cloud_gce_ntp_server;
+  return Status::OK();
+}
+
+const vector<string>& GceInstanceMetadata::request_headers() const {
+  static const vector<string> kHeaders = { "Metadata-Flavor:Google" };
+  return kHeaders;
+}
+
+const string& GceInstanceMetadata::instance_id_url() const {
+  return FLAGS_cloud_gce_instance_id_url;
+}
+
+} // namespace cloud
+} // namespace kudu
diff --git a/src/kudu/util/cloud/instance_metadata.h 
b/src/kudu/util/cloud/instance_metadata.h
new file mode 100644
index 0000000..55a4300
--- /dev/null
+++ b/src/kudu/util/cloud/instance_metadata.h
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "kudu/gutil/port.h"
+#include "kudu/util/status.h"
+
+namespace kudu {
+class MonoDelta;
+}  // namespace kudu
+
+namespace kudu {
+namespace cloud {
+
+enum class CloudType {
+  AWS,
+  AZURE,
+  GCE,
+};
+
+const char* TypeToString(CloudType type);
+
+// Generic interface to collect and access metadata of a public cloud instance.
+// Concrete classes implementing this interface use stable APIs to retrieve
+// corresponding information (published by corresponding cloud providers).
+class InstanceMetadata {
+ public:
+  virtual ~InstanceMetadata() {}
+
+  // Initialize the object, collecting information about a cloud instance.
+  // It's a synchronous call and it can take some time to complete.
+  // If the basic information has been retrieved successfully, returns
+  // Status::OK(), otherwise returns non-OK status to reflect the error
+  // encountered.
+  virtual Status Init() WARN_UNUSED_RESULT = 0;
+
+  // Get the type of the cloud instance.
+  virtual CloudType type() const = 0;
+
+  // Get identifier of the cloud instance.
+  virtual const std::string& id() const = 0;
+
+  // Get the internal NTP server accessible from within the instance.
+  // On success, returns Status::OK() and populates the output parameter
+  // 'server' with IP address or FQDN of the NTP server available from within
+  // the instance. Returns
+  //   * Status::NotSupported() if the cloud platform doesn't provide internal
+  //                            NTP service for its instances
+  //   * Status::IllegalState() if the metadata object requires initialization,
+  //                            but it hasn't been initialized yet
+  virtual Status GetNtpServer(
+      std::string* server) const WARN_UNUSED_RESULT = 0;
+};
+
+// The common base class to work with the instance's metadata using the
+// metadata server HTTP-based API. That's the ubiquitous way of accessing
+// metadata from within a cloud instance (e.g., exists in AWS, GCE,
+// DigitalOcean).
+class InstanceMetadataBase : public InstanceMetadata {
+ public:
+  InstanceMetadataBase();
+  ~InstanceMetadataBase() override = default;
+
+  // Fetch the identifier of the instance from the metadata server; see
+  // InstanceMetadata::Init() for the contract.
+  Status Init() override WARN_UNUSED_RESULT;
+
+  // Return the identifier of the instance. The object must have been
+  // successfully initialized prior to calling this method.
+  const std::string& id() const override;
+
+ protected:
+  // Fetch data from specified URL. Targeted for fetching information from
+  // the instance's metadata server.
+  static Status Fetch(const std::string& url,
+                      MonoDelta timeout,
+                      const std::vector<std::string>& headers,
+                      std::string* out);
+
+  // The timeout used for HTTP requests sent to the metadata server. The base
+  // implementation assumes the metadata server is robust enough to respond
+  // in a fraction of a second. If not, override this method accordingly.
+  virtual MonoDelta request_timeout() const;
+
+  // Return HTTP header fields to supply with requests to the metadata server.
+  // Metadata servers might have specific requirements on expected headers.
+  virtual const std::vector<std::string>& request_headers() const = 0;
+
+  // Return metadata server's URL used to retrieve instance identifier.
+  // It's assumed the server replies with plain text, where the string
+  // contains just the identifier.
+  virtual const std::string& instance_id_url() const = 0;
+
+ private:
+  // Fetch cloud instance identifier from the metadata server. Returns
+  // Status::OK() in case of success, populating the output parameter 'id' with
+  // the identifier of the instance. Returns non-OK status in case of errors.
+  Status FetchInstanceId(std::string* id);
+
+  // Instance identifier; valid only after successful initialization.
+  std::string id_;
+
+  // Whether this object has been initialized.
+  bool is_initialized_;
+};
+
+// More information on the metadata server for EC2 cloud instances:
+//   https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ \
+//     ec2-instance-metadata.html
+class AwsInstanceMetadata : public InstanceMetadataBase {
+ public:
+  AwsInstanceMetadata() = default;
+  ~AwsInstanceMetadata() = default;
+
+  CloudType type() const override { return CloudType::AWS; }
+  Status GetNtpServer(std::string* server) const override WARN_UNUSED_RESULT;
+
+ protected:
+  const std::vector<std::string>& request_headers() const override;
+  const std::string& instance_id_url() const override;
+};
+
+// More information on the metadata server for Azure cloud instances:
+//   https://docs.microsoft.com/en-us/azure/virtual-machines/linux/ \
+//     instance-metadata-service
+class AzureInstanceMetadata : public InstanceMetadataBase {
+ public:
+  AzureInstanceMetadata() = default;
+  ~AzureInstanceMetadata() = default;
+
+  CloudType type() const override { return CloudType::AZURE; }
+  Status GetNtpServer(std::string* server) const override WARN_UNUSED_RESULT;
+
+ protected:
+  const std::vector<std::string>& request_headers() const override;
+  const std::string& instance_id_url() const override;
+};
+
+// More information on the metadata server for GCE cloud instances:
+//   https://cloud.google.com/compute/docs/storing-retrieving-metadata
+class GceInstanceMetadata : public InstanceMetadataBase {
+ public:
+  GceInstanceMetadata() = default;
+  ~GceInstanceMetadata() = default;
+
+  CloudType type() const override { return CloudType::GCE; }
+  Status GetNtpServer(std::string* server) const override WARN_UNUSED_RESULT;
+
+ protected:
+  const std::vector<std::string>& request_headers() const override;
+  const std::string& instance_id_url() const override;
+};
+
+} // namespace cloud
+} // namespace kudu

Reply via email to