This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit e55ee9d38abeec9f80998c9af1c7864d5fef4e9b
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue May 19 08:32:28 2020 -0700

    [master] cache for table locations
    
    This patch introduces a cache for table locations in catalog manager.
    
    When running with 48 concurrent client threads, the performance of
    the CatalogManager::GetTableLocations() method improved by about 100%
    when the cache is enabled.  Also, a 14% improvement is observed for
    GetTableLocations RPC when running with the same number of concurrent
    client threads and cache enabled.  The test results are below.
    
    I'm planning to add these test scenarios into kudu/scripts/benchmarks.sh
    in a follow up patch.
    
    ========================================================================
    
    After this patch with 128MByte cache enabled:
    
    table_locations-itest \
      --gtest_filter=TableLocationsTest.GetTableLocationsBenchmarkFunctionCall \
      --benchmark_num_threads=48 \
      --table_locations_cache_capacity_mb=128
    
      GetTableLocations function call: 504187.6 req/sec
    
    Before this patch:
    
    table_locations-itest \
      --gtest_filter=TableLocationsTest.GetTableLocationsBenchmarkFunctionCall \
      --benchmark_num_threads=48
    
      GetTableLocations function call: 252443.4 req/sec
    
    ========================================================================
    
    After this patch with 128MByte cache enabled:
    
    table_locations-itest \
      --gtest_filter=TableLocationsTest.GetTableLocationsBenchmark \
      --rpc_num_service_threads=32 \
      --benchmark_num_threads=48 \
      --table_locations_cache_capacity_mb=128
    
      GetTableLocations RPC: 40033.4 req/sec
      Stats on GetTableLocations RPC (times in microseconds):
      Count: 200167
      Mean: 155.144
      Percentiles:
         0%  (min) = 45
        25%        = 125
        50%  (med) = 147
        75%        = 177
        95%        = 223
        99%        = 314
        99.9%      = 652
        99.99%     = 1928
        100% (max) = 5096
    
    Before this patch:
    
    table_locations-itest \
      --gtest_filter=TableLocationsTest.GetTableLocationsBenchmark \
      --rpc_num_service_threads=32 \
      --benchmark_num_threads=48
    
      GetTableLocations RPC: 34981 req/sec
      Stats on GetTableLocations RPC (times in microseconds):
      Count: 174905
      Mean: 249.308
      Percentiles:
         0%  (min) = 64
        25%        = 197
        50%  (med) = 231
        75%        = 284
        95%        = 386
        99%        = 556
        99.9%      = 980
        99.99%     = 2400
        100% (max) = 6113
    
    Change-Id: I7d2a4771ddc455d92a1da00db91c555a21151a23
    Reviewed-on: http://gerrit.cloudera.org:8080/15971
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <[email protected]>
---
 .../integration-tests/table_locations-itest.cc     | 549 ++++++++++++++++++++-
 src/kudu/master/CMakeLists.txt                     |   2 +
 src/kudu/master/catalog_manager.cc                 | 106 +++-
 src/kudu/master/catalog_manager.h                  |   6 +
 src/kudu/master/master_service.cc                  |  36 +-
 src/kudu/master/table_locations_cache.cc           | 162 ++++++
 src/kudu/master/table_locations_cache.h            | 162 ++++++
 src/kudu/master/table_locations_cache_metrics.cc   |  75 +++
 src/kudu/master/table_locations_cache_metrics.h    |  35 ++
 9 files changed, 1098 insertions(+), 35 deletions(-)

diff --git a/src/kudu/integration-tests/table_locations-itest.cc 
b/src/kudu/integration-tests/table_locations-itest.cc
index b78bbba..2014ca7 100644
--- a/src/kudu/integration-tests/table_locations-itest.cc
+++ b/src/kudu/integration-tests/table_locations-itest.cc
@@ -19,6 +19,8 @@
 #include <atomic>
 #include <cstddef>
 #include <cstdint>
+#include <functional>
+#include <initializer_list>
 #include <memory>
 #include <numeric>
 #include <ostream>
@@ -32,6 +34,8 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "kudu/client/client.h"
+#include "kudu/client/schema.h"
 #include "kudu/common/common.pb.h"
 #include "kudu/common/partial_row.h"
 #include "kudu/common/row_operations.h"
@@ -40,15 +44,18 @@
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
 #include "kudu/integration-tests/test_workload.h"
 #include "kudu/master/catalog_manager.h"
 #include "kudu/master/master.h"
 #include "kudu/master/master.pb.h"
 #include "kudu/master/master.proxy.h"
 #include "kudu/master/mini_master.h"
+#include "kudu/mini-cluster/external_mini_cluster.h"
 #include "kudu/mini-cluster/internal_mini_cluster.h"
 #include "kudu/rpc/messenger.h"
 #include "kudu/rpc/rpc_controller.h"
+#include "kudu/tserver/mini_tablet_server.h"
 #include "kudu/util/hdr_histogram.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
@@ -59,8 +66,15 @@
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
+using kudu::client::KuduClient;
+using kudu::client::KuduSchema;
+using kudu::client::KuduTableAlterer;
+using kudu::client::KuduTableCreator;
+using kudu::cluster::ExternalMiniCluster;
+using kudu::cluster::ExternalMiniClusterOptions;
 using kudu::cluster::InternalMiniCluster;
 using kudu::cluster::InternalMiniClusterOptions;
+using kudu::master::ReplicaTypeFilter;
 using kudu::pb_util::SecureDebugString;
 using kudu::rpc::Messenger;
 using kudu::rpc::MessengerBuilder;
@@ -74,17 +88,31 @@ using std::unique_ptr;
 using std::vector;
 using strings::Substitute;
 
+DEFINE_int32(benchmark_runtime_secs, 5, "Number of seconds to run the 
benchmark");
+DEFINE_int32(benchmark_num_threads, 16, "Number of threads to run the 
benchmark");
+DEFINE_int32(benchmark_num_tablets, 60, "Number of tablets to create");
+
+DECLARE_double(leader_failure_max_missed_heartbeat_periods);
+DECLARE_int32(follower_unavailable_considered_failed_sec);
+DECLARE_int32(heartbeat_interval_ms);
 DECLARE_int32(max_create_tablets_per_ts);
+DECLARE_int32(raft_heartbeat_interval_ms);
 DECLARE_int32(rpc_num_service_threads);
 DECLARE_int32(rpc_service_queue_length);
 DECLARE_int32(table_locations_ttl_ms);
 DECLARE_string(location_mapping_cmd);
-METRIC_DECLARE_histogram(handler_latency_kudu_master_MasterService_GetTableLocations);
-METRIC_DECLARE_counter(rpcs_queue_overflow);
+DECLARE_uint32(table_locations_cache_capacity_mb);
 
-DEFINE_int32(benchmark_runtime_secs, 5, "Number of seconds to run the 
benchmark");
-DEFINE_int32(benchmark_num_threads, 16, "Number of threads to run the 
benchmark");
-DEFINE_int32(benchmark_num_tablets, 60, "Number of tablets to create");
+METRIC_DECLARE_entity(server);
+
+METRIC_DECLARE_counter(rpcs_queue_overflow);
+METRIC_DECLARE_counter(table_locations_cache_evictions);
+METRIC_DECLARE_counter(table_locations_cache_hits);
+METRIC_DECLARE_counter(table_locations_cache_inserts);
+METRIC_DECLARE_counter(table_locations_cache_lookups);
+METRIC_DECLARE_counter(table_locations_cache_misses);
+METRIC_DECLARE_gauge_uint64(table_locations_cache_memory_usage);
+METRIC_DECLARE_histogram(handler_latency_kudu_master_MasterService_GetTableLocations);
 
 namespace kudu {
 namespace master {
@@ -442,7 +470,9 @@ TEST_F(TableLocationsTest, GetTableLocationsBenchmark) {
   std::atomic<bool> stop { false };
   vector<thread> threads;
   threads.reserve(kNumThreads);
-  for (int i = 0; i < kNumThreads; i++) {
+  // If a thread encounters an error, the test fails.
+  vector<uint64_t> err_counters(kNumThreads, 0);
+  for (auto i = 0; i < kNumThreads; ++i) {
     threads.emplace_back([&, i]() {
         while (!stop) {
           GetTableLocationsRequestPB req;
@@ -451,7 +481,12 @@ TEST_F(TableLocationsTest, GetTableLocationsBenchmark) {
           req.mutable_table()->set_table_name(table_name);
           req.set_max_returned_locations(1000);
           req.set_intern_ts_infos_in_response(true);
-          CHECK_OK(proxies[i]->GetTableLocations(req, &resp, &controller));
+          const auto s = proxies[i]->GetTableLocations(req, &resp, 
&controller);
+          if (!s.ok()) {
+            LOG(WARNING) << "GetTableLocations() failed: " << s.ToString();
+            ++err_counters[i];
+            break;
+          }
           CHECK_EQ(resp.tablet_locations_size(), kNumSplits + 1);
         }
       });
@@ -464,11 +499,18 @@ TEST_F(TableLocationsTest, GetTableLocationsBenchmark) {
   }
 
   const auto& ent = cluster_->mini_master()->master()->metric_entity();
+  auto counter = METRIC_rpcs_queue_overflow.Instantiate(ent);
   auto hist = 
METRIC_handler_latency_kudu_master_MasterService_GetTableLocations
       .Instantiate(ent);
 
   cluster_->Shutdown();
 
+  LOG(INFO) << "RPC queue overflows: " << counter->value();
+  const auto errors = accumulate(err_counters.begin(), err_counters.end(), 
0UL);
+  if (errors != 0) {
+    FAIL() << Substitute("detected $0 errors", errors);
+  }
+
   LOG(INFO) << Substitute(
       "GetTableLocations RPC: $0 req/sec",
       hist->histogram()->TotalCount() / kRuntime.ToSeconds());
@@ -633,6 +675,7 @@ TEST_F(RefreshTableLocationsTest, ThunderingHerd) {
 
   cluster_->Shutdown();
 
+  LOG(INFO) << "RPC queue overflows: " << counter->value();
   LOG(INFO) << Substitute(
       "GetTableLocations RPC: $0 req/sec",
       hist->histogram()->TotalCount() / kRuntime.ToSeconds());
@@ -641,8 +684,498 @@ TEST_F(RefreshTableLocationsTest, ThunderingHerd) {
   ostr << "Stats on GetTableLocations RPC (times in microseconds): ";
   hist->histogram()->DumpHumanReadable(&ostr);
   LOG(INFO) << ostr.str();
+}
 
-  LOG(INFO) << "RPC queue overflows: " << counter->value();
+class TableLocationsCacheBaseTest : public KuduTest {
+ public:
+  TableLocationsCacheBaseTest(int num_tablet_servers,
+                              int cache_capacity_mb)
+      : num_tablet_servers_(num_tablet_servers),
+        cache_capacity_mb_(cache_capacity_mb),
+        schema_(itest::SimpleIntKeyKuduSchema()) {
+  }
+
+  void SetUp() override {
+    KuduTest::SetUp();
+
+    InternalMiniClusterOptions opts;
+    opts.num_tablet_servers = num_tablet_servers_;
+
+    // Setting the cache's capacity to anything > 0 enables caching of table
+    // locations.
+    FLAGS_table_locations_cache_capacity_mb = cache_capacity_mb_;
+
+    cluster_.reset(new InternalMiniCluster(env_, opts));
+    ASSERT_OK(cluster_->Start());
+    ASSERT_OK(cluster_->CreateClient(nullptr, &client_));
+
+    unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+    ASSERT_OK(table_creator->table_name(kTableName)
+        .schema(&schema_)
+        .set_range_partition_columns({ "key" })
+        .num_replicas(std::min<int>(3, num_tablet_servers_))
+        .add_hash_partitions({ "key" }, 5)
+        .Create());
+    metric_entity_ = cluster_->mini_master()->master()->metric_entity();
+    ASSERT_NE(nullptr, metric_entity_.get());
+  }
+
+  void TearDown() override {
+    if (cluster_) {
+      cluster_->Shutdown();
+      cluster_.reset();
+    }
+    KuduTest::TearDown();
+  }
+
+  int64_t GetCacheInserts() const {
+    return 
METRIC_table_locations_cache_inserts.Instantiate(metric_entity_)->value();
+  }
+
+  int64_t GetCacheLookups() const {
+    return 
METRIC_table_locations_cache_lookups.Instantiate(metric_entity_)->value();
+  }
+
+  int64_t GetCacheHits() const {
+    return 
METRIC_table_locations_cache_hits.Instantiate(metric_entity_)->value();
+  }
+
+  int64_t GetCacheMisses() const {
+    return 
METRIC_table_locations_cache_misses.Instantiate(metric_entity_)->value();
+  }
+
+  int64_t GetCacheEvictions() const {
+    return 
METRIC_table_locations_cache_evictions.Instantiate(metric_entity_)->value();
+  }
+
+  uint64_t GetCacheMemoryUsage() const {
+    return 
METRIC_table_locations_cache_memory_usage.Instantiate(metric_entity_, 
0)->value();
+  }
+
+  // Insert approximately 'rows_num' rows into the table named 'table_name',
+  // returning the actual number of rows written.
+  int64_t InsertRows(const string& table_name, int64_t rows_num = 10) {
+    TestWorkload w(cluster_.get());
+    w.set_table_name(table_name);
+    w.set_schema(schema_);
+    w.set_num_read_threads(0);
+    w.set_num_write_threads(1);
+    w.set_write_batch_size(1);
+    w.set_write_pattern(TestWorkload::INSERT_SEQUENTIAL_ROWS);
+    w.set_already_present_allowed(true);
+    w.Setup();
+    w.Start();
+    while (w.rows_inserted() < rows_num) {
+      SleepFor(MonoDelta::FromMilliseconds(10));
+    }
+    w.StopAndJoin();
+
+    // Return the actual number of rows inserted.
+    return w.rows_inserted();
+  }
+
+ protected:
+  static constexpr const char* const kTableName = "table_locations_cache_test";
+  const int num_tablet_servers_;
+  const int cache_capacity_mb_;
+
+  const KuduSchema schema_;
+
+  unique_ptr<InternalMiniCluster> cluster_;
+  client::sp::shared_ptr<KuduClient> client_;
+  scoped_refptr<MetricEntity> metric_entity_;
+};
+
+class TableLocationsCacheDisabledTest : public TableLocationsCacheBaseTest {
+ public:
+  TableLocationsCacheDisabledTest()
+      : TableLocationsCacheBaseTest(1 /*num_tablet_servers*/,
+                                    0 /*cache_capacity_mb*/) {}
+};
+
+TEST_F(TableLocationsCacheDisabledTest, Basic) {
+  // Verify that by default the table locations cache is disabled.
+  google::CommandLineFlagInfo flag_info;
+  ASSERT_TRUE(google::GetCommandLineFlagInfo(
+      "table_locations_cache_capacity_mb", &flag_info));
+  ASSERT_TRUE(flag_info.is_default);
+  ASSERT_EQ("0", flag_info.default_value);
+
+  InsertRows(kTableName);
+
+  // When disabled, cache should not be used.
+  EXPECT_EQ(0, GetCacheEvictions());
+  EXPECT_EQ(0, GetCacheHits());
+  EXPECT_EQ(0, GetCacheInserts());
+  EXPECT_EQ(0, GetCacheLookups());
+  EXPECT_EQ(0, GetCacheMisses());
+  EXPECT_EQ(0, GetCacheMemoryUsage());
+}
+
+class TableLocationsCacheTest : public TableLocationsCacheBaseTest {
+ public:
+  TableLocationsCacheTest()
+      : TableLocationsCacheBaseTest(1 /*num_tablet_servers*/,
+                                    1 /*cache_capacity_mb*/) {}
+};
+
+// Verify that requests with different essential parameters produce
+// different entries in the table locations cache.
+TEST_F(TableLocationsCacheTest, DifferentRequestParameters) {
+  MessengerBuilder bld("test_builder");
+  shared_ptr<Messenger> messenger;
+  ASSERT_OK(bld.Build(&messenger));
+  const auto& addr = cluster_->mini_master()->bound_rpc_addr();
+  MasterServiceProxy proxy(messenger, addr, addr.host());
+
+  {
+    // Issue one query many times -- there should be just one record inserted.
+    const auto prev_cache_inserts = GetCacheInserts();
+    for (auto i = 0; i < 5; ++i) {
+      GetTableLocationsRequestPB req;
+      GetTableLocationsResponsePB resp;
+      req.mutable_table()->set_table_name(kTableName);
+      req.set_max_returned_locations(1);
+      RpcController ctl;
+      ASSERT_OK(proxy.GetTableLocations(req, &resp, &ctl));
+      ASSERT_TRUE(!resp.has_error());
+    }
+    ASSERT_EQ(prev_cache_inserts + 1, GetCacheInserts());
+  }
+
+  {
+    // Issue a query with different value of 'max_returned_locations'. A new
+    // entry should be added into the cache even if other parameters are the
+    // same as in requests sent prior.
+    const auto prev_cache_inserts = GetCacheInserts();
+    GetTableLocationsRequestPB req;
+    GetTableLocationsResponsePB resp;
+    req.mutable_table()->set_table_name(kTableName);
+    req.set_max_returned_locations(10);
+    RpcController ctl;
+    ASSERT_OK(proxy.GetTableLocations(req, &resp, &ctl));
+    ASSERT_TRUE(!resp.has_error());
+    ASSERT_EQ(prev_cache_inserts + 1, GetCacheInserts());
+  }
+
+  {
+    // Set 'replica_type_filter' parameter.
+    const auto prev_cache_inserts = GetCacheInserts();
+    GetTableLocationsRequestPB req;
+    GetTableLocationsResponsePB resp;
+    req.mutable_table()->set_table_name(kTableName);
+    req.set_max_returned_locations(10);
+    req.set_replica_type_filter(ReplicaTypeFilter::ANY_REPLICA);
+    RpcController ctl;
+    ASSERT_OK(proxy.GetTableLocations(req, &resp, &ctl));
+    ASSERT_TRUE(!resp.has_error());
+    ASSERT_EQ(prev_cache_inserts + 1, GetCacheInserts());
+  }
+
+  {
+    // Switch 'replica_type_filter' parameter to a different value.
+    const auto prev_cache_inserts = GetCacheInserts();
+    GetTableLocationsRequestPB req;
+    GetTableLocationsResponsePB resp;
+    req.mutable_table()->set_table_name(kTableName);
+    req.set_max_returned_locations(10);
+    req.set_replica_type_filter(ReplicaTypeFilter::VOTER_REPLICA);
+    RpcController ctl;
+    ASSERT_OK(proxy.GetTableLocations(req, &resp, &ctl));
+    ASSERT_TRUE(!resp.has_error());
+    ASSERT_EQ(prev_cache_inserts + 1, GetCacheInserts());
+  }
+}
+
+// This scenario verifies basic functionality of the table locations cache.
+TEST_F(TableLocationsCacheTest, Basic) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  int64_t prev_cache_hits = 0;
+  int64_t prev_cache_inserts = 0;
+  int64_t prev_cache_lookups = 0;
+  int64_t prev_cache_misses = 0;
+  int64_t prev_cache_evictions = 0;
+
+  {
+    // Run a workload to prime the table locations cache.
+    InsertRows(kTableName);
+    const auto cache_lookups = GetCacheLookups();
+    EXPECT_GT(cache_lookups, 0);
+    const auto cache_misses = GetCacheMisses();
+    const auto cache_inserts = GetCacheInserts();
+    EXPECT_EQ(cache_misses, cache_inserts);
+    const auto cache_hits = GetCacheHits();
+    EXPECT_EQ(cache_lookups - cache_misses, cache_hits);
+
+    // The cache usage should be non-zero since some entries are present.
+    EXPECT_GT(GetCacheMemoryUsage(), 0);
+
+    // Store the counters for the next round.
+    prev_cache_hits = cache_hits;
+    prev_cache_inserts = cache_inserts;
+    prev_cache_lookups = cache_lookups;
+    prev_cache_misses = cache_misses;
+    prev_cache_evictions = GetCacheEvictions();
+  }
+
+  {
+    // The cache has been primed -- now run another workload.
+    InsertRows(kTableName);
+
+    // No new cache inserts expected.
+    EXPECT_EQ(prev_cache_inserts, GetCacheInserts());
+    // No new cache misses expected.
+    EXPECT_EQ(prev_cache_misses, GetCacheMisses());
+
+    // There should be new cache lookups.
+    const auto cache_lookups = GetCacheLookups();
+    EXPECT_LT(prev_cache_lookups, cache_lookups);
+
+    // There should be new cache hits.
+    const auto cache_hits = GetCacheHits();
+    EXPECT_LT(prev_cache_hits, cache_hits);
+
+    // All new lookups should hit the cache.
+    EXPECT_EQ(cache_lookups - prev_cache_lookups, cache_hits - 
prev_cache_hits);
+  }
+
+  {
+    // Alter the test table and make sure the information isn't purged from the
+    // table locations cache since the table locations haven't changed.
+    const string new_table_name = string(kTableName) + "_renamed";
+
+    unique_ptr<KuduTableAlterer> a0(client_->NewTableAlterer(kTableName));
+    auto s = a0->RenameTo(new_table_name)->Alter();
+    ASSERT_OK(s);
+
+    // Wait for the master to get notified on the change.
+    SleepFor(MonoDelta::FromMilliseconds(3 * FLAGS_heartbeat_interval_ms));
+
+    // No new cache evictions are expected.
+    EXPECT_EQ(prev_cache_evictions, GetCacheEvictions());
+
+    InsertRows(new_table_name);
+
+    EXPECT_EQ(prev_cache_evictions, GetCacheEvictions());
+    // No new cache inserts nor misses are expected.
+    EXPECT_EQ(prev_cache_inserts, GetCacheInserts());
+    EXPECT_EQ(prev_cache_misses, GetCacheMisses());
+
+    // Rename the table back.
+    unique_ptr<KuduTableAlterer> a1(client_->NewTableAlterer(new_table_name));
+    s = a1->RenameTo(kTableName)->Alter();
+    ASSERT_OK(s);
+  }
+
+  {
+    // Drop the test table and make sure the cached location information is
+    // purged from the table locations cache.
+    ASSERT_OK(client_->DeleteTable(kTableName));
+
+    // Wait for the master to get notified on the change.
+    SleepFor(MonoDelta::FromMilliseconds(3 * FLAGS_heartbeat_interval_ms));
+
+    // All the previously inserted records should be purged.
+    EXPECT_EQ(GetCacheEvictions(), GetCacheInserts());
+    EXPECT_EQ(0, GetCacheMemoryUsage());
+  }
+}
+
+class TableLocationsCacheTabletChangeTest :
+    public TableLocationsCacheBaseTest {
+ public:
+  TableLocationsCacheTabletChangeTest()
+      : TableLocationsCacheBaseTest(5 /*num_tablet_servers*/,
+                                    1 /*cache_capacity_mb*/) {}
+};
+
+// Verify the behavior of the table locations cache when a table's tablets
+// change leadership roles or move around.
+TEST_F(TableLocationsCacheTabletChangeTest, Basic) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  {
+    // Run a workload to prime the table locations cache.
+    InsertRows(kTableName);
+    const auto cache_lookups = GetCacheLookups();
+    EXPECT_GT(cache_lookups, 0);
+    const auto cache_misses = GetCacheMisses();
+    EXPECT_GT(cache_misses, 0);
+    EXPECT_EQ(cache_misses, GetCacheInserts());
+    EXPECT_EQ(cache_lookups - cache_misses, GetCacheHits());
+  }
+
+  // Restart tablet servers to reshuffle leadership role of tablet replicas.
+  for (auto idx = 0; idx < cluster_->num_tablet_servers(); ++idx) {
+    cluster_->mini_tablet_server(idx)->Shutdown();
+    SleepFor(MonoDelta::FromMilliseconds(static_cast<int64_t>(
+        2 * FLAGS_leader_failure_max_missed_heartbeat_periods *
+        FLAGS_raft_heartbeat_interval_ms) + 3 * FLAGS_heartbeat_interval_ms));
+    ASSERT_OK(cluster_->mini_tablet_server(idx)->Restart());
+  }
+
+  {
+    // All the previously inserted records should be purged.
+    EXPECT_EQ(GetCacheEvictions(), GetCacheInserts());
+    const auto prev_cache_inserts = GetCacheInserts();
+    const auto prev_cache_misses = GetCacheMisses();
+
+    InsertRows(kTableName);
+
+    // New cache inserts and misses are expected since the previously inserted
+    // records have been just purged.
+    EXPECT_LT(prev_cache_inserts, GetCacheInserts());
+    EXPECT_LT(prev_cache_misses, GetCacheMisses());
+  }
+
+  // Shutdown one tablet server and get the system some time to re-replicate
+  // affected tablet replicas elsewhere.
+  FLAGS_follower_unavailable_considered_failed_sec = 1;
+  cluster_->mini_tablet_server(0)->Shutdown();
+  SleepFor(MonoDelta::FromMilliseconds(static_cast<int64_t>(
+      2 * FLAGS_leader_failure_max_missed_heartbeat_periods *
+          FLAGS_raft_heartbeat_interval_ms +
+      3 * 1000 * FLAGS_follower_unavailable_considered_failed_sec +
+      3 * FLAGS_heartbeat_interval_ms)));
+
+  // All the previously inserted records should be purged.
+  EXPECT_EQ(GetCacheEvictions(), GetCacheInserts());
+  EXPECT_EQ(0, GetCacheMemoryUsage());
+}
+
+class TableLocationsCacheMultiMasterTest : public KuduTest {
+ public:
+  TableLocationsCacheMultiMasterTest()
+      : schema_(itest::SimpleIntKeyKuduSchema()) {
+  }
+
+  void SetUp() override {
+    KuduTest::SetUp();
+
+    ExternalMiniClusterOptions opts;
+    opts.num_masters = 3;
+    opts.num_tablet_servers = 3;
+    opts.extra_master_flags = {
+      "--table_locations_cache_capacity_mb=8",
+      Substitute("--raft_heartbeat_interval_ms=$0",
+          kRaftHeartbeatIntervalMs),
+      Substitute("--leader_failure_max_missed_heartbeat_periods=$0",
+          kMaxMissedHeartbeatPeriods),
+    };
+    cluster_.reset(new cluster::ExternalMiniCluster(std::move(opts)));
+    ASSERT_OK(cluster_->Start());
+
+    client::sp::shared_ptr<client::KuduClient> client;
+    ASSERT_OK(cluster_->CreateClient(nullptr, &client));
+
+    unique_ptr<KuduTableCreator> table_creator(client->NewTableCreator());
+    ASSERT_OK(table_creator->table_name(kTableName)
+        .schema(&schema_)
+        .set_range_partition_columns({ "key" })
+        .num_replicas(3)
+        .add_hash_partitions({ "key" }, 5)
+        .Create());
+  }
+
+  int64_t GetTableLocationCacheMetric(
+      int master_idx,
+      const MetricPrototype* metric_proto) const {
+    CHECK_LT(master_idx, cluster_->num_masters());
+    int64_t value;
+    CHECK_OK(itest::GetInt64Metric(
+        cluster_->master(master_idx)->bound_http_hostport(),
+        &METRIC_ENTITY_server, "kudu.master", metric_proto, "value", &value));
+    return value;
+  }
+
+  int64_t GetTableLocationCacheMetricAllMasters(
+      const MetricPrototype* metric_proto) const {
+    int64_t total = 0;
+    for (auto idx = 0; idx < cluster_->num_masters(); ++idx) {
+      const auto val = GetTableLocationCacheMetric(idx, metric_proto);
+      total += val;
+    }
+    return total;
+  }
+
+  void CheckCacheMetricsReset(int master_idx) const {
+    for (const auto* metric : {
+         &METRIC_table_locations_cache_evictions,
+         &METRIC_table_locations_cache_hits,
+         &METRIC_table_locations_cache_inserts,
+         &METRIC_table_locations_cache_lookups,
+         &METRIC_table_locations_cache_misses, }) {
+      SCOPED_TRACE(Substitute(
+          "verifying value of '$0' metric ($1) for master at index $2",
+          metric->name(), metric->label(), master_idx));
+      const auto val = GetTableLocationCacheMetric(master_idx, metric);
+      EXPECT_EQ(0, val);
+    }
+    const auto cache_memory_usage = GetTableLocationCacheMetric(
+        master_idx, &METRIC_table_locations_cache_memory_usage);
+    EXPECT_EQ(0, cache_memory_usage);
+  }
+
+ protected:
+  static constexpr const char* const kTableName = 
"test_locations_cache_multi_master";
+  static constexpr int32_t kRaftHeartbeatIntervalMs = 300;
+  static constexpr int32_t kMaxMissedHeartbeatPeriods = 2;
+  const KuduSchema schema_;
+  std::unique_ptr<cluster::ExternalMiniCluster> cluster_;
+};
+
+// Verify that the table locations cache is reset once a master assumes
+// its leadership role.
+TEST_F(TableLocationsCacheMultiMasterTest, ResetCache) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  // Make sure the cache's metrics are zeroed if no client activity has been
+  // there yet.
+  for (auto idx = 0; idx < cluster_->num_masters(); ++idx) {
+    NO_FATALS(CheckCacheMetricsReset(idx));
+  }
+
+  TestWorkload w(cluster_.get());
+  w.set_table_name(kTableName);
+  w.set_schema(schema_);
+  w.Setup();
+  w.Start();
+  while (w.rows_inserted() < 10) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  w.StopAndJoin();
+
+  // Make sure some items are added into the cache.
+  EXPECT_GT(GetTableLocationCacheMetricAllMasters(
+      &METRIC_table_locations_cache_inserts), 0);
+
+  int former_leader_master_idx;
+  ASSERT_OK(cluster_->GetLeaderMasterIndex(&former_leader_master_idx));
+
+  ASSERT_EVENTUALLY([&] {
+    // Induce a change in master leadership (maybe, even few of them, up to the
+    // number of masters in the cluster).
+    for (auto idx = 0; idx < cluster_->num_masters(); ++idx) {
+      ASSERT_OK(cluster_->master(idx)->Pause());
+      // Make one master stop sending heartbeats, and give the rest about
+      // three heartbeat periods to elect a new leader in case the stopped
+      // master was a leader.
+      SleepFor(MonoDelta::FromMilliseconds(
+          2 * kRaftHeartbeatIntervalMs * kMaxMissedHeartbeatPeriods +
+          3 * kRaftHeartbeatIntervalMs));
+      ASSERT_OK(cluster_->master(idx)->Resume());
+    }
+    int leader_master_idx;
+    ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
+    ASSERT_NE(former_leader_master_idx, leader_master_idx);
+  });
+
+  // Make sure all the cache's metrics are reset once master just has become
+  // a leader.
+  int leader_master_idx;
+  ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
+  NO_FATALS(CheckCacheMetricsReset(leader_master_idx));
 }
 
 } // namespace master
diff --git a/src/kudu/master/CMakeLists.txt b/src/kudu/master/CMakeLists.txt
index 7644535..103abfb 100644
--- a/src/kudu/master/CMakeLists.txt
+++ b/src/kudu/master/CMakeLists.txt
@@ -48,6 +48,8 @@ set(MASTER_SRCS
   placement_policy.cc
   ranger_authz_provider.cc
   sys_catalog.cc
+  table_locations_cache.cc
+  table_locations_cache_metrics.cc
   table_metrics.cc
   ts_descriptor.cc
   ts_manager.cc)
diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index fd684ef..f3ee81d 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -104,6 +104,8 @@
 #include "kudu/master/placement_policy.h"
 #include "kudu/master/ranger_authz_provider.h"
 #include "kudu/master/sys_catalog.h"
+#include "kudu/master/table_locations_cache.h"
+#include "kudu/master/table_locations_cache_metrics.h"
 #include "kudu/master/table_metrics.h"
 #include "kudu/master/ts_descriptor.h"
 #include "kudu/master/ts_manager.h"
@@ -125,6 +127,7 @@
 #include "kudu/tablet/tablet_replica.h"
 #include "kudu/tserver/tserver_admin.pb.h"
 #include "kudu/tserver/tserver_admin.proxy.h"
+#include "kudu/util/cache_metrics.h"
 #include "kudu/util/condition_variable.h"
 #include "kudu/util/debug/trace_event.h"
 #include "kudu/util/fault_injection.h"
@@ -318,6 +321,11 @@ DEFINE_bool(auto_rebalancing_enabled, false,
 TAG_FLAG(auto_rebalancing_enabled, advanced);
 TAG_FLAG(auto_rebalancing_enabled, experimental);
 
+DEFINE_uint32(table_locations_cache_capacity_mb, 0,
+              "Capacity for the table locations cache (in MiB); a value "
+              "of 0 means table locations are not be cached");
+TAG_FLAG(table_locations_cache_capacity_mb, advanced);
+
 DECLARE_bool(raft_prepare_replacement_before_eviction);
 DECLARE_int64(tsk_rotation_seconds);
 
@@ -789,6 +797,7 @@ CatalogManager::CatalogManager(Master* master)
            // closely timed consecutive elections).
            .set_max_threads(1)
            .Build(&leader_election_pool_));
+  ResetTableLocationsCache();
 }
 
 CatalogManager::~CatalogManager() {
@@ -1187,6 +1196,9 @@ void CatalogManager::PrepareForLeadershipTask() {
         }
       }
     }
+
+    // Reset the cache storing information on table locations.
+    ResetTableLocationsCache();
   }
 
   std::lock_guard<simple_spinlock> l(state_lock_);
@@ -2173,6 +2185,11 @@ Status CatalogManager::DeleteTable(const 
DeleteTableRequestPB& req,
   // 8. Send a DeleteTablet() request to each tablet replica in the table.
   SendDeleteTableRequest(table, deletion_msg);
 
+  // 9. Invalidate/purge corresponding entries in the table locations cache.
+  if (table_locations_cache_) {
+    table_locations_cache_->Remove(table->id());
+  }
+
   VLOG(1) << "Deleted table " << table->ToString();
   return Status::OK();
 }
@@ -2867,6 +2884,12 @@ Status CatalogManager::AlterTable(const 
AlterTableRequestPB& req,
     SendDeleteTabletRequest(tablet, l, deletion_msg);
   }
 
+  // 10. Invalidate/purge corresponding entries in the table locations cache.
+  if (table_locations_cache_ &&
+      (!tablets_to_add.empty() || !tablets_to_drop.empty())) {
+    table_locations_cache_->Remove(table->id());
+  }
+
   background_tasks_->Wake();
   return Status::OK();
 }
@@ -4091,6 +4114,7 @@ Status CatalogManager::ProcessTabletReport(
   // 3. Process each tablet. This may not be in the order that the tablets
   // appear in 'full_report', but that has no bearing on correctness.
   vector<scoped_refptr<TabletInfo>> mutated_tablets;
+  unordered_set<string> mutated_table_ids;
   unordered_set<string> uuids_ignored_for_underreplication =
       master_->ts_manager()->GetUuidsToIgnoreForUnderreplication();
   for (const auto& e : tablet_infos) {
@@ -4336,6 +4360,7 @@ Status CatalogManager::ProcessTabletReport(
     // Done here and not on a per-mutation basis to avoid duplicate entries.
     if (tablet_was_mutated) {
       mutated_tablets.push_back(tablet);
+      mutated_table_ids.emplace(table->id());
     }
 
     // 10. Process the report's tablet statistics.
@@ -4421,6 +4446,13 @@ Status CatalogManager::ProcessTabletReport(
     WARN_NOT_OK(rpc->Run(), Substitute("Failed to send $0", 
rpc->description()));
   }
 
+  // 16. Invalidate corresponding entries in the table locations cache.
+  if (table_locations_cache_) {
+    for (const auto& table_id : mutated_table_ids) {
+      table_locations_cache_->Remove(table_id);
+    }
+  }
+
   return Status::OK();
 }
 
@@ -4959,7 +4991,9 @@ Status CatalogManager::BuildLocationsForTablet(
         if (reg.has_unix_domain_socket_path()) {
           
tsinfo_pb->set_unix_domain_socket_path(reg.unix_domain_socket_path());
         }
-        if (ts_desc->location()) 
tsinfo_pb->set_location(*(ts_desc->location()));
+        if (ts_desc->location()) {
+          tsinfo_pb->set_location(*(ts_desc->location()));
+        }
       } else {
         // If we've never received a heartbeat from the tserver, we'll fall 
back
         // to the last known RPC address in the RaftPeerPB.
@@ -4980,8 +5014,9 @@ Status CatalogManager::BuildLocationsForTablet(
           &ts_infos_dict->uuid_to_idx, peer.permanent_uuid(),
           [&]() -> pair<StringPiece, int> {
             auto& ts_info_pbs = ts_infos_dict->ts_info_pbs;
-            auto ts_info_idx = ts_info_pbs.size();
-            ts_info_pbs.emplace_back(make_tsinfo_pb().release());
+            const auto ts_info_idx = ts_info_pbs.size();
+            auto new_elem = make_tsinfo_pb();
+            ts_info_pbs.emplace_back(std::move(new_elem));
             return { ts_info_pbs.back()->permanent_uuid(), ts_info_idx };
           });
 
@@ -5129,11 +5164,11 @@ Status CatalogManager::GetTableLocations(const 
GetTableLocationsRequestPB* req,
                                          optional<const string&> user) {
   // If start-key is > end-key report an error instead of swapping the two
   // since probably there is something wrong app-side.
-  if (req->has_partition_key_start() && req->has_partition_key_end()
-      && req->partition_key_start() > req->partition_key_end()) {
+  if (PREDICT_FALSE(req->has_partition_key_start() && 
req->has_partition_key_end()
+      && req->partition_key_start() > req->partition_key_end())) {
     return Status::InvalidArgument("start partition key is greater than the 
end partition key");
   }
-  if (req->max_returned_locations() <= 0) {
+  if (PREDICT_FALSE(req->max_returned_locations() <= 0)) {
     return Status::InvalidArgument("max_returned_locations must be greater 
than 0");
   }
 
@@ -5154,27 +5189,42 @@ Status CatalogManager::GetTableLocations(const 
GetTableLocationsRequestPB* req,
   vector<scoped_refptr<TabletInfo>> tablets_in_range;
   table->GetTabletsInRange(req, &tablets_in_range);
 
-  TSInfosDict infos_dict;
+  // Check for items in the cache.
+  if (table_locations_cache_) {
+    auto handle = table_locations_cache_->Get(
+        table->id(),
+        tablets_in_range.size(),
+        tablets_in_range.empty() ? "" : tablets_in_range.front()->id(),
+        *req);
+    if (handle) {
+      *resp = handle.value();
+      return Status::OK();
+    }
+  }
 
+  TSInfosDict infos_dict;
+  bool consistent_locations = true;
   for (const auto& tablet : tablets_in_range) {
-    Status s = BuildLocationsForTablet(
+    const auto s = BuildLocationsForTablet(
         tablet, req->replica_type_filter(), resp->add_tablet_locations(),
         req->intern_ts_infos_in_response() ? &infos_dict : nullptr);
-    if (s.ok()) {
+    if (PREDICT_TRUE(s.ok())) {
       continue;
     }
+
+    // All the rest are various error cases.
+    consistent_locations = false;
+    resp->Clear();
+    resp->mutable_error()->set_code(
+        MasterErrorPB_Code::MasterErrorPB_Code_TABLET_NOT_RUNNING);
     if (s.IsNotFound()) {
       // The tablet has been deleted; force the client to retry. This is a
       // transient state that only happens with a concurrent drop range
       // partition alter table operation.
-      resp->Clear();
-      
resp->mutable_error()->set_code(MasterErrorPB_Code::MasterErrorPB_Code_TABLET_NOT_RUNNING);
       StatusToPB(Status::ServiceUnavailable("Tablet not running"),
                  resp->mutable_error()->mutable_status());
     } else if (s.IsServiceUnavailable()) {
       // The tablet is not yet running; fail the request.
-      resp->Clear();
-      
resp->mutable_error()->set_code(MasterErrorPB_Code::MasterErrorPB_Code_TABLET_NOT_RUNNING);
       StatusToPB(s, resp->mutable_error()->mutable_status());
       break;
     } else {
@@ -5183,10 +5233,24 @@ Status CatalogManager::GetTableLocations(const 
GetTableLocationsRequestPB* req,
           << s.ToString();
     }
   }
+  resp->mutable_ts_infos()->Reserve(infos_dict.ts_info_pbs.size());
   for (auto& pb : infos_dict.ts_info_pbs) {
     resp->mutable_ts_infos()->AddAllocated(pb.release());
   }
   resp->set_ttl_millis(FLAGS_table_locations_ttl_ms);
+
+  // Items with consistent tablet location information are added into the 
cache.
+  if (table_locations_cache_ && consistent_locations) {
+    unique_ptr<GetTableLocationsResponsePB> entry_ptr(
+        new GetTableLocationsResponsePB(*resp));
+    table_locations_cache_->Put(
+        table->id(),
+        tablets_in_range.size(),
+        tablets_in_range.empty() ? "" : tablets_in_range.front()->id(),
+        *req,
+        std::move(entry_ptr));
+  }
+
   return Status::OK();
 }
 
@@ -5317,6 +5381,22 @@ const char* CatalogManager::StateToString(State state) {
   __builtin_unreachable();
 }
 
+void CatalogManager::ResetTableLocationsCache() {
+  const auto cache_capacity_bytes =
+      FLAGS_table_locations_cache_capacity_mb * 1024 * 1024;
+  if (cache_capacity_bytes == 0) {
+    table_locations_cache_.reset();
+  } else {
+    unique_ptr<TableLocationsCache> new_cache(
+        new TableLocationsCache(cache_capacity_bytes));
+    unique_ptr<TableLocationsCacheMetrics> metrics(
+        new TableLocationsCacheMetrics(master_->metric_entity()));
+    new_cache->SetMetrics(std::move(metrics));
+    table_locations_cache_ = std::move(new_cache);
+  }
+  VLOG(1) << "table locations cache has been reset";
+}
+
 ////////////////////////////////////////////////////////////
 // CatalogManager::ScopedLeaderSharedLock
 ////////////////////////////////////////////////////////////
diff --git a/src/kudu/master/catalog_manager.h 
b/src/kudu/master/catalog_manager.h
index 31cda7d..7991d0d 100644
--- a/src/kudu/master/catalog_manager.h
+++ b/src/kudu/master/catalog_manager.h
@@ -115,6 +115,7 @@ class PlacementPolicy;
 class SysCatalogTable;
 class TSDescriptor;
 class TableInfo;
+class TableLocationsCache;
 struct DeferredAssignmentActions;
 struct TableMetrics;
 
@@ -1047,6 +1048,8 @@ class CatalogManager : public 
tserver::TabletReplicaLookupIf {
                                const TabletMetadataLock& tablet_lock,
                                const std::string& deletion_msg);
 
+  void ResetTableLocationsCache();
+
   std::string GenerateId() { return oid_generator_.Next(); }
 
   // Conventional "T xxx P yyy: " prefix for logging.
@@ -1161,6 +1164,9 @@ class CatalogManager : public 
tserver::TabletReplicaLookupIf {
   // (TODO: this stuff should be deferred and done in the background thread)
   friend class AsyncAlterTable;
 
+  // LRU cache to store responses generated by GetTableLocations().
+  std::unique_ptr<TableLocationsCache> table_locations_cache_;
+
   DISALLOW_COPY_AND_ASSIGN(CatalogManager);
 };
 
diff --git a/src/kudu/master/master_service.cc 
b/src/kudu/master/master_service.cc
index 3934c78..e622e6a 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -510,16 +510,20 @@ void MasterServiceImpl::GetTableLocations(const 
GetTableLocationsRequestPB* req,
   TRACE_EVENT1("master", "GetTableLocations",
                "requestor", rpc->requestor_string());
 
-  CatalogManager::ScopedLeaderSharedLock l(server_->catalog_manager());
-  if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, rpc)) {
-    return;
-  }
+  Status s;
+  {
+    CatalogManager::ScopedLeaderSharedLock l(server_->catalog_manager());
+    if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, rpc)) {
+      return;
+    }
 
-  if (PREDICT_FALSE(FLAGS_master_inject_latency_on_tablet_lookups_ms > 0)) {
-    
SleepFor(MonoDelta::FromMilliseconds(FLAGS_master_inject_latency_on_tablet_lookups_ms));
+    if (PREDICT_FALSE(FLAGS_master_inject_latency_on_tablet_lookups_ms > 0)) {
+      
SleepFor(MonoDelta::FromMilliseconds(FLAGS_master_inject_latency_on_tablet_lookups_ms));
+    }
+    s = server_->catalog_manager()->GetTableLocations(
+        req, resp, make_optional<const 
string&>(rpc->remote_user().username()));
   }
-  Status s = server_->catalog_manager()->GetTableLocations(
-      req, resp, make_optional<const string&>(rpc->remote_user().username()));
+
   CheckRespErrorOrSetUnknown(s, resp);
   rpc->RespondSuccess();
 }
@@ -527,14 +531,18 @@ void MasterServiceImpl::GetTableLocations(const 
GetTableLocationsRequestPB* req,
 void MasterServiceImpl::GetTableSchema(const GetTableSchemaRequestPB* req,
                                        GetTableSchemaResponsePB* resp,
                                        rpc::RpcContext* rpc) {
-  CatalogManager::ScopedLeaderSharedLock l(server_->catalog_manager());
-  if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, rpc)) {
-    return;
+  Status s;
+  {
+    CatalogManager::ScopedLeaderSharedLock l(server_->catalog_manager());
+    if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, rpc)) {
+      return;
+    }
+
+    s = server_->catalog_manager()->GetTableSchema(
+        req, resp, make_optional<const string&>(rpc->remote_user().username()),
+        FLAGS_master_support_authz_tokens ? server_->token_signer() : nullptr);
   }
 
-  Status s = server_->catalog_manager()->GetTableSchema(
-      req, resp, make_optional<const string&>(rpc->remote_user().username()),
-      FLAGS_master_support_authz_tokens ? server_->token_signer() : nullptr);
   CheckRespErrorOrSetUnknown(s, resp);
   if (resp->has_error()) {
     // If there was an application error, respond to the RPC.
diff --git a/src/kudu/master/table_locations_cache.cc 
b/src/kudu/master/table_locations_cache.cc
new file mode 100644
index 0000000..a903889
--- /dev/null
+++ b/src/kudu/master/table_locations_cache.cc
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/master/table_locations_cache.h"
+
+#include <cstddef>
+#include <memory>
+#include <mutex>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/master/master.pb.h"
+#include "kudu/util/cache.h"
+#include "kudu/util/cache_metrics.h"
+#include "kudu/util/slice.h"
+
+using std::string;
+using strings::Substitute;
+
+namespace kudu {
+namespace master {
+
// Create the underlying LRU cache with the given capacity: 'capacity_bytes'
// bounds the total charge of cached entries (see Put(): key size plus the
// serialized size of the cached response).
TableLocationsCache::TableLocationsCache(size_t capacity_bytes)
    : eviction_cb_(this),
      cache_(NewCache<Cache::EvictionPolicy::LRU>(capacity_bytes,
                                                  "table-locations-cache")) {
}
+
+string TableLocationsCache::BuildKey(
+    const string& table_id,
+    size_t num_tablets,
+    const string& first_tablet_id,
+    const GetTableLocationsRequestPB& req) {
+  return Substitute("$0:$1:$2:$3:$4",
+      table_id,
+      num_tablets,
+      first_tablet_id,
+      req.has_intern_ts_infos_in_response() ? 1 : 0,
+      req.has_replica_type_filter() ? 
static_cast<int>(req.replica_type_filter())
+                                    : -1);
+}
+
// Look up a cached GetTableLocations() response for the given table and
// request parameters.  Returns an empty handle on cache miss; on a hit, the
// returned handle keeps the cached value alive until it goes out of scope.
TableLocationsCache::EntryHandle TableLocationsCache::Get(
    const string& table_id,
    size_t num_tablets,
    const string& first_tablet_id,
    const GetTableLocationsRequestPB& req) {
  const string key = BuildKey(table_id, num_tablets, first_tablet_id, req);
  // EXPECT_IN_CACHE controls how the lookup is accounted by the underlying
  // cache (hit/miss bookkeeping) -- see Cache::Lookup.
  auto h(cache_->Lookup(key, Cache::EXPECT_IN_CACHE));
  if (!h) {
    VLOG(2) << Substitute("key '$0': entry absent", key);
    // An empty handle signals a cache miss to the caller.
    return EntryHandle();
  }
  VLOG(2) << Substitute("key '$0': entry present", key);
  // The cache stores a small Entry wrapper; the actual response protobuf is
  // heap-allocated and reachable via Entry::val_ptr (see Put()).
  auto* entry_ptr = reinterpret_cast<Entry*>(cache_->Value(h).mutable_data());
  DCHECK(entry_ptr);
  // Move the cache handle into the EntryHandle so the entry stays pinned for
  // the handle's lifetime.
  return EntryHandle(DCHECK_NOTNULL(entry_ptr->val_ptr), std::move(h));
}
+
// Add (or replace) a cached response for the given table and request
// parameters, transferring ownership of 'val' to the cache.  Returns a handle
// pinning the newly inserted entry.
TableLocationsCache::EntryHandle TableLocationsCache::Put(
    const string& table_id,
    size_t num_tablets,
    const string& first_tablet_id,
    const GetTableLocationsRequestPB& req,
    std::unique_ptr<GetTableLocationsResponsePB> val) {
  const auto key = BuildKey(table_id, num_tablets, first_tablet_id, req);
  // Charge the key size plus the serialized size of the response against the
  // cache's capacity; the Entry struct itself is only a pointer wrapper.
  const auto charge = key.size() + val->ByteSizeLong();
  auto pending(cache_->Allocate(key, sizeof(Entry), charge));
  CHECK(pending);
  // The cache-owned slot stores just the raw pointer to the response.
  Entry* entry = reinterpret_cast<Entry*>(cache_->MutableValue(&pending));
  entry->val_ptr = val.get();
  // Insert() evicts already existing entry with the same key, if any.
  auto h(cache_->Insert(std::move(pending), &eviction_cb_));
  {
    // Track the key under its table id so Remove(table_id) can later purge
    // all entries for the table.
    //
    // NOTE(review): keys of entries evicted by the LRU policy linger in this
    // map until Remove() is called for the table -- presumably erasing an
    // already-evicted key is a no-op; confirm against the Cache::Erase
    // contract.
    std::lock_guard<simple_spinlock> l(keys_by_table_id_lock_);
    keys_by_table_id_[table_id].emplace(key);
  }
  VLOG(2) << Substitute("key '$0': added entry for table '$1'", key, table_id);
  // The cache takes care of the entry from this point: deallocation of the
  // resources passed via 'val' parameter is performed by the eviction callback.
  return EntryHandle(DCHECK_NOTNULL(val.release()), std::move(h));
}
+
+void TableLocationsCache::Remove(const std::string& table_id) {
+  std::lock_guard<simple_spinlock> l(keys_by_table_id_lock_);
+  const auto it = keys_by_table_id_.find(table_id);
+  if (it != keys_by_table_id_.end()) {
+    VLOG(2) << Substitute("removing cached locations for table '$0'", 
table_id);
+    const auto& keys = it->second;
+    for (const auto& key : keys) {
+      VLOG(2) << Substitute("removing key '$0' from table location cache", 
key);
+      cache_->Erase(key);
+    }
+    keys_by_table_id_.erase(it);
+  }
+}
+
// Set metrics for the cache; ownership of 'metrics' is transferred to the
// underlying cache instance.
void TableLocationsCache::SetMetrics(std::unique_ptr<CacheMetrics> metrics) {
  cache_->SetMetrics(std::move(metrics));
}
+
// Construct an empty handle: no value and no reference on any cache entry.
TableLocationsCache::EntryHandle::EntryHandle()
    : ptr_(nullptr),
      handle_(Cache::UniqueHandle(nullptr,
                                  Cache::HandleDeleter(nullptr))) {
}
+
+TableLocationsCache::EntryHandle::EntryHandle(EntryHandle&& other) noexcept
+    : EntryHandle() {
+  std::swap(ptr_, other.ptr_);
+  handle_ = std::move(other.handle_);
+}
+
+TableLocationsCache::EntryHandle& TableLocationsCache::EntryHandle::operator=(
+    TableLocationsCache::EntryHandle&& other) noexcept {
+  ptr_ = other.ptr_;
+  other.ptr_ = nullptr;
+  handle_ = std::move(other.handle_);
+  return *this;
+}
+
// Construct a handle wrapping the given value pointer and the corresponding
// cache handle.  The two must be consistent: both set (cache hit) or both
// empty (cache miss).
TableLocationsCache::EntryHandle::EntryHandle(GetTableLocationsResponsePB* ptr,
                                              Cache::UniqueHandle handle)
    : ptr_(ptr),
      handle_(std::move(handle)) {
  DCHECK((ptr_ != nullptr && handle_) ||
         (ptr_ == nullptr && !handle_));
}
+
// 'cache' must be non-null and is expected to outlive this callback
// (the callback is a member of TableLocationsCache itself).
TableLocationsCache::EvictionCallback::EvictionCallback(TableLocationsCache* cache)
    : cache_(cache) {
  DCHECK(cache_);
}
+
// Invoked by the underlying cache when an entry is evicted.  The Entry
// wrapper itself lives in cache-owned memory; only the response protobuf it
// points to is heap-allocated (see Put()) and must be freed here.
void TableLocationsCache::EvictionCallback::EvictedEntry(Slice key, Slice val) {
  VLOG(2) << Substitute("EvictedEntry callback for key '$0'", key.ToString());
  auto* entry_ptr = reinterpret_cast<Entry*>(val.mutable_data());
  DCHECK(entry_ptr);
  delete entry_ptr->val_ptr;
}
+
+} // namespace master
+} // namespace kudu
diff --git a/src/kudu/master/table_locations_cache.h 
b/src/kudu/master/table_locations_cache.h
new file mode 100644
index 0000000..4249436
--- /dev/null
+++ b/src/kudu/master/table_locations_cache.h
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/macros.h"
+#include "kudu/util/cache.h"
+#include "kudu/util/locks.h"
+#include "kudu/util/slice.h"
+
+namespace kudu {
+struct CacheMetrics;
+
+namespace master {
+
+class GetTableLocationsRequestPB;
+class GetTableLocationsResponsePB;
+
class TableLocationsCache final {
 public:

  // The handle for an entry in the cache.
  class EntryHandle {
   public:
    // Construct an empty/nullptr handle.
    EntryHandle();
    ~EntryHandle() = default;

    // Movable but not copyable: a handle represents a single reference
    // on a cached entry.
    EntryHandle(EntryHandle&& other) noexcept;
    EntryHandle& operator=(EntryHandle&& other) noexcept;

    // Returns 'true' if the handle refers to a cached entry (cache hit).
    explicit operator bool() const noexcept {
      return ptr_ != nullptr;
    }

    // No modifications of the value are allowed through EntryHandle.
    const GetTableLocationsResponsePB& value() const {
      DCHECK(ptr_);
      return *ptr_;
    }

   private:
    friend class TableLocationsCache;
    DISALLOW_COPY_AND_ASSIGN(EntryHandle);

    // Construct a handle wrapping the given value pointer and cache handle;
    // used by TableLocationsCache::Get()/Put().
    EntryHandle(GetTableLocationsResponsePB* ptr, Cache::UniqueHandle handle);

    // Non-owning pointer to the cached response; 'handle_' keeps the
    // underlying cache entry (and hence the response) alive.
    GetTableLocationsResponsePB* ptr_;
    Cache::UniqueHandle handle_;
  };

  // Create a new cache (LRU eviction policy) with the specified capacity.
  explicit TableLocationsCache(size_t capacity_bytes);

  ~TableLocationsCache() = default;

  // Retrieve an entry from the cache for the specified table identifier
  // and parameters of GetTableLocationsRequest.
  //
  // Cached key/value pairs may still be evicted even with an outstanding
  // handle, but a cached value won't be destroyed until the handle goes
  // out of scope.
  EntryHandle Get(const std::string& table_id,
                  size_t num_tablets,
                  const std::string& first_tablet_id,
                  const GetTableLocationsRequestPB& req);

  // For the specified key, add an entry into the cache or replace an already
  // existing one. This method returns a handle for the newly added entry.
  //
  // Put() evicts an already existing entry with the same key (if any),
  // effectively replacing the corresponding entry in the cache as the result.
  EntryHandle Put(const std::string& table_id,
                  size_t num_tablets,
                  const std::string& first_tablet_id,
                  const GetTableLocationsRequestPB& req,
                  std::unique_ptr<GetTableLocationsResponsePB> val);

  // Remove all entries for the specified table identifier.
  void Remove(const std::string& table_id);

  // Set metrics for the cache.
  void SetMetrics(std::unique_ptr<CacheMetrics> metrics);

 private:
  friend class EvictionCallback;

  // An entry to store in the underlying LRU cache.
  struct Entry {
    // Raw pointer to the 'value'. The cache owns the memory allocated and takes
    // care of deallocating it upon call of EvictionCallback::EvictedEntry()
    // by the underlying cache instance.
    GetTableLocationsResponsePB* val_ptr;
  };

  // Callback invoked by the underlying LRU cache when a cached entry's
  // reference count reaches zero and is evicted.
  class EvictionCallback : public Cache::EvictionCallback {
   public:
    explicit EvictionCallback(TableLocationsCache* cache);

    // This method is called upon evicting an entry with the specified
    // key ('key') and value ('val').
    void EvictedEntry(Slice key, Slice val) override;

   private:
    // Non-owning back-pointer to the containing cache.
    TableLocationsCache* cache_;
    DISALLOW_COPY_AND_ASSIGN(EvictionCallback);
  };

  // Build a key for the item in the cache given table identifier
  // and the information in the request. The key is a composite one and includes
  // request parameters because the response depends on them, so there might be
  // multiple cached items in the cache for the same table identifier.
  static std::string BuildKey(const std::string& table_id,
                              size_t num_tablets,
                              const std::string& first_tablet_id,
                              const GetTableLocationsRequestPB& req);

  // Invoked whenever a cached entry reaches zero reference count, i.e. it was
  // removed from the cache and there aren't any other references
  // floating around.
  EvictionCallback eviction_cb_;

  // The underlying LRU cache instance.
  std::unique_ptr<Cache> cache_;

  // A convenience dictionary to keep correspondence between a table's
  // identifier and cached locations for the table. This map is used when
  // invalidating elements in the cache given a table identifier.
  //
  // NOTE(review): entries evicted by the LRU policy are not pruned from this
  // map until Remove() is called for the table -- presumably erasing an
  // already-evicted key is a no-op; confirm against the Cache::Erase contract.
  std::unordered_map<std::string, std::unordered_set<std::string>> keys_by_table_id_;

  // A synchronisation primitive to guard access to keys_by_table_id_.
  simple_spinlock keys_by_table_id_lock_;

  DISALLOW_COPY_AND_ASSIGN(TableLocationsCache);
};
+
+} // namespace master
+} // namespace kudu
diff --git a/src/kudu/master/table_locations_cache_metrics.cc 
b/src/kudu/master/table_locations_cache_metrics.cc
new file mode 100644
index 0000000..e648354
--- /dev/null
+++ b/src/kudu/master/table_locations_cache_metrics.cc
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/master/table_locations_cache_metrics.h"
+
+#include "kudu/util/metrics.h"
+
// Metric definitions for the table locations cache, registered under the
// 'server' metric entity; they back the members of the generic CacheMetrics
// structure (see TableLocationsCacheMetrics' constructor).
METRIC_DEFINE_counter(server, table_locations_cache_inserts,
                      "Table Locations Cache Inserts",
                      kudu::MetricUnit::kEntries,
                      "Number of entries inserted in the cache",
                      kudu::MetricLevel::kDebug);
METRIC_DEFINE_counter(server, table_locations_cache_lookups,
                      "Table Locations Cache Lookups",
                      kudu::MetricUnit::kEntries,
                      "Number of entries looked up from the cache",
                      kudu::MetricLevel::kDebug);
METRIC_DEFINE_counter(server, table_locations_cache_evictions,
                      "Table Locations Cache Evictions",
                      kudu::MetricUnit::kEntries,
                      "Number of entries evicted from the cache",
                      kudu::MetricLevel::kDebug);
METRIC_DEFINE_counter(server, table_locations_cache_misses,
                      "Table Locations Cache Misses",
                      kudu::MetricUnit::kEntries,
                      "Number of lookups that didn't find a cached entry",
                      kudu::MetricLevel::kDebug);
METRIC_DEFINE_counter(server, table_locations_cache_hits,
                      "Table Locations Cache Hits",
                      kudu::MetricUnit::kEntries,
                      "Number of lookups that found a cached entry",
                      kudu::MetricLevel::kDebug);
METRIC_DEFINE_gauge_uint64(server, table_locations_cache_memory_usage,
                           "Table Locations Cache Memory Usage",
                           kudu::MetricUnit::kBytes,
                           "Memory consumed by the cache",
                           kudu::MetricLevel::kDebug);
+
+namespace kudu {
+namespace master {
+
// GINIT: instantiate a gauge metric with an initial value of 0.
#define GINIT(member, x) (member) = METRIC_##x.Instantiate(metric_entity, 0)

// MINIT: instantiate a counter metric and reset it to start counting
// from zero.
#define MINIT(member, x) \
  (member) = METRIC_##x.Instantiate(metric_entity); \
  (member)->Reset()

// Bind the table-locations-cache metric definitions above to the
// corresponding members of the generic CacheMetrics structure.
TableLocationsCacheMetrics::TableLocationsCacheMetrics(
    const scoped_refptr<MetricEntity>& metric_entity) {
  MINIT(inserts, table_locations_cache_inserts);
  MINIT(lookups, table_locations_cache_lookups);
  MINIT(evictions, table_locations_cache_evictions);
  MINIT(cache_hits_caching, table_locations_cache_hits);
  MINIT(cache_misses_caching, table_locations_cache_misses);
  GINIT(cache_usage, table_locations_cache_memory_usage);
}
#undef MINIT
#undef GINIT
+
+} // namespace master
+} // namespace kudu
diff --git a/src/kudu/master/table_locations_cache_metrics.h 
b/src/kudu/master/table_locations_cache_metrics.h
new file mode 100644
index 0000000..49926b2
--- /dev/null
+++ b/src/kudu/master/table_locations_cache_metrics.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/util/cache_metrics.h"
+
+namespace kudu {
+
+class MetricEntity;
+
+namespace master {
+
// Metrics for the table locations cache: instantiates the counters/gauge of
// the generic CacheMetrics structure with the table-locations-cache-specific
// metric definitions, registered against the given metric entity.
struct TableLocationsCacheMetrics : public CacheMetrics {
  explicit TableLocationsCacheMetrics(
      const scoped_refptr<MetricEntity>& metric_entity);
};
+
+} // namespace master
+} // namespace kudu

Reply via email to