This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 06a62ec681009b146de3f73808711161c5bed12d
Author: helifu <[email protected]>
AuthorDate: Mon Jan 20 19:14:04 2020 +0800

    KUDU-2986 p3: hide the values while there is old version
    
    The old version will not report statistics in the heartbeat
    msg, so the aggregated values are not accurate and we need
    to hide them.
    
    Change-Id: I691bb8c447c878471ed795028f0aab89a39bd946
    Reviewed-on: http://gerrit.cloudera.org:8080/15071
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/client/client-test.cc              |  10 +-
 src/kudu/client/client.cc                   |   8 +-
 src/kudu/client/client.h                    |   1 +
 src/kudu/client/table_statistics-internal.h |   8 +-
 src/kudu/master/catalog_manager.cc          | 158 +++++++++++++++++++++-------
 src/kudu/master/catalog_manager.h           |   3 +
 src/kudu/master/master-test.cc              |  62 ++++++-----
 src/kudu/master/master_path_handlers.cc     |  12 ++-
 src/kudu/master/table_metrics.cc            |  20 ++++
 src/kudu/master/table_metrics.h             |  11 +-
 src/kudu/tools/kudu-tool-test.cc            |   5 +-
 src/kudu/tserver/ts_tablet_manager-test.cc  |  15 ++-
 src/kudu/tserver/ts_tablet_manager.cc       |  13 +--
 13 files changed, 232 insertions(+), 94 deletions(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 87f0642..47a2510 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -120,6 +120,7 @@
 
 DECLARE_bool(allow_unsafe_replication_factor);
 DECLARE_bool(catalog_manager_support_live_row_count);
+DECLARE_bool(catalog_manager_support_on_disk_size);
 DECLARE_bool(fail_dns_resolution);
 DECLARE_bool(location_mapping_by_uuid);
 DECLARE_bool(log_inject_latency);
@@ -813,16 +814,17 @@ TEST_F(ClientTest, TestGetTableStatistics) {
     statistics.reset(table_statistics);
   };
 
-  // Master supports live row count.
+  // Master supports 'on disk size' and 'live row count'.
   NO_FATALS(GetTableStatistics());
   ASSERT_EQ(FLAGS_on_disk_size_for_testing, statistics->on_disk_size());
   ASSERT_EQ(FLAGS_live_row_count_for_testing, statistics->live_row_count());
-  // Master doesn't support live row count.
+
+  // Master doesn't support 'on disk size' and 'live row count'.
+  FLAGS_catalog_manager_support_on_disk_size = false;
   FLAGS_catalog_manager_support_live_row_count = false;
   NO_FATALS(GetTableStatistics());
-  ASSERT_EQ(FLAGS_on_disk_size_for_testing, statistics->on_disk_size());
+  ASSERT_EQ(-1, statistics->on_disk_size());
   ASSERT_EQ(-1, statistics->live_row_count());
-  ASSERT_NE(-1, FLAGS_live_row_count_for_testing);
 }
 
 TEST_F(ClientTest, TestBadTable) {
diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc
index 33d78f3..0e1a1b2 100644
--- a/src/kudu/client/client.cc
+++ b/src/kudu/client/client.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/client/client.h"
 
+#include <algorithm>
 #include <cstdint>
 #include <cstdlib>
 #include <map>
@@ -25,12 +26,12 @@
 #include <ostream>
 #include <set>
 #include <string>
-#include <utility>
 #include <vector>
 
 #include <boost/bind.hpp>
 #include <boost/optional/optional.hpp>
 #include <glog/logging.h>
+#include <google/protobuf/stubs/common.h>
 
 #include "kudu/client/callbacks.h"
 #include "kudu/client/client-internal.h"
@@ -602,7 +603,8 @@ Status KuduClient::GetTableStatistics(const string& 
table_name,
     return StatusFromPB(resp.error().status());
   }
   unique_ptr<KuduTableStatistics> table_statistics(new KuduTableStatistics);
-  table_statistics->data_ = new KuduTableStatistics::Data(resp.on_disk_size(),
+  table_statistics->data_ = new KuduTableStatistics::Data(
+      resp.has_on_disk_size() ? boost::optional<int64_t>(resp.on_disk_size()) 
: boost::none,
       resp.has_live_row_count() ? 
boost::optional<int64_t>(resp.live_row_count()) : boost::none);
 
   *statistics = table_statistics.release();
@@ -877,7 +879,7 @@ KuduTableStatistics::~KuduTableStatistics() {
 }
 
 int64_t KuduTableStatistics::on_disk_size() const {
-  return data_->on_disk_size_;
+  return data_->on_disk_size_ ? *data_->on_disk_size_ : -1;
 }
 
 int64_t KuduTableStatistics::live_row_count() const {
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index 0c3dfbe..1746822 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -964,6 +964,7 @@ class KUDU_EXPORT KuduTableStatistics {
   ~KuduTableStatistics();
 
   /// @return The table's on disk size.
+  ///  -1 is returned if the table doesn't support on_disk_size.
   ///
   /// @note This statistic is pre-replication.
   int64_t on_disk_size() const;
diff --git a/src/kudu/client/table_statistics-internal.h 
b/src/kudu/client/table_statistics-internal.h
index c720836..2f4f33b 100644
--- a/src/kudu/client/table_statistics-internal.h
+++ b/src/kudu/client/table_statistics-internal.h
@@ -33,8 +33,8 @@ using strings::Substitute;
 
 class KuduTableStatistics::Data {
  public:
-  Data(int64_t on_disk_size, boost::optional<int64_t> live_row_count)
-      : on_disk_size_(on_disk_size),
+  Data(boost::optional<int64_t> on_disk_size, boost::optional<int64_t> 
live_row_count)
+      : on_disk_size_(std::move(on_disk_size)),
         live_row_count_(std::move(live_row_count)) {
   }
 
@@ -44,11 +44,11 @@ class KuduTableStatistics::Data {
   string ToString() const {
     return Substitute("on disk size: $0\n"
                       "live row count: $1\n",
-                      on_disk_size_,
+                      on_disk_size_ ? std::to_string(*on_disk_size_) : "N/A",
                       live_row_count_ ? std::to_string(*live_row_count_) : 
"N/A");
   }
 
-  const int64_t on_disk_size_;
+  const boost::optional<int64_t> on_disk_size_;
   const boost::optional<int64_t> live_row_count_;
 
  private:
diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index 876e372..6aac7d8 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -274,8 +274,13 @@ DEFINE_bool(mock_table_metrics_for_testing, false,
 TAG_FLAG(mock_table_metrics_for_testing, hidden);
 TAG_FLAG(mock_table_metrics_for_testing, runtime);
 
+DEFINE_bool(catalog_manager_support_on_disk_size, true,
+            "Whether to enable mock on disk size statistic for tables. For 
testing only.");
+TAG_FLAG(catalog_manager_support_on_disk_size, hidden);
+TAG_FLAG(catalog_manager_support_on_disk_size, runtime);
+
 DEFINE_bool(catalog_manager_support_live_row_count, true,
-            "Whether to enable mock live row count statistic for tables. For 
testing only");
+            "Whether to enable mock live row count statistic for tables. For 
testing only.");
 TAG_FLAG(catalog_manager_support_live_row_count, hidden);
 TAG_FLAG(catalog_manager_support_live_row_count, runtime);
 
@@ -2960,12 +2965,16 @@ Status CatalogManager::GetTableStatistics(const 
GetTableStatisticsRequestPB* req
                                           &table, &l));
 
   if (PREDICT_FALSE(FLAGS_mock_table_metrics_for_testing)) {
-    resp->set_on_disk_size(FLAGS_on_disk_size_for_testing);
+    if (FLAGS_catalog_manager_support_on_disk_size) {
+      resp->set_on_disk_size(FLAGS_on_disk_size_for_testing);
+    }
     if (FLAGS_catalog_manager_support_live_row_count) {
       resp->set_live_row_count(FLAGS_live_row_count_for_testing);
     }
   } else {
-    resp->set_on_disk_size(table->GetMetrics()->on_disk_size->value());
+    if (table->GetMetrics()->TableSupportsOnDiskSize()) {
+      resp->set_on_disk_size(table->GetMetrics()->on_disk_size->value());
+    }
     if (table->GetMetrics()->TableSupportsLiveRowCount()) {
       resp->set_live_row_count(table->GetMetrics()->live_row_count->value());
     }
@@ -4272,14 +4281,21 @@ Status CatalogManager::ProcessTabletReport(
     }
 
     // 10. Process the report's tablet statistics.
-    if (report.has_stats() && report.has_consensus_state()) {
-      DCHECK_EQ(ts_desc->permanent_uuid(), 
report.consensus_state().leader_uuid());
-
-      // Now the tserver only reports the LEADER replicas its own.
-      // First, update table's stats.
-      tablet->table()->UpdateMetrics(tablet_id, tablet->GetStats(), 
report.stats());
-      // Then, update tablet's stats.
-      tablet->UpdateStats(report.stats());
+    //
+    // The tserver only reports the LEADER replicas it owns.
+    if (report.has_consensus_state() &&
+        report.consensus_state().leader_uuid() == ts_desc->permanent_uuid()) {
+      if (report.has_stats()) {
+        // For the versions >= 1.11.x, the tserver reports stats. But keep in
+        // mind that 'live_row_count' is not supported for the legacy replicas.
+        tablet->table()->UpdateMetrics(tablet_id, tablet->GetStats(), 
report.stats());
+        tablet->UpdateStats(report.stats());
+      } else {
+        // For the versions < 1.11.x, the tserver doesn't report stats. Thus,
+        // the metrics from the stats should be hidden, for example, when it's
+        // in the upgrade/downgrade process or in a mixed environment.
+        tablet->table()->InvalidateMetrics(tablet_id);
+      }
     }
   }
 
@@ -5638,52 +5654,114 @@ void TableInfo::UnregisterMetrics() {
 void TableInfo::UpdateMetrics(const string& tablet_id,
                               const tablet::ReportedTabletStatsPB& old_stats,
                               const tablet::ReportedTabletStatsPB& new_stats) {
-  if (metrics_) {
+  if (!metrics_) return;
+
+  if (PREDICT_TRUE(!metrics_->on_disk_size->IsInvisible())) {
     metrics_->on_disk_size->IncrementBy(
-        static_cast<int64_t>(new_stats.on_disk_size())
-        - static_cast<int64_t>(old_stats.on_disk_size()));
-    if (new_stats.has_live_row_count()) {
-      DCHECK(!metrics_->ContainsTabletNoLiveRowCount(tablet_id));
-
-      // The function "IncrementBy" makes the metric visible by validating the 
epoch.
-      // So, it's very important to skip calling it while the metric is 
invisible.
-      if (!metrics_->live_row_count->IsInvisible()) {
-        metrics_->live_row_count->IncrementBy(
-            static_cast<int64_t>(new_stats.live_row_count())
-            - static_cast<int64_t>(old_stats.live_row_count()));
+        static_cast<int64_t>(new_stats.on_disk_size()) -
+        static_cast<int64_t>(old_stats.on_disk_size()));
+  } else {
+    // When is the 'on disk size' invisible?
+    // 1. there is a tablet(legacy or not) under the old tserver version;
+    if (metrics_->ContainsTabletNoOnDiskSize(tablet_id)) {
+      metrics_->DeleteTabletNoOnDiskSize(tablet_id);
+      // The tserver version has been updated since the 'on_disk_size' of the
+      // tablet was not supported before but now it is supported, so we need
+      // to check that the metric could be visible.
+      if (metrics_->TableSupportsOnDiskSize()) {
+        DCHECK(new_stats.has_on_disk_size());
+        uint64_t on_disk_size = new_stats.on_disk_size();
+        {
+          std::lock_guard<rw_spinlock> l(lock_);
+          for (const auto& e : tablet_map_) {
+            if (e.first != tablet_id) {
+              on_disk_size += e.second->GetStats().on_disk_size();
+            }
+          }
+        }
+        // Set the metric value and it will be visible again.
+        metrics_->on_disk_size->set_value(static_cast<int64_t>(on_disk_size));
       }
-    } else if (!old_stats.has_on_disk_size()) {
-      // For an existing tablet, either it supports 'live_row_count' or not. 
Obviously,
-      // it doesn't support this feature in this branch. So we gather the 
tablet uuid to
-      // make sure later that the table doesn't support this either. But the 
new_stats
-      // will keep coming, so it's necessary to limit the operation to one 
time only.
-      // Thus, we use the 'on_disk_size' metric of the old_stats as a switch 
when it is
-      // transitioned from "uninitialized stats" to "has_on_disk_size()".
-      metrics_->AddTabletNoLiveRowCount(tablet_id);
+    }
+  }
 
-      // Make the metric invisible by invalidating the epoch.
+  if (PREDICT_TRUE(!metrics_->live_row_count->IsInvisible())) {
+    if (new_stats.has_live_row_count()) {
+      metrics_->live_row_count->IncrementBy(
+          static_cast<int64_t>(new_stats.live_row_count()) -
+          static_cast<int64_t>(old_stats.live_row_count()));
+    } else {
+      // The legacy tablet makes the metric invisible by invalidating the 
epoch.
+      metrics_->AddTabletNoLiveRowCount(tablet_id);
       metrics_->live_row_count->InvalidateEpoch();
     }
+  } else {
+    // When is the 'live row count' invisible?
+    // 1. there is a legacy tablet under the new tserver version;
+    // 2. there is a newly created tablet which has 'live_row_count',
+    //    but the tserver rolls back to the old version;
+    if (metrics_->ContainsTabletNoLiveRowCount(tablet_id) && 
new_stats.has_live_row_count()) {
+      // It is case 2 and the tserver version has been updated.
+      metrics_->DeleteTabletNoLiveRowCount(tablet_id);
+      if (metrics_->TableSupportsLiveRowCount()) {
+        uint64_t live_row_count = new_stats.live_row_count();
+        {
+          std::lock_guard<rw_spinlock> l(lock_);
+          for (const auto& e : tablet_map_) {
+            if (e.first != tablet_id) {
+              live_row_count += e.second->GetStats().live_row_count();
+            }
+          }
+        }
+        
metrics_->live_row_count->set_value(static_cast<int64_t>(live_row_count));
+      }
+    }
+  }
+}
+
+void TableInfo::InvalidateMetrics(const std::string& tablet_id) {
+  if (!metrics_) return;
+  if (!metrics_->ContainsTabletNoOnDiskSize(tablet_id)) {
+    metrics_->AddTabletNoOnDiskSize(tablet_id);
+    metrics_->on_disk_size->InvalidateEpoch();
+  }
+  if (!metrics_->ContainsTabletNoLiveRowCount(tablet_id)) {
+    metrics_->AddTabletNoLiveRowCount(tablet_id);
+    metrics_->live_row_count->InvalidateEpoch();
   }
 }
 
 void TableInfo::RemoveMetrics(const string& tablet_id,
                               const tablet::ReportedTabletStatsPB& old_stats) {
-  if (metrics_) {
+  DCHECK(lock_.is_locked());
+  if (!metrics_) return;
+
+  if (PREDICT_TRUE(!metrics_->on_disk_size->IsInvisible())) {
     
metrics_->on_disk_size->IncrementBy(-static_cast<int64_t>(old_stats.on_disk_size()));
-    if (old_stats.has_live_row_count()) {
-      DCHECK(!metrics_->ContainsTabletNoLiveRowCount(tablet_id));
-      
metrics_->live_row_count->IncrementBy(-static_cast<int64_t>(old_stats.live_row_count()));
-    } else {
-      metrics_->DeleteTabletNoLiveRowCount(tablet_id);
+  } else {
+    if (metrics_->ContainsTabletNoOnDiskSize(tablet_id)) {
+      metrics_->DeleteTabletNoOnDiskSize(tablet_id);
+      if (metrics_->TableSupportsOnDiskSize()) {
+        uint64_t on_disk_size = 0;
+        for (const auto& e : tablet_map_) {
+          on_disk_size += e.second->GetStats().on_disk_size();
+        }
+        metrics_->on_disk_size->set_value(static_cast<int64_t>(on_disk_size));
+      }
+    }
+  }
 
-      // Make the metric visible again after all of the legacy tablets are 
deleted.
+  if (PREDICT_TRUE(!metrics_->live_row_count->IsInvisible())) {
+    
metrics_->live_row_count->IncrementBy(-static_cast<int64_t>(old_stats.live_row_count()));
+  } else {
+    if (metrics_->ContainsTabletNoLiveRowCount(tablet_id)) {
+      metrics_->DeleteTabletNoLiveRowCount(tablet_id);
       if (metrics_->TableSupportsLiveRowCount()) {
         uint64_t live_row_count = 0;
         for (const auto& e : tablet_map_) {
           live_row_count += e.second->GetStats().live_row_count();
         }
-        
metrics_->live_row_count->IncrementBy(static_cast<int64_t>(live_row_count));
+        
metrics_->live_row_count->set_value(static_cast<int64_t>(live_row_count));
       }
     }
   }
diff --git a/src/kudu/master/catalog_manager.h 
b/src/kudu/master/catalog_manager.h
index 7c5ed7a..0d9d9b3 100644
--- a/src/kudu/master/catalog_manager.h
+++ b/src/kudu/master/catalog_manager.h
@@ -332,6 +332,9 @@ class TableInfo : public RefCountedThreadSafe<TableInfo> {
                      const tablet::ReportedTabletStatsPB& old_stats,
                      const tablet::ReportedTabletStatsPB& new_stats);
 
+  // Invalidate stats belonging to 'tablet_id' in the table's metrics.
+  void InvalidateMetrics(const std::string& tablet_id);
+
   // Remove stats belonging to 'tablet_id' from table metrics.
   void RemoveMetrics(const std::string& tablet_id,
                      const tablet::ReportedTabletStatsPB& old_stats);
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index dfb13cd..18bd30d 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -63,6 +63,7 @@
 #include "kudu/master/master_options.h"
 #include "kudu/master/mini_master.h"
 #include "kudu/master/sys_catalog.h"
+#include "kudu/master/table_metrics.h"
 #include "kudu/master/ts_descriptor.h"
 #include "kudu/master/ts_manager.h"
 #include "kudu/rpc/messenger.h"
@@ -78,6 +79,7 @@
 #include "kudu/util/curl_util.h"
 #include "kudu/util/env.h"
 #include "kudu/util/faststring.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/net/sockaddr.h"
@@ -1899,31 +1901,6 @@ TEST_F(MasterTest, TestDuplicateRequest) {
   ASSERT_EQ(task_list.size(), 2);
 }
 
-TEST_F(MasterTest, TestGetTableStatistics) {
-  const char *kTableName = "testtable";
-  const Schema kTableSchema({ ColumnSchema("key", INT32) }, 1);
-  ASSERT_OK(CreateTable(kTableName, kTableSchema));
-
-  // Get table statistics with right name.
-  GetTableStatisticsRequestPB req;
-  GetTableStatisticsResponsePB resp;
-  RpcController controller;
-  req.mutable_table()->set_table_name(kTableName);
-  ASSERT_OK(proxy_->GetTableStatistics(req, &resp, &controller));
-  ASSERT_FALSE(resp.has_error()) << resp.error().DebugString();
-  ASSERT_EQ(0, resp.on_disk_size());
-  ASSERT_EQ(0, resp.live_row_count());
-
-  FLAGS_mock_table_metrics_for_testing = true;
-  FLAGS_on_disk_size_for_testing = 1024;
-  FLAGS_live_row_count_for_testing = 100;
-  controller.Reset();
-  ASSERT_OK(proxy_->GetTableStatistics(req, &resp, &controller));
-  ASSERT_FALSE(resp.has_error()) << resp.error().DebugString();
-  ASSERT_EQ(FLAGS_on_disk_size_for_testing, resp.on_disk_size());
-  ASSERT_EQ(FLAGS_live_row_count_for_testing, resp.live_row_count());
-}
-
 TEST_F(MasterTest, TestHideLiveRowCountInTableMetrics) {
   const char* kTableName = "testtable";
   const Schema kTableSchema({ ColumnSchema("key", INT32) }, 1);
@@ -1973,7 +1950,6 @@ TEST_F(MasterTest, TestHideLiveRowCountInTableMetrics) {
 
   // Trigger to cause 'live_row_count' visible.
   {
-    tables[0]->RemoveMetrics(tablets.back()->id(), 
tablet::ReportedTabletStatsPB());
     for (int i = 0; i < 100; ++i) {
       for (int j = 0; j < tablets.size(); ++j) {
         NO_FATALS(call_update_metrics(tablets[j], true));
@@ -1992,6 +1968,40 @@ TEST_F(MasterTest, TestHideLiveRowCountInTableMetrics) {
   }
 }
 
+TEST_F(MasterTest, TestTableStatisticsWithOldVersion) {
+  const char* kTableName = "testtable";
+  const Schema kTableSchema({ ColumnSchema("key", INT32) }, 1);
+  ASSERT_OK(CreateTable(kTableName, kTableSchema));
+
+  vector<scoped_refptr<TableInfo>> tables;
+  {
+    CatalogManager::ScopedLeaderSharedLock l(master_->catalog_manager());
+    ASSERT_OK(master_->catalog_manager()->GetAllTables(&tables));
+    ASSERT_EQ(1, tables.size());
+  }
+  const auto& table = tables[0];
+  vector<scoped_refptr<TabletInfo>> tablets;
+  table->GetAllTablets(&tablets);
+  ASSERT_FALSE(tablets.empty());
+
+  {
+    table->InvalidateMetrics(tablets.back()->id());
+    ASSERT_FALSE(table->GetMetrics()->TableSupportsOnDiskSize());
+    ASSERT_FALSE(table->GetMetrics()->TableSupportsLiveRowCount());
+  }
+  {
+    tablet::ReportedTabletStatsPB old_stats;
+    tablet::ReportedTabletStatsPB new_stats;
+    new_stats.set_on_disk_size(1024);
+    new_stats.set_live_row_count(1000);
+    table->UpdateMetrics(tablets.back()->id(), old_stats, new_stats);
+    ASSERT_TRUE(table->GetMetrics()->TableSupportsOnDiskSize());
+    ASSERT_TRUE(table->GetMetrics()->TableSupportsLiveRowCount());
+    ASSERT_EQ(1024, table->GetMetrics()->on_disk_size->value());
+    ASSERT_EQ(1000, table->GetMetrics()->live_row_count->value());
+  }
+}
+
 class AuthzTokenMasterTest : public MasterTest,
                              public ::testing::WithParamInterface<bool> {};
 
diff --git a/src/kudu/master/master_path_handlers.cc 
b/src/kudu/master/master_path_handlers.cc
index 790fe34..385aa0a 100644
--- a/src/kudu/master/master_path_handlers.cc
+++ b/src/kudu/master/master_path_handlers.cc
@@ -498,10 +498,14 @@ void MasterPathHandlers::HandleTablePage(const 
Webserver::WebRequest& req,
 
   const TableMetrics* table_metrics = table->GetMetrics();
   if (table_metrics) {
-    // If the table doesn't support live row counts, the value will be 
negative.
-    // But the value of disk size will never be negative.
-    (*output)["table_disk_size"] =
-        HumanReadableNumBytes::ToString(table_metrics->on_disk_size->value());
+    // If the table doesn't support 'on disk size' and 'live row count',
+    // we need to show their values as "N/A".
+    if (table_metrics->TableSupportsOnDiskSize()) {
+      (*output)["table_disk_size"] =
+          
HumanReadableNumBytes::ToString(table_metrics->on_disk_size->value());
+    } else {
+      (*output)["table_disk_size"] = "N/A";
+    }
     if (table_metrics->TableSupportsLiveRowCount()) {
       (*output)["table_live_row_count"] = 
table_metrics->live_row_count->value();
     } else {
diff --git a/src/kudu/master/table_metrics.cc b/src/kudu/master/table_metrics.cc
index d67760a..8b85ca0 100644
--- a/src/kudu/master/table_metrics.cc
+++ b/src/kudu/master/table_metrics.cc
@@ -47,6 +47,26 @@ TableMetrics::TableMetrics(const 
scoped_refptr<MetricEntity>& entity)
 #undef GINIT
 #undef HIDEINIT
 
+void TableMetrics::AddTabletNoOnDiskSize(const std::string& tablet_id) {
+  std::lock_guard<simple_spinlock> l(lock_);
+  InsertIfNotPresent(&tablet_ids_no_on_disk_size_, tablet_id);
+}
+
+void TableMetrics::DeleteTabletNoOnDiskSize(const std::string& tablet_id) {
+  std::lock_guard<simple_spinlock> l(lock_);
+  tablet_ids_no_on_disk_size_.erase(tablet_id);
+}
+
+bool TableMetrics::ContainsTabletNoOnDiskSize(const std::string& tablet_id) 
const {
+  std::lock_guard<simple_spinlock> l(lock_);
+  return ContainsKey(tablet_ids_no_on_disk_size_, tablet_id);
+}
+
+bool TableMetrics::TableSupportsOnDiskSize() const {
+  std::lock_guard<simple_spinlock> l(lock_);
+  return tablet_ids_no_on_disk_size_.empty();
+}
+
 void TableMetrics::AddTabletNoLiveRowCount(const std::string& tablet_id) {
   std::lock_guard<simple_spinlock> l(lock_);
   InsertIfNotPresent(&tablet_ids_no_live_row_count_, tablet_id);
diff --git a/src/kudu/master/table_metrics.h b/src/kudu/master/table_metrics.h
index 0b18dbb..1cf731c 100644
--- a/src/kudu/master/table_metrics.h
+++ b/src/kudu/master/table_metrics.h
@@ -17,6 +17,8 @@
 #ifndef KUDU_MASTER_TABLE_METRICS_H
 #define KUDU_MASTER_TABLE_METRICS_H
 
+#include <cstddef>
+
 #include <cstdint>
 #include <string>
 #include <unordered_set>
@@ -49,6 +51,11 @@ struct TableMetrics {
   scoped_refptr<AtomicGauge<uint64_t>> live_row_count;
   scoped_refptr<AtomicGauge<size_t>> merged_entities_count_of_table;
 
+  void AddTabletNoOnDiskSize(const std::string& tablet_id);
+  void DeleteTabletNoOnDiskSize(const std::string& tablet_id);
+  bool ContainsTabletNoOnDiskSize(const std::string& tablet_id) const;
+  bool TableSupportsOnDiskSize() const;
+
   void AddTabletNoLiveRowCount(const std::string& tablet_id);
   void DeleteTabletNoLiveRowCount(const std::string& tablet_id);
   bool ContainsTabletNoLiveRowCount(const std::string& tablet_id) const;
@@ -56,7 +63,9 @@ struct TableMetrics {
 
  private:
   mutable simple_spinlock lock_;
-  // IDs of tablets which do not support reporting live row count.
+  // Identifiers of tablets that do not support reporting on disk size.
+  std::unordered_set<std::string> tablet_ids_no_on_disk_size_;
+  // Identifiers of tablets that do not support reporting live row count.
   std::unordered_set<std::string> tablet_ids_no_live_row_count_;
 };
 
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index d2b1190..a88c602 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -5727,9 +5727,11 @@ TEST_F(ToolTest, TestFailedTableCopy) {
   ASSERT_STR_CONTAINS(stderr, "Timed out");
 }
 
-TEST_F(ToolTest, TestGetTableStatisticsLiveRowCountNotSupported) {
+TEST_F(ToolTest, TestGetTableStatisticsNotSupported) {
   ExternalMiniClusterOptions opts;
   opts.extra_master_flags = { "--mock_table_metrics_for_testing=true",
+                              "--on_disk_size_for_testing=77",
+                              "--catalog_manager_support_on_disk_size=false",
                               "--live_row_count_for_testing=99",
                               "--catalog_manager_support_live_row_count=false" 
};
   NO_FATALS(StartExternalMiniCluster(std::move(opts)));
@@ -5745,6 +5747,7 @@ TEST_F(ToolTest, 
TestGetTableStatisticsLiveRowCountNotSupported) {
                  cluster_->master()->bound_rpc_addr().ToString(),
                  TestWorkload::kDefaultTableName),
       &stdout));
+  ASSERT_STR_CONTAINS(stdout, "on disk size: N/A");
   ASSERT_STR_CONTAINS(stdout, "live row count: N/A");
 }
 
diff --git a/src/kudu/tserver/ts_tablet_manager-test.cc 
b/src/kudu/tserver/ts_tablet_manager-test.cc
index 48ea188..a346089 100644
--- a/src/kudu/tserver/ts_tablet_manager-test.cc
+++ b/src/kudu/tserver/ts_tablet_manager-test.cc
@@ -334,12 +334,16 @@ TEST_F(TsTabletManagerTest, TestTabletStatsReports) {
   ASSERT_OK(CreateNewTablet("tablet-1", schema_, boost::none, boost::none, 
&replica1));
   ASSERT_OK(CreateNewTablet("tablet-2", schema_, boost::none, boost::none, 
nullptr));
 
-  // 2. Do a full report - should include these two tablets but statistics are 
all zero.
+  // 2. Do a full report - should include two tablets and statistics are 
uninitialized.
   NO_FATALS(GenerateFullTabletReport(&report));
   ASSERT_FALSE(report.is_incremental());
   ASSERT_EQ(2, report.updated_tablets().size());
-  ASSERT_FALSE(report.updated_tablets(0).has_stats());
-  ASSERT_FALSE(report.updated_tablets(1).has_stats());
+  ASSERT_TRUE(report.updated_tablets(0).has_stats());
+  ASSERT_TRUE(report.updated_tablets(1).has_stats());
+  ASSERT_FALSE(report.updated_tablets(0).stats().has_on_disk_size());
+  ASSERT_FALSE(report.updated_tablets(0).stats().has_live_row_count());
+  ASSERT_FALSE(report.updated_tablets(1).stats().has_on_disk_size());
+  ASSERT_FALSE(report.updated_tablets(1).stats().has_live_row_count());
   ASSERT_MONOTONIC_REPORT_SEQNO(&seqno, report);
   MarkTabletReportAcknowledged(report);
 
@@ -347,7 +351,7 @@ TEST_F(TsTabletManagerTest, TestTabletStatsReports) {
   tablet_manager_->SetNextUpdateTimeForTests();
   heartbeater_->TriggerASAP();
 
-  // Do an incremental report - should include these two tablets and tablet 
statistics.
+  // Do an incremental report - should include two tablets and statistics have 
been initialized.
   ASSERT_EVENTUALLY([&] () {
     NO_FATALS(GenerateIncrementalTabletReport(&report));
     ASSERT_TRUE(report.is_incremental());
@@ -379,13 +383,14 @@ TEST_F(TsTabletManagerTest, TestTabletStatsReports) {
   tablet_manager_->SetNextUpdateTimeForTests();
   heartbeater_->TriggerASAP();
 
-  // Do an incremental report - should include the tablet and tablet 
statistics.
+  // Do an incremental report - should include the tablet and check the 
statistics.
   ASSERT_EVENTUALLY([&] () {
     NO_FATALS(GenerateIncrementalTabletReport(&report));
     ASSERT_TRUE(report.is_incremental());
     ASSERT_EQ(1, report.updated_tablets().size());
   });
   ASSERT_MONOTONIC_REPORT_SEQNO(&seqno, report);
+  ASSERT_TRUE(report.updated_tablets(0).has_stats());
   ASSERT_GT(report.updated_tablets(0).stats().on_disk_size(), 0);
   ASSERT_EQ(kCount, report.updated_tablets(0).stats().live_row_count());
   ASSERT_REPORT_HAS_UPDATED_TABLET(report, "tablet-1");
diff --git a/src/kudu/tserver/ts_tablet_manager.cc 
b/src/kudu/tserver/ts_tablet_manager.cc
index ee578ba..b614d7e 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -252,8 +252,9 @@ TSTabletManager::TSTabletManager(TabletServer* server)
     metric_registry_(server->metric_registry()),
     tablet_copy_metrics_(server->metric_entity()),
     state_(MANAGER_INITIALIZING) {
-  next_update_time_ = MonoTime::Now() +
-      MonoDelta::FromMilliseconds(FLAGS_update_tablet_stats_interval_ms);
+  // A heartbeat msg without statistics will be considered to be from an old
+  // version, thus it's necessary to trigger updating stats as soon as 
possible.
+  next_update_time_ = MonoTime::Now();
 
   METRIC_tablets_num_not_initialized.InstantiateFunctionGauge(
           server->metric_entity(),
@@ -1350,9 +1351,7 @@ void TSTabletManager::CreateReportedTabletPB(const 
scoped_refptr<TabletReplica>&
     // If we're the leader, report stats.
     if (cstate.leader_uuid() == fs_manager_->uuid()) {
       ReportedTabletStatsPB stats_pb = replica->GetTabletStats();
-      if (stats_pb.has_on_disk_size() || stats_pb.has_live_row_count()) {
-        *reported_tablet->mutable_stats() = std::move(stats_pb);
-      }
+      *reported_tablet->mutable_stats() = std::move(stats_pb);
     }
     *reported_tablet->mutable_consensus_state() = std::move(cstate);
   }
@@ -1628,7 +1627,9 @@ void TSTabletManager::UpdateTabletStatsIfNecessary() {
     replica->UpdateTabletStats(&dirty_tablets);
   }
 
-  MarkTabletsDirty(dirty_tablets, "The tablet statistics have been changed");
+  if (!dirty_tablets.empty()) {
+    MarkTabletsDirty(dirty_tablets, "The tablet statistics have been changed");
+  }
 }
 
 void TSTabletManager::SetNextUpdateTimeForTests() {

Reply via email to