This is an automated email from the ASF dual-hosted git repository. laiyingchun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 8409d28a5ace591f635b029cb37ebd70f3b8eb10 Author: Yingchun Lai <[email protected]> AuthorDate: Mon Feb 21 18:08:30 2022 +0800 [metrics] Add table level metrics column_count and schema_version on master This patch adds some table level metrics on master. - column_count: The column count in the table's latest schema. Since too many column count will lead to bad performance and space amplification, we can add some monitor alerts based on this metric after this. - schema_version: The table's schema version. Too many or too frequently schema changes is also a metric worth to be monitored. Change-Id: Ia0671cfb0758d53e950a4e35a968dae15f82d18e Reviewed-on: http://gerrit.cloudera.org:8080/18258 Tested-by: Kudu Jenkins Reviewed-by: Abhishek Chennaka <[email protected]> Reviewed-by: Yifan Zhang <[email protected]> Reviewed-by: Yingchun Lai <[email protected]> --- src/kudu/master/catalog_manager.cc | 25 +++++++++++++++--- src/kudu/master/catalog_manager.h | 10 +++++--- src/kudu/master/master-test.cc | 52 ++++++++++++++++++++++++++++++++++++-- src/kudu/master/table_metrics.cc | 10 ++++++++ src/kudu/master/table_metrics.h | 2 ++ 5 files changed, 90 insertions(+), 9 deletions(-) diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index 3f5333f..1b7aa22 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -590,6 +590,10 @@ class TableLoader : public TableVisitor { // It's unnecessary to register metrics for the deleted tables. table->RegisterMetrics(catalog_manager_->master_->metric_registry(), CatalogManager::NormalizeTableName(metadata.name())); + + // Update table's schema related metrics after being loaded. + table->UpdateSchemaMetrics(); + LOG(INFO) << Substitute("Loaded metadata for table $0", table->ToString()); } VLOG(2) << Substitute("Metadata for table $0: $1", @@ -2103,6 +2107,9 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, } TRACE("Inserted table and tablets into CatalogManager maps"); + // Update table's schema related metrics after being created. + table->UpdateSchemaMetrics(); + resp->set_table_id(table->id()); VLOG(1) << "Created table " << table->ToString(); background_tasks_->Wake(); @@ -3383,6 +3390,9 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB& req, table_locations_cache_->Remove(table->id()); } + // Update table's schema related metrics after being altered. + table->UpdateSchemaMetrics(); + background_tasks_->Wake(); return Status::OK(); } @@ -4981,7 +4991,7 @@ Status CatalogManager::ProcessTabletReport( if (report.has_stats()) { // For the versions >= 1.11.x, the tserver reports stats. But keep in // mind that 'live_row_count' is not supported for the legacy replicas. - tablet->table()->UpdateMetrics(tablet_id, tablet->GetStats(), report.stats()); + tablet->table()->UpdateStatsMetrics(tablet_id, tablet->GetStats(), report.stats()); tablet->UpdateStats(report.stats()); } else { // For the versions < 1.11.x, the tserver doesn't report stats. Thus, @@ -6739,9 +6749,9 @@ void TableInfo::UnregisterMetrics() { } } -void TableInfo::UpdateMetrics(const string& tablet_id, - const tablet::ReportedTabletStatsPB& old_stats, - const tablet::ReportedTabletStatsPB& new_stats) { +void TableInfo::UpdateStatsMetrics(const string& tablet_id, + const tablet::ReportedTabletStatsPB& old_stats, + const tablet::ReportedTabletStatsPB& new_stats) { if (!metrics_) { return; } @@ -6808,6 +6818,13 @@ void TableInfo::UpdateMetrics(const string& tablet_id, } } +void TableInfo::UpdateSchemaMetrics() { + TableMetadataLock l(this, LockMode::READ); + const SysTablesEntryPB& pb = metadata().state().pb; + metrics_->column_count->set_value(pb.schema().columns().size()); + metrics_->schema_version->set_value(pb.version()); +} + void TableInfo::InvalidateMetrics(const std::string& tablet_id) { if (!metrics_) return; if (!metrics_->ContainsTabletNoOnDiskSize(tablet_id)) { diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h index 0379ec1..b23b5b7 100644 --- a/src/kudu/master/catalog_manager.h +++ b/src/kudu/master/catalog_manager.h @@ -360,9 +360,13 @@ class TableInfo : public RefCountedThreadSafe<TableInfo> { void UnregisterMetrics(); // Update stats belonging to 'tablet_id' in the table's metrics. - void UpdateMetrics(const std::string& tablet_id, - const tablet::ReportedTabletStatsPB& old_stats, - const tablet::ReportedTabletStatsPB& new_stats); + void UpdateStatsMetrics(const std::string& tablet_id, + const tablet::ReportedTabletStatsPB& old_stats, + const tablet::ReportedTabletStatsPB& new_stats); + + // Update table's schema related metrics, for exapmle, schema version, + // column count, and etc. + void UpdateSchemaMetrics(); // Invalidate stats belonging to 'tablet_id' in the table's metrics. void InvalidateMetrics(const std::string& tablet_id); diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc index ddf6adc..db34547 100644 --- a/src/kudu/master/master-test.cc +++ b/src/kudu/master/master-test.cc @@ -2491,7 +2491,7 @@ TEST_F(MasterTest, TestHideLiveRowCountInTableMetrics) { old_stats.set_live_row_count(1); new_stats.set_live_row_count(1); } - tables[0]->UpdateMetrics(tablet->id(), old_stats, new_stats); + tables[0]->UpdateStatsMetrics(tablet->id(), old_stats, new_stats); }; // Trigger to cause 'live_row_count' invisible. @@ -2560,7 +2560,7 @@ TEST_F(MasterTest, TestTableStatisticsWithOldVersion) { tablet::ReportedTabletStatsPB new_stats; new_stats.set_on_disk_size(1024); new_stats.set_live_row_count(1000); - table->UpdateMetrics(tablets.back()->id(), old_stats, new_stats); + table->UpdateStatsMetrics(tablets.back()->id(), old_stats, new_stats); ASSERT_TRUE(table->GetMetrics()->TableSupportsOnDiskSize()); ASSERT_TRUE(table->GetMetrics()->TableSupportsLiveRowCount()); ASSERT_EQ(1024, table->GetMetrics()->on_disk_size->value()); @@ -2627,6 +2627,54 @@ TEST_F(MasterTest, TestDeletedTablesAndTabletsCleanup) { }); } +TEST_F(MasterTest, TestTableSchemaMetrics) { + const char* kTableName = "testtable"; + const Schema kTableSchema({ ColumnSchema("key", INT32) }, 1); + ASSERT_OK(CreateTable(kTableName, kTableSchema)); + + vector<scoped_refptr<TableInfo>> tables; + { + CatalogManager::ScopedLeaderSharedLock l(master_->catalog_manager()); + master_->catalog_manager()->GetAllTables(&tables); + ASSERT_EQ(1, tables.size()); + } + const auto& table = tables[0]; + + ASSERT_EQ(1, table->GetMetrics()->column_count->value()); + ASSERT_EQ(0, table->GetMetrics()->schema_version->value()); + + { + AlterTableRequestPB req; + req.mutable_table()->set_table_name(kTableName); + AlterTableRequestPB::Step* step = req.add_alter_schema_steps(); + step->set_type(AlterTableRequestPB::ADD_COLUMN); + ColumnSchemaToPB(ColumnSchema("c1", INT32, true), + step->mutable_add_column()->mutable_schema()); + + AlterTableResponsePB resp; + RpcController controller; + ASSERT_OK(proxy_->AlterTable(req, &resp, &controller)); + ASSERT_FALSE(resp.has_error()); + ASSERT_EQ(2, table->GetMetrics()->column_count->value()); + ASSERT_EQ(1, table->GetMetrics()->schema_version->value()); + } + + { + AlterTableRequestPB req; + req.mutable_table()->set_table_name(kTableName); + AlterTableRequestPB::Step* step = req.add_alter_schema_steps(); + step->set_type(AlterTableRequestPB::DROP_COLUMN); + step->mutable_drop_column()->set_name("c1"); + + AlterTableResponsePB resp; + RpcController controller; + ASSERT_OK(proxy_->AlterTable(req, &resp, &controller)); + ASSERT_FALSE(resp.has_error()); + ASSERT_EQ(1, table->GetMetrics()->column_count->value()); + ASSERT_EQ(2, table->GetMetrics()->schema_version->value()); + } +} + class AuthzTokenMasterTest : public MasterTest, public ::testing::WithParamInterface<bool> {}; diff --git a/src/kudu/master/table_metrics.cc b/src/kudu/master/table_metrics.cc index 8b85ca0..5fdc6d3 100644 --- a/src/kudu/master/table_metrics.cc +++ b/src/kudu/master/table_metrics.cc @@ -36,12 +36,22 @@ METRIC_DEFINE_gauge_uint64(table, live_row_count, "Table Live Row count", "Pre-replication aggregated number of live rows in this table. " "Only accurate if all tablets in the table support live row counting.", kudu::MetricLevel::kInfo); +METRIC_DEFINE_gauge_uint32(table, column_count, "Table Column count", + kudu::MetricUnit::kUnits, + "The column count in the table's latest schema.", + kudu::MetricLevel::kInfo); +METRIC_DEFINE_gauge_uint32(table, schema_version, "Table Schema Version", + kudu::MetricUnit::kUnits, + "The table's schema version.", + kudu::MetricLevel::kInfo); #define GINIT(x) x(METRIC_##x.Instantiate(entity, 0)) #define HIDEINIT(x, v) x(METRIC_##x.InstantiateHidden(entity, v)) TableMetrics::TableMetrics(const scoped_refptr<MetricEntity>& entity) : GINIT(on_disk_size), GINIT(live_row_count), + GINIT(column_count), + GINIT(schema_version), HIDEINIT(merged_entities_count_of_table, 1) { } #undef GINIT diff --git a/src/kudu/master/table_metrics.h b/src/kudu/master/table_metrics.h index 1cf731c..2b851ba 100644 --- a/src/kudu/master/table_metrics.h +++ b/src/kudu/master/table_metrics.h @@ -49,6 +49,8 @@ struct TableMetrics { scoped_refptr<AtomicGauge<uint64_t>> on_disk_size; scoped_refptr<AtomicGauge<uint64_t>> live_row_count; + scoped_refptr<AtomicGauge<uint32_t>> column_count; + scoped_refptr<AtomicGauge<uint32_t>> schema_version; scoped_refptr<AtomicGauge<size_t>> merged_entities_count_of_table; void AddTabletNoOnDiskSize(const std::string& tablet_id);
