This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new c2d25d5f5 [metrics] Add tablet level metrics for alter schema op time
c2d25d5f5 is described below
commit c2d25d5f57d102d9ad9869eaf208d035a22e18aa
Author: kedeng <[email protected]>
AuthorDate: Mon Apr 15 17:24:31 2024 +0800
[metrics] Add tablet level metrics for alter schema op time
Add tablet-level statistics to track the time consumption of
alter schema operations.
The addition of monitoring items will aid in historical issue
tracking and analysis, as well as facilitate the configuration
of monitoring alarms.
Change-Id: I034fc3141349a940ee8aaac22ca90e1948fc7a6a
Reviewed-on: http://gerrit.cloudera.org:8080/21300
Reviewed-by: Yingchun Lai <[email protected]>
Tested-by: Yingchun Lai <[email protected]>
---
src/kudu/tablet/ops/alter_schema_op.cc | 17 +++++++++++++++++
src/kudu/tablet/ops/alter_schema_op.h | 2 +-
src/kudu/tablet/tablet_metrics.cc | 8 ++++++++
src/kudu/tablet/tablet_metrics.h | 1 +
src/kudu/tablet/tablet_replica-test.cc | 22 ++++++++++++++++++++++
5 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/src/kudu/tablet/ops/alter_schema_op.cc
b/src/kudu/tablet/ops/alter_schema_op.cc
index 5e386d38e..cec6585a9 100644
--- a/src/kudu/tablet/ops/alter_schema_op.cc
+++ b/src/kudu/tablet/ops/alter_schema_op.cc
@@ -19,6 +19,7 @@
#include <memory>
#include <ostream>
+#include <type_traits>
#include <utility>
#include <glog/logging.h>
@@ -32,12 +33,17 @@
#include "kudu/consensus/log.h"
#include "kudu/consensus/raft_consensus.h"
#include "kudu/gutil/port.h"
+#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/tablet/tablet.h"
#include "kudu/tablet/tablet.pb.h"
+#include "kudu/tablet/tablet_metrics.h"
#include "kudu/tablet/tablet_replica.h"
#include "kudu/tserver/tserver.pb.h"
+#include "kudu/util/metrics.h"
+#include "kudu/util/monotime.h"
#include "kudu/util/pb_util.h"
+#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/trace.h"
namespace kudu {
@@ -116,6 +122,7 @@ Status AlterSchemaOp::Start() {
DCHECK(!state_->has_timestamp());
DCHECK(state_->consensus_round()->replicate_msg()->has_timestamp());
state_->set_timestamp(Timestamp(state_->consensus_round()->replicate_msg()->timestamp()));
+ state_->set_start_time(MonoTime::Now());
TRACE("START. Timestamp: $0",
clock::HybridClock::GetPhysicalValueMicros(state_->timestamp()));
return Status::OK();
}
@@ -148,6 +155,16 @@ Status AlterSchemaOp::Apply(CommitMsg** commit_msg) {
}
void AlterSchemaOp::Finish(OpResult result) {
+ // Record the duration of the operation.
+ auto update_metrics = MakeScopedCleanup([&]() {
+ if (auto* metrics = state_->tablet_replica()->tablet()->metrics();
+ PREDICT_TRUE(metrics != nullptr &&
state_->start_time().Initialized())) {
+ uint64_t op_duration_usec =
+ (MonoTime::Now() - state_->start_time()).ToMicroseconds();
+ metrics->alter_schema_duration->Increment(op_duration_usec);
+ }
+ });
+
if (PREDICT_FALSE(result == Op::ABORTED)) {
TRACE("AlterSchemaCommitCallback: op aborted");
state()->Finish();
diff --git a/src/kudu/tablet/ops/alter_schema_op.h
b/src/kudu/tablet/ops/alter_schema_op.h
index 74f9344de..ed2e7aa1f 100644
--- a/src/kudu/tablet/ops/alter_schema_op.h
+++ b/src/kudu/tablet/ops/alter_schema_op.h
@@ -27,6 +27,7 @@
#include "kudu/consensus/consensus.pb.h"
#include "kudu/gutil/macros.h"
#include "kudu/tablet/ops/op.h"
+#include "kudu/tablet/tablet.pb.h"
#include "kudu/tserver/tserver_admin.pb.h"
#include "kudu/util/status.h"
@@ -36,7 +37,6 @@ class rw_semaphore;
namespace tablet {
-class OperationResultPB;
class TabletReplica;
// Op Context for the AlterSchema operation.
diff --git a/src/kudu/tablet/tablet_metrics.cc
b/src/kudu/tablet/tablet_metrics.cc
index 7e53dde34..5a20b2948 100644
--- a/src/kudu/tablet/tablet_metrics.cc
+++ b/src/kudu/tablet/tablet_metrics.cc
@@ -224,6 +224,13 @@ METRIC_DEFINE_histogram(tablet, delta_file_lookups_per_op,
"Delta File Lookups p
kudu::MetricLevel::kDebug,
20, 2);
+METRIC_DEFINE_histogram(tablet, alter_schema_duration,
+ "Alter Schema Op Duration",
+ kudu::MetricUnit::kMicroseconds,
+ "Duration of alter schema ops to this tablet.",
+ kudu::MetricLevel::kDebug,
+ 60000000LU, 2);
+
METRIC_DEFINE_histogram(tablet,
write_op_duration_client_propagated_consistency,
"Write Op Duration with Propagated Consistency",
kudu::MetricUnit::kMicroseconds,
@@ -441,6 +448,7 @@ TabletMetrics::TabletMetrics(const
scoped_refptr<MetricEntity>& entity)
MINIT(snapshot_read_inflight_wait_duration),
MINIT(write_op_duration_client_propagated_consistency),
MINIT(write_op_duration_commit_wait_consistency),
+ MINIT(alter_schema_duration),
GINIT(flush_dms_running),
GINIT(flush_mrs_running),
GINIT(compact_rs_running),
diff --git a/src/kudu/tablet/tablet_metrics.h b/src/kudu/tablet/tablet_metrics.h
index d8d030bae..9c6b815ab 100644
--- a/src/kudu/tablet/tablet_metrics.h
+++ b/src/kudu/tablet/tablet_metrics.h
@@ -86,6 +86,7 @@ struct TabletMetrics {
scoped_refptr<Histogram> snapshot_read_inflight_wait_duration;
scoped_refptr<Histogram> write_op_duration_client_propagated_consistency;
scoped_refptr<Histogram> write_op_duration_commit_wait_consistency;
+ scoped_refptr<Histogram> alter_schema_duration;
scoped_refptr<AtomicGauge<uint32_t> > flush_dms_running;
scoped_refptr<AtomicGauge<uint32_t> > flush_mrs_running;
diff --git a/src/kudu/tablet/tablet_replica-test.cc
b/src/kudu/tablet/tablet_replica-test.cc
index 52092fa8b..c9885a645 100644
--- a/src/kudu/tablet/tablet_replica-test.cc
+++ b/src/kudu/tablet/tablet_replica-test.cc
@@ -56,6 +56,7 @@
#include "kudu/tablet/ops/write_op.h"
#include "kudu/tablet/tablet.h"
#include "kudu/tablet/tablet_metadata.h"
+#include "kudu/tablet/tablet_metrics.h"
#include "kudu/tablet/tablet_replica-test-base.h"
#include "kudu/tablet/tablet_replica_mm_ops.h"
#include "kudu/tserver/tserver.pb.h"
@@ -79,6 +80,7 @@ DECLARE_int32(tablet_history_max_age_sec);
METRIC_DECLARE_entity(tablet);
METRIC_DECLARE_gauge_uint64(live_row_count);
+METRIC_DECLARE_histogram(alter_schema_duration);
using kudu::consensus::CommitMsg;
using kudu::consensus::ConsensusBootstrapInfo;
@@ -259,6 +261,26 @@ class DelayedApplyOp : public WriteOp {
DISALLOW_COPY_AND_ASSIGN(DelayedApplyOp);
};
+TEST_F(TabletReplicaTest, TestAlterSchemaMetric) {
+ ConsensusBootstrapInfo info;
+ ASSERT_OK(StartReplicaAndWaitUntilLeader(info));
+ const int orig_schema_version = tablet()->metadata()->schema_version();
+
+ // Get the metric.
+ auto alter_schema_duration =
+ tablet_replica_->tablet()->metrics()->alter_schema_duration;
+ const auto before_cnt = alter_schema_duration->TotalCount();
+
+ // Add a new column.
+ SchemaBuilder builder(*tablet()->metadata()->schema());
+ ASSERT_OK(builder.AddColumn("new_col", INT32));
+ SchemaPB new_schema;
+ ASSERT_OK(SchemaToPB(builder.Build(), &new_schema));
+ ASSERT_OK(UpdateSchema(new_schema, orig_schema_version + 1));
+
+ ASSERT_EQ(before_cnt + 1, alter_schema_duration->TotalCount());
+}
+
// Ensure that Log::GC() doesn't delete logs when the MRS has an anchor.
TEST_F(TabletReplicaTest, TestMRSAnchorPreventsLogGC) {
ConsensusBootstrapInfo info;