This is an automated email from the ASF dual-hosted git repository.

wangdan pushed a commit to branch migrate-metrics-dev
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git

commit 10d18e671d2ca6bdfe36942188cb78f4af673de8
Author: Dan Wang <[email protected]>
AuthorDate: Wed May 17 16:44:21 2023 +0800

    feat(new_metrics): migrate metrics for replica_stub (part 7) (#1475)
    
    https://github.com/apache/incubator-pegasus/issues/1454
    
    This is the 7th part of migrating the metrics of replica_stub to the new
    framework; all of the metrics in this part are partition-splitting-related.
    This is also the last part for replica_stub.
    
    During this migration, 6 metrics are changed from server-level to
    replica-level, including the number of started splittings, the number and
    the size of files copied for splitting, the number of mutations copied for
    splitting, and the number of failed/successful splittings.
    
    Another 4 metrics are still kept at server level, including the number of
    current splitting replicas, the max duration among all splitting replicas,
    the max duration among all splitting replicas for async learns, and the max
    size of copied files among all splitting replicas.
---
 src/replica/replica_stub.cpp                | 93 ++++++++++-------------------
 src/replica/replica_stub.h                  | 18 ++----
 src/replica/split/replica_split_manager.cpp | 55 +++++++++++++----
 src/replica/split/replica_split_manager.h   |  8 +++
 src/replica/test/replica_test.cpp           |  1 -
 src/utils/metrics.h                         |  1 +
 6 files changed, 88 insertions(+), 88 deletions(-)

diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp
index f3dbc298f..d5aa20778 100644
--- a/src/replica/replica_stub.cpp
+++ b/src/replica/replica_stub.cpp
@@ -62,7 +62,6 @@
 #include "mutation_log.h"
 #include "nfs/nfs_node.h"
 #include "nfs_types.h"
-#include "perf_counter/perf_counter.h"
 #include "replica.h"
 #include "replica/duplication/replica_follower.h"
 #include "replica/log_file.h"
@@ -205,6 +204,26 @@ METRIC_DEFINE_gauge_int64(server,
                           dsn::metric_unit::kMilliSeconds,
                           "The max duration of bulk loads");
 
+METRIC_DEFINE_gauge_int64(server,
+                          splitting_replicas,
+                          dsn::metric_unit::kReplicas,
+                          "The number of current splitting replicas");
+
+METRIC_DEFINE_gauge_int64(server,
+                          splitting_replicas_max_duration_ms,
+                          dsn::metric_unit::kMilliSeconds,
+                          "The max duration among all splitting replicas");
+
+METRIC_DEFINE_gauge_int64(server,
+                          splitting_replicas_async_learn_max_duration_ms,
+                          dsn::metric_unit::kMilliSeconds,
+                          "The max duration among all splitting replicas for 
async learns");
+
+METRIC_DEFINE_gauge_int64(server,
+                          splitting_replicas_max_copy_file_bytes,
+                          dsn::metric_unit::kBytes,
+                          "The max size of copied files among all splitting 
replicas");
+
 namespace dsn {
 namespace replication {
 DSN_DEFINE_bool(replication,
@@ -333,7 +352,11 @@ replica_stub::replica_stub(replica_state_subscriber 
subscriber /*= nullptr*/,
       METRIC_VAR_INIT_server(write_busy_requests),
       METRIC_VAR_INIT_server(bulk_load_running_count),
       METRIC_VAR_INIT_server(bulk_load_ingestion_max_duration_ms),
-      METRIC_VAR_INIT_server(bulk_load_max_duration_ms)
+      METRIC_VAR_INIT_server(bulk_load_max_duration_ms),
+      METRIC_VAR_INIT_server(splitting_replicas),
+      METRIC_VAR_INIT_server(splitting_replicas_max_duration_ms),
+      METRIC_VAR_INIT_server(splitting_replicas_async_learn_max_duration_ms),
+      METRIC_VAR_INIT_server(splitting_replicas_max_copy_file_bytes)
 {
 #ifdef DSN_ENABLE_GPERF
     _is_releasing_memory = false;
@@ -344,67 +367,10 @@ replica_stub::replica_stub(replica_state_subscriber 
subscriber /*= nullptr*/,
     _state = NS_Disconnected;
     _log = nullptr;
     _primary_address_str[0] = '\0';
-    install_perf_counters();
 }
 
 replica_stub::~replica_stub(void) { close(); }
 
-void replica_stub::install_perf_counters()
-{
-    // <- Partition split Metrics ->
-
-    _counter_replicas_splitting_count.init_app_counter("eon.replica_stub",
-                                                       
"replicas.splitting.count",
-                                                       COUNTER_TYPE_NUMBER,
-                                                       "current partition 
splitting count");
-
-    _counter_replicas_splitting_max_duration_time_ms.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.max.duration.time(ms)",
-        COUNTER_TYPE_NUMBER,
-        "current partition splitting max duration time(ms)");
-    _counter_replicas_splitting_max_async_learn_time_ms.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.max.async.learn.time(ms)",
-        COUNTER_TYPE_NUMBER,
-        "current partition splitting max async learn time(ms)");
-    _counter_replicas_splitting_max_copy_file_size.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.max.copy.file.size",
-        COUNTER_TYPE_NUMBER,
-        "current splitting max copy file size");
-    _counter_replicas_splitting_recent_start_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.recent.start.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "current splitting start count in the recent period");
-    _counter_replicas_splitting_recent_copy_file_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.recent.copy.file.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "splitting copy file count in the recent period");
-    _counter_replicas_splitting_recent_copy_file_size.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.recent.copy.file.size",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "splitting copy file size in the recent period");
-    _counter_replicas_splitting_recent_copy_mutation_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.recent.copy.mutation.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "splitting copy mutation count in the recent period");
-    _counter_replicas_splitting_recent_split_succ_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.recent.split.succ.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "splitting succeed count in the recent period");
-    _counter_replicas_splitting_recent_split_fail_count.init_app_counter(
-        "eon.replica_stub",
-        "replicas.splitting.recent.split.fail.count",
-        COUNTER_TYPE_VOLATILE_NUMBER,
-        "splitting fail count in the recent period");
-}
-
 void replica_stub::initialize(bool clear /* = false*/)
 {
     replication_options opts;
@@ -1787,10 +1753,11 @@ void replica_stub::on_gc()
     METRIC_VAR_SET(bulk_load_running_count, bulk_load_running_count);
     METRIC_VAR_SET(bulk_load_ingestion_max_duration_ms, 
bulk_load_max_ingestion_time_ms);
     METRIC_VAR_SET(bulk_load_max_duration_ms, bulk_load_max_duration_time_ms);
-    _counter_replicas_splitting_count->set(splitting_count);
-    
_counter_replicas_splitting_max_duration_time_ms->set(splitting_max_duration_time_ms);
-    
_counter_replicas_splitting_max_async_learn_time_ms->set(splitting_max_async_learn_time_ms);
-    
_counter_replicas_splitting_max_copy_file_size->set(splitting_max_copy_file_size);
+    METRIC_VAR_SET(splitting_replicas, splitting_count);
+    METRIC_VAR_SET(splitting_replicas_max_duration_ms, 
splitting_max_duration_time_ms);
+    METRIC_VAR_SET(splitting_replicas_async_learn_max_duration_ms,
+                   splitting_max_async_learn_time_ms);
+    METRIC_VAR_SET(splitting_replicas_max_copy_file_bytes, 
splitting_max_copy_file_size);
 
     LOG_INFO("finish to garbage collection, time_used_ns = {}", dsn_now_ns() - 
start);
 }
diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h
index 4560c4375..96de7802b 100644
--- a/src/replica/replica_stub.h
+++ b/src/replica/replica_stub.h
@@ -56,7 +56,6 @@
 #include "failure_detector/failure_detector_multimaster.h"
 #include "metadata_types.h"
 #include "partition_split_types.h"
-#include "perf_counter/perf_counter_wrapper.h"
 #include "replica.h"
 #include "replica/mutation_log.h"
 #include "replica_admin_types.h"
@@ -350,7 +349,6 @@ private:
     void trigger_checkpoint(replica_ptr r, bool is_emergency);
     void handle_log_failure(error_code err);
 
-    void install_perf_counters();
     dsn::error_code on_kill_replica(gpid id);
 
     void get_replica_info(/*out*/ replica_info &info, /*in*/ replica_ptr r);
@@ -494,7 +492,6 @@ private:
     std::atomic_bool _is_releasing_memory{false};
 #endif
 
-    // performance counters
     METRIC_VAR_DECLARE_gauge_int64(total_replicas);
     METRIC_VAR_DECLARE_gauge_int64(opening_replicas);
     METRIC_VAR_DECLARE_gauge_int64(closing_replicas);
@@ -524,17 +521,10 @@ private:
     METRIC_VAR_DECLARE_gauge_int64(bulk_load_ingestion_max_duration_ms);
     METRIC_VAR_DECLARE_gauge_int64(bulk_load_max_duration_ms);
 
-    // <- Partition split Metrics ->
-    perf_counter_wrapper _counter_replicas_splitting_count;
-    perf_counter_wrapper _counter_replicas_splitting_max_duration_time_ms;
-    perf_counter_wrapper _counter_replicas_splitting_max_async_learn_time_ms;
-    perf_counter_wrapper _counter_replicas_splitting_max_copy_file_size;
-    perf_counter_wrapper _counter_replicas_splitting_recent_start_count;
-    perf_counter_wrapper _counter_replicas_splitting_recent_copy_file_count;
-    perf_counter_wrapper _counter_replicas_splitting_recent_copy_file_size;
-    perf_counter_wrapper 
_counter_replicas_splitting_recent_copy_mutation_count;
-    perf_counter_wrapper _counter_replicas_splitting_recent_split_fail_count;
-    perf_counter_wrapper _counter_replicas_splitting_recent_split_succ_count;
+    METRIC_VAR_DECLARE_gauge_int64(splitting_replicas);
+    METRIC_VAR_DECLARE_gauge_int64(splitting_replicas_max_duration_ms);
+    
METRIC_VAR_DECLARE_gauge_int64(splitting_replicas_async_learn_max_duration_ms);
+    METRIC_VAR_DECLARE_gauge_int64(splitting_replicas_max_copy_file_bytes);
 
     dsn::task_tracker _tracker;
 };
diff --git a/src/replica/split/replica_split_manager.cpp 
b/src/replica/split/replica_split_manager.cpp
index 0c63a5ebf..49cd9449f 100644
--- a/src/replica/split/replica_split_manager.cpp
+++ b/src/replica/split/replica_split_manager.cpp
@@ -29,8 +29,6 @@
 #include "dsn.layer2_types.h"
 #include "failure_detector/failure_detector_multimaster.h"
 #include "partition_split_types.h"
-#include "perf_counter/perf_counter.h"
-#include "perf_counter/perf_counter_wrapper.h"
 #include "replica/mutation_log.h"
 #include "replica/prepare_list.h"
 #include "replica/replica_context.h"
@@ -51,6 +49,36 @@
 #include "utils/string_view.h"
 #include "utils/thread_access_checker.h"
 
+METRIC_DEFINE_counter(replica,
+                      splitting_started_count,
+                      dsn::metric_unit::kPartitionSplittings,
+                      "The number of started splittings");
+
+METRIC_DEFINE_counter(replica,
+                      splitting_copy_file_count,
+                      dsn::metric_unit::kFiles,
+                      "The number of files copied for splitting");
+
+METRIC_DEFINE_counter(replica,
+                      splitting_copy_file_bytes,
+                      dsn::metric_unit::kBytes,
+                      "The size of files copied for splitting");
+
+METRIC_DEFINE_counter(replica,
+                      splitting_copy_mutation_count,
+                      dsn::metric_unit::kMutations,
+                      "The number of mutations copied for splitting");
+
+METRIC_DEFINE_counter(replica,
+                      splitting_failed_count,
+                      dsn::metric_unit::kPartitionSplittings,
+                      "The number of failed splittings");
+
+METRIC_DEFINE_counter(replica,
+                      splitting_successful_count,
+                      dsn::metric_unit::kPartitionSplittings,
+                      "The number of successful splittings");
+
 namespace dsn {
 namespace replication {
 
@@ -58,7 +86,15 @@ DSN_DECLARE_bool(empty_write_disabled);
 DSN_DECLARE_int32(max_mutation_count_in_prepare_list);
 
 replica_split_manager::replica_split_manager(replica *r)
-    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+    : replica_base(r),
+      _replica(r),
+      _stub(r->get_replica_stub()),
+      METRIC_VAR_INIT_replica(splitting_started_count),
+      METRIC_VAR_INIT_replica(splitting_copy_file_count),
+      METRIC_VAR_INIT_replica(splitting_copy_file_bytes),
+      METRIC_VAR_INIT_replica(splitting_copy_mutation_count),
+      METRIC_VAR_INIT_replica(splitting_failed_count),
+      METRIC_VAR_INIT_replica(splitting_successful_count)
 {
     _partition_version.store(_replica->_app_info.partition_count - 1);
 }
@@ -159,7 +195,7 @@ void replica_split_manager::child_init_replica(gpid 
parent_gpid,
                          get_gpid().thread_hash(),
                          std::chrono::seconds(3));
     _replica->_split_states.splitting_start_ts_ns = dsn_now_ns();
-    _stub->_counter_replicas_splitting_recent_start_count->increment();
+    METRIC_VAR_INCREMENT(splitting_started_count);
 
     LOG_INFO_PREFIX(
         "child initialize succeed, init_ballot={}, parent_gpid={}", 
init_ballot, parent_gpid);
@@ -469,8 +505,8 @@ 
replica_split_manager::child_apply_private_logs(std::vector<std::string> plog_fi
 
     _replica->_split_states.splitting_copy_file_count += plog_files.size();
     _replica->_split_states.splitting_copy_file_size += total_file_size;
-    
_stub->_counter_replicas_splitting_recent_copy_file_count->add(plog_files.size());
-    
_stub->_counter_replicas_splitting_recent_copy_file_size->add(total_file_size);
+    METRIC_VAR_INCREMENT_BY(splitting_copy_file_count, plog_files.size());
+    METRIC_VAR_INCREMENT_BY(splitting_copy_file_bytes, total_file_size);
 
     LOG_INFO_PREFIX("replay private_log files succeed, file count={}, app 
last_committed_decree={}",
                     plog_files.size(),
@@ -494,7 +530,7 @@ 
replica_split_manager::child_apply_private_logs(std::vector<std::string> plog_fi
         ++count;
     }
     _replica->_split_states.splitting_copy_mutation_count += count;
-    _stub->_counter_replicas_splitting_recent_copy_mutation_count->add(count);
+    METRIC_VAR_INCREMENT_BY(splitting_copy_mutation_count, count);
     plist.commit(last_committed_decree, COMMIT_TO_DECREE_HARD);
     LOG_INFO_PREFIX(
         "apply in-memory mutations succeed, mutation count={}, app 
last_committed_decree={}",
@@ -1096,11 +1132,10 @@ void replica_split_manager::child_partition_active(
         return;
     }
 
-    _stub->_counter_replicas_splitting_recent_split_succ_count->increment();
     _replica->_primary_states.last_prepare_decree_on_new_primary =
         _replica->_prepare_list->max_decree();
     _replica->update_configuration(config);
-    _stub->_counter_replicas_splitting_recent_split_succ_count->increment();
+    METRIC_VAR_INCREMENT(splitting_successful_count);
     LOG_INFO_PREFIX("child partition is active, status={}", 
enum_to_string(status()));
 }
 
@@ -1123,7 +1158,7 @@ void replica_split_manager::child_handle_split_error(
                          _replica->_split_states.parent_gpid,
                          _replica->_split_states.total_ms(),
                          _replica->_split_states.async_learn_ms());
-        
_stub->_counter_replicas_splitting_recent_split_fail_count->increment();
+        METRIC_VAR_INCREMENT(splitting_failed_count);
         
_replica->update_local_configuration_with_no_ballot_change(partition_status::PS_ERROR);
     }
 }
diff --git a/src/replica/split/replica_split_manager.h 
b/src/replica/split/replica_split_manager.h
index 9676e8494..7435e1705 100644
--- a/src/replica/split/replica_split_manager.h
+++ b/src/replica/split/replica_split_manager.h
@@ -32,6 +32,7 @@
 #include "replica/replica_base.h"
 #include "utils/error_code.h"
 #include "utils/fmt_logging.h"
+#include "utils/metrics.h"
 #include "utils/ports.h"
 
 namespace dsn {
@@ -251,6 +252,13 @@ private:
     // It will be updated each time when config sync from meta
     // TODO(heyuchen): clear it when primary parent clean up status
     split_status::type _meta_split_status{split_status::NOT_SPLIT};
+
+    METRIC_VAR_DECLARE_counter(splitting_started_count);
+    METRIC_VAR_DECLARE_counter(splitting_copy_file_count);
+    METRIC_VAR_DECLARE_counter(splitting_copy_file_bytes);
+    METRIC_VAR_DECLARE_counter(splitting_copy_mutation_count);
+    METRIC_VAR_DECLARE_counter(splitting_failed_count);
+    METRIC_VAR_DECLARE_counter(splitting_successful_count);
 };
 
 } // namespace replication
diff --git a/src/replica/test/replica_test.cpp 
b/src/replica/test/replica_test.cpp
index 89fd4dcaa..f2a211384 100644
--- a/src/replica/test/replica_test.cpp
+++ b/src/replica/test/replica_test.cpp
@@ -88,7 +88,6 @@ public:
     void SetUp() override
     {
         FLAGS_enable_http_server = false;
-        stub->install_perf_counters();
         mock_app_info();
         _mock_replica =
             stub->generate_replica_ptr(_app_info, _pid, 
partition_status::PS_PRIMARY, 1);
diff --git a/src/utils/metrics.h b/src/utils/metrics.h
index 566db835d..3b88ba5e9 100644
--- a/src/utils/metrics.h
+++ b/src/utils/metrics.h
@@ -662,6 +662,7 @@ enum class metric_unit : size_t
     kPercent,
     kReplicas,
     kPartitions,
+    kPartitionSplittings,
     kServers,
     kRequests,
     kResponses,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to