This is an automated email from the ASF dual-hosted git repository. wangdan pushed a commit to branch migrate-metrics-dev in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
commit 6004e1e004817f124b10e2c483d2566f2fe6baf2 Author: Dan Wang <[email protected]> AuthorDate: Thu Apr 27 11:02:54 2023 +0800 feat(new_metrics): migrate metrics for replica_stub (part 3) (#1462) https://github.com/apache/incubator-pegasus/issues/1454 This is the 3rd part of migrating metrics of replica_stub to the new framework. During this migration, 3 metrics are changed from server-level to replica-level: the number of failed RPC_PREPARE requests, the number of failed RPC_GROUP_CHECK requests launched by primary replicas, and the number of triggered emergency checkpoints. Another 7 metrics are still kept server-level: the numbers of replicas whose dirs are moved as error or garbage, the number of removed replica dirs, and the numbers of error replica dirs (*.err), garbage replica dirs (*.gar), tmp replica dirs (*.tmp) and origin replica dirs (*.ori) for disk migration. 2 metrics are removed, since both are shared-log-related. --- src/nfs/nfs_client_impl.cpp | 6 +- src/nfs/nfs_client_impl.h | 2 +- src/nfs/nfs_server_impl.cpp | 6 +- src/nfs/nfs_server_impl.h | 2 +- src/replica/replica.cpp | 20 ++++- src/replica/replica.h | 6 ++ src/replica/replica_2pc.cpp | 2 +- src/replica/replica_check.cpp | 5 +- src/replica/replica_chkpt.cpp | 7 +- src/replica/replica_learn.cpp | 12 --- src/replica/replica_stub.cpp | 136 ++++++++++++----------------- src/replica/replica_stub.h | 21 ++--- src/server/pegasus_mutation_duplicator.cpp | 12 +-- src/server/pegasus_mutation_duplicator.h | 4 +- src/utils/metrics.h | 4 +- 15 files changed, 113 insertions(+), 132 deletions(-) diff --git a/src/nfs/nfs_client_impl.cpp b/src/nfs/nfs_client_impl.cpp index 8ddadc46c..4c10b4da8 100644 --- a/src/nfs/nfs_client_impl.cpp +++ b/src/nfs/nfs_client_impl.cpp @@ -48,7 +48,7 @@ METRIC_DEFINE_counter(server, "The accumulated data size in bytes requested by client during nfs copy"); METRIC_DEFINE_counter(server, - nfs_client_failed_copy_requests, + nfs_client_copy_failed_requests, 
dsn::metric_unit::kRequests, "The number of failed nfs copy requests (requested by client)"); @@ -121,7 +121,7 @@ nfs_client_impl::nfs_client_impl() _copy_requests_low(FLAGS_max_file_copy_request_count_per_file), _high_priority_remaining_time(FLAGS_high_priority_speed_rate), METRIC_VAR_INIT_server(nfs_client_copy_bytes), - METRIC_VAR_INIT_server(nfs_client_failed_copy_requests), + METRIC_VAR_INIT_server(nfs_client_copy_failed_requests), METRIC_VAR_INIT_server(nfs_client_write_bytes), METRIC_VAR_INIT_server(nfs_client_failed_writes) { @@ -345,7 +345,7 @@ void nfs_client_impl::end_copy(::dsn::error_code err, } if (err != ::dsn::ERR_OK) { - METRIC_VAR_INCREMENT(nfs_client_failed_copy_requests); + METRIC_VAR_INCREMENT(nfs_client_copy_failed_requests); if (!fc->user_req->is_finished) { if (reqc->retry_count > 0) { diff --git a/src/nfs/nfs_client_impl.h b/src/nfs/nfs_client_impl.h index 0c15fc8b3..183ac38a9 100644 --- a/src/nfs/nfs_client_impl.h +++ b/src/nfs/nfs_client_impl.h @@ -312,7 +312,7 @@ private: std::deque<copy_request_ex_ptr> _local_writes; METRIC_VAR_DECLARE_counter(nfs_client_copy_bytes); - METRIC_VAR_DECLARE_counter(nfs_client_failed_copy_requests); + METRIC_VAR_DECLARE_counter(nfs_client_copy_failed_requests); METRIC_VAR_DECLARE_counter(nfs_client_write_bytes); METRIC_VAR_DECLARE_counter(nfs_client_failed_writes); diff --git a/src/nfs/nfs_server_impl.cpp b/src/nfs/nfs_server_impl.cpp index 25632d4f9..ac2d6a14d 100644 --- a/src/nfs/nfs_server_impl.cpp +++ b/src/nfs/nfs_server_impl.cpp @@ -55,7 +55,7 @@ METRIC_DEFINE_counter( METRIC_DEFINE_counter( server, - nfs_server_failed_copy_requests, + nfs_server_copy_failed_requests, dsn::metric_unit::kRequests, "The number of nfs copy requests (received by server) that fail to read local file in server"); @@ -77,7 +77,7 @@ DSN_DECLARE_int32(file_close_expire_time_ms); nfs_service_impl::nfs_service_impl() : ::dsn::serverlet<nfs_service_impl>("nfs"), METRIC_VAR_INIT_server(nfs_server_copy_bytes), - 
METRIC_VAR_INIT_server(nfs_server_failed_copy_requests) + METRIC_VAR_INIT_server(nfs_server_copy_failed_requests) { _file_close_timer = ::dsn::tasking::enqueue_timer( LPC_NFS_FILE_CLOSE_TIMER, @@ -167,7 +167,7 @@ void nfs_service_impl::internal_read_callback(error_code err, size_t sz, callbac if (err != ERR_OK) { LOG_ERROR("[nfs_service] read file {} failed, err = {}", cp.file_path, err); - METRIC_VAR_INCREMENT(nfs_server_failed_copy_requests); + METRIC_VAR_INCREMENT(nfs_server_copy_failed_requests); } else { METRIC_VAR_INCREMENT_BY(nfs_server_copy_bytes, sz); } diff --git a/src/nfs/nfs_server_impl.h b/src/nfs/nfs_server_impl.h index 4c07a4996..4a4c5b5c4 100644 --- a/src/nfs/nfs_server_impl.h +++ b/src/nfs/nfs_server_impl.h @@ -138,7 +138,7 @@ private: _send_token_buckets; // rate limiter of send to remote METRIC_VAR_DECLARE_counter(nfs_server_copy_bytes); - METRIC_VAR_DECLARE_counter(nfs_server_failed_copy_requests); + METRIC_VAR_DECLARE_counter(nfs_server_copy_failed_requests); std::unique_ptr<command_deregister> _nfs_max_send_rate_megabytes_cmd; diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp index ad458c543..91835c445 100644 --- a/src/replica/replica.cpp +++ b/src/replica/replica.cpp @@ -189,6 +189,21 @@ METRIC_DEFINE_counter(replica, dsn::metric_unit::kLearns, "The number of successful learns launched by learner"); +METRIC_DEFINE_counter(replica, + prepare_failed_requests, + dsn::metric_unit::kRequests, + "The number of failed RPC_PREPARE requests"); + +METRIC_DEFINE_counter(replica, + group_check_failed_requests, + dsn::metric_unit::kRequests, + "The number of failed RPC_GROUP_CHECK requests launched by primary replicas"); + +METRIC_DEFINE_counter(replica, + emergency_checkpoints, + dsn::metric_unit::kCheckpoints, + "The number of triggered emergency checkpoints"); + namespace dsn { namespace replication { @@ -264,7 +279,10 @@ replica::replica(replica_stub *stub, METRIC_VAR_INIT_replica(learn_lt_log_responses), 
METRIC_VAR_INIT_replica(learn_resets), METRIC_VAR_INIT_replica(learn_failed_count), - METRIC_VAR_INIT_replica(learn_successful_count) + METRIC_VAR_INIT_replica(learn_successful_count), + METRIC_VAR_INIT_replica(prepare_failed_requests), + METRIC_VAR_INIT_replica(group_check_failed_requests), + METRIC_VAR_INIT_replica(emergency_checkpoints) { CHECK(!_app_info.app_type.empty(), ""); CHECK_NOTNULL(stub, ""); diff --git a/src/replica/replica.h b/src/replica/replica.h index 8df71a964..05695dae4 100644 --- a/src/replica/replica.h +++ b/src/replica/replica.h @@ -680,6 +680,12 @@ private: METRIC_VAR_DECLARE_counter(learn_failed_count); METRIC_VAR_DECLARE_counter(learn_successful_count); + METRIC_VAR_DECLARE_counter(prepare_failed_requests); + + METRIC_VAR_DECLARE_counter(group_check_failed_requests); + + METRIC_VAR_DECLARE_counter(emergency_checkpoints); + dsn::task_tracker _tracker; // the thread access checker dsn::thread_access_checker _checker; diff --git a/src/replica/replica_2pc.cpp b/src/replica/replica_2pc.cpp index 3d07d48aa..944a6a3f5 100644 --- a/src/replica/replica_2pc.cpp +++ b/src/replica/replica_2pc.cpp @@ -765,7 +765,7 @@ void replica::on_prepare_reply(std::pair<mutation_ptr, partition_status::type> p } } - _stub->_counter_replicas_recent_prepare_fail_count->increment(); + METRIC_VAR_INCREMENT(prepare_failed_requests); // make sure this is before any later commit ops // because now commit ops may lead to new prepare ops diff --git a/src/replica/replica_check.cpp b/src/replica/replica_check.cpp index bf4b0bff8..f7fbbd058 100644 --- a/src/replica/replica_check.cpp +++ b/src/replica/replica_check.cpp @@ -50,8 +50,6 @@ #include "duplication/replica_duplicator_manager.h" #include "metadata_types.h" #include "mutation.h" -#include "perf_counter/perf_counter.h" -#include "perf_counter/perf_counter_wrapper.h" #include "replica.h" #include "replica/prepare_list.h" #include "replica/replica_context.h" @@ -67,6 +65,7 @@ #include "utils/fail_point.h" #include 
"utils/flags.h" #include "utils/fmt_logging.h" +#include "utils/metrics.h" #include "utils/string_view.h" #include "utils/thread_access_checker.h" @@ -255,7 +254,7 @@ void replica::on_group_check_reply(error_code err, err = resp->err; } handle_remote_failure(req->config.status, req->node, err, "group check"); - _stub->_counter_replicas_recent_group_check_fail_count->increment(); + METRIC_VAR_INCREMENT(group_check_failed_requests); } else { if (resp->learner_status_ == learner_status::LearningSucceeded && req->config.status == partition_status::PS_POTENTIAL_SECONDARY) { diff --git a/src/replica/replica_chkpt.cpp b/src/replica/replica_chkpt.cpp index 5985e5d8d..7c9f6f931 100644 --- a/src/replica/replica_chkpt.cpp +++ b/src/replica/replica_chkpt.cpp @@ -50,8 +50,6 @@ #include "duplication/replica_duplicator_manager.h" #include "metadata_types.h" #include "mutation_log.h" -#include "perf_counter/perf_counter.h" -#include "perf_counter/perf_counter_wrapper.h" #include "replica.h" #include "replica/prepare_list.h" #include "replica/replica_context.h" @@ -240,8 +238,9 @@ void replica::init_checkpoint(bool is_emergency) 0, 10_ms); - if (is_emergency) - _stub->_counter_recent_trigger_emergency_checkpoint_count->increment(); + if (is_emergency) { + METRIC_VAR_INCREMENT(emergency_checkpoints); + } } // ThreadPool: THREAD_POOL_REPLICATION diff --git a/src/replica/replica_learn.cpp b/src/replica/replica_learn.cpp index b3f6f6a1a..910b87f12 100644 --- a/src/replica/replica_learn.cpp +++ b/src/replica/replica_learn.cpp @@ -81,18 +81,6 @@ #include "utils/metrics.h" #include "utils/thread_access_checker.h" -METRIC_DECLARE_counter(learn_count); -METRIC_DECLARE_counter(learn_rounds); -METRIC_DECLARE_counter(learn_copy_files); -METRIC_DECLARE_counter(learn_copy_file_bytes); -METRIC_DECLARE_counter(learn_copy_buffer_bytes); -METRIC_DECLARE_counter(learn_lt_cache_responses); -METRIC_DECLARE_counter(learn_lt_app_responses); -METRIC_DECLARE_counter(learn_lt_log_responses); 
-METRIC_DECLARE_counter(learn_resets); -METRIC_DECLARE_counter(learn_failed_count); -METRIC_DECLARE_counter(learn_successful_count); - namespace dsn { namespace replication { diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp index 9c00fc6e3..95ba2502b 100644 --- a/src/replica/replica_stub.cpp +++ b/src/replica/replica_stub.cpp @@ -128,6 +128,41 @@ METRIC_DEFINE_gauge_int64( dsn::metric_unit::kBytes, "The max size of files that are copied from learnee among all learning replicas"); +METRIC_DEFINE_counter(server, + moved_error_replicas, + dsn::metric_unit::kReplicas, + "The number of replicas whose dirs are moved as error"); + +METRIC_DEFINE_counter(server, + moved_garbage_replicas, + dsn::metric_unit::kReplicas, + "The number of replicas whose dirs are moved as garbage"); + +METRIC_DEFINE_counter(server, + replica_removed_dirs, + dsn::metric_unit::kDirs, + "The number of removed replica dirs"); + +METRIC_DEFINE_gauge_int64(server, + replica_error_dirs, + dsn::metric_unit::kDirs, + "The number of error replica dirs (*.err)"); + +METRIC_DEFINE_gauge_int64(server, + replica_garbage_dirs, + dsn::metric_unit::kDirs, + "The number of garbage replica dirs (*.gar)"); + +METRIC_DEFINE_gauge_int64(server, + replica_tmp_dirs, + dsn::metric_unit::kDirs, + "The number of tmp replica dirs (*.tmp) for disk migration"); + +METRIC_DEFINE_gauge_int64(server, + replica_origin_dirs, + dsn::metric_unit::kDirs, + "The number of origin replica dirs (*.ori) for disk migration"); + namespace dsn { namespace replication { DSN_DEFINE_bool(replication, @@ -239,7 +274,14 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/, METRIC_VAR_INIT_server(closing_replicas), METRIC_VAR_INIT_server(learning_replicas), METRIC_VAR_INIT_server(learning_replicas_max_duration_ms), - METRIC_VAR_INIT_server(learning_replicas_max_copy_file_bytes) + METRIC_VAR_INIT_server(learning_replicas_max_copy_file_bytes), + METRIC_VAR_INIT_server(moved_error_replicas), + 
METRIC_VAR_INIT_server(moved_garbage_replicas), + METRIC_VAR_INIT_server(replica_removed_dirs), + METRIC_VAR_INIT_server(replica_error_dirs), + METRIC_VAR_INIT_server(replica_garbage_dirs), + METRIC_VAR_INIT_server(replica_tmp_dirs), + METRIC_VAR_INIT_server(replica_origin_dirs) { #ifdef DSN_ENABLE_GPERF _is_releasing_memory = false; @@ -257,66 +299,6 @@ replica_stub::~replica_stub(void) { close(); } void replica_stub::install_perf_counters() { - _counter_replicas_recent_prepare_fail_count.init_app_counter( - "eon.replica_stub", - "replicas.recent.prepare.fail.count", - COUNTER_TYPE_VOLATILE_NUMBER, - "prepare fail count in the recent period"); - _counter_replicas_recent_replica_move_error_count.init_app_counter( - "eon.replica_stub", - "replicas.recent.replica.move.error.count", - COUNTER_TYPE_VOLATILE_NUMBER, - "replica move to error count in the recent period"); - _counter_replicas_recent_replica_move_garbage_count.init_app_counter( - "eon.replica_stub", - "replicas.recent.replica.move.garbage.count", - COUNTER_TYPE_VOLATILE_NUMBER, - "replica move to garbage count in the recent period"); - _counter_replicas_recent_replica_remove_dir_count.init_app_counter( - "eon.replica_stub", - "replicas.recent.replica.remove.dir.count", - COUNTER_TYPE_VOLATILE_NUMBER, - "replica directory remove count in the recent period"); - _counter_replicas_error_replica_dir_count.init_app_counter( - "eon.replica_stub", - "replicas.error.replica.dir.count", - COUNTER_TYPE_NUMBER, - "error replica directory(*.err) count"); - _counter_replicas_garbage_replica_dir_count.init_app_counter( - "eon.replica_stub", - "replicas.garbage.replica.dir.count", - COUNTER_TYPE_NUMBER, - "garbage replica directory(*.gar) count"); - _counter_replicas_tmp_replica_dir_count.init_app_counter( - "eon.replica_stub", - "replicas.tmp.replica.dir.count", - COUNTER_TYPE_NUMBER, - "disk migration tmp replica directory(*.tmp) count"); - _counter_replicas_origin_replica_dir_count.init_app_counter( - 
"eon.replica_stub", - "replicas.origin.replica.dir.count", - COUNTER_TYPE_NUMBER, - "disk migration origin replica directory(.ori) count"); - - _counter_replicas_recent_group_check_fail_count.init_app_counter( - "eon.replica_stub", - "replicas.recent.group.check.fail.count", - COUNTER_TYPE_VOLATILE_NUMBER, - "group check fail count in the recent period"); - - _counter_shared_log_size.init_app_counter( - "eon.replica_stub", "shared.log.size(MB)", COUNTER_TYPE_NUMBER, "shared log size(MB)"); - _counter_shared_log_recent_write_size.init_app_counter( - "eon.replica_stub", - "shared.log.recent.write.size", - COUNTER_TYPE_VOLATILE_NUMBER, - "shared log write size in the recent period"); - _counter_recent_trigger_emergency_checkpoint_count.init_app_counter( - "eon.replica_stub", - "recent.trigger.emergency.checkpoint.count", - COUNTER_TYPE_VOLATILE_NUMBER, - "trigger emergency checkpoint count in the recent period"); - // <- Duplication Metrics -> _counter_dup_confirmed_rate.init_app_counter("eon.replica_stub", @@ -568,10 +550,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f _options.slog_dir = cdir; // Initialize slog. - _log = new mutation_log_shared(_options.slog_dir, - FLAGS_log_shared_file_size_mb, - FLAGS_log_shared_force_flush, - &_counter_shared_log_recent_write_size); + _log = new mutation_log_shared( + _options.slog_dir, FLAGS_log_shared_file_size_mb, FLAGS_log_shared_force_flush); LOG_INFO("slog_dir = {}", _options.slog_dir); // Start to load replicas in available data directories. 
@@ -685,7 +665,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f for (auto it = rps.begin(); it != rps.end(); ++it) { it->second->close(); move_to_err_path(it->second->dir(), "initialize replica"); - _counter_replicas_recent_replica_move_error_count->increment(); + METRIC_VAR_INCREMENT(moved_error_replicas); } rps.clear(); @@ -695,10 +675,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f CHECK(utils::filesystem::remove_path(_options.slog_dir), "remove directory {} failed", _options.slog_dir); - _log = new mutation_log_shared(_options.slog_dir, - FLAGS_log_shared_file_size_mb, - FLAGS_log_shared_force_flush, - &_counter_shared_log_recent_write_size); + _log = new mutation_log_shared( + _options.slog_dir, FLAGS_log_shared_file_size_mb, FLAGS_log_shared_force_flush); CHECK_EQ_MSG(_log->open(nullptr, [this](error_code err) { this->handle_log_failure(err); }), ERR_OK, "restart log service failed"); @@ -1716,7 +1694,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id) LOG_WARNING("gc_replica: replica_dir_op succeed to move directory '{}' to '{}'", replica_path, rename_path); - _counter_replicas_recent_replica_move_garbage_count->increment(); + METRIC_VAR_INCREMENT(moved_garbage_replicas); } } @@ -1849,8 +1827,6 @@ void replica_stub::on_gc() } } } - - _counter_shared_log_size->set(_log->total_size() / (1024 * 1024)); } // statistic learning info @@ -1932,11 +1908,11 @@ void replica_stub::on_disk_stat() _fs_manager.update_disk_stat(); update_disk_holding_replicas(); - _counter_replicas_error_replica_dir_count->set(report.error_replica_count); - _counter_replicas_garbage_replica_dir_count->set(report.garbage_replica_count); - _counter_replicas_tmp_replica_dir_count->set(report.disk_migrate_tmp_count); - _counter_replicas_origin_replica_dir_count->set(report.disk_migrate_origin_count); - _counter_replicas_recent_replica_remove_dir_count->add(report.remove_dir_count); + 
METRIC_VAR_SET(replica_error_dirs, report.error_replica_count); + METRIC_VAR_SET(replica_garbage_dirs, report.garbage_replica_count); + METRIC_VAR_SET(replica_tmp_dirs, report.disk_migrate_tmp_count); + METRIC_VAR_SET(replica_origin_dirs, report.disk_migrate_origin_count); + METRIC_VAR_INCREMENT_BY(replica_removed_dirs, report.remove_dir_count); LOG_INFO("finish to update disk stat, time_used_ns = {}", dsn_now_ns() - start); } @@ -2250,7 +2226,7 @@ replica *replica_stub::load_replica(dir_node *dn, const char *dir) // clear work on failure if (dsn::utils::filesystem::directory_exists(dir)) { move_to_err_path(dir, "load replica"); - _counter_replicas_recent_replica_move_error_count->increment(); + METRIC_VAR_INCREMENT(moved_error_replicas); _fs_manager.remove_replica(pid); } @@ -2338,7 +2314,7 @@ void replica_stub::close_replica(replica_ptr r) _fs_manager.remove_replica(id); if (r->is_data_corrupted()) { move_to_err_path(r->dir(), "trash replica"); - _counter_replicas_recent_replica_move_error_count->increment(); + METRIC_VAR_INCREMENT(moved_error_replicas); } LOG_INFO("{}: finish to close replica", name); diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h index ebfc166b6..b73287f86 100644 --- a/src/replica/replica_stub.h +++ b/src/replica/replica_stub.h @@ -503,20 +503,13 @@ private: METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_duration_ms); METRIC_VAR_DECLARE_gauge_int64(learning_replicas_max_copy_file_bytes); - perf_counter_wrapper _counter_replicas_recent_prepare_fail_count; - perf_counter_wrapper _counter_replicas_recent_replica_move_error_count; - perf_counter_wrapper _counter_replicas_recent_replica_move_garbage_count; - perf_counter_wrapper _counter_replicas_recent_replica_remove_dir_count; - perf_counter_wrapper _counter_replicas_error_replica_dir_count; - perf_counter_wrapper _counter_replicas_garbage_replica_dir_count; - perf_counter_wrapper _counter_replicas_tmp_replica_dir_count; - perf_counter_wrapper 
_counter_replicas_origin_replica_dir_count; - - perf_counter_wrapper _counter_replicas_recent_group_check_fail_count; - - perf_counter_wrapper _counter_shared_log_size; - perf_counter_wrapper _counter_shared_log_recent_write_size; - perf_counter_wrapper _counter_recent_trigger_emergency_checkpoint_count; + METRIC_VAR_DECLARE_counter(moved_error_replicas); + METRIC_VAR_DECLARE_counter(moved_garbage_replicas); + METRIC_VAR_DECLARE_counter(replica_removed_dirs); + METRIC_VAR_DECLARE_gauge_int64(replica_error_dirs); + METRIC_VAR_DECLARE_gauge_int64(replica_garbage_dirs); + METRIC_VAR_DECLARE_gauge_int64(replica_tmp_dirs); + METRIC_VAR_DECLARE_gauge_int64(replica_origin_dirs); // <- Duplication Metrics -> // TODO(wutao1): calculate the counters independently for each remote cluster diff --git a/src/server/pegasus_mutation_duplicator.cpp b/src/server/pegasus_mutation_duplicator.cpp index 8d87ce60b..74832d5e6 100644 --- a/src/server/pegasus_mutation_duplicator.cpp +++ b/src/server/pegasus_mutation_duplicator.cpp @@ -48,12 +48,12 @@ #include "utils/rand.h" METRIC_DEFINE_counter(replica, - successful_mutation_dup_requests, + mutation_dup_successful_requests, dsn::metric_unit::kRequests, "The number of successful DUPLICATE requests sent from mutation duplicator"); METRIC_DEFINE_counter(replica, - failed_mutation_dup_requests, + mutation_dup_failed_requests, dsn::metric_unit::kRequests, "The number of failed DUPLICATE requests sent from mutation duplicator"); @@ -107,8 +107,8 @@ pegasus_mutation_duplicator::pegasus_mutation_duplicator(dsn::replication::repli dsn::string_view app) : mutation_duplicator(r), _remote_cluster(remote_cluster), - METRIC_VAR_INIT_replica(successful_mutation_dup_requests), - METRIC_VAR_INIT_replica(failed_mutation_dup_requests) + METRIC_VAR_INIT_replica(mutation_dup_successful_requests), + METRIC_VAR_INIT_replica(mutation_dup_failed_requests) { // initialize pegasus-client when this class is first time used. 
static __attribute__((unused)) bool _dummy = pegasus_client_factory::initialize(nullptr); @@ -162,7 +162,7 @@ void pegasus_mutation_duplicator::on_duplicate_reply(uint64_t hash, } if (perr != PERR_OK || err != dsn::ERR_OK) { - METRIC_VAR_INCREMENT(failed_mutation_dup_requests); + METRIC_VAR_INCREMENT(mutation_dup_failed_requests); // randomly log the 1% of the failed duplicate rpc, because minor number of // errors are acceptable. @@ -175,7 +175,7 @@ void pegasus_mutation_duplicator::on_duplicate_reply(uint64_t hash, // duplicating an illegal write to server is unacceptable, fail fast. CHECK_NE_PREFIX_MSG(perr, PERR_INVALID_ARGUMENT, rpc.response().error_hint); } else { - METRIC_VAR_INCREMENT(successful_mutation_dup_requests); + METRIC_VAR_INCREMENT(mutation_dup_successful_requests); _total_shipped_size += rpc.dsn_request()->header->body_length + rpc.dsn_request()->header->hdr_length; } diff --git a/src/server/pegasus_mutation_duplicator.h b/src/server/pegasus_mutation_duplicator.h index 9a5aa086c..dfe126df7 100644 --- a/src/server/pegasus_mutation_duplicator.h +++ b/src/server/pegasus_mutation_duplicator.h @@ -89,8 +89,8 @@ private: size_t _total_shipped_size{0}; - METRIC_VAR_DECLARE_counter(successful_mutation_dup_requests); - METRIC_VAR_DECLARE_counter(failed_mutation_dup_requests); + METRIC_VAR_DECLARE_counter(mutation_dup_successful_requests); + METRIC_VAR_DECLARE_counter(mutation_dup_failed_requests); }; // Decodes the binary `request_data` into write request in thrift struct, and diff --git a/src/utils/metrics.h b/src/utils/metrics.h index b5e31c050..ec2fb6977 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -652,8 +652,8 @@ enum class metric_unit : size_t kMegaBytes, kCapacityUnits, kPercent, - kPartitions, kReplicas, + kPartitions, kServers, kRequests, kResponses, @@ -662,7 +662,9 @@ enum class metric_unit : size_t kValues, kKeys, kFiles, + kDirs, kAmplification, + kCheckpoints, kFlushes, kCompactions, kWrites, 
--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
