This is an automated email from the ASF dual-hosted git repository. wangdan pushed a commit to branch migrate-metrics-dev in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
commit 04abab7501375529a33dc131ba39915ef9bb723d Author: Dan Wang <[email protected]> AuthorDate: Sun Apr 16 21:39:00 2023 +0800 feat(new_metrics): migrate partition-level metrics for partition_guardian (#1440) https://github.com/apache/incubator-pegasus/issues/1331 In perf counters, there is only one metric for partition_guardian, namely the number of operations that fail to choose the primary replica, and it is server-level. In the new metrics it is changed to partition-level, since this shows which partitions fail to choose primaries and how frequently that happens. Table-level or server-level metrics can still be computed by aggregating the partition-level ones. --- src/meta/partition_guardian.cpp | 14 ++++++-------- src/meta/partition_guardian.h | 2 -- src/meta/table_metrics.cpp | 8 +++++++- src/meta/table_metrics.h | 5 +++++ 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/meta/partition_guardian.cpp b/src/meta/partition_guardian.cpp index 09b0862c4..47d4ace37 100644 --- a/src/meta/partition_guardian.cpp +++ b/src/meta/partition_guardian.cpp @@ -29,9 +29,11 @@ #include "meta/meta_data.h" #include "meta/meta_service.h" #include "meta/server_load_balancer.h" -#include "perf_counter/perf_counter.h" +#include "meta/server_state.h" +#include "meta/table_metrics.h" #include "utils/flags.h" #include "utils/fmt_logging.h" +#include "utils/metrics.h" #include "utils/string_conv.h" #include "utils/strings.h" #include "utils/time_utils.h" @@ -53,12 +55,6 @@ partition_guardian::partition_guardian(meta_service *svc) : _svc(svc) } else { _replica_assign_delay_ms_for_dropouts = 0; } - - _recent_choose_primary_fail_count.init_app_counter( - "eon.server_load_balancer", - "recent_choose_primary_fail_count", - COUNTER_TYPE_VOLATILE_NUMBER, - "choose primary fail count in the recent period"); } pc_status partition_guardian::cure(meta_view view, @@ -452,7 +448,9 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi LOG_WARNING("{}: 
don't select any node for security reason, administrator can select " "a proper one by shell", gpid_name); - _recent_choose_primary_fail_count->increment(); + METRIC_INCREMENT(_svc->get_server_state()->get_table_metric_entities(), + choose_primary_failed_operations, + gpid); ddd_partition_info pinfo; pinfo.config = pc; for (int i = 0; i < cc.dropped.size(); ++i) { diff --git a/src/meta/partition_guardian.h b/src/meta/partition_guardian.h index 23d78563a..9c77da7e5 100644 --- a/src/meta/partition_guardian.h +++ b/src/meta/partition_guardian.h @@ -29,7 +29,6 @@ #include "dsn.layer2_types.h" #include "meta_admin_types.h" #include "meta_data.h" -#include "perf_counter/perf_counter_wrapper.h" #include "runtime/rpc/rpc_address.h" #include "utils/command_manager.h" #include "utils/zlocks.h" @@ -91,7 +90,6 @@ private: } meta_service *_svc; - perf_counter_wrapper _recent_choose_primary_fail_count; mutable zlock _ddd_partitions_lock; // [ std::map<gpid, ddd_partition_info> _ddd_partitions; diff --git a/src/meta/table_metrics.cpp b/src/meta/table_metrics.cpp index 73b992062..c63dd1f09 100644 --- a/src/meta/table_metrics.cpp +++ b/src/meta/table_metrics.cpp @@ -65,6 +65,11 @@ METRIC_DEFINE_counter(partition, dsn::metric_unit::kOperations, "The number of balance operations by greedy balancer that copy secondaries"); +METRIC_DEFINE_counter(partition, + choose_primary_failed_operations, + dsn::metric_unit::kOperations, + "The number of operations that fail to choose the primary replica"); + METRIC_DEFINE_entity(table); // The number of partitions in each status, see `health_status` and `partition_health_status()` @@ -133,7 +138,8 @@ partition_metrics::partition_metrics(int32_t table_id, int32_t partition_id) METRIC_VAR_INIT_partition(greedy_recent_balance_operations), METRIC_VAR_INIT_partition(greedy_move_primary_operations), METRIC_VAR_INIT_partition(greedy_copy_primary_operations), - METRIC_VAR_INIT_partition(greedy_copy_secondary_operations) + 
METRIC_VAR_INIT_partition(greedy_copy_secondary_operations), + METRIC_VAR_INIT_partition(choose_primary_failed_operations) { } diff --git a/src/meta/table_metrics.h b/src/meta/table_metrics.h index 39fec611d..7dc5e83a3 100644 --- a/src/meta/table_metrics.h +++ b/src/meta/table_metrics.h @@ -52,6 +52,8 @@ public: METRIC_DEFINE_INCREMENT_BY(greedy_copy_primary_operations) METRIC_DEFINE_INCREMENT_BY(greedy_copy_secondary_operations) + METRIC_DEFINE_INCREMENT(choose_primary_failed_operations) + private: const int32_t _table_id; const int32_t _partition_id; @@ -64,6 +66,7 @@ private: METRIC_VAR_DECLARE_counter(greedy_move_primary_operations); METRIC_VAR_DECLARE_counter(greedy_copy_primary_operations); METRIC_VAR_DECLARE_counter(greedy_copy_secondary_operations); + METRIC_VAR_DECLARE_counter(choose_primary_failed_operations); DISALLOW_COPY_AND_ASSIGN(partition_metrics); }; @@ -112,6 +115,7 @@ public: __METRIC_DEFINE_INCREMENT(partition_configuration_changes) __METRIC_DEFINE_INCREMENT(unwritable_partition_changes) __METRIC_DEFINE_INCREMENT(writable_partition_changes) + __METRIC_DEFINE_INCREMENT(choose_primary_failed_operations) #undef __METRIC_DEFINE_INCREMENT @@ -221,6 +225,7 @@ public: __METRIC_DEFINE_INCREMENT(partition_configuration_changes) __METRIC_DEFINE_INCREMENT(unwritable_partition_changes) __METRIC_DEFINE_INCREMENT(writable_partition_changes) + __METRIC_DEFINE_INCREMENT(choose_primary_failed_operations) #undef __METRIC_DEFINE_INCREMENT --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
