This is an automated email from the ASF dual-hosted git repository. wangdan pushed a commit to branch migrate-metrics-dev in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
commit 879e7ff7e2a137811602b6318f0f2cb4178d6d77 Author: Dan Wang <[email protected]> AuthorDate: Wed Jun 21 11:27:43 2023 +0800 feat(new_metrics): migrate metrics for latency tracer (#1537) https://github.com/apache/incubator-pegasus/issues/1536 The only metric of latency tracer, namely the duration between two points (stages), is migrated to the new framework: - latency tracer entity is introduced as the new metric entity to which the metric is attached; - create new class that bind each entity and manage the metric; - create new class that manage all entity instances, with a read-write lock that helps improving the performance of search for the target entity; - solve the problem of "No space left on device" for Build ASAN temporarily. --- src/common/fs_manager.cpp | 2 +- src/meta/meta_backup_service.cpp | 2 +- src/meta/table_metrics.cpp | 4 +- src/replica/replica_base.cpp | 2 +- src/runtime/profiler.cpp | 2 +- src/runtime/task/task_queue.cpp | 2 +- src/utils/latency_tracer.cpp | 178 +++++++++++++++++++++++++++++---------- src/utils/latency_tracer.h | 3 - src/utils/metrics.h | 2 + 9 files changed, 142 insertions(+), 55 deletions(-) diff --git a/src/common/fs_manager.cpp b/src/common/fs_manager.cpp index 583ac8228..9b8d791e6 100644 --- a/src/common/fs_manager.cpp +++ b/src/common/fs_manager.cpp @@ -99,7 +99,7 @@ namespace { metric_entity_ptr instantiate_disk_metric_entity(const std::string &tag, const std::string &data_dir) { - auto entity_id = fmt::format("disk_{}", tag); + auto entity_id = fmt::format("disk@{}", tag); return METRIC_ENTITY_disk.instantiate(entity_id, {{"tag", tag}, {"data_dir", data_dir}}); } diff --git a/src/meta/meta_backup_service.cpp b/src/meta/meta_backup_service.cpp index a746c7bdb..b7e443efe 100644 --- a/src/meta/meta_backup_service.cpp +++ b/src/meta/meta_backup_service.cpp @@ -69,7 +69,7 @@ namespace { metric_entity_ptr instantiate_backup_policy_metric_entity(const std::string &policy_name) { - auto entity_id = fmt::format("backup_policy_{}", policy_name); + auto entity_id = fmt::format("backup_policy@{}", policy_name); return METRIC_ENTITY_backup_policy.instantiate(entity_id, {{"policy_name", policy_name}}); } diff --git a/src/meta/table_metrics.cpp b/src/meta/table_metrics.cpp index 73b45d69e..6801c97fc 100644 --- a/src/meta/table_metrics.cpp +++ b/src/meta/table_metrics.cpp @@ -112,7 +112,7 @@ namespace { metric_entity_ptr instantiate_partition_metric_entity(int32_t table_id, int32_t partition_id) { - auto entity_id = fmt::format("partition_{}", gpid(table_id, partition_id)); + auto entity_id = fmt::format("partition@{}", gpid(table_id, partition_id)); return METRIC_ENTITY_partition.instantiate( entity_id, @@ -121,7 +121,7 @@ metric_entity_ptr instantiate_partition_metric_entity(int32_t table_id, int32_t metric_entity_ptr instantiate_table_metric_entity(int32_t table_id) { - auto entity_id = fmt::format("table_{}", table_id); + auto entity_id = fmt::format("table@{}", table_id); return METRIC_ENTITY_table.instantiate(entity_id, {{"table_id", std::to_string(table_id)}}); } diff --git a/src/replica/replica_base.cpp b/src/replica/replica_base.cpp index 3168ad651..306591119 100644 --- a/src/replica/replica_base.cpp +++ b/src/replica/replica_base.cpp @@ -30,7 +30,7 @@ namespace { metric_entity_ptr instantiate_replica_metric_entity(const gpid &id) { - auto entity_id = fmt::format("replica_{}", id); + auto entity_id = fmt::format("replica@{}", id); // Do NOT add `replica_base._app_name` as the table name to the attributes of entity, since // it is read-only and will never be updated even if the table is renamed. diff --git a/src/runtime/profiler.cpp b/src/runtime/profiler.cpp index 1a6a2bb7b..ebedb6536 100644 --- a/src/runtime/profiler.cpp +++ b/src/runtime/profiler.cpp @@ -361,7 +361,7 @@ namespace { metric_entity_ptr instantiate_profiler_metric_entity(const std::string &task_name) { - auto entity_id = fmt::format("task_{}", task_name); + auto entity_id = fmt::format("task@{}", task_name); return METRIC_ENTITY_profiler.instantiate(entity_id, {{"task_name", task_name}}); } diff --git a/src/runtime/task/task_queue.cpp b/src/runtime/task/task_queue.cpp index 425880909..f7bcb641d 100644 --- a/src/runtime/task/task_queue.cpp +++ b/src/runtime/task/task_queue.cpp @@ -63,7 +63,7 @@ namespace { metric_entity_ptr instantiate_queue_metric_entity(const std::string &queue_name) { - auto entity_id = fmt::format("queue_{}", queue_name); + auto entity_id = fmt::format("queue@{}", queue_name); return METRIC_ENTITY_queue.instantiate(entity_id, {{"queue_name", queue_name}}); } diff --git a/src/utils/latency_tracer.cpp b/src/utils/latency_tracer.cpp index 3d5e6fe48..f60d4b004 100644 --- a/src/utils/latency_tracer.cpp +++ b/src/utils/latency_tracer.cpp @@ -20,15 +20,23 @@ #include <fmt/core.h> #include <cstdint> #include <iterator> +#include <set> #include <utility> -#include "perf_counter/perf_counter.h" -#include "perf_counter/perf_counters.h" #include "runtime/api_layer1.h" #include "utils/autoref_ptr.h" #include "utils/config_api.h" #include "utils/flags.h" #include "utils/fmt_logging.h" +#include "utils/metrics.h" +#include "utils/string_view.h" + +METRIC_DEFINE_entity(latency_tracer); + +METRIC_DEFINE_percentile_int64(latency_tracer, + latency_tracer_duration_ns, + dsn::metric_unit::kNanoSeconds, + "The duration between two points(stages)"); namespace dsn { namespace utils { @@ -42,48 +50,131 @@ DSN_TAG_VARIABLE(enable_latency_tracer, FT_MUTABLE); DSN_DEFINE_bool(replication, enable_latency_tracer_report, false, - "whether open the latency tracer report perf counter"); + "whether open the latency tracer report for metrics"); DSN_TAG_VARIABLE(enable_latency_tracer_report, FT_MUTABLE); -DSN_DEFINE_string(replication, - latency_tracer_counter_name_prefix, - "trace_latency", - "perf counter common name prefix"); +namespace { -utils::rw_lock_nr counter_lock; //{ -std::unordered_map<std::string, perf_counter_ptr> counters_trace_latency; -// } +#define LATENCY_TRACER_METRIC_ENTITY_ID(description, starting_point, end_point) \ + fmt::format("latency_tracer@{}|{}|{}", description, starting_point, end_point) -utils::rw_lock_nr task_code_lock; //{ -std::unordered_map<std::string, bool> task_codes; -// } +metric_entity_ptr instantiate_latency_tracer_metric_entity(const std::string &description, + const std::string &starting_point, + const std::string &end_point) +{ + auto entity_id = LATENCY_TRACER_METRIC_ENTITY_ID(description, starting_point, end_point); + + return METRIC_ENTITY_latency_tracer.instantiate(entity_id, + {{"description", description}, + {"starting_point", starting_point}, + {"end_point", end_point}}); +} -perf_counter_ptr get_trace_counter(const std::string &name) +// Maintain each latency-tracer-level metric entity, and all metrics attached to it. +class latency_tracer_metrics { - { - utils::auto_read_lock read(counter_lock); - auto iter = counters_trace_latency.find(name); - if (iter != counters_trace_latency.end()) { - return iter->second; - } - } +public: + latency_tracer_metrics(const std::string &description, + const std::string &starting_point, + const std::string &end_point); + ~latency_tracer_metrics() = default; + + const metric_entity_ptr &latency_tracer_metric_entity() const; + + METRIC_DEFINE_SET(latency_tracer_duration_ns, int64_t) + +private: + const std::string _description; + const std::string _starting_point; + const std::string _end_point; + + const dsn::metric_entity_ptr _latency_tracer_metric_entity; + METRIC_VAR_DECLARE_percentile_int64(latency_tracer_duration_ns); + + DISALLOW_COPY_AND_ASSIGN(latency_tracer_metrics); +}; + +latency_tracer_metrics::latency_tracer_metrics(const std::string &description, + const std::string &starting_point, + const std::string &end_point) + : _description(description), + _starting_point(starting_point), + _end_point(end_point), + _latency_tracer_metric_entity( + instantiate_latency_tracer_metric_entity(description, starting_point, end_point)), + METRIC_VAR_INIT_latency_tracer(latency_tracer_duration_ns) +{ +} - utils::auto_write_lock write(counter_lock); - auto iter = counters_trace_latency.find(name); - if (iter != counters_trace_latency.end()) { - return iter->second; +const dsn::metric_entity_ptr &latency_tracer_metrics::latency_tracer_metric_entity() const +{ + CHECK_NOTNULL(_latency_tracer_metric_entity, + "latency_tracer metric entity (description={}, starting_point={}, end_point={}) " + "should has been instantiated: uninitialized entity cannot be used to " + "instantiate metric", + _description, + _starting_point, + _end_point); + return _latency_tracer_metric_entity; +} + +// Manage the lifetime of all latency-tracer-level metric entities. +class latency_tracer_metric_entities +{ +public: + using entity_map = std::unordered_map<std::string, std::unique_ptr<latency_tracer_metrics>>; + + latency_tracer_metric_entities() = default; + ~latency_tracer_metric_entities() = default; + +// Acquire read lock firstly, since once the metric entity were created, there would be no need to +// acquire write lock again. +#define __METRIC_DEFINE_SET(name, value_type) \ + void METRIC_FUNC_NAME_SET(name)(const std::string &description, \ + const std::string &starting_point, \ + const std::string &end_point, \ + value_type value) \ + { \ + auto entity_id = LATENCY_TRACER_METRIC_ENTITY_ID(description, starting_point, end_point); \ + { \ + dsn::utils::auto_read_lock l(_lock); \ + auto iter = _entities.find(entity_id); \ + if (dsn_likely(iter != _entities.end())) { \ + METRIC_SET(*(iter->second), name, value); \ + return; \ + } \ + } \ + \ + dsn::utils::auto_write_lock l(_lock); \ + auto iter = _entities.find(entity_id); \ + if (dsn_unlikely(iter != _entities.end())) { \ + METRIC_SET(*(iter->second), name, value); \ + return; \ + } \ + \ + auto ret = _entities.emplace( \ + entity_id, \ + std::make_unique<latency_tracer_metrics>(description, starting_point, end_point)); \ + CHECK_TRUE(ret.second); \ + METRIC_SET(*(ret.first->second), name, value); \ } - auto perf_counter = - dsn::perf_counters::instance().get_app_counter(FLAGS_latency_tracer_counter_name_prefix, - name.c_str(), - COUNTER_TYPE_NUMBER_PERCENTILES, - name.c_str(), - true); + __METRIC_DEFINE_SET(latency_tracer_duration_ns, int64_t) - counters_trace_latency.emplace(name, perf_counter); - return perf_counter; -} +#undef __METRIC_DEFINE_SET + +private: + mutable utils::rw_lock_nr _lock; + entity_map _entities; + + DISALLOW_COPY_AND_ASSIGN(latency_tracer_metric_entities); +}; + +latency_tracer_metric_entities s_latency_tracer_metric_entities; + +dsn::utils::rw_lock_nr task_code_lock; //{ +std::unordered_map<std::string, bool> task_codes; +// } bool is_enable_trace(const dsn::task_code &code) { @@ -118,6 +209,8 @@ bool is_enable_trace(const dsn::task_code &code) return enable_trace; } +} // anonymous namespace + latency_tracer::latency_tracer(bool is_sub, std::string name, uint64_t threshold, @@ -242,9 +335,12 @@ void latency_tracer::dump_trace_points(/*out*/ std::string &traces) auto total_latency = point.first - start_time; if (FLAGS_enable_latency_tracer_report) { - std::string counter_name = - fmt::format("[{}]{}@{}", _description, previous_point_name, cur_point_name); - report_trace_point(counter_name, span_duration); + METRIC_SET(s_latency_tracer_metric_entities, + latency_tracer_duration_ns, + _description, + previous_point_name, + cur_point_name, + span_duration); } if (total_time_used >= _threshold) { @@ -281,13 +377,5 @@ void latency_tracer::dump_trace_points(/*out*/ std::string &traces) } } -void latency_tracer::report_trace_point(const std::string &name, uint64_t span) -{ - auto perf_counter = get_trace_counter(name); - if (perf_counter) { - perf_counter->set(span); - } -} - } // namespace utils } // namespace dsn diff --git a/src/utils/latency_tracer.h b/src/utils/latency_tracer.h index 226536e01..459786645 100644 --- a/src/utils/latency_tracer.h +++ b/src/utils/latency_tracer.h @@ -165,9 +165,6 @@ public: bool enabled() const { return _enable_trace; } private: - // report the trace point duration to monitor system - static void report_trace_point(const std::string &name, uint64_t span); - // dump and print the trace point into log file void dump_trace_points(/*out*/ std::string &traces); diff --git a/src/utils/metrics.h b/src/utils/metrics.h index e484e4c1d..3627b078f 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -193,6 +193,8 @@ class error_code; #define METRIC_VAR_INIT_backup_policy(name, ...) METRIC_VAR_INIT(name, backup_policy, ##__VA_ARGS__) #define METRIC_VAR_INIT_queue(name, ...) METRIC_VAR_INIT(name, queue, ##__VA_ARGS__) #define METRIC_VAR_ASSIGN_profiler(name, ...) METRIC_VAR_ASSIGN(name, profiler, ##__VA_ARGS__) +#define METRIC_VAR_INIT_latency_tracer(name, ...) \ + METRIC_VAR_INIT(name, latency_tracer, ##__VA_ARGS__) // Perform increment_by() operations on gauges and counters. #define METRIC_VAR_INCREMENT_BY(name, x) \ --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
