This is an automated email from the ASF dual-hosted git repository.

wangdan pushed a commit to branch migrate-metrics-dev
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git

commit 879e7ff7e2a137811602b6318f0f2cb4178d6d77
Author: Dan Wang <[email protected]>
AuthorDate: Wed Jun 21 11:27:43 2023 +0800

    feat(new_metrics): migrate metrics for latency tracer (#1537)
    
    https://github.com/apache/incubator-pegasus/issues/1536
    
    The only metric of latency tracer, namely the duration between two points
    (stages), is migrated to the new framework:
    
    - latency tracer entity is introduced as the new metric entity to which the
    metric is attached;
    - create new class that bind each entity and manage the metric;
    - create new class that manage all entity instances, with a read-write lock
    that helps improving the performance of search for the target entity;
    - solve the problem of "No space left on device" for Build ASAN temporarily.
---
 src/common/fs_manager.cpp        |   2 +-
 src/meta/meta_backup_service.cpp |   2 +-
 src/meta/table_metrics.cpp       |   4 +-
 src/replica/replica_base.cpp     |   2 +-
 src/runtime/profiler.cpp         |   2 +-
 src/runtime/task/task_queue.cpp  |   2 +-
 src/utils/latency_tracer.cpp     | 178 +++++++++++++++++++++++++++++----------
 src/utils/latency_tracer.h       |   3 -
 src/utils/metrics.h              |   2 +
 9 files changed, 142 insertions(+), 55 deletions(-)

diff --git a/src/common/fs_manager.cpp b/src/common/fs_manager.cpp
index 583ac8228..9b8d791e6 100644
--- a/src/common/fs_manager.cpp
+++ b/src/common/fs_manager.cpp
@@ -99,7 +99,7 @@ namespace {
 metric_entity_ptr instantiate_disk_metric_entity(const std::string &tag,
                                                  const std::string &data_dir)
 {
-    auto entity_id = fmt::format("disk_{}", tag);
+    auto entity_id = fmt::format("disk@{}", tag);
 
     return METRIC_ENTITY_disk.instantiate(entity_id, {{"tag", tag}, 
{"data_dir", data_dir}});
 }
diff --git a/src/meta/meta_backup_service.cpp b/src/meta/meta_backup_service.cpp
index a746c7bdb..b7e443efe 100644
--- a/src/meta/meta_backup_service.cpp
+++ b/src/meta/meta_backup_service.cpp
@@ -69,7 +69,7 @@ namespace {
 
 metric_entity_ptr instantiate_backup_policy_metric_entity(const std::string 
&policy_name)
 {
-    auto entity_id = fmt::format("backup_policy_{}", policy_name);
+    auto entity_id = fmt::format("backup_policy@{}", policy_name);
 
     return METRIC_ENTITY_backup_policy.instantiate(entity_id, {{"policy_name", 
policy_name}});
 }
diff --git a/src/meta/table_metrics.cpp b/src/meta/table_metrics.cpp
index 73b45d69e..6801c97fc 100644
--- a/src/meta/table_metrics.cpp
+++ b/src/meta/table_metrics.cpp
@@ -112,7 +112,7 @@ namespace {
 
 metric_entity_ptr instantiate_partition_metric_entity(int32_t table_id, 
int32_t partition_id)
 {
-    auto entity_id = fmt::format("partition_{}", gpid(table_id, partition_id));
+    auto entity_id = fmt::format("partition@{}", gpid(table_id, partition_id));
 
     return METRIC_ENTITY_partition.instantiate(
         entity_id,
@@ -121,7 +121,7 @@ metric_entity_ptr 
instantiate_partition_metric_entity(int32_t table_id, int32_t
 
 metric_entity_ptr instantiate_table_metric_entity(int32_t table_id)
 {
-    auto entity_id = fmt::format("table_{}", table_id);
+    auto entity_id = fmt::format("table@{}", table_id);
 
     return METRIC_ENTITY_table.instantiate(entity_id, {{"table_id", 
std::to_string(table_id)}});
 }
diff --git a/src/replica/replica_base.cpp b/src/replica/replica_base.cpp
index 3168ad651..306591119 100644
--- a/src/replica/replica_base.cpp
+++ b/src/replica/replica_base.cpp
@@ -30,7 +30,7 @@ namespace {
 
 metric_entity_ptr instantiate_replica_metric_entity(const gpid &id)
 {
-    auto entity_id = fmt::format("replica_{}", id);
+    auto entity_id = fmt::format("replica@{}", id);
 
     // Do NOT add `replica_base._app_name` as the table name to the attributes 
of entity, since
     // it is read-only and will never be updated even if the table is renamed.
diff --git a/src/runtime/profiler.cpp b/src/runtime/profiler.cpp
index 1a6a2bb7b..ebedb6536 100644
--- a/src/runtime/profiler.cpp
+++ b/src/runtime/profiler.cpp
@@ -361,7 +361,7 @@ namespace {
 
 metric_entity_ptr instantiate_profiler_metric_entity(const std::string 
&task_name)
 {
-    auto entity_id = fmt::format("task_{}", task_name);
+    auto entity_id = fmt::format("task@{}", task_name);
 
     return METRIC_ENTITY_profiler.instantiate(entity_id, {{"task_name", 
task_name}});
 }
diff --git a/src/runtime/task/task_queue.cpp b/src/runtime/task/task_queue.cpp
index 425880909..f7bcb641d 100644
--- a/src/runtime/task/task_queue.cpp
+++ b/src/runtime/task/task_queue.cpp
@@ -63,7 +63,7 @@ namespace {
 
 metric_entity_ptr instantiate_queue_metric_entity(const std::string 
&queue_name)
 {
-    auto entity_id = fmt::format("queue_{}", queue_name);
+    auto entity_id = fmt::format("queue@{}", queue_name);
 
     return METRIC_ENTITY_queue.instantiate(entity_id, {{"queue_name", 
queue_name}});
 }
diff --git a/src/utils/latency_tracer.cpp b/src/utils/latency_tracer.cpp
index 3d5e6fe48..f60d4b004 100644
--- a/src/utils/latency_tracer.cpp
+++ b/src/utils/latency_tracer.cpp
@@ -20,15 +20,23 @@
 #include <fmt/core.h>
 #include <cstdint>
 #include <iterator>
+#include <set>
 #include <utility>
 
-#include "perf_counter/perf_counter.h"
-#include "perf_counter/perf_counters.h"
 #include "runtime/api_layer1.h"
 #include "utils/autoref_ptr.h"
 #include "utils/config_api.h"
 #include "utils/flags.h"
 #include "utils/fmt_logging.h"
+#include "utils/metrics.h"
+#include "utils/string_view.h"
+
+METRIC_DEFINE_entity(latency_tracer);
+
+METRIC_DEFINE_percentile_int64(latency_tracer,
+                               latency_tracer_duration_ns,
+                               dsn::metric_unit::kNanoSeconds,
+                               "The duration between two points(stages)");
 
 namespace dsn {
 namespace utils {
@@ -42,48 +50,131 @@ DSN_TAG_VARIABLE(enable_latency_tracer, FT_MUTABLE);
 DSN_DEFINE_bool(replication,
                 enable_latency_tracer_report,
                 false,
-                "whether open the latency tracer report perf counter");
+                "whether open the latency tracer report for metrics");
 DSN_TAG_VARIABLE(enable_latency_tracer_report, FT_MUTABLE);
 
-DSN_DEFINE_string(replication,
-                  latency_tracer_counter_name_prefix,
-                  "trace_latency",
-                  "perf counter common name prefix");
+namespace {
 
-utils::rw_lock_nr counter_lock; //{
-std::unordered_map<std::string, perf_counter_ptr> counters_trace_latency;
-// }
+#define LATENCY_TRACER_METRIC_ENTITY_ID(description, starting_point, 
end_point)                    \
+    fmt::format("latency_tracer@{}|{}|{}", description, starting_point, 
end_point)
 
-utils::rw_lock_nr task_code_lock; //{
-std::unordered_map<std::string, bool> task_codes;
-// }
+metric_entity_ptr instantiate_latency_tracer_metric_entity(const std::string 
&description,
+                                                           const std::string 
&starting_point,
+                                                           const std::string 
&end_point)
+{
+    auto entity_id = LATENCY_TRACER_METRIC_ENTITY_ID(description, 
starting_point, end_point);
+
+    return METRIC_ENTITY_latency_tracer.instantiate(entity_id,
+                                                    {{"description", 
description},
+                                                     {"starting_point", 
starting_point},
+                                                     {"end_point", 
end_point}});
+}
 
-perf_counter_ptr get_trace_counter(const std::string &name)
+// Maintain each latency-tracer-level metric entity, and all metrics attached 
to it.
+class latency_tracer_metrics
 {
-    {
-        utils::auto_read_lock read(counter_lock);
-        auto iter = counters_trace_latency.find(name);
-        if (iter != counters_trace_latency.end()) {
-            return iter->second;
-        }
-    }
+public:
+    latency_tracer_metrics(const std::string &description,
+                           const std::string &starting_point,
+                           const std::string &end_point);
+    ~latency_tracer_metrics() = default;
+
+    const metric_entity_ptr &latency_tracer_metric_entity() const;
+
+    METRIC_DEFINE_SET(latency_tracer_duration_ns, int64_t)
+
+private:
+    const std::string _description;
+    const std::string _starting_point;
+    const std::string _end_point;
+
+    const dsn::metric_entity_ptr _latency_tracer_metric_entity;
+    METRIC_VAR_DECLARE_percentile_int64(latency_tracer_duration_ns);
+
+    DISALLOW_COPY_AND_ASSIGN(latency_tracer_metrics);
+};
+
+latency_tracer_metrics::latency_tracer_metrics(const std::string &description,
+                                               const std::string 
&starting_point,
+                                               const std::string &end_point)
+    : _description(description),
+      _starting_point(starting_point),
+      _end_point(end_point),
+      _latency_tracer_metric_entity(
+          instantiate_latency_tracer_metric_entity(description, 
starting_point, end_point)),
+      METRIC_VAR_INIT_latency_tracer(latency_tracer_duration_ns)
+{
+}
 
-    utils::auto_write_lock write(counter_lock);
-    auto iter = counters_trace_latency.find(name);
-    if (iter != counters_trace_latency.end()) {
-        return iter->second;
+const dsn::metric_entity_ptr 
&latency_tracer_metrics::latency_tracer_metric_entity() const
+{
+    CHECK_NOTNULL(_latency_tracer_metric_entity,
+                  "latency_tracer metric entity (description={}, 
starting_point={}, end_point={}) "
+                  "should has been instantiated: uninitialized entity cannot 
be used to "
+                  "instantiate metric",
+                  _description,
+                  _starting_point,
+                  _end_point);
+    return _latency_tracer_metric_entity;
+}
+
+// Manage the lifetime of all latency-tracer-level metric entities.
+class latency_tracer_metric_entities
+{
+public:
+    using entity_map = std::unordered_map<std::string, 
std::unique_ptr<latency_tracer_metrics>>;
+
+    latency_tracer_metric_entities() = default;
+    ~latency_tracer_metric_entities() = default;
+
+// Acquire read lock firstly, since once the metric entity were created, there 
would be no need to
+// acquire write lock again.
+#define __METRIC_DEFINE_SET(name, value_type)                                  
                    \
+    void METRIC_FUNC_NAME_SET(name)(const std::string &description,            
                    \
+                                    const std::string &starting_point,         
                    \
+                                    const std::string &end_point,              
                    \
+                                    value_type value)                          
                    \
+    {                                                                          
                    \
+        auto entity_id = LATENCY_TRACER_METRIC_ENTITY_ID(description, 
starting_point, end_point);  \
+        {                                                                      
                    \
+            dsn::utils::auto_read_lock l(_lock);                               
                    \
+            auto iter = _entities.find(entity_id);                             
                    \
+            if (dsn_likely(iter != _entities.end())) {                         
                    \
+                METRIC_SET(*(iter->second), name, value);                      
                    \
+                return;                                                        
                    \
+            }                                                                  
                    \
+        }                                                                      
                    \
+                                                                               
                    \
+        dsn::utils::auto_write_lock l(_lock);                                  
                    \
+        auto iter = _entities.find(entity_id);                                 
                    \
+        if (dsn_unlikely(iter != _entities.end())) {                           
                    \
+            METRIC_SET(*(iter->second), name, value);                          
                    \
+            return;                                                            
                    \
+        }                                                                      
                    \
+                                                                               
                    \
+        auto ret = _entities.emplace(                                          
                    \
+            entity_id,                                                         
                    \
+            std::make_unique<latency_tracer_metrics>(description, 
starting_point, end_point));     \
+        CHECK_TRUE(ret.second);                                                
                    \
+        METRIC_SET(*(ret.first->second), name, value);                         
                    \
     }
 
-    auto perf_counter =
-        
dsn::perf_counters::instance().get_app_counter(FLAGS_latency_tracer_counter_name_prefix,
-                                                       name.c_str(),
-                                                       
COUNTER_TYPE_NUMBER_PERCENTILES,
-                                                       name.c_str(),
-                                                       true);
+    __METRIC_DEFINE_SET(latency_tracer_duration_ns, int64_t)
 
-    counters_trace_latency.emplace(name, perf_counter);
-    return perf_counter;
-}
+#undef __METRIC_DEFINE_SET
+
+private:
+    mutable utils::rw_lock_nr _lock;
+    entity_map _entities;
+
+    DISALLOW_COPY_AND_ASSIGN(latency_tracer_metric_entities);
+};
+
+latency_tracer_metric_entities s_latency_tracer_metric_entities;
+
+dsn::utils::rw_lock_nr task_code_lock; //{
+std::unordered_map<std::string, bool> task_codes;
+// }
 
 bool is_enable_trace(const dsn::task_code &code)
 {
@@ -118,6 +209,8 @@ bool is_enable_trace(const dsn::task_code &code)
     return enable_trace;
 }
 
+} // anonymous namespace
+
 latency_tracer::latency_tracer(bool is_sub,
                                std::string name,
                                uint64_t threshold,
@@ -242,9 +335,12 @@ void latency_tracer::dump_trace_points(/*out*/ std::string 
&traces)
             auto total_latency = point.first - start_time;
 
             if (FLAGS_enable_latency_tracer_report) {
-                std::string counter_name =
-                    fmt::format("[{}]{}@{}", _description, 
previous_point_name, cur_point_name);
-                report_trace_point(counter_name, span_duration);
+                METRIC_SET(s_latency_tracer_metric_entities,
+                           latency_tracer_duration_ns,
+                           _description,
+                           previous_point_name,
+                           cur_point_name,
+                           span_duration);
             }
 
             if (total_time_used >= _threshold) {
@@ -281,13 +377,5 @@ void latency_tracer::dump_trace_points(/*out*/ std::string 
&traces)
     }
 }
 
-void latency_tracer::report_trace_point(const std::string &name, uint64_t span)
-{
-    auto perf_counter = get_trace_counter(name);
-    if (perf_counter) {
-        perf_counter->set(span);
-    }
-}
-
 } // namespace utils
 } // namespace dsn
diff --git a/src/utils/latency_tracer.h b/src/utils/latency_tracer.h
index 226536e01..459786645 100644
--- a/src/utils/latency_tracer.h
+++ b/src/utils/latency_tracer.h
@@ -165,9 +165,6 @@ public:
     bool enabled() const { return _enable_trace; }
 
 private:
-    // report the trace point duration to monitor system
-    static void report_trace_point(const std::string &name, uint64_t span);
-
     // dump and print the trace point into log file
     void dump_trace_points(/*out*/ std::string &traces);
 
diff --git a/src/utils/metrics.h b/src/utils/metrics.h
index e484e4c1d..3627b078f 100644
--- a/src/utils/metrics.h
+++ b/src/utils/metrics.h
@@ -193,6 +193,8 @@ class error_code;
 #define METRIC_VAR_INIT_backup_policy(name, ...) METRIC_VAR_INIT(name, 
backup_policy, ##__VA_ARGS__)
 #define METRIC_VAR_INIT_queue(name, ...) METRIC_VAR_INIT(name, queue, 
##__VA_ARGS__)
 #define METRIC_VAR_ASSIGN_profiler(name, ...) METRIC_VAR_ASSIGN(name, 
profiler, ##__VA_ARGS__)
+#define METRIC_VAR_INIT_latency_tracer(name, ...)                              
                    \
+    METRIC_VAR_INIT(name, latency_tracer, ##__VA_ARGS__)
 
 // Perform increment_by() operations on gauges and counters.
 #define METRIC_VAR_INCREMENT_BY(name, x)                                       
                    \


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to