This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch experimental/prometheus_cluster_details_wip
in repository https://gitbox.apache.org/repos/asf/storm.git
commit 9b8bc2a431e872ccb6b98e6d7628668ab5c721c2
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 7 17:22:11 2024 +0200

    Fix metric naming
---
 .../prometheus/PrometheusReporterClient.java       | 90 +++++++++++-----------
 .../PrometheusPreparableReporterTest.java          | 14 ++--
 2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/external/storm-metrics-prometheus/src/main/java/org/apache/storm/metrics/prometheus/PrometheusReporterClient.java b/external/storm-metrics-prometheus/src/main/java/org/apache/storm/metrics/prometheus/PrometheusReporterClient.java
index 199472534..a403f7ec5 100644
--- a/external/storm-metrics-prometheus/src/main/java/org/apache/storm/metrics/prometheus/PrometheusReporterClient.java
+++ b/external/storm-metrics-prometheus/src/main/java/org/apache/storm/metrics/prometheus/PrometheusReporterClient.java
@@ -108,43 +108,43 @@ public class PrometheusReporterClient extends ScheduledReporter {
     }
 
     private static void initClusterMetrics() {
-        CLUSTER_SUMMARY_METRICS.put("cluster:num-nimbus-leaders", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_num_nimbus_leaders")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:num-nimbus-leaders", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_num_nimbus_leaders")
                 .help("Number of nimbuses marked as a leader. This should really only ever be 1 in a healthy cluster, or 0 for a short period of time while a fail over happens.")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("cluster:num-nimbuses", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_num_nimbuses")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:num-nimbuses", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_num_nimbuses")
                 .help("Number of nimbuses, leader or standby.")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("cluster:num-supervisors", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_num_supervisors")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:num-supervisors", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_num_supervisors")
                 .help("Number of supervisors.")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("cluster:num-topologies", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_num_topologies")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:num-topologies", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_num_topologies")
                 .help("Number of topologies.")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("cluster:num-total-used-workers", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_num_total_used_workers")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:num-total-used-workers", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_num_total_used_workers")
                 .help("Number of used workers/slots.")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("cluster:num-total-workers", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_num_total_workers")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:num-total-workers", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_num_total_workers")
                 .help("Number of workers/slots.")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("cluster:total-fragmented-cpu-non-negative", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_total_fragmented_cpu_non_negative")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:total-fragmented-cpu-non-negative", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_total_fragmented_cpu_non_negative")
                 .help("Total fragmented CPU (% of core). This is CPU that the system thinks it cannot use because other resources on the node are used up.")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("cluster:total-fragmented-memory-non-negative", io.prometheus.metrics.core.metrics.Gauge.builder()
-                .name("cluster_total_fragmented_memory_non_negative")
+        CLUSTER_SUMMARY_METRICS.put("summary.cluster:total-fragmented-memory-non-negative", io.prometheus.metrics.core.metrics.Gauge.builder()
+                .name("summary_cluster_total_fragmented_memory_non_negative")
                 .help("Total fragmented memory (MB). This is memory that the system thinks it cannot use because other resources on the node are used up.")
                 .register());
@@ -164,92 +164,92 @@ public class PrometheusReporterClient extends ScheduledReporter {
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:assigned-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_assigned_cpu")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:assigned-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_assigned_cpu")
                 .help("CPU scheduled per topology (% of a core)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:assigned-mem-off-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_assigned_mem_off_heap")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:assigned-mem-off-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_assigned_mem_off_heap")
                 .help("Off heap memory scheduled per topology (MB)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:assigned-mem-on-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_assigned_mem_on_heap")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:assigned-mem-on-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_assigned_mem_on_heap")
                 .help("On heap memory scheduled per topology (MB)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:num-executors", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_num_executors")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:num-executors", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_num_executors")
                 .help("Number of executors per topology")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:num-tasks", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_num_tasks")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:num-tasks", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_num_tasks")
                 .help("Number of tasks per topology")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:num-workers", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_num_workers")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:num-workers", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_num_workers")
                 .help("Number of workers per topology")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:replication-count", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_replication_count")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:replication-count", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_replication_count")
                 .help("Replication count per topology")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:requested-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_requested_cpu")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:requested-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_requested_cpu")
                 .help("CPU requested per topology (% of a core)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:requested-mem-off-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_requested_mem_off_heap")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:requested-mem-off-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_requested_mem_off_heap")
                 .help("Off heap memory requested per topology (MB)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:requested-mem-on-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_requested_mem_on_heap")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:requested-mem-on-heap", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_requested_mem_on_heap")
                 .help("On heap memory requested per topology (MB)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("topologies:uptime-secs", io.prometheus.metrics.core.metrics.Histogram.builder()
-                .name("topologies_uptime_secs")
+        CLUSTER_SUMMARY_METRICS.put("summary.topologies:uptime-secs", io.prometheus.metrics.core.metrics.Histogram.builder()
+                .name("summary_topologies_uptime_secs")
                 .help("Uptime per topology (seconds)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("supervisors:fragmented-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
+        CLUSTER_SUMMARY_METRICS.put("summary.supervisors:fragmented-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
                 .name("supervisors_fragmented_cpu")
                 .help("fragmented CPU per supervisor (% of a core)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("supervisors:fragmented-mem", io.prometheus.metrics.core.metrics.Histogram.builder()
+        CLUSTER_SUMMARY_METRICS.put("summary.supervisors:fragmented-mem", io.prometheus.metrics.core.metrics.Histogram.builder()
                 .name("supervisors_fragmented_mem")
                 .help("fragmented memory per supervisor (MB)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("supervisors:num-used-workers", io.prometheus.metrics.core.metrics.Histogram.builder()
+        CLUSTER_SUMMARY_METRICS.put("summary.supervisors:num-used-workers", io.prometheus.metrics.core.metrics.Histogram.builder()
                 .name("supervisors_num_used_workers")
                 .help("workers used per supervisor")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("supervisors:num-workers", io.prometheus.metrics.core.metrics.Histogram.builder()
+        CLUSTER_SUMMARY_METRICS.put("summary.supervisors:num-workers", io.prometheus.metrics.core.metrics.Histogram.builder()
                 .name("supervisors_num_workers")
                 .help("number of workers per supervisor")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("supervisors:uptime-secs", io.prometheus.metrics.core.metrics.Histogram.builder()
+        CLUSTER_SUMMARY_METRICS.put("summary.supervisors:uptime-secs", io.prometheus.metrics.core.metrics.Histogram.builder()
                 .name("supervisors_uptime_secs")
                 .help("uptime of supervisors")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("supervisors:used-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
+        CLUSTER_SUMMARY_METRICS.put("summary.supervisors:used-cpu", io.prometheus.metrics.core.metrics.Histogram.builder()
                 .name("supervisors_used_cpu")
                 .help("CPU used per supervisor (% of a core)")
                 .register());
 
-        CLUSTER_SUMMARY_METRICS.put("supervisors:used-mem", io.prometheus.metrics.core.metrics.Histogram.builder()
+        CLUSTER_SUMMARY_METRICS.put("summary.supervisors:used-mem", io.prometheus.metrics.core.metrics.Histogram.builder()
                 .name("supervisors_used_mem")
                 .help("memory used per supervisor (MB)")
                 .register());
diff --git a/external/storm-metrics-prometheus/src/test/java/org/apache/storm/metrics/prometheus/PrometheusPreparableReporterTest.java b/external/storm-metrics-prometheus/src/test/java/org/apache/storm/metrics/prometheus/PrometheusPreparableReporterTest.java
index 85e31fde3..a49a808d7 100644
--- a/external/storm-metrics-prometheus/src/test/java/org/apache/storm/metrics/prometheus/PrometheusPreparableReporterTest.java
+++ b/external/storm-metrics-prometheus/src/test/java/org/apache/storm/metrics/prometheus/PrometheusPreparableReporterTest.java
@@ -111,7 +111,7 @@ public class PrometheusPreparableReporterTest {
         // We fake the metrics here. In a real Storm environment, these metrics are generated.
         final MetricRegistry r = new MetricRegistry();
         final SimpleGauge<Integer> supervisor = new SimpleGauge<>(5);
-        r.register("cluster:num-supervisors", supervisor);
+        r.register("summary.cluster:num-supervisors", supervisor);
        r.register("nimbus:total-memory", new SimpleGauge<>(5.6));
         r.register("nimbus:total-cpu", new SimpleGauge<>("500"));
 
@@ -122,9 +122,9 @@ public class PrometheusPreparableReporterTest {
 
         assertMetrics(
                 List.of(
-                        "# HELP cluster_num_supervisors Number of supervisors.",
-                        "# TYPE cluster_num_supervisors gauge",
-                        "cluster_num_supervisors{instance=\"\",job=\"test_simple\"} 5",
+                        "# HELP summary_cluster_num_supervisors Number of supervisors.",
+                        "# TYPE summary_cluster_num_supervisors gauge",
+                        "summary_cluster_num_supervisors{instance=\"\",job=\"test_simple\"} 5",
                         "# HELP nimbus_total_memory total memory on the cluster MB",
                         "# TYPE nimbus_total_memory gauge",
                         "nimbus_total_memory{instance=\"\",job=\"test_simple\"} 5.6",
@@ -141,9 +141,9 @@ public class PrometheusPreparableReporterTest {
 
         assertMetrics(
                 List.of(
-                        "# HELP cluster_num_supervisors Number of supervisors.",
-                        "# TYPE cluster_num_supervisors gauge",
-                        "cluster_num_supervisors{instance=\"\",job=\"test_simple\"} 100",
+                        "# HELP summary_cluster_num_supervisors Number of supervisors.",
+                        "# TYPE summary_cluster_num_supervisors gauge",
+                        "summary_cluster_num_supervisors{instance=\"\",job=\"test_simple\"} 100",
                         "# HELP nimbus_total_memory total memory on the cluster MB",
                         "# TYPE nimbus_total_memory gauge",
                         "nimbus_total_memory{instance=\"\",job=\"test_simple\"} 5.6",
