This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new e7b816d2d0 HDDS-8746. Add metrics to ReplicationSupervisor for task
count and max stream (#4818)
e7b816d2d0 is described below
commit e7b816d2d0c4735275f3719289b29065e194e92b
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Fri Jun 2 11:59:08 2023 +0100
HDDS-8746. Add metrics to ReplicationSupervisor for task count and max
stream (#4818)
---
.../replication/ReplicationSupervisor.java | 17 +++++++++++++++
.../replication/ReplicationSupervisorMetrics.java | 24 +++++++++++++++++-----
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
index 51109846df..8eb8b6a50f 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
@@ -21,6 +21,7 @@ import java.time.Clock;
import java.time.Instant;
import java.time.ZoneId;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.OptionalLong;
@@ -268,6 +269,14 @@ public final class ReplicationSupervisor {
return counter == null ? 0 : counter.get();
}
+ /**
+  * Copies the per-task-type counters held in {@code taskCounter} into a new
+  * map keyed by the simple class name of each task type.
+  *
+  * @return a newly created map from task class simple name to the current
+  *         value of that task type's counter
+  */
+ public Map<String, Integer> getInFlightReplicationSummary() {
+   final Map<String, Integer> summary = new HashMap<>();
+   taskCounter.forEach((taskClass, inFlight) ->
+       summary.put(taskClass.getSimpleName(), inFlight.get()));
+   return summary;
+ }
+
/**
* Returns a count of all inflight replication tasks across all task types.
* Note that `getInFlightReplications(Class taskClass) allows for the .count
@@ -414,6 +423,14 @@ public final class ReplicationSupervisor {
}
}
+ /**
+  * Reports the maximum number of tasks that can run concurrently, as
+  * derived from {@code executor}.
+  *
+  * @return the executor's maximum pool size when it is a
+  *         {@link ThreadPoolExecutor}; otherwise 1
+  */
+ public long getMaxReplicationStreams() {
+   if (!(executor instanceof ThreadPoolExecutor)) {
+     // No maximum pool size is available to query here; fall back to 1.
+     return 1;
+   }
+   final ThreadPoolExecutor pool = (ThreadPoolExecutor) executor;
+   return pool.getMaximumPoolSize();
+ }
+
public long getReplicationSuccessCount() {
return successCounter.get();
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java
index 57fb599891..671e985d7a 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.container.replication;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.metrics2.MetricsCollector;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metrics;
@@ -26,6 +27,8 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.Interns;
import org.apache.hadoop.ozone.OzoneConsts;
+import java.util.Map;
+
/**
* Metrics source to report number of replication tasks.
*/
@@ -34,7 +37,7 @@ import org.apache.hadoop.ozone.OzoneConsts;
context = OzoneConsts.OZONE)
public class ReplicationSupervisorMetrics implements MetricsSource {
- private static final String SOURCE =
+ public static final String SOURCE =
ReplicationSupervisorMetrics.class.getSimpleName();
private final ReplicationSupervisor supervisor;
@@ -57,9 +60,10 @@ public class ReplicationSupervisorMetrics implements
MetricsSource {
@Override
public void getMetrics(MetricsCollector collector, boolean all) {
- collector.addRecord(SOURCE)
- .addGauge(Interns.info("numInFlightReplications",
- "Number of pending replications(including queued replications"),
+ MetricsRecordBuilder builder = collector.addRecord(SOURCE);
+ builder.addGauge(Interns.info("numInFlightReplications",
+ "Total number of pending replications and reconstructions both low "
+ + "and normal priority"),
supervisor.getTotalInFlightReplications())
.addGauge(Interns.info("numQueuedReplications",
"Number of replications in queue"),
@@ -72,6 +76,16 @@ public class ReplicationSupervisorMetrics implements
MetricsSource {
supervisor.getReplicationTimeoutCount())
.addGauge(Interns.info("numSkippedReplications",
"Number of replication requests skipped as the container is "
- + "already present"), supervisor.getReplicationSkippedCount());
+ + "already present"), supervisor.getReplicationSkippedCount())
+ .addGauge(Interns.info("maxReplicationStreams", "Maximum number of "
+ + "concurrent replication tasks which can run simultaneously"),
+ supervisor.getMaxReplicationStreams());
+
+ // Publish one gauge per task type reported by the supervisor's summary.
+ Map<String, Integer> tasks = supervisor.getInFlightReplicationSummary();
+ for (Map.Entry<String, Integer> entry : tasks.entrySet()) {
+   // The space after "priority" keeps the task type name from being fused
+   // onto the preceding word in the published description.
+   builder.addGauge(Interns.info("numInflight" + entry.getKey(),
+       "Number of normal priority " + entry.getKey() + " tasks pending"),
+       entry.getValue());
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]