This is an automated email from the ASF dual-hosted git repository.

ritesh pushed a commit to branch HDDS-10239-container-reconciliation
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to 
refs/heads/HDDS-10239-container-reconciliation by this push:
     new fa8e0a422c HDDS-11345. Add metrics specific tests for reconciliation 
tasks. (#7732)
fa8e0a422c is described below

commit fa8e0a422ce9e6431ea0467ce1c046b531c1db33
Author: Aswin Shakil Balasubramanian <[email protected]>
AuthorDate: Mon Feb 10 10:44:13 2025 -0800

    HDDS-11345. Add metrics specific tests for reconciliation tasks. (#7732)
---
 .../container/checksum/ReconcileContainerTask.java |  4 +-
 .../ReconcileContainerCommandHandler.java          | 34 ++++++-----
 .../replication/ReplicationSupervisor.java         |  4 +-
 .../TestReconcileContainerCommandHandler.java      | 14 ++++-
 .../replication/TestReplicationSupervisor.java     | 68 ++++++++++++++++++++++
 5 files changed, 104 insertions(+), 20 deletions(-)

diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java
index 5d949e90b1..f09258fdf6 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java
@@ -70,12 +70,12 @@ protected Object getCommandForDebug() {
   }
 
   @Override
-  protected String getMetricName() {
+  public String getMetricName() {
     return "ContainerReconciliations";
   }
 
   @Override
-  protected String getMetricDescriptionSegment() {
+  public String getMetricDescriptionSegment() {
     return "Container Reconciliations";
   }
 
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconcileContainerCommandHandler.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconcileContainerCommandHandler.java
index 99185a7e10..8a290d6744 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconcileContainerCommandHandler.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconcileContainerCommandHandler.java
@@ -28,28 +28,28 @@
 import org.apache.hadoop.ozone.protocol.commands.ReconcileContainerCommand;
 import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
 
-import java.util.concurrent.atomic.AtomicLong;
-
 /**
  * Handles commands from SCM to reconcile a container replica on this datanode 
with the replicas on its peers.
  */
 public class ReconcileContainerCommandHandler implements CommandHandler {
   private final ReplicationSupervisor supervisor;
-  private final AtomicLong invocationCount;
   private final DNContainerOperationClient dnClient;
+  private String metricsName;
 
   public ReconcileContainerCommandHandler(ReplicationSupervisor supervisor, 
DNContainerOperationClient dnClient) {
     this.supervisor = supervisor;
     this.dnClient = dnClient;
-    this.invocationCount = new AtomicLong(0);
   }
 
   @Override
   public void handle(SCMCommand command, OzoneContainer container, 
StateContext context,
       SCMConnectionManager connectionManager) {
-    invocationCount.incrementAndGet();
     ReconcileContainerCommand reconcileCommand = (ReconcileContainerCommand) 
command;
-    supervisor.addTask(new ReconcileContainerTask(container.getController(), 
dnClient, reconcileCommand));
+    ReconcileContainerTask task = new 
ReconcileContainerTask(container.getController(), dnClient, reconcileCommand);
+    if (metricsName == null) {
+      metricsName = task.getMetricName();
+    }
+    supervisor.addTask(task);
   }
 
   @Override
@@ -58,24 +58,30 @@ public SCMCommandProto.Type getCommandType() {
   }
 
   @Override
-  public int getInvocationCount() {
-    return (int)invocationCount.get();
+  public int getQueuedCount() {
+    return this.metricsName == null ? 0 : (int) this.supervisor
+        .getReplicationQueuedCount(metricsName);
   }
 
-  // Uses ReplicationSupervisor for these metrics.
+  @Override
+  public int getInvocationCount() {
+    return this.metricsName == null ? 0 : (int) this.supervisor
+        .getReplicationRequestCount(metricsName);
+  }
 
   @Override
   public long getAverageRunTime() {
-    return 0;
+    return this.metricsName == null ? 0 : (int) this.supervisor
+        .getReplicationRequestAvgTime(metricsName);
   }
 
   @Override
   public long getTotalRunTime() {
-    return 0;
+    return this.metricsName == null ? 0 : this.supervisor
+        .getReplicationRequestTotalTime(metricsName);
   }
 
-  @Override
-  public int getQueuedCount() {
-    return 0;
+  public String getMetricsName() {
+    return this.metricsName;
   }
 }
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
index 8374e45e17..4d31a4aff7 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
@@ -549,11 +549,11 @@ public long getReplicationQueuedCount(String metricsName) 
{
 
   public long getReplicationRequestAvgTime(String metricsName) {
     MutableRate rate = opsLatencyMs.get(metricsName);
-    return rate != null ? (long) rate.lastStat().mean() : 0;
+    return rate != null ? (long) Math.ceil(rate.lastStat().mean()) : 0;
   }
 
   public long getReplicationRequestTotalTime(String metricsName) {
     MutableRate rate = opsLatencyMs.get(metricsName);
-    return rate != null ? (long) rate.lastStat().total() : 0;
+    return rate != null ? (long) Math.ceil(rate.lastStat().total()) : 0;
   }
 }
diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java
 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java
index f27ed097d2..fbc0f9714a 100644
--- 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java
+++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java
@@ -74,6 +74,7 @@ public class TestReconcileContainerCommandHandler {
   private OzoneContainer ozoneContainer;
   private StateContext context;
   private ReconcileContainerCommandHandler subject;
+  private ReplicationSupervisor mockSupervisor;
 
   public void init(ContainerLayoutVersion layout, 
IncrementalReportSender<Container> icrSender)
       throws Exception {
@@ -81,7 +82,7 @@ public void init(ContainerLayoutVersion layout, 
IncrementalReportSender<Containe
     OzoneConfiguration conf = new OzoneConfiguration();
     DatanodeDetails dnDetails = randomDatanodeDetails();
 
-    ReplicationSupervisor mockSupervisor = mock(ReplicationSupervisor.class);
+    mockSupervisor = mock(ReplicationSupervisor.class);
     doAnswer(invocation -> {
       ((ReconcileContainerTask)invocation.getArguments()[0]).runTask();
       return null;
@@ -145,12 +146,21 @@ public void 
testReconcileContainerCommandMetrics(ContainerLayoutVersion layout)
 
     assertEquals(0, subject.getInvocationCount());
 
-    // All commands submitted will be blocked until the latch is counted down.
     for (int id = 1; id <= NUM_CONTAINERS; id++) {
       ReconcileContainerCommand cmd = new ReconcileContainerCommand(id, 
Collections.emptySet());
       subject.handle(cmd, ozoneContainer, context, null);
     }
+
+    
when(mockSupervisor.getReplicationRequestCount(subject.getMetricsName())).thenReturn(3L);
+    
when(mockSupervisor.getReplicationRequestTotalTime(subject.getMetricsName())).thenReturn(10L);
+    
when(mockSupervisor.getReplicationRequestAvgTime(subject.getMetricsName())).thenReturn(3L);
+    
when(mockSupervisor.getReplicationQueuedCount(subject.getMetricsName())).thenReturn(1L);
+
+    assertEquals(subject.getMetricsName(), "ContainerReconciliations");
     assertEquals(NUM_CONTAINERS, subject.getInvocationCount());
+    assertEquals(subject.getQueuedCount(), 1);
+    assertEquals(subject.getTotalRunTime(), 10);
+    assertEquals(subject.getAverageRunTime(), 3);
   }
 
   private void verifyAllContainerReports(Map<ContainerID, 
ContainerReplicaProto> reportsSent) {
diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java
 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java
index 315e0c0253..bfae06b56a 100644
--- 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java
+++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java
@@ -26,6 +26,7 @@
 import java.util.ArrayList;
 import java.time.Instant;
 import java.time.ZoneId;
+import java.util.Collections;
 import java.util.List;
 import java.util.SortedMap;
 import java.util.UUID;
@@ -50,6 +51,8 @@
 import org.apache.hadoop.hdds.security.symmetric.SecretKeySignerClient;
 import 
org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
 import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
+import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient;
+import org.apache.hadoop.ozone.container.checksum.ReconcileContainerTask;
 import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion;
 import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
 import 
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
@@ -66,6 +69,7 @@
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 
 import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
+import org.apache.hadoop.ozone.protocol.commands.ReconcileContainerCommand;
 import 
org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand;
 import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;
 import org.apache.ozone.test.GenericTestUtils;
@@ -93,6 +97,7 @@
 import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.anyList;
 import static org.mockito.Mockito.anyLong;
+import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
@@ -125,6 +130,8 @@ public class TestReplicationSupervisor {
   private StateContext context;
   private TestClock clock;
   private DatanodeDetails datanode;
+  private DNContainerOperationClient mockClient;
+  private ContainerController mockController;
 
   @BeforeEach
   public void setUp() throws Exception {
@@ -137,6 +144,8 @@ public void setUp() throws Exception {
         stateMachine, "");
     context.setTermOfLeaderSCM(CURRENT_TERM);
     datanode = MockDatanodeDetails.randomDatanodeDetails();
+    mockClient = mock(DNContainerOperationClient.class);
+    mockController = mock(ContainerController.class);
     when(stateMachine.getDatanodeDetails()).thenReturn(datanode);
   }
 
@@ -513,6 +522,56 @@ public void testMultipleReplication(ContainerLayoutVersion 
layout,
     }
   }
 
+  @ContainerLayoutTestInfo.ContainerTest
+  public void testReconciliationTaskMetrics(ContainerLayoutVersion layout) 
throws IOException {
+    this.layoutVersion = layout;
+    // GIVEN
+    ReplicationSupervisor replicationSupervisor =
+        supervisorWithReplicator(FakeReplicator::new);
+    ReplicationSupervisorMetrics replicationMetrics =
+        ReplicationSupervisorMetrics.create(replicationSupervisor);
+
+    try {
+      //WHEN
+      replicationSupervisor.addTask(createReconciliationTask(1L));
+      replicationSupervisor.addTask(createReconciliationTask(2L));
+
+      ReconcileContainerTask reconciliationTask = createReconciliationTask(6L);
+      clock.fastForward(15000);
+      replicationSupervisor.addTask(reconciliationTask);
+      
doThrow(IOException.class).when(mockController).reconcileContainer(any(), 
anyLong(), any());
+      replicationSupervisor.addTask(createReconciliationTask(7L));
+
+      //THEN
+      assertEquals(2, replicationSupervisor.getReplicationSuccessCount());
+
+      assertEquals(2, replicationSupervisor.getReplicationSuccessCount(
+          reconciliationTask.getMetricName()));
+      assertEquals(1, replicationSupervisor.getReplicationFailureCount());
+      assertEquals(1, replicationSupervisor.getReplicationFailureCount(
+          reconciliationTask.getMetricName()));
+      assertEquals(1, replicationSupervisor.getReplicationTimeoutCount());
+      assertEquals(1, replicationSupervisor.getReplicationTimeoutCount(
+          reconciliationTask.getMetricName()));
+      assertEquals(4, replicationSupervisor.getReplicationRequestCount());
+      assertEquals(4, replicationSupervisor.getReplicationRequestCount(
+          reconciliationTask.getMetricName()));
+
+
+      assertTrue(replicationSupervisor.getReplicationRequestTotalTime(
+          reconciliationTask.getMetricName()) > 0);
+      assertTrue(replicationSupervisor.getReplicationRequestAvgTime(
+          reconciliationTask.getMetricName()) > 0);
+
+      MetricsCollectorImpl replicationMetricsCollector = new 
MetricsCollectorImpl();
+      replicationMetrics.getMetrics(replicationMetricsCollector, true);
+      assertEquals(1, replicationMetricsCollector.getRecords().size());
+    } finally {
+      replicationMetrics.unRegister();
+      replicationSupervisor.stop();
+    }
+  }
+
   @ContainerLayoutTestInfo.ContainerTest
   public void testPriorityOrdering(ContainerLayoutVersion layout)
       throws InterruptedException {
@@ -691,6 +750,15 @@ private ReplicationTask createTask(long containerId) {
     return new ReplicationTask(cmd, replicatorRef.get());
   }
 
+  private ReconcileContainerTask createReconciliationTask(long containerId) {
+    ReconcileContainerCommand reconcileContainerCommand =
+        new ReconcileContainerCommand(containerId, 
Collections.singleton(datanode));
+    reconcileContainerCommand.setTerm(CURRENT_TERM);
+    reconcileContainerCommand.setDeadline(clock.millis() + 10000);
+    return new ReconcileContainerTask(mockController, mockClient,
+        reconcileContainerCommand);
+  }
+
   private ECReconstructionCoordinatorTask createECTask(long containerId) {
     return new ECReconstructionCoordinatorTask(null,
         createReconstructionCmdInfo(containerId));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to