This is an automated email from the ASF dual-hosted git repository.

siddhant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new fd0eddda8c HDDS-7642. EC: Add debug logging to the Replication Manager check handlers (#4087)
fd0eddda8c is described below

commit fd0eddda8ce051188e694abb367cf14868d284ad
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Thu Dec 15 07:00:10 2022 +0000

    HDDS-7642. EC: Add debug logging to the Replication Manager check handlers (#4087)
---
 .../scm/container/replication/ReplicationManager.java  |  2 ++
 .../health/ClosedWithMismatchedReplicasHandler.java    |  3 +++
 .../health/ClosedWithUnhealthyReplicasHandler.java     |  4 +++-
 .../replication/health/ClosingContainerHandler.java    |  7 +++++++
 .../replication/health/DeletingContainerHandler.java   |  5 +++++
 .../replication/health/ECReplicationCheckHandler.java  | 18 +++++++++++++++++-
 .../replication/health/EmptyContainerHandler.java      |  6 ++----
 .../replication/health/OpenContainerHandler.java       | 11 ++++++++++-
 .../health/QuasiClosedContainerHandler.java            |  4 ++++
 .../health/RatisReplicationCheckHandler.java           | 14 ++++++++++++++
 10 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
index 21e08f460c..351eec7178 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
@@ -409,6 +409,8 @@ public class ReplicationManager implements SCMService {
    */
   public void sendDeleteCommand(final ContainerInfo container, int replicaIndex,
       final DatanodeDetails datanode) throws NotLeaderException {
+    LOG.debug("Sending delete command for container {} and index {} on {}",
+        container, replicaIndex, datanode);
     final DeleteContainerCommand deleteCommand =
         new DeleteContainerCommand(container.containerID(), false);
     deleteCommand.setReplicaIndex(replicaIndex);
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java
index eaf6664b5b..4428428d17 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java
@@ -34,6 +34,7 @@ import java.util.Set;
  * mis-matched replica to close it.
  */
 public class ClosedWithMismatchedReplicasHandler extends AbstractCheck {
+
   public static final Logger LOG =
       LoggerFactory.getLogger(ClosedWithMismatchedReplicasHandler.class);
 
@@ -59,6 +60,8 @@ public class ClosedWithMismatchedReplicasHandler extends AbstractCheck {
       // Handler is only relevant for CLOSED containers.
       return false;
     }
+    LOG.debug("Checking container {} in ClosedWithMismatchedReplicasHandler",
+        containerInfo);
 
     // close replica if its state is OPEN or CLOSING
     for (ContainerReplica replica : replicas) {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java
index fd4e3005ca..3b9b30d3a4 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java
@@ -77,7 +77,8 @@ public class ClosedWithUnhealthyReplicasHandler extends AbstractCheck {
     if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSED) {
       return false;
     }
-
+    LOG.debug("Checking container {} in ClosedWithUnhealthyReplicasHandler",
+        containerInfo);
     Set<ContainerReplica> replicas = request.getContainerReplicas();
     // create a set of indexes that are closed
     Set<Integer> closedIndexes = replicas.stream()
@@ -111,6 +112,7 @@ public class ClosedWithUnhealthyReplicasHandler extends AbstractCheck {
           ReplicationManagerReport.HealthState.UNHEALTHY,
           containerInfo.containerID());
     }
+    LOG.debug("Returning {} for container {}", foundUnhealthy, containerInfo);
     return foundUnhealthy;
   }
 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java
index 103f7d6646..c06581bf2a 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java
@@ -24,11 +24,16 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
 import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Class used in Replication Manager to close replicas of CLOSING containers.
  */
 public class ClosingContainerHandler extends AbstractCheck {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ClosingContainerHandler.class);
+
   private final ReplicationManager replicationManager;
 
   public ClosingContainerHandler(ReplicationManager replicationManager) {
@@ -50,6 +55,8 @@ public class ClosingContainerHandler extends AbstractCheck {
     if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSING) {
       return false;
     }
+    LOG.debug("Checking container {} in ClosingContainerHandler",
+        containerInfo);
 
     boolean forceClose = request.getContainerInfo().getReplicationConfig()
         .getReplicationType() != HddsProtos.ReplicationType.RATIS;
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java
index a31e5ecb16..9bc0160a3b 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java
@@ -60,14 +60,19 @@ public class DeletingContainerHandler extends AbstractCheck {
     HddsProtos.LifeCycleState containerState = containerInfo.getState();
 
     if (containerState == HddsProtos.LifeCycleState.DELETED) {
+      LOG.debug("Container {} is DELETED so returning true", containerInfo);
       return true;
     }
 
     if (containerState != HddsProtos.LifeCycleState.DELETING) {
       return false;
     }
+    LOG.debug("Checking container {} in DeletingContainerHandler",
+        containerInfo);
 
     if (request.getContainerReplicas().size() == 0) {
+      LOG.debug("Deleting Container {} has no replicas so marking for cleanup" +
+          " and returning true", containerInfo);
       replicationManager.updateContainerState(
           cID, HddsProtos.LifeCycleEvent.CLEANUP);
       return true;
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java
index 2aba554498..cdd2f565ea 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp;
 import org.apache.hadoop.hdds.scm.container.replication.ECContainerReplicaCount;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -44,6 +46,9 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.E
  */
 public class ECReplicationCheckHandler extends AbstractCheck {
 
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ECReplicationCheckHandler.class);
+
   private final PlacementPolicy placementPolicy;
 
   public ECReplicationCheckHandler(PlacementPolicy placementPolicy) {
@@ -60,6 +65,7 @@ public class ECReplicationCheckHandler extends AbstractCheck {
     ContainerInfo container = request.getContainerInfo();
     ContainerID containerID = container.containerID();
     ContainerHealthResult health = checkHealth(request);
+    LOG.debug("Checking container {} in ECReplicationCheckHandler", container);
     if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) {
       // If the container is healthy, there is nothing else to do in this
       // handler so return as unhandled so any further handlers will be tried.
@@ -82,6 +88,10 @@ public class ECReplicationCheckHandler extends AbstractCheck {
           !underHealth.isUnrecoverable()) {
         request.getReplicationQueue().enqueue(underHealth);
       }
+      LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending "
+          + "is [{}]. isUnrecoverable is [{}]", container,
+          underHealth.isReplicatedOkAfterPending(),
+          underHealth.isUnrecoverable());
       return true;
     } else if (health.getHealthState()
         == ContainerHealthResult.HealthState.OVER_REPLICATED) {
@@ -92,6 +102,8 @@ public class ECReplicationCheckHandler extends AbstractCheck {
       if (!overHealth.isReplicatedOkAfterPending()) {
         request.getReplicationQueue().enqueue(overHealth);
       }
+      LOG.debug("Container {} is Over Replicated. isReplicatedOkAfterPending "
+          + "is [{}]", container, overHealth.isReplicatedOkAfterPending());
       return true;
     } else if (health.getHealthState() ==
         ContainerHealthResult.HealthState.MIS_REPLICATED) {
@@ -102,10 +114,14 @@ public class ECReplicationCheckHandler extends AbstractCheck {
       if (!misRepHealth.isReplicatedOkAfterPending()) {
         request.getReplicationQueue().enqueue(misRepHealth);
       }
+      LOG.debug("Container {} is Mis Replicated. isReplicatedOkAfterPending "
+          + "is [{}]", container, misRepHealth.isReplicatedOkAfterPending());
       return true;
     }
-    // Should not get here, but incase it does the container is not healthy,
+    // Should not get here, but in case it does the container is not healthy,
     // but is also not under or over replicated.
+    LOG.warn("Container {} is not healthy but is not under, over or "
+        + " mis-replicated. Should not happen.", container);
     return false;
   }
 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java
index 19c8d0a93a..808f3f4165 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java
@@ -60,7 +60,8 @@ public class EmptyContainerHandler extends AbstractCheck {
       request.getReport()
           .incrementAndSample(ReplicationManagerReport.HealthState.EMPTY,
               containerInfo.containerID());
-
+      LOG.debug("Container {} is empty and closed, marking as DELETING",
+          containerInfo);
       // delete replicas if they are closed and empty
       deleteContainerReplicas(containerInfo, replicas);
 
@@ -109,9 +110,6 @@ public class EmptyContainerHandler extends AbstractCheck {
           rp.getState() == ContainerReplicaProto.State.CLOSED);
       Preconditions.assertTrue(rp.getKeyCount() == 0);
 
-      LOG.debug("Trying to delete empty replica with index {} for container " +
-              "{} on datanode {}", rp.getReplicaIndex(),
-          containerInfo.containerID(), rp.getDatanodeDetails().getUuidString());
       try {
         replicationManager.sendDeleteCommand(containerInfo,
             rp.getReplicaIndex(), rp.getDatanodeDetails());
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java
index 666b1a2287..a644f5e834 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
 import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.Set;
 
@@ -36,7 +38,10 @@ import static org.apache.hadoop.hdds.scm.container.replication.ReplicationManage
  */
 public class OpenContainerHandler extends AbstractCheck {
 
-  private ReplicationManager replicationManager;
+  private static final Logger LOG =
+      LoggerFactory.getLogger(OpenContainerHandler.class);
+
+  private final ReplicationManager replicationManager;
 
   public OpenContainerHandler(ReplicationManager replicationManager) {
     this.replicationManager = replicationManager;
@@ -46,10 +51,14 @@ public class OpenContainerHandler extends AbstractCheck {
   public boolean handle(ContainerCheckRequest request) {
     ContainerInfo containerInfo = request.getContainerInfo();
     if (containerInfo.getState() == HddsProtos.LifeCycleState.OPEN) {
+      LOG.debug("Checking open container {} in OpenContainerHandler",
+          containerInfo);
       if (!isOpenContainerHealthy(
           containerInfo, request.getContainerReplicas())) {
         // This is an unhealthy open container, so we need to trigger the
         // close process on it.
+        LOG.debug("Container {} is open but unhealthy. Triggering close.",
+            containerInfo);
         request.getReport().incrementAndSample(
             ReplicationManagerReport.HealthState.OPEN_UNHEALTHY,
             containerInfo.containerID());
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java
index 449d0776e1..01f6a05d77 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java
@@ -64,12 +64,16 @@ public class QuasiClosedContainerHandler extends AbstractCheck {
     if (containerInfo.getState() != HddsProtos.LifeCycleState.QUASI_CLOSED) {
       return false;
     }
+    LOG.debug("Checking container {} in QuasiClosedContainerHandler",
+        containerInfo);
 
     Set<ContainerReplica> replicas = request.getContainerReplicas();
     if (canForceCloseContainer(containerInfo, replicas)) {
       forceCloseContainer(containerInfo, replicas);
       return true;
     } else {
+      LOG.debug("Container {} cannot be force closed and is stuck in " +
+              "QUASI_CLOSED", containerInfo);
       request.getReport().incrementAndSample(
           ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK,
           containerInfo.containerID());
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
index 9d33498dda..91dd51a607 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
@@ -66,6 +66,8 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
     ReplicationManagerReport report = request.getReport();
     ContainerInfo container = request.getContainerInfo();
     ContainerHealthResult health = checkHealth(request);
+    LOG.debug("Checking container {} in RatisReplicationCheckHandler",
+        container);
     if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) {
       // If the container is healthy, there is nothing else to do in this
       // handler so return as unhandled so any further handlers will be tried.
@@ -88,6 +90,10 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
           !underHealth.isReplicatedOkAfterPending()) {
         request.getReplicationQueue().enqueue(underHealth);
       }
+      LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending" +
+          " is [{}]. isUnrecoverable is [{}]", container,
+          underHealth.isReplicatedOkAfterPending(),
+          underHealth.isUnrecoverable());
       return true;
     }
 
@@ -101,6 +107,8 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
       if (!overHealth.isReplicatedOkAfterPending()) {
         request.getReplicationQueue().enqueue(overHealth);
       }
+      LOG.debug("Container {} is Over Replicated. isReplicatedOkAfterPending" +
+              " is [{}]", container, overHealth.isReplicatedOkAfterPending());
       return true;
     }
     if (health.getHealthState() ==
@@ -113,8 +121,14 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
       if (!misRepHealth.isReplicatedOkAfterPending()) {
         request.getReplicationQueue().enqueue(misRepHealth);
       }
+      LOG.debug("Container {} is Mid Replicated. isReplicatedOkAfterPending" +
+          " is [{}]", container, misRepHealth.isReplicatedOkAfterPending());
       return true;
     }
+    // Should not get here, but in case it does the container is not healthy,
+    // but is also not under, over or mis replicated.
+    LOG.warn("Container {} is not healthy but is not under, over or "
+        + " mis-replicated. Should not happen.", container);
     return false;
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to