This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 1afb6fa79b HDDS-7098. Provide a way for admin to identify all unhealthy container replicas (#4443)
1afb6fa79b is described below

commit 1afb6fa79bfd599e54ef5e2f8235509768b75f0e
Author: Mladjan Gadzic <[email protected]>
AuthorDate: Mon May 22 10:50:07 2023 +0200

    HDDS-7098. Provide a way for admin to identify all unhealthy container replicas (#4443)
---
 .../interface-client/src/main/proto/hdds.proto     |  1 +
 .../ozone/recon/persistence/ContainerHistory.java  | 12 ++++++-
 .../ozone/recon/scm/ContainerReplicaHistory.java   | 17 +++++++--
 .../ozone/recon/scm/ReconContainerManager.java     | 23 ++++++++----
 .../ozone/recon/api/TestContainerEndpoint.java     | 41 ++++++++++++++++------
 5 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto
index a8a748aad7..975e619160 100644
--- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto
@@ -416,6 +416,7 @@ message ContainerReplicaHistoryProto {
     required int64 firstSeenTime = 2;
     required int64 lastSeenTime = 3;
     required int64 bcsId = 4;
+    optional string state = 5;
 }
 
 message SCMContainerReplicaProto {
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java
index a1d0b5d454..9a0dccdc99 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java
@@ -31,16 +31,18 @@ public class ContainerHistory implements Serializable {
   private long firstSeenTime;
   private long lastSeenTime;
   private long bcsId;
+  private String state;
 
   public ContainerHistory(long containerId, String datanodeUuid,
                           String datanodeHost, long firstSeenTime,
-                          long lastSeenTime, long lastBcsId) {
+                          long lastSeenTime, long lastBcsId, String state) {
     this.containerId = containerId;
     this.datanodeUuid = datanodeUuid;
     this.datanodeHost = datanodeHost;
     this.firstSeenTime = firstSeenTime;
     this.lastSeenTime = lastSeenTime;
     this.bcsId = lastBcsId;
+    this.state = state;
   }
 
   public long getLastBcsId() {
@@ -86,4 +88,12 @@ public class ContainerHistory implements Serializable {
   public void setLastSeenTime(long lastSeenTime) {
     this.lastSeenTime = lastSeenTime;
   }
+
+  public String getState() {
+    return state;
+  }
+
+  public void setState(String state) {
+    this.state = state;
+  }
 }
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java
index 79ea9b658e..6ba50fe50d 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java
@@ -40,13 +40,15 @@ public class ContainerReplicaHistory {
   private Long lastSeenTime;
 
   private long bcsId;
+  private String state;
 
   public ContainerReplicaHistory(UUID id, Long firstSeenTime,
-      Long lastSeenTime, long bcsId) {
+      Long lastSeenTime, long bcsId, String state) {
     this.uuid = id;
     this.firstSeenTime = firstSeenTime;
     this.lastSeenTime = lastSeenTime;
     this.bcsId = bcsId;
+    this.state = state;
   }
 
   public long getBcsId() {
@@ -73,15 +75,24 @@ public class ContainerReplicaHistory {
     this.lastSeenTime = lastSeenTime;
   }
 
+  public String getState() {
+    return state;
+  }
+
+  public void setState(String state) {
+    this.state = state;
+  }
+
   public static ContainerReplicaHistory fromProto(
       ContainerReplicaHistoryProto proto) {
     return new ContainerReplicaHistory(UUID.fromString(proto.getUuid()),
-        proto.getFirstSeenTime(), proto.getLastSeenTime(), proto.getBcsId());
+        proto.getFirstSeenTime(), proto.getLastSeenTime(), proto.getBcsId(),
+        proto.getState());
   }
 
   public ContainerReplicaHistoryProto toProto() {
     return ContainerReplicaHistoryProto.newBuilder().setUuid(uuid.toString())
         .setFirstSeenTime(firstSeenTime).setLastSeenTime(lastSeenTime)
-        .setBcsId(bcsId).build();
+        .setBcsId(bcsId).setState(state).build();
   }
 }
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java
index 2fdcb91a44..4bc9ebeb69 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java
@@ -280,6 +280,7 @@ public class ReconContainerManager extends ContainerManagerImpl {
 
     boolean flushToDB = false;
     long bcsId = replica.getSequenceId() != null ? replica.getSequenceId() : -1;
+    String state = replica.getState().toString();
 
     // If replica doesn't exist in in-memory map, add to DB and add to map
     if (replicaLastSeenMap == null) {
@@ -287,7 +288,7 @@ public class ReconContainerManager extends ContainerManagerImpl {
       replicaHistoryMap.putIfAbsent(id,
           new ConcurrentHashMap<UUID, ContainerReplicaHistory>() {{
             put(uuid, new ContainerReplicaHistory(uuid, currTime, currTime,
-                bcsId));
+                bcsId, state));
           }});
       flushToDB = true;
     } else {
@@ -296,17 +297,19 @@ public class ReconContainerManager extends ContainerManagerImpl {
       if (ts == null) {
         // New Datanode
         replicaLastSeenMap.put(uuid,
-            new ContainerReplicaHistory(uuid, currTime, currTime, bcsId));
+            new ContainerReplicaHistory(uuid, currTime, currTime, bcsId,
+                state));
         flushToDB = true;
       } else {
         // if the object exists, only update the last seen time & bcsId fields
         ts.setLastSeenTime(currTime);
         ts.setBcsId(bcsId);
+        ts.setState(state);
       }
     }
 
     if (flushToDB) {
-      upsertContainerHistory(id, uuid, currTime, bcsId);
+      upsertContainerHistory(id, uuid, currTime, bcsId, state);
     }
   }
 
@@ -322,6 +325,7 @@ public class ReconContainerManager extends ContainerManagerImpl {
     final long id = containerID.getId();
     final DatanodeDetails dnInfo = replica.getDatanodeDetails();
     final UUID uuid = dnInfo.getUuid();
+    String state = replica.getState().toString();
 
     final Map<UUID, ContainerReplicaHistory> replicaLastSeenMap =
         replicaHistoryMap.get(id);
@@ -329,7 +333,8 @@ public class ReconContainerManager extends ContainerManagerImpl {
       final ContainerReplicaHistory ts = replicaLastSeenMap.get(uuid);
       if (ts != null) {
         // Flush to DB, then remove from in-memory map
-        upsertContainerHistory(id, uuid, ts.getLastSeenTime(), ts.getBcsId());
+        upsertContainerHistory(id, uuid, ts.getLastSeenTime(), ts.getBcsId(),
+            state);
         replicaLastSeenMap.remove(uuid);
       }
     }
@@ -387,8 +392,10 @@ public class ReconContainerManager extends ContainerManagerImpl {
       final long firstSeenTime = entry.getValue().getFirstSeenTime();
       final long lastSeenTime = entry.getValue().getLastSeenTime();
       long bcsId = entry.getValue().getBcsId();
+      String state = entry.getValue().getState();
+
       resList.add(new ContainerHistory(containerID, uuid.toString(), hostname,
-          firstSeenTime, lastSeenTime, bcsId));
+          firstSeenTime, lastSeenTime, bcsId, state));
     }
     return resList;
   }
@@ -423,17 +430,19 @@ public class ReconContainerManager extends ContainerManagerImpl {
   }
 
   public void upsertContainerHistory(long containerID, UUID uuid, long time,
-                                     long bcsId) {
+                                     long bcsId, String state) {
     Map<UUID, ContainerReplicaHistory> tsMap;
     try {
       tsMap = cdbServiceProvider.getContainerReplicaHistory(containerID);
       ContainerReplicaHistory ts = tsMap.get(uuid);
       if (ts == null) {
         // New entry
-        tsMap.put(uuid, new ContainerReplicaHistory(uuid, time, time, bcsId));
+        tsMap.put(uuid, new ContainerReplicaHistory(uuid, time, time, bcsId,
+            state));
       } else {
         // Entry exists, update last seen time and put it back to DB.
         ts.setLastSeenTime(time);
+        ts.setState(state);
       }
       cdbServiceProvider.storeContainerReplicaHistory(containerID, tsMap);
     } catch (IOException e) {
diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java
index 96c68a7101..55badb4d85 100644
--- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java
+++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java
@@ -680,6 +680,9 @@ public class TestContainerEndpoint {
             responseWithLimitObject.getContainers().stream().findFirst()
                     .orElse(null);
     assertNotNull(containerWithLimit);
+    assertTrue(containerWithLimit.getReplicas().stream()
+        .map(ContainerHistory::getState)
+        .allMatch(s -> s.equals("UNHEALTHY")));
 
     Collection<MissingContainerMetadata> recordsWithLimit
             = responseWithLimitObject.getContainers();
@@ -761,6 +764,10 @@ public class TestContainerEndpoint {
 
     Collection<UnhealthyContainerMetadata> records
         = responseObject.getContainers();
+    assertTrue(records.stream()
+        .flatMap(containerMetadata -> containerMetadata.getReplicas().stream()
+            .map(ContainerHistory::getState))
+        .allMatch(s -> s.equals("UNHEALTHY")));
     List<UnhealthyContainerMetadata> missing = records
         .stream()
         .filter(r -> r.getContainerState()
@@ -859,7 +866,10 @@ public class TestContainerEndpoint {
 
     Collection<UnhealthyContainerMetadata> records
         = responseObject.getContainers();
-
+    assertTrue(records.stream()
+        .flatMap(containerMetadata -> containerMetadata.getReplicas().stream()
+            .map(ContainerHistory::getState))
+            .allMatch(s -> s.equals("UNHEALTHY")));
     // There should only be 5 missing containers and no others as we asked for
     // only missing.
     assertEquals(5, records.size());
@@ -890,6 +900,10 @@ public class TestContainerEndpoint {
     UnhealthyContainersResponse firstBatch =
         (UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers(
             3, 1).getEntity();
+    assertTrue(firstBatch.getContainers().stream()
+        .flatMap(containerMetadata -> containerMetadata.getReplicas().stream()
+            .map(ContainerHistory::getState))
+        .allMatch(s -> s.equals("UNHEALTHY")));
 
     UnhealthyContainersResponse secondBatch =
         (UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers(
@@ -917,16 +931,19 @@ public class TestContainerEndpoint {
     final UUID u2 = newDatanode("host2", "127.0.0.2");
     final UUID u3 = newDatanode("host3", "127.0.0.3");
     final UUID u4 = newDatanode("host4", "127.0.0.4");
-    reconContainerManager.upsertContainerHistory(1L, u1, 1L, 1L);
-    reconContainerManager.upsertContainerHistory(1L, u2, 2L, 1L);
-    reconContainerManager.upsertContainerHistory(1L, u3, 3L, 1L);
-    reconContainerManager.upsertContainerHistory(1L, u4, 4L, 1L);
+    reconContainerManager.upsertContainerHistory(1L, u1, 1L, 1L, "OPEN");
+    reconContainerManager.upsertContainerHistory(1L, u2, 2L, 1L, "OPEN");
+    reconContainerManager.upsertContainerHistory(1L, u3, 3L, 1L, "OPEN");
+    reconContainerManager.upsertContainerHistory(1L, u4, 4L, 1L, "OPEN");
 
-    reconContainerManager.upsertContainerHistory(1L, u1, 5L, 1L);
+    reconContainerManager.upsertContainerHistory(1L, u1, 5L, 1L, "OPEN");
 
     Response response = containerEndpoint.getReplicaHistoryForContainer(1L);
     List<ContainerHistory> histories =
         (List<ContainerHistory>) response.getEntity();
+    assertTrue(histories.stream()
+        .map(ContainerHistory::getState)
+        .allMatch(s -> s.equals("OPEN")));
     Set<String> datanodes = Collections.unmodifiableSet(
         new HashSet<>(Arrays.asList(
             u1.toString(), u2.toString(), u3.toString(), u4.toString())));
@@ -1002,10 +1019,14 @@ public class TestContainerEndpoint {
     missingList.add(missing);
     containerHealthSchemaManager.insertUnhealthyContainerRecords(missingList);
 
-    reconContainerManager.upsertContainerHistory(cID, uuid1, 1L, 1L);
-    reconContainerManager.upsertContainerHistory(cID, uuid2, 2L, 1L);
-    reconContainerManager.upsertContainerHistory(cID, uuid3, 3L, 1L);
-    reconContainerManager.upsertContainerHistory(cID, uuid4, 4L, 1L);
+    reconContainerManager.upsertContainerHistory(cID, uuid1, 1L, 1L,
+        "UNHEALTHY");
+    reconContainerManager.upsertContainerHistory(cID, uuid2, 2L, 1L,
+        "UNHEALTHY");
+    reconContainerManager.upsertContainerHistory(cID, uuid3, 3L, 1L,
+        "UNHEALTHY");
+    reconContainerManager.upsertContainerHistory(cID, uuid4, 4L, 1L,
+        "UNHEALTHY");
   }
 
   protected ContainerWithPipeline getTestContainer(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to