This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 1afb6fa79b HDDS-7098. Provide a way for admin to identify all
unhealthy container replicas (#4443)
1afb6fa79b is described below
commit 1afb6fa79bfd599e54ef5e2f8235509768b75f0e
Author: Mladjan Gadzic <[email protected]>
AuthorDate: Mon May 22 10:50:07 2023 +0200
HDDS-7098. Provide a way for admin to identify all unhealthy container
replicas (#4443)
---
.../interface-client/src/main/proto/hdds.proto | 1 +
.../ozone/recon/persistence/ContainerHistory.java | 12 ++++++-
.../ozone/recon/scm/ContainerReplicaHistory.java | 17 +++++++--
.../ozone/recon/scm/ReconContainerManager.java | 23 ++++++++----
.../ozone/recon/api/TestContainerEndpoint.java | 41 ++++++++++++++++------
5 files changed, 73 insertions(+), 21 deletions(-)
diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto
b/hadoop-hdds/interface-client/src/main/proto/hdds.proto
index a8a748aad7..975e619160 100644
--- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto
@@ -416,6 +416,7 @@ message ContainerReplicaHistoryProto {
required int64 firstSeenTime = 2;
required int64 lastSeenTime = 3;
required int64 bcsId = 4;
+ optional string state = 5;
}
message SCMContainerReplicaProto {
diff --git
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java
index a1d0b5d454..9a0dccdc99 100644
---
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java
+++
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHistory.java
@@ -31,16 +31,18 @@ public class ContainerHistory implements Serializable {
private long firstSeenTime;
private long lastSeenTime;
private long bcsId;
+ private String state;
public ContainerHistory(long containerId, String datanodeUuid,
String datanodeHost, long firstSeenTime,
- long lastSeenTime, long lastBcsId) {
+ long lastSeenTime, long lastBcsId, String state) {
this.containerId = containerId;
this.datanodeUuid = datanodeUuid;
this.datanodeHost = datanodeHost;
this.firstSeenTime = firstSeenTime;
this.lastSeenTime = lastSeenTime;
this.bcsId = lastBcsId;
+ this.state = state;
}
public long getLastBcsId() {
@@ -86,4 +88,12 @@ public class ContainerHistory implements Serializable {
public void setLastSeenTime(long lastSeenTime) {
this.lastSeenTime = lastSeenTime;
}
+
+ public String getState() {
+ return state;
+ }
+
+ public void setState(String state) {
+ this.state = state;
+ }
}
diff --git
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java
index 79ea9b658e..6ba50fe50d 100644
---
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java
+++
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ContainerReplicaHistory.java
@@ -40,13 +40,15 @@ public class ContainerReplicaHistory {
private Long lastSeenTime;
private long bcsId;
+ private String state;
public ContainerReplicaHistory(UUID id, Long firstSeenTime,
- Long lastSeenTime, long bcsId) {
+ Long lastSeenTime, long bcsId, String state) {
this.uuid = id;
this.firstSeenTime = firstSeenTime;
this.lastSeenTime = lastSeenTime;
this.bcsId = bcsId;
+ this.state = state;
}
public long getBcsId() {
@@ -73,15 +75,24 @@ public class ContainerReplicaHistory {
this.lastSeenTime = lastSeenTime;
}
+ public String getState() {
+ return state;
+ }
+
+ public void setState(String state) {
+ this.state = state;
+ }
+
public static ContainerReplicaHistory fromProto(
ContainerReplicaHistoryProto proto) {
return new ContainerReplicaHistory(UUID.fromString(proto.getUuid()),
- proto.getFirstSeenTime(), proto.getLastSeenTime(), proto.getBcsId());
+ proto.getFirstSeenTime(), proto.getLastSeenTime(), proto.getBcsId(),
+ proto.getState());
}
public ContainerReplicaHistoryProto toProto() {
return ContainerReplicaHistoryProto.newBuilder().setUuid(uuid.toString())
.setFirstSeenTime(firstSeenTime).setLastSeenTime(lastSeenTime)
- .setBcsId(bcsId).build();
+ .setBcsId(bcsId).setState(state).build();
}
}
diff --git
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java
index 2fdcb91a44..4bc9ebeb69 100644
---
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java
+++
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java
@@ -280,6 +280,7 @@ public class ReconContainerManager extends
ContainerManagerImpl {
boolean flushToDB = false;
long bcsId = replica.getSequenceId() != null ? replica.getSequenceId() :
-1;
+ String state = replica.getState().toString();
// If replica doesn't exist in in-memory map, add to DB and add to map
if (replicaLastSeenMap == null) {
@@ -287,7 +288,7 @@ public class ReconContainerManager extends
ContainerManagerImpl {
replicaHistoryMap.putIfAbsent(id,
new ConcurrentHashMap<UUID, ContainerReplicaHistory>() {{
put(uuid, new ContainerReplicaHistory(uuid, currTime, currTime,
- bcsId));
+ bcsId, state));
}});
flushToDB = true;
} else {
@@ -296,17 +297,19 @@ public class ReconContainerManager extends
ContainerManagerImpl {
if (ts == null) {
// New Datanode
replicaLastSeenMap.put(uuid,
- new ContainerReplicaHistory(uuid, currTime, currTime, bcsId));
+ new ContainerReplicaHistory(uuid, currTime, currTime, bcsId,
+ state));
flushToDB = true;
} else {
// if the object exists, only update the last seen time & bcsId fields
ts.setLastSeenTime(currTime);
ts.setBcsId(bcsId);
+ ts.setState(state);
}
}
if (flushToDB) {
- upsertContainerHistory(id, uuid, currTime, bcsId);
+ upsertContainerHistory(id, uuid, currTime, bcsId, state);
}
}
@@ -322,6 +325,7 @@ public class ReconContainerManager extends
ContainerManagerImpl {
final long id = containerID.getId();
final DatanodeDetails dnInfo = replica.getDatanodeDetails();
final UUID uuid = dnInfo.getUuid();
+ String state = replica.getState().toString();
final Map<UUID, ContainerReplicaHistory> replicaLastSeenMap =
replicaHistoryMap.get(id);
@@ -329,7 +333,8 @@ public class ReconContainerManager extends
ContainerManagerImpl {
final ContainerReplicaHistory ts = replicaLastSeenMap.get(uuid);
if (ts != null) {
// Flush to DB, then remove from in-memory map
- upsertContainerHistory(id, uuid, ts.getLastSeenTime(), ts.getBcsId());
+ upsertContainerHistory(id, uuid, ts.getLastSeenTime(), ts.getBcsId(),
+ state);
replicaLastSeenMap.remove(uuid);
}
}
@@ -387,8 +392,10 @@ public class ReconContainerManager extends
ContainerManagerImpl {
final long firstSeenTime = entry.getValue().getFirstSeenTime();
final long lastSeenTime = entry.getValue().getLastSeenTime();
long bcsId = entry.getValue().getBcsId();
+ String state = entry.getValue().getState();
+
resList.add(new ContainerHistory(containerID, uuid.toString(), hostname,
- firstSeenTime, lastSeenTime, bcsId));
+ firstSeenTime, lastSeenTime, bcsId, state));
}
return resList;
}
@@ -423,17 +430,19 @@ public class ReconContainerManager extends
ContainerManagerImpl {
}
public void upsertContainerHistory(long containerID, UUID uuid, long time,
- long bcsId) {
+ long bcsId, String state) {
Map<UUID, ContainerReplicaHistory> tsMap;
try {
tsMap = cdbServiceProvider.getContainerReplicaHistory(containerID);
ContainerReplicaHistory ts = tsMap.get(uuid);
if (ts == null) {
// New entry
- tsMap.put(uuid, new ContainerReplicaHistory(uuid, time, time, bcsId));
+ tsMap.put(uuid, new ContainerReplicaHistory(uuid, time, time, bcsId,
+ state));
} else {
// Entry exists, update last seen time and put it back to DB.
ts.setLastSeenTime(time);
+ ts.setState(state);
}
cdbServiceProvider.storeContainerReplicaHistory(containerID, tsMap);
} catch (IOException e) {
diff --git
a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java
b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java
index 96c68a7101..55badb4d85 100644
---
a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java
+++
b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java
@@ -680,6 +680,9 @@ public class TestContainerEndpoint {
responseWithLimitObject.getContainers().stream().findFirst()
.orElse(null);
assertNotNull(containerWithLimit);
+ assertTrue(containerWithLimit.getReplicas().stream()
+ .map(ContainerHistory::getState)
+ .allMatch(s -> s.equals("UNHEALTHY")));
Collection<MissingContainerMetadata> recordsWithLimit
= responseWithLimitObject.getContainers();
@@ -761,6 +764,10 @@ public class TestContainerEndpoint {
Collection<UnhealthyContainerMetadata> records
= responseObject.getContainers();
+ assertTrue(records.stream()
+ .flatMap(containerMetadata -> containerMetadata.getReplicas().stream()
+ .map(ContainerHistory::getState))
+ .allMatch(s -> s.equals("UNHEALTHY")));
List<UnhealthyContainerMetadata> missing = records
.stream()
.filter(r -> r.getContainerState()
@@ -859,7 +866,10 @@ public class TestContainerEndpoint {
Collection<UnhealthyContainerMetadata> records
= responseObject.getContainers();
-
+ assertTrue(records.stream()
+ .flatMap(containerMetadata -> containerMetadata.getReplicas().stream()
+ .map(ContainerHistory::getState))
+ .allMatch(s -> s.equals("UNHEALTHY")));
// There should only be 5 missing containers and no others as we asked for
// only missing.
assertEquals(5, records.size());
@@ -890,6 +900,10 @@ public class TestContainerEndpoint {
UnhealthyContainersResponse firstBatch =
(UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers(
3, 1).getEntity();
+ assertTrue(firstBatch.getContainers().stream()
+ .flatMap(containerMetadata -> containerMetadata.getReplicas().stream()
+ .map(ContainerHistory::getState))
+ .allMatch(s -> s.equals("UNHEALTHY")));
UnhealthyContainersResponse secondBatch =
(UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers(
@@ -917,16 +931,19 @@ public class TestContainerEndpoint {
final UUID u2 = newDatanode("host2", "127.0.0.2");
final UUID u3 = newDatanode("host3", "127.0.0.3");
final UUID u4 = newDatanode("host4", "127.0.0.4");
- reconContainerManager.upsertContainerHistory(1L, u1, 1L, 1L);
- reconContainerManager.upsertContainerHistory(1L, u2, 2L, 1L);
- reconContainerManager.upsertContainerHistory(1L, u3, 3L, 1L);
- reconContainerManager.upsertContainerHistory(1L, u4, 4L, 1L);
+ reconContainerManager.upsertContainerHistory(1L, u1, 1L, 1L, "OPEN");
+ reconContainerManager.upsertContainerHistory(1L, u2, 2L, 1L, "OPEN");
+ reconContainerManager.upsertContainerHistory(1L, u3, 3L, 1L, "OPEN");
+ reconContainerManager.upsertContainerHistory(1L, u4, 4L, 1L, "OPEN");
- reconContainerManager.upsertContainerHistory(1L, u1, 5L, 1L);
+ reconContainerManager.upsertContainerHistory(1L, u1, 5L, 1L, "OPEN");
Response response = containerEndpoint.getReplicaHistoryForContainer(1L);
List<ContainerHistory> histories =
(List<ContainerHistory>) response.getEntity();
+ assertTrue(histories.stream()
+ .map(ContainerHistory::getState)
+ .allMatch(s -> s.equals("OPEN")));
Set<String> datanodes = Collections.unmodifiableSet(
new HashSet<>(Arrays.asList(
u1.toString(), u2.toString(), u3.toString(), u4.toString())));
@@ -1002,10 +1019,14 @@ public class TestContainerEndpoint {
missingList.add(missing);
containerHealthSchemaManager.insertUnhealthyContainerRecords(missingList);
- reconContainerManager.upsertContainerHistory(cID, uuid1, 1L, 1L);
- reconContainerManager.upsertContainerHistory(cID, uuid2, 2L, 1L);
- reconContainerManager.upsertContainerHistory(cID, uuid3, 3L, 1L);
- reconContainerManager.upsertContainerHistory(cID, uuid4, 4L, 1L);
+ reconContainerManager.upsertContainerHistory(cID, uuid1, 1L, 1L,
+ "UNHEALTHY");
+ reconContainerManager.upsertContainerHistory(cID, uuid2, 2L, 1L,
+ "UNHEALTHY");
+ reconContainerManager.upsertContainerHistory(cID, uuid3, 3L, 1L,
+ "UNHEALTHY");
+ reconContainerManager.upsertContainerHistory(cID, uuid4, 4L, 1L,
+ "UNHEALTHY");
}
protected ContainerWithPipeline getTestContainer(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]