slfan1989 commented on code in PR #7008:
URL: https://github.com/apache/ozone/pull/7008#discussion_r1772486520
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/ContainerSafeModeRule.java:
##########
@@ -71,83 +84,166 @@ public ContainerSafeModeRule(String ruleName, EventQueue eventQueue,
HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT +
" value should be >= 0.0 and <= 1.0");
- containerMap = new ConcurrentHashMap<>();
+ ratisContainerMap = new ConcurrentHashMap<>();
+ ratisContainerDNsMap = new ConcurrentHashMap<>();
+ ecContainerMap = new ConcurrentHashMap<>();
+ ecContainerDNsMap = new ConcurrentHashMap<>();
+
containers.forEach(container -> {
// There can be containers in OPEN/CLOSING state which were never
// created by the client. We are not considering these containers for
// now. These containers can be handled by tracking pipelines.
- Optional.ofNullable(container.getState())
- .filter(state -> (state == HddsProtos.LifeCycleState.QUASI_CLOSED ||
- state == HddsProtos.LifeCycleState.CLOSED)
- && container.getNumberOfKeys() > 0)
- .ifPresent(s -> containerMap.put(container.getContainerID(),
- container));
+ LifeCycleState containerState = container.getState();
+ ReplicationConfig replicationConfig = container.getReplicationConfig();
+
+ if (checkContainerState(containerState) && container.getNumberOfKeys() > 0) {
+ if (replicationConfig instanceof RatisReplicationConfig) {
+ ratisContainerMap.put(container.getContainerID(), container);
+ }
+ if (replicationConfig instanceof ECReplicationConfig) {
+ ecContainerMap.put(container.getContainerID(), container);
+ }
+ }
});
- maxContainer = containerMap.size();
- long cutOff = (long) Math.ceil(maxContainer * safeModeCutoff);
- getSafeModeMetrics().setNumContainerWithOneReplicaReportedThreshold(cutOff);
- LOG.info("containers with one replica threshold count {}", cutOff);
+ ratisMaxContainer = ratisContainerMap.size();
+ ecMaxContainer = ecContainerMap.size();
+
+ long ratisCutOff = (long) Math.ceil(ratisMaxContainer * safeModeCutoff);
+ long ecCutOff = (long) Math.ceil(ecMaxContainer * safeModeCutoff);
+
+ getSafeModeMetrics().setNumContainerWithOneReplicaReportedThreshold(ratisCutOff);
+
+ LOG.info("Containers with one replica threshold count {}, with ec n replica threshold count {}.",
+ ratisCutOff, ecCutOff);
}
@Override
protected TypedEvent<NodeRegistrationContainerReport> getEventType() {
- return SCMEvents.NODE_REGISTRATION_CONT_REPORT;
+ return SCMEvents.CONTAINER_REGISTRATION_REPORT;
}
@Override
protected synchronized boolean validate() {
- return getCurrentContainerThreshold() >= safeModeCutoff;
+ return (getCurrentContainerThreshold() >= safeModeCutoff) &&
+ (getCurrentECContainerThreshold() >= safeModeCutoff);
}
@VisibleForTesting
public synchronized double getCurrentContainerThreshold() {
- if (maxContainer == 0) {
+ if (ratisMaxContainer == 0) {
+ return 1;
+ }
+ return (ratisContainerWithMinReplicas.doubleValue() / ratisMaxContainer);
+ }
+
+ @VisibleForTesting
+ public synchronized double getCurrentECContainerThreshold() {
+ if (ecMaxContainer == 0) {
+ return 1;
+ }
+ return (ecContainerWithMinReplicas.doubleValue() / ecMaxContainer);
+ }
+
+ public synchronized double getEcMaxContainer() {
+ if (ecMaxContainer == 0) {
return 1;
}
- return (containerWithMinReplicas.doubleValue() / maxContainer);
+ return ecMaxContainer;
+ }
+
+ private synchronized double getRatisMaxContainer() {
+ if (ratisMaxContainer == 0) {
+ return 1;
+ }
+ return ratisMaxContainer;
}
@Override
protected synchronized void process(
NodeRegistrationContainerReport reportsProto) {
+ DatanodeDetails datanodeDetails = reportsProto.getDatanodeDetails();
+ UUID datanodeUUID = datanodeDetails.getUuid();
+ StorageContainerDatanodeProtocolProtos.ContainerReportsProto report = reportsProto.getReport();
+
+ report.getReportsList().forEach(c -> {
+ long containerID = c.getContainerID();
- reportsProto.getReport().getReportsList().forEach(c -> {
- if (containerMap.containsKey(c.getContainerID())) {
- if (containerMap.remove(c.getContainerID()) != null) {
- containerWithMinReplicas.getAndAdd(1);
- getSafeModeMetrics()
- .incCurrentContainersWithOneReplicaReportedCount();
+ if (ratisContainerMap.containsKey(containerID)) {
+ ratisContainerDNsMap.computeIfAbsent(containerID, key -> Sets.newHashSet());
+ ratisContainerDNsMap.get(containerID).add(datanodeUUID);
+ if (!reportedConatinerIDSet.contains(containerID)) {
+ Set<UUID> uuids = ratisContainerDNsMap.get(containerID);
+ if (uuids != null && uuids.size() >= 1) {
+ ratisContainerWithMinReplicas.getAndAdd(1);
+ reportedConatinerIDSet.add(containerID);
+ getSafeModeMetrics()
+ .incCurrentContainersWithOneReplicaReportedCount();
+ }
+ }
+ }
Review Comment:
@siddhantsangwan Can you help review this PR again? Thank you very much!
I improved some of the code, made it less repetitive, and added some
comments.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]