This is an automated email from the ASF dual-hosted git repository.
sumitagrawl pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 9e0402b0043 HDDS-15238. ContainerSafeModeRule containers list refresh
with only removing deleted containers (#10249)
9e0402b0043 is described below
commit 9e0402b004397474ca3ab2beac2adf514c765fca
Author: Sumit Agrawal <[email protected]>
AuthorDate: Fri May 15 15:11:46 2026 +0530
HDDS-15238. ContainerSafeModeRule containers list refresh with only
removing deleted containers (#10249)
---
.../safemode/AbstractContainerSafeModeRule.java | 24 ++++++++-
.../AbstractContainerSafeModeRuleTest.java | 61 +++++++++++++++-------
2 files changed, 66 insertions(+), 19 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRule.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRule.java
index 09480009455..a5e3a4eed0f 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRule.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRule.java
@@ -24,6 +24,7 @@
import com.google.common.base.Preconditions;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
@@ -80,6 +81,27 @@ protected void initializeRule() {
totalContainers.set(containers.size());
final long cutOff = (long) Math.ceil(getTotalNumberOfContainers() *
getSafeModeCutoff());
getSafeModeMetrics().setNumContainerReportedThreshold(getContainerType(),
cutOff);
+ SCMSafeModeManager.getLogger().info("Initialized {} Containers threshold
count to {}.", getContainerType(), cutOff);
+ }
+
+ protected void reinitializeRule() {
+ // Remove closed containers that are moved to deleted state as DN will not
report those containers during
+ // registration. Update totalContainers, cutoff and threshold based on
reduced containers.
+ // Since ContainerSafeModeRule is updated with container list notified
during DN registration only,
+ // So its not required to add newly created container after DN
registration.
+ int oldContainerCount = containers.size();
+ Set<ContainerID> containerInfoSet =
containerManager.getContainers(getContainerType()).stream()
+ .filter(this::isClosed)
+ .filter(c -> c.getNumberOfKeys() > 0)
+ .filter(c ->
containers.containsKey(ContainerID.valueOf(c.getContainerID())))
+ .map(c -> ContainerID.valueOf(c.getContainerID()))
+ .collect(Collectors.toSet());
+ // remove deleted containers from containers list
+ containers.keySet().removeIf(c -> !containerInfoSet.contains(c));
+ // update new total with reducing removed containers
+ totalContainers.set(totalContainers.get() - (oldContainerCount -
containers.size()));
+ final long cutOff = (long) Math.ceil(getTotalNumberOfContainers() *
getSafeModeCutoff());
+ getSafeModeMetrics().setNumContainerReportedThreshold(getContainerType(),
cutOff);
SCMSafeModeManager.getLogger().info("Refreshed {} Containers threshold
count to {}.", getContainerType(), cutOff);
}
@@ -136,7 +158,7 @@ public double getCurrentContainerThreshold() {
@Override
public synchronized void refresh(boolean forceRefresh) {
if (forceRefresh || !validate()) {
- initializeRule();
+ reinitializeRule();
}
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRuleTest.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRuleTest.java
index 7bfdecc7196..77485e722b9 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRuleTest.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRuleTest.java
@@ -50,14 +50,17 @@
*/
public abstract class AbstractContainerSafeModeRuleTest {
private List<ContainerInfo> containers;
- private AbstractContainerSafeModeRule rule;
+ private SCMSafeModeManager safeModeManager;
+ private ConfigurationSource conf;
+ private ContainerManager containerManager;
+ private EventQueue eventQueue;
@BeforeEach
public void setup() throws ContainerNotFoundException {
- final ContainerManager containerManager = mock(ContainerManager.class);
- final ConfigurationSource conf = mock(ConfigurationSource.class);
- final EventQueue eventQueue = mock(EventQueue.class);
- final SCMSafeModeManager safeModeManager = mock(SCMSafeModeManager.class);
+ containerManager = mock(ContainerManager.class);
+ conf = mock(ConfigurationSource.class);
+ eventQueue = mock(EventQueue.class);
+ safeModeManager = mock(SCMSafeModeManager.class);
final SafeModeMetrics metrics = mock(SafeModeMetrics.class);
when(safeModeManager.getSafeModeMetrics()).thenReturn(metrics);
@@ -70,18 +73,25 @@ public void setup() throws ContainerNotFoundException {
.findFirst()
.orElseThrow(ContainerNotFoundException::new);
});
-
- rule = createRule(eventQueue, conf, containerManager, safeModeManager);
- rule.setValidateBasedOnReportProcessing(false);
}
@Test
public void testRefreshInitializeContainers() {
containers.add(mockContainer(LifeCycleState.OPEN, 1L));
containers.add(mockContainer(LifeCycleState.CLOSED, 2L));
+ containers.add(mockContainer(LifeCycleState.CLOSED, 8L));
+ AbstractContainerSafeModeRule rule = createRule(eventQueue, conf,
containerManager, safeModeManager);
+ rule.setValidateBasedOnReportProcessing(false);
+ assertEquals(2, rule.getTotalNumberOfContainers(), "Total number of
containers should be 2");
+ containers.add(mockContainer(LifeCycleState.CLOSED, 2L));
+ containers.add(mockContainer(LifeCycleState.OPEN, 3L));
+ containers.add(mockContainer(LifeCycleState.CLOSED, 4L));
+ containers.removeIf(c -> c.containerID().equals(ContainerID.valueOf(8L)));
+ containers.add(mockContainer(LifeCycleState.DELETED, 8L));
rule.refresh(true);
assertEquals(0.0, rule.getCurrentContainerThreshold());
+ assertEquals(1, rule.getTotalNumberOfContainers(), "Total number of
containers should be 1 after delete");
}
@ParameterizedTest
@@ -89,7 +99,8 @@ public void testRefreshInitializeContainers() {
names = {"OPEN", "CLOSING", "QUASI_CLOSED", "CLOSED", "DELETING",
"DELETED", "RECOVERING"})
public void testValidateReturnsTrueAndFalse(LifeCycleState state) {
containers.add(mockContainer(state, 1L));
- rule.refresh(true);
+ AbstractContainerSafeModeRule rule = createRule(eventQueue, conf,
containerManager, safeModeManager);
+ rule.setValidateBasedOnReportProcessing(false);
boolean expected = state != LifeCycleState.QUASI_CLOSED && state !=
LifeCycleState.CLOSED;
assertEquals(expected, rule.validate());
@@ -99,7 +110,8 @@ public void testValidateReturnsTrueAndFalse(LifeCycleState
state) {
public void testProcessContainer() {
long containerId = 123L;
containers.add(mockContainer(LifeCycleState.CLOSED, containerId));
- rule.refresh(true);
+ AbstractContainerSafeModeRule rule = createRule(eventQueue, conf,
containerManager, safeModeManager);
+ rule.setValidateBasedOnReportProcessing(false);
assertEquals(0.0, rule.getCurrentContainerThreshold());
@@ -129,29 +141,42 @@ private NodeRegistrationContainerReport
getNewContainerReport(long containerID)
@Test
public void testAllContainersClosed() {
+ containers.add(mockContainer(LifeCycleState.CLOSED, 1L));
+ AbstractContainerSafeModeRule rule = createRule(eventQueue, conf,
containerManager, safeModeManager);
+ rule.setValidateBasedOnReportProcessing(false);
containers.add(mockContainer(LifeCycleState.CLOSED, 11L));
containers.add(mockContainer(LifeCycleState.CLOSED, 32L));
rule.refresh(true);
assertEquals(0.0, rule.getCurrentContainerThreshold(), "Threshold should
be 0.0 when all containers are closed");
assertFalse(rule.validate(), "Validate should return false when all
containers are closed");
+ assertEquals(1, rule.getTotalNumberOfContainers(), "Total number of
containers should be 1 even after refresh");
}
@Test
public void testAllContainersOpen() {
containers.add(mockContainer(LifeCycleState.OPEN, 11L));
containers.add(mockContainer(LifeCycleState.OPEN, 32L));
- rule.refresh(true);
+ AbstractContainerSafeModeRule rule = createRule(eventQueue, conf,
containerManager, safeModeManager);
+ rule.setValidateBasedOnReportProcessing(false);
assertEquals(1.0, rule.getCurrentContainerThreshold(), "Threshold should
be 1.0 when all containers are open");
assertTrue(rule.validate(), "Validate should return true when all
containers are open");
+
+ containers.add(mockContainer(LifeCycleState.OPEN, 11L));
+ containers.add(mockContainer(LifeCycleState.OPEN, 32L));
+ rule.refresh(true);
+
+ assertEquals(1.0, rule.getCurrentContainerThreshold(), "Threshold should
be 1.0 after refresh also");
+ assertTrue(rule.validate(), "Validate should return true when all
containers are open");
}
@Test
public void testDuplicateContainerIdsInReports() {
long containerId = 42L;
containers.add(mockContainer(LifeCycleState.OPEN, containerId));
- rule.refresh(true);
+ AbstractContainerSafeModeRule rule = createRule(eventQueue, conf,
containerManager, safeModeManager);
+ rule.setValidateBasedOnReportProcessing(false);
ContainerReplicaProto replica = mock(ContainerReplicaProto.class);
ContainerReportsProto containerReport = mock(ContainerReportsProto.class);
@@ -172,10 +197,10 @@ public void testDuplicateContainerIdsInReports() {
@Test
public void testValidateBasedOnReportProcessingTrue() {
- rule.setValidateBasedOnReportProcessing(true);
long containerId = 1L;
containers.add(mockContainer(LifeCycleState.OPEN, containerId));
- rule.refresh(true);
+ AbstractContainerSafeModeRule rule = createRule(eventQueue, conf,
containerManager, safeModeManager);
+ rule.setValidateBasedOnReportProcessing(true);
ContainerReplicaProto replica = mock(ContainerReplicaProto.class);
ContainerReportsProto reportsProto = mock(ContainerReportsProto.class);
@@ -196,10 +221,10 @@ public void testValidateBasedOnReportProcessingTrue() {
protected abstract ReplicationType getReplicationType();
protected abstract AbstractContainerSafeModeRule createRule(
- EventQueue eventQueue,
- ConfigurationSource conf,
- ContainerManager containerManager,
- SCMSafeModeManager safeModeManager
+ EventQueue eventQueueParam,
+ ConfigurationSource confParam,
+ ContainerManager containerManagerParam,
+ SCMSafeModeManager safeModeManagerParam
);
protected abstract ContainerInfo mockContainer(LifeCycleState state, long
containerID);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]