This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 8f6222e756 HDDS-12949. Cleanup SCMSafeModeManager (#8390)
8f6222e756 is described below
commit 8f6222e756f31d005874401c4b073571e75129cc
Author: Nandakumar Vadivelu <[email protected]>
AuthorDate: Sun May 11 16:06:57 2025 +0530
HDDS-12949. Cleanup SCMSafeModeManager (#8390)
---
.../org/apache/hadoop/hdds/scm/ha/SCMContext.java | 2 +-
.../hdds/scm/safemode/SCMSafeModeManager.java | 300 +++++++--------------
.../hadoop/hdds/scm/safemode/SafeModeExitRule.java | 4 +-
.../hadoop/hdds/scm/safemode/SafeModeMetrics.java | 14 +-
.../hdds/scm/safemode/SafeModeRuleFactory.java | 8 +
.../hdds/scm/server/StorageContainerManager.java | 9 +-
.../hadoop/hdds/scm/block/TestBlockManager.java | 14 +-
.../hdds/scm/ha/TestBackgroundSCMService.java | 2 +-
.../apache/hadoop/hdds/scm/ha/TestSCMContext.java | 4 +-
.../hadoop/hdds/scm/ha/TestSCMServiceManager.java | 8 +-
.../hdds/scm/pipeline/TestPipelineManagerImpl.java | 18 +-
.../scm/safemode/TestDataNodeSafeModeRule.java | 10 +-
.../safemode/TestHealthyPipelineSafeModeRule.java | 31 ++-
.../TestOneReplicaPipelineSafeModeRule.java | 8 +-
.../hdds/scm/safemode/TestSCMSafeModeManager.java | 104 ++++---
.../safemode/TestSCMSafeModeWithPipelineRules.java | 20 +-
.../apache/hadoop/ozone/MiniOzoneClusterImpl.java | 5 +-
.../hadoop/ozone/MiniOzoneHAClusterImpl.java | 5 +-
18 files changed, 231 insertions(+), 335 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
index 50aafe189e..eca34e6b6e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
@@ -72,7 +72,7 @@ public final class SCMContext {
private SCMContext(Builder b) {
isLeader = b.isLeader;
term = b.term;
- safeModeStatus = new SafeModeStatus(b.isInSafeMode, b.isPreCheckComplete);
+ safeModeStatus = SafeModeStatus.of(b.isInSafeMode, b.isPreCheckComplete);
finalizationCheckpoint = b.finalizationCheckpoint;
scm = b.scm;
threadNamePrefix = b.threadNamePrefix;
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
index 248103a48e..e30b103ecf 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
@@ -17,24 +17,22 @@
package org.apache.hadoop.hdds.scm.safemode;
-import com.google.common.annotations.VisibleForTesting;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED;
+import static
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT;
+
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
-import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
-import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMService.Event;
import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
-import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -42,124 +40,87 @@
/**
* StorageContainerManager enters safe mode on startup to allow system to
* reach a stable state before becoming fully functional. SCM will wait
- * for certain resources to be reported before coming out of safe mode.
- *
- * SafeModeExitRule defines format to define new rules which must be satisfied
- * to exit Safe mode.
+ * for certain resources to be reported before coming out of safe mode.<p>
*
- * Current SafeMode rules:
- * 1. ContainerSafeModeRule:
- * On every new datanode registration, it fires
- * {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this
- * event. This rule process this report, increment the
- * containerWithMinReplicas count when this reported replica is in the
- * containerMap. Then validates if cutoff threshold for containers is meet.
+ * Set of {@link SafeModeExitRule} are defined to verify if the required
+ * resources are reported, so that SCM can come out of safemode.<p>
*
- * 2. DatanodeSafeModeRule:
- * On every new datanode registration, it fires
- * {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this
- * event. This rule process this report, and check if this is new node, add
- * to its reported node list. Then validate it cutoff threshold for minimum
- * number of datanode registered is met or not.
+ * There are two stages in safemode exit,
+ * <ul>
+ * <li>pre-check complete</li>
+ * <li>safemode exit</li>
+ * </ul>
+ * <br>
+ * Each {@link SafeModeExitRule} can be configured to be part of either
+ * {@code pre-check}, {@code safemode} or both.<p>
*
- * 3. HealthyPipelineSafeModeRule:
- * Once the PipelineReportHandler processes the
- * {@link SCMEvents#PIPELINE_REPORT}, it fires
- * {@link SCMEvents#OPEN_PIPELINE}. This rule handles this
- * event. This rule processes this report, and check if pipeline is healthy
- * and increments current healthy pipeline count. Then validate it cutoff
- * threshold for healthy pipeline is met or not.
- *
- * 4. OneReplicaPipelineSafeModeRule:
- * Once the PipelineReportHandler processes the
- * {@link SCMEvents#PIPELINE_REPORT}, it fires
- * {@link SCMEvents#OPEN_PIPELINE}. This rule handles this
- * event. This rule processes this report, and add the reported pipeline to
- * reported pipeline set. Then validate it cutoff threshold for one replica
- * per pipeline is met or not.
+ * <i>Note: The Safemode logic can be completely disabled using
+ * {@link org.apache.hadoop.hdds.HddsConfigKeys#HDDS_SCM_SAFEMODE_ENABLED} property</i>
+ * <p>
*
+ * @see SafeModeExitRule
+ * @see DataNodeSafeModeRule
+ * @see HealthyPipelineSafeModeRule
+ * @see OneReplicaPipelineSafeModeRule
+ * @see RatisContainerSafeModeRule
+ * @see ECContainerSafeModeRule
*/
public class SCMSafeModeManager implements SafeModeManager {
- private static final Logger LOG =
- LoggerFactory.getLogger(SCMSafeModeManager.class);
- private final boolean isSafeModeEnabled;
- private AtomicBoolean inSafeMode = new AtomicBoolean(true);
- private AtomicBoolean preCheckComplete = new AtomicBoolean(false);
- private AtomicBoolean forceExitSafeMode = new AtomicBoolean(false);
-
- private Map<String, SafeModeExitRule> exitRules = new HashMap<>(1);
- private Set<String> preCheckRules = new HashSet<>(1);
- private ConfigurationSource config;
- private static final String RATIS_CONTAINER_EXIT_RULE =
"RatisContainerSafeModeRule";
- private static final String EC_CONTAINER_EXIT_RULE =
"ECContainerSafeModeRule";
- private static final String DN_EXIT_RULE = "DataNodeSafeModeRule";
- private static final String HEALTHY_PIPELINE_EXIT_RULE =
- "HealthyPipelineSafeModeRule";
- private static final String ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE
=
- "AtleastOneDatanodeReportedRule";
-
- private Set<String> validatedRules = new HashSet<>();
- private Set<String> validatedPreCheckRules = new HashSet<>(1);
-
- private final EventQueue eventPublisher;
+ private static final Logger LOG =
LoggerFactory.getLogger(SCMSafeModeManager.class);
+
+ private final AtomicBoolean inSafeMode = new AtomicBoolean(true);
+ private final AtomicBoolean preCheckComplete = new AtomicBoolean(false);
+ private final AtomicBoolean forceExitSafeMode = new AtomicBoolean(false);
+ private final Map<String, SafeModeExitRule<?>> exitRules = new HashMap<>();
+ private final Set<String> preCheckRules = new HashSet<>();
+ private final Set<String> validatedRules = new HashSet<>();
+ private final Set<String> validatedPreCheckRules = new HashSet<>();
+
private final SCMServiceManager serviceManager;
private final SCMContext scmContext;
-
private final SafeModeMetrics safeModeMetrics;
- public SCMSafeModeManager(ConfigurationSource conf,
- ContainerManager containerManager, PipelineManager
pipelineManager,
- NodeManager nodeManager, EventQueue eventQueue,
- SCMServiceManager serviceManager, SCMContext scmContext) {
- this.config = conf;
- this.eventPublisher = eventQueue;
+ public SCMSafeModeManager(final ConfigurationSource conf,
+ final NodeManager nodeManager,
+ final PipelineManager pipelineManager,
+ final ContainerManager containerManager,
+ final SCMServiceManager serviceManager,
+ final EventQueue eventQueue,
+ final SCMContext scmContext) {
this.serviceManager = serviceManager;
this.scmContext = scmContext;
- this.isSafeModeEnabled = conf.getBoolean(
- HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED,
- HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT);
-
- if (isSafeModeEnabled) {
- this.safeModeMetrics = SafeModeMetrics.create();
-
- // TODO: Remove the cyclic ("this") dependency (HDDS-11797)
- SafeModeRuleFactory.initialize(config, scmContext, eventQueue,
- this, pipelineManager, containerManager, nodeManager);
- SafeModeRuleFactory factory = SafeModeRuleFactory.getInstance();
+ this.safeModeMetrics = SafeModeMetrics.create();
- exitRules = factory.getSafeModeRules().stream().collect(
- Collectors.toMap(SafeModeExitRule::getRuleName, rule -> rule));
+ // TODO: Remove the cyclic ("this") dependency (HDDS-11797)
+ SafeModeRuleFactory.initialize(conf, scmContext, eventQueue, this,
+ pipelineManager, containerManager, nodeManager);
+ SafeModeRuleFactory factory = SafeModeRuleFactory.getInstance();
+ factory.getSafeModeRules().forEach(rule ->
exitRules.put(rule.getRuleName(), rule));
+ factory.getPreCheckRules().forEach(rule ->
preCheckRules.add(rule.getRuleName()));
- preCheckRules = factory.getPreCheckRules().stream()
- .map(SafeModeExitRule::getRuleName).collect(Collectors.toSet());
- } else {
- this.safeModeMetrics = null;
- exitSafeMode(eventQueue, true);
+ final boolean isSafeModeEnabled =
conf.getBoolean(HDDS_SCM_SAFEMODE_ENABLED, HDDS_SCM_SAFEMODE_ENABLED_DEFAULT);
+ if (!isSafeModeEnabled) {
+ LOG.info("Safemode is disabled, skipping Safemode rule validation and force exiting Safemode.");
+ exitSafeMode(true);
}
}
+ public void start() {
+ emitSafeModeStatus();
+ }
+
public void stop() {
- if (isSafeModeEnabled) {
- this.safeModeMetrics.unRegister();
- }
+ safeModeMetrics.unRegister();
}
public SafeModeMetrics getSafeModeMetrics() {
return safeModeMetrics;
}
- /**
- * Emit Safe mode status.
- */
- @VisibleForTesting
- public void emitSafeModeStatus() {
- SafeModeStatus safeModeStatus =
- new SafeModeStatus(getInSafeMode(), getPreCheckComplete());
-
- safeModeStatus.setForceExitSafeMode(isForceExitSafeMode());
-
- // update SCMContext
+ private void emitSafeModeStatus() {
+ final SafeModeStatus safeModeStatus = SafeModeStatus.of(
+ getInSafeMode(), getPreCheckComplete(), isForceExitSafeMode());
scmContext.updateSafeModeStatus(safeModeStatus);
// notify SCMServiceManager
@@ -173,69 +134,55 @@ public void emitSafeModeStatus() {
}
}
- public synchronized void validateSafeModeExitRules(String ruleName,
- EventPublisher eventQueue) {
-
- if (exitRules.get(ruleName) != null) {
- boolean added = validatedRules.add(ruleName);
+ public synchronized void validateSafeModeExitRules(String ruleName) {
+ if (exitRules.containsKey(ruleName)) {
+ validatedRules.add(ruleName);
+ LOG.info("{} rule is successfully validated", ruleName);
if (preCheckRules.contains(ruleName)) {
validatedPreCheckRules.add(ruleName);
}
- if (added) {
- LOG.info("{} rule is successfully validated", ruleName);
- }
} else {
// This should never happen
LOG.error("No Such Exit rule {}", ruleName);
}
if (!getPreCheckComplete()) {
- if (validatedPreCheckRules.size() == preCheckRules.size()) {
- completePreCheck(eventQueue);
- }
+ completePreCheck();
}
if (validatedRules.size() == exitRules.size()) {
// All rules are satisfied, we can exit safe mode.
LOG.info("ScmSafeModeManager, all rules are successfully validated");
- exitSafeMode(eventQueue, false);
+ exitSafeMode(false);
}
-
}
/**
* When all the precheck rules have been validated, set preCheckComplete to
* true and then emit the safemode status so any listeners get notified of
* the safemode state change.
- * @param eventQueue
*/
- @VisibleForTesting
- public void completePreCheck(EventPublisher eventQueue) {
- LOG.info("All SCM safe mode pre check rules have passed");
- setPreCheckComplete(true);
- emitSafeModeStatus();
+ private void completePreCheck() {
+ if (validatedPreCheckRules.size() == preCheckRules.size()) {
+ LOG.info("All SCM safe mode pre check rules have passed");
+ preCheckComplete.set(true);
+ emitSafeModeStatus();
+ }
}
- /**
- * Exit safe mode. It does following actions:
- * 1. Set safe mode status to false.
- * 2. Emits START_REPLICATION for ReplicationManager.
- * 3. Cleanup resources.
- * 4. Emit safe mode status.
- * @param eventQueue
- * @param force
- */
- @VisibleForTesting
- public void exitSafeMode(EventPublisher eventQueue, boolean force) {
+ public void forceExitSafeMode() {
+ exitSafeMode(true);
+ }
+
+ private void exitSafeMode(boolean force) {
LOG.info("SCM exiting safe mode.");
- // If safemode is exiting, then pre check must also have passed so
- // set it to true.
- setPreCheckComplete(true);
- setInSafeMode(false);
- setForceExitSafeMode(force);
+ // If safemode is exiting, then pre-check must also have passed.
+ preCheckComplete.set(true);
+ inSafeMode.set(false);
+ forceExitSafeMode.set(force);
// TODO: Remove handler registration as there is no need to listen to
- // register events anymore.
+ // register events anymore.
emitSafeModeStatus();
}
@@ -262,7 +209,7 @@ public void refreshAndValidate() {
exitRules.values().forEach(rule -> {
rule.refresh(false);
if (rule.validate() && inSafeMode.get()) {
- validateSafeModeExitRules(rule.getRuleName(), eventPublisher);
+ validateSafeModeExitRules(rule.getRuleName());
rule.cleanup();
}
});
@@ -271,20 +218,13 @@ public void refreshAndValidate() {
@Override
public boolean getInSafeMode() {
- if (!isSafeModeEnabled) {
- return false;
- }
return inSafeMode.get();
}
- /**
- * Get the safe mode status of all rules.
- *
- * @return map of rule statuses.
- */
+ /** Get the safe mode status of all rules. */
public Map<String, Pair<Boolean, String>> getRuleStatus() {
Map<String, Pair<Boolean, String>> map = new HashMap<>();
- for (SafeModeExitRule exitRule : exitRules.values()) {
+ for (SafeModeExitRule<?> exitRule : exitRules.values()) {
map.put(exitRule.getRuleName(),
Pair.of(exitRule.validate(), exitRule.getStatusText()));
}
@@ -295,80 +235,42 @@ public boolean getPreCheckComplete() {
return preCheckComplete.get();
}
- /**
- * Set safe mode status.
- */
- public void setInSafeMode(boolean inSafeMode) {
- this.inSafeMode.set(inSafeMode);
- }
-
- public void setPreCheckComplete(boolean newState) {
- this.preCheckComplete.set(newState);
- }
-
public boolean isForceExitSafeMode() {
return forceExitSafeMode.get();
}
- public void setForceExitSafeMode(boolean forceExitSafeMode) {
- this.forceExitSafeMode.set(forceExitSafeMode);
- }
-
public static Logger getLogger() {
return LOG;
}
- @VisibleForTesting
+ // TODO: This will be removed by HDDS-12955
public double getCurrentContainerThreshold() {
- return ((RatisContainerSafeModeRule)
exitRules.get(RATIS_CONTAINER_EXIT_RULE))
- .getCurrentContainerThreshold();
- }
-
- @VisibleForTesting
- public double getCurrentECContainerThreshold() {
- return ((ECContainerSafeModeRule) exitRules.get(EC_CONTAINER_EXIT_RULE))
+ return ((RatisContainerSafeModeRule)
exitRules.get("RatisContainerSafeModeRule"))
.getCurrentContainerThreshold();
}
- @VisibleForTesting
- public RatisContainerSafeModeRule getRatisContainerSafeModeRule() {
- return (RatisContainerSafeModeRule)
exitRules.get(RATIS_CONTAINER_EXIT_RULE);
- }
-
- @VisibleForTesting
- public ECContainerSafeModeRule getECContainerSafeModeRule() {
- return (ECContainerSafeModeRule) exitRules.get(EC_CONTAINER_EXIT_RULE);
- }
-
- @VisibleForTesting
- public HealthyPipelineSafeModeRule getHealthyPipelineSafeModeRule() {
- return (HealthyPipelineSafeModeRule)
- exitRules.get(HEALTHY_PIPELINE_EXIT_RULE);
- }
-
- @VisibleForTesting
- public OneReplicaPipelineSafeModeRule getOneReplicaPipelineSafeModeRule() {
- return (OneReplicaPipelineSafeModeRule)
- exitRules.get(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE);
- }
-
- public DataNodeSafeModeRule getDataNodeSafeModeRule() {
- return (DataNodeSafeModeRule) exitRules.get(DN_EXIT_RULE);
- }
-
/**
* Class used during SafeMode status event.
*/
- public static class SafeModeStatus {
+ public static final class SafeModeStatus {
+ // TODO: forceExitSafeMode value is not used anywhere, check and remove (HDDS-12957).
private final boolean safeModeStatus;
private final boolean preCheckPassed;
+ private final boolean forceExitSafeMode;
- private boolean forceExitSafeMode;
-
- public SafeModeStatus(boolean safeModeState, boolean preCheckPassed) {
+ private SafeModeStatus(boolean safeModeState, boolean preCheckPassed,
boolean forceExitSafeMode) {
this.safeModeStatus = safeModeState;
this.preCheckPassed = preCheckPassed;
+ this.forceExitSafeMode = forceExitSafeMode;
+ }
+
+ public static SafeModeStatus of(boolean safeMode, boolean preCheck) {
+ return of(safeMode, preCheck, false);
+ }
+
+ public static SafeModeStatus of(boolean safeMode, boolean preCheck,
boolean forceExit) {
+ return new SafeModeStatus(safeMode, preCheck, forceExit);
}
public boolean isInSafeMode() {
@@ -379,10 +281,6 @@ public boolean isPreCheckComplete() {
return preCheckPassed;
}
- public void setForceExitSafeMode(boolean forceExitSafeMode) {
- this.forceExitSafeMode = forceExitSafeMode;
- }
-
public boolean isForceExitSafeMode() {
return forceExitSafeMode;
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java
index 8b0456e68c..fb6f10b6f6 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java
@@ -99,7 +99,7 @@ public final void onMessage(T report, EventPublisher
publisher) {
if (scmInSafeMode()) {
if (validate()) {
- safeModeManager.validateSafeModeExitRules(ruleName, publisher);
+ safeModeManager.validateSafeModeExitRules(ruleName);
cleanup();
return;
}
@@ -107,7 +107,7 @@ public final void onMessage(T report, EventPublisher
publisher) {
process(report);
if (validate()) {
- safeModeManager.validateSafeModeExitRules(ruleName, publisher);
+ safeModeManager.validateSafeModeExitRules(ruleName);
cleanup();
}
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java
index d103e1f1fc..d650c4056e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java
@@ -25,11 +25,13 @@
/**
* This class is used for maintaining SafeMode metric information, which can
- * be used for monitoring during SCM startup when SCM is still in SafeMode.
+ * be used for monitoring during SCM startup when SCM is still in SafeMode.<p>
+ * The metrics from this class are valid iff
+ * {@link org.apache.hadoop.hdds.HddsConfigKeys#HDDS_SCM_SAFEMODE_ENABLED} is
+ * set to true and the SCM is still in SafeMode.
*/
public class SafeModeMetrics {
- private static final String SOURCE_NAME =
- SafeModeMetrics.class.getSimpleName();
+ private static final String SOURCE_NAME =
SafeModeMetrics.class.getSimpleName();
// These all values will be set to some values when safemode is enabled.
private @Metric MutableGaugeLong
@@ -50,10 +52,8 @@ public class SafeModeMetrics {
currentPipelinesWithAtleastOneReplicaReportedCount;
public static SafeModeMetrics create() {
- MetricsSystem ms = DefaultMetricsSystem.instance();
- return ms.register(SOURCE_NAME,
- "SCM Safemode Metrics",
- new SafeModeMetrics());
+ final MetricsSystem ms = DefaultMetricsSystem.instance();
+ return ms.register(SOURCE_NAME, "SCM Safemode Metrics", new
SafeModeMetrics());
}
public void setNumHealthyPipelinesThreshold(long val) {
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java
index 8d866d748a..7c6385307c 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java
@@ -117,4 +117,12 @@ public List<SafeModeExitRule<?>> getSafeModeRules() {
public List<SafeModeExitRule<?>> getPreCheckRules() {
return preCheckRules;
}
+
+ public <T extends SafeModeExitRule<?>> T getSafeModeRule(Class<T> ruleClass)
{
+ return safeModeRules.stream()
+ .filter(r -> ruleClass.isAssignableFrom(r.getClass()))
+ .map(ruleClass::cast)
+ .findFirst()
+ .orElse(null);
+ }
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 06c94394a7..7773a91fec 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -442,7 +442,7 @@ private StorageContainerManager(OzoneConfiguration conf,
containerBalancer = new ContainerBalancer(this);
// Emit initial safe mode status, as now handlers are registered.
- scmSafeModeManager.emitSafeModeStatus();
+ scmSafeModeManager.start();
scmHostName = HddsUtils.getHostName(conf);
registerMXBean();
@@ -831,9 +831,8 @@ private void initializeSystemManagers(OzoneConfiguration
conf,
if (configurator.getScmSafeModeManager() != null) {
scmSafeModeManager = configurator.getScmSafeModeManager();
} else {
- scmSafeModeManager = new SCMSafeModeManager(conf,
- containerManager, pipelineManager, scmNodeManager, eventQueue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(conf, scmNodeManager,
pipelineManager,
+ containerManager, serviceManager, eventQueue, scmContext);
}
scmDecommissionManager = new NodeDecommissionManager(conf, scmNodeManager,
containerManager,
@@ -1971,7 +1970,7 @@ public SCMServiceManager getSCMServiceManager() {
* Force SCM out of safe mode.
*/
public boolean exitSafeMode() {
- scmSafeModeManager.exitSafeMode(eventQueue, true);
+ scmSafeModeManager.forceExitSafeMode();
return true;
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
index 282b92554d..5ed9cefb7b 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
@@ -17,6 +17,7 @@
package org.apache.hadoop.hdds.scm.block;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED;
import static org.apache.hadoop.ozone.OzoneConsts.GB;
import static org.apache.hadoop.ozone.OzoneConsts.MB;
import static org.assertj.core.api.Assertions.assertThat;
@@ -115,7 +116,7 @@ void setUp(@TempDir File tempDir) throws Exception {
ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT,
ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
-
+ conf.setBoolean(HDDS_SCM_SAFEMODE_ENABLED, false);
conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
conf.setTimeDuration(HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL, 5,
TimeUnit.SECONDS);
@@ -160,12 +161,7 @@ void setUp(@TempDir File tempDir) throws Exception {
new ContainerReplicaPendingOps(
Clock.system(ZoneId.systemDefault())));
SCMSafeModeManager safeModeManager = new SCMSafeModeManager(conf,
- containerManager, pipelineManager, nodeManager, eventQueue,
serviceManager, scmContext) {
- @Override
- public void emitSafeModeStatus() {
- // skip
- }
- };
+ nodeManager, pipelineManager, containerManager, serviceManager,
eventQueue, scmContext);
SCMConfigurator configurator = new SCMConfigurator();
configurator.setScmNodeManager(nodeManager);
configurator.setPipelineManager(pipelineManager);
@@ -190,7 +186,7 @@ public void emitSafeModeStatus() {
replicationConfig = RatisReplicationConfig
.getInstance(ReplicationFactor.THREE);
- scm.getScmContext().updateSafeModeStatus(new SafeModeStatus(false, true));
+ scm.getScmContext().updateSafeModeStatus(SafeModeStatus.of(false, true));
}
@AfterEach
@@ -455,7 +451,7 @@ public void testAllocateOversizedBlock() {
@Test
public void testAllocateBlockFailureInSafeMode() {
scm.getScmContext().updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, true));
+ SCMSafeModeManager.SafeModeStatus.of(true, true));
// Test1: In safe mode expect an SCMException.
Throwable t = assertThrows(IOException.class, () ->
blockManager.allocateBlock(DEFAULT_BLOCK_SIZE,
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java
index 86894eeb42..4a35daab0a 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java
@@ -92,7 +92,7 @@ public void testNotifyStatusChanged() {
// go into safe mode, RUNNING -> PAUSING
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, true));
+ SCMSafeModeManager.SafeModeStatus.of(true, true));
backgroundSCMService.notifyStatusChanged();
assertFalse(backgroundSCMService.shouldRun());
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java
index 666d2034da..a1d34c82c3 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java
@@ -62,12 +62,12 @@ public void testSafeModeOperations() {
assertFalse(scmContext.isPreCheckComplete());
// in safe mode, pass preCheck
- scmContext.updateSafeModeStatus(new SafeModeStatus(true, true));
+ scmContext.updateSafeModeStatus(SafeModeStatus.of(true, true));
assertTrue(scmContext.isInSafeMode());
assertTrue(scmContext.isPreCheckComplete());
// out of safe mode
- scmContext.updateSafeModeStatus(new SafeModeStatus(false, true));
+ scmContext.updateSafeModeStatus(SafeModeStatus.of(false, true));
assertFalse(scmContext.isInSafeMode());
assertTrue(scmContext.isPreCheckComplete());
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java
index 04ceebd6c3..67ff3ee80c 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java
@@ -76,7 +76,7 @@ public void stop() {
// PAUSING when out of safe mode.
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(false, true));
+ SCMSafeModeManager.SafeModeStatus.of(false, true));
serviceManager.notifyStatusChanged();
assertFalse(serviceRunWhenLeader.shouldRun());
@@ -87,7 +87,7 @@ public void stop() {
// RUNNING when in safe mode.
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, false));
+ SCMSafeModeManager.SafeModeStatus.of(true, false));
serviceManager.notifyStatusChanged();
assertTrue(serviceRunWhenLeader.shouldRun());
@@ -146,7 +146,7 @@ public void stop() {
// PAUSING when out of safe mode.
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(false, true));
+ SCMSafeModeManager.SafeModeStatus.of(false, true));
serviceManager.notifyStatusChanged();
assertFalse(serviceRunWhenLeaderAndOutOfSafeMode.shouldRun());
@@ -157,7 +157,7 @@ public void stop() {
// PAUSING when in safe mode.
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, false));
+ SCMSafeModeManager.SafeModeStatus.of(true, false));
serviceManager.notifyStatusChanged();
assertFalse(serviceRunWhenLeaderAndOutOfSafeMode.shouldRun());
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
index 597725dfc8..8b20e279a0 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
@@ -359,8 +359,8 @@ public void testClosePipelineShouldFailOnFollower() throws
Exception {
public void testPipelineReport() throws Exception {
try (PipelineManagerImpl pipelineManager = createPipelineManager(true)) {
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(conf,
- mock(ContainerManager.class), pipelineManager,
mock(NodeManager.class),
- new EventQueue(), serviceManager, scmContext);
+ mock(NodeManager.class), pipelineManager,
mock(ContainerManager.class),
+ serviceManager, new EventQueue(), scmContext);
Pipeline pipeline = pipelineManager
.createPipeline(RatisReplicationConfig
.getInstance(ReplicationFactor.THREE));
@@ -469,8 +469,8 @@ public void testPipelineOpenOnlyWhenLeaderReported() throws
Exception {
pipelineManager.getPipeline(pipeline.getId()).getPipelineState());
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(new
OzoneConfiguration(),
- mock(ContainerManager.class), pipelineManager, mock(NodeManager.class),
- new EventQueue(), serviceManager, scmContext);
+ mock(NodeManager.class), pipelineManager, mock(ContainerManager.class),
+ serviceManager, new EventQueue(), scmContext);
PipelineReportHandler pipelineReportHandler =
new PipelineReportHandler(scmSafeModeManager, pipelineManager,
SCMContext.emptyContext(), conf);
@@ -613,7 +613,7 @@ public void testPipelineNotCreatedUntilSafeModePrecheck()
throws Exception {
TimeUnit.MILLISECONDS);
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, false));
+ SCMSafeModeManager.SafeModeStatus.of(true, false));
PipelineManagerImpl pipelineManager = createPipelineManager(true);
assertThrows(IOException.class,
@@ -634,7 +634,7 @@ public void testPipelineNotCreatedUntilSafeModePrecheck()
throws Exception {
// Simulate safemode check exiting.
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, true));
+ SCMSafeModeManager.SafeModeStatus.of(true, true));
GenericTestUtils.waitFor(() -> !pipelineManager.getPipelines().isEmpty(),
100, 10000);
pipelineManager.close();
@@ -650,20 +650,20 @@ public void testSafeModeUpdatedOnSafemodeExit() throws
Exception {
PipelineManagerImpl pipelineManager = createPipelineManager(true);
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, false));
+ SCMSafeModeManager.SafeModeStatus.of(true, false));
assertTrue(pipelineManager.getSafeModeStatus());
assertFalse(pipelineManager.isPipelineCreationAllowed());
// First pass pre-check as true, but safemode still on
// Simulate safemode check exiting.
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(true, true));
+ SCMSafeModeManager.SafeModeStatus.of(true, true));
assertTrue(pipelineManager.getSafeModeStatus());
assertTrue(pipelineManager.isPipelineCreationAllowed());
// Then also turn safemode off
scmContext.updateSafeModeStatus(
- new SCMSafeModeManager.SafeModeStatus(false, true));
+ SCMSafeModeManager.SafeModeStatus.of(false, true));
assertFalse(pipelineManager.getSafeModeStatus());
assertTrue(pipelineManager.isPipelineCreationAllowed());
pipelineManager.close();
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java
index 847da184fa..eab3f1cf2b 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java
@@ -76,13 +76,13 @@ private void setup(int requiredDns) throws Exception {
serviceManager = new SCMServiceManager();
scmContext = SCMContext.emptyContext();
- SCMSafeModeManager scmSafeModeManager =
- new SCMSafeModeManager(ozoneConfiguration, containerManager,
- null, nodeManager, eventQueue, serviceManager, scmContext);
+ SCMSafeModeManager scmSafeModeManager = new
SCMSafeModeManager(ozoneConfiguration,
+ nodeManager, null, containerManager, serviceManager, eventQueue,
scmContext);
+ scmSafeModeManager.start();
- rule = scmSafeModeManager.getDataNodeSafeModeRule();
+ rule =
SafeModeRuleFactory.getInstance().getSafeModeRule(DataNodeSafeModeRule.class);
assertNotNull(rule);
-
+
rule.setValidateBasedOnReportProcessing(true);
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java
index 8ffdba407f..9f1b30c51d 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java
@@ -94,12 +94,12 @@ public void testHealthyPipelineSafeModeRuleWithNoPipelines()
pipelineManager.getStateManager(), config);
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
mockRatisProvider);
- SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, pipelineManager, nodeManager, eventQueue,
- serviceManager, scmContext);
+ SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(config,
+ nodeManager, pipelineManager, containerManager, serviceManager,
eventQueue, scmContext);
+ scmSafeModeManager.start();
- HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
- scmSafeModeManager.getHealthyPipelineSafeModeRule();
+ HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(HealthyPipelineSafeModeRule.class);
// This should be immediately satisfied, as no pipelines are there yet.
assertTrue(healthyPipelineSafeModeRule.validate());
@@ -172,12 +172,12 @@ public void
testHealthyPipelineSafeModeRuleWithPipelines() throws Exception {
pipeline3 = pipelineManager.getPipeline(pipeline3.getId());
MockRatisPipelineProvider.markPipelineHealthy(pipeline3);
- SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, pipelineManager, nodeManager, eventQueue,
- serviceManager, scmContext);
+ SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(config,
+ nodeManager, pipelineManager, containerManager, serviceManager,
eventQueue, scmContext);
+ scmSafeModeManager.start();
- HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
- scmSafeModeManager.getHealthyPipelineSafeModeRule();
+ HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(HealthyPipelineSafeModeRule.class);
// No datanodes have sent pipelinereport from datanode
assertFalse(healthyPipelineSafeModeRule.validate());
@@ -265,13 +265,12 @@ public void
testHealthyPipelineSafeModeRuleWithMixedPipelines()
pipeline3 = pipelineManager.getPipeline(pipeline3.getId());
MockRatisPipelineProvider.markPipelineHealthy(pipeline3);
- SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, pipelineManager, nodeManager, eventQueue,
- serviceManager, scmContext);
-
- HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
- scmSafeModeManager.getHealthyPipelineSafeModeRule();
+ SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(config,
+ nodeManager, pipelineManager, containerManager, serviceManager,
eventQueue, scmContext);
+ scmSafeModeManager.start();
+ HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(HealthyPipelineSafeModeRule.class);
// No pipeline event have sent to SCMSafemodeManager
assertFalse(healthyPipelineSafeModeRule.validate());
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java
index 0314c76c69..3670b893b1 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java
@@ -115,11 +115,11 @@ private void setup(int nodes, int
pipelineFactorThreeCount,
createPipelines(pipelineFactorOneCount,
HddsProtos.ReplicationFactor.ONE);
- SCMSafeModeManager scmSafeModeManager =
- new SCMSafeModeManager(ozoneConfiguration, containerManager,
- pipelineManager, mockNodeManager, eventQueue, serviceManager,
scmContext);
+ SCMSafeModeManager scmSafeModeManager = new
SCMSafeModeManager(ozoneConfiguration,
+ mockNodeManager, pipelineManager, containerManager, serviceManager,
eventQueue, scmContext);
+ scmSafeModeManager.start();
- rule = scmSafeModeManager.getOneReplicaPipelineSafeModeRule();
+ rule =
SafeModeRuleFactory.getInstance().getSafeModeRule(OneReplicaPipelineSafeModeRule.class);
}
@Test
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
index b00f5b4ea5..3729ca11a4 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
@@ -130,9 +130,9 @@ private void testSafeMode(int numContainers) throws
Exception {
}
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
- scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, null, null, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(config, null, null,
containerManager,
+ serviceManager, queue, scmContext);
+ scmSafeModeManager.start();
assertTrue(scmSafeModeManager.getInSafeMode());
validateRuleStatus("DatanodeSafeModeRule", "registered datanodes 0");
@@ -169,9 +169,9 @@ public void testSafeModeExitRule() throws Exception {
}
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
- scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, null, null, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(config, null, null,
containerManager,
+ serviceManager, queue, scmContext);
+ scmSafeModeManager.start();
long cutOff = (long) Math.ceil(numContainers * config.getDouble(
HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
@@ -235,8 +235,8 @@ public void
testHealthyPipelinePercentWithIncorrectValue(double healthyPercent,
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
IllegalArgumentException exception =
assertThrows(IllegalArgumentException.class,
- () -> new SCMSafeModeManager(conf, containerManager,
- pipelineManager, mockNodeManager, queue, serviceManager,
scmContext));
+ () -> new SCMSafeModeManager(conf, mockNodeManager, pipelineManager,
containerManager,
+ serviceManager, queue, scmContext));
assertThat(exception).hasMessageEndingWith("value should be >= 0.0 and <=
1.0");
}
@@ -301,9 +301,9 @@ public void
testSafeModeExitRuleWithPipelineAvailabilityCheck(
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
- scmSafeModeManager = new SCMSafeModeManager(
- conf, containerManager, pipelineManager, mockNodeManager, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(conf, mockNodeManager,
pipelineManager,
+ containerManager, serviceManager, queue, scmContext);
+ scmSafeModeManager.start();
assertTrue(scmSafeModeManager.getInSafeMode());
if (healthyPipelinePercent > 0) {
@@ -317,12 +317,10 @@ public void
testSafeModeExitRuleWithPipelineAvailabilityCheck(
List<Pipeline> pipelines = pipelineManager.getPipelines();
- int healthyPipelineThresholdCount =
- scmSafeModeManager.getHealthyPipelineSafeModeRule()
- .getHealthyPipelineThresholdCount();
- int oneReplicaThresholdCount =
- scmSafeModeManager.getOneReplicaPipelineSafeModeRule()
- .getThresholdCount();
+ int healthyPipelineThresholdCount = SafeModeRuleFactory.getInstance().
+
getSafeModeRule(HealthyPipelineSafeModeRule.class).getHealthyPipelineThresholdCount();
+ int oneReplicaThresholdCount = SafeModeRuleFactory.getInstance().
+
getSafeModeRule(OneReplicaPipelineSafeModeRule.class).getThresholdCount();
assertEquals(healthyPipelineThresholdCount,
scmSafeModeManager.getSafeModeMetrics()
@@ -388,16 +386,16 @@ private void validateRuleStatus(String safeModeRule,
String stringToMatch) {
}
private void checkHealthy(int expectedCount) throws Exception {
- GenericTestUtils.waitFor(() -> scmSafeModeManager
- .getHealthyPipelineSafeModeRule()
- .getCurrentHealthyPipelineCount() == expectedCount,
+ final HealthyPipelineSafeModeRule pipelineRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(HealthyPipelineSafeModeRule.class);
+ GenericTestUtils.waitFor(() ->
pipelineRule.getCurrentHealthyPipelineCount() == expectedCount,
100, 5000);
}
private void checkOpen(int expectedCount) throws Exception {
- GenericTestUtils.waitFor(() -> scmSafeModeManager
- .getOneReplicaPipelineSafeModeRule()
- .getCurrentReportedPipelineCount() == expectedCount,
+ final OneReplicaPipelineSafeModeRule pipelineRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(OneReplicaPipelineSafeModeRule.class);
+ GenericTestUtils.waitFor(() ->
pipelineRule.getCurrentReportedPipelineCount() == expectedCount,
1000, 5000);
}
@@ -437,9 +435,8 @@ public void testDisableSafeMode() {
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
NodeManager nodeManager = mock(SCMNodeManager.class);
- scmSafeModeManager = new SCMSafeModeManager(
- conf, containerManager, pipelineManager, nodeManager, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(conf, nodeManager,
pipelineManager,
+ containerManager, serviceManager, queue, scmContext);
assertFalse(scmSafeModeManager.getInSafeMode());
}
@@ -479,8 +476,8 @@ public void testContainerSafeModeRule() throws Exception {
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
- scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, null, null, queue, serviceManager,
scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(config, null, null,
+ containerManager, serviceManager, queue, scmContext);
assertTrue(scmSafeModeManager.getInSafeMode());
@@ -555,9 +552,8 @@ public void testContainerSafeModeRuleEC(int data, int
parity) throws Exception {
scmMetadataStore.getContainerTable(),
new ContainerReplicaPendingOps(Clock.system(ZoneId.systemDefault())));
- scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, pipelineManager, nodeManager, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(config, nodeManager,
pipelineManager,
+ containerManager, serviceManager, queue, scmContext);
assertTrue(scmSafeModeManager.getInSafeMode());
// Only 20 containers are involved in the calculation,
@@ -569,12 +565,12 @@ public void testContainerSafeModeRuleEC(int data, int
parity) throws Exception {
// the threshold will reach 100%.
testECContainerThreshold(containers.subList(10, 20), 1.0, data);
- ECContainerSafeModeRule ecContainerSafeModeRule =
- scmSafeModeManager.getECContainerSafeModeRule();
+ ECContainerSafeModeRule ecContainerSafeModeRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(ECContainerSafeModeRule.class);
assertTrue(ecContainerSafeModeRule.validate());
- RatisContainerSafeModeRule ratisContainerSafeModeRule =
- scmSafeModeManager.getRatisContainerSafeModeRule();
+ RatisContainerSafeModeRule ratisContainerSafeModeRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(RatisContainerSafeModeRule.class);
assertTrue(ratisContainerSafeModeRule.validate());
}
@@ -583,9 +579,9 @@ private void testSafeModeDataNodes(int numOfDns) throws
Exception {
conf.setInt(HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE, numOfDns);
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
- scmSafeModeManager = new SCMSafeModeManager(
- conf, containerManager, null, null, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(conf, null, null,
+ containerManager, serviceManager, queue, scmContext);
+ scmSafeModeManager.start();
// Assert SCM is in Safe mode.
assertTrue(scmSafeModeManager.getInSafeMode());
@@ -597,7 +593,8 @@ private void testSafeModeDataNodes(int numOfDns) throws
Exception {
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
nodeRegistrationContainerReport);
queue.fireEvent(SCMEvents.CONTAINER_REGISTRATION_REPORT,
nodeRegistrationContainerReport);
assertTrue(scmSafeModeManager.getInSafeMode());
- assertEquals(1, scmSafeModeManager.getCurrentContainerThreshold());
+ assertEquals(1, SafeModeRuleFactory.getInstance()
+
.getSafeModeRule(RatisContainerSafeModeRule.class).getCurrentContainerThreshold());
}
if (numOfDns == 0) {
@@ -621,10 +618,11 @@ private void testContainerThreshold(List<ContainerInfo>
dnContainers,
nodeRegistrationContainerReport);
queue.fireEvent(SCMEvents.CONTAINER_REGISTRATION_REPORT,
nodeRegistrationContainerReport);
- GenericTestUtils.waitFor(() -> {
- double threshold = scmSafeModeManager.getCurrentContainerThreshold();
- return threshold == expectedThreshold;
- }, 100, 2000 * 9);
+ final RatisContainerSafeModeRule containerRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(RatisContainerSafeModeRule.class);
+ GenericTestUtils.waitFor(() ->
+ containerRule.getCurrentContainerThreshold() == expectedThreshold,
+ 100, 2000 * 9);
}
/**
@@ -655,10 +653,10 @@ private void testECContainerThreshold(List<ContainerInfo>
dnContainers,
}
// Step2. Wait for the threshold to be reached.
- GenericTestUtils.waitFor(() -> {
- double threshold = scmSafeModeManager.getCurrentECContainerThreshold();
- return threshold == expectedThreshold;
- }, 100, 2000 * 9);
+ ECContainerSafeModeRule containerRule = SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(ECContainerSafeModeRule.class);
+ GenericTestUtils.waitFor(() ->
containerRule.getCurrentContainerThreshold() == expectedThreshold,
+ 100, 2000 * 9);
}
@Test
@@ -692,9 +690,9 @@ public void testSafeModePipelineExitRule() throws Exception
{
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
- scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, pipelineManager, nodeManager, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(config, nodeManager,
pipelineManager,
+ containerManager, serviceManager, queue, scmContext);
+ scmSafeModeManager.start();
SCMDatanodeProtocolServer.NodeRegistrationContainerReport
nodeRegistrationContainerReport =
HddsTestUtils.createNodeRegistrationContainerReport(containers);
@@ -741,9 +739,9 @@ public void testPipelinesNotCreatedUntilPreCheckPasses()
throws Exception {
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers);
- scmSafeModeManager = new SCMSafeModeManager(
- config, containerManager, pipelineManager, nodeManager, queue,
- serviceManager, scmContext);
+ scmSafeModeManager = new SCMSafeModeManager(config, nodeManager,
pipelineManager,
+ containerManager, serviceManager, queue, scmContext);
+ scmSafeModeManager.start();
// Assert SCM is in Safe mode.
assertTrue(scmSafeModeManager.getInSafeMode());
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java
index f6e33b3526..134f7f6ea8 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java
@@ -112,19 +112,18 @@ void testScmSafeMode() throws Exception {
SCMSafeModeManager scmSafeModeManager =
cluster.getStorageContainerManager().getScmSafeModeManager();
-
// Ceil(0.1 * 2) is 1, as one pipeline is healthy pipeline rule is
// satisfied
- GenericTestUtils.waitFor(() ->
- scmSafeModeManager.getHealthyPipelineSafeModeRule()
- .validate(), 1000, 60000);
+ final HealthyPipelineSafeModeRule healthyPipelineRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(HealthyPipelineSafeModeRule.class);
+ GenericTestUtils.waitFor(healthyPipelineRule::validate, 1000, 60000);
// As Ceil(0.9 * 2) is 2, and from second pipeline no datanodes's are
// reported this rule is not met yet.
- GenericTestUtils.waitFor(() ->
- !scmSafeModeManager.getOneReplicaPipelineSafeModeRule()
- .validate(), 1000, 60000);
+ final OneReplicaPipelineSafeModeRule oneReplicaPipelineRule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(OneReplicaPipelineSafeModeRule.class);
+ GenericTestUtils.waitFor(() -> !oneReplicaPipelineRule.validate(), 1000,
60000);
assertTrue(cluster.getStorageContainerManager().isInSafeMode());
@@ -132,9 +131,7 @@ void testScmSafeMode() throws Exception {
// Now restart one datanode from the 2nd pipeline
cluster.restartHddsDatanode(restartedDatanode, false);
- GenericTestUtils.waitFor(() ->
- scmSafeModeManager.getOneReplicaPipelineSafeModeRule()
- .validate(), 1000, 60000);
+ GenericTestUtils.waitFor(oneReplicaPipelineRule::validate, 1000, 60000);
// All safeMode preChecks are now satisfied, SCM should be out of safe
mode.
@@ -157,8 +154,7 @@ void testScmSafeMode() throws Exception {
ReplicationManager replicationManager =
cluster.getStorageContainerManager().getReplicationManager();
- GenericTestUtils.waitFor(() ->
- replicationManager.isRunning(), 1000, 60000);
+ GenericTestUtils.waitFor(replicationManager::isRunning, 1000, 60000);
}
@AfterEach
diff --git
a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
index ef097ab579..dd0d476170 100644
---
a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
+++
b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
@@ -62,6 +62,7 @@
import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig;
import
org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider;
import org.apache.hadoop.hdds.scm.safemode.HealthyPipelineSafeModeRule;
+import org.apache.hadoop.hdds.scm.safemode.SafeModeRuleFactory;
import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager;
import org.apache.hadoop.hdds.scm.server.SCMConfigurator;
import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
@@ -646,8 +647,8 @@ protected StorageContainerManager createSCM()
initializeScmStorage(scmStore);
StorageContainerManager scm = HddsTestUtils.getScmSimple(conf,
scmConfigurator);
- HealthyPipelineSafeModeRule rule =
- scm.getScmSafeModeManager().getHealthyPipelineSafeModeRule();
+ HealthyPipelineSafeModeRule rule = SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(HealthyPipelineSafeModeRule.class);
if (rule != null) {
// Set threshold to wait for safe mode exit - this is needed since a
// pipeline is marked open only after leader election.
diff --git
a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
index 92e1360732..fdea7b0f4f 100644
---
a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
+++
b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
@@ -41,6 +41,7 @@
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.safemode.HealthyPipelineSafeModeRule;
+import org.apache.hadoop.hdds.scm.safemode.SafeModeRuleFactory;
import org.apache.hadoop.hdds.scm.server.SCMConfigurator;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
@@ -572,8 +573,8 @@ protected SCMHAService createSCMService()
}
StorageContainerManager scm =
HddsTestUtils.getScmSimple(scmConfig, scmConfigurator);
- HealthyPipelineSafeModeRule rule =
- scm.getScmSafeModeManager().getHealthyPipelineSafeModeRule();
+ HealthyPipelineSafeModeRule rule =
SafeModeRuleFactory.getInstance()
+ .getSafeModeRule(HealthyPipelineSafeModeRule.class);
if (rule != null) {
// Set threshold to wait for safe mode exit -
// this is needed since a pipeline is marked open only after
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]