This is an automated email from the ASF dual-hosted git repository.
sumitagrawal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 4779f60d879 HDDS-14108. Provide option in ‘scm safemode status’ to
show status of all SCM nodes (#9611)
4779f60d879 is described below
commit 4779f60d879abed8657fd303157c19bb3dcbf2b4
Author: sreejasahithi <[email protected]>
AuthorDate: Wed Feb 25 17:57:21 2026 +0530
HDDS-14108. Provide option in ‘scm safemode status’ to show status of all
SCM nodes (#9611)
---
.../protocol/StorageContainerLocationProtocol.java | 8 +
...inerLocationProtocolClientSideTranslatorPB.java | 66 ++++++-
.../scm/proxy/SCMFailoverProxyProviderBase.java | 11 ++
.../java/org/apache/hadoop/hdds/utils/HAUtils.java | 12 ++
...inerLocationProtocolServerSideTranslatorPB.java | 8 +-
.../hdds/scm/cli/ContainerOperationClient.java | 16 +-
.../hdds/scm/cli/SafeModeCheckSubcommand.java | 190 +++++++++++++++++++--
.../org/apache/hadoop/hdds/scm/cli/ScmOption.java | 10 ++
.../ozone/shell/TestSafeModeCheckSubcommandHA.java | 183 ++++++++++++++++++++
9 files changed, 483 insertions(+), 21 deletions(-)
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
index f0fb3378c6f..8e04ed225d6 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
@@ -73,6 +73,14 @@ public interface StorageContainerLocationProtocol extends
Closeable {
Type.StopReplicationManager,
Type.ForceExitSafeMode));
+ /**
+ * Read-only commands that can execute on followers without leader check.
+ * These commands respect the --scm parameter and query the specified SCM.
+ */
+ Set<Type> FOLLOWER_READABLE_COMMAND_TYPES =
Collections.unmodifiableSet(EnumSet.of(
+ Type.InSafeMode,
+ Type.GetSafeModeRuleStatuses));
+
/**
* Asks SCM where a container should be allocated. SCM responds with the
* set of datanodes that should be used creating this container.
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
index 678b40ec788..832aa478dbd 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
@@ -145,6 +145,8 @@
import org.apache.hadoop.ozone.upgrade.UpgradeFinalization.StatusAndMessages;
import org.apache.hadoop.ozone.util.ProtobufUtils;
import org.apache.hadoop.security.token.Token;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* This class is the client-side translator to translate the requests made on
@@ -162,6 +164,9 @@ public final class
StorageContainerLocationProtocolClientSideTranslatorPB
private final StorageContainerLocationProtocolPB rpcProxy;
private final SCMContainerLocationFailoverProxyProvider fpp;
+ private final ScmNodeTarget targetScmNode;
+ private static final Logger LOG =
+
LoggerFactory.getLogger(StorageContainerLocationProtocolClientSideTranslatorPB.class);
/**
* Creates a new StorageContainerLocationProtocolClientSideTranslatorPB.
@@ -170,8 +175,20 @@ public final class
StorageContainerLocationProtocolClientSideTranslatorPB
*/
public StorageContainerLocationProtocolClientSideTranslatorPB(
SCMContainerLocationFailoverProxyProvider proxyProvider) {
+ this(proxyProvider, null);
+ }
+
+ /**
+ * Creates a new StorageContainerLocationProtocolClientSideTranslatorPB with
a ScmNodeTarget.
+ *
+ * @param proxyProvider {@link SCMContainerLocationFailoverProxyProvider}
+ * @param targetScmNode {@link ScmNodeTarget} to route requests to specific
SCM nodes
+ */
+ public StorageContainerLocationProtocolClientSideTranslatorPB(
+ SCMContainerLocationFailoverProxyProvider proxyProvider, ScmNodeTarget
targetScmNode) {
Objects.requireNonNull(proxyProvider, "proxyProvider == null");
this.fpp = proxyProvider;
+ this.targetScmNode = targetScmNode;
this.rpcProxy = (StorageContainerLocationProtocolPB) RetryProxy.create(
StorageContainerLocationProtocolPB.class,
fpp,
@@ -202,6 +219,17 @@ private ScmContainerLocationResponse submitRequest(
private ScmContainerLocationResponse submitRpcRequest(
ScmContainerLocationRequest wrapper) throws ServiceException {
+ // If targetScmNode has a specific node ID, route follower-readable
requests to that node
+ if (targetScmNode != null && targetScmNode.hasNodeId() &&
+ FOLLOWER_READABLE_COMMAND_TYPES.contains(wrapper.getCmdType())) {
+ try {
+ StorageContainerLocationProtocolPB proxy =
fpp.getProxyForNode(targetScmNode.getNodeId());
+ return proxy.submitRequest(NULL_RPC_CONTROLLER, wrapper);
+ } catch (IOException e) {
+ throw new ServiceException("Failed to get proxy for node: " +
targetScmNode.getNodeId(), e);
+ }
+ }
+
if (!ADMIN_COMMAND_TYPE.contains(wrapper.getCmdType())) {
return rpcProxy.submitRequest(NULL_RPC_CONTROLLER, wrapper);
}
@@ -843,13 +871,21 @@ public Map<String, Pair<Boolean, String>>
getSafeModeRuleStatuses()
submitRequest(Type.GetSafeModeRuleStatuses,
builder -> builder.setGetSafeModeRuleStatusesRequest(request))
.getGetSafeModeRuleStatusesResponse();
- Map<String, Pair<Boolean, String>> map = new HashMap();
- for (SafeModeRuleStatusProto statusProto :
- response.getSafeModeRuleStatusesProtoList()) {
- map.put(statusProto.getRuleName(),
+ return buildSafeModeRuleStatusesMap(response);
+ }
+
+ /**
+ * Helper method to build a map from GetSafeModeRuleStatusesResponseProto.
+ * Extracts rule names and their status information.
+ */
+ private Map<String, Pair<Boolean, String>> buildSafeModeRuleStatusesMap(
+ GetSafeModeRuleStatusesResponseProto response) {
+ Map<String, Pair<Boolean, String>> ruleStatuses = new HashMap<>();
+ for (SafeModeRuleStatusProto statusProto :
response.getSafeModeRuleStatusesProtoList()) {
+ ruleStatuses.put(statusProto.getRuleName(),
Pair.of(statusProto.getValidate(), statusProto.getStatusText()));
}
- return map;
+ return ruleStatuses;
}
/**
@@ -1246,4 +1282,24 @@ public void reconcileContainer(long containerID) throws
IOException {
// TODO check error handling.
submitRequest(Type.ReconcileContainer, builder ->
builder.setReconcileContainerRequest(request));
}
+
+ /**
+ * Holder class to store the target SCM node ID for routing requests.
+ * This allows requests to be directed to specific SCM nodes in an HA
cluster.
+ */
+ public static class ScmNodeTarget {
+ private String nodeId;
+
+ public String getNodeId() {
+ return nodeId;
+ }
+
+ public void setNodeId(String nodeId) {
+ this.nodeId = nodeId;
+ }
+
+ public boolean hasNodeId() {
+ return nodeId != null && !nodeId.isEmpty();
+ }
+ }
}
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMFailoverProxyProviderBase.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMFailoverProxyProviderBase.java
index 0dfa95a9524..272db7a04ae 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMFailoverProxyProviderBase.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMFailoverProxyProviderBase.java
@@ -193,6 +193,17 @@ public synchronized List<T> getProxies() {
.map(proxyInfo -> proxyInfo.proxy).collect(Collectors.toList());
}
+ public synchronized T getProxyForNode(String nodeId) throws IOException {
+ ProxyInfo<T> proxyInfo = scmProxies.get(nodeId);
+ if (proxyInfo == null) {
+ if (!scmProxyInfoMap.containsKey(nodeId)) {
+ throw new IOException("Unknown SCM node ID: " + nodeId);
+ }
+ proxyInfo = createSCMProxy(nodeId);
+ }
+ return proxyInfo.proxy;
+ }
+
@Override
public synchronized void performFailover(T newLeader) {
if (updatedLeaderNodeID != null) {
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
index 406736f5310..1fce92a111d 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
@@ -52,6 +52,7 @@
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
import
org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB;
import
org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
+import
org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB.ScmNodeTarget;
import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider;
import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig;
import
org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider;
@@ -161,6 +162,17 @@ public static StorageContainerLocationProtocol
getScmContainerClient(
return scmContainerClient;
}
+ public static StorageContainerLocationProtocol getScmContainerClientForNode(
+ ConfigurationSource conf, ScmNodeTarget targetScmNode) {
+ SCMContainerLocationFailoverProxyProvider proxyProvider =
+ new SCMContainerLocationFailoverProxyProvider(conf, null);
+ StorageContainerLocationProtocol scmContainerClient =
+ TracingUtil.createProxy(
+ new StorageContainerLocationProtocolClientSideTranslatorPB(
+ proxyProvider, targetScmNode),
StorageContainerLocationProtocol.class, conf);
+ return scmContainerClient;
+ }
+
/**
* Replace the current DB with the new DB checkpoint.
* (checkpoint in checkpointPath will not be deleted here)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
index 6693f7be025..287ae47c06f 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
@@ -29,6 +29,7 @@
import static
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type.ListContainer;
import static
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type.ListPipelines;
import static
org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol.ADMIN_COMMAND_TYPE;
+import static
org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol.FOLLOWER_READABLE_COMMAND_TYPES;
import com.google.protobuf.RpcController;
import com.google.protobuf.ServiceException;
@@ -209,9 +210,12 @@ public
StorageContainerLocationProtocolServerSideTranslatorPB(
@Override
public ScmContainerLocationResponse submitRequest(RpcController controller,
ScmContainerLocationRequest request) throws ServiceException {
- // not leader or not belong to admin command.
+ // Trigger not leader exception unless:
+ // This is the leader node, or this is an admin command,
+ // or this is a follower-readable command.
if (!scm.checkLeader()
- && !ADMIN_COMMAND_TYPE.contains(request.getCmdType())) {
+ && !ADMIN_COMMAND_TYPE.contains(request.getCmdType())
+ && !FOLLOWER_READABLE_COMMAND_TYPES.contains(request.getCmdType())) {
RatisUtil.checkRatisException(
scm.getScmHAManager().getRatisServer().triggerNotLeaderException(),
scm.getClientRpcPort(), scm.getScmId(), scm.getHostname(),
ROLE_TYPE);
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
index 3ca49be2443..ea50f9d6056 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
@@ -57,6 +57,7 @@
import
org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
+import
org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB.ScmNodeTarget;
import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;
import
org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider;
import org.apache.hadoop.hdds.utils.HAUtils;
@@ -94,8 +95,16 @@ public synchronized XceiverClientManager
getXceiverClientManager()
}
public ContainerOperationClient(OzoneConfiguration conf) throws IOException {
+ this(conf, null);
+ }
+
+ public ContainerOperationClient(OzoneConfiguration conf, ScmNodeTarget
targetScmNode) throws IOException {
this.configuration = conf;
- storageContainerLocationClient = newContainerRpcClient(conf);
+ if (targetScmNode != null) {
+ storageContainerLocationClient = newContainerRpcClientForNode(conf,
targetScmNode);
+ } else {
+ storageContainerLocationClient = newContainerRpcClient(conf);
+ }
secretKeyClient = newSecretKeyClient(conf);
containerSizeB = (int) conf.getStorageSize(OZONE_SCM_CONTAINER_SIZE,
OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
@@ -135,6 +144,11 @@ public static StorageContainerLocationProtocol
newContainerRpcClient(
return HAUtils.getScmContainerClient(configSource);
}
+ public static StorageContainerLocationProtocol newContainerRpcClientForNode(
+ ConfigurationSource configSource, ScmNodeTarget targetScmNode) {
+ return HAUtils.getScmContainerClientForNode(configSource, targetScmNode);
+ }
+
public static SecretKeyProtocolScm newSecretKeyClient(
ConfigurationSource configSource) throws IOException {
return HddsServerUtil.getSecretKeyClientForSCM(configSource);
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java
index d15be56410f..3a130945ced 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/SafeModeCheckSubcommand.java
@@ -18,10 +18,20 @@
package org.apache.hadoop.hdds.scm.cli;
import java.io.IOException;
+import java.util.List;
import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.stream.Collectors;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.HddsUtils;
+import org.apache.hadoop.hdds.cli.AbstractSubcommand;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.apache.hadoop.hdds.scm.ha.SCMNodeInfo;
+import
org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB.ScmNodeTarget;
+import picocli.CommandLine;
import picocli.CommandLine.Command;
/**
@@ -32,25 +42,179 @@
description = "Check if SCM is in safe mode",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class)
-public class SafeModeCheckSubcommand extends ScmSubcommand {
+public class SafeModeCheckSubcommand extends AbstractSubcommand implements
Callable<Void> {
+ @CommandLine.Mixin
+ private ScmOption scmOption;
+
+ @CommandLine.Option(names = {"--all", "-a"},
+ description = "Show safe mode status for all SCM nodes in the service. "
+
+ "When multiple SCM service IDs are configured, --service-id must be
specified.")
+ private boolean allNodes;
+
+ private String serviceId;
+ private List<SCMNodeInfo> nodes;
@Override
- public void execute(ScmClient scmClient) throws IOException {
- boolean execReturn = scmClient.inSafeMode();
+ public Void call() throws Exception {
+ OzoneConfiguration conf = getOzoneConf();
+ serviceId = HddsUtils.getScmServiceId(conf);
+ String scmAddress = scmOption.getScm();
+
+ ScmNodeTarget targetScmNode = new ScmNodeTarget();
+ try (ScmClient scmClient = scmOption.createScmClient(conf, targetScmNode))
{
+ nodes = SCMNodeInfo.buildNodeInfo(conf);
+
+ if (serviceId != null) {
+ System.out.println("Service ID: " + serviceId);
+ }
+
+ if (allNodes) {
+ executeForAllNodes(scmClient, targetScmNode);
+ } else if (StringUtils.isNotEmpty(scmAddress)) {
+ executeForSpecificNode(scmClient, targetScmNode, scmAddress);
+ } else {
+ executeForSingleNode(scmClient, targetScmNode);
+ }
+ }
+ return null;
+ }
- // Output data list
- if (execReturn) {
- System.out.println("SCM is in safe mode.");
+ private void executeForSingleNode(ScmClient scmClient, ScmNodeTarget
targetScmNode) throws IOException {
+ SCMNodeInfo targetNode;
+ if (serviceId != null) {
+ // HA mode: find leader
+ targetNode = findLeaderNode(scmClient);
+ if (targetNode == null) {
+ throw new IOException("Could not determine leader node");
+ }
} else {
- System.out.println("SCM is out of safe mode.");
+ // Non-HA mode: use single node
+ targetNode = nodes.get(0);
}
- if (isVerbose()) {
- for (Map.Entry<String, Pair<Boolean, String>> entry :
- scmClient.getSafeModeRuleStatuses().entrySet()) {
- Pair<Boolean, String> value = entry.getValue();
- System.out.printf("validated:%s, %s, %s%n",
- value.getLeft(), entry.getKey(), value.getRight());
+
+ queryNode(scmClient, targetScmNode, targetNode);
+ }
+
+ /**
+ * Find the leader node from SCM roles.
+ * @param scmClient the SCM client
+ * @return the leader SCMNodeInfo, or null if no configured node matches a reported leader
+ */
+ private SCMNodeInfo findLeaderNode(ScmClient scmClient) throws IOException {
+ try {
+ List<String> roles = scmClient.getScmRoles();
+ for (String role : roles) {
+ String[] parts = role.split(":");
+ if (parts.length < 3 || !"LEADER".equalsIgnoreCase(parts[2])) {
+ continue;
+ }
+ String leaderHost = parts[0];
+ String leaderIp = parts.length >= 5 ? parts[4] : null;
+ for (SCMNodeInfo node : nodes) {
+ String nodeHost = node.getScmClientAddress().split(":")[0];
+
+ if (matchesAddress(leaderHost, nodeHost) || (leaderIp != null &&
!leaderIp.isEmpty() &&
+ matchesAddress(leaderIp, nodeHost))) {
+ return node;
+ }
+ }
}
+
+ return null;
+ } catch (IOException e) {
+ throw new IOException("Could not determine leader node", e);
+ }
+ }
+
+ private void executeForSpecificNode(ScmClient scmClient, ScmNodeTarget
targetScmNode,
+ String scmAddress) throws IOException {
+ SCMNodeInfo matchedNode = nodes.stream()
+ .filter(node -> matchesAddress(node.getScmClientAddress(), scmAddress))
+ .findFirst()
+ .orElseThrow(() -> new IOException("Specified --scm address " +
scmAddress +
+ " does not match any node in service " + serviceId +
+ ". Nodes: " + nodes.stream()
+ .map(n -> n.getScmClientAddress() + " [" + n.getNodeId() + "]")
+ .collect(Collectors.joining(", "))));
+
+ queryNode(scmClient, targetScmNode, matchedNode);
+ }
+
+ private void executeForAllNodes(ScmClient scmClient, ScmNodeTarget
targetScmNode) throws IOException {
+ for (SCMNodeInfo node : nodes) {
+ queryNode(scmClient, targetScmNode, node);
+ }
+ }
+
+ private void queryNode(ScmClient scmClient, ScmNodeTarget targetScmNode,
SCMNodeInfo node) {
+ String nodeId = node.getNodeId();
+
+ try {
+ // Set the targetScmNode to target this specific node
+ targetScmNode.setNodeId(nodeId);
+
+ boolean inSafeMode = scmClient.inSafeMode();
+
+ if (serviceId != null) {
+ System.out.printf("%s [%s]: %s%n",
+ node.getScmClientAddress(),
+ nodeId,
+ inSafeMode ? "in safe mode" : "out of safe mode");
+ } else {
+ System.out.printf("SCM is %s safe mode.%n", inSafeMode ? "in" : "out
of");
+ }
+
+ if (isVerbose()) {
+ Map<String, Pair<Boolean, String>> rules =
scmClient.getSafeModeRuleStatuses();
+ if (rules != null && !rules.isEmpty()) {
+ printSafeModeRules(rules);
+ }
+ }
+ } catch (Exception e) {
+ System.out.printf("%s [%s]: ERROR: Failed to get safe mode status for
SCM node: %s%n",
+ node.getScmClientAddress(), nodeId, e.getMessage());
+ }
+ }
+
+ /**
+ * Check whether two address strings refer to the same endpoint.
+ * Compares the full strings ignoring case, then host and, if both have one, port.
+ */
+ private boolean matchesAddress(String address1, String address2) {
+ if (address1.equalsIgnoreCase(address2)) {
+ return true;
+ }
+
+ try {
+ // Parse both addresses into host:port components
+ String[] parts1 = address1.split(":", 2);
+ String[] parts2 = address2.split(":", 2);
+
+ String host1 = parts1[0];
+ String host2 = parts2[0];
+
+ // Hostnames must match
+ if (!host1.equalsIgnoreCase(host2)) {
+ return false;
+ }
+
+ // If both have ports specified, they must match
+ if (parts1.length > 1 && parts2.length > 1) {
+ return parts1[1].equals(parts2[1]);
+ }
+
+ return true;
+ } catch (Exception e) {
+ // Defensive fallback: nothing is resolved here, any parsing failure means no match
+ return false;
+ }
+ }
+
+ private void printSafeModeRules(Map<String, Pair<Boolean, String>> rules) {
+ for (Map.Entry<String, Pair<Boolean, String>> entry : rules.entrySet()) {
+ Pair<Boolean, String> value = entry.getValue();
+ System.out.printf("validated:%s, %s, %s%n",
+ value.getLeft(), entry.getKey(), value.getRight());
}
}
}
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ScmOption.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ScmOption.java
index 640433c99b3..f82b2be6c88 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ScmOption.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ScmOption.java
@@ -30,6 +30,7 @@
import org.apache.hadoop.hdds.protocol.SCMSecurityProtocol;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.client.ScmClient;
+import
org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB.ScmNodeTarget;
import picocli.CommandLine;
/**
@@ -57,6 +58,11 @@ public ScmClient createScmClient(OzoneConfiguration conf)
throws IOException {
return new ContainerOperationClient(conf);
}
+ public ScmClient createScmClient(OzoneConfiguration conf, ScmNodeTarget
targetScmNode) throws IOException {
+ checkAndSetSCMAddressArg(conf);
+ return new ContainerOperationClient(conf, targetScmNode);
+ }
+
private void checkAndSetSCMAddressArg(MutableConfigurationSource conf) {
if (StringUtils.isNotEmpty(scm)) {
conf.set(OZONE_SCM_CLIENT_ADDRESS_KEY, scm);
@@ -88,4 +94,8 @@ public SCMSecurityProtocol createScmSecurityClient() {
"Can't create SCM Security client", ex);
}
}
+
+ public String getScm() {
+ return scm;
+ }
}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestSafeModeCheckSubcommandHA.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestSafeModeCheckSubcommandHA.java
new file mode 100644
index 00000000000..e26accfc6d1
--- /dev/null
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestSafeModeCheckSubcommandHA.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.shell;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.hdds.utils.IOUtils;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
+import org.apache.hadoop.ozone.admin.OzoneAdmin;
+import org.apache.ozone.test.GenericTestUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+/**
+ * Integration tests for SafeModeCheckSubcommand in HA mode.
+ * Tests the 'ozone admin safemode status' command with SCM HA cluster.
+ */
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class TestSafeModeCheckSubcommandHA {
+ private OzoneAdmin ozoneAdmin;
+ private MiniOzoneHAClusterImpl cluster;
+ private GenericTestUtils.PrintStreamCapturer out;
+ private GenericTestUtils.PrintStreamCapturer err;
+
+ @BeforeAll
+ void init() throws IOException, InterruptedException, TimeoutException {
+ OzoneConfiguration conf = new OzoneConfiguration();
+ cluster = MiniOzoneCluster.newHABuilder(conf)
+ .setOMServiceId("om-test")
+ .setNumOfOzoneManagers(3)
+ .setSCMServiceId("scm-test")
+ .setNumOfStorageContainerManagers(3)
+ .build();
+
+ cluster.waitForClusterToBeReady();
+ }
+
+ @BeforeEach
+ void setupCapture() {
+ out = GenericTestUtils.captureOut();
+ err = GenericTestUtils.captureErr();
+ ozoneAdmin = new OzoneAdmin();
+ Map<String, String> configOverrides = new HashMap<>();
+ cluster.getConf().forEach(entry ->
+ configOverrides.put(entry.getKey(), entry.getValue()));
+
+ ozoneAdmin.setConfigurationOverrides(configOverrides);
+ }
+
+ @AfterEach
+ void stopCapture() {
+ IOUtils.closeQuietly(out);
+ IOUtils.closeQuietly(err);
+ }
+
+ @Test
+ public void testNoOptionQueriesLeader() {
+ String[] args = {"safemode", "status"};
+ ozoneAdmin.execute(args);
+ String output = out.get();
+
+ assertThat(output).contains(cluster.getScmLeader().getSCMNodeId());
+ assertThat(output).containsPattern("(in|out of) safe mode");
+ }
+
+ @Test
+ public void testNoOptionWithVerboseShowsRules() {
+ String[] args = {"safemode", "status", "--verbose"};
+ ozoneAdmin.execute(args);
+ String output = out.get();
+
+ assertThat(output).contains(cluster.getScmLeader().getSCMNodeId());
+ assertThat(output).containsPattern("(in|out of) safe mode");
+ assertAllSafeModeRules(output);
+ }
+
+ @Test
+ public void testScmOptionSpecificNodeByAddress() {
+ // Query each SCM node individually
+ List<StorageContainerManager> scms = cluster.getStorageContainerManagers();
+ String serviceId = getServiceId();
+ for (StorageContainerManager scm : scms) {
+ String nodeId = scm.getSCMNodeId();
+ String hostPort = cluster.getConf().get("ozone.scm.client.address." +
serviceId + "." + nodeId);
+
+ String[] args = {"safemode", "status", "--scm", hostPort};
+ ozoneAdmin.execute(args);
+ String output = out.get();
+
+ assertThat(output).contains("Service ID: " + serviceId);
+ assertThat(output).contains(nodeId);
+ assertThat(output).containsPattern("\\[" + nodeId + "\\]: (in|out of)
safe mode");
+ }
+ }
+
+ @Test
+ public void testScmOptionWithVerbose() {
+ // Query specific scm node with verbose flag
+ StorageContainerManager scm = cluster.getStorageContainerManagers().get(0);
+ String nodeId = scm.getSCMNodeId();
+ String serviceId = getServiceId();
+ String hostPort = cluster.getConf().get("ozone.scm.client.address." +
serviceId + "." + nodeId);
+
+ String[] args = {"safemode", "status", "--scm", hostPort, "--verbose"};
+ ozoneAdmin.execute(args);
+ String output = out.get();
+
+ assertThat(output).contains("Service ID: " + serviceId);
+ assertThat(output).contains(nodeId);
+ assertAllSafeModeRules(output);
+ }
+
+ @Test
+ public void testAllOptionShowsAllNodes() {
+ // Query all nodes in the cluster
+ String[] args = {"safemode", "status", "--all"};
+ ozoneAdmin.execute(args);
+ String output = out.get();
+
+ assertThat(output).contains("Service ID: " + getServiceId());
+ assertAllScmNodes(output);
+ }
+
+ @Test
+ public void testAllOptionWithVerboseShowsAllRules() {
+ // Query all nodes with verbose flag
+ String[] args = {"safemode", "status", "--all", "--verbose"};
+ ozoneAdmin.execute(args);
+ String output = out.get();
+
+ assertThat(output).contains("Service ID: " + getServiceId());
+ assertAllScmNodes(output);
+ assertAllSafeModeRules(output);
+ }
+
+ private String getServiceId() {
+ return cluster.getConf().get("ozone.scm.service.ids");
+ }
+
+ private void assertAllSafeModeRules(String output) {
+ assertThat(output).contains("DataNodeSafeModeRule");
+ assertThat(output).contains("RatisContainerSafeModeRule");
+ assertThat(output).contains("HealthyPipelineSafeModeRule");
+ assertThat(output).contains("StateMachineReadyRule");
+ assertThat(output).contains("OneReplicaPipelineSafeModeRule");
+ assertThat(output).contains("ECContainerSafeModeRule");
+ }
+
+ private void assertAllScmNodes(String output) {
+ List<StorageContainerManager> scms = cluster.getStorageContainerManagers();
+ for (StorageContainerManager scm : scms) {
+ String nodeId = scm.getSCMNodeId();
+ assertThat(output).contains(nodeId);
+ assertThat(output).containsPattern("\\[" + nodeId + "\\]: (in|out of)
safe mode");
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]