This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new fa1cfc1a38 HDDS-9897. Option to enable Ratis in SCM. (#5779)
fa1cfc1a38 is described below

commit fa1cfc1a38c3559ca070bc18a9a5921a99939be7
Author: Nandakumar Vadivelu <[email protected]>
AuthorDate: Fri Dec 29 02:59:20 2023 +0530

    HDDS-9897. Option to enable Ratis in SCM. (#5779)
---
 .../hadoop/hdds/conf/DefaultConfigManager.java     |  4 +++
 .../upgrade/AbstractLayoutVersionManager.java      |  1 +
 .../hadoop/ozone/upgrade/LayoutVersionManager.java |  2 ++
 hadoop-hdds/docs/content/feature/SCM-HA.md         |  7 -----
 .../hadoop/hdds/scm/ha/SCMHANodeDetails.java       | 35 ++++++++++------------
 .../hdds/scm/server/StorageContainerManager.java   | 35 +++++++++++++++++++---
 .../hadoop/hdds/scm/ha/TestSCMHAConfiguration.java | 21 ++++++-------
 .../TestSCMHAUnfinalizedStateValidationAction.java |  9 ++++--
 .../ozone/scm/TestStorageContainerManager.java     | 34 ++++++++++++++++++++-
 9 files changed, 103 insertions(+), 45 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/DefaultConfigManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/DefaultConfigManager.java
index c33b1a3308..5b883e8b2c 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/DefaultConfigManager.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/DefaultConfigManager.java
@@ -47,6 +47,10 @@ public final class DefaultConfigManager {
     return (T) CONFIG_DEFAULT_MAP.getOrDefault(config, defaultValue);
   }
 
+  public static <T> void forceUpdateConfigValue(String config, T value) {
+    CONFIG_DEFAULT_MAP.put(config, value);
+  }
+
   @VisibleForTesting
   public static void clearDefaultConfigs() {
     CONFIG_DEFAULT_MAP.clear();
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/AbstractLayoutVersionManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/AbstractLayoutVersionManager.java
index 3e09ae2b07..e234a30b81 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/AbstractLayoutVersionManager.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/AbstractLayoutVersionManager.java
@@ -221,6 +221,7 @@ public abstract class AbstractLayoutVersionManager<T extends LayoutFeature>
     }
   }
 
+  @Override
   public void close() {
     if (mBean != null) {
       MBeans.unregister(mBean);
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java
index 83ccc1e2f6..3137d756e6 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java
@@ -80,4 +80,6 @@ public interface LayoutVersionManager {
     return null;
   }
 
+  void close();
+
 }
diff --git a/hadoop-hdds/docs/content/feature/SCM-HA.md b/hadoop-hdds/docs/content/feature/SCM-HA.md
index 551c340409..cc42500e0c 100644
--- a/hadoop-hdds/docs/content/feature/SCM-HA.md
+++ b/hadoop-hdds/docs/content/feature/SCM-HA.md
@@ -33,13 +33,6 @@ This document explains the HA setup of Storage Container Manager (SCM), please c
 
 ## Configuration
 
-> &#x26a0;&#xfe0f; **IMPORTANT** &#x26a0;&#xfe0f;
->
-> SCM HA is currently supported only for fresh installations.
-> SCM HA must be enabled when starting the Ozone service in the beginning.
-> Once an SCM has been started in non-HA mode,
-> changing it to HA mode is unsupported.
-
 HA mode of Storage Container Manager can be enabled with the following settings in `ozone-site.xml`:
 
 ```XML
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHANodeDetails.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHANodeDetails.java
index a83ba1b692..0c90987f0c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHANodeDetails.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHANodeDetails.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hdds.scm.ha;
 
 import com.google.common.base.Preconditions;
-import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hdds.conf.ConfigurationException;
 import org.apache.hadoop.hdds.conf.DefaultConfigManager;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -155,8 +154,9 @@ public class SCMHANodeDetails {
    which defaults to
    {@link org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_DEFAULT}
    For Previously Initialized SCM the values are taken from the version file
-   Ratis SCM -> Non Ratis SCM & vice versa is not supported
-   This values is validated with the config provided.
+   <br>
+   Ratis SCM -> Non Ratis SCM is not supported.
+   This value is validated with the config provided.
   **/
   private static void validateSCMHAConfig(SCMStorageConfig scmStorageConfig,
                                           OzoneConfiguration conf) {
@@ -169,24 +169,21 @@ public class SCMHANodeDetails {
     if (Storage.StorageState.INITIALIZED.equals(state) &&
             scmHAEnabled != scmHAEnableDefault) {
       String errorMessage = String.format("Current State of SCM: %s",
-              scmHAEnableDefault ? "Ratis SCM is enabled "
-              : "SCM is running in Non-HA without Ratis")
-              + " Ratis SCM -> Non Ratis SCM or " +
-              "Non HA SCM -> HA SCM is not supported";
-      if (StringUtils.isNotEmpty(
-          conf.get(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY))) {
+              scmHAEnableDefault ? "SCM is running with Ratis. "
+              : "SCM is running without Ratis. ")
+              + "Ratis SCM -> Non Ratis SCM is not supported.";
+      if (!scmHAEnabled) {
         throw new ConfigurationException(String.format("Invalid Config %s " +
-                "Provided ConfigValue: %s, Expected Config Value: %s. %s",
-            ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, scmHAEnabled,
-            scmHAEnableDefault, errorMessage));
+                "Provided ConfigValue: false, Expected Config Value: true. %s",
+            ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, errorMessage));
       } else {
-        LOG.warn("Invalid config {}. The config was not specified, " +
-                        "but the default value {} conflicts with " +
-                        "the expected config value {}. " +
-                        "Falling back to the expected value. {}",
-                ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY,
-                ScmConfigKeys.OZONE_SCM_HA_ENABLE_DEFAULT,
-                scmHAEnableDefault, errorMessage);
+        LOG.warn("Default/Configured value of config {} conflicts with " +
+                "the expected value. " +
+                "Default/Configured: {}. " +
+                "Expected: {}. " +
+                "Falling back to the expected value. {}",
+            ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY,
+            scmHAEnabled, scmHAEnableDefault, errorMessage);
       }
     }
     DefaultConfigManager.setConfigValue(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY,
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index c000514ed3..5cbca83ff1 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.annotation.InterfaceAudience;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.conf.DefaultConfigManager;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.ReconfigurationHandler;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -160,6 +161,7 @@ import org.apache.hadoop.ozone.common.Storage.StorageState;
 import org.apache.hadoop.ozone.lease.LeaseManager;
 import org.apache.hadoop.ozone.lease.LeaseManagerNotRunningException;
 import org.apache.hadoop.ozone.upgrade.DefaultUpgradeFinalizationExecutor;
+import org.apache.hadoop.ozone.upgrade.LayoutVersionManager;
 import org.apache.hadoop.ozone.upgrade.UpgradeFinalizationExecutor;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.SecurityUtil;
@@ -1328,8 +1330,14 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
       // If SCM HA was not being used before pre-finalize, and is being used
       // when the cluster is pre-finalized for the SCM HA feature, init
       // should fail.
-      ScmHAUnfinalizedStateValidationAction.checkScmHA(conf, scmStorageConfig,
-          new HDDSLayoutVersionManager(scmStorageConfig.getLayoutVersion()));
+      final LayoutVersionManager layoutVersionManager =
+          new HDDSLayoutVersionManager(scmStorageConfig.getLayoutVersion());
+      try {
+        ScmHAUnfinalizedStateValidationAction.checkScmHA(conf, scmStorageConfig,
+            layoutVersionManager);
+      } finally {
+        layoutVersionManager.close();
+      }
 
       clusterId = scmStorageConfig.getClusterID();
       final boolean isSCMHAEnabled = scmStorageConfig.isSCMHAEnabled();
@@ -1337,14 +1345,33 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
       // Initialize security if security is enabled later.
       initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, true);
 
-      if (SCMHAUtils.isSCMHAEnabled(conf) && !isSCMHAEnabled) {
+      if (conf.getBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY,
+          ScmConfigKeys.OZONE_SCM_HA_ENABLE_DEFAULT) && !isSCMHAEnabled) {
         SCMRatisServerImpl.initialize(scmStorageConfig.getClusterID(),
             scmStorageConfig.getScmId(), haDetails.getLocalNodeDetails(),
             conf);
         scmStorageConfig.setSCMHAFlag(true);
         scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId());
         scmStorageConfig.forceInitialize();
-        LOG.debug("Enabled SCM HA");
+
+        /*
+         * Since Ratis is initialized on an existing cluster, we have to
+         * trigger Ratis snapshot so that this SCM can send the latest scm.db
+         * to the bootstrapping SCMs later.
+         */
+
+        try {
+          DefaultConfigManager.forceUpdateConfigValue(
+              ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true);
+          StorageContainerManager scm = createSCM(conf);
+          scm.start();
+          scm.getScmHAManager().getRatisServer().triggerSnapshot();
+          scm.stop();
+          scm.join();
+        } catch (AuthenticationException e) {
+          throw new IOException(e);
+        }
+        LOG.info("Enabled SCM HA");
       }
 
       LOG.info("SCM already initialized. Reusing existing cluster id for sd={}"
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAConfiguration.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAConfiguration.java
index 7c29250237..95b8df2132 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAConfiguration.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAConfiguration.java
@@ -322,7 +322,7 @@ class TestSCMHAConfiguration {
 
   @Test
   public void testRatisEnabledDefaultConfigWithInitializedSCM()
-      throws IOException, NoSuchFieldException, IllegalAccessException {
+      throws IOException {
     SCMStorageConfig scmStorageConfig = Mockito.mock(SCMStorageConfig.class);
     Mockito.when(scmStorageConfig.getState())
         .thenReturn(Storage.StorageState.INITIALIZED);
@@ -336,15 +336,13 @@ class TestSCMHAConfiguration {
     assertTrue(SCMHAUtils.isSCMHAEnabled(conf));
   }
 
-  @ParameterizedTest
-  @ValueSource(booleans = {true, false})
-  public void testRatisEnabledDefaultConflictConfigWithInitializedSCM(
-      boolean isRatisEnabled) {
+  @Test
+  public void testRatisEnabledDefaultConflictConfigWithInitializedSCM() {
     SCMStorageConfig scmStorageConfig = Mockito.mock(SCMStorageConfig.class);
     Mockito.when(scmStorageConfig.getState())
         .thenReturn(Storage.StorageState.INITIALIZED);
-    Mockito.when(scmStorageConfig.isSCMHAEnabled()).thenReturn(isRatisEnabled);
-    conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, !isRatisEnabled);
+    Mockito.when(scmStorageConfig.isSCMHAEnabled()).thenReturn(true);
+    conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, false);
     assertThrows(ConfigurationException.class,
             () -> SCMHANodeDetails.loadSCMHAConfig(conf, scmStorageConfig));
   }
@@ -359,11 +357,10 @@ class TestSCMHAConfiguration {
         ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, !ratisEnabled));
   }
 
-  @ParameterizedTest
-  @ValueSource(booleans = {true, false})
-  void testInvalidHAConfig(boolean ratisEnabled) throws IOException {
-    conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, ratisEnabled);
-    SCMStorageConfig scmStorageConfig = newStorageConfig(!ratisEnabled);
+  @Test
+  void testInvalidHAConfig() throws IOException {
+    conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, false);
+    SCMStorageConfig scmStorageConfig = newStorageConfig(true);
     String clusterID = scmStorageConfig.getClusterID();
     assertThrows(ConfigurationException.class,
         () -> StorageContainerManager.scmInit(conf, clusterID));
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java
index f658805a7b..1e910681f1 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java
@@ -87,8 +87,13 @@ public class TestSCMHAUnfinalizedStateValidationAction {
        Ratis SCM -> Non Ratis SCM not supported
      */
     if (haEnabledPreFinalized != haEnabledBefore) {
-      Assertions.assertThrows(ConfigurationException.class,
-              () -> StorageContainerManager.scmInit(conf, CLUSTER_ID));
+      if (haEnabledBefore) {
+        Assertions.assertThrows(ConfigurationException.class,
+            () -> StorageContainerManager.scmInit(conf, CLUSTER_ID));
+      } else {
+        Assertions.assertThrows(UpgradeException.class,
+            () -> StorageContainerManager.scmInit(conf, CLUSTER_ID));
+      }
       return;
     }
     StorageContainerManager scm = HddsTestUtils.getScm(conf);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManager.java
index 9286cfe521..b6c5d0dea0 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManager.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManager.java
@@ -637,13 +637,14 @@ public class TestStorageContainerManager {
     MiniOzoneCluster cluster =
         MiniOzoneCluster.newBuilder(conf).setNumDatanodes(3).build();
     cluster.waitForClusterToBeReady();
+    cluster.getStorageContainerManager().stop();
     try {
       final UUID clusterId = UUID.randomUUID();
       // This will initialize SCM
       StorageContainerManager.scmInit(conf, clusterId.toString());
       SCMStorageConfig scmStore = new SCMStorageConfig(conf);
       Assert.assertNotEquals(clusterId.toString(), scmStore.getClusterID());
-      Assert.assertFalse(scmStore.isSCMHAEnabled());
+      Assert.assertTrue(scmStore.isSCMHAEnabled());
     } finally {
       cluster.shutdown();
     }
@@ -1080,6 +1081,37 @@ public class TestStorageContainerManager {
     containerReportExecutors.close();
   }
 
+  @Test
+  public void testNonRatisToRatis()
+      throws IOException, AuthenticationException, InterruptedException,
+      TimeoutException {
+    final OzoneConfiguration conf = new OzoneConfiguration();
+    final String clusterID = UUID.randomUUID().toString();
+    try (MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf)
+        .setClusterId(clusterID)
+        .setScmId(UUID.randomUUID().toString())
+        .setNumDatanodes(3)
+        .build()) {
+      final StorageContainerManager nonRatisSCM = cluster
+          .getStorageContainerManager();
+      Assert.assertNull(nonRatisSCM.getScmHAManager().getRatisServer());
+      Assert.assertFalse(nonRatisSCM.getScmStorageConfig().isSCMHAEnabled());
+      nonRatisSCM.stop();
+      nonRatisSCM.join();
+
+      DefaultConfigManager.clearDefaultConfigs();
+      conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true);
+      StorageContainerManager.scmInit(conf, clusterID);
+      cluster.restartStorageContainerManager(false);
+
+      final StorageContainerManager ratisSCM = cluster
+          .getStorageContainerManager();
+      Assert.assertNotNull(ratisSCM.getScmHAManager().getRatisServer());
+      Assert.assertTrue(ratisSCM.getScmStorageConfig().isSCMHAEnabled());
+
+    }
+  }
+
   private void addTransactions(StorageContainerManager scm,
       DeletedBlockLog delLog,
       Map<Long, List<Long>> containerBlocksMap)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to