This is an automated email from the ASF dual-hosted git repository.

avijayan pushed a commit to branch HDDS-3698-nonrolling-upgrade
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-3698-nonrolling-upgrade 
by this push:
     new 5927bd2  HDDS-5118. Recover from failure during upgrade action. (#2179)
5927bd2 is described below

commit 5927bd26a61edf964c1ea429f61d27eec4713165
Author: Ethan Rose <[email protected]>
AuthorDate: Wed Apr 28 11:57:26 2021 -0400

    HDDS-5118. Recover from failure during upgrade action. (#2179)
---
 .../org/apache/hadoop/ozone/common/Storage.java    |   8 --
 .../apache/hadoop/ozone/common/StorageInfo.java    |  36 +------
 .../ozone/upgrade/BasicUpgradeFinalizer.java       | 119 ++-------------------
 .../apache/hadoop/ozone/upgrade/LayoutFeature.java |  15 ++-
 .../hadoop/ozone/upgrade/UpgradeException.java     |   2 -
 .../docs/content/design/upgrade-dev-primer.md      |  16 ++-
 .../ozone/om/upgrade/TestOMUpgradeFinalizer.java   |  24 +----
 7 files changed, 40 insertions(+), 180 deletions(-)

diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java
index 8faa662b..e3664da 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java
@@ -135,14 +135,6 @@ public abstract class Storage {
     storageInfo.setLayoutVersion(version);
   }
 
-  public void setUpgradeToLayoutVersion(int version) {
-    storageInfo.setUpgradingToLayoutVersion(version);
-  }
-
-  public void unsetUpgradeToLayoutVersion() {
-    storageInfo.unsetUpgradingToLayoutVersion();
-  }
-
   public void setFirstUpgradeActionLayoutVersion(int version) {
     storageInfo.setFirstUpgradeActionLayoutVersion(version);
   }
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
index e3da004..e6e1df5 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
@@ -64,9 +64,6 @@ public class StorageInfo {
    */
   private static final String LAYOUT_VERSION = "layoutVersion";
 
-  private static final String UPGRADING_TO_LAYOUT_VERSION =
-      "upgradingToLayoutVersion";
-
   private static final String FIRST_UPGRADE_ACTION_LAYOUT_VERSION =
       "firstUpgradeActionLayoutVersion";
 
@@ -100,7 +97,6 @@ public class StorageInfo {
     verifyClusterId();
     verifyCreationTime();
     verifyLayoutVersion();
-    verifyUpgradingToLayoutVersion();
   }
 
   public NodeType getNodeType() {
@@ -127,20 +123,11 @@ public class StorageInfo {
     return 0;
   }
 
-  public int getUpgradingToLayoutVersion() {
-    String upgradingTo = properties.getProperty(UPGRADING_TO_LAYOUT_VERSION);
-    if (upgradingTo != null) {
-      return Integer.parseInt(upgradingTo);
-    }
-    return INVALID_LAYOUT_VERSION;
-  }
-
   private void verifyLayoutVersion() {
     String layout = getProperty(LAYOUT_VERSION);
     if (layout == null) {
       LOG.warn("Found " + STORAGE_FILE_VERSION + " file without any layout " +
-          "version. Defaulting to 0. This should happen only if a cluster is " 
+
-          "being upgraded from 0.5.0.");
+          "version. Defaulting to 0.");
       setProperty(LAYOUT_VERSION, "0");
     }
   }
@@ -175,17 +162,6 @@ public class StorageInfo {
     properties.setProperty(LAYOUT_VERSION, Integer.toString(version));
   }
 
-  public void setUpgradingToLayoutVersion(int layoutVersion) {
-    //TODO: do we need to check consecutiveness of versions here?
-    // if so, then change  setLayoutVersion to incLayoutVersion in APIs!
-    properties.setProperty(
-        UPGRADING_TO_LAYOUT_VERSION, Integer.toString(layoutVersion));
-  }
-
-  public void unsetUpgradingToLayoutVersion() {
-    properties.remove(UPGRADING_TO_LAYOUT_VERSION);
-  }
-
   private void verifyNodeType(NodeType type)
       throws InconsistentStorageStateException {
     NodeType nodeType = getNodeType();
@@ -205,16 +181,6 @@ public class StorageInfo {
     }
   }
 
-  private void verifyUpgradingToLayoutVersion()
-      throws InconsistentStorageStateException {
-    int upgradeMark = getUpgradingToLayoutVersion();
-    if (upgradeMark != INVALID_LAYOUT_VERSION) {
-      throw new InconsistentStorageStateException("Ozone Manager died during "
-          + "a LayoutFeature upgrade.");
-      //TODO add recovery steps here, or point to a recovery doc.
-    }
-  }
-
   private void verifyCreationTime() {
     Long creationTime = getCreationTime();
     Preconditions.checkNotNull(creationTime);
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
index 7609dcb..f001a39 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
@@ -24,9 +24,7 @@ import static 
org.apache.hadoop.ozone.upgrade.LayoutFeature.UpgradeActionType.VA
 import static 
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.FIRST_UPGRADE_START_ACTION_FAILED;
 import static 
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.INVALID_REQUEST;
 import static 
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.LAYOUT_FEATURE_FINALIZATION_FAILED;
-import static 
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.PERSIST_UPGRADE_TO_LAYOUT_VERSION_FAILED;
 import static 
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.PREFINALIZE_ACTION_VALIDATION_FAILED;
-import static 
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.REMOVE_UPGRADE_TO_LAYOUT_VERSION_FAILED;
 import static 
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.UPDATE_LAYOUT_VERSION_FAILED;
 import static 
org.apache.hadoop.ozone.upgrade.UpgradeFinalizer.Status.FINALIZATION_DONE;
 import static 
org.apache.hadoop.ozone.upgrade.UpgradeFinalizer.Status.FINALIZATION_REQUIRED;
@@ -203,39 +201,24 @@ public abstract class BasicUpgradeFinalizer
       LayoutFeature lf = (LayoutFeature) obj;
       Storage layoutStorage = storageSuppplier.get();
       Optional<? extends UpgradeAction> action = lf.action(ON_FINALIZE);
-      finalizeFeature(lf, layoutStorage, action);
+      runFinalizationAction(lf, action);
       updateLayoutVersionInVersionFile(lf, layoutStorage);
       versionManager.finalized(lf);
     }
     versionManager.completeFinalization();
   }
 
-  protected void finalizeFeature(LayoutFeature feature, Storage config,
-                                 Optional<? extends UpgradeAction> action)
-      throws UpgradeException {
+  protected void runFinalizationAction(LayoutFeature feature,
+      Optional<?extends UpgradeAction> action) throws UpgradeException {
 
     if (!action.isPresent()) {
       emitNOOPMsg(feature.name());
-      return;
-    }
-
-    putFinalizationMarkIntoVersionFile(feature, config);
-
-    emitStartingFinalizationActionMsg(feature.name());
-
-    runOnFinalizeAction(feature, action.get());
-
-    emitFinishFinalizationActionMsg(feature.name());
-
-    removeFinalizationMarkFromVersionFile(feature, config);
-  }
-
-  private void runOnFinalizeAction(LayoutFeature feature, UpgradeAction action)
-      throws UpgradeException {
-    try {
-      action.execute(component);
-    } catch (Exception e) {
-      logFinalizationFailureAndThrow(e, feature.name());
+    } else {
+      try {
+        action.get().execute(component);
+      } catch (Exception e) {
+        logFinalizationFailureAndThrow(e, feature.name());
+      }
     }
   }
 
@@ -325,44 +308,6 @@ public abstract class BasicUpgradeFinalizer
     }
   }
 
-  protected void putFinalizationMarkIntoVersionFile(LayoutFeature feature,
-                                                  Storage config)
-      throws UpgradeException {
-    try {
-      emitUpgradeToLayoutVersionPersistingMsg(feature.name());
-
-      setUpgradeToLayoutVersionInStorage(feature.layoutVersion(), config);
-      persistStorage(config);
-
-      emitUpgradeToLayoutVersionPersistedMsg();
-    } catch (IOException e) {
-      logUpgradeToLayoutVersionPersistingFailureAndThrow(feature.name(), e);
-    }
-  }
-
-  protected void removeFinalizationMarkFromVersionFile(
-      LayoutFeature feature, Storage config) throws UpgradeException {
-    try {
-      emitRemovingUpgradeToLayoutVersionMsg(feature.name());
-
-      unsetUpgradeToLayoutVersionInStorage(config);
-      persistStorage(config);
-
-      emitRemovedUpgradeToLayoutVersionMsg();
-    } catch (IOException e) {
-      logUpgradeToLayoutVersionRemovalFailureAndThrow(feature.name(), e);
-    }
-  }
-
-  private void setUpgradeToLayoutVersionInStorage(int version,
-                                                  Storage config) {
-    config.setUpgradeToLayoutVersion(version);
-  }
-
-  private void unsetUpgradeToLayoutVersionInStorage(Storage config) {
-    config.unsetUpgradeToLayoutVersion();
-  }
-
   private int currentStoredLayoutVersion(Storage config) {
     return config.getLayoutVersion();
   }
@@ -391,37 +336,6 @@ public abstract class BasicUpgradeFinalizer
     logAndEmit(msg);
   }
 
-  protected void emitStartingFinalizationActionMsg(String feature) {
-    String msg = "Executing onFinalize action of feature: " + feature + ".";
-    logAndEmit(msg);
-  }
-
-  protected void emitFinishFinalizationActionMsg(String feature) {
-    String msg = "The feature " + feature + " is finalized.";
-    logAndEmit(msg);
-  }
-
-  private void emitUpgradeToLayoutVersionPersistingMsg(String feature) {
-    String msg = "Mark finalization of " + feature + " in VERSION file.";
-    logAndEmit(msg);
-  }
-
-  private void emitUpgradeToLayoutVersionPersistedMsg() {
-    String msg = "Finalization mark placed.";
-    logAndEmit(msg);
-  }
-
-  private void emitRemovingUpgradeToLayoutVersionMsg(String feature) {
-    String msg = "Remove finalization mark of " + feature
-        + " feature from VERSION file.";
-    logAndEmit(msg);
-  }
-
-  private void emitRemovedUpgradeToLayoutVersionMsg() {
-    String msg = "Finalization mark removed.";
-    logAndEmit(msg);
-  }
-
   protected void logAndEmit(String msg) {
     LOG.info(msg);
     msgs.offer(msg);
@@ -439,21 +353,6 @@ public abstract class BasicUpgradeFinalizer
     logAndThrow(e, msg, UPDATE_LAYOUT_VERSION_FAILED);
   }
 
-  private void logUpgradeToLayoutVersionPersistingFailureAndThrow(
-      String feature, IOException e
-  ) throws UpgradeException {
-    String msg = "Failed to update VERSION file with the upgrading feature: "
-        + feature + ".";
-    logAndThrow(e, msg, PERSIST_UPGRADE_TO_LAYOUT_VERSION_FAILED);
-  }
-
-  private void logUpgradeToLayoutVersionRemovalFailureAndThrow(
-      String feature, IOException e) throws UpgradeException {
-    String msg =
-        "Failed to unmark finalization of " + feature + " LayoutFeature.";
-    logAndThrow(e, msg, REMOVE_UPGRADE_TO_LAYOUT_VERSION_FAILED);
-  }
-
   private void logAndThrow(Exception e, String msg, ResultCodes resultCode)
       throws UpgradeException {
     LOG.error(msg, e);
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
index 52704c9..92dd706 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
@@ -56,15 +56,24 @@ public interface LayoutFeature {
     // Run every time an un-finalized component is started up.
     VALIDATE_IN_PREFINALIZE,
 
-    // Run exactly once when an upgraded cluster is detected with this new
+    // Run once when an upgraded cluster is started with this new
     // layout version.
+    // If the action fails, it will be run again when the component is
+    // restarted.
+    // If updating the VERSION file fails, the action may be run again when the
+    // component is restarted, even if it finished successfully.
     // NOTE 1 : This will not be run in a NEW cluster!
     // NOTE 2 : This needs to be a backward compatible action until a DOWNGRADE
-    // hook is provided!
+    //  hook is provided!
+    //  Even if the action fails partway through, all on disk structures should
+    //  still be in a backwards compatible state.
     // NOTE 3 : These actions are not submitted through RATIS (TODO)
     ON_FIRST_UPGRADE_START,
 
-    // Run exactly once during finalization of layout feature.
+    // Run once during finalization of the layout feature.
+    // If the action fails, it will be run again when finalization is retried.
+    // If updating the VERSION file fails, the action may be run again when
+    // finalization is retried, even if it finished successfully.
     ON_FINALIZE
   }
 }
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
index dfc43ef..0e69a4f 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
@@ -105,8 +105,6 @@ public class UpgradeException extends IOException {
   public enum ResultCodes {
     OK,
     INVALID_REQUEST,
-    PERSIST_UPGRADE_TO_LAYOUT_VERSION_FAILED,
-    REMOVE_UPGRADE_TO_LAYOUT_VERSION_FAILED,
     UPDATE_LAYOUT_VERSION_FAILED,
     LAYOUT_FEATURE_FINALIZATION_FAILED,
     PREFINALIZE_ACTION_VALIDATION_FAILED,
diff --git a/hadoop-hdds/docs/content/design/upgrade-dev-primer.md 
b/hadoop-hdds/docs/content/design/upgrade-dev-primer.md
index 338765e..92c5330 100644
--- a/hadoop-hdds/docs/content/design/upgrade-dev-primer.md
+++ b/hadoop-hdds/docs/content/design/upgrade-dev-primer.md
@@ -62,11 +62,23 @@ Annotation to specify upgrade action run during specific 
upgrade phases. Each la
 #### VALIDATE_IN_PREFINALIZE
 A ‘validation’ action run every time a component is started up with this 
layout feature being unfinalized.
 
+- Example: Stopping a component if a new configuration is used prior to it 
being finalized.
+
+- Example: Cleaning up from a failed ON_FINALIZE action that may have left on 
disk data in an inoperable state.
+    - Note that because the ON_FINALIZE action failed, the feature remains 
pre-finalized.
+
 #### ON_FIRST_UPGRADE_START
-A backward compatible action run exactly once when an upgraded cluster is 
detected with this new  layout version.
+A backward compatible action run once when an upgraded cluster is detected 
with this new layout version. This differs from VALIDATE_IN_PREFINALIZE because 
it will not be run again once it completes successfully.
+This action will be run again if it fails partway through, and may be run 
again if another error occurs during the upgrade. The action must always leave 
on disk data in a backwards compatible state, even if it fails partway through, 
since it is being executed before finalization.
+
+- Example: The new version expects data in a different location even when it 
is pre-finalized. The action creates a symlink in the new location pointing to 
the old location.
 
 #### ON_FINALIZE
-An action run exactly once during finalization of layout version (feature).
+An action run once during finalization of layout version (feature). This 
action will be run again if it fails partway through, and may be run again if 
another error occurs during the upgrade. This is the only action permitted to 
make backwards incompatible changes to on disk structures, since finalization 
has been initiated by the time it is run. If a failure partway through could 
leave the component in an inoperable state, a cleanup action should be used in 
VALIDATE_IN_PREFINALIZE, whic [...]
+
+- Example: Adding a new RocksDB column family.
+
+- Example: Logging a message saying a feature is being finalized.
 
 ## ‘Prepare’ the Ozone Manager
 Used to flush all transactions to disk, take a DB snapshot, and purge the 
logs, leaving Ratis in a clean state without unapplied log entries. This 
prepares the OM for upgrades/downgrades so that no request in the log is 
applied to the database in the old version of the code in one OM, and the new 
version of the code in another OM.
diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
index 5279530..9699524 100644
--- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
+++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
@@ -163,25 +163,17 @@ public class TestOMUpgradeFinalizer {
     OMUpgradeFinalizer finalizer = new OMUpgradeFinalizer(versionManager);
     finalizer.finalize(CLIENT_ID, om);
 
-
     Iterator<OMLayoutFeature> it = lfs.iterator();
     OMLayoutFeature f = it.next();
 
-    // the first feature has an upgrade action, it calls the setUpgradeToLV
-    // method, and the action execution is checked by verifying on 
om.getVersion
-    verify(om.getOmStorage(), once())
-        .setUpgradeToLayoutVersion(f.layoutVersion());
+    // the first feature has an upgrade action, and the action execution is
+    // checked by verifying on om.getVersion
     verify(om.getOmStorage(), once())
         .setLayoutVersion(f.layoutVersion());
-    verify(om.getOmStorage(), once())
-        .unsetUpgradeToLayoutVersion();
     verify(om, once()).getVersion();
 
-    // the second feature has a NOOP it should not call the setUpgradeToLV
-    // method, but should update the LV.
+    // The second feature has a NOOP, but should update the layout version.
     f = it.next();
-    verify(om.getOmStorage(), never())
-        .setUpgradeToLayoutVersion(f.layoutVersion());
     verify(om.getOmStorage(), once())
         .setLayoutVersion(f.layoutVersion());
 
@@ -222,23 +214,15 @@ public class TestOMUpgradeFinalizer {
       when(versionManager.getUpgradeState()).thenReturn(FINALIZATION_DONE);
     }
 
-    // Verify that we have never removed the upgradeToLV from the storage
-    // as finalization of the first feature in the list fails.
-    // Also verify that we have never updated the LV.
+    // Verify that we have never updated the layout version.
     Iterator<OMLayoutFeature> it = lfs.iterator();
     OMLayoutFeature f = it.next();
-    verify(om.getOmStorage(), once())
-        .setUpgradeToLayoutVersion(f.layoutVersion());
     verify(om.getOmStorage(), never())
         .setLayoutVersion(f.layoutVersion());
-    verify(om.getOmStorage(), never())
-        .unsetUpgradeToLayoutVersion();
 
     // Verify that we never got to the second feature.
     f = it.next();
     verify(om.getOmStorage(), never())
-        .setUpgradeToLayoutVersion(f.layoutVersion());
-    verify(om.getOmStorage(), never())
         .setLayoutVersion(f.layoutVersion());
 
     StatusAndMessages status = finalizer.reportStatus(CLIENT_ID, false);

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to