This is an automated email from the ASF dual-hosted git repository.
avijayan pushed a commit to branch HDDS-3698-nonrolling-upgrade
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-3698-nonrolling-upgrade
by this push:
new 5927bd2 HDDS-5118. Recover from failure during upgrade action. (#2179)
5927bd2 is described below
commit 5927bd26a61edf964c1ea429f61d27eec4713165
Author: Ethan Rose <[email protected]>
AuthorDate: Wed Apr 28 11:57:26 2021 -0400
HDDS-5118. Recover from failure during upgrade action. (#2179)
---
.../org/apache/hadoop/ozone/common/Storage.java | 8 --
.../apache/hadoop/ozone/common/StorageInfo.java | 36 +------
.../ozone/upgrade/BasicUpgradeFinalizer.java | 119 ++-------------------
.../apache/hadoop/ozone/upgrade/LayoutFeature.java | 15 ++-
.../hadoop/ozone/upgrade/UpgradeException.java | 2 -
.../docs/content/design/upgrade-dev-primer.md | 16 ++-
.../ozone/om/upgrade/TestOMUpgradeFinalizer.java | 24 +----
7 files changed, 40 insertions(+), 180 deletions(-)
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java
index 8faa662b..e3664da 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java
@@ -135,14 +135,6 @@ public abstract class Storage {
storageInfo.setLayoutVersion(version);
}
- public void setUpgradeToLayoutVersion(int version) {
- storageInfo.setUpgradingToLayoutVersion(version);
- }
-
- public void unsetUpgradeToLayoutVersion() {
- storageInfo.unsetUpgradingToLayoutVersion();
- }
-
public void setFirstUpgradeActionLayoutVersion(int version) {
storageInfo.setFirstUpgradeActionLayoutVersion(version);
}
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
index e3da004..e6e1df5 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/StorageInfo.java
@@ -64,9 +64,6 @@ public class StorageInfo {
*/
private static final String LAYOUT_VERSION = "layoutVersion";
- private static final String UPGRADING_TO_LAYOUT_VERSION =
- "upgradingToLayoutVersion";
-
private static final String FIRST_UPGRADE_ACTION_LAYOUT_VERSION =
"firstUpgradeActionLayoutVersion";
@@ -100,7 +97,6 @@ public class StorageInfo {
verifyClusterId();
verifyCreationTime();
verifyLayoutVersion();
- verifyUpgradingToLayoutVersion();
}
public NodeType getNodeType() {
@@ -127,20 +123,11 @@ public class StorageInfo {
return 0;
}
- public int getUpgradingToLayoutVersion() {
- String upgradingTo = properties.getProperty(UPGRADING_TO_LAYOUT_VERSION);
- if (upgradingTo != null) {
- return Integer.parseInt(upgradingTo);
- }
- return INVALID_LAYOUT_VERSION;
- }
-
private void verifyLayoutVersion() {
String layout = getProperty(LAYOUT_VERSION);
if (layout == null) {
LOG.warn("Found " + STORAGE_FILE_VERSION + " file without any layout " +
- "version. Defaulting to 0. This should happen only if a cluster is "
+
- "being upgraded from 0.5.0.");
+ "version. Defaulting to 0.");
setProperty(LAYOUT_VERSION, "0");
}
}
@@ -175,17 +162,6 @@ public class StorageInfo {
properties.setProperty(LAYOUT_VERSION, Integer.toString(version));
}
- public void setUpgradingToLayoutVersion(int layoutVersion) {
- //TODO: do we need to check consecutiveness of versions here?
- // if so, then change setLayoutVersion to incLayoutVersion in APIs!
- properties.setProperty(
- UPGRADING_TO_LAYOUT_VERSION, Integer.toString(layoutVersion));
- }
-
- public void unsetUpgradingToLayoutVersion() {
- properties.remove(UPGRADING_TO_LAYOUT_VERSION);
- }
-
private void verifyNodeType(NodeType type)
throws InconsistentStorageStateException {
NodeType nodeType = getNodeType();
@@ -205,16 +181,6 @@ public class StorageInfo {
}
}
- private void verifyUpgradingToLayoutVersion()
- throws InconsistentStorageStateException {
- int upgradeMark = getUpgradingToLayoutVersion();
- if (upgradeMark != INVALID_LAYOUT_VERSION) {
- throw new InconsistentStorageStateException("Ozone Manager died during "
- + "a LayoutFeature upgrade.");
- //TODO add recovery steps here, or point to a recovery doc.
- }
- }
-
private void verifyCreationTime() {
Long creationTime = getCreationTime();
Preconditions.checkNotNull(creationTime);
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
index 7609dcb..f001a39 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/BasicUpgradeFinalizer.java
@@ -24,9 +24,7 @@ import static
org.apache.hadoop.ozone.upgrade.LayoutFeature.UpgradeActionType.VA
import static
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.FIRST_UPGRADE_START_ACTION_FAILED;
import static
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.INVALID_REQUEST;
import static
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.LAYOUT_FEATURE_FINALIZATION_FAILED;
-import static
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.PERSIST_UPGRADE_TO_LAYOUT_VERSION_FAILED;
import static
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.PREFINALIZE_ACTION_VALIDATION_FAILED;
-import static
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.REMOVE_UPGRADE_TO_LAYOUT_VERSION_FAILED;
import static
org.apache.hadoop.ozone.upgrade.UpgradeException.ResultCodes.UPDATE_LAYOUT_VERSION_FAILED;
import static
org.apache.hadoop.ozone.upgrade.UpgradeFinalizer.Status.FINALIZATION_DONE;
import static
org.apache.hadoop.ozone.upgrade.UpgradeFinalizer.Status.FINALIZATION_REQUIRED;
@@ -203,39 +201,24 @@ public abstract class BasicUpgradeFinalizer
LayoutFeature lf = (LayoutFeature) obj;
Storage layoutStorage = storageSuppplier.get();
Optional<? extends UpgradeAction> action = lf.action(ON_FINALIZE);
- finalizeFeature(lf, layoutStorage, action);
+ runFinalizationAction(lf, action);
updateLayoutVersionInVersionFile(lf, layoutStorage);
versionManager.finalized(lf);
}
versionManager.completeFinalization();
}
- protected void finalizeFeature(LayoutFeature feature, Storage config,
- Optional<? extends UpgradeAction> action)
- throws UpgradeException {
+ protected void runFinalizationAction(LayoutFeature feature,
+ Optional<?extends UpgradeAction> action) throws UpgradeException {
if (!action.isPresent()) {
emitNOOPMsg(feature.name());
- return;
- }
-
- putFinalizationMarkIntoVersionFile(feature, config);
-
- emitStartingFinalizationActionMsg(feature.name());
-
- runOnFinalizeAction(feature, action.get());
-
- emitFinishFinalizationActionMsg(feature.name());
-
- removeFinalizationMarkFromVersionFile(feature, config);
- }
-
- private void runOnFinalizeAction(LayoutFeature feature, UpgradeAction action)
- throws UpgradeException {
- try {
- action.execute(component);
- } catch (Exception e) {
- logFinalizationFailureAndThrow(e, feature.name());
+ } else {
+ try {
+ action.get().execute(component);
+ } catch (Exception e) {
+ logFinalizationFailureAndThrow(e, feature.name());
+ }
}
}
@@ -325,44 +308,6 @@ public abstract class BasicUpgradeFinalizer
}
}
- protected void putFinalizationMarkIntoVersionFile(LayoutFeature feature,
- Storage config)
- throws UpgradeException {
- try {
- emitUpgradeToLayoutVersionPersistingMsg(feature.name());
-
- setUpgradeToLayoutVersionInStorage(feature.layoutVersion(), config);
- persistStorage(config);
-
- emitUpgradeToLayoutVersionPersistedMsg();
- } catch (IOException e) {
- logUpgradeToLayoutVersionPersistingFailureAndThrow(feature.name(), e);
- }
- }
-
- protected void removeFinalizationMarkFromVersionFile(
- LayoutFeature feature, Storage config) throws UpgradeException {
- try {
- emitRemovingUpgradeToLayoutVersionMsg(feature.name());
-
- unsetUpgradeToLayoutVersionInStorage(config);
- persistStorage(config);
-
- emitRemovedUpgradeToLayoutVersionMsg();
- } catch (IOException e) {
- logUpgradeToLayoutVersionRemovalFailureAndThrow(feature.name(), e);
- }
- }
-
- private void setUpgradeToLayoutVersionInStorage(int version,
- Storage config) {
- config.setUpgradeToLayoutVersion(version);
- }
-
- private void unsetUpgradeToLayoutVersionInStorage(Storage config) {
- config.unsetUpgradeToLayoutVersion();
- }
-
private int currentStoredLayoutVersion(Storage config) {
return config.getLayoutVersion();
}
@@ -391,37 +336,6 @@ public abstract class BasicUpgradeFinalizer
logAndEmit(msg);
}
- protected void emitStartingFinalizationActionMsg(String feature) {
- String msg = "Executing onFinalize action of feature: " + feature + ".";
- logAndEmit(msg);
- }
-
- protected void emitFinishFinalizationActionMsg(String feature) {
- String msg = "The feature " + feature + " is finalized.";
- logAndEmit(msg);
- }
-
- private void emitUpgradeToLayoutVersionPersistingMsg(String feature) {
- String msg = "Mark finalization of " + feature + " in VERSION file.";
- logAndEmit(msg);
- }
-
- private void emitUpgradeToLayoutVersionPersistedMsg() {
- String msg = "Finalization mark placed.";
- logAndEmit(msg);
- }
-
- private void emitRemovingUpgradeToLayoutVersionMsg(String feature) {
- String msg = "Remove finalization mark of " + feature
- + " feature from VERSION file.";
- logAndEmit(msg);
- }
-
- private void emitRemovedUpgradeToLayoutVersionMsg() {
- String msg = "Finalization mark removed.";
- logAndEmit(msg);
- }
-
protected void logAndEmit(String msg) {
LOG.info(msg);
msgs.offer(msg);
@@ -439,21 +353,6 @@ public abstract class BasicUpgradeFinalizer
logAndThrow(e, msg, UPDATE_LAYOUT_VERSION_FAILED);
}
- private void logUpgradeToLayoutVersionPersistingFailureAndThrow(
- String feature, IOException e
- ) throws UpgradeException {
- String msg = "Failed to update VERSION file with the upgrading feature: "
- + feature + ".";
- logAndThrow(e, msg, PERSIST_UPGRADE_TO_LAYOUT_VERSION_FAILED);
- }
-
- private void logUpgradeToLayoutVersionRemovalFailureAndThrow(
- String feature, IOException e) throws UpgradeException {
- String msg =
- "Failed to unmark finalization of " + feature + " LayoutFeature.";
- logAndThrow(e, msg, REMOVE_UPGRADE_TO_LAYOUT_VERSION_FAILED);
- }
-
private void logAndThrow(Exception e, String msg, ResultCodes resultCode)
throws UpgradeException {
LOG.error(msg, e);
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
index 52704c9..92dd706 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutFeature.java
@@ -56,15 +56,24 @@ public interface LayoutFeature {
// Run every time an un-finalized component is started up.
VALIDATE_IN_PREFINALIZE,
- // Run exactly once when an upgraded cluster is detected with this new
+ // Run once when an upgraded cluster is started with this new
// layout version.
+ // If the action fails, it will be run again when the component is
+ // restarted.
+ // If updating the VERSION file fails, the action may be run again when the
+ // component is restarted, even if it finished successfully.
// NOTE 1 : This will not be run in a NEW cluster!
// NOTE 2 : This needs to be a backward compatible action until a DOWNGRADE
- // hook is provided!
+ // hook is provided!
+ // Even if the action fails partway through, all on disk structures should
+ // still be in a backwards compatible state.
// NOTE 3 : These actions are not submitted through RATIS (TODO)
ON_FIRST_UPGRADE_START,
- // Run exactly once during finalization of layout feature.
+ // Run once during finalization of the layout feature.
+ // If the action fails, it will be run again when finalization is retried.
+ // If updating the VERSION file fails, the action may be run again when
+ // finalization is retried, even if it finished successfully.
ON_FINALIZE
}
}
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
index dfc43ef..0e69a4f 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeException.java
@@ -105,8 +105,6 @@ public class UpgradeException extends IOException {
public enum ResultCodes {
OK,
INVALID_REQUEST,
- PERSIST_UPGRADE_TO_LAYOUT_VERSION_FAILED,
- REMOVE_UPGRADE_TO_LAYOUT_VERSION_FAILED,
UPDATE_LAYOUT_VERSION_FAILED,
LAYOUT_FEATURE_FINALIZATION_FAILED,
PREFINALIZE_ACTION_VALIDATION_FAILED,
diff --git a/hadoop-hdds/docs/content/design/upgrade-dev-primer.md
b/hadoop-hdds/docs/content/design/upgrade-dev-primer.md
index 338765e..92c5330 100644
--- a/hadoop-hdds/docs/content/design/upgrade-dev-primer.md
+++ b/hadoop-hdds/docs/content/design/upgrade-dev-primer.md
@@ -62,11 +62,23 @@ Annotation to specify upgrade action run during specific
upgrade phases. Each la
#### VALIDATE_IN_PREFINALIZE
A ‘validation’ action run every time a component is started up with this
layout feature being unfinalized.
+- Example: Stopping a component if a new configuration is used prior to it
being finalized.
+
+- Example: Cleaning up from a failed ON_FINALIZE action that may have left on
disk data in an inoperable state.
+ - Note that because the ON_FINALIZE action failed, the feature remains
pre-finalized.
+
#### ON_FIRST_UPGRADE_START
-A backward compatible action run exactly once when an upgraded cluster is
detected with this new layout version.
+A backward compatible action run once when an upgraded cluster is detected
with this new layout version. This differs from VALIDATE_IN_PREFINALIZE because
it will not be run again once it completes successfully.
+This action will be run again if it fails partway through, and may be run
again if another error occurs during the upgrade. The action must always leave
on disk data in a backwards compatible state, even if it fails partway through,
since it is being executed before finalization.
+
+- Example: The new version expects data in a different location even when it
is pre-finalized. The action creates a symlink in the new location pointing to
the old location.
#### ON_FINALIZE
-An action run exactly once during finalization of layout version (feature).
+An action run once during finalization of layout version (feature). This
action will be run again if it fails partway through, and may be run again if
another error occurs during the upgrade. This is the only action permitted to
make backwards incompatible changes to on disk structures, since finalization
has been initiated by the time it is run. If a failure partway through could
leave the component in an inoperable state, a cleanup action should be used in
VALIDATE_IN_PREFINALIZE, whic [...]
+
+- Example: Adding a new RocksDB column family.
+
+- Example: Logging a message saying a feature is being finalized.
## ‘Prepare’ the Ozone Manager
Used to flush all transactions to disk, take a DB snapshot, and purge the
logs, leaving Ratis in a clean state without unapplied log entries. This
prepares the OM for upgrades/downgrades so that no request in the log is
applied to the database in the old version of the code in one OM, and the new
version of the code in another OM.
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
index 5279530..9699524 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOMUpgradeFinalizer.java
@@ -163,25 +163,17 @@ public class TestOMUpgradeFinalizer {
OMUpgradeFinalizer finalizer = new OMUpgradeFinalizer(versionManager);
finalizer.finalize(CLIENT_ID, om);
-
Iterator<OMLayoutFeature> it = lfs.iterator();
OMLayoutFeature f = it.next();
- // the first feature has an upgrade action, it calls the setUpgradeToLV
- // method, and the action execution is checked by verifying on
om.getVersion
- verify(om.getOmStorage(), once())
- .setUpgradeToLayoutVersion(f.layoutVersion());
+ // the first feature has an upgrade action, and the action execution is
+ // checked by verifying on om.getVersion
verify(om.getOmStorage(), once())
.setLayoutVersion(f.layoutVersion());
- verify(om.getOmStorage(), once())
- .unsetUpgradeToLayoutVersion();
verify(om, once()).getVersion();
- // the second feature has a NOOP it should not call the setUpgradeToLV
- // method, but should update the LV.
+ // The second feature has a NOOP, but should update the layout version.
f = it.next();
- verify(om.getOmStorage(), never())
- .setUpgradeToLayoutVersion(f.layoutVersion());
verify(om.getOmStorage(), once())
.setLayoutVersion(f.layoutVersion());
@@ -222,23 +214,15 @@ public class TestOMUpgradeFinalizer {
when(versionManager.getUpgradeState()).thenReturn(FINALIZATION_DONE);
}
- // Verify that we have never removed the upgradeToLV from the storage
- // as finalization of the first feature in the list fails.
- // Also verify that we have never updated the LV.
+ // Verify that we have never updated the layout version.
Iterator<OMLayoutFeature> it = lfs.iterator();
OMLayoutFeature f = it.next();
- verify(om.getOmStorage(), once())
- .setUpgradeToLayoutVersion(f.layoutVersion());
verify(om.getOmStorage(), never())
.setLayoutVersion(f.layoutVersion());
- verify(om.getOmStorage(), never())
- .unsetUpgradeToLayoutVersion();
// Verify that we never got to the second feature.
f = it.next();
verify(om.getOmStorage(), never())
- .setUpgradeToLayoutVersion(f.layoutVersion());
- verify(om.getOmStorage(), never())
.setLayoutVersion(f.layoutVersion());
StatusAndMessages status = finalizer.reportStatus(CLIENT_ID, false);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]