prashantpogde commented on a change in pull request #1998:
URL: https://github.com/apache/ozone/pull/1998#discussion_r612032871
##########
File path:
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHDDSUpgrade.java
##########
@@ -305,4 +400,579 @@ public void
testFinalizationFromInitialVersionToLatestVersion()
// Verify that new pipeline can be created with upgraded datanodes.
testPostUpgradePipelineCreation();
}
+
+ /*
+ * All the subsequent tests here are failure cases. Some of the tests below
+ * could simultaneously fail one or more nodes at specific execution points
+ * and in different thread contexts.
+ * Upgrade path key execution points are defined in
+ * UpgradeFinalizer:UpgradeTestInjectionPoints.
+ */
+
+ /*
+  * Helper function to inject an SCM failure and an SCM restart at a given
+  * execution point during SCM-Upgrade.
+  *
+  * The SCM is restarted while holding the cluster monitor and the cached
+  * SCM state is reloaded. A fresh finalization request is then issued from
+  * a newly started thread, and this method returns without waiting for it.
+  *
+  * Injects Failure in : SCM
+  * Executing-Thread-Context : SCM-Upgrade
+  *
+  * Always returns true.
+  * NOTE(review): presumably "true" tells the injection point that the
+  * in-flight finalization must be abandoned - confirm against the caller.
+  */
+ private Boolean injectSCMFailureDuringSCMUpgrade()
+     throws InterruptedException, TimeoutException, AuthenticationException,
+     IOException {
+   // For some tests this could get called in a different thread context.
+   // We need to guard concurrent updates to the cluster.
+   synchronized (cluster) {
+     cluster.restartStorageContainerManager(true);
+     loadSCMState();
+   }
+   // The ongoing SCM Upgrade is getting aborted at this point. We need to
+   // schedule a new SCM Upgrade on a different thread context.
+   Thread finalizer = new Thread(() -> {
+     try {
+       loadSCMState();
+       scm.finalizeUpgrade("xyz");
+     } catch (IOException e) {
+       // Report through the test logger, keeping the full cause, instead
+       // of dumping the stack trace to stderr.
+       LOG.error("SCM finalization after injected SCM restart failed", e);
+       // NOTE(review): this AssertionError is raised on the helper thread,
+       // so on its own it does not fail the test method that triggered
+       // the injection - confirm failures here are surfaced elsewhere.
+       Assert.fail(e.getMessage());
+     }
+   });
+   finalizer.start();
+   return true;
+ }
+
+ /*
+  * Helper function to inject DataNode failures and DataNode restarts at a
+  * given execution point during SCM-Upgrade. Please note that it restarts
+  * all the DataNodes in the cluster and is part of test cases that simulate
+  * multi-node failure at specific code-execution points during SCM Upgrade.
+  * This helper function should be called in the thread context of an
+  * SCM-Upgrade only.
+  *
+  * The return value is significant: returning false does not abort the
+  * currently ongoing SCM upgrade. Because this failure injection does not
+  * fail the SCM node and only impacts DataNodes, we do not need to schedule
+  * another scm-finalize-upgrade here.
+  *
+  * Any exception raised while restarting the DataNodes or while waiting
+  * for the cluster to become ready is logged and turned into a test
+  * assertion failure.
+  *
+  * Injects Failure in : All the DataNodes
+  * Executing-Thread-Context : SCM-Upgrade
+  */
+ private Boolean injectDataNodeFailureDuringSCMUpgrade() {
+   try {
+     // Iterate over a snapshot of the current DataNode list rather than
+     // the cluster's own collection.
+     // NOTE(review): presumably a restart can change the cluster's list
+     // while we iterate - confirm.
+     List<HddsDatanodeService> currentDataNodes =
+         new ArrayList<>(cluster.getHddsDatanodes());
+     for (HddsDatanodeService ds : currentDataNodes) {
+       cluster.restartHddsDatanode(ds.getDatanodeDetails(), false);
+     }
+     cluster.waitForClusterToBeReady();
+   } catch (Exception e) {
+     // Keep the cause in the log; the bare message alone does not say
+     // which restart step failed or why.
+     LOG.error("DataNode Restarts Failed!", e);
+     Assert.fail(e.getMessage());
+   }
+   loadSCMState();
+   // Returning false from an injection function continues the currently
+   // ongoing SCM-Upgrade-Finalization.
+   return false;
+ }
+
+ /*
+ * Helper function to inject a DataNode failure and restart for a specific
+ * DataNode. This injection function can target a specific DataNode and
+ * thus facilitates getting called in the upgrade-finalization thread context
+ * of that specific DataNode.
+ *
+ * Injects Failure in : the given DataNode
+ * Executing-Thread-Context : the same DataNode that we are failing here.
+ */
+ private Thread injectDataNodeFailureDuringDataNodeUpgrade(
+ DatanodeDetails dn) {
+ Thread t = null;
+ try {
+ // Schedule the DataNode restart on a separate thread context
+ // otherwise DataNode restart will hang. Also any cluster modification
+ // needs to be guarded since it could get modified in multiple
+ // independent threads.
+ t = new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ synchronized (cluster) {
+ cluster.restartHddsDatanode(dn, true);
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ Assert.fail(e.getMessage());
Review comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]