dlmarion commented on code in PR #5383:
URL: https://github.com/apache/accumulo/pull/5383#discussion_r2031830856


##########
server/base/src/main/java/org/apache/accumulo/server/util/UpgradeUtil.java:
##########
@@ -57,79 +76,222 @@ public String keyword() {
 
   @Override
   public String description() {
-    return "utility used to perform various upgrade steps for an Accumulo 
instance";
+    return "utility used to perform various upgrade steps for an Accumulo 
instance. The 'prepare'"
+        + " step is intended to be run using the old version of software after 
an instance has"
+        + " been shut down. The 'check' step is intended to be run on the 
instance with the new"
+        + " version of software. Server processes should fail to start after 
the 'prepare' step"
+        + " has been run due to the existence of a node in ZooKeeper. When the 
'check' step"
+        + " completes successfully it will remove this node allowing the user 
to start the"
+        + " Manager to begin the instance upgrade process.";
+  }
+
+  private void prepare(final ServerContext context) {
+
+    final int persistentVersion = 
AccumuloDataVersion.getCurrentVersion(context);
+    final int thisVersion = AccumuloDataVersion.get();
+    if (persistentVersion != thisVersion) {
+      throw new IllegalStateException("It looks like you are running 'prepare' 
with "
+          + "a different version of software than what the instance was 
running with."
+          + " The 'prepare' command is intended to be run after an instance is 
shutdown"
+          + " with the same version of software before trying to upgrade.");
+    }
+
+    final ZooSession zs = context.getZooSession();
+    final ZooReaderWriter zoo = zs.asReaderWriter();
+
+    try {
+      if (zoo.exists(ZPREPARE_FOR_UPGRADE)) {
+        zoo.delete(ZPREPARE_FOR_UPGRADE);
+      }
+    } catch (KeeperException | InterruptedException e) {
+      throw new IllegalStateException("Error creating or checking for " + 
ZPREPARE_FOR_UPGRADE
+          + " node in zookeeper: " + e.getMessage(), e);
+    }
+
+    LOG.info("Upgrade specified, validating that Manager is stopped");
+    if (context.getServerPaths().getManager(true) != null) {
+      throw new IllegalStateException("Manager is running, shut it down and 
retry this operation");
+    }
+
+    LOG.info("Checking for existing fate transactions");
+    try {
+      // Adapted from UpgradeCoordinator.abortIfFateTransactions
+      // TODO: After the 4.0.0 release this code block needs to be
+      // modified to account for the new Fate table.
+      if (!zoo.getChildren(ZFATE).isEmpty()) {
+        throw new IllegalStateException("Cannot complete upgrade preparation"
+            + " because FATE transactions exist. You can start a tserver, but"
+            + " not the Manager, then use the shell to delete completed"
+            + " transactions and fail pending or in-progress transactions."
+            + " Once all of the FATE transactions have been removed you can"
+            + " retry this operation.");
+      }
+    } catch (KeeperException | InterruptedException e) {
+      throw new IllegalStateException("Error checking for existing FATE 
transactions", e);
+    }
+
+    LOG.info("Creating {} node in zookeeper, servers will be prevented from"
+        + " starting while this node exists", ZPREPARE_FOR_UPGRADE);
+    try {
+      zoo.putPersistentData(ZPREPARE_FOR_UPGRADE, new byte[0], 
NodeExistsPolicy.SKIP);
+    } catch (KeeperException | InterruptedException e) {
+      throw new IllegalStateException("Error creating " + ZPREPARE_FOR_UPGRADE
+          + " node in zookeeper. Check for any issues and retry.", e);
+    }
+
+    LOG.info("Forcing removal of all server locks");
+    new ZooZap().zap(context, "-manager", "-tservers", "-compactors", 
"-sservers");
+
+    LOG.info(
+        "Instance {} prepared for upgrade. Server processes will not start 
while"
+            + " in this state. To undo this state and abort upgrade 
preparations delete"
+            + " the zookeeper node: {}. If you abort and restart the instance, 
then you "
+            + " should re-run this utility before upgrading.",
+        context.getInstanceID(), ZPREPARE_FOR_UPGRADE);
+  }
+
+  private void check(ServerContext context, boolean force) {
+    final int persistentVersion = 
AccumuloDataVersion.getCurrentVersion(context);
+    final int thisVersion = AccumuloDataVersion.get();
+    if (persistentVersion == thisVersion) {
+      throw new IllegalStateException("Running this utility is unnecessary, 
this instance"
+          + " has already been upgraded to version " + thisVersion);
+    }
+
+    final ZooSession zs = context.getZooSession();
+    final ZooReader zr = zs.asReader();
+    final String prepUpgradePath = Constants.ZPREPARE_FOR_UPGRADE;
+
+    boolean nodeExists = false;
+    try {
+      nodeExists = zr.exists(prepUpgradePath);
+    } catch (KeeperException | InterruptedException e) {
+      throw new IllegalStateException("Error checking for existence of node: " 
+ prepUpgradePath,
+          e);
+    }
+
+    if (!nodeExists) {
+
+      if (force) {
+        LOG.info("{} node not found in ZooKeeper, 'accumulo upgrade --prepare' 
was likely"
+            + " not run after shutting down instance for upgrade. Removing"
+            + " server locks and checking for fate transactions.", 
prepUpgradePath);
+      } else {
+        throw new IllegalStateException(prepUpgradePath + " node not found in 
ZooKeeper indicating"
+            + " that 'accumulo upgrade --prepare' was not run after shutting 
down the instance. If"
+            + " you wish to continue, then run this command using the --force 
option.");
+      }
+
+      try {
+        // Adapted from UpgradeCoordinator.abortIfFateTransactions
+        // TODO: After the 4.0.0 release this code block needs to be
+        // modified to account for the new Fate table.
+        if (!zr.getChildren(Constants.ZFATE).isEmpty()) {
+          throw new IllegalStateException("Cannot continue pre-upgrade checks"
+              + " because FATE transactions exist. You can start a tserver, 
but"
+              + " not the Manager, with the old version of Accumulo then use "
+              + " the shell to delete completed transactions and fail pending"
+              + " or in-progress transactions. Once all of the FATE 
transactions"
+              + " have been removed you can retry this operation.");
+        }
+      } catch (KeeperException | InterruptedException e) {
+        throw new IllegalStateException("Error checking for existing FATE 
transactions", e);
+      }
+      LOG.info("No FATE transactions found");
+
+      // Forcefully delete all server locks
+      Set<ServiceLockPath> serviceLockPaths =
+          context.getServerPaths().getCompactor((g) -> true, 
AddressSelector.all(), true);
+      serviceLockPaths.addAll(
+          context.getServerPaths().getTabletServer((g) -> true, 
AddressSelector.all(), true));
+      serviceLockPaths
+          .addAll(context.getServerPaths().getScanServer((g) -> true, 
AddressSelector.all(), true));
+      var mgrPath = context.getServerPaths().getManager(true);
+      if (mgrPath != null) {
+        serviceLockPaths.add(mgrPath);
+      }
+      var gcPath = context.getServerPaths().getGarbageCollector(true);
+      if (gcPath != null) {
+        serviceLockPaths.add(gcPath);
+      }
+      var monitorPath = context.getServerPaths().getMonitor(true);
+      if (monitorPath != null) {
+        serviceLockPaths.add(monitorPath);
+      }
+
+      for (ServiceLockPath slp : serviceLockPaths) {
+        LOG.info("Deleting all zookeeper entries under {}", slp);
+        try {
+          List<String> children = zr.getChildren(slp.toString());
+          for (String child : children) {
+            LOG.debug("Performing recursive delete on node:  {}", child);
+            ZooUtil.recursiveDelete(zs, slp + "/" + child, 
NodeMissingPolicy.SKIP);
+          }
+        } catch (KeeperException.NoNodeException e) {
+          LOG.warn("{} path does not exist in zookeeper", slp);
+        } catch (KeeperException e) {
+          throw new IllegalStateException(
+              "Error performing recursive delete on children under node: " + 
slp.toString(), e);
+        } catch (InterruptedException e) {
+          throw new IllegalStateException("Interrupted while trying to find"
+              + " and delete children of zookeeper node: " + slp);
+        }
+      }
+    }
+
+    // Initialize the UpgradeProgress object in ZooKeeper. If the node exists, 
maybe
+    // because the 'check' command is being re-run, delete it.
+    try {
+      if (zr.exists(Constants.ZUPGRADE_PROGRESS)) {
+        ZooUtil.recursiveDelete(zs, Constants.ZUPGRADE_PROGRESS, 
NodeMissingPolicy.FAIL);

Review Comment:
   Maybe just add an initial check: if the Manager lock is held, then the 
command can't be run.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

Reply via email to