keith-turner commented on a change in pull request #1405: Made tablet 
assignment wait on upgrade.
URL: https://github.com/apache/accumulo/pull/1405#discussion_r343153087
 
 

 ##########
 File path: 
server/master/src/main/java/org/apache/accumulo/master/upgrade/UpgradeCoordinator.java
 ##########
 @@ -22,47 +22,120 @@
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.master.EventCoordinator;
 import org.apache.accumulo.server.ServerConstants;
 import org.apache.accumulo.server.ServerContext;
 import org.apache.accumulo.server.ServerUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.base.Preconditions;
+
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 
 public class UpgradeCoordinator {
 
+  public enum UpgradeStatus {
+    /**
+     * This signifies the upgrade status is in the process of being 
determined. It's best to assume
+     * nothing is upgraded when seeing this.
+     */
+    INITIAL {
+      @Override
+      public boolean isParentLevelUpgraded(KeyExtent extent) {
+        return false;
+      }
+    },
+    /**
+     * This signifies that only zookeeper has been upgraded so far.
+     */
+    UPGRADED_ZOOKEEPER {
+      @Override
+      public boolean isParentLevelUpgraded(KeyExtent extent) {
+        return extent.isRootTablet();
+      }
+    },
+    /**
+     * This signifies that only zookeeper and the root table have been 
upgraded so far.
+     */
+    UPGRADED_ROOT {
+      @Override
+      public boolean isParentLevelUpgraded(KeyExtent extent) {
+        return extent.isMeta();
+      }
+    },
+    /**
+     * This signifies that everything (zookeeper, root table, metadata table) 
is upgraded.
+     */
+    COMPLETE {
+      @Override
+      public boolean isParentLevelUpgraded(KeyExtent extent) {
+        return true;
+      }
+    },
+    /**
+     * This signifies a failure occurred during upgrade.
+     */
+    FAILED {
+      @Override
+      public boolean isParentLevelUpgraded(KeyExtent extent) {
+        return false;
+      }
+    };
+
+    /**
+     * Determines if the place where this extent stores its metadata was 
upgraded for a given
+     * upgrade status.
+     */
+    public abstract boolean isParentLevelUpgraded(KeyExtent extent);
+  }
+
   private static Logger log = 
LoggerFactory.getLogger(UpgradeCoordinator.class);
 
-  private ServerContext context;
-  private boolean haveUpgradedZooKeeper = false;
-  private boolean startedMetadataUpgrade = false;
   private int currentVersion;
   private Map<Integer,Upgrader> upgraders = 
Map.of(ServerConstants.SHORTEN_RFILE_KEYS,
       new Upgrader8to9(), ServerConstants.CRYPTO_CHANGES, new Upgrader9to10());
 
-  public UpgradeCoordinator(ServerContext ctx) {
-    int currentVersion = 
ServerUtil.getAccumuloPersistentVersion(ctx.getVolumeManager());
+  private volatile UpgradeStatus status;
 
-    ServerUtil.ensureDataVersionCompatible(currentVersion);
+  public UpgradeCoordinator() {
+    status = UpgradeStatus.INITIAL;
+  }
 
-    this.currentVersion = currentVersion;
-    this.context = ctx;
+  private void setStatus(UpgradeStatus status, EventCoordinator 
eventCoordinator) {
+    UpgradeStatus oldStatus = this.status;
+    this.status = status;
+    // calling this will wake up threads that may assign tablets. After the 
upgrade status changes
+    // those threads may make different assignment decisions.
+    eventCoordinator.event("Upgrade status changed from %s to %s", oldStatus, 
status);
   }
 
   @SuppressFBWarnings(value = "DM_EXIT",
       justification = "Want to immediately stop all master threads on upgrade 
error")
   private void handleFailure(Exception e) {
     log.error("FATAL: Error performing upgrade", e);
+    // do not want to call setStatus and signal an event in this case
+    status = UpgradeStatus.FAILED;
     System.exit(1);
 
 Review comment:
   Things may be in a really bad state, and who knows what other threads are 
doing. I think calling System.exit(1) here is best. I would only advocate for removing it if we knew 
what every other thread in the process was doing when the failure happened.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to