kevinrr888 commented on code in PR #5348:
URL: https://github.com/apache/accumulo/pull/5348#discussion_r2012787980


##########
server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/SystemConfigCheckRunner.java:
##########
@@ -18,22 +18,230 @@
  */
 package org.apache.accumulo.server.util.checkCommand;
 
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.lock.ServiceLock;
+import org.apache.accumulo.core.metadata.AccumuloTable;
+import org.apache.accumulo.core.metadata.TServerInstance;
+import org.apache.accumulo.core.metadata.schema.TabletMetadata;
+import org.apache.accumulo.core.zookeeper.ZooSession;
 import org.apache.accumulo.server.ServerContext;
 import org.apache.accumulo.server.cli.ServerUtilOpts;
+import org.apache.accumulo.server.log.WalStateManager;
 import org.apache.accumulo.server.util.Admin;
 
 public class SystemConfigCheckRunner implements CheckRunner {
   private static final Admin.CheckCommand.Check check = 
Admin.CheckCommand.Check.SYSTEM_CONFIG;
 
+  public enum ServerProcess {
+    MANAGER, GC, TSERVER, COMPACTION_COORDINATOR, COMPACTOR, MONITOR, 
SCAN_SERVER
+  }
+
   @Override
   public Admin.CheckCommand.CheckStatus runCheck(ServerContext context, 
ServerUtilOpts opts,
       boolean fixFiles) throws Exception {
     Admin.CheckCommand.CheckStatus status = Admin.CheckCommand.CheckStatus.OK;
     printRunning();
+
+    log.trace("********** Checking validity of some ZooKeeper nodes 
**********");
+    status = checkZkNodes(context, status);
+
     printCompleted(status);
     return status;
   }
 
+  private static Admin.CheckCommand.CheckStatus checkZkNodes(ServerContext 
context,
+      Admin.CheckCommand.CheckStatus status) throws Exception {
+    status = checkZKLocks(context, status);
+    status = checkZKTableNodes(context, status);
+    status = checkZKWALsMetadata(context, status);
+
+    return status;
+  }
+
+  private static Admin.CheckCommand.CheckStatus checkZKLocks(ServerContext 
context,
+      Admin.CheckCommand.CheckStatus status) throws Exception {
+    final ServerProcess[] serverProcesses = ServerProcess.values();
+    final String zkRoot = context.getZooKeeperRoot();
+    final var zs = context.getZooSession();
+    final var zrw = zs.asReaderWriter();
+    final var compactors = context.instanceOperations().getCompactors();
+    final var sservers = context.instanceOperations().getScanServers();
+
+    log.trace("Checking ZooKeeper locks for Accumulo server processes...");
+
+    // check that essential server processes have a ZK lock, failing otherwise
+    // check that nonessential server processes have a ZK lock only if they are running. If they are
+    // not running, alert the user that the process is not running, which may or may not be expected
+    for (ServerProcess proc : serverProcesses) {
+      log.trace("Looking for {} lock(s)...", proc);
+      switch (proc) {
+        case MANAGER:
+          // essential process
+          status = checkLock(zkRoot + Constants.ZMANAGER_LOCK, proc, true, zs, 
status);
+          break;
+        case GC:
+          // essential process
+          status = checkLock(zkRoot + Constants.ZGC_LOCK, proc, true, zs, 
status);
+          break;
+        case TSERVER:
+          // essential process(es)
+          final var tservers = TabletMetadata.getLiveTServers(context);
+          if (tservers.isEmpty()) {
+            log.warn("Did not find any running tablet servers!");
+            status = Admin.CheckCommand.CheckStatus.FAILED;
+          }
+          break;
+        case COMPACTION_COORDINATOR:
+          // nonessential process
+          status = checkLock(zkRoot + Constants.ZCOORDINATOR_LOCK, proc, 
false, zs, status);
+          break;
+        case COMPACTOR:
+          // nonessential process(es)
+          if (compactors.isEmpty()) {
+            log.debug("No compactors appear to be running... This may or may 
not be expected");
+          }
+          for (String compactor : compactors) {
+            // for each running compactor, ensure a zk lock exists for it
+            boolean checkedLock = false;
+            String compactorQueuesPath = zkRoot + Constants.ZCOMPACTORS;
+            var compactorQueues = zrw.getChildren(compactorQueuesPath);
+            // find the queue the compactor is in
+            for (var queue : compactorQueues) {
+              String compactorQueuePath = compactorQueuesPath + "/" + queue;
+              String lockPath = compactorQueuePath + "/" + compactor;
+              if (zrw.exists(lockPath)) {
+                status = checkLock(lockPath, proc, true, zs, status);
+                checkedLock = true;
+                break;
+              }
+            }
+            if (!checkedLock) {
+              log.warn("Did not find a ZooKeeper lock for the compactor {}!", 
compactor);
+              status = Admin.CheckCommand.CheckStatus.FAILED;
+            }
+          }
+          break;
+        case MONITOR:
+          // nonessential process
+          status = checkLock(zkRoot + Constants.ZMONITOR_LOCK, proc, false, 
zs, status);
+          break;
+        case SCAN_SERVER:
+          // nonessential process(es)
+          if (sservers.isEmpty()) {
+            log.debug("No scan servers appear to be running... This may or may 
not be expected");
+          }
+          for (String sserver : sservers) {
+            status =
+                checkLock(zkRoot + Constants.ZSSERVERS + "/" + sserver, proc, 
true, zs, status);
+          }
+          break;
+        default:
+          throw new IllegalStateException("Unhandled case: " + proc);
+      }
+    }

Review Comment:
   Made changes and improvements to `SystemConfigCheckRunner` since this now 
targets 4.0.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to