kevinrr888 commented on code in PR #5348:
URL: https://github.com/apache/accumulo/pull/5348#discussion_r2012787980
##########
server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/SystemConfigCheckRunner.java:
##########
@@ -18,22 +18,230 @@
*/
package org.apache.accumulo.server.util.checkCommand;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.lock.ServiceLock;
+import org.apache.accumulo.core.metadata.AccumuloTable;
+import org.apache.accumulo.core.metadata.TServerInstance;
+import org.apache.accumulo.core.metadata.schema.TabletMetadata;
+import org.apache.accumulo.core.zookeeper.ZooSession;
import org.apache.accumulo.server.ServerContext;
import org.apache.accumulo.server.cli.ServerUtilOpts;
+import org.apache.accumulo.server.log.WalStateManager;
import org.apache.accumulo.server.util.Admin;
public class SystemConfigCheckRunner implements CheckRunner {
private static final Admin.CheckCommand.Check check =
Admin.CheckCommand.Check.SYSTEM_CONFIG;
+ /**
+  * Accumulo server process types that {@code checkZKLocks} iterates over when looking for
+  * ZooKeeper locks.
+  */
+ public enum ServerProcess {
+   MANAGER,
+   GC,
+   TSERVER,
+   COMPACTION_COORDINATOR,
+   COMPACTOR,
+   MONITOR,
+   SCAN_SERVER
+ }
+
/**
 * Runs the system configuration check: starts from an {@code OK} status, validates a selection
 * of ZooKeeper nodes via {@code checkZkNodes}, and reports the resulting status.
 *
 * @param context server context handed to the ZooKeeper node checks
 * @param opts not read by this method
 * @param fixFiles not read by this method
 * @return the status produced by the ZooKeeper node checks
 * @throws Exception if any of the underlying checks throws
 */
@Override
public Admin.CheckCommand.CheckStatus runCheck(ServerContext context, ServerUtilOpts opts,
    boolean fixFiles) throws Exception {
  Admin.CheckCommand.CheckStatus status = Admin.CheckCommand.CheckStatus.OK;
  printRunning();
+
+ // the banner must stay on one line: a Java string literal cannot contain a line break
+ log.trace("********** Checking validity of some ZooKeeper nodes **********");
+ status = checkZkNodes(context, status);
+
  printCompleted(status);
  return status;
}
+ private static Admin.CheckCommand.CheckStatus checkZkNodes(ServerContext
context,
+ Admin.CheckCommand.CheckStatus status) throws Exception {
+ status = checkZKLocks(context, status);
+ status = checkZKTableNodes(context, status);
+ status = checkZKWALsMetadata(context, status);
+
+ return status;
+ }
+
+ private static Admin.CheckCommand.CheckStatus checkZKLocks(ServerContext
context,
+ Admin.CheckCommand.CheckStatus status) throws Exception {
+ final ServerProcess[] serverProcesses = ServerProcess.values();
+ final String zkRoot = context.getZooKeeperRoot();
+ final var zs = context.getZooSession();
+ final var zrw = zs.asReaderWriter();
+ final var compactors = context.instanceOperations().getCompactors();
+ final var sservers = context.instanceOperations().getScanServers();
+
+ log.trace("Checking ZooKeeper locks for Accumulo server processes...");
+
+ // check that essential server processes have a ZK lock, failing otherwise
+ // check that nonessential server processes have a ZK lock only if they are running. If they
+ // are not running, alerts the user that the process is not running which may or may not be
+ // expected
+ for (ServerProcess proc : serverProcesses) {
+ log.trace("Looking for {} lock(s)...", proc);
+ switch (proc) {
+ case MANAGER:
+ // essential process
+ status = checkLock(zkRoot + Constants.ZMANAGER_LOCK, proc, true, zs,
status);
+ break;
+ case GC:
+ // essential process
+ status = checkLock(zkRoot + Constants.ZGC_LOCK, proc, true, zs,
status);
+ break;
+ case TSERVER:
+ // essential process(es)
+ final var tservers = TabletMetadata.getLiveTServers(context);
+ if (tservers.isEmpty()) {
+ log.warn("Did not find any running tablet servers!");
+ status = Admin.CheckCommand.CheckStatus.FAILED;
+ }
+ break;
+ case COMPACTION_COORDINATOR:
+ // nonessential process
+ status = checkLock(zkRoot + Constants.ZCOORDINATOR_LOCK, proc,
false, zs, status);
+ break;
+ case COMPACTOR:
+ // nonessential process(es)
+ if (compactors.isEmpty()) {
+ log.debug("No compactors appear to be running... This may or may
not be expected");
+ }
+ for (String compactor : compactors) {
+ // for each running compactor, ensure a zk lock exists for it
+ boolean checkedLock = false;
+ String compactorQueuesPath = zkRoot + Constants.ZCOMPACTORS;
+ var compactorQueues = zrw.getChildren(compactorQueuesPath);
+ // find the queue the compactor is in
+ for (var queue : compactorQueues) {
+ String compactorQueuePath = compactorQueuesPath + "/" + queue;
+ String lockPath = compactorQueuePath + "/" + compactor;
+ if (zrw.exists(lockPath)) {
+ status = checkLock(lockPath, proc, true, zs, status);
+ checkedLock = true;
+ break;
+ }
+ }
+ if (!checkedLock) {
+ log.warn("Did not find a ZooKeeper lock for the compactor {}!",
compactor);
+ status = Admin.CheckCommand.CheckStatus.FAILED;
+ }
+ }
+ break;
+ case MONITOR:
+ // nonessential process
+ status = checkLock(zkRoot + Constants.ZMONITOR_LOCK, proc, false,
zs, status);
+ break;
+ case SCAN_SERVER:
+ // nonessential process(es)
+ if (sservers.isEmpty()) {
+ log.debug("No scan servers appear to be running... This may or may
not be expected");
+ }
+ for (String sserver : sservers) {
+ status =
+ checkLock(zkRoot + Constants.ZSSERVERS + "/" + sserver, proc,
true, zs, status);
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unhandled case: " + proc);
+ }
+ }
Review Comment:
Made changes and improvements to `SystemConfigCheckRunner` since this now
targets 4.0.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]