HBASE-12034 If I kill single RS in branch-1, all regions end up on Master!
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e7c610b9 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e7c610b9 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e7c610b9 Branch: refs/heads/branch-1 Commit: e7c610b94c5359af2c4ff0e962c12553872f5396 Parents: 895768d Author: Jimmy Xiang <[email protected]> Authored: Mon Sep 22 13:39:18 2014 -0700 Committer: Jimmy Xiang <[email protected]> Committed: Mon Sep 29 17:12:43 2014 -0700 ---------------------------------------------------------------------- bin/regionservers.sh | 29 +- conf/regionservers | 1 + .../src/main/resources/hbase-default.xml | 9 - .../tmpl/master/BackupMasterStatusTmpl.jamon | 5 +- .../tmpl/regionserver/BlockCacheViewTmpl.jamon | 8 +- .../org/apache/hadoop/hbase/master/HMaster.java | 41 ++- .../hadoop/hbase/master/HMasterCommandLine.java | 8 +- .../hadoop/hbase/master/MasterRpcServices.java | 1 + .../hadoop/hbase/master/ServerManager.java | 66 +---- .../hbase/master/balancer/BaseLoadBalancer.java | 294 +++++-------------- .../hbase/master/balancer/ClusterLoadState.java | 18 +- .../master/balancer/SimpleLoadBalancer.java | 25 +- .../master/balancer/StochasticLoadBalancer.java | 30 +- .../hbase/regionserver/HRegionServer.java | 80 +++-- .../hadoop/hbase/regionserver/Leases.java | 2 - .../hbase/regionserver/RSRpcServices.java | 54 ++-- .../hbase/zookeeper/RegionServerTracker.java | 19 +- .../hadoop/hbase/HBaseTestingUtility.java | 8 +- .../apache/hadoop/hbase/client/TestAdmin.java | 5 - .../hadoop/hbase/client/TestFromClientSide.java | 2 +- .../hadoop/hbase/client/TestMultiParallel.java | 13 +- .../hadoop/hbase/fs/TestBlockReorder.java | 3 +- .../master/TestDistributedLogSplitting.java | 16 - .../hadoop/hbase/master/TestMasterFailover.java | 6 +- .../hadoop/hbase/master/TestMasterMetrics.java | 2 +- .../hbase/master/TestMasterMetricsWrapper.java | 6 +- .../hadoop/hbase/master/TestRestartCluster.java | 15 +- .../hbase/master/balancer/BalancerTestBase.java | 2 +- .../master/balancer/TestBaseLoadBalancer.java | 70 +---- .../balancer/TestDefaultLoadBalancer.java | 47 --- .../balancer/TestStochasticLoadBalancer.java | 16 +- .../hbase/regionserver/TestClusterId.java | 4 +- .../TestRSKilledWhenInitializing.java | 4 +- .../TestSplitTransactionOnCluster.java | 11 +- .../security/access/TestNamespaceCommands.java | 2 +- 35 files changed, 312 insertions(+), 610 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/bin/regionservers.sh ---------------------------------------------------------------------- diff --git a/bin/regionservers.sh b/bin/regionservers.sh index a86322d..e2af598 100755 --- a/bin/regionservers.sh +++ b/bin/regionservers.sh @@ -59,17 +59,22 @@ if [ "$HOSTLIST" = "" ]; then fi fi -for regionserver in `cat "$HOSTLIST"`; do - if ${HBASE_SLAVE_PARALLEL:-true}; then - ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \ - 2>&1 | sed "s/^/$regionserver: /" & - else # run each command serially - ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \ - 2>&1 | sed "s/^/$regionserver: /" - fi - if [ "$HBASE_SLAVE_SLEEP" != "" ]; then - sleep $HBASE_SLAVE_SLEEP - fi -done +regionservers=`cat "$HOSTLIST"` +if [ "$regionservers" = "localhost" ]; then + "$bin"/local-regionservers.sh start 1 +else + for regionserver in `cat "$HOSTLIST"`; do + if ${HBASE_SLAVE_PARALLEL:-true}; then + ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \ + 2>&1 | sed "s/^/$regionserver: /" & + else # run each command serially + ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \ + 2>&1 | sed "s/^/$regionserver: /" + fi + if [ "$HBASE_SLAVE_SLEEP" != "" ]; then + sleep $HBASE_SLAVE_SLEEP + fi + done +fi wait http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/conf/regionservers ---------------------------------------------------------------------- diff --git a/conf/regionservers b/conf/regionservers index e69de29..2fbb50c 100644 --- a/conf/regionservers +++ b/conf/regionservers @@ -0,0 +1 @@ +localhost http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-common/src/main/resources/hbase-default.xml ---------------------------------------------------------------------- diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index fc78b6c..711c1b9 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -569,15 +569,6 @@ possible configurations would overwhelm and obscure the important. <description>Period at which the region balancer runs in the Master.</description> </property> <property> - <name>hbase.balancer.backupMasterWeight</name> - <value>1</value> - <description>Used to control how many regions the region balancer can assign to - backup Masters, compared to normal region servers. The default value 1 means a - backup Master can host as many regions as a normal region server. The bigger the - weight, the less the regions a backup Master will host. If the weight is less than 1, - the balancer will not assign any region to any backup Master</description> - </property> - <property> <name>hbase.regions.slop</name> <value>0.2</value> <description>Rebalance if any regionserver has average + (average * slop) regions.</description> http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon index 4d49144..0dc6245 100644 --- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon +++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon @@ -24,6 +24,7 @@ java.util.*; org.apache.hadoop.hbase.ServerName; org.apache.hadoop.hbase.ClusterStatus; org.apache.hadoop.hbase.master.HMaster; +org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; </%import> <%java> Collection<ServerName> masters = null; @@ -32,7 +33,9 @@ if (master.isActiveMaster()) { ClusterStatus status = master.getClusterStatus(); masters = status.getBackupMasters(); } else{ - ServerName sn = master.getMasterAddressTracker().getMasterAddress(); + MasterAddressTracker masterAddressTracker = master.getMasterAddressTracker(); + ServerName sn = masterAddressTracker == null ? null + : masterAddressTracker.getMasterAddress(); assert sn != null : "Failed to retreive master's ServerName!"; masters = Collections.singletonList(sn); } http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon index c5002b5..523d1b9 100644 --- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon +++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon @@ -40,8 +40,8 @@ org.apache.hadoop.util.StringUtils; com.yammer.metrics.stats.Snapshot; </%import> <%java> - BlockCache bc = cacheConfig.getBlockCache(); - BlockCache [] bcs = bc.getBlockCaches(); + BlockCache bc = cacheConfig == null ? null : cacheConfig.getBlockCache(); + BlockCache [] bcs = bc == null ? null : bc.getBlockCaches(); if (bcn.equals("L1")) { bc = bcs == null || bcs.length == 0? bc: bcs[0]; } else { @@ -51,6 +51,10 @@ com.yammer.metrics.stats.Snapshot; } bc = bcs[1]; } + if (bc == null) { + System.out.println("There is no block cache"); + return; + } CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, bc); </%java> <%if bcv.equals("file") %><& bc_by_file; cbsbf = cbsbf; &><%else>[ <% BlockCacheUtil.toJSON(bc) %>, <% BlockCacheUtil.toJSON(cbsbf) %> ]</%if> http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index b6a4763..7f041ff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -41,11 +41,11 @@ import javax.servlet.http.HttpServletResponse; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.CoordinatedStateException; +import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HColumnDescriptor; @@ -53,6 +53,8 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.MetaMigrationConvertingToPB; +import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.NamespaceNotFoundException; import org.apache.hadoop.hbase.PleaseHoldException; @@ -64,13 +66,11 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.UnknownRegionException; -import org.apache.hadoop.hbase.MetaMigrationConvertingToPB; -import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.executor.ExecutorType; @@ -80,6 +80,7 @@ import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.balancer.BalancerChore; +import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; @@ -101,8 +102,8 @@ import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost; import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo; -import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.RSRpcServices; import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; @@ -225,6 +226,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // monitor for distributed procedures MasterProcedureManagerHost mpmHost; + // A flag to indicate if any table is configured to put on the active master + protected final boolean tablesOnMaster; + /** flag used in test cases in order to simulate RS failures during master initialization */ private volatile boolean initializationBeforeMetaAssignment = false; @@ -282,6 +286,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server { this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true); this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this)); + String[] tablesOnMaster = BaseLoadBalancer.getTablesOnMaster(conf); + this.tablesOnMaster = tablesOnMaster != null && tablesOnMaster.length > 0; // Do we publish the status? boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED, @@ -344,6 +350,18 @@ public class HMaster extends HRegionServer implements MasterServices, Server { } } + /** + * If configured to put regions on active master, + * wait till a backup master becomes active. + * Otherwise, loop till the server is stopped or aborted. + */ + protected void waitForMasterActive(){ + while (!(tablesOnMaster && isActiveMaster) + && !isStopped() && !isAborted()) { + sleeper.sleep(); + } + } + @VisibleForTesting public MasterRpcServices getMasterRpcServices() { return (MasterRpcServices)rpcServices; @@ -372,7 +390,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { protected void configureInfoServer() { infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class); infoServer.setAttribute(MASTER, this); - super.configureInfoServer(); + if (tablesOnMaster) { + super.configureInfoServer(); + } } protected Class<? extends HttpServlet> getDumpServlet() { @@ -548,10 +568,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server { this.initializationBeforeMetaAssignment = true; // Wait for regionserver to finish initialization. - synchronized (online) { - while (!isStopped() && !isOnline()) { - online.wait(100); - } + if (tablesOnMaster) { + waitForServerOnline(); } //initialize load balancer @@ -1590,6 +1608,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { @Override public void abort(final String msg, final Throwable t) { + if (isAborted() || isStopped()) { + return; + } if (cpHost != null) { // HBASE-4014: dump a list of loaded coprocessors. LOG.fatal("Master server abort: loaded coprocessors are: " + http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java index e54b65c..d01e618 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java @@ -32,10 +32,10 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.CoordinatedStateManagerFactory; -import org.apache.hadoop.hbase.MasterNotRunningException; -import org.apache.hadoop.hbase.ZNodeClearer; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.LocalHBaseCluster; +import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.ZNodeClearer; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.HBaseAdmin; @@ -154,7 +154,6 @@ public class HMasterCommandLine extends ServerCommandLine { // and regionserver both in the one JVM. if (LocalHBaseCluster.isLocal(conf)) { DefaultMetricsSystem.setMiniClusterMode(true); - conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1); final MiniZooKeeperCluster zooKeeperCluster = new MiniZooKeeperCluster(conf); File zkDataPath = new File(conf.get(HConstants.ZOOKEEPER_DATA_DIR)); int zkClientPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 0); @@ -183,7 +182,7 @@ public class HMasterCommandLine extends ServerCommandLine { // Need to have the zk cluster shutdown when master is shutdown. // Run a subclass that does the zk cluster shutdown on its way out. LocalHBaseCluster cluster = new LocalHBaseCluster(conf, conf.getInt("hbase.masters", 1), - conf.getInt("hbase.regionservers", 0), LocalHMaster.class, HRegionServer.class); + conf.getInt("hbase.regionservers", 1), LocalHMaster.class, HRegionServer.class); ((LocalHMaster)cluster.getMaster(0)).setZKCluster(zooKeeperCluster); cluster.startup(); waitOnMasterThreads(cluster); @@ -208,6 +207,7 @@ public class HMasterCommandLine extends ServerCommandLine { return 0; } + @SuppressWarnings("resource") private int stopMaster() { Admin adm = null; try { http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 31874e9..830a5a2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -166,6 +166,7 @@ import com.google.protobuf.ServiceException; * Implements the master RPC services. */ @InterfaceAudience.Private +@SuppressWarnings("deprecation") public class MasterRpcServices extends RSRpcServices implements MasterService.BlockingInterface, RegionServerStatusService.BlockingInterface { protected static final Log LOG = LogFactory.getLog(MasterRpcServices.class.getName()); http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index c343585..8ff1a3c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.RetriesExhaustedException; -import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.monitoring.MonitoredTask; @@ -95,6 +94,7 @@ import com.google.protobuf.ServiceException; * and has completed the handling. */ @InterfaceAudience.Private +@SuppressWarnings("deprecation") public class ServerManager { public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART = "hbase.master.wait.on.regionservers.maxtostart"; @@ -142,8 +142,6 @@ public class ServerManager { private final long maxSkew; private final long warningSkew; - private final boolean checkingBackupMaster; - private BaseLoadBalancer balancer; /** * Set of region servers which are dead but not processed immediately. If one @@ -203,18 +201,6 @@ public class ServerManager { maxSkew = c.getLong("hbase.master.maxclockskew", 30000); warningSkew = c.getLong("hbase.master.warningclockskew", 10000); this.connection = connect ? HConnectionManager.getConnection(c) : null; - - // Put this in constructor so we don't cast it every time - // - // We need to check if a newly added server is a backup master - // only if we are configured not to assign any region to it. - checkingBackupMaster = (master instanceof HMaster) - && ((HMaster)master).balancer instanceof BaseLoadBalancer - && (c.getInt(BaseLoadBalancer.BACKUP_MASTER_WEIGHT_KEY, - BaseLoadBalancer.DEFAULT_BACKUP_MASTER_WEIGHT) < 1); - if (checkingBackupMaster) { - balancer = (BaseLoadBalancer)((HMaster)master).balancer; - } } /** @@ -419,18 +405,6 @@ public class ServerManager { @VisibleForTesting void recordNewServerWithLock(final ServerName serverName, final ServerLoad sl) { LOG.info("Registering server=" + serverName); - if (checkingBackupMaster) { - ZooKeeperWatcher zooKeeper = master.getZooKeeper(); - String backupZNode = ZKUtil.joinZNode( - zooKeeper.backupMasterAddressesZNode, serverName.toString()); - try { - if (ZKUtil.checkExists(zooKeeper, backupZNode) != -1) { - balancer.excludeServer(serverName); - } - } catch (KeeperException e) { - master.abort("Failed to check if a new server a backup master", e); - } - } this.onlineServers.put(serverName, sl); this.rsAdmins.remove(serverName); } @@ -468,19 +442,10 @@ public class ServerManager { (double)totalLoad / (double)numServers; } - /** - * Get the count of active regionservers that are not backup - * masters. This count may not be accurate depending on timing. - * @return the count of active regionservers - */ + /** @return the count of active regionservers */ private int countOfRegionServers() { // Presumes onlineServers is a concurrent map - int count = this.onlineServers.size(); - if (balancer != null) { - count -= balancer.getExcludedServers().size(); - if (count < 0) count = 0; - } - return count; + return this.onlineServers.size(); } /** @@ -535,7 +500,7 @@ public class ServerManager { try { List<String> servers = ZKUtil.listChildrenNoWatch(zkw, zkw.rsZNode); - if (servers == null || (servers.size() == 1 + if (servers == null || servers.size() == 0 || (servers.size() == 1 && servers.contains(sn.toString()))) { LOG.info("ZK shows there is only the master self online, exiting now"); // Master could have lost some ZK events, no need to wait more. @@ -862,7 +827,6 @@ public class ServerManager { * @throws IOException * @throws RetriesExhaustedException wrapping a ConnectException if failed */ - @SuppressWarnings("deprecation") private AdminService.BlockingInterface getRsAdmin(final ServerName sn) throws IOException { AdminService.BlockingInterface admin = this.rsAdmins.get(sn); @@ -898,14 +862,12 @@ public class ServerManager { getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500); final long timeout = this.master.getConfiguration(). getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500); - String[] tablesOnMaster = this.master.getConfiguration(). - getStrings("hbase.balancer.tablesOnMaster"); int defaultMinToStart = 1; - if (tablesOnMaster != null && tablesOnMaster.length > 0) { + if (((HMaster)services).tablesOnMaster) { // If we assign regions to master, we'd like to start // at least another region server so that we don't - // assign all regions to master if that region server - // doesn't come up in time. + // assign all regions to master if other region servers + // don't come up in time. defaultMinToStart = 2; } int minToStart = this.master.getConfiguration(). @@ -933,10 +895,8 @@ public class ServerManager { long lastCountChange = startTime; int count = countOfRegionServers(); int oldCount = 0; - ServerName masterSn = master.getServerName(); - boolean selfCheckedIn = isServerOnline(masterSn); - while (!this.master.isStopped() && (!selfCheckedIn || (count < maxToStart - && (lastCountChange+interval > now || timeout > slept || count < minToStart)))) { + while (!this.master.isStopped() && count < maxToStart + && (lastCountChange+interval > now || timeout > slept || count < minToStart)) { // Log some info at every interval time or if there is a change if (oldCount != count || lastLogTime+interval < now){ lastLogTime = now; @@ -944,8 +904,7 @@ public class ServerManager { "Waiting for region servers count to settle; currently"+ " checked in " + count + ", slept for " + slept + " ms," + " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+ - ", timeout of "+timeout+" ms, interval of "+interval+" ms," + - " selfCheckedIn " + selfCheckedIn; + ", timeout of "+timeout+" ms, interval of "+interval+" ms."; LOG.info(msg); status.setStatus(msg); } @@ -956,8 +915,6 @@ public class ServerManager { now = System.currentTimeMillis(); slept = now - startTime; - selfCheckedIn = isServerOnline(masterSn); - oldCount = count; count = countOfRegionServers(); if (count != oldCount) { @@ -968,8 +925,7 @@ public class ServerManager { LOG.info("Finished waiting for region servers count to settle;" + " checked in " + count + ", slept for " + slept + " ms," + " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+","+ - " master is "+ (this.master.isStopped() ? "stopped.": "running," + - " selfCheckedIn " + selfCheckedIn) + " master is "+ (this.master.isStopped() ? "stopped.": "running") ); } http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 5d57707..fad18e7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -43,13 +43,14 @@ import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RegionLoad; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.master.RegionPlan; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; +import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type; +import org.apache.hadoop.util.StringUtils; import com.google.common.base.Joiner; import com.google.common.collect.ArrayListMultimap; @@ -98,7 +99,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { ArrayList<String> tables; HRegionInfo[] regions; Deque<RegionLoad>[] regionLoads; - boolean[] backupMasterFlags; int activeMasterIndex = -1; int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality @@ -151,10 +151,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { Map<ServerName, List<HRegionInfo>> clusterState, Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder, - Collection<ServerName> backupMasters, Set<String> tablesOnMaster, RackManager rackManager) { - this(masterServerName, null, clusterState, loads, regionFinder, backupMasters, + this(masterServerName, null, clusterState, loads, regionFinder, tablesOnMaster, rackManager); } @@ -165,7 +164,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { Map<ServerName, List<HRegionInfo>> clusterState, Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder, - Collection<ServerName> backupMasters, Set<String> tablesOnMaster, RackManager rackManager) { @@ -233,7 +231,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { regionLoads = new Deque[numRegions]; regionLocations = new int[numRegions][]; serverIndicesSortedByRegionCount = new Integer[numServers]; - backupMasterFlags = new boolean[numServers]; serverIndexToHostIndex = new int[numServers]; serverIndexToRackIndex = new int[numServers]; @@ -254,8 +251,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (servers[serverIndex] == null || servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) { servers[serverIndex] = entry.getKey(); - backupMasterFlags[serverIndex] = backupMasters != null - && backupMasters.contains(servers[serverIndex]); } if (regionsPerServer[serverIndex] != null) { @@ -270,11 +265,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (servers[serverIndex].equals(masterServerName)) { activeMasterIndex = serverIndex; - for (HRegionInfo hri: entry.getValue()) { - if (!shouldBeOnMaster(hri)) { - numUserRegionsOnMaster++; - } - } } } @@ -716,15 +706,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } } } - if (oldServer >= 0 && isActiveMaster(oldServer)) { - if (!shouldBeOnMaster(regions[region])) { - numUserRegionsOnMaster--; - } - } else if (isActiveMaster(newServer)) { - if (!shouldBeOnMaster(regions[region])) { - numUserRegionsOnMaster++; - } - } } int[] removeRegion(int[] regions, int regionIndex) { @@ -782,10 +763,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return regionsPerServer[server].length; } - boolean isBackupMaster(int server) { - return backupMasterFlags[server]; - } - boolean isActiveMaster(int server) { return activeMasterIndex == server; } @@ -846,25 +823,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { private static final Random RANDOM = new Random(System.currentTimeMillis()); private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class); - // The weight means that each region on the backup master is - // equal to that many regions on a normal regionserver, in calculating - // the region load by the load balancer. So that the backup master - // can host less (or equal if weight = 1) regions than normal regionservers. - // - // The weight can be used to control the number of regions on backup - // masters, which shouldn't host as many regions as normal regionservers. - // So that we don't need to move around too many regions when a - // backup master becomes the active one. - public static final String BACKUP_MASTER_WEIGHT_KEY = - "hbase.balancer.backupMasterWeight"; - public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 0; - - protected int backupMasterWeight; - - // a flag to indicate if assigning regions to backup masters - protected boolean usingBackupMasters = true; - protected final Set<ServerName> excludedServers = - Collections.synchronizedSet(new HashSet<ServerName>()); + public static final String TABLES_ON_MASTER = + "hbase.balancer.tablesOnMaster"; protected final Set<String> tablesOnMaster = new HashSet<String>(); protected final MetricsBalancer metricsBalancer = new MetricsBalancer(); @@ -872,6 +832,17 @@ public abstract class BaseLoadBalancer implements LoadBalancer { protected ServerName masterServerName; protected MasterServices services; + public static String[] getTablesOnMaster(Configuration conf) { + String valueString = conf.get(TABLES_ON_MASTER); + if (valueString != null) { + valueString = valueString.trim(); + } + if (valueString == null || valueString.equalsIgnoreCase("none")) { + return null; + } + return StringUtils.getStrings(valueString); + } + @Override public void setConf(Configuration conf) { setSlop(conf); @@ -879,16 +850,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { else if (slop > 1) slop = 1; this.config = conf; - backupMasterWeight = conf.getInt( - BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT); - if (backupMasterWeight < 1) { - usingBackupMasters = false; - LOG.info("Backup master won't host any region since " - + BACKUP_MASTER_WEIGHT_KEY + " is " + backupMasterWeight - + "(<1)"); - } - String[] tables = conf.getStrings("hbase.balancer.tablesOnMaster"); - if (tables != null) { + String[] tables = getTablesOnMaster(conf); + if (tables != null && tables.length > 0) { Collections.addAll(tablesOnMaster, tables); } this.rackManager = new RackManager(getConf()); @@ -900,23 +863,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } /** - * If there is any server excluded, filter it out from the cluster map so - * we won't assign any region to it, assuming none's already assigned there. - */ - protected void filterExcludedServers(Map<ServerName, List<HRegionInfo>> clusterMap) { - if (excludedServers.isEmpty()) { // No server to filter out - return; - } - Iterator<Map.Entry<ServerName, List<HRegionInfo>>> it = clusterMap.entrySet().iterator(); - while (it.hasNext()) { - Map.Entry<ServerName, List<HRegionInfo>> en = it.next(); - if (excludedServers.contains(en.getKey()) && en.getValue().isEmpty()) { - it.remove(); - } - } - } - - /** * Check if a region belongs to some small system table. * If so, it may be expected to be put on the master regionserver. */ @@ -973,14 +919,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return plans; } - public void excludeServer(ServerName serverName) { - if (!usingBackupMasters) excludedServers.add(serverName); - } - - public Set<ServerName> getExcludedServers() { - return excludedServers; - } - @Override public Configuration getConf() { return this.config; @@ -989,20 +927,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer { @Override public void setClusterStatus(ClusterStatus st) { this.clusterStatus = st; - if (st == null || usingBackupMasters) return; - - // Not assign any region to backup masters. - // Put them on the excluded server list. - // Assume there won't be too much backup masters - // re/starting, so this won't leak much memory. - excludedServers.addAll(st.getBackupMasters()); regionFinder.setClusterStatus(st); } @Override public void setMasterServices(MasterServices masterServices) { masterServerName = masterServices.getServerName(); - excludedServers.remove(masterServerName); this.services = masterServices; this.regionFinder.setServices(masterServices); } @@ -1011,13 +941,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { this.rackManager = rackManager; } - protected Collection<ServerName> getBackupMasters() { - return clusterStatus == null ? null : clusterStatus.getBackupMasters(); - } - protected boolean needsBalance(Cluster c) { ClusterLoadState cs = new ClusterLoadState( - masterServerName, getBackupMasters(), backupMasterWeight, c.clusterState); + masterServerName, c.clusterState); if (cs.getNumServers() < MIN_SERVER_BALANCE) { if (LOG.isDebugEnabled()) { LOG.debug("Not running balancer because only " + cs.getNumServers() @@ -1036,9 +962,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (LOG.isTraceEnabled()) { // If nothing to balance, then don't say anything unless trace-level logging. LOG.trace("Skipping load balancing because balanced cluster; " + - "servers=" + cs.getNumServers() + "(backupMasters=" + cs.getNumBackupMasters() + - ") regions=" + cs.getNumRegions() + " average=" + average + " " + - "mostloaded=" + serversByLoad.lastKey().getLoad() + + "servers=" + cs.getNumServers() + + " regions=" + cs.getNumRegions() + " average=" + average + + " mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad()); } return false; @@ -1082,10 +1008,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return null; } - List<ServerName> backupMasters = normalizeServers(servers); int numServers = servers == null ? 0 : servers.size(); - int numBackupMasters = backupMasters == null ? 0 : backupMasters.size(); - if (numServers == 0 && numBackupMasters == 0) { + if (numServers == 0) { LOG.warn("Wanted to do round robin assignment but no servers to assign to"); return null; } @@ -1096,40 +1020,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer { // and balanced. This should also run fast with fewer number of iterations. Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>(); - if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here - ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0); + if (numServers == 1) { // Only one server, nothing fancy we can do here + ServerName server = servers.get(0); assignments.put(server, new ArrayList<HRegionInfo>(regions)); return assignments; } List<HRegionInfo> masterRegions = null; - if (numServers > 0 && servers.contains(masterServerName)) { + if (servers.contains(masterServerName)) { masterRegions = new ArrayList<HRegionInfo>(); - if (numServers == 1) { - // The only server in servers is the master, - // Assign all regions to backup masters - numServers = 0; - } } - Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster); + Cluster cluster = createCluster(servers, regions, tablesOnMaster); List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>(); - int total = regions.size(); - // Get the number of regions to be assigned - // to backup masters based on the weight - int numRegions = usingBackupMasters ? total * numBackupMasters - / (numServers * backupMasterWeight + numBackupMasters) : 0; - if (numRegions > 0) { - // backupMasters can't be null, according to the formula, numBackupMasters != 0 - roundRobinAssignment(cluster, regions, unassignedRegions, 0, - numRegions, backupMasters, masterRegions, assignments); - } - int remainder = total - numRegions; - if (remainder > 0) { - // servers can't be null, or contains the master only since numServers != 0 - roundRobinAssignment(cluster, regions, unassignedRegions, numRegions, remainder, - servers, masterRegions, assignments); - } + roundRobinAssignment(cluster, regions, unassignedRegions, + servers, masterRegions, assignments); + if (masterRegions != null && !masterRegions.isEmpty()) { assignments.put(masterServerName, masterRegions); for (HRegionInfo r : masterRegions) { @@ -1166,16 +1072,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer { // just sprinkle the rest of the regions on random regionservers. The balanceCluster will // make it optimal later. we can end up with this if numReplicas > numServers. for (HRegionInfo region : lastFewRegions) { - ServerName server = null; - if (numServers == 0) { - // select from backup masters - int i = RANDOM.nextInt(backupMasters.size()); - server = backupMasters.get(i); - } else { - do { - int i = RANDOM.nextInt(numServers); - server = servers.get(i); - } while (numServers > 1 && server.equals(masterServerName)); + int i = RANDOM.nextInt(numServers); + ServerName server = servers.get(i); + if (server.equals(masterServerName)) { + // Try to avoid master for a user region + i = (i == 0 ? 1 : i - 1); + server = servers.get(i); } List<HRegionInfo> serverRegions = assignments.get(server); if (serverRegions == null) { @@ -1189,7 +1091,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } protected Cluster createCluster(List<ServerName> servers, - Collection<HRegionInfo> regions, List<ServerName> backupMasters, Set<String> tablesOnMaster) { + Collection<HRegionInfo> regions, Set<String> tablesOnMaster) { // Get the snapshot of the current assignments for the regions in question, and then create // a cluster out of it. Note that we might have replicas already assigned to some servers // earlier. So we want to get the snapshot to see those assignments, but this will only contain @@ -1201,7 +1103,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { clusterState.put(server, EMPTY_REGION_LIST); } } - return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, backupMasters, + return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, tablesOnMaster, rackManager); } @@ -1244,15 +1146,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer { @Override public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) { metricsBalancer.incrMiscInvocations(); - if (servers == null || servers.isEmpty()) { - LOG.warn("Wanted to do random assignment but no servers to assign to"); + int numServers = servers == null ? 0 : servers.size(); + if (numServers == 0) { + LOG.warn("Wanted to do retain assignment but no servers to assign to"); return null; } - List<ServerName> backupMasters = normalizeServers(servers); - List<HRegionInfo> regions = Lists.newArrayList(regionInfo); - Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster); + if (numServers == 1) { // Only one server, nothing fancy we can do here + return servers.get(0); + } + if (shouldBeOnMaster(regionInfo) + && servers.contains(masterServerName)) { + return masterServerName; + } - return randomAssignment(cluster, regionInfo, servers, backupMasters); + List<HRegionInfo> regions = Lists.newArrayList(regionInfo); + Cluster cluster = createCluster(servers, regions, tablesOnMaster); + return randomAssignment(cluster, regionInfo, servers); } /** @@ -1281,16 +1190,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return null; } - List<ServerName> backupMasters = normalizeServers(servers); int numServers = servers == null ? 0 : servers.size(); - int numBackupMasters = backupMasters == null ? 0 : backupMasters.size(); - if (numServers == 0 && numBackupMasters == 0) { + if (numServers == 0) { LOG.warn("Wanted to do retain assignment but no servers to assign to"); return null; } Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>(); - if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here - ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0); + if (numServers == 1) { // Only one server, nothing fancy we can do here + ServerName server = servers.get(0); assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet())); return assignments; } @@ -1308,11 +1215,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { serversByHostname.put(server.getHostname(), server); } } - if (numBackupMasters > 0) { - for (ServerName server : backupMasters) { - assignments.put(server, new ArrayList<HRegionInfo>()); - } - } // Collection of the hostnames that used to have regions // assigned, but for which we no longer have any RS running @@ -1325,7 +1227,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { int numRandomAssignments = 0; int numRetainedAssigments = 0; - Cluster cluster = createCluster(servers, regions.keySet(), backupMasters, tablesOnMaster); + Cluster cluster = createCluster(servers, regions.keySet(), tablesOnMaster); for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) { HRegionInfo region = entry.getKey(); @@ -1344,7 +1246,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } else if (localServers.isEmpty()) { // No servers on the new cluster match up with this hostname, // assign randomly. - ServerName randomServer = randomAssignment(cluster, region, servers, backupMasters); + ServerName randomServer = randomAssignment(cluster, region, servers); assignments.get(randomServer).add(region); numRandomAssignments++; if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname()); @@ -1368,7 +1270,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } } if (target == null) { - target = randomAssignment(cluster, region, localServers, backupMasters); + target = randomAssignment(cluster, region, localServers); } assignments.get(target).add(region); } @@ -1414,78 +1316,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } /** - * Prepare the list of target regionservers so that it doesn't - * contain any excluded server, or backup master. Those backup masters - * used to be in the original list are returned. - */ - private List<ServerName> normalizeServers(List<ServerName> servers) { - if (servers == null) { - return null; - } - if (!excludedServers.isEmpty()) { - servers.removeAll(excludedServers); - } - Collection<ServerName> allBackupMasters = getBackupMasters(); - List<ServerName> backupMasters = null; - if (allBackupMasters != null && !allBackupMasters.isEmpty()) { - for (ServerName server: allBackupMasters) { - if (!servers.contains(server)) { - // Ignore backup masters not included - continue; - } - servers.remove(server); - if (backupMasters == null) { - backupMasters = new ArrayList<ServerName>(); - } - backupMasters.add(server); - } - } - return backupMasters; - } - - /** - * Used to assign a single region to a random server. The input should - * have been already normalized: 1) servers doesn't include any exclude sever, - * 2) servers doesn't include any backup master, 3) backupMasters contains - * only backup masters that are intended to host this region, i.e, it - * may not have all the backup masters. + * Used to assign a single region to a random server. */ private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo, - List<ServerName> servers, List<ServerName> backupMasters) { - int numServers = servers == null ? 0 : servers.size(); - int numBackupMasters = backupMasters == null ? 0 : backupMasters.size(); - if (numServers == 0 && numBackupMasters == 0) { - LOG.warn("Wanted to do random assignment but no servers to assign to"); - return null; - } - if (servers != null && shouldBeOnMaster(regionInfo) - && servers.contains(masterServerName)) { - return masterServerName; - } + List<ServerName> servers) { + int numServers = servers.size(); // servers is not null, numServers > 1 ServerName sn = null; - final int maxIterations = servers.size() * 4; + final int maxIterations = numServers * 4; int iterations = 0; do { - // Generate a random number weighted more towards - // regular regionservers instead of backup masters. - // This formula is chosen for simplicity. - int i = RANDOM.nextInt( - numBackupMasters + numServers * backupMasterWeight); - if (i < numBackupMasters) { - sn = backupMasters.get(i); - continue; - } - i = (i - numBackupMasters)/backupMasterWeight; + int i = RANDOM.nextInt(numServers); sn = servers.get(i); if (sn.equals(masterServerName)) { // Try to avoid master for a user region - if (numServers > 1) { - i = (i == 0 ? 1 : i - 1); - sn = servers.get(i); - } else if (numBackupMasters > 0) { - sn = backupMasters.get(0); - } + i = (i == 0 ? 1 : i - 1); + sn = servers.get(i); } } while (cluster.wouldLowerAvailability(regionInfo, sn) && iterations++ < maxIterations); @@ -1494,12 +1340,11 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } /** - * Round robin a chunk of a list of regions to a list of servers + * Round robin a list of regions to a list of servers */ private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions, - List<HRegionInfo> unassignedRegions, int offset, - int numRegions, List<ServerName> servers, List<HRegionInfo> masterRegions, - Map<ServerName, List<HRegionInfo>> assignments) { + List<HRegionInfo> unassignedRegions, List<ServerName> servers, + List<HRegionInfo> masterRegions, Map<ServerName, List<HRegionInfo>> assignments) { boolean masterIncluded = servers.contains(masterServerName); int numServers = servers.size(); @@ -1507,6 +1352,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (masterIncluded) { skipServers--; } + int numRegions = regions.size(); int max = (int) Math.ceil((float) numRegions / skipServers); int serverIdx = 0; if (numServers > 1) { @@ -1523,7 +1369,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max); for (int i = regionIdx; i < numRegions; i += skipServers) { - HRegionInfo region = regions.get(offset + i % numRegions); + HRegionInfo region = regions.get(i % numRegions); if (masterRegions == null || !shouldBeOnMaster(region)) { if (cluster.wouldLowerAvailability(region, server)) { unassignedRegions.add(region); http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java index d1e6beb..e7fbc4a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hbase.master.balancer; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.NavigableMap; @@ -35,12 +34,9 @@ public class ClusterLoadState { private boolean emptyRegionServerPresent = false; private int numRegions = 0; private int numServers = 0; - private int numBackupMasters = 0; - private int backupMasterWeight; - public ClusterLoadState(ServerName master, Collection<ServerName> backupMasters, - int backupMasterWeight, Map<ServerName, List<HRegionInfo>> clusterState) { - this.backupMasterWeight = backupMasterWeight; + public ClusterLoadState(ServerName master, + Map<ServerName, List<HRegionInfo>> clusterState) { this.numRegions = 0; this.numServers = clusterState.size(); this.clusterState = clusterState; @@ -56,10 +52,6 @@ public class ClusterLoadState { int sz = regions.size(); if (sz == 0) emptyRegionServerPresent = true; numRegions += sz; - if (backupMasters != null && backupMasters.contains(server.getKey())) { - sz *= backupMasterWeight; - numBackupMasters++; - } serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions); } } @@ -84,12 +76,8 @@ public class ClusterLoadState { return numServers; } - int getNumBackupMasters() { - return numBackupMasters; - } - float getLoadAverage() { - return numRegions / (numServers - numBackupMasters * (1 - 1.0f/backupMasterWeight)); + return (float) numRegions / numServers; } int getMaxLoad() { http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java index 6225f6c..fb269ac 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master.balancer; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -187,17 +186,14 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { if (regionsToReturn != null) { return regionsToReturn; } - filterExcludedServers(clusterMap); boolean emptyRegionServerPresent = false; long startTime = System.currentTimeMillis(); - Collection<ServerName> backupMasters = getBackupMasters(); - ClusterLoadState cs = new ClusterLoadState(masterServerName, - backupMasters, backupMasterWeight, clusterMap); + ClusterLoadState cs = new ClusterLoadState(masterServerName, clusterMap); // construct a Cluster object with clusterMap and rest of the // argument as defaults Cluster c = new Cluster(masterServerName, clusterMap, null, this.regionFinder, - getBackupMasters(), tablesOnMaster, this.rackManager); + tablesOnMaster, this.rackManager); if (!this.needsBalance(c)) return null; int numServers = cs.getNumServers(); @@ -210,9 +206,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { // Using to check balance result. StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numRegions=").append(numRegions) - .append(", numServers=").append(numServers).append(", numBackupMasters=") - .append(cs.getNumBackupMasters()).append(", backupMasterWeight=") - .append(backupMasterWeight).append(", max=").append(max) + .append(", numServers=").append(numServers).append(", max=").append(max) .append(", min=").append(min); LOG.debug(strBalanceParam.toString()); @@ -238,11 +232,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { } serversOverloaded++; List<HRegionInfo> regions = server.getValue(); - int w = 1; // Normal region server has weight 1 - if (backupMasters != null && backupMasters.contains(sal.getServerName())) { - w = backupMasterWeight; // Backup master has heavier weight - } - int numToOffload = Math.min((load - max) / w, regions.size()); + int numToOffload = Math.min(load - max, regions.size()); // account for the out-of-band regions which were assigned to this server // after some other region server crashed Collections.sort(regions, riComparator); @@ -282,12 +272,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { if (load >= min && load > 0) { continue; // look for other servers which haven't reached min } - int w = 1; // Normal region server has weight 1 - if (backupMasters != null - && backupMasters.contains(server.getKey().getServerName())) { - w = backupMasterWeight; // Backup master has heavier weight - } - int regionsToPut = (min - load) / w; + int regionsToPut = min - load; if (regionsToPut == 0) { regionsToPut = 1; http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 854e6a2..6f564e0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -157,7 +157,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf); costFunctions = new CostFunction[]{ - new RegionCountSkewCostFunction(conf, backupMasterWeight), + new RegionCountSkewCostFunction(conf), new MoveCostFunction(conf), localityCost, new TableSkewCostFunction(conf), @@ -211,12 +211,11 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { if (plans != null) { return plans; } - filterExcludedServers(clusterState); //The clusterState that is given to this method contains the state //of all the regions in the table(s) (that's true today) // Keep track of servers to iterate through them. Cluster cluster = new Cluster(masterServerName, - clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager); + clusterState, loads, regionFinder, tablesOnMaster, rackManager); if (!needsBalance(cluster)) { return null; } @@ -329,7 +328,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { loads = new HashMap<String, Deque<RegionLoad>>(); for (ServerName sn : clusterStatus.getServers()) { - if (excludedServers.contains(sn)) continue; ServerLoad sl = clusterStatus.getLoad(sn); if (sl == null) { continue; @@ -438,7 +436,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { } protected int pickOtherRandomServer(Cluster cluster, int serverIndex) { - if (cluster.numServers <= 2) { + if (cluster.numServers < 2) { + return -1; + } + if (cluster.activeMasterIndex != -1 && cluster.numServers == 2) { return -1; } while (true) { @@ -528,10 +529,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { private int pickLeastLoadedServer(final Cluster cluster, int thisServer) { Integer[] servers = cluster.serverIndicesSortedByRegionCount; - if (servers.length <= 2) { - return -1; - } - int index = 0; while (servers[index] == null || servers[index] == thisServer || cluster.isActiveMaster(index)) { @@ -584,6 +581,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // Pick the server with the highest locality int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion); + if (otherServer == -1) { + return Cluster.NullAction; + } + // pick an region on the other server to potentially swap int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f); @@ -800,7 +801,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { double total = getSum(stats); double count = stats.length; - if (stats.length > 1 && cluster.masterServerName != null) { + if (stats.length > 1 && cluster.activeMasterIndex != -1) { count--; // Exclude the active master } double mean = total/count; @@ -901,14 +902,12 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { "hbase.master.balancer.stochastic.regionCountCost"; private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500; - private double backupMasterWeight; private double[] stats = null; - RegionCountSkewCostFunction(Configuration conf, double backupMasterWeight) { + RegionCountSkewCostFunction(Configuration conf) { super(conf); // Load multiplier should be the greatest as it is the most general way to balance data. this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST)); - this.backupMasterWeight = backupMasterWeight; } @Override @@ -919,11 +918,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { for (int i =0; i < cluster.numServers; i++) { stats[i] = cluster.regionsPerServer[i].length; - // Use some weight on regions assigned to active/backup masters, - // so that they won't carry as many regions as normal regionservers. - if (cluster.isBackupMaster(i)) { - stats[i] *= backupMasterWeight; - } } return costFromArray(stats); } http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index de52bf2..9fc872a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -52,7 +52,6 @@ import javax.servlet.http.HttpServlet; import org.apache.commons.lang.math.RandomUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -64,6 +63,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HealthCheckChore; +import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.ServerName; @@ -72,7 +72,7 @@ import org.apache.hadoop.hbase.TableDescriptors; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.ZNodeClearer; -import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.ConnectionUtils; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; @@ -319,7 +319,7 @@ public class HRegionServer extends HasThread implements LogRoller metaHLogRoller; // flag set after we're done setting up server threads - protected AtomicBoolean online; + final AtomicBoolean online = new AtomicBoolean(false); // zookeeper connection and watcher protected ZooKeeperWatcher zooKeeper; @@ -341,7 +341,7 @@ public class HRegionServer extends HasThread implements private final RegionServerAccounting regionServerAccounting; // Cache configuration and block cache reference - final CacheConfig cacheConfig; + protected CacheConfig cacheConfig; /** The health check chore. */ private HealthCheckChore healthCheckChore; @@ -436,7 +436,6 @@ public class HRegionServer extends HasThread implements this.fsOk = true; this.conf = conf; checkCodecs(this.conf); - this.online = new AtomicBoolean(false); this.userProvider = UserProvider.instantiate(conf); FSUtils.setupShortCircuitRead(this.conf); @@ -473,7 +472,6 @@ public class HRegionServer extends HasThread implements login(userProvider, hostName); regionServerAccounting = new RegionServerAccounting(); - cacheConfig = new CacheConfig(conf); uncaughtExceptionHandler = new UncaughtExceptionHandler() { @Override public void uncaughtException(Thread t, Throwable e) { @@ -527,6 +525,9 @@ public class HRegionServer extends HasThread implements "hbase.regionserver.kerberos.principal", host); } + protected void waitForMasterActive(){ + } + protected String getProcessName() { return REGIONSERVER; } @@ -594,8 +595,26 @@ public class HRegionServer extends HasThread implements */ private void preRegistrationInitialization(){ try { + synchronized (this) { + if (shortCircuitConnection == null) { + shortCircuitConnection = createShortCircuitConnection(); + metaTableLocator = new MetaTableLocator(); + } + } + + // Health checker thread. + if (isHealthCheckerConfigured()) { + int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ, + HConstants.DEFAULT_THREAD_WAKE_FREQUENCY); + healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration()); + } + this.pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor.start(); + initializeZooKeeper(); - initializeThreads(); + if (!isStopped() && !isAborted()) { + initializeThreads(); + } } catch (Throwable t) { // Call stop if error or process will stick around for ever since server // puts up non-daemon threads. @@ -616,8 +635,6 @@ public class HRegionServer extends HasThread implements // Create the master address tracker, register with zk, and start it. Then // block until a master is available. No point in starting up if no master // running. - this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this); - this.masterAddressTracker.start(); blockAndCheckIfStopped(this.masterAddressTracker); // Wait on cluster being up. Master will set this flag up in zookeeper @@ -637,11 +654,13 @@ public class HRegionServer extends HasThread implements this.abort("Failed to retrieve Cluster ID",e); } - synchronized (this) { - if (shortCircuitConnection == null) { - shortCircuitConnection = createShortCircuitConnection(); - metaTableLocator = new MetaTableLocator(); - } + // In case colocated master, wait here till it's active. + // So backup masters won't start as regionservers. + // This is to avoid showing backup masters as regionservers + // in master web UI, or assigning any region to them. + waitForMasterActive(); + if (isStopped() || isAborted()) { + return; // No need for further initialization } // watch for snapshots and other procedures @@ -690,13 +709,6 @@ public class HRegionServer extends HasThread implements // in a while. It will take care of not checking too frequently on store-by-store basis. this.compactionChecker = new CompactionChecker(this, this.threadWakeFrequency, this); this.periodicFlusher = new PeriodicMemstoreFlusher(this.threadWakeFrequency, this); - // Health checker thread. - int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ, - HConstants.DEFAULT_THREAD_WAKE_FREQUENCY); - if (isHealthCheckerConfigured()) { - healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration()); - } - this.leases = new Leases(this.threadWakeFrequency); // Create the thread to clean the moved regions list @@ -710,8 +722,6 @@ public class HRegionServer extends HasThread implements // Setup RPC client for master communication rpcClient = new RpcClient(conf, clusterId, new InetSocketAddress( rpcServices.isa.getAddress(), 0)); - this.pauseMonitor = new JvmPauseMonitor(conf); - pauseMonitor.start(); int storefileRefreshPeriod = conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD , StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD); @@ -827,7 +837,7 @@ public class HRegionServer extends HasThread implements } } // Send cache a shutdown. - if (cacheConfig.isBlockCacheEnabled()) { + if (cacheConfig != null && cacheConfig.isBlockCacheEnabled()) { cacheConfig.getBlockCache().shutdown(); } @@ -927,6 +937,7 @@ public class HRegionServer extends HasThread implements try { deleteMyEphemeralNode(); + } catch (KeeperException.NoNodeException nn) { } catch (KeeperException e) { LOG.warn("Failed deleting my ephemeral node", e); } @@ -1172,6 +1183,7 @@ public class HRegionServer extends HasThread implements // Save it in a file, this will allow to see if we crash ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath()); + this.cacheConfig = new CacheConfig(conf); this.hlog = setupWALAndReplication(); // Init in here rather than in constructor after thread name has been set this.metricsRegionServer = new MetricsRegionServer(new MetricsRegionServerWrapperImpl(this)); @@ -1182,6 +1194,8 @@ public class HRegionServer extends HasThread implements ", RpcServer on " + rpcServices.isa + ", sessionid=0x" + Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId())); + + // Wake up anyone waiting for this server to online synchronized (online) { online.set(true); online.notifyAll(); @@ -1566,10 +1580,6 @@ public class HRegionServer extends HasThread implements } } - // Start Server. This service is like leases in that it internally runs - // a thread. - rpcServices.rpcServer.start(); - // Create the log splitting worker and start it // set a smaller retries to fast fail otherwise splitlogworker could be blocked for // quite a while inside HConnection layer. The worker won't be available for other @@ -1697,8 +1707,15 @@ public class HRegionServer extends HasThread implements } public void waitForServerOnline(){ - while (!isOnline() && !isStopped()){ - sleeper.sleep(); + while (!isStopped() && !isOnline()) { + synchronized (online) { + try { + online.wait(msgInterval); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + break; + } + } } } @@ -1941,12 +1958,11 @@ public class HRegionServer extends HasThread implements } ServerName sn = null; long previousLogTime = 0; - RegionServerStatusService.BlockingInterface master = null; boolean refresh = false; // for the first time, use cached data RegionServerStatusService.BlockingInterface intf = null; boolean interrupted = false; try { - while (keepLooping() && master == null) { + while (keepLooping()) { sn = this.masterAddressTracker.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java index b0a69f8..83b9fb1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java @@ -25,12 +25,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.HasThread; import java.util.ConcurrentModificationException; -import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Delayed; -import java.util.concurrent.DelayQueue; import java.util.concurrent.TimeUnit; import java.io.IOException; http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 98f961f..732f3d4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -18,14 +18,25 @@ */ package org.apache.hadoop.hbase.regionserver; -import com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import com.google.protobuf.RpcController; -import com.google.protobuf.ServiceException; -import com.google.protobuf.TextFormat; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellScannable; @@ -39,11 +50,12 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownScannerException; -import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.ConnectionUtils; import org.apache.hadoop.hbase.client.Delete; @@ -56,6 +68,7 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.coordination.CloseRegionCoordination; +import org.apache.hadoop.hbase.coordination.OpenRegionCoordination; import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException; import org.apache.hadoop.hbase.exceptions.OperationConflictException; import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; @@ -134,7 +147,6 @@ import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor; import org.apache.hadoop.hbase.regionserver.HRegion.Operation; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; -import org.apache.hadoop.hbase.coordination.OpenRegionCoordination; import org.apache.hadoop.hbase.regionserver.handler.OpenMetaHandler; import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler; import org.apache.hadoop.hbase.regionserver.wal.HLog; @@ -151,27 +163,17 @@ import org.apache.hadoop.hbase.zookeeper.ZKSplitLog; import org.apache.hadoop.net.DNS; import org.apache.zookeeper.KeeperException; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; +import com.google.protobuf.TextFormat; /** * Implements the regionserver RPC services. */ @InterfaceAudience.Private +@SuppressWarnings("deprecation") public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.BlockingInterface, ClientService.BlockingInterface, PriorityFunction { protected static final Log LOG = LogFactory.getLog(RSRpcServices.class); @@ -1129,7 +1131,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler, * @throws ServiceException */ @Override - @SuppressWarnings("deprecation") public GetServerInfoResponse getServerInfo(final RpcController controller, final GetServerInfoRequest request) throws ServiceException { try { @@ -1224,7 +1225,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler, * @throws ServiceException */ @Override - @SuppressWarnings("deprecation") @QosPriority(priority=HConstants.HIGH_QOS) public OpenRegionResponse openRegion(final RpcController controller, final OpenRegionRequest request) throws ServiceException { @@ -1265,7 +1265,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, try { while (System.currentTimeMillis() <= endTime && !regionServer.isStopped() && !regionServer.isOnline()) { - regionServer.online.wait(100); + regionServer.online.wait(regionServer.msgInterval); } checkOpen(); } catch (InterruptedException t) { http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java index f038ed3..0b8846c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java @@ -23,15 +23,13 @@ import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.List; import java.util.NavigableMap; -import java.util.NavigableSet; import java.util.TreeMap; -import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo; @@ -54,12 +52,12 @@ public class RegionServerTracker extends ZooKeeperListener { private NavigableMap<ServerName, RegionServerInfo> regionServers = new TreeMap<ServerName, RegionServerInfo>(); private ServerManager serverManager; - private Abortable abortable; + private Server server; public RegionServerTracker(ZooKeeperWatcher watcher, - Abortable abortable, ServerManager serverManager) { + Server server, ServerManager serverManager) { super(watcher); - this.abortable = abortable; + this.server = server; this.serverManager = serverManager; } @@ -133,15 +131,16 @@ public class RegionServerTracker extends ZooKeeperListener { @Override public void nodeChildrenChanged(String path) { - if (path.equals(watcher.rsZNode)) { + if (path.equals(watcher.rsZNode) + && !server.isAborted() && !server.isStopped()) { try { List<String> servers = ZKUtil.listChildrenAndWatchThem(watcher, watcher.rsZNode); add(servers); } catch (IOException e) { - abortable.abort("Unexpected zk exception getting RS nodes", e); + server.abort("Unexpected zk exception getting RS nodes", e); } catch (KeeperException e) { - abortable.abort("Unexpected zk exception getting RS nodes", e); + server.abort("Unexpected zk exception getting RS nodes", e); } } } http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 48b5612..cec27df 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -48,12 +48,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Jdk14Logger; import org.apache.commons.logging.impl.Log4JLogger; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Waiter.Predicate; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; @@ -76,6 +76,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.ChecksumUtil; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.ipc.RpcServerInterface; +import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.RegionStates; @@ -1473,6 +1474,7 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { /** * Modify a table, synchronous. Waiting logic similar to that of {@code admin.rb#alter_status}. */ + @SuppressWarnings("serial") public static void modifyTableSync(Admin admin, HTableDescriptor desc) throws IOException, InterruptedException { admin.modifyTable(desc.getTableName(), desc); @@ -2832,6 +2834,8 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { } } catch (RegionServerStoppedException e) { // That's fine. + } catch (ServerNotRunningYetException e) { + // That's fine. } } return online; http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java index e7b748e..c9a2bb0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java @@ -602,11 +602,6 @@ public class TestAdmin { } regs.add(entry.getKey()); } - if (numRS >= 2) { - // Ignore the master region server, - // which contains less regions by intention. - numRS--; - } float average = (float) expectedRegions/numRS; int min = (int)Math.floor(average); int max = (int)Math.ceil(average); http://git-wip-us.apache.org/repos/asf/hbase/blob/e7c610b9/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java index 69becbd..a38cb1f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java @@ -4144,7 +4144,7 @@ public class TestFromClientSide { // HBaseAdmin and can connect to the new master; HBaseAdmin newAdmin = new HBaseAdmin(conn); assertTrue(newAdmin.tableExists(tableName)); - assertTrue(newAdmin.getClusterStatus().getServersSize() == SLAVES + 1); + assertTrue(newAdmin.getClusterStatus().getServersSize() == SLAVES); } @Test
