http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index af05c16..970744d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -83,6 +83,8 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.exceptions.MergeRegionException; import org.apache.hadoop.hbase.executor.ExecutorType; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; +import org.apache.hadoop.hbase.favored.FavoredNodesPromoter; import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils; import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; @@ -91,7 +93,6 @@ import org.apache.hadoop.hbase.master.balancer.BalancerChore; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; -import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; import org.apache.hadoop.hbase.master.cleaner.LogCleaner; import org.apache.hadoop.hbase.master.cleaner.ReplicationMetaCleaner; @@ -365,6 +366,9 @@ public class HMaster extends HRegionServer implements MasterServices { /** flag used in test cases in order to simulate RS failures during master initialization */ private volatile boolean initializationBeforeMetaAssignment = false; + /* Handle favored nodes information */ + private FavoredNodesManager favoredNodesManager; + /** jetty server for 
master to redirect requests to regionserver infoServer */ private org.mortbay.jetty.Server masterJettyServer; @@ -749,6 +753,9 @@ public class HMaster extends HRegionServer implements MasterServices { this.initializationBeforeMetaAssignment = true; + if (this.balancer instanceof FavoredNodesPromoter) { + favoredNodesManager = new FavoredNodesManager(this); + } // Wait for regionserver to finish initialization. if (BaseLoadBalancer.tablesOnMaster(conf)) { waitForServerOnline(); @@ -771,6 +778,14 @@ public class HMaster extends HRegionServer implements MasterServices { // assigned when master is shutting down if (isStopped()) return; + //Initialize after meta as it scans meta + if (favoredNodesManager != null) { + SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment = + new SnapshotOfRegionAssignmentFromMeta(getConnection()); + snapshotOfRegionAssignment.initialize(); + favoredNodesManager.initialize(snapshotOfRegionAssignment); + } + // migrating existent table state from zk, so splitters // and recovery process treat states properly. for (Map.Entry<TableName, TableState.State> entry : ZKDataMigrator @@ -2995,4 +3010,9 @@ public class HMaster extends HRegionServer implements MasterServices { public LoadBalancer getLoadBalancer() { return balancer; } + + @Override + public FavoredNodesManager getFavoredNodesManager() { + return favoredNodesManager; + } }
http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index a4c27f3..7845101 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; @@ -409,4 +410,9 @@ public interface MasterServices extends Server { * @return True if this master is stopping. 
*/ boolean isStopping(); + + /** + * @return Favored Nodes Manager + */ + public FavoredNodesManager getFavoredNodesManager(); } http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionPlacementMaintainer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionPlacementMaintainer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionPlacementMaintainer.java index 01a698e..7acf9df 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionPlacementMaintainer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionPlacementMaintainer.java @@ -51,8 +51,8 @@ import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; -import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper; -import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan; +import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper; +import org.apache.hadoop.hbase.favored.FavoredNodesPlan; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter; http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index d15b87e..f3b21ac 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminServic import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ServerInfo; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.StoreSequenceId; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest; @@ -1237,4 +1238,27 @@ public class ServerManager { removeRegion(hri); } } + + public void sendFavoredNodes(final ServerName server, + Map<HRegionInfo, List<ServerName>> favoredNodes) throws IOException { + AdminService.BlockingInterface admin = getRsAdmin(server); + if (admin == null) { + LOG.warn("Attempting to send favored nodes update rpc to server " + server.toString() + + " failed because no RPC connection found to this server"); + } else { + List<Pair<HRegionInfo, List<ServerName>>> regionUpdateInfos = + new ArrayList<Pair<HRegionInfo, List<ServerName>>>(); + for (Entry<HRegionInfo, List<ServerName>> entry : favoredNodes.entrySet()) { + regionUpdateInfos.add(new Pair<HRegionInfo, List<ServerName>>(entry.getKey(), + entry.getValue())); + } + UpdateFavoredNodesRequest request = + RequestConverter.buildUpdateFavoredNodesRequest(regionUpdateInfos); + try { + admin.updateFavoredNodes(null, request); + } catch (ServiceException se) { + throw ProtobufUtil.getRemoteException(se); + } + } + } } 
http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java index 39beba8..8fedb40 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java @@ -18,6 +18,10 @@ */ package org.apache.hadoop.hbase.master; +import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY; +import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY; +import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -31,6 +35,8 @@ import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper; +import org.apache.hadoop.hbase.favored.FavoredNodesPlan; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; @@ -41,8 +47,6 @@ import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.MetaTableAccessor.Visitor; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper; -import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan; /** * Used internally for reading meta and constructing datastructures that are @@ -66,7 +70,10 @@ public 
class SnapshotOfRegionAssignmentFromMeta { private final Map<String, HRegionInfo> regionNameToRegionInfoMap; /** the regionServer to region map */ - private final Map<ServerName, List<HRegionInfo>> regionServerToRegionMap; + private final Map<ServerName, List<HRegionInfo>> currentRSToRegionMap; + private final Map<ServerName, List<HRegionInfo>> secondaryRSToRegionMap; + private final Map<ServerName, List<HRegionInfo>> teritiaryRSToRegionMap; + private final Map<ServerName, List<HRegionInfo>> primaryRSToRegionMap; /** the existing assignment plan in the hbase:meta region */ private final FavoredNodesPlan existingAssignmentPlan; private final Set<TableName> disabledTables; @@ -81,7 +88,10 @@ public class SnapshotOfRegionAssignmentFromMeta { this.connection = connection; tableToRegionMap = new HashMap<TableName, List<HRegionInfo>>(); regionToRegionServerMap = new HashMap<HRegionInfo, ServerName>(); - regionServerToRegionMap = new HashMap<ServerName, List<HRegionInfo>>(); + currentRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>(); + primaryRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>(); + secondaryRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>(); + teritiaryRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>(); regionNameToRegionInfoMap = new TreeMap<String, HRegionInfo>(); existingAssignmentPlan = new FavoredNodesPlan(); this.disabledTables = disabledTables; @@ -122,6 +132,7 @@ public class SnapshotOfRegionAssignmentFromMeta { addRegion(hri); } + hri = rl.getRegionLocation(0).getRegionInfo(); // the code below is to handle favored nodes byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY, FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER); @@ -132,6 +143,20 @@ public class SnapshotOfRegionAssignmentFromMeta { // Add the favored nodes into assignment plan existingAssignmentPlan.updateFavoredNodesMap(hri, Arrays.asList(favoredServerList)); + + /* + * Typically there should be FAVORED_NODES_NUM favored nodes for a 
region in meta. If + * there are fewer than FAVORED_NODES_NUM, use as many as we can but log a warning. + */ + if (favoredServerList.length != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) { + LOG.warn("Insufficient favored nodes for region " + hri + " fn: " + Arrays + .toString(favoredServerList)); + } + for (int i = 0; i < favoredServerList.length; i++) { + if (i == PRIMARY.ordinal()) addPrimaryAssignment(hri, favoredServerList[i]); + if (i == SECONDARY.ordinal()) addSecondaryAssignment(hri, favoredServerList[i]); + if (i == TERTIARY.ordinal()) addTeritiaryAssignment(hri, favoredServerList[i]); + } return true; } catch (RuntimeException e) { LOG.error("Catche remote exception " + e.getMessage() + @@ -169,12 +194,42 @@ public class SnapshotOfRegionAssignmentFromMeta { if (server == null) return; // Process the region server to region map - List<HRegionInfo> regionList = regionServerToRegionMap.get(server); + List<HRegionInfo> regionList = currentRSToRegionMap.get(server); + if (regionList == null) { + regionList = new ArrayList<HRegionInfo>(); + } + regionList.add(regionInfo); + currentRSToRegionMap.put(server, regionList); + } + + private void addPrimaryAssignment(HRegionInfo regionInfo, ServerName server) { + // Process the region server to region map + List<HRegionInfo> regionList = primaryRSToRegionMap.get(server); if (regionList == null) { regionList = new ArrayList<HRegionInfo>(); } regionList.add(regionInfo); - regionServerToRegionMap.put(server, regionList); + primaryRSToRegionMap.put(server, regionList); + } + + private void addSecondaryAssignment(HRegionInfo regionInfo, ServerName server) { + // Process the region server to region map + List<HRegionInfo> regionList = secondaryRSToRegionMap.get(server); + if (regionList == null) { + regionList = new ArrayList<HRegionInfo>(); + } + regionList.add(regionInfo); + secondaryRSToRegionMap.put(server, regionList); + } + + private void addTeritiaryAssignment(HRegionInfo regionInfo, ServerName server) { + // 
Process the region server to region map + List<HRegionInfo> regionList = teritiaryRSToRegionMap.get(server); + if (regionList == null) { + regionList = new ArrayList<HRegionInfo>(); + } + regionList.add(regionInfo); + teritiaryRSToRegionMap.put(server, regionList); } /** @@ -206,7 +261,7 @@ public class SnapshotOfRegionAssignmentFromMeta { * @return regionserver to region map */ public Map<ServerName, List<HRegionInfo>> getRegionServerToRegionMap() { - return regionServerToRegionMap; + return currentRSToRegionMap; } /** @@ -224,4 +279,16 @@ public class SnapshotOfRegionAssignmentFromMeta { public Set<TableName> getTableSet() { return this.tableToRegionMap.keySet(); } + + public Map<ServerName, List<HRegionInfo>> getSecondaryToRegionInfoMap() { + return this.secondaryRSToRegionMap; + } + + public Map<ServerName, List<HRegionInfo>> getTertiaryToRegionInfoMap() { + return this.teritiaryRSToRegionMap; + } + + public Map<ServerName, List<HRegionInfo>> getPrimaryToRegionInfoMap() { + return this.primaryRSToRegionMap; + } } http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java deleted file mode 100644 index b4d6909..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java +++ /dev/null @@ -1,606 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.master.balancer; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Random; -import java.util.Set; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.MetaTableAccessor; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.client.Connection; -import org.apache.hadoop.hbase.client.ConnectionFactory; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Table; -import org.apache.hadoop.hbase.master.RackManager; -import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; -import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; -import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.FavoredNodes; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; - -import com.google.protobuf.InvalidProtocolBufferException; - -/** - * Helper class for {@link 
FavoredNodeLoadBalancer} that has all the intelligence - * for racks, meta scans, etc. Instantiated by the {@link FavoredNodeLoadBalancer} - * when needed (from within calls like - * {@link FavoredNodeLoadBalancer#randomAssignment(HRegionInfo, List)}). - * - */ [email protected] -public class FavoredNodeAssignmentHelper { - private static final Log LOG = LogFactory.getLog(FavoredNodeAssignmentHelper.class); - private RackManager rackManager; - private Map<String, List<ServerName>> rackToRegionServerMap; - private List<String> uniqueRackList; - private Map<ServerName, String> regionServerToRackMap; - private Random random; - private List<ServerName> servers; - public static final byte [] FAVOREDNODES_QUALIFIER = Bytes.toBytes("fn"); - public final static short FAVORED_NODES_NUM = 3; - - public FavoredNodeAssignmentHelper(final List<ServerName> servers, Configuration conf) { - this(servers, new RackManager(conf)); - } - - public FavoredNodeAssignmentHelper(final List<ServerName> servers, - final RackManager rackManager) { - this.servers = servers; - this.rackManager = rackManager; - this.rackToRegionServerMap = new HashMap<String, List<ServerName>>(); - this.regionServerToRackMap = new HashMap<ServerName, String>(); - this.uniqueRackList = new ArrayList<String>(); - this.random = new Random(); - } - - /** - * Update meta table with favored nodes info - * @param regionToFavoredNodes map of HRegionInfo's to their favored nodes - * @param connection connection to be used - * @throws IOException - */ - public static void updateMetaWithFavoredNodesInfo( - Map<HRegionInfo, List<ServerName>> regionToFavoredNodes, - Connection connection) throws IOException { - List<Put> puts = new ArrayList<Put>(); - for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) { - Put put = makePutFromRegionInfo(entry.getKey(), entry.getValue()); - if (put != null) { - puts.add(put); - } - } - MetaTableAccessor.putsToMetaTable(connection, puts); - LOG.info("Added " 
+ puts.size() + " regions in META"); - } - - /** - * Update meta table with favored nodes info - * @param regionToFavoredNodes - * @param conf - * @throws IOException - */ - public static void updateMetaWithFavoredNodesInfo( - Map<HRegionInfo, List<ServerName>> regionToFavoredNodes, - Configuration conf) throws IOException { - List<Put> puts = new ArrayList<Put>(); - for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) { - Put put = makePutFromRegionInfo(entry.getKey(), entry.getValue()); - if (put != null) { - puts.add(put); - } - } - // Write the region assignments to the meta table. - // TODO: See above overrides take a Connection rather than a Configuration only the - // Connection is a short circuit connection. That is not going to good in all cases, when - // master and meta are not colocated. Fix when this favored nodes feature is actually used - // someday. - try (Connection connection = ConnectionFactory.createConnection(conf)) { - try (Table metaTable = connection.getTable(TableName.META_TABLE_NAME)) { - metaTable.put(puts); - } - } - LOG.info("Added " + puts.size() + " regions in META"); - } - - /** - * Generates and returns a Put containing the region info for the catalog table - * and the servers - * @param regionInfo - * @param favoredNodeList - * @return Put object - */ - static Put makePutFromRegionInfo(HRegionInfo regionInfo, List<ServerName>favoredNodeList) - throws IOException { - Put put = null; - if (favoredNodeList != null) { - put = MetaTableAccessor.makePutFromRegionInfo(regionInfo); - byte[] favoredNodes = getFavoredNodes(favoredNodeList); - put.addImmutable(HConstants.CATALOG_FAMILY, FAVOREDNODES_QUALIFIER, - EnvironmentEdgeManager.currentTime(), favoredNodes); - LOG.info("Create the region " + regionInfo.getRegionNameAsString() + - " with favored nodes " + Bytes.toString(favoredNodes)); - } - return put; - } - - /** - * @param favoredNodes The PB'ed bytes of favored nodes - * @return the array of {@link 
ServerName} for the byte array of favored nodes. - * @throws IOException - */ - public static ServerName[] getFavoredNodesList(byte[] favoredNodes) throws IOException { - FavoredNodes f = FavoredNodes.parseFrom(favoredNodes); - List<HBaseProtos.ServerName> protoNodes = f.getFavoredNodeList(); - ServerName[] servers = new ServerName[protoNodes.size()]; - int i = 0; - for (HBaseProtos.ServerName node : protoNodes) { - servers[i++] = ProtobufUtil.toServerName(node); - } - return servers; - } - - /** - * @param serverAddrList - * @return PB'ed bytes of {@link FavoredNodes} generated by the server list. - */ - public static byte[] getFavoredNodes(List<ServerName> serverAddrList) { - FavoredNodes.Builder f = FavoredNodes.newBuilder(); - for (ServerName s : serverAddrList) { - HBaseProtos.ServerName.Builder b = HBaseProtos.ServerName.newBuilder(); - b.setHostName(s.getHostname()); - b.setPort(s.getPort()); - b.setStartCode(s.getStartcode()); - f.addFavoredNode(b.build()); - } - return f.build().toByteArray(); - } - - // Place the regions round-robin across the racks picking one server from each - // rack at a time. Start with a random rack, and a random server from every rack. - // If a rack doesn't have enough servers it will go to the next rack and so on. - // for choosing a primary. - // For example, if 4 racks (r1 .. r4) with 8 servers (s1..s8) each, one possible - // placement could be r2:s5, r3:s5, r4:s5, r1:s5, r2:s6, r3:s6.. - // If there were fewer servers in one rack, say r3, which had 3 servers, one possible - // placement could be r2:s5, <skip-r3>, r4:s5, r1:s5, r2:s6, <skip-r3> ... 
- // The regions should be distributed proportionately to the racksizes - void placePrimaryRSAsRoundRobin(Map<ServerName, List<HRegionInfo>> assignmentMap, - Map<HRegionInfo, ServerName> primaryRSMap, List<HRegionInfo> regions) { - List<String> rackList = new ArrayList<String>(rackToRegionServerMap.size()); - rackList.addAll(rackToRegionServerMap.keySet()); - int rackIndex = random.nextInt(rackList.size()); - int maxRackSize = 0; - for (Map.Entry<String,List<ServerName>> r : rackToRegionServerMap.entrySet()) { - if (r.getValue().size() > maxRackSize) { - maxRackSize = r.getValue().size(); - } - } - int numIterations = 0; - int firstServerIndex = random.nextInt(maxRackSize); - // Initialize the current processing host index. - int serverIndex = firstServerIndex; - for (HRegionInfo regionInfo : regions) { - List<ServerName> currentServerList; - String rackName; - while (true) { - rackName = rackList.get(rackIndex); - numIterations++; - // Get the server list for the current rack - currentServerList = rackToRegionServerMap.get(rackName); - - if (serverIndex >= currentServerList.size()) { //not enough machines in this rack - if (numIterations % rackList.size() == 0) { - if (++serverIndex >= maxRackSize) serverIndex = 0; - } - if ((++rackIndex) >= rackList.size()) { - rackIndex = 0; // reset the rack index to 0 - } - } else break; - } - - // Get the current process region server - ServerName currentServer = currentServerList.get(serverIndex); - - // Place the current region with the current primary region server - primaryRSMap.put(regionInfo, currentServer); - List<HRegionInfo> regionsForServer = assignmentMap.get(currentServer); - if (regionsForServer == null) { - regionsForServer = new ArrayList<HRegionInfo>(); - assignmentMap.put(currentServer, regionsForServer); - } - regionsForServer.add(regionInfo); - - // Set the next processing index - if (numIterations % rackList.size() == 0) { - ++serverIndex; - } - if ((++rackIndex) >= rackList.size()) { - rackIndex = 0; // 
reset the rack index to 0 - } - } - } - - Map<HRegionInfo, ServerName[]> placeSecondaryAndTertiaryRS( - Map<HRegionInfo, ServerName> primaryRSMap) { - Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap = - new HashMap<HRegionInfo, ServerName[]>(); - for (Map.Entry<HRegionInfo, ServerName> entry : primaryRSMap.entrySet()) { - // Get the target region and its primary region server rack - HRegionInfo regionInfo = entry.getKey(); - ServerName primaryRS = entry.getValue(); - try { - // Create the secondary and tertiary region server pair object. - ServerName[] favoredNodes; - // Get the rack for the primary region server - String primaryRack = rackManager.getRack(primaryRS); - - if (getTotalNumberOfRacks() == 1) { - favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack); - } else { - favoredNodes = multiRackCase(regionInfo, primaryRS, primaryRack); - } - if (favoredNodes != null) { - secondaryAndTertiaryMap.put(regionInfo, favoredNodes); - LOG.debug("Place the secondary and tertiary region server for region " - + regionInfo.getRegionNameAsString()); - } - } catch (Exception e) { - LOG.warn("Cannot place the favored nodes for region " + - regionInfo.getRegionNameAsString() + " because " + e, e); - continue; - } - } - return secondaryAndTertiaryMap; - } - - private Map<ServerName, Set<HRegionInfo>> mapRSToPrimaries( - Map<HRegionInfo, ServerName> primaryRSMap) { - Map<ServerName, Set<HRegionInfo>> primaryServerMap = - new HashMap<ServerName, Set<HRegionInfo>>(); - for (Entry<HRegionInfo, ServerName> e : primaryRSMap.entrySet()) { - Set<HRegionInfo> currentSet = primaryServerMap.get(e.getValue()); - if (currentSet == null) { - currentSet = new HashSet<HRegionInfo>(); - } - currentSet.add(e.getKey()); - primaryServerMap.put(e.getValue(), currentSet); - } - return primaryServerMap; - } - - /** - * For regions that share the primary, avoid placing the secondary and tertiary - * on a same RS. 
Used for generating new assignments for the - * primary/secondary/tertiary RegionServers - * @param primaryRSMap - * @return the map of regions to the servers the region-files should be hosted on - */ - public Map<HRegionInfo, ServerName[]> placeSecondaryAndTertiaryWithRestrictions( - Map<HRegionInfo, ServerName> primaryRSMap) { - Map<ServerName, Set<HRegionInfo>> serverToPrimaries = - mapRSToPrimaries(primaryRSMap); - Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap = - new HashMap<HRegionInfo, ServerName[]>(); - - for (Entry<HRegionInfo, ServerName> entry : primaryRSMap.entrySet()) { - // Get the target region and its primary region server rack - HRegionInfo regionInfo = entry.getKey(); - ServerName primaryRS = entry.getValue(); - try { - // Get the rack for the primary region server - String primaryRack = rackManager.getRack(primaryRS); - ServerName[] favoredNodes = null; - if (getTotalNumberOfRacks() == 1) { - // Single rack case: have to pick the secondary and tertiary - // from the same rack - favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack); - } else { - favoredNodes = multiRackCaseWithRestrictions(serverToPrimaries, - secondaryAndTertiaryMap, primaryRack, primaryRS, regionInfo); - } - if (favoredNodes != null) { - secondaryAndTertiaryMap.put(regionInfo, favoredNodes); - LOG.debug("Place the secondary and tertiary region server for region " - + regionInfo.getRegionNameAsString()); - } - } catch (Exception e) { - LOG.warn("Cannot place the favored nodes for region " - + regionInfo.getRegionNameAsString() + " because " + e, e); - continue; - } - } - return secondaryAndTertiaryMap; - } - - private ServerName[] multiRackCaseWithRestrictions( - Map<ServerName, Set<HRegionInfo>> serverToPrimaries, - Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap, - String primaryRack, ServerName primaryRS, HRegionInfo regionInfo) throws IOException { - // Random to choose the secondary and tertiary region server - // from another rack to place the 
secondary and tertiary - // Random to choose one rack except for the current rack - Set<String> rackSkipSet = new HashSet<String>(); - rackSkipSet.add(primaryRack); - String secondaryRack = getOneRandomRack(rackSkipSet); - List<ServerName> serverList = getServersFromRack(secondaryRack); - Set<ServerName> serverSet = new HashSet<ServerName>(); - serverSet.addAll(serverList); - ServerName[] favoredNodes; - if (serverList.size() >= 2) { - // Randomly pick up two servers from this secondary rack - // Skip the secondary for the tertiary placement - // skip the servers which share the primary already - Set<HRegionInfo> primaries = serverToPrimaries.get(primaryRS); - Set<ServerName> skipServerSet = new HashSet<ServerName>(); - while (true) { - ServerName[] secondaryAndTertiary = null; - if (primaries.size() > 1) { - // check where his tertiary and secondary are - for (HRegionInfo primary : primaries) { - secondaryAndTertiary = secondaryAndTertiaryMap.get(primary); - if (secondaryAndTertiary != null) { - if (regionServerToRackMap.get(secondaryAndTertiary[0]).equals(secondaryRack)) { - skipServerSet.add(secondaryAndTertiary[0]); - } - if (regionServerToRackMap.get(secondaryAndTertiary[1]).equals(secondaryRack)) { - skipServerSet.add(secondaryAndTertiary[1]); - } - } - } - } - if (skipServerSet.size() + 2 <= serverSet.size()) - break; - skipServerSet.clear(); - rackSkipSet.add(secondaryRack); - // we used all racks - if (rackSkipSet.size() == getTotalNumberOfRacks()) { - // remove the last two added and break - skipServerSet.remove(secondaryAndTertiary[0]); - skipServerSet.remove(secondaryAndTertiary[1]); - break; - } - secondaryRack = getOneRandomRack(rackSkipSet); - serverList = getServersFromRack(secondaryRack); - serverSet = new HashSet<ServerName>(); - serverSet.addAll(serverList); - } - - // Place the secondary RS - ServerName secondaryRS = getOneRandomServer(secondaryRack, skipServerSet); - skipServerSet.add(secondaryRS); - // Place the tertiary RS - ServerName 
tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet); - - if (secondaryRS == null || tertiaryRS == null) { - LOG.error("Cannot place the secondary and tertiary" - + " region server for region " - + regionInfo.getRegionNameAsString()); - } - // Create the secondary and tertiary pair - favoredNodes = new ServerName[2]; - favoredNodes[0] = secondaryRS; - favoredNodes[1] = tertiaryRS; - } else { - // Pick the secondary rs from this secondary rack - // and pick the tertiary from another random rack - favoredNodes = new ServerName[2]; - ServerName secondary = getOneRandomServer(secondaryRack); - favoredNodes[0] = secondary; - - // Pick the tertiary - if (getTotalNumberOfRacks() == 2) { - // Pick the tertiary from the same rack of the primary RS - Set<ServerName> serverSkipSet = new HashSet<ServerName>(); - serverSkipSet.add(primaryRS); - favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet); - } else { - // Pick the tertiary from another rack - rackSkipSet.add(secondaryRack); - String tertiaryRandomRack = getOneRandomRack(rackSkipSet); - favoredNodes[1] = getOneRandomServer(tertiaryRandomRack); - } - } - return favoredNodes; - } - - private ServerName[] singleRackCase(HRegionInfo regionInfo, - ServerName primaryRS, - String primaryRack) throws IOException { - // Single rack case: have to pick the secondary and tertiary - // from the same rack - List<ServerName> serverList = getServersFromRack(primaryRack); - if (serverList.size() <= 2) { - // Single region server case: cannot not place the favored nodes - // on any server; - return null; - } else { - // Randomly select two region servers from the server list and make sure - // they are not overlap with the primary region server; - Set<ServerName> serverSkipSet = new HashSet<ServerName>(); - serverSkipSet.add(primaryRS); - - // Place the secondary RS - ServerName secondaryRS = getOneRandomServer(primaryRack, serverSkipSet); - // Skip the secondary for the tertiary placement - 
serverSkipSet.add(secondaryRS); - - // Place the tertiary RS - ServerName tertiaryRS = - getOneRandomServer(primaryRack, serverSkipSet); - - if (secondaryRS == null || tertiaryRS == null) { - LOG.error("Cannot place the secondary and ternary" + - "region server for region " + - regionInfo.getRegionNameAsString()); - } - // Create the secondary and tertiary pair - ServerName[] favoredNodes = new ServerName[2]; - favoredNodes[0] = secondaryRS; - favoredNodes[1] = tertiaryRS; - return favoredNodes; - } - } - - private ServerName[] multiRackCase(HRegionInfo regionInfo, - ServerName primaryRS, - String primaryRack) throws IOException { - - // Random to choose the secondary and tertiary region server - // from another rack to place the secondary and tertiary - - // Random to choose one rack except for the current rack - Set<String> rackSkipSet = new HashSet<String>(); - rackSkipSet.add(primaryRack); - ServerName[] favoredNodes = new ServerName[2]; - String secondaryRack = getOneRandomRack(rackSkipSet); - List<ServerName> serverList = getServersFromRack(secondaryRack); - if (serverList.size() >= 2) { - // Randomly pick up two servers from this secondary rack - - // Place the secondary RS - ServerName secondaryRS = getOneRandomServer(secondaryRack); - - // Skip the secondary for the tertiary placement - Set<ServerName> skipServerSet = new HashSet<ServerName>(); - skipServerSet.add(secondaryRS); - // Place the tertiary RS - ServerName tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet); - - if (secondaryRS == null || tertiaryRS == null) { - LOG.error("Cannot place the secondary and ternary" + - "region server for region " + - regionInfo.getRegionNameAsString()); - } - // Create the secondary and tertiary pair - favoredNodes[0] = secondaryRS; - favoredNodes[1] = tertiaryRS; - } else { - // Pick the secondary rs from this secondary rack - // and pick the tertiary from another random rack - favoredNodes[0] = getOneRandomServer(secondaryRack); - - // Pick the tertiary 
- if (getTotalNumberOfRacks() == 2) { - // Pick the tertiary from the same rack of the primary RS - Set<ServerName> serverSkipSet = new HashSet<ServerName>(); - serverSkipSet.add(primaryRS); - favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet); - } else { - // Pick the tertiary from another rack - rackSkipSet.add(secondaryRack); - String tertiaryRandomRack = getOneRandomRack(rackSkipSet); - favoredNodes[1] = getOneRandomServer(tertiaryRandomRack); - } - } - return favoredNodes; - } - - boolean canPlaceFavoredNodes() { - int serverSize = this.regionServerToRackMap.size(); - return (serverSize >= FAVORED_NODES_NUM); - } - - public void initialize() { - for (ServerName sn : this.servers) { - String rackName = this.rackManager.getRack(sn); - List<ServerName> serverList = this.rackToRegionServerMap.get(rackName); - if (serverList == null) { - serverList = new ArrayList<ServerName>(); - // Add the current rack to the unique rack list - this.uniqueRackList.add(rackName); - } - if (!serverList.contains(sn)) { - serverList.add(sn); - this.rackToRegionServerMap.put(rackName, serverList); - this.regionServerToRackMap.put(sn, rackName); - } - } - } - - private int getTotalNumberOfRacks() { - return this.uniqueRackList.size(); - } - - private List<ServerName> getServersFromRack(String rack) { - return this.rackToRegionServerMap.get(rack); - } - - private ServerName getOneRandomServer(String rack, - Set<ServerName> skipServerSet) throws IOException { - if(rack == null) return null; - List<ServerName> serverList = this.rackToRegionServerMap.get(rack); - if (serverList == null) return null; - - // Get a random server except for any servers from the skip set - if (skipServerSet != null && serverList.size() <= skipServerSet.size()) { - throw new IOException("Cannot randomly pick another random server"); - } - - ServerName randomServer; - do { - int randomIndex = random.nextInt(serverList.size()); - randomServer = serverList.get(randomIndex); - } while (skipServerSet != 
null && skipServerSet.contains(randomServer)); - - return randomServer; - } - - private ServerName getOneRandomServer(String rack) throws IOException { - return this.getOneRandomServer(rack, null); - } - - private String getOneRandomRack(Set<String> skipRackSet) throws IOException { - if (skipRackSet == null || uniqueRackList.size() <= skipRackSet.size()) { - throw new IOException("Cannot randomly pick another random server"); - } - - String randomRack; - do { - int randomIndex = random.nextInt(this.uniqueRackList.size()); - randomRack = this.uniqueRackList.get(randomIndex); - } while (skipRackSet.contains(randomRack)); - - return randomRack; - } - - public static String getFavoredNodesAsString(List<ServerName> nodes) { - StringBuffer strBuf = new StringBuffer(); - int i = 0; - for (ServerName node : nodes) { - strBuf.append(node.getHostAndPort()); - if (++i != nodes.size()) strBuf.append(";"); - } - return strBuf.toString(); - } -} http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java deleted file mode 100644 index 7e4fecf..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java +++ /dev/null @@ -1,356 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.master.balancer; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseIOException; -import org.apache.hadoop.hbase.HBaseInterfaceAudience; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.NamespaceDescriptor; -import org.apache.hadoop.hbase.ServerLoad; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.master.RackManager; -import org.apache.hadoop.hbase.master.RegionPlan; -import org.apache.hadoop.hbase.master.ServerManager; -import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta; -import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position; -import org.apache.hadoop.hbase.util.Pair; - -/** - * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that - * assigns favored nodes for each region. There is a Primary RegionServer that hosts - * the region, and then there is Secondary and Tertiary RegionServers. Currently, the - * favored nodes information is used in creating HDFS files - the Primary RegionServer - * passes the primary, secondary, tertiary node addresses as hints to the - * DistributedFileSystem API for creating files on the filesystem. 
These nodes are - * treated as hints by the HDFS to place the blocks of the file. This alleviates the - * problem to do with reading from remote nodes (since we can make the Secondary - * RegionServer as the new Primary RegionServer) after a region is recovered. This - * should help provide consistent read latencies for the regions even when their - * primary region servers die. - * - */ -@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) -public class FavoredNodeLoadBalancer extends BaseLoadBalancer { - private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class); - - private FavoredNodesPlan globalFavoredNodesAssignmentPlan; - private RackManager rackManager; - - @Override - public void setConf(Configuration conf) { - super.setConf(conf); - globalFavoredNodesAssignmentPlan = new FavoredNodesPlan(); - this.rackManager = new RackManager(conf); - super.setConf(conf); - } - - @Override - public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) { - //TODO.
Look at is whether Stochastic loadbalancer can be integrated with this - List<RegionPlan> plans = new ArrayList<RegionPlan>(); - //perform a scan of the meta to get the latest updates (if any) - SnapshotOfRegionAssignmentFromMeta snaphotOfRegionAssignment = - new SnapshotOfRegionAssignmentFromMeta(super.services.getConnection()); - try { - snaphotOfRegionAssignment.initialize(); - } catch (IOException ie) { - LOG.warn("Not running balancer since exception was thrown " + ie); - return plans; - } - globalFavoredNodesAssignmentPlan = snaphotOfRegionAssignment.getExistingAssignmentPlan(); - Map<ServerName, ServerName> serverNameToServerNameWithoutCode = - new HashMap<ServerName, ServerName>(); - Map<ServerName, ServerName> serverNameWithoutCodeToServerName = - new HashMap<ServerName, ServerName>(); - ServerManager serverMgr = super.services.getServerManager(); - for (ServerName sn: serverMgr.getOnlineServersList()) { - ServerName s = ServerName.valueOf(sn.getHostname(), sn.getPort(), ServerName.NON_STARTCODE); - serverNameToServerNameWithoutCode.put(sn, s); - serverNameWithoutCodeToServerName.put(s, sn); - } - for (Map.Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) { - ServerName currentServer = entry.getKey(); - //get a server without the startcode for the currentServer - ServerName currentServerWithoutStartCode = ServerName.valueOf(currentServer.getHostname(), - currentServer.getPort(), ServerName.NON_STARTCODE); - List<HRegionInfo> list = entry.getValue(); - for (HRegionInfo region : list) { - if(region.getTable().getNamespaceAsString() - .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) { - continue; - } - List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region); - if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) { - continue; //either favorednodes does not exist or we are already on the primary node - } - ServerName destination = null; - //check whether the primary 
is available - destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(0)); - if (destination == null) { - //check whether the region is on secondary/tertiary - if (currentServerWithoutStartCode.equals(favoredNodes.get(1)) || - currentServerWithoutStartCode.equals(favoredNodes.get(2))) { - continue; - } - //the region is currently on none of the favored nodes - //get it on one of them if possible - ServerLoad l1 = super.services.getServerManager().getLoad( - serverNameWithoutCodeToServerName.get(favoredNodes.get(1))); - ServerLoad l2 = super.services.getServerManager().getLoad( - serverNameWithoutCodeToServerName.get(favoredNodes.get(2))); - if (l1 != null && l2 != null) { - if (l1.getLoad() > l2.getLoad()) { - destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2)); - } else { - destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1)); - } - } else if (l1 != null) { - destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1)); - } else if (l2 != null) { - destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2)); - } - } - - if (destination != null) { - RegionPlan plan = new RegionPlan(region, currentServer, destination); - plans.add(plan); - } - } - } - return plans; - } - - @Override - public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions, - List<ServerName> servers) { - Map<ServerName, List<HRegionInfo>> assignmentMap; - try { - FavoredNodeAssignmentHelper assignmentHelper = - new FavoredNodeAssignmentHelper(servers, rackManager); - assignmentHelper.initialize(); - if (!assignmentHelper.canPlaceFavoredNodes()) { - return super.roundRobinAssignment(regions, servers); - } - // Segregate the regions into two types: - // 1. The regions that have favored node assignment, and where at least - // one of the favored node is still alive. 
In this case, try to adhere - // to the current favored nodes assignment as much as possible - i.e., - // if the current primary is gone, then make the secondary or tertiary - // as the new host for the region (based on their current load). - // Note that we don't change the favored - // node assignments here (even though one or more favored node is currently - // down). It is up to the balanceCluster to do this hard work. The HDFS - // can handle the fact that some nodes in the favored nodes hint is down - // It'd allocate some other DNs. In combination with stale settings for HDFS, - // we should be just fine. - // 2. The regions that currently don't have favored node assignment. We will - // need to come up with favored nodes assignments for them. The corner case - // in (1) above is that all the nodes are unavailable and in that case, we - // will note that this region doesn't have favored nodes. - Pair<Map<ServerName,List<HRegionInfo>>, List<HRegionInfo>> segregatedRegions = - segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers); - Map<ServerName,List<HRegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst(); - List<HRegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond(); - assignmentMap = new HashMap<ServerName, List<HRegionInfo>>(); - roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes, - servers); - // merge the assignment maps - assignmentMap.putAll(regionsWithFavoredNodesMap); - } catch (Exception ex) { - LOG.warn("Encountered exception while doing favored-nodes assignment " + ex + - " Falling back to regular assignment"); - assignmentMap = super.roundRobinAssignment(regions, servers); - } - return assignmentMap; - } - - @Override - public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) { - try { - FavoredNodeAssignmentHelper assignmentHelper = - new FavoredNodeAssignmentHelper(servers, rackManager); - assignmentHelper.initialize(); - ServerName 
primary = super.randomAssignment(regionInfo, servers); - if (!assignmentHelper.canPlaceFavoredNodes()) { - return primary; - } - List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo); - // check if we have a favored nodes mapping for this region and if so, return - // a server from the favored nodes list if the passed 'servers' contains this - // server as well (available servers, that is) - if (favoredNodes != null) { - for (ServerName s : favoredNodes) { - ServerName serverWithLegitStartCode = availableServersContains(servers, s); - if (serverWithLegitStartCode != null) { - return serverWithLegitStartCode; - } - } - } - List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1); - regions.add(regionInfo); - Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(1); - primaryRSMap.put(regionInfo, primary); - assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap); - return primary; - } catch (Exception ex) { - LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex + - " Falling back to regular assignment"); - return super.randomAssignment(regionInfo, servers); - } - } - - private Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>> - segregateRegionsAndAssignRegionsWithFavoredNodes(List<HRegionInfo> regions, - List<ServerName> availableServers) { - Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes = - new HashMap<ServerName, List<HRegionInfo>>(regions.size() / 2); - List<HRegionInfo> regionsWithNoFavoredNodes = new ArrayList<HRegionInfo>(regions.size()/2); - for (HRegionInfo region : regions) { - List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region); - ServerName primaryHost = null; - ServerName secondaryHost = null; - ServerName tertiaryHost = null; - if (favoredNodes != null) { - for (ServerName s : favoredNodes) { - ServerName serverWithLegitStartCode = 
availableServersContains(availableServers, s); - if (serverWithLegitStartCode != null) { - FavoredNodesPlan.Position position = - FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s); - if (Position.PRIMARY.equals(position)) { - primaryHost = serverWithLegitStartCode; - } else if (Position.SECONDARY.equals(position)) { - secondaryHost = serverWithLegitStartCode; - } else if (Position.TERTIARY.equals(position)) { - tertiaryHost = serverWithLegitStartCode; - } - } - } - assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, - primaryHost, secondaryHost, tertiaryHost); - } - if (primaryHost == null && secondaryHost == null && tertiaryHost == null) { - //all favored nodes unavailable - regionsWithNoFavoredNodes.add(region); - } - } - return new Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>( - assignmentMapForFavoredNodes, regionsWithNoFavoredNodes); - } - - // Do a check of the hostname and port and return the servername from the servers list - // that matched (the favoredNode will have a startcode of -1 but we want the real - // server with the legit startcode - private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) { - for (ServerName server : servers) { - if (ServerName.isSameHostnameAndPort(favoredNode, server)) { - return server; - } - } - return null; - } - - private void assignRegionToAvailableFavoredNode(Map<ServerName, - List<HRegionInfo>> assignmentMapForFavoredNodes, HRegionInfo region, ServerName primaryHost, - ServerName secondaryHost, ServerName tertiaryHost) { - if (primaryHost != null) { - addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost); - } else if (secondaryHost != null && tertiaryHost != null) { - // assign the region to the one with a lower load - // (both have the desired hdfs blocks) - ServerName s; - ServerLoad tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost); - ServerLoad secondaryLoad = 
super.services.getServerManager().getLoad(secondaryHost); - if (secondaryLoad.getLoad() < tertiaryLoad.getLoad()) { - s = secondaryHost; - } else { - s = tertiaryHost; - } - addRegionToMap(assignmentMapForFavoredNodes, region, s); - } else if (secondaryHost != null) { - addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost); - } else if (tertiaryHost != null) { - addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost); - } - } - - private void addRegionToMap(Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes, - HRegionInfo region, ServerName host) { - List<HRegionInfo> regionsOnServer = null; - if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) { - regionsOnServer = new ArrayList<HRegionInfo>(); - assignmentMapForFavoredNodes.put(host, regionsOnServer); - } - regionsOnServer.add(region); - } - - public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) { - return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo); - } - - private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper, - Map<ServerName, List<HRegionInfo>> assignmentMap, - List<HRegionInfo> regions, List<ServerName> servers) { - Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(); - // figure the primary RSs - assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions); - assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap); - } - - private void assignSecondaryAndTertiaryNodesForRegion( - FavoredNodeAssignmentHelper assignmentHelper, - List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) { - // figure the secondary and tertiary RSs - Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap = - assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap); - // now record all the assignments so that we can serve queries later - for (HRegionInfo region : regions) { - // Store the favored nodes without startCode 
for the ServerName objects - // We don't care about the startcode; but only the hostname really - List<ServerName> favoredNodesForRegion = new ArrayList<ServerName>(3); - ServerName sn = primaryRSMap.get(region); - favoredNodesForRegion.add(ServerName.valueOf(sn.getHostname(), sn.getPort(), - ServerName.NON_STARTCODE)); - ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region); - if (secondaryAndTertiaryNodes != null) { - favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(), - secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE)); - favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(), - secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE)); - } - globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion); - } - } - - @Override - public List<RegionPlan> balanceCluster(TableName tableName, - Map<ServerName, List<HRegionInfo>> clusterState) throws HBaseIOException { - return balanceCluster(clusterState); - } -} http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java deleted file mode 100644 index 2031fc9..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java +++ /dev/null @@ -1,135 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master.balancer; - -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.ServerName; - -/** - * This class contains the mapping information between each region name and - * its favored region server list. Used by {@link FavoredNodeLoadBalancer} set - * of classes and from unit tests (hence the class is public) - * - * All the access to this class is thread-safe. 
- */ -@InterfaceAudience.Private -public class FavoredNodesPlan { - - /** the map between each region name and its favored region server list */ - private Map<String, List<ServerName>> favoredNodesMap; - - public static enum Position { - PRIMARY, - SECONDARY, - TERTIARY - } - - public FavoredNodesPlan() { - favoredNodesMap = new ConcurrentHashMap<String, List<ServerName>>(); - } - - /** - * Update an assignment to the plan - * @param region - * @param servers - */ - public void updateFavoredNodesMap(HRegionInfo region, List<ServerName> servers) { - if (region == null || servers == null || servers.size() == 0) { - return; - } - this.favoredNodesMap.put(region.getRegionNameAsString(), servers); - } - - /** - * @param region - * @return the list of favored region server for this region based on the plan - */ - public List<ServerName> getFavoredNodes(HRegionInfo region) { - return favoredNodesMap.get(region.getRegionNameAsString()); - } - - /** - * Return the position of the server in the favoredNodes list. Assumes the - * favoredNodes list is of size 3. - * @param favoredNodes - * @param server - * @return position - */ - public static Position getFavoredServerPosition( - List<ServerName> favoredNodes, ServerName server) { - if (favoredNodes == null || server == null || - favoredNodes.size() != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) { - return null; - } - for (Position p : Position.values()) { - if (ServerName.isSameHostnameAndPort(favoredNodes.get(p.ordinal()),server)) { - return p; - } - } - return null; - } - - /** - * @return the mapping between each region to its favored region server list - */ - public Map<String, List<ServerName>> getAssignmentMap() { - return favoredNodesMap; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null) { - return false; - } - if (getClass() != o.getClass()) { - return false; - } - // To compare the map from objec o is identical to current assignment map.
- Map<String, List<ServerName>> comparedMap = ((FavoredNodesPlan)o).getAssignmentMap(); - - // compare the size - if (comparedMap.size() != this.favoredNodesMap.size()) - return false; - - // compare each element in the assignment map - for (Map.Entry<String, List<ServerName>> entry : - comparedMap.entrySet()) { - List<ServerName> serverList = this.favoredNodesMap.get(entry.getKey()); - if (serverList == null && entry.getValue() != null) { - return false; - } else if (serverList != null && !serverList.equals(entry.getValue())) { - return false; - } - } - return true; - } - - @Override - public int hashCode() { - return favoredNodesMap.hashCode(); - } -} http://git-wip-us.apache.org/repos/asf/hbase/blob/c1293cc9/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java new file mode 100644 index 0000000..184d80e --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY; +import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY; +import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper; +import org.apache.hadoop.hbase.favored.FavoredNodeLoadBalancer; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.master.LoadBalancer; +import org.apache.hadoop.hbase.master.RegionStates; +import org.apache.hadoop.hbase.master.ServerManager; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.junit.AfterClass; +import org.junit.Before; +import 
org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;


/**
 * Mini-cluster tests verifying that favored nodes (FN) information is present and consistent
 * for the regions of a table after table creation, region split and region merge.
 *
 * The cluster is started once per class with {@link FavoredNodeLoadBalancer} configured as the
 * master load balancer; the balancer is switched off before every test.
 */
@Category({ClientTests.class, MediumTests.class})
public class TestTableFavoredNodes {

  private static final Log LOG = LogFactory.getLog(TestTableFavoredNodes.class);

  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private final static int WAIT_TIMEOUT = 60000;
  private final static int SLAVES = 8;
  private FavoredNodesManager fnm;
  private RegionStates regionStates;
  private Admin admin;

  // Two split keys, hence three regions per freshly created table.
  private final byte[][] splitKeys = new byte[][] {Bytes.toBytes(1), Bytes.toBytes(9)};
  private final int NUM_REGIONS = splitKeys.length + 1;

  /**
   * Starts a mini cluster of {@code SLAVES} region servers with the favored node balancer set,
   * then waits for an active and ready master.
   */
  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    // Setting FavoredNodeBalancer will enable favored nodes
    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
        FavoredNodeLoadBalancer.class, LoadBalancer.class);
    conf.set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "" + SLAVES);
    TEST_UTIL.startMiniCluster(SLAVES);
    TEST_UTIL.getMiniHBaseCluster().waitForActiveAndReadyMaster(WAIT_TIMEOUT);
  }

  /** Cleans up the test directory and shuts the mini cluster down. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.cleanupTestDir();
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Fetches the master's favored nodes manager, the admin handle and the region states, and
   * turns the balancer off.
   */
  @Before
  public void setup() throws IOException {
    fnm = TEST_UTIL.getMiniHBaseCluster().getMaster().getFavoredNodesManager();
    admin = TEST_UTIL.getAdmin();
    admin.setBalancerRunning(false, true);
    regionStates =
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
  }

  /**
   * Create a table with FN enabled and check if all its regions have favored nodes set.
   */
  @Test
  public void testCreateTable() throws Exception {

    TableName tableName = TableName.valueOf("createTable");
    TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);

    // All regions should have favored nodes
    checkIfFavoredNodeInformationIsCorrect(tableName);

    TEST_UTIL.deleteTable(tableName);
  }

  /**
   * Check if daughters inherit at least 2 FN from the parent after a region split.
   *
   * Beyond the "at least 2 shared" check, this asserts the exact positions: both daughters keep
   * the parent's PRIMARY; the first daughter keeps the parent's SECONDARY, while the second
   * daughter's SECONDARY is the parent's TERTIARY.
   */
  @Test
  public void testSplitTable() throws Exception {

    TableName tableName = TableName.valueOf("splitRegions");
    TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);

    checkIfFavoredNodeInformationIsCorrect(tableName);

    byte[] splitPoint = Bytes.toBytes(0);
    // The locator is closed when the block exits, including on assertion failure.
    try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
      HRegionInfo parent = locator.getRegionLocation(splitPoint).getRegionInfo();
      List<ServerName> parentFN = fnm.getFavoredNodes(parent);
      assertNotNull("FN should not be null for region: " + parent, parentFN);

      admin.split(tableName, splitPoint);

      TEST_UTIL.waitUntilNoRegionsInTransition(WAIT_TIMEOUT);
      waitUntilTableRegionCountReached(tableName, NUM_REGIONS + 1);

      // All regions should have favored nodes
      checkIfFavoredNodeInformationIsCorrect(tableName);

      // Get the daughters of parent; reload=true so the pre-split location is not reused.
      HRegionInfo daughter1 =
          locator.getRegionLocation(parent.getStartKey(), true).getRegionInfo();
      List<ServerName> daughter1FN = fnm.getFavoredNodes(daughter1);

      HRegionInfo daughter2 = locator.getRegionLocation(splitPoint, true).getRegionInfo();
      List<ServerName> daughter2FN = fnm.getFavoredNodes(daughter2);

      checkIfDaughterInherits2FN(parentFN, daughter1FN);
      checkIfDaughterInherits2FN(parentFN, daughter2FN);

      assertEquals("Daughter's PRIMARY FN should be PRIMARY of parent",
          parentFN.get(PRIMARY.ordinal()), daughter1FN.get(PRIMARY.ordinal()));
      assertEquals("Daughter's SECONDARY FN should be SECONDARY of parent",
          parentFN.get(SECONDARY.ordinal()), daughter1FN.get(SECONDARY.ordinal()));

      assertEquals("Daughter's PRIMARY FN should be PRIMARY of parent",
          parentFN.get(PRIMARY.ordinal()), daughter2FN.get(PRIMARY.ordinal()));
      assertEquals("Daughter's SECONDARY FN should be TERTIARY of parent",
          parentFN.get(TERTIARY.ordinal()), daughter2FN.get(SECONDARY.ordinal()));
    }

    TEST_UTIL.deleteTable(tableName);
  }

  /**
   * Check if the merged region inherits FN from one of its regions.
   *
   * The first two regions of the table are merged and the result is expected to carry exactly
   * the favored nodes of the first one (regionA).
   */
  @Test
  public void testMergeTable() throws Exception {

    TableName tableName = TableName.valueOf("mergeRegions");
    TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);

    checkIfFavoredNodeInformationIsCorrect(tableName);

    // The locator is closed when the block exits, including on assertion failure.
    try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
      HRegionInfo regionA =
          locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
      HRegionInfo regionB = locator.getRegionLocation(splitKeys[0]).getRegionInfo();

      List<ServerName> regionAFN = fnm.getFavoredNodes(regionA);
      LOG.info("regionA: " + regionA.getEncodedName() + " with FN: "
          + fnm.getFavoredNodes(regionA));
      // Log regionB under its own encoded name (previously regionA's name was printed here).
      LOG.info("regionB: " + regionB.getEncodedName() + " with FN: "
          + fnm.getFavoredNodes(regionB));

      admin.mergeRegionsAsync(regionA.getEncodedNameAsBytes(),
          regionB.getEncodedNameAsBytes(), false).get(60, TimeUnit.SECONDS);

      TEST_UTIL.waitUntilNoRegionsInTransition(WAIT_TIMEOUT);
      waitUntilTableRegionCountReached(tableName, NUM_REGIONS - 1);

      // All regions should have favored nodes
      checkIfFavoredNodeInformationIsCorrect(tableName);

      HRegionInfo mergedRegion =
          locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
      List<ServerName> mergedFN = fnm.getFavoredNodes(mergedRegion);

      assertArrayEquals("Merged region doesn't match regionA's FN",
          regionAFN.toArray(), mergedFN.toArray());
    }

    TEST_UTIL.deleteTable(tableName);
  }

  /**
   * This checks the following:
   *
   * 1. Do all regions of the table have favored nodes updated in master?
   * 2. Is the number of favored nodes correct for a region? Is the start code -1?
   * 3. Is the FN information consistent between Master and the respective RegionServer?
   *
   * @param tableName table whose regions are inspected
   */
  private void checkIfFavoredNodeInformationIsCorrect(TableName tableName) throws Exception {

    /*
     * Since we need HRegionServer to check for consistency of FN between Master and RS,
     * lets construct a map for each serverName lookup. Makes it easy later.
     */
    Map<ServerName, HRegionServer> snRSMap = Maps.newHashMap();
    for (JVMClusterUtil.RegionServerThread rst :
        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads()) {
      snRSMap.put(rst.getRegionServer().getServerName(), rst.getRegionServer());
    }
    // Also include master, since it can also host user regions.
    for (JVMClusterUtil.MasterThread rst :
        TEST_UTIL.getMiniHBaseCluster().getLiveMasterThreads()) {
      snRSMap.put(rst.getMaster().getServerName(), rst.getMaster());
    }

    // Closing the locator does not touch the admin's connection it was obtained from.
    try (RegionLocator regionLocator = admin.getConnection().getRegionLocator(tableName)) {
      for (HRegionLocation regionLocation : regionLocator.getAllRegionLocations()) {

        HRegionInfo regionInfo = regionLocation.getRegionInfo();
        List<ServerName> fnList = fnm.getFavoredNodes(regionInfo);

        // 1. Does each region have favored node?
        assertNotNull("Favored nodes should not be null for region:" + regionInfo, fnList);

        // 2. Do we have the right number of favored nodes? Is start code -1?
        assertEquals("Incorrect favored nodes for region:" + regionInfo + " fnlist: " + fnList,
            FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, fnList.size());
        for (ServerName sn : fnList) {
          assertEquals("FN should not have startCode, fnlist:" + fnList, -1, sn.getStartcode());
        }

        // 3. Check if the regionServers have all the FN updated and in sync with Master
        HRegionServer regionServer = snRSMap.get(regionLocation.getServerName());
        assertNotNull("RS should not be null for regionLocation: " + regionLocation,
            regionServer);

        InetSocketAddress[] rsFavNodes =
            regionServer.getFavoredNodesForRegion(regionInfo.getEncodedName());
        assertNotNull("RS " + regionLocation.getServerName()
            + " does not have FN for region: " + regionInfo, rsFavNodes);

        // Rebuild start-code-less ServerNames from the RS addresses so they compare equal to
        // the master's entries.
        List<ServerName> fnFromRS = Lists.newArrayList();
        for (InetSocketAddress addr : rsFavNodes) {
          fnFromRS.add(ServerName.valueOf(addr.getHostName(), addr.getPort(),
              ServerName.NON_STARTCODE));
        }

        // Whatever remains after removing the master's list is known only to the RS.
        fnFromRS.removeAll(fnList);
        assertEquals("Inconsistent FN bet RS and Master, RS diff: " + fnFromRS
            + " List on master: " + fnList, 0, fnFromRS.size());
      }
    }
  }

  /**
   * Asserts that at most one of the daughter's favored nodes is absent from the parent's list,
   * i.e. the daughter shares at least two favored nodes with its parent.
   *
   * @param parentFN favored nodes of the parent region; must not be null
   * @param daughterFN favored nodes of the daughter region; must not be null
   */
  private void checkIfDaughterInherits2FN(List<ServerName> parentFN, List<ServerName> daughterFN) {

    assertNotNull(parentFN);
    assertNotNull(daughterFN);

    // Work on a copy so the caller's daughter list is left untouched.
    List<ServerName> favoredNodes = Lists.newArrayList(daughterFN);
    favoredNodes.removeAll(parentFN);

    /*
     * With a small cluster its likely some FN might accidentally get shared. Its likely the
     * 3rd FN the balancer chooses might still belong to the parent in which case favoredNodes
     * size would be 0.
     */
    assertTrue("Daughter FN:" + daughterFN + " should have inherited 2 FN from parent FN:"
        + parentFN, favoredNodes.size() <= 1);
  }

  /**
   * Waits, for up to {@code WAIT_TIMEOUT}, until the master's region states report exactly
   * {@code numRegions} regions for the table.
   *
   * @param tableName table to watch
   * @param numRegions expected number of regions
   */
  private void waitUntilTableRegionCountReached(final TableName tableName, final int numRegions)
      throws Exception {
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return regionStates.getRegionsOfTable(tableName).size() == numRegions;
      }
    });
  }
}
