Repository: hbase Updated Branches: refs/heads/master 2cd45eb9a -> b7f751476
HBASE-11689 Track meta in transition (Andrey Stepachev and Jimmy Xiang) Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b7f75147 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b7f75147 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b7f75147 Branch: refs/heads/master Commit: b7f7514762433a7a02635e141e60d0e2ba333049 Parents: 2cd45eb Author: Jimmy Xiang <[email protected]> Authored: Thu Aug 28 10:23:31 2014 -0700 Committer: Jimmy Xiang <[email protected]> Committed: Fri Aug 29 13:23:35 2014 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/ServerName.java | 22 +- .../apache/hadoop/hbase/master/RegionState.java | 224 +++++++++-------- .../hbase/zookeeper/MetaTableLocator.java | 150 ++++++----- .../apache/hadoop/hbase/zookeeper/ZKUtil.java | 30 --- .../protobuf/generated/ZooKeeperProtos.java | 246 +++++++++++++++---- .../src/main/protobuf/ZooKeeper.proto | 7 +- .../hadoop/hbase/master/AssignmentManager.java | 34 +-- .../org/apache/hadoop/hbase/master/HMaster.java | 57 ++--- .../hadoop/hbase/master/RegionStateStore.java | 40 ++- .../handler/MetaServerShutdownHandler.java | 6 - .../hbase/regionserver/HRegionServer.java | 14 +- .../hadoop/hbase/TestMetaTableLocator.java | 73 ++++-- .../master/TestAssignmentManagerOnCluster.java | 23 +- .../hadoop/hbase/master/TestMasterFailover.java | 122 ++++++++- .../hbase/master/TestMasterNoCluster.java | 5 +- 15 files changed, 691 insertions(+), 362 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java index 48b4a79..dde9202 
100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -18,21 +18,21 @@ */ package org.apache.hadoop.hbase; -import com.google.common.net.InetAddresses; -import com.google.protobuf.InvalidProtocolBufferException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; -import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; +import com.google.common.net.InetAddresses; +import com.google.protobuf.InvalidProtocolBufferException; /** * Instance of an HBase ServerName. @@ -54,6 +54,8 @@ import java.util.regex.Pattern; @InterfaceAudience.Public @InterfaceStability.Evolving public class ServerName implements Comparable<ServerName>, Serializable { + private static final long serialVersionUID = 1367463982557264981L; + /** * Version for this class. 
* Its a short rather than a byte so I can for sure distinguish between this @@ -370,9 +372,9 @@ public class ServerName implements Comparable<ServerName>, Serializable { if (ProtobufUtil.isPBMagicPrefix(data)) { int prefixLen = ProtobufUtil.lengthOfPBMagic(); try { - MetaRegionServer rss = - MetaRegionServer.PARSER.parseFrom(data, prefixLen, data.length - prefixLen); - org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName sn = rss.getServer(); + ZooKeeperProtos.Master rss = + ZooKeeperProtos.Master.PARSER.parseFrom(data, prefixLen, data.length - prefixLen); + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName sn = rss.getMaster(); return valueOf(sn.getHostName(), sn.getPort(), sn.getStartCode()); } catch (InvalidProtocolBufferException e) { // A failed parse of the znode is pretty catastrophic. Rather than loop http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java index d660db7..0a9c123 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java @@ -51,9 +51,125 @@ public class RegionState { SPLITTING_NEW, // new region to be created when RS splits a parent // region but hasn't be created yet, or master doesn't // know it's already created - MERGING_NEW // new region to be created when RS merges two + MERGING_NEW; // new region to be created when RS merges two // daughter regions but hasn't be created yet, or // master doesn't know it's already created + + /** + * Convert to protobuf ClusterStatusProtos.RegionState.State + */ + public ClusterStatusProtos.RegionState.State convert() { + 
ClusterStatusProtos.RegionState.State rs; + switch (this) { + case OFFLINE: + rs = ClusterStatusProtos.RegionState.State.OFFLINE; + break; + case PENDING_OPEN: + rs = ClusterStatusProtos.RegionState.State.PENDING_OPEN; + break; + case OPENING: + rs = ClusterStatusProtos.RegionState.State.OPENING; + break; + case OPEN: + rs = ClusterStatusProtos.RegionState.State.OPEN; + break; + case PENDING_CLOSE: + rs = ClusterStatusProtos.RegionState.State.PENDING_CLOSE; + break; + case CLOSING: + rs = ClusterStatusProtos.RegionState.State.CLOSING; + break; + case CLOSED: + rs = ClusterStatusProtos.RegionState.State.CLOSED; + break; + case SPLITTING: + rs = ClusterStatusProtos.RegionState.State.SPLITTING; + break; + case SPLIT: + rs = ClusterStatusProtos.RegionState.State.SPLIT; + break; + case FAILED_OPEN: + rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN; + break; + case FAILED_CLOSE: + rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE; + break; + case MERGING: + rs = ClusterStatusProtos.RegionState.State.MERGING; + break; + case MERGED: + rs = ClusterStatusProtos.RegionState.State.MERGED; + break; + case SPLITTING_NEW: + rs = ClusterStatusProtos.RegionState.State.SPLITTING_NEW; + break; + case MERGING_NEW: + rs = ClusterStatusProtos.RegionState.State.MERGING_NEW; + break; + default: + throw new IllegalStateException(""); + } + return rs; + } + + /** + * Convert a protobuf HBaseProtos.RegionState.State to a RegionState.State + * + * @return the RegionState.State + */ + public static State convert(ClusterStatusProtos.RegionState.State protoState) { + State state; + switch (protoState) { + case OFFLINE: + state = OFFLINE; + break; + case PENDING_OPEN: + state = PENDING_OPEN; + break; + case OPENING: + state = OPENING; + break; + case OPEN: + state = OPEN; + break; + case PENDING_CLOSE: + state = PENDING_CLOSE; + break; + case CLOSING: + state = CLOSING; + break; + case CLOSED: + state = CLOSED; + break; + case SPLITTING: + state = SPLITTING; + break; + case SPLIT: + 
state = SPLIT; + break; + case FAILED_OPEN: + state = FAILED_OPEN; + break; + case FAILED_CLOSE: + state = FAILED_CLOSE; + break; + case MERGING: + state = MERGING; + break; + case MERGED: + state = MERGED; + break; + case SPLITTING_NEW: + state = SPLITTING_NEW; + break; + case MERGING_NEW: + state = MERGING_NEW; + break; + default: + throw new IllegalStateException(""); + } + return state; + } } private final long stamp; @@ -250,58 +366,8 @@ public class RegionState { */ public ClusterStatusProtos.RegionState convert() { ClusterStatusProtos.RegionState.Builder regionState = ClusterStatusProtos.RegionState.newBuilder(); - ClusterStatusProtos.RegionState.State rs; - switch (this.state) { - case OFFLINE: - rs = ClusterStatusProtos.RegionState.State.OFFLINE; - break; - case PENDING_OPEN: - rs = ClusterStatusProtos.RegionState.State.PENDING_OPEN; - break; - case OPENING: - rs = ClusterStatusProtos.RegionState.State.OPENING; - break; - case OPEN: - rs = ClusterStatusProtos.RegionState.State.OPEN; - break; - case PENDING_CLOSE: - rs = ClusterStatusProtos.RegionState.State.PENDING_CLOSE; - break; - case CLOSING: - rs = ClusterStatusProtos.RegionState.State.CLOSING; - break; - case CLOSED: - rs = ClusterStatusProtos.RegionState.State.CLOSED; - break; - case SPLITTING: - rs = ClusterStatusProtos.RegionState.State.SPLITTING; - break; - case SPLIT: - rs = ClusterStatusProtos.RegionState.State.SPLIT; - break; - case FAILED_OPEN: - rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN; - break; - case FAILED_CLOSE: - rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE; - break; - case MERGING: - rs = ClusterStatusProtos.RegionState.State.MERGING; - break; - case MERGED: - rs = ClusterStatusProtos.RegionState.State.MERGED; - break; - case SPLITTING_NEW: - rs = ClusterStatusProtos.RegionState.State.SPLITTING_NEW; - break; - case MERGING_NEW: - rs = ClusterStatusProtos.RegionState.State.MERGING_NEW; - break; - default: - throw new IllegalStateException(""); - } 
regionState.setRegionInfo(HRegionInfo.convert(hri)); - regionState.setState(rs); + regionState.setState(state.convert()); regionState.setStamp(getStamp()); return regionState.build(); } @@ -312,58 +378,8 @@ public class RegionState { * @return the RegionState */ public static RegionState convert(ClusterStatusProtos.RegionState proto) { - RegionState.State state; - switch (proto.getState()) { - case OFFLINE: - state = State.OFFLINE; - break; - case PENDING_OPEN: - state = State.PENDING_OPEN; - break; - case OPENING: - state = State.OPENING; - break; - case OPEN: - state = State.OPEN; - break; - case PENDING_CLOSE: - state = State.PENDING_CLOSE; - break; - case CLOSING: - state = State.CLOSING; - break; - case CLOSED: - state = State.CLOSED; - break; - case SPLITTING: - state = State.SPLITTING; - break; - case SPLIT: - state = State.SPLIT; - break; - case FAILED_OPEN: - state = State.FAILED_OPEN; - break; - case FAILED_CLOSE: - state = State.FAILED_CLOSE; - break; - case MERGING: - state = State.MERGING; - break; - case MERGED: - state = State.MERGED; - break; - case SPLITTING_NEW: - state = State.SPLITTING_NEW; - break; - case MERGING_NEW: - state = State.MERGING_NEW; - break; - default: - throw new IllegalStateException(""); - } - - return new RegionState(HRegionInfo.convert(proto.getRegionInfo()),state,proto.getStamp(),null); + return new RegionState(HRegionInfo.convert(proto.getRegionInfo()), + State.convert(proto.getState()), proto.getStamp(), null); } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java index f1d17c2..f0c1d87 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java 
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java @@ -17,36 +17,42 @@ */ package org.apache.hadoop.hbase.zookeeper; -import com.google.common.base.Stopwatch; +import java.io.EOFException; +import java.io.IOException; +import java.net.ConnectException; +import java.net.NoRouteToHostException; +import java.net.SocketException; +import java.net.SocketTimeoutException; +import java.rmi.UnknownHostException; + +import javax.annotation.Nullable; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.exceptions.DeserializationException; -import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ipc.RpcClient; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer; import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.ipc.RemoteException; import org.apache.zookeeper.KeeperException; -import java.io.EOFException; -import java.io.IOException; -import java.net.ConnectException; -import java.net.NoRouteToHostException; -import 
java.net.SocketException; -import java.net.SocketTimeoutException; -import java.rmi.UnknownHostException; +import com.google.common.base.Stopwatch; +import com.google.protobuf.InvalidProtocolBufferException; /** * Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper @@ -78,15 +84,7 @@ public class MetaTableLocator { * @return true if meta region location is available, false if not */ public boolean isLocationAvailable(ZooKeeperWatcher zkw) { - try { - return ZKUtil.getData(zkw, zkw.metaServerZNode) != null; - } catch(KeeperException e) { - LOG.error("ZK error trying to get hbase:meta from ZooKeeper"); - return false; - } catch (InterruptedException e) { - LOG.error("ZK error trying to get hbase:meta from ZooKeeper"); - return false; - } + return getMetaRegionLocation(zkw) != null; } /** @@ -94,18 +92,13 @@ public class MetaTableLocator { * @param zkw zookeeper connection to use * @return server name or null if we failed to get the data. */ + @Nullable public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) { try { - try { - return ServerName.parseFrom(ZKUtil.getData(zkw, zkw.metaServerZNode)); - } catch (DeserializationException e) { - throw ZKUtil.convert(e); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - return null; - } + RegionState state = getMetaRegionState(zkw); + return state.isOpened() ? state.getServerName() : null; } catch (KeeperException ke) { - return null; + return null; } } @@ -189,9 +182,8 @@ public class MetaTableLocator { } catch (RegionServerStoppedException e) { // Pass -- server name sends us to a server that is dying or already dead. } - return (service == null)? 
false: - verifyRegionLocation(service, - getMetaRegionLocation(zkw), META_REGION_NAME); + return (service != null) && verifyRegionLocation(service, + getMetaRegionLocation(zkw), META_REGION_NAME); } /** @@ -315,44 +307,65 @@ public class MetaTableLocator { * Sets the location of <code>hbase:meta</code> in ZooKeeper to the * specified server address. * @param zookeeper zookeeper reference - * @param location The server hosting <code>hbase:meta</code> + * @param serverName The server hosting <code>hbase:meta</code> + * @param state The region transition state * @throws KeeperException unexpected zookeeper exception */ public static void setMetaLocation(ZooKeeperWatcher zookeeper, - final ServerName location) - throws KeeperException { - LOG.info("Setting hbase:meta region location in ZooKeeper as " + location); + ServerName serverName, RegionState.State state) throws KeeperException { + LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName); // Make the MetaRegionServer pb and then get its bytes and save this as // the znode content. - byte [] data = toByteArray(location); + MetaRegionServer pbrsr = MetaRegionServer.newBuilder() + .setServer(ProtobufUtil.toServerName(serverName)) + .setRpcVersion(HConstants.RPC_CURRENT_VERSION) + .setState(state.convert()).build(); + byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray()); try { - ZKUtil.createAndWatch(zookeeper, zookeeper.metaServerZNode, data); - } catch(KeeperException.NodeExistsException nee) { - LOG.debug("META region location already existed, updated location"); ZKUtil.setData(zookeeper, zookeeper.metaServerZNode, data); + } catch(KeeperException.NoNodeException nne) { + LOG.debug("META region location doesn't existed, create it"); + ZKUtil.createAndWatch(zookeeper, zookeeper.metaServerZNode, data); } } /** - * Build up the znode content. - * @param sn What to put into the znode. 
- * @return The content of the meta-region-server znode + * Load the meta region state from the meta server ZNode. */ - private static byte [] toByteArray(final ServerName sn) { - // ZNode content is a pb message preceded by some pb magic. - HBaseProtos.ServerName pbsn = - HBaseProtos.ServerName.newBuilder() - .setHostName(sn.getHostname()) - .setPort(sn.getPort()) - .setStartCode(sn.getStartcode()) - .build(); - - ZooKeeperProtos.MetaRegionServer pbrsr = - ZooKeeperProtos.MetaRegionServer.newBuilder() - .setServer(pbsn) - .setRpcVersion(HConstants.RPC_CURRENT_VERSION) - .build(); - return ProtobufUtil.prependPBMagic(pbrsr.toByteArray()); + public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException { + RegionState.State state = RegionState.State.OPEN; + ServerName serverName = null; + try { + byte[] data = ZKUtil.getData(zkw, zkw.metaServerZNode); + if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) { + try { + int prefixLen = ProtobufUtil.lengthOfPBMagic(); + ZooKeeperProtos.MetaRegionServer rl = + ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom + (data, prefixLen, data.length - prefixLen); + if (rl.hasState()) { + state = RegionState.State.convert(rl.getState()); + } + HBaseProtos.ServerName sn = rl.getServer(); + serverName = ServerName.valueOf( + sn.getHostName(), sn.getPort(), sn.getStartCode()); + } catch (InvalidProtocolBufferException e) { + throw new DeserializationException("Unable to parse meta region location"); + } + } else { + // old style of meta region location? 
+ serverName = ServerName.parseFrom(data); + } + } catch (DeserializationException e) { + throw ZKUtil.convert(e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + if (serverName == null) { + state = RegionState.State.OFFLINE; + } + return new RegionState(HRegionInfo.FIRST_META_REGIONINFO, + state, serverName); } /** @@ -362,7 +375,7 @@ public class MetaTableLocator { */ public void deleteMetaLocation(ZooKeeperWatcher zookeeper) throws KeeperException { - LOG.info("Unsetting hbase:meta region location in ZooKeeper"); + LOG.info("Deleting hbase:meta region location in ZooKeeper"); try { // Just delete the node. Don't need any watches. ZKUtil.deleteNode(zookeeper, zookeeper.metaServerZNode); @@ -372,7 +385,7 @@ public class MetaTableLocator { } /** - * Wait until the meta region is available. + * Wait until the meta region is available and is not in transition. * @param zkw zookeeper connection to use * @param timeout maximum time to wait, in millis * @return ServerName or null if we timed out. 
@@ -381,14 +394,23 @@ public class MetaTableLocator { public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, final long timeout) throws InterruptedException { - byte [] data = ZKUtil.blockUntilAvailable(zkw, zkw.metaServerZNode, timeout); - if (data == null) return null; + if (timeout < 0) throw new IllegalArgumentException(); + if (zkw == null) throw new IllegalArgumentException(); + Stopwatch sw = new Stopwatch().start(); + ServerName sn = null; try { - return ServerName.parseFrom(data); - } catch (DeserializationException e) { - LOG.warn("Failed parse", e); - return null; + while (true) { + sn = getMetaRegionLocation(zkw); + if (sn != null || sw.elapsedMillis() + > timeout - HConstants.SOCKET_RETRY_WAIT_MS) { + break; + } + Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS); + } + } finally { + sw.stop(); } + return sn; } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java index b13667f..79fa4ba 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java @@ -1831,36 +1831,6 @@ public class ZKUtil { } } - - public static byte[] blockUntilAvailable( - final ZooKeeperWatcher zkw, final String znode, final long timeout) - throws InterruptedException { - if (timeout < 0) throw new IllegalArgumentException(); - if (zkw == null) throw new IllegalArgumentException(); - if (znode == null) throw new IllegalArgumentException(); - - byte[] data = null; - boolean finished = false; - final long endTime = System.currentTimeMillis() + timeout; - while (!finished) { - try { - data = ZKUtil.getData(zkw, znode); - } catch(KeeperException e) { - 
LOG.warn("Unexpected exception handling blockUntilAvailable", e); - } - - if (data == null && (System.currentTimeMillis() + - HConstants.SOCKET_RETRY_WAIT_MS < endTime)) { - Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS); - } else { - finished = true; - } - } - - return data; - } - - /** * Convert a {@link DeserializationException} to a more palatable {@link KeeperException}. * Used when can't let a {@link DeserializationException} out w/o changing public API. http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java ---------------------------------------------------------------------- diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java index 10274b4..6da497e 100644 --- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java +++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java @@ -16,7 +16,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ boolean hasServer(); @@ -24,7 +25,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServer(); @@ -32,7 +34,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. 
+ * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerOrBuilder(); @@ -58,6 +61,24 @@ public final class ZooKeeperProtos { * </pre> */ int getRpcVersion(); + + // optional .RegionState.State state = 3; + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. OPEN means fully operational 'hbase:meta' + * </pre> + */ + boolean hasState(); + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. OPEN means fully operational 'hbase:meta' + * </pre> + */ + org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State getState(); } /** * Protobuf type {@code MetaRegionServer} @@ -133,6 +154,17 @@ public final class ZooKeeperProtos { rpcVersion_ = input.readUInt32(); break; } + case 24: { + int rawValue = input.readEnum(); + org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State value = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(3, rawValue); + } else { + bitField0_ |= 0x00000004; + state_ = value; + } + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -180,7 +212,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public boolean hasServer() { @@ -190,7 +223,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. 
+ * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServer() { @@ -200,7 +234,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerOrBuilder() { @@ -235,9 +270,34 @@ public final class ZooKeeperProtos { return rpcVersion_; } + // optional .RegionState.State state = 3; + public static final int STATE_FIELD_NUMBER = 3; + private org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State state_; + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. OPEN means fully operational 'hbase:meta' + * </pre> + */ + public boolean hasState() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. 
OPEN means fully operational 'hbase:meta' + * </pre> + */ + public org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State getState() { + return state_; + } + private void initFields() { server_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance(); rpcVersion_ = 0; + state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -265,6 +325,9 @@ public final class ZooKeeperProtos { if (((bitField0_ & 0x00000002) == 0x00000002)) { output.writeUInt32(2, rpcVersion_); } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeEnum(3, state_.getNumber()); + } getUnknownFields().writeTo(output); } @@ -282,6 +345,10 @@ public final class ZooKeeperProtos { size += com.google.protobuf.CodedOutputStream .computeUInt32Size(2, rpcVersion_); } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(3, state_.getNumber()); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -315,6 +382,11 @@ public final class ZooKeeperProtos { result = result && (getRpcVersion() == other.getRpcVersion()); } + result = result && (hasState() == other.hasState()); + if (hasState()) { + result = result && + (getState() == other.getState()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -336,6 +408,10 @@ public final class ZooKeeperProtos { hash = (37 * hash) + RPC_VERSION_FIELD_NUMBER; hash = (53 * hash) + getRpcVersion(); } + if (hasState()) { + hash = (37 * hash) + STATE_FIELD_NUMBER; + hash = (53 * hash) + hashEnum(getState()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -459,6 +535,8 @@ public final class ZooKeeperProtos { bitField0_ = (bitField0_ & ~0x00000001); rpcVersion_ = 0; bitField0_ = 
(bitField0_ & ~0x00000002); + state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE; + bitField0_ = (bitField0_ & ~0x00000004); return this; } @@ -499,6 +577,10 @@ public final class ZooKeeperProtos { to_bitField0_ |= 0x00000002; } result.rpcVersion_ = rpcVersion_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.state_ = state_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -521,6 +603,9 @@ public final class ZooKeeperProtos { if (other.hasRpcVersion()) { setRpcVersion(other.getRpcVersion()); } + if (other.hasState()) { + setState(other.getState()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -564,7 +649,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public boolean hasServer() { @@ -574,7 +660,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServer() { @@ -588,7 +675,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. 
* </pre> */ public Builder setServer(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName value) { @@ -608,7 +696,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public Builder setServer( @@ -626,7 +715,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public Builder mergeServer(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName value) { @@ -649,7 +739,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public Builder clearServer() { @@ -666,7 +757,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder getServerBuilder() { @@ -678,7 +770,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. 
* </pre> */ public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerOrBuilder() { @@ -692,7 +785,8 @@ public final class ZooKeeperProtos { * <code>required .ServerName server = 1;</code> * * <pre> - * The ServerName hosting the meta region currently. + * The ServerName hosting the meta region currently, or destination server, + * if meta region is in transition. * </pre> */ private com.google.protobuf.SingleFieldBuilder< @@ -766,6 +860,58 @@ public final class ZooKeeperProtos { return this; } + // optional .RegionState.State state = 3; + private org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE; + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. OPEN means fully operational 'hbase:meta' + * </pre> + */ + public boolean hasState() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. OPEN means fully operational 'hbase:meta' + * </pre> + */ + public org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State getState() { + return state_; + } + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. OPEN means fully operational 'hbase:meta' + * </pre> + */ + public Builder setState(org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000004; + state_ = value; + onChanged(); + return this; + } + /** + * <code>optional .RegionState.State state = 3;</code> + * + * <pre> + * State of the region transition. 
OPEN means fully operational 'hbase:meta' + * </pre> + */ + public Builder clearState() { + bitField0_ = (bitField0_ & ~0x00000004); + state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:MetaRegionServer) } @@ -9414,39 +9560,40 @@ public final class ZooKeeperProtos { descriptor; static { java.lang.String[] descriptorData = { - "\n\017ZooKeeper.proto\032\013HBase.proto\"D\n\020MetaRe" + - "gionServer\022\033\n\006server\030\001 \002(\0132\013.ServerName\022" + - "\023\n\013rpc_version\030\002 \001(\r\":\n\006Master\022\033\n\006master" + - "\030\001 \002(\0132\013.ServerName\022\023\n\013rpc_version\030\002 \001(\r" + - "\"\037\n\tClusterUp\022\022\n\nstart_date\030\001 \002(\t\"\214\002\n\014Sp" + - "litLogTask\022\"\n\005state\030\001 \002(\0162\023.SplitLogTask" + - ".State\022 \n\013server_name\030\002 \002(\0132\013.ServerName" + - "\0221\n\004mode\030\003 \001(\0162\032.SplitLogTask.RecoveryMo" + - "de:\007UNKNOWN\"C\n\005State\022\016\n\nUNASSIGNED\020\000\022\t\n\005" + - "OWNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n\004DONE\020\003\022\007\n\003ERR\020\004", - "\">\n\014RecoveryMode\022\013\n\007UNKNOWN\020\000\022\021\n\rLOG_SPL" + - "ITTING\020\001\022\016\n\nLOG_REPLAY\020\002\"n\n\005Table\022$\n\005sta" + - "te\030\001 \002(\0162\014.Table.State:\007ENABLED\"?\n\005State" + - "\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\022\r\n\tDISABLING" + - "\020\002\022\014\n\010ENABLING\020\003\"\215\001\n\017ReplicationPeer\022\022\n\n" + - "clusterkey\030\001 \002(\t\022\037\n\027replicationEndpointI" + - "mpl\030\002 \001(\t\022\035\n\004data\030\003 \003(\0132\017.BytesBytesPair" + - "\022&\n\rconfiguration\030\004 \003(\0132\017.NameStringPair" + - "\"^\n\020ReplicationState\022&\n\005state\030\001 \002(\0162\027.Re" + - "plicationState.State\"\"\n\005State\022\013\n\007ENABLED", - 
"\020\000\022\014\n\010DISABLED\020\001\"+\n\027ReplicationHLogPosit" + - "ion\022\020\n\010position\030\001 \002(\003\"%\n\017ReplicationLock" + - "\022\022\n\nlock_owner\030\001 \002(\t\"\230\001\n\tTableLock\022\036\n\nta" + - "ble_name\030\001 \001(\0132\n.TableName\022\037\n\nlock_owner" + - "\030\002 \001(\0132\013.ServerName\022\021\n\tthread_id\030\003 \001(\003\022\021" + - "\n\tis_shared\030\004 \001(\010\022\017\n\007purpose\030\005 \001(\t\022\023\n\013cr" + - "eate_time\030\006 \001(\003\";\n\017StoreSequenceId\022\023\n\013fa" + - "mily_name\030\001 \002(\014\022\023\n\013sequence_id\030\002 \002(\004\"g\n\026" + - "RegionStoreSequenceIds\022 \n\030last_flushed_s" + - "equence_id\030\001 \002(\004\022+\n\021store_sequence_id\030\002 ", - "\003(\0132\020.StoreSequenceIdBE\n*org.apache.hado" + - "op.hbase.protobuf.generatedB\017ZooKeeperPr" + - "otosH\001\210\001\001\240\001\001" + "\n\017ZooKeeper.proto\032\013HBase.proto\032\023ClusterS" + + "tatus.proto\"g\n\020MetaRegionServer\022\033\n\006serve" + + "r\030\001 \002(\0132\013.ServerName\022\023\n\013rpc_version\030\002 \001(" + + "\r\022!\n\005state\030\003 \001(\0162\022.RegionState.State\":\n\006" + + "Master\022\033\n\006master\030\001 \002(\0132\013.ServerName\022\023\n\013r" + + "pc_version\030\002 \001(\r\"\037\n\tClusterUp\022\022\n\nstart_d" + + "ate\030\001 \002(\t\"\214\002\n\014SplitLogTask\022\"\n\005state\030\001 \002(" + + "\0162\023.SplitLogTask.State\022 \n\013server_name\030\002 " + + "\002(\0132\013.ServerName\0221\n\004mode\030\003 \001(\0162\032.SplitLo" + + "gTask.RecoveryMode:\007UNKNOWN\"C\n\005State\022\016\n\n", + "UNASSIGNED\020\000\022\t\n\005OWNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n" + + "\004DONE\020\003\022\007\n\003ERR\020\004\">\n\014RecoveryMode\022\013\n\007UNKN" + + "OWN\020\000\022\021\n\rLOG_SPLITTING\020\001\022\016\n\nLOG_REPLAY\020\002" + + "\"n\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007" + + 
"ENABLED\"?\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLE" + + "D\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\"\215\001\n\017Rep" + + "licationPeer\022\022\n\nclusterkey\030\001 \002(\t\022\037\n\027repl" + + "icationEndpointImpl\030\002 \001(\t\022\035\n\004data\030\003 \003(\0132" + + "\017.BytesBytesPair\022&\n\rconfiguration\030\004 \003(\0132" + + "\017.NameStringPair\"^\n\020ReplicationState\022&\n\005", + "state\030\001 \002(\0162\027.ReplicationState.State\"\"\n\005" + + "State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\"+\n\027Repl" + + "icationHLogPosition\022\020\n\010position\030\001 \002(\003\"%\n" + + "\017ReplicationLock\022\022\n\nlock_owner\030\001 \002(\t\"\230\001\n" + + "\tTableLock\022\036\n\ntable_name\030\001 \001(\0132\n.TableNa" + + "me\022\037\n\nlock_owner\030\002 \001(\0132\013.ServerName\022\021\n\tt" + + "hread_id\030\003 \001(\003\022\021\n\tis_shared\030\004 \001(\010\022\017\n\007pur" + + "pose\030\005 \001(\t\022\023\n\013create_time\030\006 \001(\003\";\n\017Store" + + "SequenceId\022\023\n\013family_name\030\001 \002(\014\022\023\n\013seque" + + "nce_id\030\002 \002(\004\"g\n\026RegionStoreSequenceIds\022 ", + "\n\030last_flushed_sequence_id\030\001 \002(\004\022+\n\021stor" + + "e_sequence_id\030\002 \003(\0132\020.StoreSequenceIdBE\n" + + "*org.apache.hadoop.hbase.protobuf.genera" + + "tedB\017ZooKeeperProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -9458,7 +9605,7 @@ public final class ZooKeeperProtos { internal_static_MetaRegionServer_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_MetaRegionServer_descriptor, - new java.lang.String[] { "Server", "RpcVersion", }); + new java.lang.String[] { "Server", "RpcVersion", "State", }); internal_static_Master_descriptor = 
getDescriptor().getMessageTypes().get(1); internal_static_Master_fieldAccessorTable = new @@ -9532,6 +9679,7 @@ public final class ZooKeeperProtos { .internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[] { org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.getDescriptor(), + org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.getDescriptor(), }, assigner); } http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-protocol/src/main/protobuf/ZooKeeper.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol/src/main/protobuf/ZooKeeper.proto b/hbase-protocol/src/main/protobuf/ZooKeeper.proto index 4d727c6..8acd778 100644 --- a/hbase-protocol/src/main/protobuf/ZooKeeper.proto +++ b/hbase-protocol/src/main/protobuf/ZooKeeper.proto @@ -26,17 +26,22 @@ option java_generate_equals_and_hash = true; option optimize_for = SPEED; import "HBase.proto"; +import "ClusterStatus.proto"; /** * Content of the meta-region-server znode. */ message MetaRegionServer { - // The ServerName hosting the meta region currently. + // The ServerName hosting the meta region currently, or destination server, + // if meta region is in transition. required ServerName server = 1; // The major version of the rpc the server speaks. This is used so that // clients connecting to the cluster can have prior knowledge of what version // to send to a RegionServer. AsyncHBase will use this to detect versions. optional uint32 rpc_version = 2; + + // State of the region transition. 
OPEN means fully operational 'hbase:meta' + optional RegionState.State state = 3; } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 53f159a..feedfef 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -524,7 +524,14 @@ public class AssignmentManager { regionStateStore.start(); if (failover) { - processDeadServers(deadServers); + if (deadServers != null && !deadServers.isEmpty()) { + for (ServerName serverName: deadServers) { + if (!serverManager.isServerDead(serverName)) { + serverManager.expireServer(serverName); // Let SSH do region re-assign + } + } + } + processRegionsInTransition(regionStates.getRegionsInTransition().values()); } // Now we can safely claim failover cleanup completed and enable @@ -1399,13 +1406,9 @@ public class AssignmentManager { * <p> * Assumes that hbase:meta is currently closed and is not being actively served by * any RegionServer. - * <p> - * Forcibly unsets the current meta region location in ZooKeeper and assigns - * hbase:meta to a random RegionServer. 
- * @throws KeeperException */ public void assignMeta() throws KeeperException { - this.server.getMetaTableLocator().deleteMetaLocation(this.server.getZooKeeper()); + regionStates.updateRegionState(HRegionInfo.FIRST_META_REGIONINFO, State.OFFLINE); assign(HRegionInfo.FIRST_META_REGIONINFO); } @@ -1709,28 +1712,15 @@ public class AssignmentManager { } /** - * Processes list of dead servers from result of hbase:meta scan and regions in RIT - * - * @param deadServers - * The list of dead servers which failed while there was no active - * master. Can be null. + * Processes list of regions in transition at startup */ - private void processDeadServers(Set<ServerName> deadServers) { - if (deadServers != null && !deadServers.isEmpty()) { - for (ServerName serverName: deadServers) { - if (!serverManager.isServerDead(serverName)) { - serverManager.expireServer(serverName); // Let SSH do region re-assign - } - } - } - + void processRegionsInTransition(Collection<RegionState> regionStates) { // We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions // in case the RPC call is not sent out yet before the master was shut down // since we update the state before we send the RPC call. We can't update // the state after the RPC call. Otherwise, we don't know what's happened // to the region if the master dies right after the RPC call is out. 
- Map<String, RegionState> rits = regionStates.getRegionsInTransition(); - for (RegionState regionState: rits.values()) { + for (RegionState regionState: regionStates) { if (!serverManager.isServerOnline(regionState.getServerName())) { continue; // SSH will handle it } http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index add3b1f..5024313 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -26,6 +26,7 @@ import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; @@ -46,6 +47,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.CoordinatedStateException; +import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HColumnDescriptor; @@ -53,6 +55,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.NamespaceNotFoundException; import org.apache.hadoop.hbase.PleaseHoldException; @@ -64,12 +67,10 @@ import org.apache.hadoop.hbase.TableName; import 
org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.UnknownRegionException; -import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.executor.ExecutorType; @@ -77,7 +78,6 @@ import org.apache.hadoop.hbase.ipc.RequestContext; import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode; -import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.balancer.BalancerChore; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; @@ -100,8 +100,8 @@ import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost; import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo; -import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.RSRpcServices; import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; @@ -647,37 +647,29 @@ public class HMaster extends 
HRegionServer implements MasterServices, Server { long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000); status.setStatus("Assigning hbase:meta region"); + // Get current meta state from zk. + RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper()); + RegionStates regionStates = assignmentManager.getRegionStates(); - regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO); - boolean metaRegionLocation = metaTableLocator.verifyMetaRegionLocation( - this.getShortCircuitConnection(), this.getZooKeeper(), timeout); - ServerName currentMetaServer = metaTableLocator.getMetaRegionLocation(this.getZooKeeper()); - if (!metaRegionLocation) { - // Meta location is not verified. It should be in transition, or offline. - // We will wait for it to be assigned in enableSSHandWaitForMeta below. - if (currentMetaServer != null) { - // If the meta server is not known to be dead or online, - // just split the meta log, and don't expire it since this - // could be a full cluster restart. Otherwise, we will think - // this is a failover and lose previous region locations. - // If it is really a failover case, AM will find out in rebuilding - // user regions. Otherwise, we are good since all logs are split - // or known to be replayed before user regions are assigned. 
- if (serverManager.isServerOnline(currentMetaServer)) { - LOG.info("Forcing expire of " + currentMetaServer); - serverManager.expireServer(currentMetaServer); + regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO, + metaState.getState(), metaState.getServerName(), null); + + if (!metaState.isOpened() || !metaTableLocator.verifyMetaRegionLocation( + this.getShortCircuitConnection(), this.getZooKeeper(), timeout)) { + ServerName currentMetaServer = metaState.getServerName(); + if (serverManager.isServerOnline(currentMetaServer)) { + LOG.info("Meta was in transition on " + currentMetaServer); + assignmentManager.processRegionsInTransition(Arrays.asList(metaState)); + } else { + if (currentMetaServer != null) { + splitMetaLogBeforeAssignment(currentMetaServer); + regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO); + previouslyFailedMetaRSs.add(currentMetaServer); } - splitMetaLogBeforeAssignment(currentMetaServer); - previouslyFailedMetaRSs.add(currentMetaServer); + LOG.info("Re-assigning hbase:meta, it was on " + currentMetaServer); + assignmentManager.assignMeta(); } - assignmentManager.assignMeta(); assigned++; - } else { - // Region already assigned. We didn't assign it. Add to in-memory state. 
- regionStates.updateRegionState( - HRegionInfo.FIRST_META_REGIONINFO, State.OPEN, currentMetaServer); - this.assignmentManager.regionOnline( - HRegionInfo.FIRST_META_REGIONINFO, currentMetaServer); } enableMeta(TableName.META_TABLE_NAME); @@ -737,9 +729,6 @@ public class HMaster extends HRegionServer implements MasterServices, Server { if (waitForMeta) { metaTableLocator.waitMetaRegionLocation(this.getZooKeeper()); - // Above check waits for general meta availability but this does not - // guarantee that the transition has completed - this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java index fb16dd3..e5370c5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java @@ -40,6 +40,9 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.RegionServerServices; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.MultiHConnection; +import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; +import org.apache.zookeeper.KeeperException; + import com.google.common.base.Preconditions; /** @@ -158,20 +161,31 @@ public class RegionStateStore { void updateRegionState(long openSeqNum, RegionState newState, RegionState oldState) { - if (!initialized) { - return; - } + try { + HRegionInfo hri = newState.getRegion(); - HRegionInfo hri = newState.getRegion(); - if (!shouldPersistStateChange(hri, newState, oldState)) { - return; - } + // update meta before checking for initialization. 
+ // meta state stored in zk. + if (hri.isMetaRegion()) { + // persist meta state in MetaTableLocator (which in turn is zk storage currently) + try { + MetaTableLocator.setMetaLocation(server.getZooKeeper(), + newState.getServerName(), newState.getState()); + return; // Done + } catch (KeeperException e) { + throw new IOException("Failed to update meta ZNode", e); + } + } - ServerName oldServer = oldState != null ? oldState.getServerName() : null; - ServerName serverName = newState.getServerName(); - State state = newState.getState(); + if (!initialized + || !shouldPersistStateChange(hri, newState, oldState)) { + return; + } + + ServerName oldServer = oldState != null ? oldState.getServerName() : null; + ServerName serverName = newState.getServerName(); + State state = newState.getState(); - try { int replicaId = hri.getReplicaId(); Put put = new Put(MetaTableAccessor.getMetaKeyForRegion(hri)); StringBuilder info = new StringBuilder("Updating row "); @@ -217,13 +231,13 @@ public class RegionStateStore { } // Called when meta is not on master multiHConnection.processBatchCallback(Arrays.asList(put), TableName.META_TABLE_NAME, null, null); - + } catch (IOException ioe) { LOG.error("Failed to persist region state " + newState, ioe); server.abort("Failed to update region location", ioe); } } - + void splitRegion(HRegionInfo p, HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException { MetaTableAccessor.splitRegion(server.getShortCircuitConnection(), p, a, b, sn); http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java index a24e387..06cad0e 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java @@ -89,12 +89,6 @@ public class MetaServerShutdownHandler extends ServerShutdownHandler { // timeout if (am.isCarryingMeta(serverName)) { LOG.info("Server " + serverName + " was carrying META. Trying to assign."); - am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO); - verifyAndAssignMetaWithRetries(); - } else if (!server.getMetaTableLocator().isLocationAvailable(this.server.getZooKeeper())) { - // the meta location as per master is null. This could happen in case when meta assignment - // in previous run failed, while meta znode has been updated to null. We should try to - // assign the meta again. verifyAndAssignMetaWithRetries(); } else { LOG.info("META has been assigned to otherwhere, skip assigning."); http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 8e62620..deb5ed1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.ipc.RpcClient; import org.apache.hadoop.hbase.ipc.RpcServerInterface; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.SplitLogManager; import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.procedure.RegionServerProcedureManagerHost; 
@@ -1719,10 +1720,6 @@ public class HRegionServer extends HasThread implements // Update flushed sequence id of a recovering region in ZK updateRecoveringRegionLastFlushedSequenceId(r); - if (r.getRegionInfo().isMetaRegion()) { - MetaTableLocator.setMetaLocation(getZooKeeper(), serverName); - } - // Notify master if (!reportRegionStateTransition( TransitionCode.OPENED, openSeqNum, r.getRegionInfo())) { @@ -1746,7 +1743,14 @@ public class HRegionServer extends HasThread implements // to handle the region transition report at all. if (code == TransitionCode.OPENED) { Preconditions.checkArgument(hris != null && hris.length == 1); - if (!hris[0].isMetaRegion()) { + if (hris[0].isMetaRegion()) { + try { + MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, State.OPEN); + } catch (KeeperException e) { + LOG.info("Failed to update meta location", e); + return false; + } + } else { try { MetaTableAccessor.updateRegionLocation(shortCircuitConnection, hris[0], serverName, openSeqNum); http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java index dccd7ca..8a439a8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java @@ -18,13 +18,14 @@ */ package org.apache.hadoop.hbase; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; import java.io.IOException; import java.net.ConnectException; -import junit.framework.Assert; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; @@ -32,6 +33,7 @@ import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HConnectionTestingUtility; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoRequest; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; @@ -106,6 +108,39 @@ public class TestMetaTableLocator { } /** + * Test normal operations + */ + @Test public void testMetaLookup() + throws IOException, InterruptedException, ServiceException, KeeperException { + final ClientProtos.ClientService.BlockingInterface client = + Mockito.mock(ClientProtos.ClientService.BlockingInterface.class); + + Mockito.when(client.get((RpcController)Mockito.any(), (GetRequest)Mockito.any())). + thenReturn(GetResponse.newBuilder().build()); + + final MetaTableLocator mtl = new MetaTableLocator(); + assertNull(mtl.getMetaRegionLocation(this.watcher)); + for (RegionState.State state : RegionState.State.values()) { + if (state.equals(RegionState.State.OPEN)) + continue; + MetaTableLocator.setMetaLocation(this.watcher, SN, state); + assertNull(mtl.getMetaRegionLocation(this.watcher)); + assertEquals(state, MetaTableLocator.getMetaRegionState(this.watcher).getState()); + } + MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPEN); + assertEquals(mtl.getMetaRegionLocation(this.watcher), SN); + assertEquals(RegionState.State.OPEN, + MetaTableLocator.getMetaRegionState(this.watcher).getState()); + + mtl.deleteMetaLocation(this.watcher); + assertNull(MetaTableLocator.getMetaRegionState(this.watcher).getServerName()); + assertEquals(MetaTableLocator.getMetaRegionState(this.watcher).getState(), + RegionState.State.OFFLINE); + assertNull(mtl.getMetaRegionLocation(this.watcher)); 
+ } + + + /** * Test interruptable while blocking wait on meta. * @throws IOException * @throws ServiceException @@ -121,7 +156,7 @@ public class TestMetaTableLocator { final MetaTableLocator mtl = new MetaTableLocator(); ServerName meta = new MetaTableLocator().getMetaRegionLocation(this.watcher); - Assert.assertNull(meta); + assertNull(meta); Thread t = new Thread() { @Override public void run() { @@ -153,11 +188,15 @@ public class TestMetaTableLocator { Mockito.when(implementation.get((RpcController) Mockito.any(), (GetRequest) Mockito.any())). thenThrow(new ServiceException(ex)); - MetaTableLocator.setMetaLocation(this.watcher, SN); long timeout = UTIL.getConfiguration(). - getLong("hbase.catalog.verification.timeout", 1000); - Assert.assertFalse(new MetaTableLocator().verifyMetaRegionLocation( + getLong("hbase.catalog.verification.timeout", 1000); + MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPENING); + assertFalse(new MetaTableLocator().verifyMetaRegionLocation( connection, watcher, timeout)); + + MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPEN); + assertFalse(new MetaTableLocator().verifyMetaRegionLocation( + connection, watcher, timeout)); } /** @@ -213,9 +252,13 @@ public class TestMetaTableLocator { Mockito.when(connection.getAdmin(Mockito.any(ServerName.class), Mockito.anyBoolean())). 
thenReturn(implementation); + ServerName sn = ServerName.valueOf("example.com", 1234, System.currentTimeMillis()); MetaTableLocator.setMetaLocation(this.watcher, - ServerName.valueOf("example.com", 1234, System.currentTimeMillis())); - Assert.assertFalse(new MetaTableLocator().verifyMetaRegionLocation(connection, watcher, 100)); + sn, + RegionState.State.OPENING); + assertFalse(new MetaTableLocator().verifyMetaRegionLocation(connection, watcher, 100)); + MetaTableLocator.setMetaLocation(this.watcher, sn, RegionState.State.OPEN); + assertFalse(new MetaTableLocator().verifyMetaRegionLocation(connection, watcher, 100)); } @Test (expected = NotAllMetaRegionsOnlineException.class) @@ -234,22 +277,18 @@ public class TestMetaTableLocator { throws IOException, InterruptedException, KeeperException { final MetaTableLocator mtl = new MetaTableLocator(); ServerName hsa = mtl.getMetaRegionLocation(watcher); - Assert.assertNull(hsa); + assertNull(hsa); // Now test waiting on meta location getting set. Thread t = new WaitOnMetaThread(); startWaitAliveThenWaitItLives(t, 1); // Set a meta location. - hsa = setMetaLocation(); + MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPEN); + hsa = SN; // Join the thread... should exit shortly. t.join(); // Now meta is available. - Assert.assertTrue(mtl.getMetaRegionLocation(watcher).equals(hsa)); - } - - private ServerName setMetaLocation() throws KeeperException { - MetaTableLocator.setMetaLocation(this.watcher, SN); - return SN; + assertTrue(mtl.getMetaRegionLocation(watcher).equals(hsa)); } /** @@ -302,7 +341,7 @@ public class TestMetaTableLocator { } // Wait one second. 
Threads.sleep(ms); - Assert.assertTrue("Assert " + t.getName() + " still waiting", t.isAlive()); + assertTrue("Assert " + t.getName() + " still waiting", t.isAlive()); } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 9ebf5ec..519f806 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -67,7 +67,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.JVMClusterUtil; -import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -120,27 +120,42 @@ public class TestAssignmentManagerOnCluster { metaServerName = cluster.getLiveRegionServerThreads() .get(0).getRegionServer().getServerName(); master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), - Bytes.toBytes(metaServerName.getServerName())); + Bytes.toBytes(metaServerName.getServerName())); TEST_UTIL.waitUntilNoRegionsInTransition(60000); } + RegionState metaState = + MetaTableLocator.getMetaRegionState(master.getZooKeeper()); + assertEquals("Meta should be not in transition", metaState.getState(), State.OPEN); assertNotEquals("Meta should be moved off master", - metaServerName, master.getServerName()); + metaState.getServerName(), master.getServerName()); + assertEquals("Meta should be 
on the meta server", + metaState.getServerName(), metaServerName); cluster.killRegionServer(metaServerName); stoppedARegionServer = true; cluster.waitForRegionServerToStop(metaServerName, 60000); // Wait for SSH to finish + final ServerName oldServerName = metaServerName; final ServerManager serverManager = master.getServerManager(); TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() { @Override public boolean evaluate() throws Exception { - return !serverManager.areDeadServersInProgress(); + return serverManager.isServerDead(oldServerName) + && !serverManager.areDeadServersInProgress(); } }); + TEST_UTIL.waitUntilNoRegionsInTransition(60000); // Now, make sure meta is assigned assertTrue("Meta should be assigned", regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO)); + // Now, make sure meta is registered in zk + metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); + assertEquals("Meta should be not in transition", metaState.getState(), State.OPEN); + assertEquals("Meta should be assigned", metaState.getServerName(), + regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO)); + assertNotEquals("Meta should be assigned on a different server", + metaState.getServerName(), metaServerName); } finally { if (stoppedARegionServer) { cluster.startRegionServer(); http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index 5c35611..ef244b6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -44,11 +44,14 @@ import org.apache.hadoop.hbase.ServerName; 
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.master.RegionState.State; +import org.apache.hadoop.hbase.protobuf.RequestConverter; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; +import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -235,7 +238,8 @@ public class TestMasterFailover { // at this point we only expect 3 regions to be assigned out // (catalogs and namespace, + 1 online region) assertEquals(3, cluster.countServedRegions()); - HRegionInfo hriOnline = onlineTable.getRegionLocation("").getRegionInfo(); + HRegionInfo hriOnline = onlineTable.getRegionLocation( + HConstants.EMPTY_START_ROW).getRegionInfo(); RegionStates regionStates = master.getAssignmentManager().getRegionStates(); RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore(); @@ -281,5 +285,121 @@ public class TestMasterFailover { // Done, shutdown the cluster TEST_UTIL.shutdownMiniCluster(); } + + /** + * Test meta in transition when master failover + */ + @Test(timeout = 180000) + public void testMetaInTransitionWhenMasterFailover() throws Exception { + final int NUM_MASTERS = 1; + final int NUM_RS = 1; + + // Start the cluster + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + log("Cluster started"); + + log("Moving meta off the master"); + HMaster activeMaster = cluster.getMaster(); + HRegionServer rs = cluster.getRegionServer(0); + ServerName metaServerName = cluster.getLiveRegionServerThreads() + 
.get(0).getRegionServer().getServerName(); + activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), + Bytes.toBytes(metaServerName.getServerName())); + TEST_UTIL.waitUntilNoRegionsInTransition(60000); + assertEquals("Meta should be assigned on expected regionserver", + metaServerName, activeMaster.getMetaTableLocator() + .getMetaRegionLocation(activeMaster.getZooKeeper())); + + // Now kill master, meta should remain on rs, where we placed it before. + log("Aborting master"); + activeMaster.abort("test-kill"); + cluster.waitForMasterToStop(activeMaster.getServerName(), 30000); + log("Master has aborted"); + + // meta should remain where it was + RegionState metaState = + MetaTableLocator.getMetaRegionState(rs.getZooKeeper()); + assertEquals("hbase:meta should be onlined on RS", + metaState.getServerName(), rs.getServerName()); + assertEquals("hbase:meta should be onlined on RS", + metaState.getState(), State.OPEN); + + // Start up a new master + log("Starting up a new master"); + activeMaster = cluster.startMaster().getMaster(); + log("Waiting for master to be ready"); + cluster.waitForActiveAndReadyMaster(); + log("Master is ready"); + + // ensure meta is still deployed on RS + metaState = + MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper()); + assertEquals("hbase:meta should be onlined on RS", + metaState.getServerName(), rs.getServerName()); + assertEquals("hbase:meta should be onlined on RS", + metaState.getState(), State.OPEN); + + // Set the meta state to PENDING_OPEN, then kill the master. + // This simulates the case where the RS successfully deployed the region, but + // the RPC was lost right before the failure. + // The region server should expire (how can that be verified?) 
+ MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(), + rs.getServerName(), State.PENDING_OPEN); + HRegion meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()); + rs.removeFromOnlineRegions(meta, null); + meta.close(); + + log("Aborting master"); + activeMaster.abort("test-kill"); + cluster.waitForMasterToStop(activeMaster.getServerName(), 30000); + log("Master has aborted"); + + // Start up a new master + log("Starting up a new master"); + activeMaster = cluster.startMaster().getMaster(); + log("Waiting for master to be ready"); + cluster.waitForActiveAndReadyMaster(); + log("Master is ready"); + + TEST_UTIL.waitUntilNoRegionsInTransition(60000); + log("Meta was assigned"); + + metaState = + MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper()); + assertEquals("hbase:meta should be onlined on RS", + metaState.getServerName(), rs.getServerName()); + assertEquals("hbase:meta should be onlined on RS", + metaState.getState(), State.OPEN); + + // Set the meta state to PENDING_CLOSE, then kill the master. + // This simulates the case where the RS successfully deployed the region, but + // the RPC was lost right before the failure. + // The region server should expire (how can that be verified?) 
+ MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(), + rs.getServerName(), State.PENDING_CLOSE); + + log("Aborting master"); + activeMaster.abort("test-kill"); + cluster.waitForMasterToStop(activeMaster.getServerName(), 30000); + log("Master has aborted"); + + rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest( + rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())); + + // Start up a new master + log("Starting up a new master"); + activeMaster = cluster.startMaster().getMaster(); + log("Waiting for master to be ready"); + cluster.waitForActiveAndReadyMaster(); + log("Master is ready"); + + TEST_UTIL.waitUntilNoRegionsInTransition(60000); + log("Meta was assigned"); + + // Done, shutdown the cluster + TEST_UTIL.shutdownMiniCluster(); + } } http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java index 9f18f87..e643a5b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java @@ -39,12 +39,12 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.MetaMockingUtil; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.ZooKeeperConnectionException; -import org.apache.hadoop.hbase.MetaMockingUtil; import org.apache.hadoop.hbase.client.HConnection; import 
org.apache.hadoop.hbase.client.HConnectionTestingUtility; import org.apache.hadoop.hbase.client.Result; @@ -157,7 +157,8 @@ public class TestMasterNoCluster { final MockRegionServer rs2 = new MockRegionServer(conf, sn2); // Put some data into the servers. Make it look like sn0 has the metaH // Put data into sn2 so it looks like it has a few regions for a table named 't'. - MetaTableLocator.setMetaLocation(rs0.getZooKeeper(), rs0.getServerName()); + MetaTableLocator.setMetaLocation(rs0.getZooKeeper(), + rs0.getServerName(), RegionState.State.OPEN); final TableName tableName = TableName.valueOf("t"); Result [] results = new Result [] { MetaMockingUtil.getMetaTableRowResult(
