Repository: hbase
Updated Branches:
refs/heads/branch-1.0 9539373ca -> 45552e742
HBASE-12958 SSH doing hbase:meta get but hbase:meta not assigned
Conflicts:
hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
Conflicts:
hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/45552e74
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/45552e74
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/45552e74
Branch: refs/heads/branch-1.0
Commit: 45552e7427edba0907e7d1caaf7369f2845791ee
Parents: 9539373
Author: stack <[email protected]>
Authored: Wed Feb 4 22:56:40 2015 -0800
Committer: stack <[email protected]>
Committed: Wed Feb 4 23:26:00 2015 -0800
----------------------------------------------------------------------
.../apache/hadoop/hbase/MetaTableAccessor.java | 1 +
.../hadoop/hbase/master/RegionStates.java | 136 +++++++++++--------
.../hadoop/hbase/master/TestRegionStates.java | 58 ++++++++
3 files changed, 136 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/45552e74/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
----------------------------------------------------------------------
diff --git
a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 04d6a54..8a9f154 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -204,6 +204,7 @@ public class MetaTableAccessor {
* @throws IOException
*/
private static Result get(final Table t, final Get g) throws IOException {
+ if (t == null) return null;
try {
return t.get(g);
} finally {
http://git-wip-us.apache.org/repos/asf/hbase/blob/45552e74/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 5f8bf20..b5422ef 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -31,18 +31,19 @@ import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableStateManager;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
-import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.util.Bytes;
@@ -386,7 +387,7 @@ public class RegionStates {
return updateRegionState(regionInfo, state,
transition.getServerName());
}
-
+
/**
* Transition a region state to OPEN from OPENING/PENDING_OPEN
*/
@@ -584,74 +585,90 @@ public class RegionStates {
/**
* A server is offline, all regions on it are dead.
*/
- public synchronized List<HRegionInfo> serverOffline(
- final ZooKeeperWatcher watcher, final ServerName sn) {
+ public List<HRegionInfo> serverOffline(final ZooKeeperWatcher watcher, final
ServerName sn) {
// Offline all regions on this server not already in transition.
List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
- Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
- if (assignedRegions == null) {
- assignedRegions = new HashSet<HRegionInfo>();
- }
+ Set<HRegionInfo> regionsToCleanIfNoMetaEntry = new HashSet<HRegionInfo>();
+ synchronized (this) {
+ Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
+ if (assignedRegions == null) {
+ assignedRegions = new HashSet<HRegionInfo>();
+ }
- // Offline regions outside the loop to avoid
ConcurrentModificationException
- Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
- for (HRegionInfo region : assignedRegions) {
- // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
- if (isRegionOnline(region)) {
- regionsToOffline.add(region);
- } else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
- LOG.debug("Offline splitting/merging region " +
getRegionState(region));
- try {
- // Delete the ZNode if exists
- ZKAssign.deleteNodeFailSilent(watcher, region);
+ // Offline regions outside the loop to avoid
ConcurrentModificationException
+ Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
+ for (HRegionInfo region : assignedRegions) {
+ // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
+ if (isRegionOnline(region)) {
regionsToOffline.add(region);
- } catch (KeeperException ke) {
- server.abort("Unexpected ZK exception deleting node " + region, ke);
+ } else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
+ LOG.debug("Offline splitting/merging region " +
getRegionState(region));
+ try {
+ // Delete the ZNode if exists
+ ZKAssign.deleteNodeFailSilent(watcher, region);
+ regionsToOffline.add(region);
+ } catch (KeeperException ke) {
+ server.abort("Unexpected ZK exception deleting node " + region,
ke);
+ }
}
}
- }
- for (RegionState state : regionsInTransition.values()) {
- HRegionInfo hri = state.getRegion();
- if (assignedRegions.contains(hri)) {
- // Region is open on this region server, but in transition.
- // This region must be moving away from this server, or
splitting/merging.
- // SSH will handle it, either skip assigning, or re-assign.
- LOG.info("Transitioning " + state + " will be handled by SSH for " +
sn);
- } else if (sn.equals(state.getServerName())) {
- // Region is in transition on this region server, and this
- // region is not open on this server. So the region must be
- // moving to this server from another one (i.e. opening or
- // pending open on this server, was open on another one.
- // Offline state is also kind of pending open if the region is in
- // transition. The region could be in failed_close state too if we have
- // tried several times to open it while this region server is not
reachable)
- if (state.isPendingOpenOrOpening() || state.isFailedClose() ||
state.isOffline()) {
- LOG.info("Found region in " + state + " to be reassigned by SSH for
" + sn);
- rits.add(hri);
- } else if(state.isSplittingNew()) {
- try {
- if (MetaTableAccessor.getRegion(server.getConnection(),
state.getRegion()
- .getEncodedNameAsBytes()) == null) {
- regionsToOffline.add(state.getRegion());
- FSUtils.deleteRegionDir(server.getConfiguration(),
state.getRegion());
- }
- } catch (IOException e) {
- LOG.warn("Got exception while deleting " + state.getRegion()
- + " directories from file system.", e);
+ for (RegionState state : regionsInTransition.values()) {
+ HRegionInfo hri = state.getRegion();
+ if (assignedRegions.contains(hri)) {
+ // Region is open on this region server, but in transition.
+ // This region must be moving away from this server, or
splitting/merging.
+ // SSH will handle it, either skip assigning, or re-assign.
+ LOG.info("Transitioning " + state + " will be handled by SSH for " +
sn);
+ } else if (sn.equals(state.getServerName())) {
+ // Region is in transition on this region server, and this
+ // region is not open on this server. So the region must be
+ // moving to this server from another one (i.e. opening or
+ // pending open on this server, was open on another one.
+ // Offline state is also kind of pending open if the region is in
+ // transition. The region could be in failed_close state too if we
have
+ // tried several times to open it while this region server is not
reachable)
+ if (state.isPendingOpenOrOpening() || state.isFailedClose() ||
state.isOffline()) {
+ LOG.info("Found region in " + state + " to be reassigned by SSH
for " + sn);
+ rits.add(hri);
+ } else if(state.isSplittingNew()) {
+ regionsToCleanIfNoMetaEntry.add(state.getRegion());
+ } else {
+ LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
}
- } else {
- LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
}
+
+ for (HRegionInfo h : regionsToOffline) {
+ regionOffline(h);
+ }
+
+ this.notifyAll();
}
}
+ cleanIfNoMetaEntry(regionsToCleanIfNoMetaEntry);
+ return rits;
+ }
- for (HRegionInfo hri : regionsToOffline) {
- regionOffline(hri);
+ /**
+ * This method does an RPC to hbase:meta. Do not call this method with a
lock/synchronize held.
+ * @param hris The hris to check if empty in hbase:meta and if so, clean
them up.
+ */
+ private void cleanIfNoMetaEntry(Set<HRegionInfo> hris) {
+ if (hris.isEmpty()) return;
+ for (HRegionInfo hri: hris) {
+ try {
+ // This is RPC to meta table. It is done while we have a synchronize on
+ // regionstates. No progress will be made if meta is not available at
this time.
+ // This is a cleanup task. Not critical.
+ if (MetaTableAccessor.getRegion(server.getConnection(),
hri.getEncodedNameAsBytes()) ==
+ null) {
+ regionOffline(hri);
+ FSUtils.deleteRegionDir(server.getConfiguration(), hri);
+ }
+ } catch (IOException e) {
+ LOG.warn("Got exception while deleting " + hri + " directories from
file system.", e);
+ }
}
-
- this.notifyAll();
- return rits;
}
/**
@@ -973,7 +990,8 @@ public class RegionStates {
}
/**
- * Get the HRegionInfo from cache, if not there, from the hbase:meta table
+ * Get the HRegionInfo from cache, if not there, from the hbase:meta table.
+ * Be careful. Does RPC. Do not hold a lock or synchronize when you call
this method.
* @param regionName
* @return HRegionInfo for the region
*/
http://git-wip-us.apache.org/repos/asf/hbase/blob/45552e74/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
----------------------------------------------------------------------
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
index 20cbd62..9abec43 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
@@ -23,6 +23,12 @@ import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableStateManager;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -31,7 +37,15 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.UUID;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CyclicBarrier;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import java.io.IOException;
+import static org.junit.Assert.assertTrue;
import static junit.framework.Assert.assertFalse;
import static org.mockito.Matchers.isA;
import static org.mockito.Mockito.mock;
@@ -39,7 +53,51 @@ import static org.mockito.Mockito.when;
@Category(SmallTests.class)
public class TestRegionStates {
+ @Test (timeout=10000)
+ public void testCanMakeProgressThoughMetaIsDown()
+ throws IOException, InterruptedException, BrokenBarrierException {
+ Server server = mock(Server.class);
+ when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
+ Connection connection = mock(ClusterConnection.class);
+ // Set up a table that gets 'stuck' when we try to fetch a row from the
meta table.
+ // It is stuck on a CyclicBarrier latch. We use CyclicBarrier because it
will tell us when
+ // thread is waiting on latch.
+ Table metaTable = Mockito.mock(Table.class);
+ final CyclicBarrier latch = new CyclicBarrier(2);
+ when(metaTable.get((Get)Mockito.any())).thenAnswer(new Answer<Result>() {
+ @Override
+ public Result answer(InvocationOnMock invocation) throws Throwable {
+ latch.await();
+ throw new java.net.ConnectException("Connection refused");
+ }
+ });
+ when(connection.getTable(TableName.META_TABLE_NAME)).thenReturn(metaTable);
+ when(server.getConnection()).thenReturn((ClusterConnection)connection);
+ Configuration configuration = mock(Configuration.class);
+ when(server.getConfiguration()).thenReturn(configuration);
+ TableStateManager tsm = mock(TableStateManager.class);
+ ServerManager sm = mock(ServerManager.class);
+ when(sm.isServerOnline(isA(ServerName.class))).thenReturn(true);
+ RegionStateStore rss = mock(RegionStateStore.class);
+ final RegionStates regionStates = new RegionStates(server, tsm, sm, rss);
+ final ServerName sn = mockServer("one", 1);
+ regionStates.updateRegionState(HRegionInfo.FIRST_META_REGIONINFO,
State.SPLITTING_NEW, sn);
+ Thread backgroundThread = new Thread("Get stuck setting server offline") {
+ @Override
+ public void run() {
+ regionStates.serverOffline(null, sn);
+ }
+ };
+ assertTrue(latch.getNumberWaiting() == 0);
+ backgroundThread.start();
+ while (latch.getNumberWaiting() == 0);
+ // Verify I can do stuff with synchronized RegionStates methods, that I am
not locked out.
+ // Below is a call that is synchronized. Can I do it and not block?
+ regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
+ // Done. Trip the barrier on the background thread.
+ latch.await();
+ }
@Test
public void testWeDontReturnDrainingServersForOurBalancePlans() throws
Exception {