Repository: hbase
Updated Branches:
  refs/heads/branch-1.0 9539373ca -> 45552e742


HBASE-12958 SSH doing hbase:meta get but hbase:meta not assigned

Conflicts:
        
hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
        
hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java

Conflicts:
        
hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/45552e74
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/45552e74
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/45552e74

Branch: refs/heads/branch-1.0
Commit: 45552e7427edba0907e7d1caaf7369f2845791ee
Parents: 9539373
Author: stack <[email protected]>
Authored: Wed Feb 4 22:56:40 2015 -0800
Committer: stack <[email protected]>
Committed: Wed Feb 4 23:26:00 2015 -0800

----------------------------------------------------------------------
 .../apache/hadoop/hbase/MetaTableAccessor.java  |   1 +
 .../hadoop/hbase/master/RegionStates.java       | 136 +++++++++++--------
 .../hadoop/hbase/master/TestRegionStates.java   |  58 ++++++++
 3 files changed, 136 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/45552e74/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
----------------------------------------------------------------------
diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 04d6a54..8a9f154 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -204,6 +204,7 @@ public class MetaTableAccessor {
    * @throws IOException
    */
   private static Result get(final Table t, final Get g) throws IOException {
+    if (t == null) return null;
     try {
       return t.get(g);
     } finally {

http://git-wip-us.apache.org/repos/asf/hbase/blob/45552e74/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 5f8bf20..b5422ef 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -31,18 +31,19 @@ import java.util.TreeMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.RegionTransition;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableStateManager;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
-import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -386,7 +387,7 @@ public class RegionStates {
     return updateRegionState(regionInfo, state,
       transition.getServerName());
   }
-  
+
   /**
    * Transition a region state to OPEN from OPENING/PENDING_OPEN
    */
@@ -584,74 +585,90 @@ public class RegionStates {
   /**
    * A server is offline, all regions on it are dead.
    */
-  public synchronized List<HRegionInfo> serverOffline(
-      final ZooKeeperWatcher watcher, final ServerName sn) {
+  public List<HRegionInfo> serverOffline(final ZooKeeperWatcher watcher, final 
ServerName sn) {
     // Offline all regions on this server not already in transition.
     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
-    Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
-    if (assignedRegions == null) {
-      assignedRegions = new HashSet<HRegionInfo>();
-    }
+    Set<HRegionInfo> regionsToCleanIfNoMetaEntry = new HashSet<HRegionInfo>();
+    synchronized (this) {
+      Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
+      if (assignedRegions == null) {
+        assignedRegions = new HashSet<HRegionInfo>();
+      }
 
-    // Offline regions outside the loop to avoid 
ConcurrentModificationException
-    Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
-    for (HRegionInfo region : assignedRegions) {
-      // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
-      if (isRegionOnline(region)) {
-        regionsToOffline.add(region);
-      } else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
-        LOG.debug("Offline splitting/merging region " + 
getRegionState(region));
-        try {
-          // Delete the ZNode if exists
-          ZKAssign.deleteNodeFailSilent(watcher, region);
+      // Offline regions outside the loop to avoid 
ConcurrentModificationException
+      Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
+      for (HRegionInfo region : assignedRegions) {
+        // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
+        if (isRegionOnline(region)) {
           regionsToOffline.add(region);
-        } catch (KeeperException ke) {
-          server.abort("Unexpected ZK exception deleting node " + region, ke);
+        } else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
+          LOG.debug("Offline splitting/merging region " + 
getRegionState(region));
+          try {
+            // Delete the ZNode if exists
+            ZKAssign.deleteNodeFailSilent(watcher, region);
+            regionsToOffline.add(region);
+          } catch (KeeperException ke) {
+            server.abort("Unexpected ZK exception deleting node " + region, 
ke);
+          }
         }
       }
-    }
 
-    for (RegionState state : regionsInTransition.values()) {
-      HRegionInfo hri = state.getRegion();
-      if (assignedRegions.contains(hri)) {
-        // Region is open on this region server, but in transition.
-        // This region must be moving away from this server, or 
splitting/merging.
-        // SSH will handle it, either skip assigning, or re-assign.
-        LOG.info("Transitioning " + state + " will be handled by SSH for " + 
sn);
-      } else if (sn.equals(state.getServerName())) {
-        // Region is in transition on this region server, and this
-        // region is not open on this server. So the region must be
-        // moving to this server from another one (i.e. opening or
-        // pending open on this server, was open on another one.
-        // Offline state is also kind of pending open if the region is in
-        // transition. The region could be in failed_close state too if we have
-        // tried several times to open it while this region server is not 
reachable)
-        if (state.isPendingOpenOrOpening() || state.isFailedClose() || 
state.isOffline()) {
-          LOG.info("Found region in " + state + " to be reassigned by SSH for 
" + sn);
-          rits.add(hri);
-        } else if(state.isSplittingNew()) {
-          try {
-            if (MetaTableAccessor.getRegion(server.getConnection(), 
state.getRegion()
-                .getEncodedNameAsBytes()) == null) {
-              regionsToOffline.add(state.getRegion());
-              FSUtils.deleteRegionDir(server.getConfiguration(), 
state.getRegion());
-            }
-          } catch (IOException e) {
-            LOG.warn("Got exception while deleting " + state.getRegion()
-                + " directories from file system.", e);
+      for (RegionState state : regionsInTransition.values()) {
+        HRegionInfo hri = state.getRegion();
+        if (assignedRegions.contains(hri)) {
+          // Region is open on this region server, but in transition.
+          // This region must be moving away from this server, or 
splitting/merging.
+          // SSH will handle it, either skip assigning, or re-assign.
+          LOG.info("Transitioning " + state + " will be handled by SSH for " + 
sn);
+        } else if (sn.equals(state.getServerName())) {
+          // Region is in transition on this region server, and this
+          // region is not open on this server. So the region must be
+          // moving to this server from another one (i.e. opening or
+          // pending open on this server, was open on another one.
+          // Offline state is also kind of pending open if the region is in
+          // transition. The region could be in failed_close state too if we 
have
+          // tried several times to open it while this region server is not 
reachable)
+          if (state.isPendingOpenOrOpening() || state.isFailedClose() || 
state.isOffline()) {
+            LOG.info("Found region in " + state + " to be reassigned by SSH 
for " + sn);
+            rits.add(hri);
+          } else if(state.isSplittingNew()) {
+            regionsToCleanIfNoMetaEntry.add(state.getRegion());
+          } else {
+            LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
           }
-        } else {
-          LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
         }
+
+        for (HRegionInfo h : regionsToOffline) {
+          regionOffline(h);
+        }
+
+        this.notifyAll();
       }
     }
+    cleanIfNoMetaEntry(regionsToCleanIfNoMetaEntry);
+    return rits;
+  }
 
-    for (HRegionInfo hri : regionsToOffline) {
-      regionOffline(hri);
+  /**
+   * This method does an RPC to hbase:meta. Do not call this method with a 
lock/synchronize held.
+   * @param hris The hris to check if empty in hbase:meta and if so, clean 
them up.
+   */
+  private void cleanIfNoMetaEntry(Set<HRegionInfo> hris) {
+    if (hris.isEmpty()) return;
+    for (HRegionInfo hri: hris) {
+      try {
+        // This is RPC to meta table. It is done while we have a synchronize on
+        // regionstates. No progress will be made if meta is not available at 
this time.
+        // This is a cleanup task. Not critical.
+        if (MetaTableAccessor.getRegion(server.getConnection(), 
hri.getEncodedNameAsBytes()) ==
+            null) {
+          regionOffline(hri);
+          FSUtils.deleteRegionDir(server.getConfiguration(), hri);
+        }
+      } catch (IOException e) {
+        LOG.warn("Got exception while deleting " + hri + " directories from 
file system.", e);
+      }
     }
-
-    this.notifyAll();
-    return rits;
   }
 
   /**
@@ -973,7 +990,8 @@ public class RegionStates {
   }
 
   /**
-   * Get the HRegionInfo from cache, if not there, from the hbase:meta table
+   * Get the HRegionInfo from cache, if not there, from the hbase:meta table.
+   * Be careful. Does RPC. Do not hold a lock or synchronize when you call 
this method.
    * @param  regionName
    * @return HRegionInfo for the region
    */

http://git-wip-us.apache.org/repos/asf/hbase/blob/45552e74/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
index 20cbd62..9abec43 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionStates.java
@@ -23,6 +23,12 @@ import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableStateManager;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -31,7 +37,15 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CyclicBarrier;
 
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import java.io.IOException;
+import static org.junit.Assert.assertTrue;
 import static junit.framework.Assert.assertFalse;
 import static org.mockito.Matchers.isA;
 import static org.mockito.Mockito.mock;
@@ -39,7 +53,51 @@ import static org.mockito.Mockito.when;
 
 @Category(SmallTests.class)
 public class TestRegionStates {
+  @Test (timeout=10000)
+  public void testCanMakeProgressThoughMetaIsDown()
+  throws IOException, InterruptedException, BrokenBarrierException {
+    Server server = mock(Server.class);
+    when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
+    Connection connection = mock(ClusterConnection.class);
+    // Set up a table that gets 'stuck' when we try to fetch a row from the 
meta table.
+    // It is stuck on a CyclicBarrier latch. We use CyclicBarrier because it 
will tell us when
+    // thread is waiting on latch.
+    Table metaTable = Mockito.mock(Table.class);
+    final CyclicBarrier latch = new CyclicBarrier(2);
+    when(metaTable.get((Get)Mockito.any())).thenAnswer(new Answer<Result>() {
+      @Override
+      public Result answer(InvocationOnMock invocation) throws Throwable {
+        latch.await();
+        throw new java.net.ConnectException("Connection refused");
+      }
+    });
+    when(connection.getTable(TableName.META_TABLE_NAME)).thenReturn(metaTable);
+    when(server.getConnection()).thenReturn((ClusterConnection)connection);
+    Configuration configuration = mock(Configuration.class);
+    when(server.getConfiguration()).thenReturn(configuration);
+    TableStateManager tsm = mock(TableStateManager.class);
+    ServerManager sm = mock(ServerManager.class);
+    when(sm.isServerOnline(isA(ServerName.class))).thenReturn(true);
 
+    RegionStateStore rss = mock(RegionStateStore.class);
+    final RegionStates regionStates = new RegionStates(server, tsm, sm, rss);
+    final ServerName sn = mockServer("one", 1);
+    regionStates.updateRegionState(HRegionInfo.FIRST_META_REGIONINFO, 
State.SPLITTING_NEW, sn);
+    Thread backgroundThread = new Thread("Get stuck setting server offline") {
+      @Override
+      public void run() {
+        regionStates.serverOffline(null, sn);
+      }
+    };
+    assertTrue(latch.getNumberWaiting() == 0);
+    backgroundThread.start();
+    while (latch.getNumberWaiting() == 0);
+    // Verify I can do stuff with synchronized RegionStates methods, that I am 
not locked out.
+    // Below is a call that is synchronized.  Can I do it and not block?
+    regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
+    // Done. Trip the barrier on the background thread.
+    latch.await();
+  }
 
   @Test
   public void testWeDontReturnDrainingServersForOurBalancePlans() throws 
Exception {

Reply via email to