Author: stack
Date: Sun Jan  9 05:25:50 2011
New Revision: 1056884

URL: http://svn.apache.org/viewvc?rev=1056884&view=rev
Log:
HBASE-3403 Region orphaned after failure during split

Added:
    
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java

Modified: hbase/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1056884&r1=1056883&r2=1056884&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Sun Jan  9 05:25:50 2011
@@ -24,6 +24,7 @@ Release 0.91.0 - Unreleased
    HBASE-3332  Regions stuck in transition after RS failure
    HBASE-3418  Increment operations can break when qualifiers are split
                between memstore/snapshot and storefiles
+   HBASE-3403  Region orphaned after failure during split
 
 
   IMPROVEMENTS

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java?rev=1056884&r1=1056883&r2=1056884&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java 
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java 
Sun Jan  9 05:25:50 2011
@@ -77,10 +77,6 @@ public class MetaEditor {
     copyOfParent.setSplit(true);
     Put put = new Put(copyOfParent.getRegionName());
     addRegionInfo(put, copyOfParent);
-    put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-        HConstants.EMPTY_BYTE_ARRAY);
-    put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
-        HConstants.EMPTY_BYTE_ARRAY);
     put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER,
       Writables.getBytes(a));
     put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER,

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java?rev=1056884&r1=1056883&r2=1056884&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java 
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java 
Sun Jan  9 05:25:50 2011
@@ -234,9 +234,28 @@ public class MetaReader {
   public static void fullScan(CatalogTracker catalogTracker,
       final Visitor visitor)
   throws IOException {
+    fullScan(catalogTracker, visitor, null);
+  }
+
+  /**
+   * Performs a full scan of <code>.META.</code>.
+   * <p>
+   * Returns a map of every region to it's currently assigned server, according
+   * to META.  If the region does not have an assignment it will have a null
+   * value in the map.
+   * @param catalogTracker
+   * @param visitor
+   * @param startrow Where to start the scan. Pass null if want to begin scan
+   * at first row.
+   * @throws IOException
+   */
+  public static void fullScan(CatalogTracker catalogTracker,
+      final Visitor visitor, final byte [] startrow)
+  throws IOException {
     HRegionInterface metaServer =
       catalogTracker.waitForMetaServerConnectionDefault();
     Scan scan = new Scan();
+    if (startrow != null) scan.setStartRow(startrow);
     scan.addFamily(HConstants.CATALOG_FAMILY);
     long scannerid = metaServer.openScanner(
         HRegionInfo.FIRST_META_REGIONINFO.getRegionName(), scan);

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java?rev=1056884&r1=1056883&r2=1056884&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java 
(original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java 
Sun Jan  9 05:25:50 2011
@@ -51,6 +51,7 @@ class CatalogJanitor extends Chore {
   private static final Log LOG = 
LogFactory.getLog(CatalogJanitor.class.getName());
   private final Server server;
   private final MasterServices services;
+  private boolean enabled = true;
 
   CatalogJanitor(final Server server, final MasterServices services) {
     super(server.getServerName() + "-CatalogJanitor",
@@ -63,7 +64,7 @@ class CatalogJanitor extends Chore {
   @Override
   protected boolean initialChore() {
     try {
-      scan();
+      if (this.enabled) scan();
     } catch (IOException e) {
       LOG.warn("Failed initial scan of catalog table", e);
       return false;
@@ -71,6 +72,13 @@ class CatalogJanitor extends Chore {
     return true;
   }
 
+  /**
+   * @param enabled
+   */
+  public void setEnabled(final boolean enabled) {
+    this.enabled = enabled;
+  }
+
   @Override
   protected void chore() {
     try {

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1056884&r1=1056883&r2=1056884&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Sun 
Jan  9 05:25:50 2011
@@ -741,6 +741,16 @@ implements HMasterInterface, HMasterRegi
     return oldValue;
   }
 
+  /**
+   * Switch for the background {...@link CatalogJanitor} thread.
+   * Used for testing.  The thread will continue to run.  It will just be a 
noop
+   * if disabled.
+   * @param b If false, the catalog janitor won't do anything.
+   */
+  public void setCatalogJanitorEnabled(final boolean b) {
+    ((CatalogJanitor)this.catalogJanitorChore).setEnabled(b);
+  }
+
   @Override
   public void move(final byte[] encodedRegionName, final byte[] destServerName)
   throws UnknownRegionException {

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1056884&r1=1056883&r2=1056884&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
 (original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
 Sun Jan  9 05:25:50 2011
@@ -28,7 +28,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
@@ -37,11 +36,11 @@ import org.apache.hadoop.hbase.catalog.M
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.executor.EventHandler;
 import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
 import org.apache.hadoop.hbase.master.DeadServer;
 import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.ServerManager;
-import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
-import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.zookeeper.KeeperException;
 
@@ -210,18 +209,95 @@ public class ServerShutdownHandler exten
       final AssignmentManager assignmentManager,
       final CatalogTracker catalogTracker)
   throws IOException {
-    byte [] bytes = result.getValue(HConstants.CATALOG_FAMILY, qualifier);
-    if (bytes == null || bytes.length <= 0) return;
-    HRegionInfo hri = Writables.getHRegionInfoOrNull(bytes);
-    if (hri == null) return;
-    Pair<HRegionInfo, HServerAddress> pair =
-      MetaReader.getRegion(catalogTracker, hri.getRegionName());
-    if (pair == null || pair.getFirst() == null) {
-      LOG.info("Fixup; missing daughter " + hri.getEncodedName());
-      MetaEditor.addDaughter(catalogTracker, hri, null);
-      assignmentManager.assign(hri, true);
+    HRegionInfo daughter = getHRegionInfo(result, qualifier);
+    if (daughter == null) return;
+    if (isDaughterMissing(catalogTracker, daughter)) {
+      LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
+      MetaEditor.addDaughter(catalogTracker, daughter, null);
+      // And assign it.
+      assignmentManager.assign(daughter, true);
     } else {
-      LOG.debug("Daughter " + hri.getRegionNameAsString() + " present");
+      LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
+    }
+  }
+
+  /**
+   * Interpret the content of the cell at {...@link HConstants#CATALOG_FAMILY} 
and
+   * <code>qualifier</code> as an HRegionInfo and return it, or null.
+   * @param r Result instance to pull from.
+   * @param qualifier Column family qualifier
+   * @return An HRegionInfo instance or null.
+   * @throws IOException
+   */
+  private static HRegionInfo getHRegionInfo(final Result r, byte [] qualifier)
+  throws IOException {
+    byte [] bytes = r.getValue(HConstants.CATALOG_FAMILY, qualifier);
+    if (bytes == null || bytes.length <= 0) return null;
+    return Writables.getHRegionInfoOrNull(bytes);
+  }
+
+  /**
+   * Look for presence of the daughter OR of a split of the daughter. Daughter
+   * could have been split over on regionserver before a run of the
+   * catalogJanitor had chance to clear reference from parent.
+   * @param daughter Daughter region to search for.
+   * @throws IOException 
+   */
+  private static boolean isDaughterMissing(final CatalogTracker catalogTracker,
+      final HRegionInfo daughter) throws IOException {
+    FindDaughterVisitor visitor = new FindDaughterVisitor(daughter);
+    // Start the scan at what should be the daughter's row in the .META.
+    // We will either 1., find the daughter or some derivative split of the
+    // daughter (will have same table name and start row at least but will sort
+    // after because has larger regionid -- the regionid is timestamp of region
+    // creation), OR, we will not find anything with same table name and start
+    // row.  If the latter, then assume daughter missing and do fixup.
+    byte [] startrow = daughter.getRegionName();
+    MetaReader.fullScan(catalogTracker, visitor, startrow);
+    return !visitor.foundDaughter();
+  }
+
+  /**
+   * Looks for daughter.  Sets a flag if daughter or some progeny of daughter
+   * is found up in <code>.META.</code>.
+   */
+  static class FindDaughterVisitor implements MetaReader.Visitor {
+    private final HRegionInfo daughter;
+    private boolean found = false;
+
+    FindDaughterVisitor(final HRegionInfo daughter) {
+      this.daughter = daughter;
+    }
+
+    /**
+     * @return True if we found a daughter region during our visiting.
+     */
+    boolean foundDaughter() {
+      return this.found;
+    }
+
+    @Override
+    public boolean visit(Result r) throws IOException {
+      HRegionInfo hri = getHRegionInfo(r, HConstants.REGIONINFO_QUALIFIER);
+      if (hri == null) {
+        LOG.warn("No serialized HRegionInfo in " + r);
+        return true;
+      }
+      // Now see if we have gone beyond the daughter's startrow.
+      if (!Bytes.equals(daughter.getTableDesc().getName(),
+          hri.getTableDesc().getName())) {
+        // We fell into another table.  Stop scanning.
+        return false;
+      }
+      // If our start rows do not compare, move on.
+      if (!Bytes.equals(daughter.getStartKey(), hri.getStartKey())) {
+        return false;
+      }
+      // Else, table name and start rows compare.  It means that the daughter
+      // or some derivative split of the daughter is up in .META.  Daughter
+      // exists.
+      this.found = true;
+      return false;
     }
   }
-}
+}
\ No newline at end of file

Added: 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java?rev=1056884&view=auto
==============================================================================
--- 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
 (added)
+++ 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
 Sun Jan  9 05:25:50 2011
@@ -0,0 +1,290 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.UnknownRegionException;
+import org.apache.hadoop.hbase.ZooKeeperConnectionException;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Like {...@link TestSplitTransaction} in that we're testing {...@link 
SplitTransaction}
+ * only the below tests are against a running cluster where {...@link 
TestSplitTransaction}
+ * is tests against a bare {...@link HRegion}.
+ */
+public class TestSplitTransactionOnCluster {
+  private static final Log LOG =
+    LogFactory.getLog(TestSplitTransactionOnCluster.class);
+  private HBaseAdmin admin = null;
+  private MiniHBaseCluster cluster = null;
+
+  private static final HBaseTestingUtility TESTING_UTIL =
+    new HBaseTestingUtility();
+
+  @BeforeClass public static void before() throws Exception {
+    TESTING_UTIL.startMiniCluster(2);
+  }
+
+  @AfterClass public static void after() throws Exception {
+    TESTING_UTIL.shutdownMiniCluster();
+  }
+
+  @Before public void setup() throws IOException {
+    TESTING_UTIL.ensureSomeRegionServersAvailable(2);
+    this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
+    this.cluster = TESTING_UTIL.getMiniHBaseCluster();
+  }
+
+  /**
+   * Messy test that simulates case where SplitTransactions fails to add one
+   * of the daughters up into the .META. table before crash.  We're testing
+   * fact that the shutdown handler will fixup the missing daughter region
+   * adding it back into .META.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Test (timeout = 600000) public void testShutdownSimpleFixup()
+  throws IOException, InterruptedException {
+    final byte [] tableName = Bytes.toBytes("testShutdownSimpleFixup");
+
+    // Create table then get the single region for our new table.
+    HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
+
+    List<HRegion> regions = cluster.getRegions(tableName);
+    assertEquals(1, regions.size());
+    HRegionInfo hri = regions.get(0).getRegionInfo();
+
+    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
+
+    // Turn off balancer so it doesn't cut in and mess up our placements.
+    this.admin.balanceSwitch(false);
+    // Turn off the meta scanner so it don't remove parent on us.
+    cluster.getMaster().setCatalogJanitorEnabled(false);
+    try {
+      // Add a bit of load up into the table so splittable.
+      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
+      // Get region pre-split.
+      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
+      printOutRegions(server, "Initial regions: ");
+      int regionCount = server.getOnlineRegions().size();
+      // Now split.
+      split(hri, server, regionCount);
+      // Get daughters
+      List<HRegion> daughters = cluster.getRegions(tableName);
+      assertTrue(daughters.size() >= 2);
+      // Remove one of the daughters from .META. to simulate failed insert of
+      // daughter region up into .META.
+      removeDaughterFromMeta(daughters.get(0).getRegionName());
+      // Now crash the server
+      cluster.abortRegionServer(tableRegionIndex);
+      while(server.getOnlineRegions().size() > 0) {
+        LOG.info("Waiting on server to go down");
+        Thread.sleep(100);
+      }
+      // Wait till regions are back on line again.
+      while(cluster.getRegions(tableName).size() < daughters.size()) {
+        LOG.info("Waiting for repair to happen");
+        Thread.sleep(1000);
+      }
+      // Assert daughters are online.
+      regions = cluster.getRegions(tableName);
+      for (HRegion r: regions) {
+        assertTrue(daughters.contains(r));
+      }
+    } finally {
+      admin.balanceSwitch(true);
+      cluster.getMaster().setCatalogJanitorEnabled(true);
+    }
+  }
+
+  /**
+   * Test that if daughter split on us, we won't do the shutdown handler fixup
+   * just because we can't find the immediate daughter of an offlined parent.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Test public void testShutdownFixupWhenDaughterHasSplit()
+  throws IOException, InterruptedException {
+    final byte [] tableName =
+      Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
+
+    // Create table then get the single region for our new table.
+    HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
+
+    List<HRegion> regions = cluster.getRegions(tableName);
+    assertEquals(1, regions.size());
+    HRegionInfo hri = regions.get(0).getRegionInfo();
+
+    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
+
+    // Turn off balancer so it doesn't cut in and mess up our placements.
+    this.admin.balanceSwitch(false);
+    // Turn off the meta scanner so it don't remove parent on us.
+    cluster.getMaster().setCatalogJanitorEnabled(false);
+    try {
+      // Add a bit of load up into the table so splittable.
+      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
+      // Get region pre-split.
+      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
+      printOutRegions(server, "Initial regions: ");
+      int regionCount = server.getOnlineRegions().size();
+      // Now split.
+      split(hri, server, regionCount);
+      // Get daughters
+      List<HRegion> daughters = cluster.getRegions(tableName);
+      assertTrue(daughters.size() >= 2);
+      // Now split one of the daughters.
+      regionCount = server.getOnlineRegions().size();
+      split(daughters.get(0).getRegionInfo(), server, regionCount);
+      // Get list of daughters
+      daughters = cluster.getRegions(tableName);
+      // Now crash the server
+      cluster.abortRegionServer(tableRegionIndex);
+      while(server.getOnlineRegions().size() > 0) {
+        LOG.info("Waiting on server to go down");
+        Thread.sleep(100);
+      }
+      // Wait till regions are back on line again.
+      while(cluster.getRegions(tableName).size() < daughters.size()) {
+        LOG.info("Waiting for repair to happen");
+        Thread.sleep(1000);
+      }
+      // Assert daughters are online and ONLY the original daughters -- that
+      // fixup didn't insert one during server shutdown recover.
+      regions = cluster.getRegions(tableName);
+      assertEquals(daughters.size(), regions.size());
+      for (HRegion r: regions) {
+        assertTrue(daughters.contains(r));
+      }
+    } finally {
+      admin.balanceSwitch(true);
+      cluster.getMaster().setCatalogJanitorEnabled(true);
+    }
+  }
+
+  private void split(final HRegionInfo hri, final HRegionServer server,
+      final int regionCount)
+  throws IOException, InterruptedException {
+    this.admin.split(hri.getRegionNameAsString());
+    while(server.getOnlineRegions().size() <= regionCount) {
+      LOG.debug("Waiting on region to split");
+      Thread.sleep(100);
+    }
+  }
+
+  private void removeDaughterFromMeta(final byte [] regionName) throws 
IOException {
+    HTable metaTable =
+      new HTable(TESTING_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
+    Delete d = new Delete(regionName);
+    LOG.info("Deleted " + Bytes.toString(regionName));
+    metaTable.delete(d);
+  }
+
+  /**
+   * Ensure single table region is not on same server as the single .META. 
table
+   * region.
+   * @param admin
+   * @param hri
+   * @return Index of the server hosting the single table region
+   * @throws UnknownRegionException
+   * @throws MasterNotRunningException
+   * @throws ZooKeeperConnectionException
+   * @throws InterruptedException
+   */
+  private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
+      final HRegionInfo hri)
+  throws UnknownRegionException, MasterNotRunningException,
+  ZooKeeperConnectionException, InterruptedException {
+    MiniHBaseCluster cluster = TESTING_UTIL.getMiniHBaseCluster();
+    // Now make sure that the table region is not on same server as that 
hosting
+    // .META.  We don't want .META. replay polluting our test when we later 
crash
+    // the table region serving server.
+    int metaServerIndex = cluster.getServerWithMeta();
+    assertTrue(metaServerIndex != -1);
+    HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
+    int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
+    assertTrue(tableRegionIndex != -1);
+    HRegionServer tableRegionServer = 
cluster.getRegionServer(tableRegionIndex);
+    if 
(metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
+      HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
+      admin.move(hri.getEncodedNameAsBytes(), 
Bytes.toBytes(hrs.getServerName()));
+    }
+    // Wait till table region is up on the server that is NOT carrying .META..
+    while (true) {
+      tableRegionIndex = cluster.getServerWith(hri.getRegionName());
+      if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
+      LOG.debug("Waiting on region move off the .META. server; current index " 
+
+        tableRegionIndex);
+      Thread.sleep(100);
+    }
+    // Verify for sure table region is not on same server as .META.
+    tableRegionIndex = cluster.getServerWith(hri.getRegionName());
+    assertTrue(tableRegionIndex != -1);
+    assertNotSame(metaServerIndex, tableRegionIndex);
+    return tableRegionIndex;
+  }
+
+  /**
+   * Find regionserver other than the one passed.
+   * Can't rely on indexes into list of regionservers since crashed servers
+   * occupy an index.
+   * @param cluster
+   * @param notThisOne
+   * @return A regionserver that is not <code>notThisOne</code> or null if none
+   * found
+   */
+  private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
+      final HRegionServer notThisOne) {
+    for (RegionServerThread rst: cluster.getRegionServerThreads()) {
+      HRegionServer hrs = rst.getRegionServer();
+      if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
+      return hrs;
+    }
+    return null;
+  }
+
+  private void printOutRegions(final HRegionServer hrs, final String prefix) {
+    List<HRegionInfo> regions = hrs.getOnlineRegions();
+    for (HRegionInfo region: regions) {
+      LOG.info(prefix + region.getRegionNameAsString());
+    }
+  }
+}
\ No newline at end of file


Reply via email to