Author: stack
Date: Fri Jan  7 20:17:38 2011
New Revision: 1056484

URL: http://svn.apache.org/viewvc?rev=1056484&view=rev
Log:
HBASE-3403 Region orphaned after failure during split

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Fri Jan  7 20:17:38 2011
@@ -786,6 +786,7 @@ Release 0.90.0 - Unreleased
    HBASE-3401  Region IPC operations should be high priority
    HBASE-3418  Increment operations can break when qualifiers are split
                between memstore/snapshot and storefiles
+   HBASE-3403  Region orphaned after failure during split
 
 
   IMPROVEMENTS

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java Fri Jan  7 20:17:38 2011
@@ -77,10 +77,6 @@ public class MetaEditor {
     copyOfParent.setSplit(true);
     Put put = new Put(copyOfParent.getRegionName());
     addRegionInfo(put, copyOfParent);
-    put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-        HConstants.EMPTY_BYTE_ARRAY);
-    put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
-        HConstants.EMPTY_BYTE_ARRAY);
     put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER,
       Writables.getBytes(a));
     put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER,

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java Fri Jan  7 20:17:38 2011
@@ -234,9 +234,28 @@ public class MetaReader {
   public static void fullScan(CatalogTracker catalogTracker,
       final Visitor visitor)
   throws IOException {
+    fullScan(catalogTracker, visitor, null);
+  }
+
+  /**
+   * Performs a full scan of <code>.META.</code>.
+   * <p>
+   * Returns a map of every region to its currently assigned server, according
+   * to META.  If the region does not have an assignment it will have a null
+   * value in the map.
+   * @param catalogTracker
+   * @param visitor
+   * @param startrow Where to start the scan. Pass null if want to begin scan
+   * at first row.
+   * @throws IOException
+   */
+  public static void fullScan(CatalogTracker catalogTracker,
+      final Visitor visitor, final byte [] startrow)
+  throws IOException {
     HRegionInterface metaServer =
       catalogTracker.waitForMetaServerConnectionDefault();
     Scan scan = new Scan();
+    if (startrow != null) scan.setStartRow(startrow);
     scan.addFamily(HConstants.CATALOG_FAMILY);
     long scannerid = metaServer.openScanner(
         HRegionInfo.FIRST_META_REGIONINFO.getRegionName(), scan);

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java Fri Jan  7 20:17:38 2011
@@ -51,6 +51,7 @@ class CatalogJanitor extends Chore {
   private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
   private final Server server;
   private final MasterServices services;
+  private boolean enabled = true;
 
   CatalogJanitor(final Server server, final MasterServices services) {
     super(server.getServerName() + "-CatalogJanitor",
@@ -63,7 +64,7 @@ class CatalogJanitor extends Chore {
   @Override
   protected boolean initialChore() {
     try {
-      scan();
+      if (this.enabled) scan();
     } catch (IOException e) {
       LOG.warn("Failed initial scan of catalog table", e);
       return false;
@@ -71,6 +72,13 @@ class CatalogJanitor extends Chore {
     return true;
   }
 
+  /**
+   * @param enabled
+   */
+  public void setEnabled(final boolean enabled) {
+    this.enabled = enabled;
+  }
+
   @Override
   protected void chore() {
     try {

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Fri Jan  7 20:17:38 2011
@@ -703,6 +703,16 @@ implements HMasterInterface, HMasterRegi
     return oldValue;
   }
 
+  /**
+   * Switch for the background {@link CatalogJanitor} thread.
+   * Used for testing.  The thread will continue to run.  It will just be a noop
+   * if disabled.
+   * @param b If false, the catalog janitor won't do anything.
+   */
+  public void setCatalogJanitorEnabled(final boolean b) {
+    ((CatalogJanitor)this.catalogJanitorChore).setEnabled(b);
+  }
+
   @Override
   public void move(final byte[] encodedRegionName, final byte[] destServerName)
   throws UnknownRegionException {

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri Jan  7 20:17:38 2011
@@ -28,7 +28,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
@@ -37,11 +36,11 @@ import org.apache.hadoop.hbase.catalog.M
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.executor.EventHandler;
 import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
 import org.apache.hadoop.hbase.master.DeadServer;
 import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.ServerManager;
-import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
-import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.zookeeper.KeeperException;
 
@@ -210,18 +209,95 @@ public class ServerShutdownHandler exten
       final AssignmentManager assignmentManager,
       final CatalogTracker catalogTracker)
   throws IOException {
-    byte [] bytes = result.getValue(HConstants.CATALOG_FAMILY, qualifier);
-    if (bytes == null || bytes.length <= 0) return;
-    HRegionInfo hri = Writables.getHRegionInfoOrNull(bytes);
-    if (hri == null) return;
-    Pair<HRegionInfo, HServerAddress> pair =
-      MetaReader.getRegion(catalogTracker, hri.getRegionName());
-    if (pair == null || pair.getFirst() == null) {
-      LOG.info("Fixup; missing daughter " + hri.getEncodedName());
-      MetaEditor.addDaughter(catalogTracker, hri, null);
-      assignmentManager.assign(hri, true);
+    HRegionInfo daughter = getHRegionInfo(result, qualifier);
+    if (daughter == null) return;
+    if (isDaughterMissing(catalogTracker, daughter)) {
+      LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
+      MetaEditor.addDaughter(catalogTracker, daughter, null);
+      // And assign it.
+      assignmentManager.assign(daughter, true);
     } else {
-      LOG.debug("Daughter " + hri.getRegionNameAsString() + " present");
+      LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
+    }
+  }
+
+  /**
+   * Interpret the content of the cell at {@link HConstants#CATALOG_FAMILY} and
+   * <code>qualifier</code> as an HRegionInfo and return it, or null.
+   * @param r Result instance to pull from.
+   * @param qualifier Column family qualifier
+   * @return An HRegionInfo instance or null.
+   * @throws IOException
+   */
+  private static HRegionInfo getHRegionInfo(final Result r, byte [] qualifier)
+  throws IOException {
+    byte [] bytes = r.getValue(HConstants.CATALOG_FAMILY, qualifier);
+    if (bytes == null || bytes.length <= 0) return null;
+    return Writables.getHRegionInfoOrNull(bytes);
+  }
+
+  /**
+   * Look for presence of the daughter OR of a split of the daughter. Daughter
+   * could have been split over on regionserver before a run of the
+   * catalogJanitor had chance to clear reference from parent.
+   * @param daughter Daughter region to search for.
+   * @throws IOException 
+   */
+  private static boolean isDaughterMissing(final CatalogTracker catalogTracker,
+      final HRegionInfo daughter) throws IOException {
+    FindDaughterVisitor visitor = new FindDaughterVisitor(daughter);
+    // Start the scan at what should be the daughter's row in the .META.
+    // We will either 1., find the daughter or some derivative split of the
+    // daughter (will have same table name and start row at least but will sort
+    // after because has larger regionid -- the regionid is timestamp of region
+    // creation), OR, we will not find anything with same table name and start
+    // row.  If the latter, then assume daughter missing and do fixup.
+    byte [] startrow = daughter.getRegionName();
+    MetaReader.fullScan(catalogTracker, visitor, startrow);
+    return !visitor.foundDaughter();
+  }
+
+  /**
+   * Looks for daughter.  Sets a flag if daughter or some progeny of daughter
+   * is found up in <code>.META.</code>.
+   */
+  static class FindDaughterVisitor implements MetaReader.Visitor {
+    private final HRegionInfo daughter;
+    private boolean found = false;
+
+    FindDaughterVisitor(final HRegionInfo daughter) {
+      this.daughter = daughter;
+    }
+
+    /**
+     * @return True if we found a daughter region during our visiting.
+     */
+    boolean foundDaughter() {
+      return this.found;
+    }
+
+    @Override
+    public boolean visit(Result r) throws IOException {
+      HRegionInfo hri = getHRegionInfo(r, HConstants.REGIONINFO_QUALIFIER);
+      if (hri == null) {
+        LOG.warn("No serialized HRegionInfo in " + r);
+        return true;
+      }
+      // Now see if we have gone beyond the daughter's startrow.
+      if (!Bytes.equals(daughter.getTableDesc().getName(),
+          hri.getTableDesc().getName())) {
+        // We fell into another table.  Stop scanning.
+        return false;
+      }
+      // If our start rows do not compare, move on.
+      if (!Bytes.equals(daughter.getStartKey(), hri.getStartKey())) {
+        return false;
+      }
+      // Else, table name and start rows compare.  It means that the daughter
+      // or some derivative split of the daughter is up in .META.  Daughter
+      // exists.
+      this.found = true;
+      return false;
     }
   }
-}
+}
\ No newline at end of file


Reply via email to