Author: stack
Date: Fri Jan 7 20:17:38 2011
New Revision: 1056484
URL: http://svn.apache.org/viewvc?rev=1056484&view=rev
Log:
HBASE-3403 Region orphaned after failure during split
Modified:
hbase/branches/0.90/CHANGES.txt
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
Modified: hbase/branches/0.90/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Fri Jan 7 20:17:38 2011
@@ -786,6 +786,7 @@ Release 0.90.0 - Unreleased
HBASE-3401 Region IPC operations should be high priority
HBASE-3418 Increment operations can break when qualifiers are split
between memstore/snapshot and storefiles
+ HBASE-3403 Region orphaned after failure during split
IMPROVEMENTS
Modified:
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java Fri Jan 7 20:17:38 2011
@@ -77,10 +77,6 @@ public class MetaEditor {
copyOfParent.setSplit(true);
Put put = new Put(copyOfParent.getRegionName());
addRegionInfo(put, copyOfParent);
- put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
- HConstants.EMPTY_BYTE_ARRAY);
- put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
- HConstants.EMPTY_BYTE_ARRAY);
put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER,
Writables.getBytes(a));
put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER,
Modified:
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java Fri Jan 7 20:17:38 2011
@@ -234,9 +234,28 @@ public class MetaReader {
public static void fullScan(CatalogTracker catalogTracker,
final Visitor visitor)
throws IOException {
+ fullScan(catalogTracker, visitor, null);
+ }
+
+ /**
+ * Performs a full scan of <code>.META.</code>.
+ * <p>
+ * Returns a map of every region to its currently assigned server, according
+ * to META. If the region does not have an assignment it will have a null
+ * value in the map.
+ * @param catalogTracker
+ * @param visitor
+ * @param startrow Where to start the scan. Pass null to begin the scan
+ * at the first row.
+ * @throws IOException
+ */
+ public static void fullScan(CatalogTracker catalogTracker,
+ final Visitor visitor, final byte [] startrow)
+ throws IOException {
HRegionInterface metaServer =
catalogTracker.waitForMetaServerConnectionDefault();
Scan scan = new Scan();
+ if (startrow != null) scan.setStartRow(startrow);
scan.addFamily(HConstants.CATALOG_FAMILY);
long scannerid = metaServer.openScanner(
HRegionInfo.FIRST_META_REGIONINFO.getRegionName(), scan);
Modified:
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java Fri Jan 7 20:17:38 2011
@@ -51,6 +51,7 @@ class CatalogJanitor extends Chore {
private static final Log LOG =
LogFactory.getLog(CatalogJanitor.class.getName());
private final Server server;
private final MasterServices services;
+ private boolean enabled = true;
CatalogJanitor(final Server server, final MasterServices services) {
super(server.getServerName() + "-CatalogJanitor",
@@ -63,7 +64,7 @@ class CatalogJanitor extends Chore {
@Override
protected boolean initialChore() {
try {
- scan();
+ if (this.enabled) scan();
} catch (IOException e) {
LOG.warn("Failed initial scan of catalog table", e);
return false;
@@ -71,6 +72,13 @@ class CatalogJanitor extends Chore {
return true;
}
+ /**
+ * @param enabled
+ */
+ public void setEnabled(final boolean enabled) {
+ this.enabled = enabled;
+ }
+
@Override
protected void chore() {
try {
Modified:
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Fri Jan 7 20:17:38 2011
@@ -703,6 +703,16 @@ implements HMasterInterface, HMasterRegi
return oldValue;
}
+ /**
+ * Switch for the background {@link CatalogJanitor} thread.
+ * Used for testing. The thread will continue to run. It will just be a noop
+ * if disabled.
+ * @param b If false, the catalog janitor won't do anything.
+ */
+ public void setCatalogJanitorEnabled(final boolean b) {
+ ((CatalogJanitor)this.catalogJanitorChore).setEnabled(b);
+ }
+
@Override
public void move(final byte[] encodedRegionName, final byte[] destServerName)
throws UnknownRegionException {
Modified:
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1056484&r1=1056483&r2=1056484&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri Jan 7 20:17:38 2011
@@ -28,7 +28,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
@@ -37,11 +36,11 @@ import org.apache.hadoop.hbase.catalog.M
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.ServerManager;
-import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
-import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.zookeeper.KeeperException;
@@ -210,18 +209,95 @@ public class ServerShutdownHandler exten
final AssignmentManager assignmentManager,
final CatalogTracker catalogTracker)
throws IOException {
- byte [] bytes = result.getValue(HConstants.CATALOG_FAMILY, qualifier);
- if (bytes == null || bytes.length <= 0) return;
- HRegionInfo hri = Writables.getHRegionInfoOrNull(bytes);
- if (hri == null) return;
- Pair<HRegionInfo, HServerAddress> pair =
- MetaReader.getRegion(catalogTracker, hri.getRegionName());
- if (pair == null || pair.getFirst() == null) {
- LOG.info("Fixup; missing daughter " + hri.getEncodedName());
- MetaEditor.addDaughter(catalogTracker, hri, null);
- assignmentManager.assign(hri, true);
+ HRegionInfo daughter = getHRegionInfo(result, qualifier);
+ if (daughter == null) return;
+ if (isDaughterMissing(catalogTracker, daughter)) {
+ LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
+ MetaEditor.addDaughter(catalogTracker, daughter, null);
+ // And assign it.
+ assignmentManager.assign(daughter, true);
} else {
- LOG.debug("Daughter " + hri.getRegionNameAsString() + " present");
+ LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
+ }
+ }
+
+ /**
+ * Interpret the content of the cell at {@link HConstants#CATALOG_FAMILY} and
+ * <code>qualifier</code> as an HRegionInfo and return it, or null.
+ * @param r Result instance to pull from.
+ * @param qualifier Column family qualifier
+ * @return An HRegionInfo instance or null.
+ * @throws IOException
+ */
+ private static HRegionInfo getHRegionInfo(final Result r, byte [] qualifier)
+ throws IOException {
+ byte [] bytes = r.getValue(HConstants.CATALOG_FAMILY, qualifier);
+ if (bytes == null || bytes.length <= 0) return null;
+ return Writables.getHRegionInfoOrNull(bytes);
+ }
+
+ /**
+ * Look for presence of the daughter OR of a split of the daughter. Daughter
+ * could have been split over on regionserver before a run of the
+ * catalogJanitor had chance to clear reference from parent.
+ * @param daughter Daughter region to search for.
+ * @throws IOException
+ */
+ private static boolean isDaughterMissing(final CatalogTracker catalogTracker,
+ final HRegionInfo daughter) throws IOException {
+ FindDaughterVisitor visitor = new FindDaughterVisitor(daughter);
+ // Start the scan at what should be the daughter's row in the .META.
+ // We will either 1., find the daughter or some derivative split of the
+ // daughter (will have same table name and start row at least but will sort
+ // after because has larger regionid -- the regionid is timestamp of region
+ // creation), OR, we will not find anything with same table name and start
+ // row. If the latter, then assume daughter missing and do fixup.
+ byte [] startrow = daughter.getRegionName();
+ MetaReader.fullScan(catalogTracker, visitor, startrow);
+ return !visitor.foundDaughter();
+ }
+
+ /**
+ * Looks for daughter. Sets a flag if daughter or some progeny of daughter
+ * is found up in <code>.META.</code>.
+ */
+ static class FindDaughterVisitor implements MetaReader.Visitor {
+ private final HRegionInfo daughter;
+ private boolean found = false;
+
+ FindDaughterVisitor(final HRegionInfo daughter) {
+ this.daughter = daughter;
+ }
+
+ /**
+ * @return True if we found a daughter region during our visiting.
+ */
+ boolean foundDaughter() {
+ return this.found;
+ }
+
+ @Override
+ public boolean visit(Result r) throws IOException {
+ HRegionInfo hri = getHRegionInfo(r, HConstants.REGIONINFO_QUALIFIER);
+ if (hri == null) {
+ LOG.warn("No serialized HRegionInfo in " + r);
+ return true;
+ }
+ // Now see if we have gone beyond the daughter's startrow.
+ if (!Bytes.equals(daughter.getTableDesc().getName(),
+ hri.getTableDesc().getName())) {
+ // We fell into another table. Stop scanning.
+ return false;
+ }
+ // If our start rows do not compare, the daughter is not here. Stop scanning.
+ if (!Bytes.equals(daughter.getStartKey(), hri.getStartKey())) {
+ return false;
+ }
+ // Else, table name and start rows compare. It means that the daughter
+ // or some derivative split of the daughter is up in .META. Daughter
+ // exists.
+ this.found = true;
+ return false;
}
}
-}
+}
\ No newline at end of file