Author: jdcryans
Date: Fri Jul 10 14:58:26 2009
New Revision: 792964
URL: http://svn.apache.org/viewvc?rev=792964&view=rev
Log:
HBASE-698 HLog recovery is not performed after master failure
Modified:
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
Modified: hadoop/hbase/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=792964&r1=792963&r2=792964&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Fri Jul 10 14:58:26 2009
@@ -458,6 +458,7 @@
HBASE-1637 Delete client class methods should return itself like Put, Get,
Scan (Jon Gray via Nitay)
HBASE-1640 Allow passing arguments to jruby script run when run by
bin/hbase shell
+ HBASE-698 HLog recovery is not performed after master failure
OPTIMIZATIONS
HBASE-1412 Change values for delete column and column family in KeyValue
Modified:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java?rev=792964&r1=792963&r2=792964&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
(original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
Fri Jul 10 14:58:26 2009
@@ -42,7 +42,6 @@
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
-import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile;
@@ -372,33 +371,6 @@
" is not valid; " + " Server '" + serverAddress + "' startCode: " +
startCode + " unknown.");
}
-
- // Recover the region server's log if there is one.
- // This is only done from here if we are restarting and there is stale
- // data in the meta region. Once we are on-line, dead server log
- // recovery is handled by lease expiration and ProcessServerShutdown
- if (!this.master.regionManager.isInitialMetaScanComplete() &&
- serverName != null) {
- Path logDir =
- new Path(this.master.rootdir,
HLog.getHLogDirectoryName(serverName));
- try {
- if (master.fs.exists(logDir)) {
- this.master.regionManager.splitLogLock.lock();
- try {
- HLog.splitLog(master.rootdir, logDir, master.fs,
- master.getConfiguration());
- } finally {
- this.master.regionManager.splitLogLock.unlock();
- }
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Split " + logDir.toString());
- }
- } catch (IOException e) {
- LOG.warn("unable to split region server log because: ", e);
- throw e;
- }
- }
// Now get the region assigned
this.master.regionManager.setUnassigned(info, true);
}
Modified:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=792964&r1=792963&r2=792964&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
(original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Fri
Jul 10 14:58:26 2009
@@ -38,6 +38,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
@@ -71,6 +72,7 @@
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
+import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
@@ -543,6 +545,7 @@
// Check if this is a fresh start of the cluster
if(addresses.size() == 0) {
LOG.debug("This is a fresh start, proceeding with normal startup");
+ splitLogAfterStartup();
return;
}
LOG.info("This is a failover, ZK inspection begins...");
@@ -579,10 +582,45 @@
}
LOG.info("Inspection found " + assignedRegions.size() + " regions, " +
(isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA"));
+ splitLogAfterStartup();
} catch(IOException ex) {
ex.printStackTrace();
}
}
+
+ /**
+ * Inspect the log directory and split the logs found in any folder whose
+ * name does not match a region server known to the server manager.
+ * @throws IOException if inspecting the log directory or splitting a log fails
+ */
+ private void splitLogAfterStartup() throws IOException {
+ Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
+ FileStatus [] logFolders = this.fs.listStatus(logsDirPath);
+ if (logFolders == null || logFolders.length == 0) {
+ LOG.debug("No log files to split, proceeding...");
+ return;
+ }
+ for (FileStatus status : logFolders) {
+ // The folder name is used as the server name for the lookup below.
+ String serverName = status.getPath().getName();
+ LOG.info("Found log folder : " + serverName);
+ if(this.serverManager.getServerInfo(serverName) == null) {
+ LOG.info("Log folder doesn't belong " +
+ "to a known region server, splitting");
+ Path logDir =
+ new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
+ // Lock immediately before the try so the finally always releases it.
+ this.regionManager.splitLogLock.lock();
+ try {
+ HLog.splitLog(this.rootdir, logDir, this.fs, getConfiguration());
+ } finally {
+ this.regionManager.splitLogLock.unlock();
+ }
+ } else {
+ LOG.info("Log folder belongs to an existing region server");
+ }
+ }
+ }
/*
* Start up all services. If any of these threads gets an unhandled exception