Author: stack
Date: Mon Mar 31 17:09:25 2008
New Revision: 643223
URL: http://svn.apache.org/viewvc?rev=643223&view=rev
Log:
HBASE-505 Region assignments should never time out so long as the region
server reports that it is processing the open request
This is patch reviewed with Jim but with the number of edits between
reports made into a configurable.
Have the HRegionServer pass down a Progressable implementation down into
Region and then down int Store where edits are replayed. Call progress
after every couple of thousand edits.
M src/java/org/apache/hadoop/hbase/HStore.java
Take a Progessable in the constructor. Call it when applying edits.
M src/java/org/apache/hadoop/hbase/HMaster.java
Update commment around MSG_REPORT_PROCESS_OPEN so its expected
that we can get more than one of these messages during a region open.
M src/java/org/apache/hadoop/hbase/HRegion.java
New constructor that takes a Progressable. Pass it to Stores on construction.
M src/java/org/apache/hadoop/hbase/HRegionServer.java
On open of a region, pass in a Progressable that adds a
MSG_REPORT_PROCESS_OPEN every time its called.
Modified:
hadoop/hbase/branches/0.1/CHANGES.txt
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
Modified: hadoop/hbase/branches/0.1/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/CHANGES.txt?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.1/CHANGES.txt Mon Mar 31 17:09:25 2008
@@ -1,11 +1,11 @@
HBase Change Log
-
BUG FIXES
HBASE-550 EOF trying to read reconstruction log stops region deployment
HBASE-551 Master stuck splitting server logs in shutdown loop; on each
iteration, edits are aggregated up into the millions
-
+ HBASE-505 Region assignments should never time out so long as the region
+ server reports that it is processing the open request
Release 0.1.0
INCOMPATIBLE CHANGES
Modified:
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java
(original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java Mon
Mar 31 17:09:25 2008
@@ -1567,10 +1567,12 @@
case HMsg.MSG_REPORT_PROCESS_OPEN:
synchronized (unassignedRegions) {
- // Region server has acknowledged request to open region.
+ // Region server is reporting in that its working on region open
+ // (We can get more than one of these messages if region is replaying
+ // a bunch of edits and taking a while to open).
// Extend region open time by max region open time.
- unassignedRegions.put(region,
- System.currentTimeMillis() + this.maxRegionOpenTime);
+ this.unassignedRegions.put(region,
+ Long.valueOf(System.currentTimeMillis() + this.maxRegionOpenTime));
}
break;
Modified:
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java
(original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java Mon
Mar 31 17:09:25 2008
@@ -47,6 +47,7 @@
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
/**
@@ -356,12 +357,41 @@
* @param initialFiles If there are initial files (implying that the HRegion
* is new), then read them from the supplied path.
* @param listener an object that implements CacheFlushListener or null
- *
* @throws IOException
*/
public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration
conf,
HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener)
throws IOException {
+ this(basedir, log, fs, conf, regionInfo, initialFiles, listener, null);
+ }
+
+ /**
+ * HRegion constructor.
+ *
+ * @param log The HLog is the outbound log for any updates to the HRegion
+ * (There's a single HLog for all the HRegions on a single HRegionServer.)
+ * The log file is a logfile from the previous execution that's
+ * custom-computed for this HRegion. The HRegionServer computes and sorts the
+ * appropriate log info for this HRegion. If there is a previous log file
+ * (implying that the HRegion has been written-to before), then read it from
+ * the supplied path.
+ * @param basedir qualified path of directory where region should be located,
+ * usually the table directory.
+ * @param fs is the filesystem.
+ * @param conf is global configuration settings.
+ * @param regionInfo - HRegionInfo that describes the region
+ * @param initialFiles If there are initial files (implying that the HRegion
+ * is new), then read them from the supplied path.
+ * @param listener an object that implements CacheFlushListener or null
+ * @param reporter Call on a period so hosting server can report we're
+ * making progress to master -- otherwise master might think region deploy
+ * failed. Can be null.
+ * @throws IOException
+ */
+ public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration
conf,
+ HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener,
+ final Progressable reporter)
+ throws IOException {
this.basedir = basedir;
this.log = log;
@@ -385,12 +415,9 @@
long maxSeqId = -1;
for(HColumnDescriptor c :
this.regionInfo.getTableDesc().families().values()) {
-
HStore store = new HStore(this.basedir, this.regionInfo, c, this.fs,
- oldLogFile, this.conf);
-
+ oldLogFile, this.conf, reporter);
stores.put(c.getFamilyName(), store);
-
long storeSeqId = store.getMaxSequenceId();
if (storeSeqId > maxSeqId) {
maxSeqId = storeSeqId;
@@ -406,7 +433,7 @@
this.minSequenceId = maxSeqId;
if (LOG.isDebugEnabled()) {
LOG.debug("Next sequence id for region " + regionInfo.getRegionName() +
- " is " + this.minSequenceId);
+ " is " + this.minSequenceId);
}
// Get rid of any splits or merges that were lost in-progress
Modified:
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
---
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
(original)
+++
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
Mon Mar 31 17:09:25 2008
@@ -65,6 +65,7 @@
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
/**
@@ -1160,7 +1161,13 @@
HTableDescriptor.getTableDir(rootDir,
regionInfo.getTableDesc().getName()
),
- this.log, this.fs, conf, regionInfo, null, this.cacheFlusher
+ this.log, this.fs, conf, regionInfo, null, this.cacheFlusher,
+ new Progressable() {
+ public void progress() {
+ getOutboundMsgs().add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
+ regionInfo));
+ }
+ }
);
// Startup a compaction early if one is needed.
this.compactSplitThread.compactionRequested(region);
@@ -1677,11 +1684,18 @@
}
throw new IOException("Unknown protocol to name node: " + protocol);
}
-
+
+ /**
+ * @return Queue to which you can add outbound messages.
+ */
+ protected List<HMsg> getOutboundMsgs() {
+ return this.outboundMsgs;
+ }
+
//
// Main program and support routines
//
-
+
private static void printUsageAndExit() {
printUsageAndExit(null);
}
Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
(original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java Mon
Mar 31 17:09:25 2008
@@ -50,6 +50,7 @@
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
import org.onelab.filter.BloomFilter;
import org.onelab.filter.CountingBloomFilter;
@@ -692,10 +693,14 @@
* @param fs file system object
* @param reconstructionLog existing log file to apply if any
* @param conf configuration object
+ * @param reporter Call on a period so hosting server can report we're
+ * making progress to master -- otherwise master might think region deploy
+ * failed. Can be null.
* @throws IOException
*/
HStore(Path basedir, HRegionInfo info, HColumnDescriptor family,
- FileSystem fs, Path reconstructionLog, HBaseConfiguration conf)
+ FileSystem fs, Path reconstructionLog, HBaseConfiguration conf,
+ final Progressable reporter)
throws IOException {
this.basedir = basedir;
@@ -769,7 +774,7 @@
}
try {
- doReconstructionLog(reconstructionLog, maxSeqId);
+ doReconstructionLog(reconstructionLog, maxSeqId, reporter);
} catch (IOException e) {
// Presume we got here because of some HDFS issue or because of a lack of
// HADOOP-1700; for now keep going but this is probably not what we want
@@ -832,7 +837,7 @@
* reflected in the MapFiles.)
*/
private void doReconstructionLog(final Path reconstructionLog,
- final long maxSeqID)
+ final long maxSeqID, final Progressable reporter)
throws UnsupportedEncodingException, IOException {
if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
@@ -857,6 +862,8 @@
HLogEdit val = new HLogEdit();
long skippedEdits = 0;
long editsCount = 0;
+ // How many edits to apply before we send a progress report.
+ int reportInterval =
this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
while (logReader.next(key, val)) {
maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
if (key.getLogSeqNum() <= maxSeqID) {
@@ -874,6 +881,11 @@
HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp());
reconstructedCache.put(k, val.getVal());
editsCount++;
+ // Every 2k edits, tell the reporter we're making progress.
+ // Have seen 60k edits taking 3minutes to complete.
+ if (reporter != null && (editsCount % reportInterval) == 0) {
+ reporter.progress();
+ }
}
if (LOG.isDebugEnabled()) {
LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +
Modified: hadoop/hbase/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon Mar 31 17:09:25 2008
@@ -4,6 +4,8 @@
HBASE-550 EOF trying to read reconstruction log stops region deployment
HBASE-551 Master stuck splitting server logs in shutdown loop; on each
iteration, edits are aggregated up into the millions
+ HBASE-505 Region assignments should never time out so long as the region
+ server reports that it is processing the open request
Release 0.1.0
Modified:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
---
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
(original)
+++
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
Mon Mar 31 17:09:25 2008
@@ -494,10 +494,12 @@
/** Update the deadline for a region assignment to be completed */
public void updateAssignmentDeadline(HRegionInfo info) {
synchronized (unassignedRegions) {
- // Region server has acknowledged request to open region.
+ // Region server is reporting in that its working on region open
+ // (We can get more than one of these messages if region is replaying
+ // a bunch of edits and taking a while to open).
// Extend region open time by max region open time.
- unassignedRegions.put(info,
- System.currentTimeMillis() + master.maxRegionOpenTime);
+ this.unassignedRegions.put(info,
+ Long.valueOf(System.currentTimeMillis() +
this.master.maxRegionOpenTime));
}
}
Modified:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
---
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
(original)
+++
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
Mon Mar 31 17:09:25 2008
@@ -49,6 +49,7 @@
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hbase.HConstants;
@@ -373,12 +374,41 @@
* @param initialFiles If there are initial files (implying that the HRegion
* is new), then read them from the supplied path.
* @param listener an object that implements CacheFlushListener or null
- *
* @throws IOException
*/
public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration
conf,
HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener)
throws IOException {
+ this(basedir, log, fs, conf, regionInfo, initialFiles, listener, null);
+ }
+
+ /**
+ * HRegion constructor.
+ *
+ * @param log The HLog is the outbound log for any updates to the HRegion
+ * (There's a single HLog for all the HRegions on a single HRegionServer.)
+ * The log file is a logfile from the previous execution that's
+ * custom-computed for this HRegion. The HRegionServer computes and sorts the
+ * appropriate log info for this HRegion. If there is a previous log file
+ * (implying that the HRegion has been written-to before), then read it from
+ * the supplied path.
+ * @param basedir qualified path of directory where region should be located,
+ * usually the table directory.
+ * @param fs is the filesystem.
+ * @param conf is global configuration settings.
+ * @param regionInfo - HRegionInfo that describes the region
+ * @param initialFiles If there are initial files (implying that the HRegion
+ * is new), then read them from the supplied path.
+ * @param listener an object that implements CacheFlushListener or null
+ * @param reporter Call on a period so hosting server can report we're
+ * making progress to master -- otherwise master might think region deploy
+ * failed. Can be null.
+ * @throws IOException
+ */
+ public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration
conf,
+ HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener,
+ final Progressable reporter)
+ throws IOException {
this.basedir = basedir;
this.log = log;
@@ -402,12 +432,9 @@
long maxSeqId = -1;
for(HColumnDescriptor c :
this.regionInfo.getTableDesc().families().values()) {
-
HStore store = new HStore(this.basedir, this.regionInfo, c, this.fs,
- oldLogFile, this.conf);
-
+ oldLogFile, this.conf, reporter);
stores.put(c.getFamilyName(), store);
-
long storeSeqId = store.getMaxSequenceId();
if (storeSeqId > maxSeqId) {
maxSeqId = storeSeqId;
@@ -423,7 +450,7 @@
this.minSequenceId = maxSeqId;
if (LOG.isDebugEnabled()) {
LOG.debug("Next sequence id for region " + regionInfo.getRegionName() +
- " is " + this.minSequenceId);
+ " is " + this.minSequenceId);
}
// Get rid of any splits or merges that were lost in-progress
Modified:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
---
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
(original)
+++
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Mon Mar 31 17:09:25 2008
@@ -82,6 +82,7 @@
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
/**
@@ -785,7 +786,13 @@
HTableDescriptor.getTableDir(rootDir,
regionInfo.getTableDesc().getName()
),
- this.log, this.fs, conf, regionInfo, null, this.cacheFlusher
+ this.log, this.fs, conf, regionInfo, null, this.cacheFlusher,
+ new Progressable() {
+ public void progress() {
+ getOutboundMsgs().add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
+ regionInfo));
+ }
+ }
);
// Startup a compaction early if one is needed.
this.compactSplitThread.compactionRequested(region);
@@ -1336,11 +1343,18 @@
}
throw new IOException("Unknown protocol to name node: " + protocol);
}
-
+
+ /**
+ * @return Queue to which you can add outbound messages.
+ */
+ protected List<HMsg> getOutboundMsgs() {
+ return this.outboundMsgs;
+ }
+
//
// Main program and support routines
//
-
+
private static void printUsageAndExit() {
printUsageAndExit(null);
}
Modified:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL:
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
---
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
(original)
+++
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
Mon Mar 31 17:09:25 2008
@@ -54,6 +54,7 @@
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hbase.BloomFilterDescriptor;
import org.onelab.filter.BloomFilter;
@@ -159,10 +160,14 @@
* @param fs file system object
* @param reconstructionLog existing log file to apply if any
* @param conf configuration object
+ * @param reporter Call on a period so hosting server can report we're
+ * making progress to master -- otherwise master might think region deploy
+ * failed. Can be null.
* @throws IOException
*/
HStore(Path basedir, HRegionInfo info, HColumnDescriptor family,
- FileSystem fs, Path reconstructionLog, HBaseConfiguration conf)
+ FileSystem fs, Path reconstructionLog, HBaseConfiguration conf,
+ final Progressable reporter)
throws IOException {
this.basedir = basedir;
this.info = info;
@@ -235,7 +240,7 @@
}
try {
- doReconstructionLog(reconstructionLog, maxSeqId);
+ doReconstructionLog(reconstructionLog, maxSeqId, reporter);
} catch (IOException e) {
// Presume we got here because of some HDFS issue or because of a lack of
// HADOOP-1700; for now keep going but this is probably not what we want
@@ -308,7 +313,7 @@
* reflected in the MapFiles.)
*/
private void doReconstructionLog(final Path reconstructionLog,
- final long maxSeqID)
+ final long maxSeqID, final Progressable reporter)
throws UnsupportedEncodingException, IOException {
if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
// Nothing to do.
@@ -332,6 +337,8 @@
HLogEdit val = new HLogEdit();
long skippedEdits = 0;
long editsCount = 0;
+ // How many edits to apply before we send a progress report.
+ int reportInterval =
this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
while (logReader.next(key, val)) {
maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
if (key.getLogSeqNum() <= maxSeqID) {
@@ -349,6 +356,11 @@
HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp());
reconstructedCache.put(k, val.getVal());
editsCount++;
+ // Every 2k edits, tell the reporter we're making progress.
+ // Have seen 60k edits taking 3minutes to complete.
+ if (reporter != null && (editsCount % reportInterval) == 0) {
+ reporter.progress();
+ }
}
if (LOG.isDebugEnabled()) {
LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +