Author: stack
Date: Mon Mar 31 17:09:25 2008
New Revision: 643223

URL: http://svn.apache.org/viewvc?rev=643223&view=rev
Log:
HBASE-505 Region assignments should never time out so long as the region
server reports that it is processing the open request

This is the patch reviewed with Jim, but with the number of edits between
progress reports made configurable.

Have the HRegionServer pass a Progressable implementation down into
Region and then down into Store, where edits are replayed. Call progress
after every couple of thousand edits.

M src/java/org/apache/hadoop/hbase/HStore.java
  Take a Progressable in the constructor. Call it when applying edits.
M src/java/org/apache/hadoop/hbase/HMaster.java
  Update comment around MSG_REPORT_PROCESS_OPEN so it's expected
  that we can get more than one of these messages during a region open.
M src/java/org/apache/hadoop/hbase/HRegion.java
  New constructor that takes a Progressable. Pass it to Stores on construction.
M src/java/org/apache/hadoop/hbase/HRegionServer.java
  On open of a region, pass in a Progressable that adds a
  MSG_REPORT_PROCESS_OPEN every time it's called.

Modified:
    hadoop/hbase/branches/0.1/CHANGES.txt
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java
    
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
    hadoop/hbase/trunk/CHANGES.txt
    
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java

Modified: hadoop/hbase/branches/0.1/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/CHANGES.txt?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.1/CHANGES.txt Mon Mar 31 17:09:25 2008
@@ -1,11 +1,11 @@
 HBase Change Log
 
-
   BUG FIXES
    HBASE-550   EOF trying to read reconstruction log stops region deployment
    HBASE-551   Master stuck splitting server logs in shutdown loop; on each
                iteration, edits are aggregated up into the millions
-
+   HBASE-505   Region assignments should never time out so long as the region
+               server reports that it is processing the open request
 Release 0.1.0
 
   INCOMPATIBLE CHANGES

Modified: 
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java 
(original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java Mon 
Mar 31 17:09:25 2008
@@ -1567,10 +1567,12 @@
 
       case HMsg.MSG_REPORT_PROCESS_OPEN:
         synchronized (unassignedRegions) {
-          // Region server has acknowledged request to open region.
+          // Region server is reporting in that its working on region open
+          // (We can get more than one of these messages if region is replaying
+          // a bunch of edits and taking a while to open).
           // Extend region open time by max region open time.
-          unassignedRegions.put(region,
-              System.currentTimeMillis() + this.maxRegionOpenTime);
+          this.unassignedRegions.put(region,
+            Long.valueOf(System.currentTimeMillis() + this.maxRegionOpenTime));
         }
         break;
         

Modified: 
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java 
(original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegion.java Mon 
Mar 31 17:09:25 2008
@@ -47,6 +47,7 @@
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -356,12 +357,41 @@
    * @param initialFiles If there are initial files (implying that the HRegion
    * is new), then read them from the supplied path.
    * @param listener an object that implements CacheFlushListener or null
-   * 
    * @throws IOException
    */
   public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration 
conf, 
       HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener)
     throws IOException {
+    this(basedir, log, fs, conf, regionInfo, initialFiles, listener, null);
+  }
+  
+  /**
+   * HRegion constructor.
+   *
+   * @param log The HLog is the outbound log for any updates to the HRegion
+   * (There's a single HLog for all the HRegions on a single HRegionServer.)
+   * The log file is a logfile from the previous execution that's
+   * custom-computed for this HRegion. The HRegionServer computes and sorts the
+   * appropriate log info for this HRegion. If there is a previous log file
+   * (implying that the HRegion has been written-to before), then read it from
+   * the supplied path.
+   * @param basedir qualified path of directory where region should be located,
+   * usually the table directory.
+   * @param fs is the filesystem.  
+   * @param conf is global configuration settings.
+   * @param regionInfo - HRegionInfo that describes the region
+   * @param initialFiles If there are initial files (implying that the HRegion
+   * is new), then read them from the supplied path.
+   * @param listener an object that implements CacheFlushListener or null
+   * @param reporter Call on a period so hosting server can report we're
+   * making progress to master -- otherwise master might think region deploy
+   * failed.  Can be null.
+   * @throws IOException
+   */
+  public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration 
conf, 
+      HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener,
+      final Progressable reporter)
+    throws IOException {
     
     this.basedir = basedir;
     this.log = log;
@@ -385,12 +415,9 @@
     long maxSeqId = -1;
     for(HColumnDescriptor c :
       this.regionInfo.getTableDesc().families().values()) {
-
       HStore store = new HStore(this.basedir, this.regionInfo, c, this.fs,
-          oldLogFile, this.conf);
-
+        oldLogFile, this.conf, reporter);
       stores.put(c.getFamilyName(), store);
-
       long storeSeqId = store.getMaxSequenceId();
       if (storeSeqId > maxSeqId) {
         maxSeqId = storeSeqId;
@@ -406,7 +433,7 @@
     this.minSequenceId = maxSeqId;
     if (LOG.isDebugEnabled()) {
       LOG.debug("Next sequence id for region " + regionInfo.getRegionName() +
-          " is " + this.minSequenceId);
+        " is " + this.minSequenceId);
     }
 
     // Get rid of any splits or merges that were lost in-progress

Modified: 
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- 
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java 
(original)
+++ 
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HRegionServer.java 
Mon Mar 31 17:09:25 2008
@@ -65,6 +65,7 @@
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -1160,7 +1161,13 @@
             HTableDescriptor.getTableDir(rootDir,
                 regionInfo.getTableDesc().getName()
             ),
-            this.log, this.fs, conf, regionInfo, null, this.cacheFlusher
+            this.log, this.fs, conf, regionInfo, null, this.cacheFlusher,
+            new Progressable() {
+              public void progress() {
+                getOutboundMsgs().add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
+                  regionInfo));
+              }
+            }
         );
         // Startup a compaction early if one is needed.
         this.compactSplitThread.compactionRequested(region);
@@ -1677,11 +1684,18 @@
     }
     throw new IOException("Unknown protocol to name node: " + protocol);
   }
-
+  
+  /**
+   * @return Queue to which you can add outbound messages.
+   */
+  protected List<HMsg> getOutboundMsgs() {
+    return this.outboundMsgs;
+  }
+  
   //
   // Main program and support routines
   //
-  
+
   private static void printUsageAndExit() {
     printUsageAndExit(null);
   }

Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java 
(original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java Mon 
Mar 31 17:09:25 2008
@@ -50,6 +50,7 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
 import org.onelab.filter.BloomFilter;
 import org.onelab.filter.CountingBloomFilter;
@@ -692,10 +693,14 @@
    * @param fs file system object
    * @param reconstructionLog existing log file to apply if any
    * @param conf configuration object
+   * @param reporter Call on a period so hosting server can report we're
+   * making progress to master -- otherwise master might think region deploy
+   * failed.  Can be null.
    * @throws IOException
    */
   HStore(Path basedir, HRegionInfo info, HColumnDescriptor family,
-      FileSystem fs, Path reconstructionLog, HBaseConfiguration conf)
+      FileSystem fs, Path reconstructionLog, HBaseConfiguration conf,
+      final Progressable reporter)
       throws IOException {  
     
     this.basedir = basedir;
@@ -769,7 +774,7 @@
     }
     
     try {
-      doReconstructionLog(reconstructionLog, maxSeqId);
+      doReconstructionLog(reconstructionLog, maxSeqId, reporter);
     } catch (IOException e) {
       // Presume we got here because of some HDFS issue or because of a lack of
       // HADOOP-1700; for now keep going but this is probably not what we want
@@ -832,7 +837,7 @@
    * reflected in the MapFiles.)
    */
   private void doReconstructionLog(final Path reconstructionLog,
-    final long maxSeqID)
+    final long maxSeqID, final Progressable reporter)
   throws UnsupportedEncodingException, IOException {
     
     if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
@@ -857,6 +862,8 @@
       HLogEdit val = new HLogEdit();
       long skippedEdits = 0;
       long editsCount = 0;
+      // How many edits to apply before we send a progress report.
+      int reportInterval = 
this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
       while (logReader.next(key, val)) {
         maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
         if (key.getLogSeqNum() <= maxSeqID) {
@@ -874,6 +881,11 @@
         HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp());
         reconstructedCache.put(k, val.getVal());
         editsCount++;
+        // Every 2k edits, tell the reporter we're making progress.
+        // Have seen 60k edits taking 3minutes to complete.
+        if (reporter != null && (editsCount % reportInterval) == 0) {
+          reporter.progress();
+        }
       }
       if (LOG.isDebugEnabled()) {
         LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon Mar 31 17:09:25 2008
@@ -4,6 +4,8 @@
    HBASE-550   EOF trying to read reconstruction log stops region deployment
    HBASE-551   Master stuck splitting server logs in shutdown loop; on each
                iteration, edits are aggregated up into the millions
+   HBASE-505   Region assignments should never time out so long as the region
+               server reports that it is processing the open request
 
 Release 0.1.0
 

Modified: 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java 
(original)
+++ 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java 
Mon Mar 31 17:09:25 2008
@@ -494,10 +494,12 @@
   /** Update the deadline for a region assignment to be completed */
   public void updateAssignmentDeadline(HRegionInfo info) {
     synchronized (unassignedRegions) {
-      // Region server has acknowledged request to open region.
+      // Region server is reporting in that its working on region open
+      // (We can get more than one of these messages if region is replaying
+      // a bunch of edits and taking a while to open).
       // Extend region open time by max region open time.
-      unassignedRegions.put(info,
-        System.currentTimeMillis() + master.maxRegionOpenTime);
+      this.unassignedRegions.put(info,
+        Long.valueOf(System.currentTimeMillis() + 
this.master.maxRegionOpenTime));
     }
   }
   

Modified: 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java 
(original)
+++ 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java 
Mon Mar 31 17:09:25 2008
@@ -49,6 +49,7 @@
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
 
 import org.apache.hadoop.hbase.HConstants;
@@ -373,12 +374,41 @@
    * @param initialFiles If there are initial files (implying that the HRegion
    * is new), then read them from the supplied path.
    * @param listener an object that implements CacheFlushListener or null
-   * 
    * @throws IOException
    */
   public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration 
conf, 
       HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener)
     throws IOException {
+    this(basedir, log, fs, conf, regionInfo, initialFiles, listener, null);
+  }
+  
+  /**
+   * HRegion constructor.
+   *
+   * @param log The HLog is the outbound log for any updates to the HRegion
+   * (There's a single HLog for all the HRegions on a single HRegionServer.)
+   * The log file is a logfile from the previous execution that's
+   * custom-computed for this HRegion. The HRegionServer computes and sorts the
+   * appropriate log info for this HRegion. If there is a previous log file
+   * (implying that the HRegion has been written-to before), then read it from
+   * the supplied path.
+   * @param basedir qualified path of directory where region should be located,
+   * usually the table directory.
+   * @param fs is the filesystem.  
+   * @param conf is global configuration settings.
+   * @param regionInfo - HRegionInfo that describes the region
+   * @param initialFiles If there are initial files (implying that the HRegion
+   * is new), then read them from the supplied path.
+   * @param listener an object that implements CacheFlushListener or null
+   * @param reporter Call on a period so hosting server can report we're
+   * making progress to master -- otherwise master might think region deploy
+   * failed.  Can be null.
+   * @throws IOException
+   */
+  public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration 
conf, 
+      HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener,
+      final Progressable reporter)
+    throws IOException {
     
     this.basedir = basedir;
     this.log = log;
@@ -402,12 +432,9 @@
     long maxSeqId = -1;
     for(HColumnDescriptor c :
       this.regionInfo.getTableDesc().families().values()) {
-
       HStore store = new HStore(this.basedir, this.regionInfo, c, this.fs,
-          oldLogFile, this.conf);
-
+        oldLogFile, this.conf, reporter);
       stores.put(c.getFamilyName(), store);
-
       long storeSeqId = store.getMaxSequenceId();
       if (storeSeqId > maxSeqId) {
         maxSeqId = storeSeqId;
@@ -423,7 +450,7 @@
     this.minSequenceId = maxSeqId;
     if (LOG.isDebugEnabled()) {
       LOG.debug("Next sequence id for region " + regionInfo.getRegionName() +
-          " is " + this.minSequenceId);
+        " is " + this.minSequenceId);
     }
 
     // Get rid of any splits or merges that were lost in-progress

Modified: 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 (original)
+++ 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 Mon Mar 31 17:09:25 2008
@@ -82,6 +82,7 @@
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -785,7 +786,13 @@
             HTableDescriptor.getTableDir(rootDir,
                 regionInfo.getTableDesc().getName()
             ),
-            this.log, this.fs, conf, regionInfo, null, this.cacheFlusher
+            this.log, this.fs, conf, regionInfo, null, this.cacheFlusher,
+            new Progressable() {
+              public void progress() {
+                getOutboundMsgs().add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
+                  regionInfo));
+              }
+            }
         );
         // Startup a compaction early if one is needed.
         this.compactSplitThread.compactionRequested(region);
@@ -1336,11 +1343,18 @@
     }
     throw new IOException("Unknown protocol to name node: " + protocol);
   }
-
+  
+  /**
+   * @return Queue to which you can add outbound messages.
+   */
+  protected List<HMsg> getOutboundMsgs() {
+    return this.outboundMsgs;
+  }
+  
   //
   // Main program and support routines
   //
-  
+
   private static void printUsageAndExit() {
     printUsageAndExit(null);
   }

Modified: 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: 
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=643223&r1=643222&r2=643223&view=diff
==============================================================================
--- 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java 
(original)
+++ 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java 
Mon Mar 31 17:09:25 2008
@@ -54,6 +54,7 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.hbase.BloomFilterDescriptor;
 import org.onelab.filter.BloomFilter;
@@ -159,10 +160,14 @@
    * @param fs file system object
    * @param reconstructionLog existing log file to apply if any
    * @param conf configuration object
+   * @param reporter Call on a period so hosting server can report we're
+   * making progress to master -- otherwise master might think region deploy
+   * failed.  Can be null.
    * @throws IOException
    */
   HStore(Path basedir, HRegionInfo info, HColumnDescriptor family,
-      FileSystem fs, Path reconstructionLog, HBaseConfiguration conf)
+      FileSystem fs, Path reconstructionLog, HBaseConfiguration conf,
+      final Progressable reporter)
   throws IOException {  
     this.basedir = basedir;
     this.info = info;
@@ -235,7 +240,7 @@
     }
     
     try {
-      doReconstructionLog(reconstructionLog, maxSeqId);
+      doReconstructionLog(reconstructionLog, maxSeqId, reporter);
     } catch (IOException e) {
       // Presume we got here because of some HDFS issue or because of a lack of
       // HADOOP-1700; for now keep going but this is probably not what we want
@@ -308,7 +313,7 @@
    * reflected in the MapFiles.)
    */
   private void doReconstructionLog(final Path reconstructionLog,
-    final long maxSeqID)
+    final long maxSeqID, final Progressable reporter)
   throws UnsupportedEncodingException, IOException {
     if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
       // Nothing to do.
@@ -332,6 +337,8 @@
       HLogEdit val = new HLogEdit();
       long skippedEdits = 0;
       long editsCount = 0;
+      // How many edits to apply before we send a progress report.
+      int reportInterval = 
this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
       while (logReader.next(key, val)) {
         maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
         if (key.getLogSeqNum() <= maxSeqID) {
@@ -349,6 +356,11 @@
         HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp());
         reconstructedCache.put(k, val.getVal());
         editsCount++;
+        // Every 2k edits, tell the reporter we're making progress.
+        // Have seen 60k edits taking 3minutes to complete.
+        if (reporter != null && (editsCount % reportInterval) == 0) {
+          reporter.progress();
+        }
       }
       if (LOG.isDebugEnabled()) {
         LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +


Reply via email to