Author: stack Date: Tue Jan 8 16:07:16 2008 New Revision: 610237 URL: http://svn.apache.org/viewvc?rev=610237&view=rev Log: HADOOP-2490 Failure in nightly #346 Add one fix and more logging to help diagnose the failures up on hudson.
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LocalHBaseCluster.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/TextSequence.java lucene/hadoop/trunk/src/contrib/hbase/src/test/hbase-site.xml lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Tue Jan 8 16:07:16 2008 @@ -105,6 +105,7 @@ HADOOP-2507 REST servlet does not properly base64 row keys and column names (Bryan Duxbury via Stack) HADOOP-2530 Missing type in new hbase custom RPC serializer + HADOOP-2490 Failure in nightly #346 (Added debugging of hudson failures). 
IMPROVEMENTS HADOOP-2401 Add convenience put method that takes writable Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Tue Jan 8 16:07:16 2008 @@ -462,12 +462,17 @@ !pendingRegions.contains(info.getRegionName()) ) ) - ) { + ) { // The current assignment is no good if (LOG.isDebugEnabled()) { LOG.debug("Current assignment of " + info.getRegionName() + - " is no good"); + " is no good: storedInfo: " + storedInfo + ", startCode: " + + startCode + ", storedInfo.startCode: " + + ((storedInfo != null)? storedInfo.getStartCode(): -1) + + ", unassignedRegions: " + unassignedRegions.containsKey(info) + + ", pendingRegions: " + + pendingRegions.contains(info.getRegionName())); } // Recover the region server's log if there is one. // This is only done from here if we are restarting and there is stale @@ -1026,9 +1031,7 @@ final String threadName = "HMaster"; Thread.currentThread().setName(threadName); startServiceThreads(); - /* - * Main processing loop - */ + /* Main processing loop */ try { for (RegionServerOperation op = null; !closed.get(); ) { if (shutdownRequested && serversToServerInfo.size() == 0) { @@ -1037,7 +1040,6 @@ } if (rootRegionLocation.get() != null) { // We can't process server shutdowns unless the root region is online - op = this.delayedToDoQueue.poll(); } if (op == null ) { @@ -1178,6 +1180,9 @@ // Something happened during startup. Shut things down. 
this.closed.set(true); LOG.error("Failed startup", e); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Started service threads"); } } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java Tue Jan 8 16:07:16 2008 @@ -1057,7 +1057,8 @@ */ private MapWritable reportForDuty() throws IOException { if (LOG.isDebugEnabled()) { - LOG.debug("Telling master we are up"); + LOG.debug("Telling master at " + + conf.get(MASTER_ADDRESS) + " that we are up"); } // Do initial RPC setup. this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy( Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LocalHBaseCluster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LocalHBaseCluster.java?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LocalHBaseCluster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LocalHBaseCluster.java Tue Jan 8 16:07:16 2008 @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase; import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -27,6 +28,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ReflectionUtils; /** 
* This class creates a single process HBase cluster. One thread is created for @@ -229,7 +231,10 @@ if (this.master != null) { while (this.master.isAlive()) { try { - this.master.join(); + // The below has been replaced to debug occasional hangs at the end of + // tests. + // this.master.join(); + threadDumpingJoin(this.master); } catch(InterruptedException e) { // continue } @@ -238,6 +243,22 @@ LOG.info("Shutdown " + ((this.regionThreads != null)? this.master.getName(): "0 masters") + " " + this.regionThreads.size() + " region server(s)"); + } + + public void threadDumpingJoin(final Thread t) throws InterruptedException { + if (t == null) { + return; + } + long startTime = System.currentTimeMillis(); + while (t.isAlive()) { + Thread.sleep(1000); + if (System.currentTimeMillis() - startTime > 60000) { + startTime = System.currentTimeMillis(); + ReflectionUtils.printThreadInfo(new PrintWriter(System.out), + "Automatic Stack Trace every 60 seconds waiting on " + + t.getName()); + } + } } /** Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/TextSequence.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/TextSequence.java?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/TextSequence.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/TextSequence.java Tue Jan 8 16:07:16 2008 @@ -37,6 +37,11 @@ * * <p>Equals considers a Text equal if the TextSequence brackets the same bytes. * + * <p>TextSequence will not always work as a Text. For instance, the following + * fails <code>Text c = new Text(new TextSequence(new Text("some string"))); + * </code> because the Text constructor accesses private Text data members + * making the new instance from the passed 'Text'. 
+ * * <p>TODO: Should this be an Interface as CharSequence is? */ public class TextSequence extends Text { Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/hbase-site.xml URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/hbase-site.xml?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/hbase-site.xml (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/hbase-site.xml Tue Jan 8 16:07:16 2008 @@ -116,4 +116,14 @@ <value>/hbase</value> <description>location of HBase instance in dfs</description> </property> + <property> + <name>hbase.hregion.max.filesize</name> + <value>67108864</value> + <description> + Maximum desired file size for an HRegion. If filesize exceeds + value + (value / 2), the HRegion is split in two. Default: 256M. + + Keep the maximum filesize small so we split more often in tests. + </description> + </property> </configuration> Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java Tue Jan 8 16:07:16 2008 @@ -138,7 +138,13 @@ } LOG.info("Shutting down Mini DFS "); - cluster.shutdown(); + try { + cluster.shutdown(); + } catch (Exception e) { + // Can get a java.lang.reflect.UndeclaredThrowableException thrown + // here because of an InterruptedException. Don't let exceptions in + // here be the cause of test failure. 
+ } } } } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java?rev=610237&r1=610236&r2=610237&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java Tue Jan 8 16:07:16 2008 @@ -122,6 +122,8 @@ dir = new Path("/hbase"); fs.mkdirs(dir); // Start up HBase cluster + // Only one region server. MultiRegionServer manufacturing code below + // depends on there being one region server only. hCluster = new MiniHBaseCluster(conf, 1, dfsCluster); LOG.info("Master is at " + this.conf.get(HConstants.MASTER_ADDRESS)); } catch (Exception e) { @@ -235,7 +237,8 @@ } } - LOG.info("Print table contents before map/reduce"); + LOG.info("Print table contents before map/reduce for " + + SINGLE_REGION_TABLE_NAME); scanTable(SINGLE_REGION_TABLE_NAME, true); @SuppressWarnings("deprecation") @@ -252,19 +255,18 @@ TableReduce.initJob(SINGLE_REGION_TABLE_NAME, IdentityTableReduce.class, jobConf); - + LOG.info("Started " + SINGLE_REGION_TABLE_NAME); JobClient.runJob(jobConf); + + LOG.info("Print table contents after map/reduce for " + + SINGLE_REGION_TABLE_NAME); + scanTable(SINGLE_REGION_TABLE_NAME, true); + // verify map-reduce results + verify(SINGLE_REGION_TABLE_NAME); } finally { mrCluster.shutdown(); } - - LOG.info("Print table contents after map/reduce"); - scanTable(SINGLE_REGION_TABLE_NAME, true); - - // verify map-reduce results - verify(SINGLE_REGION_TABLE_NAME); - } finally { table.close(); } @@ -307,16 +309,14 @@ TableReduce.initJob(MULTI_REGION_TABLE_NAME, IdentityTableReduce.class, jobConf); - + LOG.info("Started " + 
MULTI_REGION_TABLE_NAME); JobClient.runJob(jobConf); - + + // verify map-reduce results + verify(MULTI_REGION_TABLE_NAME); } finally { mrCluster.shutdown(); } - - // verify map-reduce results - verify(MULTI_REGION_TABLE_NAME); - } finally { table.close(); }