Author: jimk Date: Thu Oct 4 12:58:56 2007 New Revision: 581995 URL: http://svn.apache.org/viewvc?rev=581995&view=rev Log: HADOOP-1990 Regression test instability affects nightly and patch builds
Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/DFSAbort.java Removed: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestDFSAbort.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MultiRegionTable.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestLogRolling.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Thu Oct 4 12:58:56 2007 @@ -64,6 +64,7 @@ HADOOP-1941 StopRowFilter throws NPE when passed null row HADOOP-1966 Make HBase unit tests more reliable in the Hudson environment. 
HADOOP-1975 HBase tests failing with java.lang.NumberFormatException + HADOOP-1990 Regression test instability affects nightly and patch builds IMPROVEMENTS HADOOP-1737 Make HColumnDescriptor data publically members settable Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Thu Oct 4 12:58:56 2007 @@ -1049,15 +1049,6 @@ } catch(Exception iex) { LOG.warn("meta scanner", iex); } - try { - // TODO: Maybe do in parallel in its own thread as is done in TaskTracker - // if its taking a long time to go down. - - server.join(); // Wait for server to finish. - } catch(InterruptedException iex) { - LOG.warn("server", iex); - } - LOG.info("HMaster main thread exiting"); } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java Thu Oct 4 12:58:56 2007 @@ -729,12 +729,6 @@ join(this.logRollerThread); join(this.cacheFlusherThread); join(this.splitOrCompactCheckerThread); - try { - this.server.join(); - } catch (InterruptedException e) { - // No means of asking server if its done... .so just assume it is even - // if an interrupt. 
- } } private void join(final Thread t) { Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java Thu Oct 4 12:58:56 2007 @@ -87,6 +87,13 @@ return startCode; } + /** + * @param startCode the startCode to set + */ + public void setStartCode(long startCode) { + this.startCode = startCode; + } + /** {@inheritDoc} */ @Override public String toString() { Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java Thu Oct 4 12:58:56 2007 @@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.util; import java.io.IOException; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -59,11 +58,11 @@ } catch (IOException e) { exception = e.getMessage(); } - LOG.info("Failed file system available test. Thread: " + - Thread.currentThread().getName() + ": " + exception); try { if (!available) { + LOG.info("Failed file system available test. 
Thread: " + + Thread.currentThread().getName() + ": " + exception); fs.close(); } Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/DFSAbort.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/DFSAbort.java?rev=581995&view=auto ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/DFSAbort.java (added) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/DFSAbort.java Thu Oct 4 12:58:56 2007 @@ -0,0 +1,113 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase; + +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +import java.io.PrintWriter; +import org.apache.hadoop.util.ReflectionUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Test ability of HBase to handle DFS failure + */ +public class DFSAbort extends HBaseClusterTestCase { + private static final Log LOG = + LogFactory.getLog(DFSAbort.class.getName()); + + /** constructor */ + public DFSAbort() { + super(); + + // For less frequently updated regions flush after every 2 flushes + conf.setInt("hbase.hregion.memcache.optionalflushcount", 2); + } + + /** {@inheritDoc} */ + @Override + public void setUp() throws Exception { + try { + super.setUp(); + HTableDescriptor desc = new HTableDescriptor(getName()); + desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY_STR)); + HBaseAdmin admin = new HBaseAdmin(conf); + admin.createTable(desc); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + + /** + * @throws Exception + */ + public void testDFSAbort() throws Exception { + try { + // By now the Mini DFS is running, Mini HBase is running and we have + // created a table. 
Now let's yank the rug out from HBase + cluster.getDFSCluster().shutdown(); + // Now wait for Mini HBase Cluster to shut down +// cluster.join(); + join(); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + + private void join() { + if (this.cluster.regionThreads != null) { + synchronized(this.cluster.regionThreads) { + for(Thread t: this.cluster.regionThreads) { + join(t); + } + } + } + join(this.cluster.getMasterThread()); + } + + private void join(final Thread t) { + if (t == null) { + return; + } + for (int i = 0; t.isAlive(); i++) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + LOG.info("Continuing...", e); + } + if (i != 0 && i % 30 == 0) { + ReflectionUtils.printThreadInfo(new PrintWriter(System.out), + "Automatic Stack Trace every 30 seconds waiting on " + + t.getName()); + } + } + } + + /** + * @param args unused + */ + public static void main(@SuppressWarnings("unused") String[] args) { + TestRunner.run(new TestSuite(DFSAbort.class)); + } +} \ No newline at end of file Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java Thu Oct 4 12:58:56 2007 @@ -85,6 +85,7 @@ /** * Starts a MiniHBaseCluster on top of an existing HDFSCluster * + *<pre> **************************************************************************** * * * * * * N O T E * * * * * * @@ -93,6 +94,7 @@ * * * * * * * N O T E * * * * * **************************************************************************** + *</pre> * * @param conf * @param nRegionNodes @@ 
-286,6 +288,13 @@ return this.masterThread.getMaster().getMasterAddress(); } + /** + * @return the thread running the HMaster + */ + public MasterThread getMasterThread() { + return this.masterThread; + } + /** * Cause a region server to exit without cleaning up * Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MultiRegionTable.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MultiRegionTable.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MultiRegionTable.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MultiRegionTable.java Thu Oct 4 12:58:56 2007 @@ -55,6 +55,10 @@ MiniHBaseCluster cluster, FileSystem localFs, String tableName, String columnName) throws IOException { + final int retries = 10; + final long waitTime = + conf.getLong("hbase.master.meta.thread.rescanfrequency", 10L * 1000L); + // This size should make it so we always split using the addContent // below. After adding all data, the first region is 1.3M. Should // set max filesize to be <= 1M. @@ -62,7 +66,6 @@ assertTrue(conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE) <= 1024 * 1024); - final int retries = 10; FileSystem fs = (cluster.getDFSCluster() == null) ? localFs : cluster.getDFSCluster().getFileSystem(); assertNotNull(fs); @@ -89,18 +92,18 @@ // Now, wait until split makes it into the meta table. 
- for (int i = 0; - i < retries && (count(meta, HConstants.COLUMN_FAMILY_STR) <= count); - i++) { - + int oldCount = count; + for (int i = 0; i < retries; i++) { + count = count(meta, HConstants.COLUMN_FAMILY_STR); + if (count > oldCount) { + break; + } try { - Thread.sleep(5000); + Thread.sleep(waitTime); } catch (InterruptedException e) { // continue } } - int oldCount = count; - count = count(meta, HConstants.COLUMN_FAMILY_STR); if (count <= oldCount) { throw new IOException("Failed waiting on splits to show up"); } @@ -126,7 +129,7 @@ // Recalibrate will cause us to wait on new regions' deployment - recalibrate(t, new Text(columnName), retries); + recalibrate(t, new Text(columnName), retries, waitTime); // Compact a region at a time so we can test case where one region has // no references but the other still has some @@ -138,7 +141,7 @@ while (getSplitParentInfo(meta, parent).size() == 3) { try { - Thread.sleep(5000); + Thread.sleep(waitTime); } catch (InterruptedException e) { // continue } @@ -153,12 +156,13 @@ // Now wait until parent disappears. LOG.info("Waiting on parent " + parent.getRegionName() + " to disappear"); - for (int i = 0; - i < retries && getSplitParentInfo(meta, parent) != null; - i++) { + for (int i = 0; i < retries; i++) { + if (getSplitParentInfo(meta, parent) == null) { + break; + } try { - Thread.sleep(5000); + Thread.sleep(waitTime); } catch (InterruptedException e) { // continue } @@ -167,9 +171,12 @@ // Assert cleaned up. 
- for (int i = 0; i < retries && fs.exists(parentDir); i++) { + for (int i = 0; i < retries; i++) { + if (!fs.exists(parentDir)) { + break; + } try { - Thread.sleep(5000); + Thread.sleep(waitTime); } catch (InterruptedException e) { // continue } @@ -243,7 +250,7 @@ * @param retries */ private static void recalibrate(final HTable t, final Text column, - final int retries) throws IOException { + final int retries, final long waitTime) throws IOException { for (int i = 0; i < retries; i++) { try { @@ -260,7 +267,7 @@ } catch (NotServingRegionException x) { System.out.println("it's alright"); try { - Thread.sleep(5000); + Thread.sleep(waitTime); } catch (InterruptedException e) { // continue } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java Thu Oct 4 12:58:56 2007 @@ -86,25 +86,25 @@ } else if(value.equalsIgnoreCase("WARN")) { logLevel = Level.WARN; } + } - ConsoleAppender consoleAppender = null; - for(Enumeration<Appender> e = rootLogger.getAllAppenders(); - e.hasMoreElements();) { + ConsoleAppender consoleAppender = null; + for(Enumeration<Appender> e = rootLogger.getAllAppenders(); + e.hasMoreElements();) { - Appender a = e.nextElement(); - if(a instanceof ConsoleAppender) { - consoleAppender = (ConsoleAppender)a; - break; - } + Appender a = e.nextElement(); + if(a instanceof ConsoleAppender) { + consoleAppender = (ConsoleAppender)a; + break; } - if(consoleAppender != null) { - Layout layout = consoleAppender.getLayout(); - if(layout instanceof 
PatternLayout) { - PatternLayout consoleLayout = (PatternLayout)layout; - consoleLayout.setConversionPattern("%d %-5p [%t] %l: %m%n"); - } + } + if(consoleAppender != null) { + Layout layout = consoleAppender.getLayout(); + if(layout instanceof PatternLayout) { + PatternLayout consoleLayout = (PatternLayout)layout; + consoleLayout.setConversionPattern("%d %-5p [%t] %l: %m%n"); } - } + } Logger.getLogger( HBaseTestCase.class.getPackage().getName()).setLevel(logLevel); } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestLogRolling.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestLogRolling.java?rev=581995&r1=581994&r2=581995&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestLogRolling.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestLogRolling.java Thu Oct 4 12:58:56 2007 @@ -68,6 +68,10 @@ // Increase the amount of time between client retries conf.setLong("hbase.client.pause", 15 * 1000); + // Reduce thread wake frequency so that other threads can get + // a chance to run. + conf.setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000); + String className = this.getClass().getName(); StringBuilder v = new StringBuilder(className); while (v.length() < 1000) {