Author: cutting
Date: Mon Feb 26 11:55:58 2007
New Revision: 511974

URL: http://svn.apache.org/viewvc?view=rev&rev=511974
Log:
HADOOP-1039. In HDFS's TestCheckpoint, avoid restarting MiniDFSCluster
so often, speeding this test. Contributed by Dhruba.
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCheckpoint.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=511974&r1=511973&r2=511974
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Feb 26 11:55:58 2007
@@ -135,6 +135,9 @@
 39. HADOOP-878. In contrib/streaming, fix reducer=NONE to work with
     multiple maps. (Arun C Murthy via cutting)
 
+40. HADOOP-1039. In HDFS's TestCheckpoint, avoid restarting
+    MiniDFSCluster so often, speeding this test. (Dhruba Borthakur via cutting)
+
 
 Release 0.11.2 - 2007-02-16
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java?view=diff&rev=511974&r1=511973&r2=511974
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java Mon Feb 26 11:55:58 2007
@@ -18,6 +18,7 @@
 package org.apache.hadoop.dfs;
 
 import java.io.*;
+import java.net.*;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.net.NetworkTopology;
@@ -36,8 +37,8 @@
   private Thread dataNodeThreads[];
   private NameNodeRunner nameNode;
   private DataNodeRunner dataNodes[];
-  private int MAX_RETRIES = 10;
-  private int MAX_RETRIES_PER_PORT = 10;
+  static public int MAX_RETRIES = 10;
+  static public int MAX_RETRIES_PER_PORT = 10;
 
   private int nameNodePort = 0;
   private int nameNodeInfoPort = 0;
@@ -151,9 +152,24 @@
   public MiniDFSCluster(int namenodePort, 
                         Configuration conf,
                         boolean dataNodeFirst) throws IOException {
-    this(namenodePort, conf, 1, dataNodeFirst, true, null);
+    this(namenodePort, conf, 1, dataNodeFirst, true, 
+         MAX_RETRIES, MAX_RETRIES_PER_PORT, null);
   }
-  
+
+  /**
+   * Create the config and start up only the namenode. If either the rpc or info port is already
+   * in use, we will try new ports.
+   * @param namenodePort suggestion for which rpc port to use. caller should use
+   *        getNameNodePort() to get the actual port used.
+   */
+  public MiniDFSCluster(int namenodePort, 
+                        Configuration conf,
+                        int numRetries,
+                        int numRetriesPerPort) throws IOException {
+    this(namenodePort, conf, 0, false, false,
+         numRetries, numRetriesPerPort, null);
+  }
+
   /**
    * Create the config and start up the servers. If either the rpc or info port is already
    * in use, we will try new ports.
@@ -166,7 +182,8 @@
                         Configuration conf,
                         int nDatanodes,
                         boolean dataNodeFirst) throws IOException {
-    this(namenodePort, conf, nDatanodes, dataNodeFirst, true, null);
+    this(namenodePort, conf, nDatanodes, dataNodeFirst, true, 
+         MAX_RETRIES, MAX_RETRIES_PER_PORT, null);
   }
 
   /**
@@ -183,7 +200,18 @@
                         int nDatanodes,
                         boolean dataNodeFirst,
                         boolean formatNamenode ) throws IOException {
-    this(namenodePort, conf, nDatanodes, dataNodeFirst, formatNamenode, null);
+    this(namenodePort, conf, nDatanodes, dataNodeFirst, formatNamenode,
+         MAX_RETRIES, MAX_RETRIES_PER_PORT, null);
+  }
+
+  public MiniDFSCluster(int namenodePort, 
+                        Configuration conf,
+                        int nDatanodes,
+                        boolean dataNodeFirst,
+                        boolean formatNamenode,
+                        String[] racks) throws IOException {
+    this(namenodePort, conf, nDatanodes, dataNodeFirst, formatNamenode,
+         MAX_RETRIES, MAX_RETRIES_PER_PORT, racks);
   }
 
   /**
@@ -201,6 +229,8 @@
                         int nDatanodes,
                         boolean dataNodeFirst,
                         boolean formatNamenode,
+                        int numRetries,
+                        int numRetriesPerPort,
                         String[] racks) throws IOException {
 
     this.conf = conf;
@@ -220,7 +250,7 @@
     // too many tries have failed.
     boolean foundPorts = false;
     int portsTried = 0;
-    while ((!foundPorts) && (portsTried < MAX_RETRIES)) {
+    while ((!foundPorts) && (portsTried < numRetries)) {
       conf.set("fs.default.name", "localhost:"+ Integer.toString(nameNodePort));
       conf.set("dfs.info.port", nameNodeInfoPort);
@@ -251,7 +281,7 @@
       }
 
       int retry = 0;
-      while (!nameNode.isUp() && (retry < MAX_RETRIES_PER_PORT)) {
+      while (!nameNode.isUp() && (retry < numRetriesPerPort)) {
        try {   // let daemons get started
          System.out.println("waiting for dfs minicluster to start");
          Thread.sleep(1000);
@@ -259,7 +289,7 @@
        }
        retry++;
       }
-      if (retry >= MAX_RETRIES_PER_PORT) {
+      if (retry >= numRetriesPerPort) {
        this.nameNodePort += 3;
        this.nameNodeInfoPort += 7;
        System.out.println("Failed to start DFS minicluster in " + retry + " attempts. Trying new ports:");
@@ -276,7 +306,8 @@
      }
      portsTried++;
     } 
-    if (portsTried >= MAX_RETRIES) {
+    System.out.println("\tNameNode portsTried " + portsTried);
+    if (portsTried >= numRetries) {
       throw new IOException("Failed to start a DFS minicluster after trying " + portsTried + " ports.");
     }
   }
@@ -311,5 +342,48 @@
    */
   public File[] getNameDirs() {
     return NameNode.getDirs(conf);
+  }
+
+  /**
+   * Wait till the cluster is active and running.
+   */
+  public void waitActive() throws IOException {
+    InetSocketAddress addr = new InetSocketAddress("localhost",
+                                                   getNameNodePort());
+    DFSClient client = new DFSClient(addr, conf);
+
+    //
+    // get initial state of datanodes
+    //
+    DatanodeInfo[] oldinfo = client.datanodeReport();
+    while (oldinfo.length != nDatanodes) {
+      try {
+        Thread.sleep(500);
+      } catch (Exception e) {
+      }
+      oldinfo = client.datanodeReport();
+    }
+
+    //
+    // wait till all datanodes send at least one more heartbeat
+    //
+    int numdead = nDatanodes;
+    while (numdead > 0) {
+      try {
+        Thread.sleep(500);
+      } catch (Exception e) {
+      }
+      DatanodeInfo[] info = client.datanodeReport();
+      if (info.length != nDatanodes) {
+        continue;
+      }
+      numdead = 0;
+      for (int i = 0; i < info.length; i++) {
+        if (oldinfo[i].getLastUpdate() >= info[i].getLastUpdate()) {
+          numdead++;
+        }
+      }
+    }
+    client.close();
   }
 }
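Taken together, the retry limits and waitActive() let a caller start a small
cluster and block until it is actually serving, instead of sleeping for a
fixed 15 seconds. A minimal usage sketch, not part of the patch, assuming
MiniDFSCluster is visible on the test classpath; the wrapper class name and
the suggested port 65312 are illustrative only:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.dfs.MiniDFSCluster;

    public class WaitActiveSketch {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // (port, conf, nDatanodes, dataNodeFirst): one datanode,
        // namenode started first and formatted.
        MiniDFSCluster cluster = new MiniDFSCluster(65312, conf, 1, false);
        cluster.waitActive();   // polls datanodeReport() until the datanode
                                // has registered and sent a fresh heartbeat
        // 65312 is only a suggestion; ask for the port actually bound.
        System.out.println("namenode rpc port: " + cluster.getNameNodePort());
        cluster.shutdown();
      }
    }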
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCheckpoint.java?view=diff&rev=511974&r1=511973&r2=511974
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCheckpoint.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestCheckpoint.java Mon Feb 26 11:55:58 2007
@@ -33,13 +33,12 @@
  */
 public class TestCheckpoint extends TestCase {
   static final long seed = 0xDEADBEEFL;
-  static final int blockSize = 8192;
-  static final int fileSize = 16384;
-  static final int numDatanodes = 4;
+  static final int blockSize = 4096;
+  static final int fileSize = 8192;
+  static final int numDatanodes = 1;
 
   private void writeFile(FileSystem fileSys, Path name, int repl)
   throws IOException {
-    // create and write a file that contains three blocks of data
     FSOutputStream stm = fileSys.createRaw(name, true, (short)repl,
                                            (long)blockSize);
     byte[] buffer = new byte[fileSize];
@@ -56,7 +55,7 @@
     String[][] locations = fileSys.getFileCacheHints(name, 0, fileSize);
     for (int idx = 0; idx < locations.length; idx++) {
       assertEquals("Number of replicas for block" + idx,
-          Math.min(numDatanodes, repl), locations[idx].length);
+                   Math.min(numDatanodes, repl), locations[idx].length);
     }
   }
 
@@ -111,8 +110,7 @@
     //
     removeOneNameDir(namedirs);
     try {
-      cluster = new MiniDFSCluster(65312, conf, numDatanodes,
-                                   false, false);
+      cluster = new MiniDFSCluster(65312, conf, 1, 1);
       assertTrue(false);
     } catch (IOException e) {
       // do nothing
     }
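Note that new MiniDFSCluster(65312, conf, 1, 1) above resolves to the new
(namenodePort, conf, numRetries, numRetriesPerPort) constructor, not to a
datanode count: it starts only the namenode, skips formatting, and with both
limits at 1 gives up after a single attempt on a single port, so the startup
failure this test expects surfaces quickly instead of burning through the
default 10 x 10 retries. The idiom, sketched with cluster and conf as
declared in the surrounding test; the catch-block comment is added here for
illustration:

    try {
      // Namenode only; numRetries = 1, numRetriesPerPort = 1: fail fast.
      cluster = new MiniDFSCluster(65312, conf, 1, 1);
      assertTrue(false);   // should be unreachable
    } catch (IOException e) {
      // expected: a name directory was removed, so the namenode cannot start
    }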
@@ -126,16 +124,12 @@
   private void testSecondaryNamenodeError1(Configuration conf)
   throws IOException {
     System.out.println("Starting testSecondaryNamenodeError 1");
-    Path file1 = new Path("checkpoint.dat");
+    Path file1 = new Path("checkpointxx.dat");
     MiniDFSCluster cluster = new MiniDFSCluster(65312, conf, numDatanodes, 
                                                 false, false);
+    cluster.waitActive();
     FileSystem fileSys = cluster.getFileSystem();
     try {
-      Thread.sleep(15000L);
-    } catch (InterruptedException e) {
-      // nothing
-    }
-    try {
       assertTrue(!fileSys.exists(file1));
       //
       // Make the checkpoint fail after rolling the
@@ -155,8 +149,8 @@
       //
       // Create a new file
       //
-      writeFile(fileSys, file1, 3);
-      checkFile(fileSys, file1, 3);
+      writeFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, 1);
     } finally {
       fileSys.close();
       cluster.shutdown();
@@ -170,14 +164,10 @@
     System.out.println("Starting testSecondaryNamenodeError 2");
     cluster = new MiniDFSCluster(65312, conf, numDatanodes, 
                                  false, false);
+    cluster.waitActive();
     fileSys = cluster.getFileSystem();
     try {
-      Thread.sleep(15000L);
-    } catch (InterruptedException e) {
-      // nothing
-    }
-    try {
-      checkFile(fileSys, file1, 3);
+      checkFile(fileSys, file1, 1);
       cleanupFile(fileSys, file1);
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
       secondary.doCheckpoint();
@@ -194,16 +184,12 @@
   private void testSecondaryNamenodeError2(Configuration conf)
   throws IOException {
     System.out.println("Starting testSecondaryNamenodeError 21");
-    Path file1 = new Path("checkpoint.dat");
+    Path file1 = new Path("checkpointyy.dat");
     MiniDFSCluster cluster = new MiniDFSCluster(65312, conf, numDatanodes, 
                                                 false, false);
+    cluster.waitActive();
     FileSystem fileSys = cluster.getFileSystem();
     try {
-      Thread.sleep(15000L);
-    } catch (InterruptedException e) {
-      // nothing
-    }
-    try {
       assertTrue(!fileSys.exists(file1));
       //
       // Make the checkpoint fail after rolling the
@@ -223,8 +209,8 @@
       //
       // Create a new file
       //
-      writeFile(fileSys, file1, 3);
-      checkFile(fileSys, file1, 3);
+      writeFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, 1);
     } finally {
       fileSys.close();
       cluster.shutdown();
@@ -238,14 +224,10 @@
     System.out.println("Starting testSecondaryNamenodeError 22");
     cluster = new MiniDFSCluster(65312, conf, numDatanodes, 
                                  false, false);
+    cluster.waitActive();
     fileSys = cluster.getFileSystem();
     try {
-      Thread.sleep(15000L);
-    } catch (InterruptedException e) {
-      // nothing
-    }
-    try {
-      checkFile(fileSys, file1, 3);
+      checkFile(fileSys, file1, 1);
       cleanupFile(fileSys, file1);
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
       secondary.doCheckpoint();
@@ -256,7 +238,7 @@
     }
   }
 
-  /**
+  /**
    * Tests checkpoint in DFS.
    */
   public void testCheckpoint() throws IOException {
@@ -266,15 +248,9 @@
     Configuration conf = new Configuration();
     MiniDFSCluster cluster = new MiniDFSCluster(65312, conf, numDatanodes, false);
+    cluster.waitActive();
     FileSystem fileSys = cluster.getFileSystem();
-    // Now wait for 15 seconds to give datanodes chance to register
-    // themselves and to report heartbeat
-    try {
-      Thread.sleep(15000L);
-    } catch (InterruptedException e) {
-      // nothing
-    }
     try {
       //
       // verify that 'format' really blew away all pre-existing files
@@ -286,8 +262,8 @@
       //
       // Create file1
       //
-      writeFile(fileSys, file1, 3);
-      checkFile(fileSys, file1, 3);
+      writeFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, 1);
 
       //
       // Take a checkpoint
@@ -304,20 +280,16 @@
       // Restart cluster and verify that file1 still exists.
       //
       cluster = new MiniDFSCluster(65312, conf, numDatanodes, false, false);
+      cluster.waitActive();
       fileSys = cluster.getFileSystem();
       try {
-        Thread.sleep(15000L);
-      } catch (InterruptedException e) {
-        // nothing
-      }
-      try {
        // check that file1 still exists
-        checkFile(fileSys, file1, 3);
+        checkFile(fileSys, file1, 1);
         cleanupFile(fileSys, file1);
 
         // create new file file2
-        writeFile(fileSys, file2, 3);
-        checkFile(fileSys, file2, 3);
+        writeFile(fileSys, file2, 1);
+        checkFile(fileSys, file2, 1);
 
         //
         // Take a checkpoint
@@ -336,22 +308,18 @@
       //
       cluster = new MiniDFSCluster(65312, conf, numDatanodes, false, false);
       fileSys = cluster.getFileSystem();
-      try {
-        Thread.sleep(15000L);
-      } catch (InterruptedException e) {
-        // nothing
-      }
       assertTrue(!fileSys.exists(file1));
 
       try {
         // verify that file2 exists
-        checkFile(fileSys, file2, 3);
-        cleanupFile(fileSys, file2);
+        checkFile(fileSys, file2, 1);
       } finally {
        fileSys.close();
        cluster.shutdown();
       }
+
+      // file2 is left behind.
 
       testSecondaryNamenodeError1(conf);
       testSecondaryNamenodeError2(conf);
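For reference, the startup pattern this patch applies to every sub-test,
condensed into one sketch. It assumes placement inside TestCheckpoint so that
numDatanodes and the writeFile/checkFile/cleanupFile helpers above are in
scope; the method name and the file name are hypothetical:

    public void testStartupPatternSketch() throws IOException {
      Configuration conf = new Configuration();
      // One datanode instead of four; no fixed 15-second sleep.
      MiniDFSCluster cluster = new MiniDFSCluster(65312, conf, numDatanodes, false);
      cluster.waitActive();              // returns once the datanode reports in
      FileSystem fileSys = cluster.getFileSystem();
      try {
        Path file = new Path("checkpoint-sketch.dat");   // hypothetical path
        writeFile(fileSys, file, 1);     // replication 1 matches the lone datanode
        checkFile(fileSys, file, 1);
        cleanupFile(fileSys, file);
      } finally {
        fileSys.close();
        cluster.shutdown();
      }
    }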