Author: cutting
Date: Fri May 25 14:41:22 2007
New Revision: 541786

URL: http://svn.apache.org/viewvc?view=rev&rev=541786
Log:
HADOOP-1376. Modify RandomWriter example so that it can generate data for the Terasort benchmark. Contributed by Devaraj.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=541786&r1=541785&r2=541786
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri May 25 14:41:22 2007
@@ -69,6 +69,9 @@
  22. HADOOP-1408. Fix a compiler warning by adding a class to replace
      a generic. (omalley via cutting)
 
+ 23. HADOOP-1376. Modify RandomWriter example so that it can generate
+     data for the Terasort benchmark. (Devaraj Das via cutting)
+
 
 Branch 0.13 (unreleased changes)
 

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java?view=diff&rev=541786&r1=541785&r2=541786
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java Fri May 25 14:41:22 2007
@@ -34,8 +34,33 @@
  * This program uses map/reduce to just run a distributed job where there is
  * no interaction between the tasks and each task write a large unsorted
  * random binary sequence file of BytesWritable.
- * 
- * @author Owen O'Malley
+ * In order for this program to generate data for terasort with 10-byte keys
+ * and 90-byte values, have the following config:
+ * <xmp>
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>test.randomwrite.min_key</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.max_key</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.min_value</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.max_value</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.total_bytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration></xmp>
  */
 public class RandomWriter {
   
@@ -220,8 +245,21 @@
 
     JobClient client = new JobClient(job);
     ClusterStatus cluster = client.getClusterStatus();
-    int numMaps = cluster.getTaskTrackers() *
-         job.getInt("test.randomwriter.maps_per_host", 10);
+    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
+    long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
+                                             1*1024*1024*1024);
+    if (numBytesToWritePerMap == 0) {
+      System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
+      System.exit(-1);
+    }
+    long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes",
+         numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
+    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
+    if (numMaps == 0 && totalBytesToWrite > 0) {
+      numMaps = 1;
+      job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
+    }
+
     job.setNumMapTasks(numMaps);
     System.out.println("Running " + numMaps + " maps.");
     job.setNumReduceTasks(1);