[code] package org.apache.hadoop.examples;
import java.io.IOException; import java.math.BigDecimal; import java.util.Iterator; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class PiEstimator extends Configured implements Tool { /** tmp directory for input/output */ static private final Path TMP_DIR = new Path(PiEstimator.class.getSimpleName() + "_TMP_3_141592654"); /** * Mapper class for Pi estimation. * Generate points in a unit square * and then count points inside/outside of the inscribed circle of the square. */ public static class PiMapper extends MapReduceBase implements Mapper<LongWritable, LongWritable, BooleanWritable, LongWritable> { /** Map method. * @param offset samples starting from the (offset+1)th sample. 
* @param size the number of samples for this map * @param out output {ture->numInside, false->numOutside} * @param reporter */ public void map(LongWritable offset, LongWritable size, OutputCollector<BooleanWritable, LongWritable> out, Reporter reporter) throws IOException { final HaltonSequence haltonsequence = new HaltonSequence(offset.get()); long numInside = 0L; long numOutside = 0L; for(long i = 0; i < size.get(); ) { //generate points in a unit square final double[] point = haltonsequence.nextPoint(); //count points inside/outside of the inscribed circle of the square final double x = point[0] - 0.5; final double y = point[1] - 0.5; if (x*x + y*y > 0.25) { numOutside++; } else { numInside++; } //report status i++; if (i % 1000 == 0) { reporter.setStatus("Generated " + i + " samples."); } } //output map results out.collect(new BooleanWritable(true), new LongWritable(numInside)); out.collect(new BooleanWritable(false), new LongWritable(numOutside)); } } /** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi */ public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException { //setup job conf jobConf.setJobName(PiEstimator.class.getSimpleName()); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputKeyClass(BooleanWritable.class); jobConf.setOutputValueClass(LongWritable.class); // jobConf.setMapOutputKeyClass(BooleanWritable.class); // jobConf.setMapOutputValueClass(LongWritable.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setMapperClass(PiMapper.class); jobConf.setNumMapTasks(numMaps); jobConf.setReducerClass(PiReducer.class); jobConf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. 
jobConf.setSpeculativeExecution(false); //setup input/output directories final Path inDir = new Path(TMP_DIR, "in"); final Path outDir = new Path(TMP_DIR, "out"); FileInputFormat.setInputPaths(jobConf, inDir); FileOutputFormat.setOutputPath(jobConf, outDir); final FileSystem fs = FileSystem.get(jobConf); if (fs.exists(TMP_DIR)) { throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } try { //generate an input file for each map task for(int i=0; i < numMaps; ++i) { final Path file = new Path(inDir, "part"+i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter( fs, jobConf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #"+i); } //start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); JobClient.runJob(jobConf); final double duration = (System.currentTimeMillis() - startTime)/1000.0; System.out.println("Job Finished in " + duration + " seconds"); //read outputs Path inFile = new Path(outDir, "reduce-out"); LongWritable numInside = new LongWritable(); LongWritable numOutside = new LongWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf); try { reader.next(numInside, numOutside); } finally { reader.close(); } //compute estimated value return BigDecimal.valueOf(4).setScale(20) .multiply(BigDecimal.valueOf(numInside.get())) .divide(BigDecimal.valueOf(numMaps)) .divide(BigDecimal.valueOf(numPoints)); } finally { fs.delete(TMP_DIR, true); } } /** * Parse arguments and then runs a map/reduce job. * Print output in standard out. * * @return a non-zero if there is an error. 
Otherwise, return 0. */ public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: "+getClass().getName()+" <nMaps> <nSamples>"); ToolRunner.printGenericCommandUsage(System.err); return -1; } final int nMaps = Integer.parseInt(args[0]); final long nSamples = Long.parseLong(args[1]); System.out.println("Number of Maps = " + nMaps); System.out.println("Samples per Map = " + nSamples); final JobConf jobConf = new JobConf(getConf(), getClass()); System.out.println("Estimated value of Pi is " + estimate(nMaps, nSamples, jobConf)); return 0; } } [/code] On Thu, Jan 27, 2011 at 4:44 PM, Chase Bradford <chase.bradf...@gmail.com> wrote: > That should be fine, but mapreduce.Mapper.map has this signature: > > map(K key, V value, Context) > > Your PiEstimator map signature doesn't match, so it's not overriding > the proper function and is never getting called by the framework. > > Could you paste your complete PiMapper class definition and the series > of calls you make to setup your job? That would make debugging the > problem much easier. > > Chase > > > On Thu, Jan 27, 2011 at 8:29 AM, Pedro Costa <psdc1...@gmail.com> wrote: >> Yes, that's the one that's being used ( o.a.h.mapreduce.Mapper ). This >> is not the right one to use? >> >> >> >> On Thu, Jan 27, 2011 at 3:40 PM, Chase Bradford >> <chase.bradf...@gmail.com> wrote: >>> Are you sure the function signature for you Mapper's map matches the super >>> class, and that you specified your Map class in the job setup? It sounds a >>> bit like the base o.a.h.mapreduce.Mapper map implementation is being used >>> instead. 
>>> >>> >>> On Jan 27, 2011, at 2:36 AM, Pedro Costa <psdc1...@gmail.com> wrote: >>> >>>> The map output classes are well defined: >>>> keyClass: class org.apache.hadoop.io.BooleanWritable - valClass: class >>>> org.apache.hadoop.io.LongWritable >>>> >>>> but executing the pi example, the values that the map function passes are: >>>> keyClass: class org.apache.hadoop.io.LongWritable - valClass: class >>>> org.apache.hadoop.io.Text >>>> >>>> >>>> I looked at the PiEstimator.class.PiMapper#map function, and the >>>> output collector seems ok. >>>> >>>> [code] >>>> public void map(LongWritable offset, >>>> LongWritable size, >>>> OutputCollector<BooleanWritable, LongWritable> out, >>>> Reporter reporter) throws IOException { >>>> (...) >>>> out.collect(new BooleanWritable(true), new LongWritable(numInside)); >>>> out.collect(new BooleanWritable(false), new LongWritable(numOutside)); >>>> } >>>> [/code] >>>> >>>> I'm really confused, right now. How can this be happening? >>>> >>>> >>>> On Thu, Jan 27, 2011 at 10:19 AM, Pedro Costa <psdc1...@gmail.com> wrote: >>>>> Thanks Nicholas, but it didn't work. >>>>> >>>>> Can I do remote debugging on the hadoop examples? I'd really like to put a >>>>> breakpoint in the Pi class. >>>>> >>>>> Thanks, >>>>> >>>>> On Wed, Jan 26, 2011 at 6:46 PM, Tsz Wo (Nicholas), Sze >>>>> <s29752-hadoopu...@yahoo.com> wrote: >>>>>> Okay, I got it now. You were talking about your own programs, not the >>>>>> PiEstimator example that came from Hadoop. Then, you have to set >>>>>> "mapred.output.key.class" and "mapred.output.value.class" as Srihari >>>>>> mentioned. Below are the APIs. >>>>>> >>>>>> //new API >>>>>> final Job job = ... >>>>>> job.setMapOutputKeyClass(BooleanWritable.class); >>>>>> job.setMapOutputValueClass(LongWritable.class); >>>>>> >>>>>> //old API >>>>>> final JobConf jobconf = ...
>>>>>> jobconf.setOutputKeyClass(BooleanWritable.class); >>>>>> jobconf.setOutputValueClass(LongWritable.class); >>>>>> >>>>>> Nicholas >>>>>> >>>>>> ________________________________ >>>>>> From: Srihari Anantha Padmanabhan <sriha...@yahoo-inc.com> >>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org> >>>>>> Sent: Wed, January 26, 2011 10:36:09 AM >>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map >>>>>> >>>>>> I am using Hadoop 0.20.2. I just wrote my own map-reduce program based on >>>>>> the map-reduce tutorial at >>>>>> http://hadoop.apache.org/common/docs/r0.20.2/mapred_tutorial.html >>>>>> >>>>>> On Jan 26, 2011, at 10:27 AM, Pedro Costa wrote: >>>>>> >>>>>>> Hadoop 20.1 >>>>>>> >>>>>>> On Wed, Jan 26, 2011 at 6:26 PM, Tsz Wo (Nicholas), Sze >>>>>>> <s29752-hadoopu...@yahoo.com> wrote: >>>>>>>> Hi Srihari, >>>>>>>> >>>>>>>> Same questions to you: Which version of Hadoop are you using? And >>>>>>>> where >>>>>>>> did >>>>>>>> you get the examples? I guess you were able to reproduce it. I >>>>>>>> suspect >>>>>>>> the >>>>>>>> examples and the Hadoop are in different versions. >>>>>>>> >>>>>>>> Nicholas >>>>>>>> >>>>>>>> >>>>>>>> ________________________________ >>>>>>>> From: Srihari Anantha Padmanabhan <sriha...@yahoo-inc.com> >>>>>>>> To: "mapreduce-user@hadoop.apache.org" >>>>>>>> <mapreduce-user@hadoop.apache.org> >>>>>>>> Sent: Wed, January 26, 2011 10:15:08 AM >>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map >>>>>>>> >>>>>>>> I got a similar error before in one of my projects. I had to set the >>>>>>>> values >>>>>>>> for "mapred.output.key.class" and "mapred.output.value.class". >>>>>>>> That resolved the issue for me. >>>>>>>> Srihari >>>>>>>> On Jan 26, 2011, at 10:09 AM, Pedro Costa wrote: >>>>>>>> >>>>>>>> Yes, I can reproduce it deterministically. But, I also did some >>>>>>>> changes to the Hadoop MR code. Most definitely this is the reason. 
I'm >>>>>>>> looking throughly through the code. >>>>>>>> >>>>>>>> I'll say something after I find the problem. >>>>>>>> >>>>>>>> I was just wondering if this error has happened to someone before. >>>>>>>> Maybe I could get a hint and try to see what's my problem easily. >>>>>>>> >>>>>>>> Thanks, >>>>>>>> >>>>>>>> On Wed, Jan 26, 2011 at 6:02 PM, Tsz Wo (Nicholas), Sze >>>>>>>> <s29752-hadoopu...@yahoo.com> wrote: >>>>>>>> >>>>>>>> Hi Pedro, >>>>>>>> >>>>>>>> This is interesting. Which version of Hadoop are you using? And where >>>>>>>> did >>>>>>>> >>>>>>>> you get the example class files? Also, are you able to reproduce it >>>>>>>> >>>>>>>> deterministically? >>>>>>>> >>>>>>>> Nicholas >>>>>>>> >>>>>>>> ________________________________ >>>>>>>> >>>>>>>> From: Pedro Costa <psdc1...@gmail.com> >>>>>>>> >>>>>>>> To: mapreduce-user@hadoop.apache.org >>>>>>>> >>>>>>>> Sent: Wed, January 26, 2011 5:47:01 AM >>>>>>>> >>>>>>>> Subject: PiEstimator error - Type mismatch in key from map >>>>>>>> >>>>>>>> Hi, >>>>>>>> >>>>>>>> I run the PI example of hadoop, and I've got the following error: >>>>>>>> >>>>>>>> [code] >>>>>>>> >>>>>>>> java.io.IOException: Type mismatch in key from map: expected >>>>>>>> >>>>>>>> org.apache.hadoop.io.BooleanWritable, recieved >>>>>>>> >>>>>>>> org.apache.hadoop.io.LongWritable >>>>>>>> >>>>>>>> at >>>>>>>> >>>>>>>> >>>>>>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:885) >>>>>>>> >>>>>>>> at >>>>>>>> >>>>>>>> >>>>>>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:551) >>>>>>>> >>>>>>>> at >>>>>>>> >>>>>>>> >>>>>>>> org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:81) >>>>>>>> >>>>>>>> at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124) >>>>>>>> >>>>>>>> at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) >>>>>>>> >>>>>>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637) >>>>>>>> >>>>>>>> at 
org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) >>>>>>>> >>>>>>>> at org.apache.hadoop.mapred.Child.main(Child.java:190) >>>>>>>> >>>>>>>> [/code] >>>>>>>> >>>>>>>> I've look at the map function of the class "PiEstimator.class" and it >>>>>>>> seems >>>>>>>> >>>>>>>> ok. >>>>>>>> >>>>>>>> [code] >>>>>>>> >>>>>>>> public void map(LongWritable offset, >>>>>>>> >>>>>>>> LongWritable size, >>>>>>>> >>>>>>>> OutputCollector<BooleanWritable, LongWritable> out, >>>>>>>> >>>>>>>> Reporter reporter) throws IOException {} >>>>>>>> >>>>>>>> [/code] >>>>>>>> >>>>>>>> >>>>>>>> What's wrong with this examples? >>>>>>>> >>>>>>>> Thanks, >>>>>>>> >>>>>>>> -- >>>>>>>> >>>>>>>> Pedro >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> -- >>>>>>>> Pedro >>>>>>>> >>>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> -- >>>>>>> Pedro >>>>>> >>>>>> >>>>> >>>>> >>>>> >>>>> -- >>>>> Pedro >>>>> >>>> >>>> >>>> >>>> -- >>>> Pedro >>> >> >> >> >> -- >> Pedro >> > > > > -- > Chase Bradford > > > “If in physics there's something you don't understand, you can always > hide behind the uncharted depths of nature. But if your program > doesn't work, there is no obstinate nature. If it doesn't work, you've > messed up.” > > - Edsger Dijkstra > -- Pedro