The reason was that I set mapred-site.xml to use the new API.
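For reference, since the offending configuration itself is never quoted in the thread: in 0.20-era Hadoop, the switch is the pair of properties behind JobConf.setUseNewMapper()/setUseNewReducer(), which MapTask checks before taking the runNewMapper() path. A minimal sketch of the kind of mapred-site.xml fragment that would force the new API onto every job:

[code]
<!-- hypothetical mapred-site.xml fragment: forces the new-API code path -->
<property>
  <name>mapred.mapper.new-api</name>
  <value>true</value>
</property>
<property>
  <name>mapred.reducer.new-api</name>
  <value>true</value>
</property>
[/code]

With such a fragment in place, an old-API job like the PiEstimator below gets sent down runNewMapper() even though it never asked for the new API.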
Thanks,

On Thu, Jan 27, 2011 at 5:04 PM, Chase Bradford <chase.bradf...@gmail.com> wrote:
> That's very puzzling, because I don't see any reason for the new API
> to get activated. I'm pretty sure that's what's happening though,
> based on this section of the exception's call stack:
>
>   at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
>   at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
>   at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>
>
> On Thu, Jan 27, 2011 at 8:50 AM, Pedro Costa <psdc1...@gmail.com> wrote:
>> [code]
>> package org.apache.hadoop.examples;
>>
>> import java.io.IOException;
>> import java.math.BigDecimal;
>> import java.util.Iterator;
>>
>> import org.apache.hadoop.conf.Configured;
>> import org.apache.hadoop.fs.FileSystem;
>> import org.apache.hadoop.fs.Path;
>> import org.apache.hadoop.io.BooleanWritable;
>> import org.apache.hadoop.io.LongWritable;
>> import org.apache.hadoop.io.SequenceFile;
>> import org.apache.hadoop.io.Writable;
>> import org.apache.hadoop.io.WritableComparable;
>> import org.apache.hadoop.io.SequenceFile.CompressionType;
>> import org.apache.hadoop.mapred.FileInputFormat;
>> import org.apache.hadoop.mapred.FileOutputFormat;
>> import org.apache.hadoop.mapred.JobClient;
>> import org.apache.hadoop.mapred.JobConf;
>> import org.apache.hadoop.mapred.MapReduceBase;
>> import org.apache.hadoop.mapred.Mapper;
>> import org.apache.hadoop.mapred.OutputCollector;
>> import org.apache.hadoop.mapred.Reducer;
>> import org.apache.hadoop.mapred.Reporter;
>> import org.apache.hadoop.mapred.SequenceFileInputFormat;
>> import org.apache.hadoop.mapred.SequenceFileOutputFormat;
>> import org.apache.hadoop.util.Tool;
>> import org.apache.hadoop.util.ToolRunner;
>>
>> public class PiEstimator extends Configured implements Tool {
>>   /** tmp directory for input/output */
>>   static private final Path TMP_DIR =
>>       new Path(PiEstimator.class.getSimpleName() + "_TMP_3_141592654");
>>
>>   /**
>>    * Mapper class for Pi estimation.
>>    * Generate points in a unit square
>>    * and then count points inside/outside of the inscribed circle of the square.
>>    */
>>   public static class PiMapper extends MapReduceBase
>>       implements Mapper<LongWritable, LongWritable, BooleanWritable, LongWritable> {
>>
>>     /** Map method.
>>      * @param offset samples starting from the (offset+1)th sample
>>      * @param size the number of samples for this map
>>      * @param out output {true->numInside, false->numOutside}
>>      * @param reporter
>>      */
>>     public void map(LongWritable offset,
>>                     LongWritable size,
>>                     OutputCollector<BooleanWritable, LongWritable> out,
>>                     Reporter reporter) throws IOException {
>>
>>       final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
>>       long numInside = 0L;
>>       long numOutside = 0L;
>>
>>       for(long i = 0; i < size.get(); ) {
>>         //generate points in a unit square
>>         final double[] point = haltonsequence.nextPoint();
>>
>>         //count points inside/outside of the inscribed circle of the square
>>         final double x = point[0] - 0.5;
>>         final double y = point[1] - 0.5;
>>         if (x*x + y*y > 0.25) {
>>           numOutside++;
>>         } else {
>>           numInside++;
>>         }
>>
>>         //report status
>>         i++;
>>         if (i % 1000 == 0) {
>>           reporter.setStatus("Generated " + i + " samples.");
>>         }
>>       }
>>
>>       //output map results
>>       out.collect(new BooleanWritable(true), new LongWritable(numInside));
>>       out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>>     }
>>   }
>>
>>   /**
>>    * Run a map/reduce job for estimating Pi.
>>    *
>>    * @return the estimated value of Pi
>>    */
>>   public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf)
>>       throws IOException {
>>     //setup job conf
>>     jobConf.setJobName(PiEstimator.class.getSimpleName());
>>
>>     jobConf.setInputFormat(SequenceFileInputFormat.class);
>>
>>     jobConf.setOutputKeyClass(BooleanWritable.class);
>>     jobConf.setOutputValueClass(LongWritable.class);
>>     // jobConf.setMapOutputKeyClass(BooleanWritable.class);
>>     // jobConf.setMapOutputValueClass(LongWritable.class);
>>     jobConf.setOutputFormat(SequenceFileOutputFormat.class);
>>
>>     jobConf.setMapperClass(PiMapper.class);
>>     jobConf.setNumMapTasks(numMaps);
>>
>>     jobConf.setReducerClass(PiReducer.class);
>>     jobConf.setNumReduceTasks(1);
>>
>>     // turn off speculative execution, because DFS doesn't handle
>>     // multiple writers to the same file.
>>     jobConf.setSpeculativeExecution(false);
>>
>>     //setup input/output directories
>>     final Path inDir = new Path(TMP_DIR, "in");
>>     final Path outDir = new Path(TMP_DIR, "out");
>>     FileInputFormat.setInputPaths(jobConf, inDir);
>>     FileOutputFormat.setOutputPath(jobConf, outDir);
>>
>>     final FileSystem fs = FileSystem.get(jobConf);
>>     if (fs.exists(TMP_DIR)) {
>>       throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
>>           + " already exists. Please remove it first.");
>>     }
>>     if (!fs.mkdirs(inDir)) {
>>       throw new IOException("Cannot create input directory " + inDir);
>>     }
>>
>>     try {
>>       //generate an input file for each map task
>>       for(int i=0; i < numMaps; ++i) {
>>         final Path file = new Path(inDir, "part"+i);
>>         final LongWritable offset = new LongWritable(i * numPoints);
>>         final LongWritable size = new LongWritable(numPoints);
>>         final SequenceFile.Writer writer = SequenceFile.createWriter(
>>             fs, jobConf, file,
>>             LongWritable.class, LongWritable.class, CompressionType.NONE);
>>         try {
>>           writer.append(offset, size);
>>         } finally {
>>           writer.close();
>>         }
>>         System.out.println("Wrote input for Map #"+i);
>>       }
>>
>>       //start a map/reduce job
>>       System.out.println("Starting Job");
>>       final long startTime = System.currentTimeMillis();
>>       JobClient.runJob(jobConf);
>>       final double duration = (System.currentTimeMillis() - startTime)/1000.0;
>>       System.out.println("Job Finished in " + duration + " seconds");
>>
>>       //read outputs
>>       Path inFile = new Path(outDir, "reduce-out");
>>       LongWritable numInside = new LongWritable();
>>       LongWritable numOutside = new LongWritable();
>>       SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
>>       try {
>>         reader.next(numInside, numOutside);
>>       } finally {
>>         reader.close();
>>       }
>>
>>       //compute estimated value
>>       return BigDecimal.valueOf(4).setScale(20)
>>           .multiply(BigDecimal.valueOf(numInside.get()))
>>           .divide(BigDecimal.valueOf(numMaps))
>>           .divide(BigDecimal.valueOf(numPoints));
>>     } finally {
>>       fs.delete(TMP_DIR, true);
>>     }
>>   }
>>
>>   /**
>>    * Parse arguments and then run a map/reduce job.
>>    * Print output in standard out.
>>    *
>>    * @return a non-zero value if there is an error. Otherwise, return 0.
>>    */
>>   public int run(String[] args) throws Exception {
>>     if (args.length != 2) {
>>       System.err.println("Usage: "+getClass().getName()+" <nMaps> <nSamples>");
>>       ToolRunner.printGenericCommandUsage(System.err);
>>       return -1;
>>     }
>>
>>     final int nMaps = Integer.parseInt(args[0]);
>>     final long nSamples = Long.parseLong(args[1]);
>>
>>     System.out.println("Number of Maps  = " + nMaps);
>>     System.out.println("Samples per Map = " + nSamples);
>>
>>     final JobConf jobConf = new JobConf(getConf(), getClass());
>>     System.out.println("Estimated value of Pi is "
>>         + estimate(nMaps, nSamples, jobConf));
>>     return 0;
>>   }
>> }
>> [/code]
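A side note on the fix: under the new API the PiMapper above is never invoked, because it implements the old org.apache.hadoop.mapred.Mapper interface. Purely as a sketch (not part of the original paste, and assuming the HaltonSequence helper referenced by the pasted class is accessible), a new-API port would look roughly like this:

[code]
import java.io.IOException;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical new-API port of PiMapper; HaltonSequence is the same
// helper the pasted PiEstimator class references.
public class NewApiPiMapper
    extends Mapper<LongWritable, LongWritable, BooleanWritable, LongWritable> {

  @Override
  protected void map(LongWritable offset, LongWritable size, Context context)
      throws IOException, InterruptedException {
    final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
    long numInside = 0L;
    long numOutside = 0L;

    for (long i = 0; i < size.get(); ) {
      // generate a point in the unit square
      final double[] point = haltonsequence.nextPoint();

      // count the point as inside/outside the inscribed circle
      final double x = point[0] - 0.5;
      final double y = point[1] - 0.5;
      if (x * x + y * y > 0.25) {
        numOutside++;
      } else {
        numInside++;
      }

      i++;
      if (i % 1000 == 0) {
        context.setStatus("Generated " + i + " samples.");
      }
    }

    // emit the per-map counts, keyed by inside (true) / outside (false)
    context.write(new BooleanWritable(true), new LongWritable(numInside));
    context.write(new BooleanWritable(false), new LongWritable(numOutside));
  }
}
[/code]

The key difference is the signature, map(KEYIN, VALUEIN, Context): anything else, the old four-argument form included, silently fails to override the base class.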
>> On Thu, Jan 27, 2011 at 4:44 PM, Chase Bradford
>> <chase.bradf...@gmail.com> wrote:
>>> That should be fine, but mapreduce.Mapper.map has this signature:
>>>
>>>   map(K key, V value, Context)
>>>
>>> Your PiEstimator map signature doesn't match, so it's not overriding
>>> the proper function and is never getting called by the framework.
>>>
>>> Could you paste your complete PiMapper class definition and the series
>>> of calls you make to set up your job? That would make debugging the
>>> problem much easier.
>>>
>>> Chase
>>>
>>>
>>> On Thu, Jan 27, 2011 at 8:29 AM, Pedro Costa <psdc1...@gmail.com> wrote:
>>>> Yes, that's the one that's being used (o.a.h.mapreduce.Mapper). Isn't
>>>> that the right one to use?
>>>>
>>>>
>>>> On Thu, Jan 27, 2011 at 3:40 PM, Chase Bradford
>>>> <chase.bradf...@gmail.com> wrote:
>>>>> Are you sure the function signature of your Mapper's map matches the
>>>>> super class, and that you specified your Map class in the job setup? It
>>>>> sounds a bit like the base o.a.h.mapreduce.Mapper map implementation is
>>>>> being used instead.
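That diagnosis matches the error exactly. With the new API forced on but only an old-API mapper registered, the framework instantiates the default org.apache.hadoop.mapreduce.Mapper, whose map is an identity pass-through, so the SequenceFile's LongWritable input key is written straight to a collector expecting BooleanWritable. Roughly, from the 0.20 source (a paraphrased sketch):

[code]
// default (identity) map in org.apache.hadoop.mapreduce.Mapper:
// the input key/value pass through unchanged, which is why the collector
// receives LongWritable where the job declared BooleanWritable.
protected void map(KEYIN key, VALUEIN value, Context context)
    throws IOException, InterruptedException {
  context.write((KEYOUT) key, (VALUEOUT) value);
}
[/code]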
>>>>>
>>>>>
>>>>> On Jan 27, 2011, at 2:36 AM, Pedro Costa <psdc1...@gmail.com> wrote:
>>>>>
>>>>>> The map output classes are well defined:
>>>>>> keyClass: class org.apache.hadoop.io.BooleanWritable - valClass: class
>>>>>> org.apache.hadoop.io.LongWritable
>>>>>>
>>>>>> but when the pi example executes, the classes that the map function
>>>>>> actually passes are:
>>>>>> keyClass: class org.apache.hadoop.io.LongWritable - valClass: class
>>>>>> org.apache.hadoop.io.Text
>>>>>>
>>>>>> I looked at the PiEstimator.PiMapper#map function, and the
>>>>>> output collector seems OK.
>>>>>>
>>>>>> [code]
>>>>>> public void map(LongWritable offset,
>>>>>>                 LongWritable size,
>>>>>>                 OutputCollector<BooleanWritable, LongWritable> out,
>>>>>>                 Reporter reporter) throws IOException {
>>>>>>   (...)
>>>>>>   out.collect(new BooleanWritable(true), new LongWritable(numInside));
>>>>>>   out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>>>>>> }
>>>>>> [/code]
>>>>>>
>>>>>> I'm really confused right now. How can this be happening?
>>>>>>
>>>>>>
>>>>>> On Thu, Jan 27, 2011 at 10:19 AM, Pedro Costa <psdc1...@gmail.com> wrote:
>>>>>>> Thanks Nicholas, but it didn't work.
>>>>>>>
>>>>>>> Can I do remote debugging on the Hadoop examples? I'd really like to
>>>>>>> put a breakpoint in the Pi class.
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>> On Wed, Jan 26, 2011 at 6:46 PM, Tsz Wo (Nicholas), Sze
>>>>>>> <s29752-hadoopu...@yahoo.com> wrote:
>>>>>>>> Okay, I got it now. You were talking about your own program, not the
>>>>>>>> PiEstimator example that comes with Hadoop. Then you have to set
>>>>>>>> "mapred.output.key.class" and "mapred.output.value.class" as Srihari
>>>>>>>> mentioned. Below are the APIs.
>>>>>>>>
>>>>>>>> //new API
>>>>>>>> final Job job = ...
>>>>>>>> job.setMapOutputKeyClass(BooleanWritable.class);
>>>>>>>> job.setMapOutputValueClass(LongWritable.class);
>>>>>>>>
>>>>>>>> //old API
>>>>>>>> final JobConf jobconf = ...
>>>>>>>> jobconf.setOutputKeyClass(BooleanWritable.class);
>>>>>>>> jobconf.setOutputValueClass(LongWritable.class);
>>>>>>>>
>>>>>>>> Nicholas
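One clarification on the old-API half of that snippet: setOutputKeyClass/setOutputValueClass set the job's final output types, and the map output types merely default to them. When the two differ, the old API has dedicated setters as well; these are exactly the calls left commented out in the PiEstimator paste above. A small fragment in the same style:

[code]
// old API: needed only when the map output types differ from the
// job's final output types
final JobConf jobconf = new JobConf();
jobconf.setMapOutputKeyClass(BooleanWritable.class);
jobconf.setMapOutputValueClass(LongWritable.class);
[/code]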
>>>>>>>>
>>>>>>>> ________________________________
>>>>>>>> From: Srihari Anantha Padmanabhan <sriha...@yahoo-inc.com>
>>>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>>>> Sent: Wed, January 26, 2011 10:36:09 AM
>>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map
>>>>>>>>
>>>>>>>> I am using Hadoop 0.20.2. I just wrote my own map-reduce program based on
>>>>>>>> the map-reduce tutorial at
>>>>>>>> http://hadoop.apache.org/common/docs/r0.20.2/mapred_tutorial.html
>>>>>>>>
>>>>>>>> On Jan 26, 2011, at 10:27 AM, Pedro Costa wrote:
>>>>>>>>
>>>>>>>>> Hadoop 20.1
>>>>>>>>>
>>>>>>>>> On Wed, Jan 26, 2011 at 6:26 PM, Tsz Wo (Nicholas), Sze
>>>>>>>>> <s29752-hadoopu...@yahoo.com> wrote:
>>>>>>>>>> Hi Srihari,
>>>>>>>>>>
>>>>>>>>>> Same questions to you: Which version of Hadoop are you using? And where
>>>>>>>>>> did you get the examples? I guess you were able to reproduce it. I suspect
>>>>>>>>>> the examples and the Hadoop are in different versions.
>>>>>>>>>>
>>>>>>>>>> Nicholas
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> ________________________________
>>>>>>>>>> From: Srihari Anantha Padmanabhan <sriha...@yahoo-inc.com>
>>>>>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>>>>>> Sent: Wed, January 26, 2011 10:15:08 AM
>>>>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map
>>>>>>>>>>
>>>>>>>>>> I got a similar error before in one of my projects. I had to set the
>>>>>>>>>> values for "mapred.output.key.class" and "mapred.output.value.class".
>>>>>>>>>> That resolved the issue for me.
>>>>>>>>>>
>>>>>>>>>> Srihari
>>>>>>>>>>
>>>>>>>>>> On Jan 26, 2011, at 10:09 AM, Pedro Costa wrote:
>>>>>>>>>>
>>>>>>>>>> Yes, I can reproduce it deterministically. But I also made some
>>>>>>>>>> changes to the Hadoop MR code; that is most definitely the reason. I'm
>>>>>>>>>> looking thoroughly through the code.
>>>>>>>>>>
>>>>>>>>>> I'll say something after I find the problem.
>>>>>>>>>>
>>>>>>>>>> I was just wondering if this error has happened to someone before.
>>>>>>>>>> Maybe I could get a hint and find my problem more easily.
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>>
>>>>>>>>>> On Wed, Jan 26, 2011 at 6:02 PM, Tsz Wo (Nicholas), Sze
>>>>>>>>>> <s29752-hadoopu...@yahoo.com> wrote:
>>>>>>>>>>
>>>>>>>>>> Hi Pedro,
>>>>>>>>>>
>>>>>>>>>> This is interesting. Which version of Hadoop are you using? And where
>>>>>>>>>> did you get the example class files? Also, are you able to reproduce it
>>>>>>>>>> deterministically?
>>>>>>>>>>
>>>>>>>>>> Nicholas
>>>>>>>>>>
>>>>>>>>>> ________________________________
>>>>>>>>>> From: Pedro Costa <psdc1...@gmail.com>
>>>>>>>>>> To: mapreduce-user@hadoop.apache.org
>>>>>>>>>> Sent: Wed, January 26, 2011 5:47:01 AM
>>>>>>>>>> Subject: PiEstimator error - Type mismatch in key from map
>>>>>>>>>>
>>>>>>>>>> Hi,
>>>>>>>>>>
>>>>>>>>>> I ran the Pi example of Hadoop and got the following error:
>>>>>>>>>>
>>>>>>>>>> [code]
>>>>>>>>>> java.io.IOException: Type mismatch in key from map: expected
>>>>>>>>>> org.apache.hadoop.io.BooleanWritable, recieved
>>>>>>>>>> org.apache.hadoop.io.LongWritable
>>>>>>>>>>   at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:885)
>>>>>>>>>>   at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:551)
>>>>>>>>>>   at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:81)
>>>>>>>>>>   at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
>>>>>>>>>>   at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
>>>>>>>>>>   at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
>>>>>>>>>>   at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>>>>>>>>>>   at org.apache.hadoop.mapred.Child.main(Child.java:190)
>>>>>>>>>> [/code]
>>>>>>>>>>
>>>>>>>>>> I've looked at the map function of the PiEstimator class and it seems
>>>>>>>>>> OK.
>>>>>>>>>>
>>>>>>>>>> [code]
>>>>>>>>>> public void map(LongWritable offset,
>>>>>>>>>>                 LongWritable size,
>>>>>>>>>>                 OutputCollector<BooleanWritable, LongWritable> out,
>>>>>>>>>>                 Reporter reporter) throws IOException {}
>>>>>>>>>> [/code]
>>>>>>>>>>
>>>>>>>>>> What's wrong with this example?
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> Pedro
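The runNewMapper frame in that stack trace is the telltale sign that the new-API path was taken. As an illustrative check (not from the thread), the 0.20 JobConf exposes the flag that MapTask consults:

[code]
import org.apache.hadoop.mapred.JobConf;

public class ApiCheck {
  public static void main(String[] args) {
    // JobConf loads mapred-site.xml from the classpath; getUseNewMapper()
    // reads "mapred.mapper.new-api", the flag that decides between
    // MapTask.runNewMapper() and the old-API code path.
    JobConf conf = new JobConf();
    System.out.println("new-api mapper in effect: " + conf.getUseNewMapper());
  }
}
[/code]

ApiCheck is a hypothetical helper, but printing that flag (or inspecting the generated job.xml) would have pointed straight at the mapred-site.xml setting that caused this thread.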
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> Pedro
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> --
>>>>>>>>> Pedro
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Pedro
>>>>>>
>>>>>>
>>>>>>
>>>>>> --
>>>>>> Pedro
>>>>>
>>>>
>>>>
>>>>
>>>> --
>>>> Pedro
>>>>
>>>
>>>
>>>
>>> --
>>> Chase Bradford
>>>
>>>
>>> “If in physics there's something you don't understand, you can always
>>> hide behind the uncharted depths of nature. But if your program
>>> doesn't work, there is no obstinate nature. If it doesn't work, you've
>>> messed up.”
>>>
>>> - Edsger Dijkstra
>>>
>>
>>
>>
>> --
>> Pedro
>>
>
>
>
> --
> Chase Bradford
>
>
> “If in physics there's something you don't understand, you can always
> hide behind the uncharted depths of nature. But if your program
> doesn't work, there is no obstinate nature. If it doesn't work, you've
> messed up.”
>
> - Edsger Dijkstra
>
--
Pedro