Re: PiEstimator error - Type mismatch in key from map

Pedro Costa Thu, 27 Jan 2011 08:51:15 -0800

[code]

package org.apache.hadoop.examples;


import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Iterator;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class PiEstimator extends Configured implements Tool {
        /** tmp directory for input/output */
        static private final Path TMP_DIR = new
Path(PiEstimator.class.getSimpleName() + "_TMP_3_141592654");


        /**
         * Mapper class for Pi estimation.
         * Generate points in a unit square
         * and then count points inside/outside of the inscribed circle of the 
square.
         */
        public static class PiMapper extends MapReduceBase
        implements Mapper<LongWritable, LongWritable, BooleanWritable, 
LongWritable> {

                /** Map method.
                 * @param offset samples starting from the (offset+1)th sample.
                 * @param size the number of samples for this map
                 * @param out output {ture->numInside, false->numOutside}
                 * @param reporter
                 */
                public void map(LongWritable offset,
                                LongWritable size,
                                OutputCollector<BooleanWritable, LongWritable> 
out,
                                Reporter reporter) throws IOException {

                        final HaltonSequence haltonsequence = new 
HaltonSequence(offset.get());
                        long numInside = 0L;
                        long numOutside = 0L;

                        for(long i = 0; i < size.get(); ) {
                                //generate points in a unit square
                                final double[] point = 
haltonsequence.nextPoint();

                                //count points inside/outside of the inscribed 
circle of the square
                                final double x = point[0] - 0.5;
                                final double y = point[1] - 0.5;
                                if (x*x + y*y > 0.25) {
                                        numOutside++;
                                } else {
                                        numInside++;
                                }

                                //report status
                                i++;
                                if (i % 1000 == 0) {
                                        reporter.setStatus("Generated " + i + " 
samples.");
                                }
                        }

                        //output map results
                        out.collect(new BooleanWritable(true), new 
LongWritable(numInside));
                        out.collect(new BooleanWritable(false), new 
LongWritable(numOutside));
                }
        }



        /**
         * Run a map/reduce job for estimating Pi.
         *
         * @return the estimated value of Pi
         */
        public static BigDecimal estimate(int numMaps, long numPoints, JobConf 
jobConf)
        throws IOException {
                //setup job conf
                jobConf.setJobName(PiEstimator.class.getSimpleName());

                jobConf.setInputFormat(SequenceFileInputFormat.class);

                jobConf.setOutputKeyClass(BooleanWritable.class);
                jobConf.setOutputValueClass(LongWritable.class);
                //              
jobConf.setMapOutputKeyClass(BooleanWritable.class);
                //              
jobConf.setMapOutputValueClass(LongWritable.class);
                jobConf.setOutputFormat(SequenceFileOutputFormat.class);

                jobConf.setMapperClass(PiMapper.class);
                jobConf.setNumMapTasks(numMaps);

                jobConf.setReducerClass(PiReducer.class);
                jobConf.setNumReduceTasks(1);

                // turn off speculative execution, because DFS doesn't handle
                // multiple writers to the same file.
                jobConf.setSpeculativeExecution(false);

                //setup input/output directories
                final Path inDir = new Path(TMP_DIR, "in");
                final Path outDir = new Path(TMP_DIR, "out");
                FileInputFormat.setInputPaths(jobConf, inDir);
                FileOutputFormat.setOutputPath(jobConf, outDir);

                final FileSystem fs = FileSystem.get(jobConf);
                if (fs.exists(TMP_DIR)) {
                        throw new IOException("Tmp directory " + 
fs.makeQualified(TMP_DIR)
                                        + " already exists.  Please remove it 
first.");
                }
                if (!fs.mkdirs(inDir)) {
                        throw new IOException("Cannot create input directory " 
+ inDir);
                }

                try {
                        //generate an input file for each map task
                        for(int i=0; i < numMaps; ++i) {
                                final Path file = new Path(inDir, "part"+i);
                                final LongWritable offset = new LongWritable(i 
* numPoints);
                                final LongWritable size = new 
LongWritable(numPoints);
                                final SequenceFile.Writer writer = 
SequenceFile.createWriter(
                                                fs, jobConf, file,
                                                LongWritable.class, 
LongWritable.class, CompressionType.NONE);
                                try {
                                        writer.append(offset, size);
                                } finally {
                                        writer.close();
                                }
                                System.out.println("Wrote input for Map #"+i);
                        }

                        //start a map/reduce job
                        System.out.println("Starting Job");
                        final long startTime = System.currentTimeMillis();
                        JobClient.runJob(jobConf);
                        final double duration = (System.currentTimeMillis() - 
startTime)/1000.0;
                        System.out.println("Job Finished in " + duration + " 
seconds");

                        //read outputs
                        Path inFile = new Path(outDir, "reduce-out");
                        LongWritable numInside = new LongWritable();
                        LongWritable numOutside = new LongWritable();
                        SequenceFile.Reader reader = new 
SequenceFile.Reader(fs, inFile, jobConf);
                        try {
                                reader.next(numInside, numOutside);
                        } finally {
                                reader.close();
                        }

                        //compute estimated value
                        return BigDecimal.valueOf(4).setScale(20)
                        .multiply(BigDecimal.valueOf(numInside.get()))
                        .divide(BigDecimal.valueOf(numMaps))
                        .divide(BigDecimal.valueOf(numPoints));
                } finally {
                        fs.delete(TMP_DIR, true);
                }
        }

        /**
         * Parse arguments and then runs a map/reduce job.
         * Print output in standard out.
         *
         * @return a non-zero if there is an error.  Otherwise, return 0.
         */
        public int run(String[] args) throws Exception {
                if (args.length != 2) {
                        System.err.println("Usage: "+getClass().getName()+" 
<nMaps> <nSamples>");
                        ToolRunner.printGenericCommandUsage(System.err);
                        return -1;
                }

                final int nMaps = Integer.parseInt(args[0]);
                final long nSamples = Long.parseLong(args[1]);

                System.out.println("Number of Maps  = " + nMaps);
                System.out.println("Samples per Map = " + nSamples);

                final JobConf jobConf = new JobConf(getConf(), getClass());
                System.out.println("Estimated value of Pi is "
                                + estimate(nMaps, nSamples, jobConf));
                return 0;
        }
}

[/code]



On Thu, Jan 27, 2011 at 4:44 PM, Chase Bradford
<chase.bradf...@gmail.com> wrote:
> That should be fine, but mapreduce.Mapper.map has this signature:
>
> map(K key, V value, Context)
>
> Your PiEstimator map signature doesn't match, so it's not overriding
> the proper function and is never getting called by the framework.
>
> Could you paste your complete PiMapper class definition and the series
> of calls you make to setup your job?  That would make debugging the
> problem much easier.
>
> Chase
>
>
> On Thu, Jan 27, 2011 at 8:29 AM, Pedro Costa <psdc1...@gmail.com> wrote:
>> Yes, that's the one that's being used ( o.a.h.mapreduce.Mapper ). This
>> is not the right one to use?
>>
>>
>>
>> On Thu, Jan 27, 2011 at 3:40 PM, Chase Bradford
>> <chase.bradf...@gmail.com> wrote:
>>> Are you sure the function signature for your Mapper's map matches the super 
>>> class, and that you specified your Map class in the job setup?  It sounds a 
>>> bit like the base o.a.h.mapreduce.Mapper map implementation is being used 
>>> instead.
>>>
>>>
>>> On Jan 27, 2011, at 2:36 AM, Pedro Costa <psdc1...@gmail.com> wrote:
>>>
>>>> The map output classes are well defined:
>>>> keyClass: class org.apache.hadoop.io.BooleanWritable - valClass: class
>>>> org.apache.hadoop.io.LongWritable
>>>>
>>>> but when executing the pi example, the classes that the map function passes are:
>>>> keyClass: class org.apache.hadoop.io.LongWritable - valClass: class
>>>> org.apache.hadoop.io.Text
>>>>
>>>>
>>>> I looked at the PiEstimator.class.PiMapper#map function, and the
>>>> output collector seems ok.
>>>>
>>>> [code]
>>>> public void map(LongWritable offset,
>>>>        LongWritable size,
>>>>        OutputCollector<BooleanWritable, LongWritable> out,
>>>>        Reporter reporter) throws IOException {
>>>> (...)
>>>> out.collect(new BooleanWritable(true), new LongWritable(numInside));
>>>> out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>>>> }
>>>> [/code]
>>>>
>>>> I'm really confused, right now. How can this be happening?
>>>>
>>>>
>>>> On Thu, Jan 27, 2011 at 10:19 AM, Pedro Costa <psdc1...@gmail.com> wrote:
>>>>> Thanks Nicholas, but it didn't work.
>>>>>
>>>>> Can I do remote debugging on the Hadoop examples? I'd really like to put a
>>>>> breakpoint in the Pi class.
>>>>>
>>>>> Thanks,
>>>>>
>>>>> On Wed, Jan 26, 2011 at 6:46 PM, Tsz Wo (Nicholas), Sze
>>>>> <s29752-hadoopu...@yahoo.com> wrote:
>>>>>> Okay, I got it now.  You were talking about your own programs, not the
>>>>>> PiEstimator example that comes with Hadoop.  Then, you have to set
>>>>>> "mapred.output.key.class" and "mapred.output.value.class" as Srihari
>>>>>> mentioned.  Below are the APIs.
>>>>>>
>>>>>>     //new API
>>>>>>     final Job job = ...
>>>>>>     job.setMapOutputKeyClass(BooleanWritable.class);
>>>>>>     job.setMapOutputValueClass(LongWritable.class);
>>>>>>
>>>>>>     //old API
>>>>>>     final JobConf jobconf = ...
>>>>>>     jobconf.setOutputKeyClass(BooleanWritable.class);
>>>>>>     jobconf.setOutputValueClass(LongWritable.class);
>>>>>>
>>>>>> Nicholas
>>>>>>
>>>>>> ________________________________
>>>>>> From: Srihari Anantha Padmanabhan <sriha...@yahoo-inc.com>
>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>> Sent: Wed, January 26, 2011 10:36:09 AM
>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map
>>>>>>
>>>>>> I am using Hadoop 0.20.2. I just wrote my own map-reduce program based on
>>>>>> the map-reduce tutorial at
>>>>>> http://hadoop.apache.org/common/docs/r0.20.2/mapred_tutorial.html
>>>>>>
>>>>>> On Jan 26, 2011, at 10:27 AM, Pedro Costa wrote:
>>>>>>
>>>>>>> Hadoop 20.1
>>>>>>>
>>>>>>> On Wed, Jan 26, 2011 at 6:26 PM, Tsz Wo (Nicholas), Sze
>>>>>>> <s29752-hadoopu...@yahoo.com> wrote:
>>>>>>>> Hi Srihari,
>>>>>>>>
>>>>>>>> Same questions to you: Which version of Hadoop are you using?  And 
>>>>>>>> where
>>>>>>>> did
>>>>>>>> you get the examples?  I guess you were able to reproduce it.  I 
>>>>>>>> suspect
>>>>>>>> the
>>>>>>>> examples and the Hadoop are in different versions.
>>>>>>>>
>>>>>>>> Nicholas
>>>>>>>>
>>>>>>>>
>>>>>>>> ________________________________
>>>>>>>> From: Srihari Anantha Padmanabhan <sriha...@yahoo-inc.com>
>>>>>>>> To: "mapreduce-user@hadoop.apache.org" 
>>>>>>>> <mapreduce-user@hadoop.apache.org>
>>>>>>>> Sent: Wed, January 26, 2011 10:15:08 AM
>>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map
>>>>>>>>
>>>>>>>> I got a similar error before in one of my projects. I had to set the
>>>>>>>> values
>>>>>>>> for "mapred.output.key.class" and "mapred.output.value.class".
>>>>>>>> That resolved the issue for me.
>>>>>>>> Srihari
>>>>>>>> On Jan 26, 2011, at 10:09 AM, Pedro Costa wrote:
>>>>>>>>
>>>>>>>> Yes, I can reproduce it deterministically. But, I also did some
>>>>>>>> changes to the Hadoop MR code. Most definitely this is the reason. I'm
>>>>>>>> looking thoroughly through the code.
>>>>>>>>
>>>>>>>> I'll say something after I find the problem.
>>>>>>>>
>>>>>>>> I was just wondering if this error has happened to someone before.
>>>>>>>> Maybe I could get a hint and try to see what's my problem easily.
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>> On Wed, Jan 26, 2011 at 6:02 PM, Tsz Wo (Nicholas), Sze
>>>>>>>> <s29752-hadoopu...@yahoo.com> wrote:
>>>>>>>>
>>>>>>>> Hi Pedro,
>>>>>>>>
>>>>>>>> This is interesting.  Which version of Hadoop are you using?  And where
>>>>>>>> did
>>>>>>>>
>>>>>>>> you get the example class files?  Also, are you able to reproduce it
>>>>>>>>
>>>>>>>> deterministically?
>>>>>>>>
>>>>>>>> Nicholas
>>>>>>>>
>>>>>>>> ________________________________
>>>>>>>>
>>>>>>>> From: Pedro Costa <psdc1...@gmail.com>
>>>>>>>>
>>>>>>>> To: mapreduce-user@hadoop.apache.org
>>>>>>>>
>>>>>>>> Sent: Wed, January 26, 2011 5:47:01 AM
>>>>>>>>
>>>>>>>> Subject: PiEstimator error - Type mismatch in key from map
>>>>>>>>
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> I run the PI example of hadoop, and I've got the following error:
>>>>>>>>
>>>>>>>> [code]
>>>>>>>>
>>>>>>>> java.io.IOException: Type mismatch in key from map: expected
>>>>>>>>
>>>>>>>> org.apache.hadoop.io.BooleanWritable, recieved
>>>>>>>>
>>>>>>>> org.apache.hadoop.io.LongWritable
>>>>>>>>
>>>>>>>>     at
>>>>>>>>
>>>>>>>>
>>>>>>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:885)
>>>>>>>>
>>>>>>>>     at
>>>>>>>>
>>>>>>>>
>>>>>>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:551)
>>>>>>>>
>>>>>>>>     at
>>>>>>>>
>>>>>>>>
>>>>>>>> org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:81)
>>>>>>>>
>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
>>>>>>>>
>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
>>>>>>>>
>>>>>>>>     at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
>>>>>>>>
>>>>>>>>     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>>>>>>>>
>>>>>>>>     at org.apache.hadoop.mapred.Child.main(Child.java:190)
>>>>>>>>
>>>>>>>> [/code]
>>>>>>>>
>>>>>>>> I've looked at the map function of the class "PiEstimator.class" and it
>>>>>>>> seems
>>>>>>>>
>>>>>>>> ok.
>>>>>>>>
>>>>>>>> [code]
>>>>>>>>
>>>>>>>> public void map(LongWritable offset,
>>>>>>>>
>>>>>>>>         LongWritable size,
>>>>>>>>
>>>>>>>>         OutputCollector<BooleanWritable, LongWritable> out,
>>>>>>>>
>>>>>>>>         Reporter reporter) throws IOException {}
>>>>>>>>
>>>>>>>> [/code]
>>>>>>>>
>>>>>>>>
>>>>>>>> What's wrong with this example?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>> --
>>>>>>>>
>>>>>>>> Pedro
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> --
>>>>>>>> Pedro
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Pedro
>>>>>>
>>>>>>
>>>>>
>>>>>
>>>>>
>>>>> --
>>>>> Pedro
>>>>>
>>>>
>>>>
>>>>
>>>> --
>>>> Pedro
>>>
>>
>>
>>
>> --
>> Pedro
>>
>
>
>
> --
> Chase Bradford
>
>
> “If in physics there's something you don't understand, you can always
> hide behind the uncharted depths of nature. But if your program
> doesn't work, there is no obstinate nature. If it doesn't work, you've
> messed up.”
>
> - Edsger Dijkstra
>



-- 
Pedro

Re: PiEstimator error - Type mismatch in key from map

Reply via email to