Hello everyone, I'm relatively new to Hadoop MapReduce and I'm trying to get this simple modification of the WordCount example to work.
I'm using hadoop-1.0.2. I've included a diff below and also attached my new WordCount.java file. What I am trying to achieve is to have the value class output by the map phase be different from the value class output by the reduce phase. Any help would be greatly appreciated!

Thanks,
Bryan

diff --git a/WordCount.java.orig b/WordCount.java
index 81a6c21..6a768f7 100644
--- a/WordCount.java.orig
+++ b/WordCount.java
@@ -33,8 +33,8 @@ public class WordCount {
   }
 
   public static class IntSumReducer
-       extends Reducer<Text,IntWritable,Text,IntWritable> {
-    private IntWritable result = new IntWritable();
+       extends Reducer<Text,IntWritable,Text,Text> {
+    private Text result = new Text();
 
     public void reduce(Text key, Iterable<IntWritable> values,
                        Context context
@@ -43,7 +43,7 @@ public class WordCount {
       for (IntWritable val : values) {
         sum += val.get();
       }
-      result.set(sum);
+      result.set("" + sum);
       context.write(key, result);
     }
   }
@@ -58,10 +58,11 @@ public class WordCount {
     Job job = new Job(conf, "word count");
     job.setJarByClass(WordCount.class);
     job.setMapperClass(TokenizerMapper.class);
+    job.setMapOutputValueClass(IntWritable.class);
     job.setCombinerClass(IntSumReducer.class);
     job.setReducerClass(IntSumReducer.class);
     job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(IntWritable.class);
+    job.setOutputValueClass(Text.class);
     FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
     System.exit(job.waitForCompletion(true) ? 0 : 1);
WordCount.java
Description: Binary data