Your mapper must emit an IntWritable as Value's type if you want to use that in your reducer. Right now you are emitting a Text object instead.
On Oct 16, 2010 8:27 PM, "Tri Doan" <[email protected]> wrote: Saturday i would liek to modify simple word count program so that i can produce text file from given html files ( by extracting text content only beween <title> and </title> and <text> and </text> . When i try to modify map and reduce task. it seems that i could not overwrite inwritable. the error is 10/10/16 09:07:18 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= 10/10/16 09:07:18 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same. 10/10/16 09:07:18 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String). 10/10/16 09:07:18 INFO mapred.FileInputFormat: Total input paths to process : 20 10/10/16 09:07:19 INFO mapred.JobClient: Running job: job_local_0001 10/10/16 09:07:19 INFO mapred.FileInputFormat: Total input paths to process : 20 10/10/16 09:07:19 INFO mapred.MapTask: numReduceTasks: 1 10/10/16 09:07:19 INFO mapred.MapTask: io.sort.mb = 100 10/10/16 09:07:19 INFO mapred.MapTask: data buffer = 79691776/99614720 10/10/16 09:07:19 INFO mapred.MapTask: record buffer = 262144/327680 10/10/16 09:07:19 INFO mapred.MapTask: Starting flush of map output 10/10/16 09:07:20 INFO mapred.JobClient: map 0% reduce 0% 10/10/16 09:07:21 WARN mapred.LocalJobRunner: job_local_0001 java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hadoop.io.IntWritable <<<-------------------- at WordProcess$Reduce.reduce(WordProcess.java:44) at WordProcess$Reduce.reduce(WordProcess.java:1) at org.apache.hadoop.mapred.Task$OldCombinerRunner.combine(Task.java:1151) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:1265) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.flush(MapTask.java:1129) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:359) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307) at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177) 10/10/16 09:07:22 INFO mapred.JobClient: Job complete: job_local_0001 10/10/16 09:07:22 INFO mapred.JobClient: Counters: 0 Exception in thread "main" java.io.IOException: Job failed! <-------------------------------------- at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252) at WordProcess.main(WordProcess.java:88) my code is import java.io.IOException; import java.util.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.*; import org.apache.hadoop.io.*; import org.apache.hadoop.mapred.*; import org.apache.hadoop.util.*; public class WordProcess { public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> { private final static IntWritable one = new IntWritable(1); private Text id = new Text(); private Text value = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String line = value.toString(); FileSplit fileSplit = (FileSplit)reporter.getInputSplit(); String fileName = fileSplit.getPath().getName(); id.set(fileName); value.set(line); output.collect(id, value); } } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, Text> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { int sum = 0; String str = ""; String substr1,substr2; Text text = new Text(); while (values.hasNext()) { String s = values.next().toString(); str = str.concat(s); } // locate tags and extract content int x1 = str.indexOf("<TITLE>"); int y1 = str.indexOf("</TITLE>"); substr1 = str.substring(x1+7,y1); int x2 = str.indexOf("<TEXT>"); int y2 = str.indexOf("</TEXT>"); substr2 = str.substring(x2+5,y2); str = substr1 +" "+ substr2; text.set(str); output.collect(key, text); System.out.println(key+","+text); } } public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordProcess.class); conf.setJobName("wordprocess"); conf.setOutputKeyClass(Text.class); // conf.setOutputValueClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(args[1]), true); JobClient.runJob(conf); } } anyone have experience with this problem, pls tell me how to fix thank in advances best regard Tri Doan 1429 Laramie Apt 3, Manhattan KS 66502 USA
