Saturday

i would liek to modify simple word count program so that i can produce text 
file from given html files ( by extracting text content only beween <title> and 
</title> and <text> and </text> . When i try to modify map and reduce task. it 
seems that i could not overwrite inwritable. the error is 
10/10/16 09:07:18 INFO jvm.JvmMetrics: Initializing JVM Metrics with 
processName=JobTracker, sessionId=
10/10/16 09:07:18 WARN mapred.JobClient: Use GenericOptionsParser for parsing 
the arguments. Applications should implement Tool for the same.
10/10/16 09:07:18 WARN mapred.JobClient: No job jar file set.  User classes may 
not be found. See JobConf(Class) or JobConf#setJar(String).
10/10/16 09:07:18 INFO mapred.FileInputFormat: Total input paths to process : 20
10/10/16 09:07:19 INFO mapred.JobClient: Running job: job_local_0001
10/10/16 09:07:19 INFO mapred.FileInputFormat: Total input paths to process : 20
10/10/16 09:07:19 INFO mapred.MapTask: numReduceTasks: 1
10/10/16 09:07:19 INFO mapred.MapTask: io.sort.mb = 100
10/10/16 09:07:19 INFO mapred.MapTask: data buffer = 79691776/99614720
10/10/16 09:07:19 INFO mapred.MapTask: record buffer = 262144/327680
10/10/16 09:07:19 INFO mapred.MapTask: Starting flush of map output
10/10/16 09:07:20 INFO mapred.JobClient:  map 0% reduce 0%
10/10/16 09:07:21 WARN mapred.LocalJobRunner: job_local_0001
java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to 
org.apache.hadoop.io.IntWritable   <<<--------------------
        at WordProcess$Reduce.reduce(WordProcess.java:44)
        at WordProcess$Reduce.reduce(WordProcess.java:1)
        at 
org.apache.hadoop.mapred.Task$OldCombinerRunner.combine(Task.java:1151)
        at 
org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:1265)
        at 
org.apache.hadoop.mapred.MapTask$MapOutputBuffer.flush(MapTask.java:1129)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:359)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307)
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177)
10/10/16 09:07:22 INFO mapred.JobClient: Job complete: job_local_0001
10/10/16 09:07:22 INFO mapred.JobClient: Counters: 0
Exception in thread "main" java.io.IOException: Job failed!     
<--------------------------------------
        at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252)
        at WordProcess.main(WordProcess.java:88)


my code is


import java.io.IOException;
import java.util.*;
        
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

        
public class WordProcess {
        
 public static class Map extends MapReduceBase implements Mapper<LongWritable, 
Text, Text, Text> {
    private final static IntWritable one = new IntWritable(1);
    private Text id = new Text();

    private Text value = new Text();
        
    public void map(LongWritable key, Text value, OutputCollector<Text, Text> 
output, Reporter reporter) throws IOException {
        String line = value.toString();

        FileSplit fileSplit = (FileSplit)reporter.getInputSplit();
        String fileName = fileSplit.getPath().getName();

        id.set(fileName);
        value.set(line);
        output.collect(id, value);
                      
    }
 } 
        
 public static class Reduce extends MapReduceBase implements Reducer<Text, 
IntWritable, Text, Text> {

    public void reduce(Text key, Iterator<IntWritable> values, 
OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        int sum = 0;
 
        String str = "";
        String substr1,substr2;
        Text text = new Text();
        while (values.hasNext()) 
        {
            String s = values.next().toString();
            str = str.concat(s);
            
        }
        // locate tags and extract content
        int x1 = str.indexOf("<TITLE>");
        int y1 = str.indexOf("</TITLE>");
        substr1 = str.substring(x1+7,y1);
        
        int x2 = str.indexOf("<TEXT>");
        int y2 = str.indexOf("</TEXT>");
        substr2 = str.substring(x2+5,y2);
        
        str = substr1 +" "+ substr2;
 
        text.set(str);
        output.collect(key, text);
        System.out.println(key+","+text);
     
    }
 }
        
 public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordProcess.class);
    conf.setJobName("wordprocess");
        
    conf.setOutputKeyClass(Text.class);
   // conf.setOutputValueClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);    
   
    conf.setMapperClass(Map.class);
    
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
        
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
        
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    
    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(args[1]), true);

    JobClient.runJob(conf);
 }
        
}




anyone have experience with this problem, pls tell me how to fix 
thank in advances

best regard
Tri Doan
1429 Laramie Apt 3, Manhattan
KS 66502
USA

Reply via email to