Hi

As I mentioned in my previous question about the "job failed" error message:
in the console output I see the map phase finish and the reduce phase start.
At the end, when I notice there is no output file in the output folder, I
check the messages and I suspect the lines below show where the error is
generated from.

I am sorry if an answer to the same question was already posted before this
mail.

 
10/10/16 12:09:34 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000019_0 is 
done. And is in the process of commiting
10/10/16 12:09:34 INFO mapred.LocalJobRunner: 
file:/F:/work-java/WordCount/input/cranfield0020:0+1245
10/10/16 12:09:34 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000019_0' 
done.
10/10/16 12:09:34 INFO mapred.LocalJobRunner: 
10/10/16 12:09:34 INFO mapred.Merger: Merging 20 sorted segments
10/10/16 12:09:34 INFO mapred.Merger: Merging 2 intermediate segments out of a 
total of 20
10/10/16 12:09:34 INFO mapred.Merger: Merging 10 intermediate segments out of a 
total of 19
10/10/16 12:09:34 INFO mapred.Merger: Down to the last merge-pass, with 10 
segments left of total size: 18714 bytes
10/10/16 12:09:34 INFO mapred.LocalJobRunner: 
10/10/16 12:09:34 WARN mapred.LocalJobRunner: job_local_0001
java.lang.StringIndexOutOfBoundsException: String index out of range: -7
<<<------------- does this exception cause the "Job failed!" error later?
        at java.lang.String.substring(Unknown Source)
        at WordProcess$Reduce.reduce(WordProcess.java:50)
        at WordProcess$Reduce.reduce(WordProcess.java:1)
        at 
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:216)
10/10/16 12:09:34 INFO mapred.JobClient: Job complete: job_local_0001
10/10/16 12:09:34 INFO mapred.JobClient: Counters: 13
10/10/16 12:09:34 INFO mapred.JobClient:   FileSystemCounters
10/10/16 12:09:34 INFO mapred.JobClient:     FILE_BYTES_READ=484226
10/10/16 12:09:34 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=801974
10/10/16 12:09:34 INFO mapred.JobClient:   Map-Reduce Framework
10/10/16 12:09:34 INFO mapred.JobClient:     Reduce input groups=0
10/10/16 12:09:34 INFO mapred.JobClient:     Combine output records=20
10/10/16 12:09:34 INFO mapred.JobClient:     Map input records=629
10/10/16 12:09:34 INFO mapred.JobClient:     Reduce shuffle bytes=0
10/10/16 12:09:34 INFO mapred.JobClient:     Reduce output records=0
10/10/16 12:09:34 INFO mapred.JobClient:     Spilled Records=33
10/10/16 12:09:34 INFO mapred.JobClient:     Map output bytes=30457
10/10/16 12:09:34 INFO mapred.JobClient:     Map input bytes=21651
10/10/16 12:09:34 INFO mapred.JobClient:     Combine input records=629
10/10/16 12:09:34 INFO mapred.JobClient:     Map output records=629
10/10/16 12:09:34 INFO mapred.JobClient:     Reduce input records=0
Exception in thread "main" java.io.IOException: Job failed!
        at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252)
        at WordProcess.main(WordProcess.java:86)
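

From the stack trace it looks like the exception comes from the substring
call in my reduce method (WordProcess.java:50). I suspect indexOf returns -1
when a tag is not found, so substring gets a negative range. A small
standalone test (just to reproduce the message, not the real job) seems to
confirm it:

    String str = "a string without any tags";
    int x1 = str.indexOf("<TITLE>");   // -1 because the tag is missing
    int y1 = str.indexOf("</TITLE>");  // -1 as well
    // substring(6, -1): end < begin, so this throws
    // StringIndexOutOfBoundsException: String index out of range: -7
    // (my JDK reports end - begin as the offending index)
    String substr1 = str.substring(x1 + 7, y1);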


 My code:


import java.io.IOException;
import java.util.*;
        
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
      
public class WordProcess {
        
 public static class Map extends MapReduceBase implements Mapper<LongWritable,
Text, Text, Text> {

    private Text id = new Text();

    public void map(LongWritable key, Text value, OutputCollector<Text, Text>
output, Reporter reporter) throws IOException {
        // Use the input file name as the key, so the reducer receives
        // every line of one file under the same key.
        FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
        String fileName = fileSplit.getPath().getName();

        id.set(fileName);
        output.collect(id, value);
    }
 }
        
 public static class Reduce extends MapReduceBase implements Reducer<Text,
Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text,
Text> output, Reporter reporter) throws IOException {
        String str = "";
        String substr1, substr2;
        Text text = new Text();

        // Concatenate all lines of the file into one long string to process.
        while (values.hasNext()) {
            str = str.concat(values.next().toString());
        }

        // Locate the tags and extract the content between them.
        int x1 = str.indexOf("<TITLE>");
        int y1 = str.indexOf("</TITLE>");
        substr1 = str.substring(x1 + 7, y1); // WordProcess.java:50 in the stack trace

        int x2 = str.indexOf("<TEXT>");
        int y2 = str.indexOf("</TEXT>");
        substr2 = str.substring(x2 + 5, y2);

        str = "*" + substr1 + "$" + substr2;

        text.set(str);
        output.collect(key, text);
        System.out.println(key + "," + text);
    }
 }
        
 public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordProcess.class);
    conf.setJobName("wordprocess");
        
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);    
   
    conf.setMapperClass(Map.class);
    
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
        
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
        
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    
    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(args[1]), true);

    JobClient.runJob(conf);
 }
        
}
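

One more thing I am not sure about: I use the same Reduce class as the
combiner. If the combiner already rewrites the map output into "*title$text"
strings, then the real reducer will not find <TITLE> in its input any more
and indexOf will return -1, which would explain the exception above. Would
removing the combiner, plus a guard before the substring calls, be the right
fix? Something like this (untested sketch):

    // in main(): drop the combiner; Reduce rewrites its input,
    // so it is not safe to run it twice
    //conf.setCombinerClass(Reduce.class);

    // in reduce(): check the indexOf results before calling substring
    int x1 = str.indexOf("<TITLE>");
    int y1 = str.indexOf("</TITLE>");
    if (x1 < 0 || y1 < 0) {
        return;   // skip records that do not contain the expected tags
    }
    substr1 = str.substring(x1 + 7, y1);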



Tri Doan
1429 Laramie Apt 3, Manhattan
KS 66502
USA
