I'm trying to analyze the output files of my Hadoop M/R job. Below is the code in my job's main(); it still does not work, and it gives me this error:
java.io.FileNotFoundException: hdfs:/Master:9100/user/output/30/part-r-00000 (No such file or directory) at java.io.FileInputStream.open(Native Method) at java.io.FileInputStream.<init>(FileInputStream.java:146) at java.io.FileInputStream.<init>(FileInputStream.java:101) at java.io.FileReader.<init>(FileReader.java:58) at distributed.jobStats.main(jobStats.java:122) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.main(RunJar.java:160) Hashmap size is: 0 Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args) .getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: jobStats <in> <out> <job>"); System.exit(2); } conf.set("job", otherArgs[2]); Job job = new Job(conf, "job count"); job.setJarByClass(jobStats.class); job.setMapperClass(jobMapper.class); job.setCombinerClass(jobReducer.class); job.setReducerClass(jobReducer.class); // job.setNumReduceTasks(3); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); try { // this block is to save the hadoop output path args[1] and its file system - either HDFS or S3 Amazon String fileSys = conf.get("fs.default.name"); File file = new File("/home/ec2-user/hadoopOutput.txt"); // if file doesnt exists, then create it if (!file.exists()) { file.createNewFile(); } FileWriter fw = new FileWriter(file.getAbsoluteFile(), false); BufferedWriter bw = new BufferedWriter(fw); bw.write(args[1] + "/part-r-00000 | " + fileSys); bw.close(); fw.close(); System.out.println("Done"); } 
catch (IOException e) { e.printStackTrace(); } int jobStatus = job.waitForCompletion(true) ? 0 : 1; if (jobStatus == 0) { HashMap<String, Integer> jobCountMap = new HashMap<String, Integer>(); FileSystem fs = FileSystem.get(conf); Path outPath = new Path(args[1]); Path pathPattern = new Path(outPath, "part-r-[0-9]*"); FileStatus[] list = fs.globStatus(pathPattern); for (FileStatus status : list) { try { BufferedReader brr = new BufferedReader(new FileReader(status.getPath().toString())); String line; while ((line = brr.readLine()) != null) { String[] yearjobCount = line.split("\\|"); //the reducer output in this format "xxxx | int" jobCountMap.put(yearjobCount[0].trim(), Integer.parseInt(yearjobCount[1].trim())); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } System.out.println("Hashmap size is: " + jobCountMap.size()); // System.exit(job.waitForCompletion(true) ? 0 : 1); }