Hi,
I have a SequenceFile which contains several jpeg images with (image name,
image bytes) as key-value pairs. My objective is to count the number of images,
grouping them by the camera model that took them, something like this :
Nikon Coolpix 100
Sony Cybershot 251
N82 100
The MR code is :
package com.hadoop.basics;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.drew.imaging.ImageMetadataReader;
import com.drew.imaging.ImageProcessingException;
import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.exif.ExifIFD0Directory;
public class ImageSummary extends Configured implements Tool {
public static class ImageSourceMapper extends
Mapper<Text, BytesWritable, Text,
IntWritable> {
private static int tagId = 272;
private static final IntWritable one = new
IntWritable(1);
public void map(Text imageName, BytesWritable
imageBytes,
Context context) throws
IOException, InterruptedException {
// TODO Auto-generated method stub
System.out.println("In the map method,
image is "
+
imageName.toString());
byte[] imageInBytes = imageBytes.getBytes();
ByteArrayInputStream bais = new
ByteArrayInputStream(imageInBytes);
BufferedInputStream bis = new
BufferedInputStream(bais);
Metadata imageMD = null;
try {
imageMD =
ImageMetadataReader.readMetadata(bis, true);
} catch (ImageProcessingException e) {
// TODO Auto-generated catch
block
System.out.println("Got an
ImageProcessingException !");
e.printStackTrace();
}
Directory exifIFD0Directory = imageMD
.getDirectory(ExifIFD0Directory.class);
String imageSource =
exifIFD0Directory.getString(tagId);
System.out.println(imageName.toString() + "
is taken using "
+ imageSource);
context.write(new Text(imageSource), one);
System.out.println("Returning from the map
method");
}
}
public static class ImageSourceReducer extends
Reducer<Text, IntWritable, Text,
IntWritable> {
public void reduce(Text imageSource,
Iterator<IntWritable> counts,
Context context) throws
IOException, InterruptedException {
// TODO Auto-generated method stub
System.out.println("In the reduce method");
int finalCount = 0;
while (counts.hasNext()) {
finalCount +=
counts.next().get();
}
context.write(imageSource, new
IntWritable(finalCount));
System.out.println("Returning from the
reduce method");
}
}
public static void main(String[] args) throws Exception {
ToolRunner.run(new ImageSummary(), args);
}
@Override
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
System.out.println("In ImageSummary.run(...)");
Configuration configuration = getConf();
Job job = new Job(configuration, "Image_Source");
job.setJarByClass(getClass());
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapperClass(ImageSourceMapper.class);
job.setCombinerClass(ImageSourceReducer.class);
job.setReducerClass(ImageSourceReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
SequenceFileInputFormat.addInputPath(job, new
Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
System.out.println("Submitting job");
job.waitForCompletion(true);
int jobStatus = job.isSuccessful() ? 0 : -1;
System.out.println("Returning jobStatus = " +
jobStatus);
return jobStatus;
}
}
The command :
hadoop jar /home/hduser/dumphere/codes/hadoop/imageops.jar
com.hadoop.basics.ImageSummary "/scratchpad/imageOps/WholeImageSeqFile"
"/scratchpad/imageOps/cnt"
The part-file (/scratchpad/imageOps/cnt/part-r-00000) contains :
COOLPIX L120 1
COOLPIX L120 1
K750i 1
The mapper stdout logs :
stdout logs
In the map method, image is It's a long road....JPG
It's a long road....JPG is taken using COOLPIX L120
Returning from the map method
In the map method, image is Every man is a mountainside....JPG
Every man is a mountainside....JPG is taken using COOLPIX L120
Returning from the map method
In the map method, image is mystic.JPG
mystic.JPG is taken using K750i
Returning from the map method
But nothing appears in the reducer's stdout logs, and the keys in the output
are not aggregated. What have I missed?
Regards,
Omkar Joshi
________________________________
The contents of this e-mail and any attachment(s) may contain confidential or
privileged information for the intended recipient(s). Unintended recipients are
prohibited from taking action on the basis of information in this e-mail and
using or disseminating the information, and must notify the sender and delete
it from their system. L&T Infotech will not accept responsibility or liability
for the accuracy or completeness of, or the presence of any virus or disabling
code in this e-mail"