I am using the following code to do a reduce-side join:
/*
 * HadoopMapper.java
 *
 * Created on Apr 8, 2012, 5:39:51 PM
 */

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.contrib.utils.join.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class DataJoin extends Configured implements Tool {

    public static class MapClass extends DataJoinMapperBase {

        // Tag each record with its data source, taken from the part of
        // the input file name before the first '-'.
        protected Text generateInputTag(String inputFile) {
            String datasource = inputFile.split("-")[0];
            return new Text(datasource);
        }

        // The join key is the first comma-separated field of each record.
        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            String line = ((Text) aRecord.getData()).toString();
            String[] tokens = line.split(",");
            return new Text(tokens[0]);
        }

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }
    }

    public static class Reduce extends DataJoinReducerBase {

        // Combines one record from each source; returning null discards
        // groups that are present in only one source (inner join).
        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
            if (tags.length < 2) return null;
            String joinedStr = "";
            for (int i = 0; i < values.length; i++) {
                if (i > 0) joinedStr += ",";
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                // Drop the join key from each value; it is emitted once
                // as the output key.
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }
    }

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;

        public TaggedWritable(Writable data) {
            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            this.tag.write(out);
            this.data.write(out);
        }

        public void readFields(DataInput in) throws IOException {
            this.tag.readFields(in);
            this.data.readFields(in);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        JobConf job = new JobConf(conf, DataJoin.class);

        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: DataJoin <in> <out>");
            System.exit(2);
        }
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        job.setJobName("DataJoin");
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");

        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new DataJoin(), args);
        System.exit(res);
    }
}

The code compiles without errors.
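To show the record layout the code assumes (the file names and values below are made up for illustration): the part of the file name before the first '-' becomes the source tag, and the first comma-separated field of each line is the join key.

    customers-part0:
        3,Jose Madriz,281-330-8004

    orders-part0:
        3,A,12.95,02-Jun-2008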
When I run it in Hadoop, the reduce tasks fail with the following error:

12/04/17 19:59:29 INFO mapred.JobClient:  map 100% reduce 27%
12/04/17 19:59:38 INFO mapred.JobClient:  map 100% reduce 30%
12/04/17 19:59:47 INFO mapred.JobClient:  map 100% reduce 33%
12/04/17 20:00:23 INFO mapred.JobClient: Task Id : attempt_201204061316_0018_r_000000_2, Status : FAILED
java.lang.RuntimeException: java.lang.NoSuchMethodException: DataJoin$TaggedWritable.<init>()
        at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:115)
        at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:62)
        at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
        at org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1136)
        at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1076)
        at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:246)
        at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:242)
        at org.apache.hadoop.contrib.utils.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:106)

I checked some forums and found that this error can occur when the Writable is a non-static inner class, but my program has no non-static classes!

The command I use to run the job is

    /hadoop/core/bin/hadoop jar /export/scratch/lopez/Join/DataJoin.jar DataJoin /export/scratch/user/lopez/Join /export/scratch/user/lopez/Join_Output

and the DataJoin.jar file does have DataJoin$TaggedWritable packaged in it.

Could someone please help me?
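P.S. Re-reading the stack trace, the failure is ReflectionUtils.newInstance not finding DataJoin$TaggedWritable.<init>(), i.e. a no-argument constructor: Hadoop's WritableSerialization instantiates value objects reflectively when it deserializes them on the reduce side, and my TaggedWritable only defines TaggedWritable(Writable data). Would adding a default constructor along these lines be the right fix? This is just a sketch; it assumes the wrapped data is always a Text (as it is in this job), so that readFields() has an instance to deserialize into:

    public TaggedWritable() {
        // Needed by ReflectionUtils.newInstance() when Hadoop
        // deserializes reduce-side values.
        this.tag = new Text("");
        // Pre-create the payload so readFields() has an object
        // to read into; assumes the data is always a Text.
        this.data = new Text("");
    }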