DataJoinMapperBase was not in JoinHadoop.jar. When I added it and the related classes to JoinHadoop.jar, it works! Thanks!

(Although I still get an IOException at the reduce stage... maybe I should check the code or the input files.)
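One thing that stands out in the code below: Hadoop instantiates Writable values through reflection when it deserializes them, which requires a no-argument constructor, and the TaggedWritable from listing 5.3 only defines a one-argument constructor. That mismatch is a plausible cause of a reduce-stage failure. A minimal sketch of a fix, using the same imports as the listing (defaulting the data field to an empty Text is my own assumption, so that readFields has an object to read into; the listing does not say what the default should be):

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;

        // No-arg constructor so the framework can instantiate this class
        // reflectively when deserializing values on the reduce side.
        public TaggedWritable() {
            this.tag = new Text("");
            this.data = new Text("");  // assumption: the values are Text records
        }

        public TaggedWritable(Writable data) {
            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            this.tag.write(out);
            this.data.write(out);
        }

        public void readFields(DataInput in) throws IOException {
            this.tag.readFields(in);
            this.data.readFields(in);
        }
    }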
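Also, for anyone else who hits the original NoClassDefFoundError: the DataJoin classes live in Hadoop's contrib area (contrib/datajoin in the 0.19 distribution), not in the core jar, so they have to be bundled into the job jar or otherwise put on the job's classpath. Something like jar tf JoinHadoop.jar | grep DataJoinMapperBase should list the class once it is packaged correctly.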
2012/5/4 JunYong Li <lij...@gmail.com>

> Is there any other error log? Check:
>
> 1. JoinHadoop.jar is correctly submitted to Hadoop
> 2. DataJoinMapperBase is really in JoinHadoop.jar
>
> 2012/5/4 唐方爽 <fstang...@gmail.com>
>
> > Hi,
> >
> > I try to run a Hadoop reduce-side join, and I get the following:
> >
> > java.lang.NoClassDefFoundError:
> > org/apache/hadoop/contrib/utils/join/DataJoinMapperBase
> >     at java.lang.ClassLoader.defineClass1(Native Method)
> >     at java.lang.ClassLoader.defineClass(ClassLoader.java:791)
> >     at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
> >     at java.net.URLClassLoader.defineClass(URLClassLoader.java:449)
> >     at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
> >     at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> >     at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
> >     at java.security.AccessController.doPrivileged(Native Method)
> >     at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
> >     at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
> >     at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
> >     at DataJoin.run(DataJoin.java:105)
> >     at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
> >     at DataJoin.main(DataJoin.java:119)
> >     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> >     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
> >     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >     at java.lang.reflect.Method.invoke(Method.java:601)
> >     at org.apache.hadoop.util.RunJar.main(RunJar.java:165)
> >     at org.apache.hadoop.mapred.JobShell.run(JobShell.java:54)
> >     at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
> >     at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
> >     at org.apache.hadoop.mapred.JobShell.main(JobShell.java:68)
> > Caused by: java.lang.ClassNotFoundException:
> > org.apache.hadoop.contrib.utils.join.DataJoinMapperBase
> >     at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
> >     at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
> >     at java.security.AccessController.doPrivileged(Native Method)
> >     at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
> >     at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
> >     at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
> >     ... 23 more
> >
> > What's the problem?
> >
> > The command I use: hadoop jar JoinHadoop.jar DataJoin
> > /group/asciaa/fst/input_test_join /group/asciaa/fst/out_test_join
> >
> > The source is from *Hadoop in Action*, chapter 5, listing 5.3. I used
> > Eclipse to export it as a jar.
> > My Hadoop is 0.19.2.
> >
> > Thanks!
> >
> > The source code:
> >
> > import java.io.DataInput;
> > import java.io.DataOutput;
> > import java.io.IOException;
> > //import java.util.Iterator;
> >
> > import org.apache.hadoop.conf.Configuration;
> > import org.apache.hadoop.conf.Configured;
> > import org.apache.hadoop.fs.Path;
> > import org.apache.hadoop.io.Text;
> > import org.apache.hadoop.io.Writable;
> > import org.apache.hadoop.mapred.FileInputFormat;
> > import org.apache.hadoop.mapred.FileOutputFormat;
> > import org.apache.hadoop.mapred.JobClient;
> > import org.apache.hadoop.mapred.JobConf;
> > //import org.apache.hadoop.mapred.KeyValueTextInputFormat;
> > //import org.apache.hadoop.mapred.MapReduceBase;
> > //import org.apache.hadoop.mapred.Mapper;
> > //import org.apache.hadoop.mapred.OutputCollector;
> > //import org.apache.hadoop.mapred.Reducer;
> > //import org.apache.hadoop.mapred.Reporter;
> > import org.apache.hadoop.mapred.TextInputFormat;
> > import org.apache.hadoop.mapred.TextOutputFormat;
> > import org.apache.hadoop.util.Tool;
> > import org.apache.hadoop.util.ToolRunner;
> >
> > import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;
> > import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;
> > import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;
> >
> > public class DataJoin extends Configured implements Tool {
> >
> >     public static class MapClass extends DataJoinMapperBase {
> >
> >         protected Text generateInputTag(String inputFile) {
> >             return new Text(inputFile);
> >         }
> >
> >         protected Text generateGroupKey(TaggedMapOutput aRecord) {
> >             String line = ((Text) aRecord.getData()).toString();
> >             String[] tokens = line.split(",");
> >             String groupKey = tokens[0];
> >             return new Text(groupKey);
> >         }
> >
> >         protected TaggedMapOutput generateTaggedMapOutput(Object value) {
> >             TaggedWritable retv = new TaggedWritable((Text) value);
> >             retv.setTag(this.inputTag);
> >             return retv;
> >         }
> >     }
> >
> >     public static class Reduce extends DataJoinReducerBase {
> >
> >         protected TaggedMapOutput combine(Object[] tags, Object[] values) {
> >             if (tags.length < 2) return null;
> >             String joinedStr = "";
> >             for (int i = 0; i < values.length; i++) {
> >                 if (i > 0) joinedStr += ",";
> >                 TaggedWritable tw = (TaggedWritable) values[i];
> >                 String line = ((Text) tw.getData()).toString();
> >                 String[] tokens = line.split(",", 2);
> >                 joinedStr += tokens[1];
> >             }
> >             TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
> >             retv.setTag((Text) tags[0]);
> >             return retv;
> >         }
> >     }
> >
> >     public static class TaggedWritable extends TaggedMapOutput {
> >
> >         private Writable data;
> >
> >         public TaggedWritable(Writable data) {
> >             this.tag = new Text("");
> >             this.data = data;
> >         }
> >
> >         public Writable getData() {
> >             return data;
> >         }
> >
> >         public void write(DataOutput out) throws IOException {
> >             this.tag.write(out);
> >             this.data.write(out);
> >         }
> >
> >         public void readFields(DataInput in) throws IOException {
> >             this.tag.readFields(in);
> >             this.data.readFields(in);
> >         }
> >     }
> >
> >     public int run(String[] args) throws Exception {
> >         Configuration conf = getConf();
> >
> >         JobConf job = new JobConf(conf, DataJoin.class);
> >
> >         Path in = new Path(args[0]);
> >         Path out = new Path(args[1]);
> >         FileInputFormat.setInputPaths(job, in);
> >         FileOutputFormat.setOutputPath(job, out);
> >
> >         job.setJobName("DataJoin");
> >         job.setMapperClass(MapClass.class);
> >         job.setReducerClass(Reduce.class);
> >
> >         job.setInputFormat(TextInputFormat.class);
> >         job.setOutputFormat(TextOutputFormat.class);
> >         job.setOutputKeyClass(Text.class);
> >         job.setOutputValueClass(TaggedWritable.class);
> >         job.set("mapred.textoutputformat.separator", ",");
> >
> >         JobClient.runJob(job);
> >         return 0;
> >     }
> >
> >     public static void main(String[] args) throws Exception {
> >         int res = ToolRunner.run(new Configuration(),
> >                                  new DataJoin(),
> >                                  args);
> >
> >         System.exit(res);
> >     }
> > }

>
> --
> Regards
> Junyong