It's too soon for the wiki, but the one on the wiki doesn't work with trunk.
I would update trunk in
src/java/org/apache/hadoop/hbase/mapred/package-info.java
If you replace
mw.put( "count:", Bytes.toBytes(splits[1]));
r.setStatus("Map emitting " + row + " for record " + k.toString());
output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
from my last post with
mw.put( new String(splits[1]+":"),Bytes.toBytes(splits[2]));
r.setStatus("Map emitting " + row + " for record " + k.toString());
output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
I am sure it will be generic in the way he hoped. I guess you could be
even smarter with column families but eh.
>>
>>> /// This is very close to the example in the javadoc
>>> already (Bytes, BatchUpdate) instead of (text/mapwritable), and I find
>>> it to be the easiest way to get people started/motivated with HBase.
>>>
>>>
>>> package org.apache.hadoop.hbase.mapred;
>>>
>>> import org.apache.hadoop.hbase.util.Bytes;
>>>
>>> import java.io.IOException;
>>> import java.util.Iterator;
>>>
>>> import org.apache.hadoop.conf.Configuration;
>>> import org.apache.hadoop.fs.Path;
>>> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
>>> import org.apache.hadoop.hbase.io.BatchUpdate;
>>>
>>> import org.apache.hadoop.io.LongWritable;
>>> import org.apache.hadoop.io.MapWritable;
>>> import org.apache.hadoop.io.Text;
>>> import org.apache.hadoop.mapred.JobClient;
>>> import org.apache.hadoop.mapred.JobConf;
>>> import org.apache.hadoop.mapred.MapReduceBase;
>>> import org.apache.hadoop.mapred.Mapper;
>>> import org.apache.hadoop.mapred.OutputCollector;
>>> import org.apache.hadoop.mapred.Reporter;
>>> import org.apache.hadoop.util.Tool;
>>> import org.apache.hadoop.util.ToolRunner;
>>> public class SampleUploader extends MapReduceBase
>>> implements Mapper<LongWritable, Text, ImmutableBytesWritable,
>>> BatchUpdate>, Tool
>>> {
>>> private static final String NAME = "SampleUploader";
>>> private Configuration conf;
>>>
>>> public JobConf createSubmittableJob(String[] args) {
>>> JobConf c = new JobConf(getConf(), SampleUploader.class);
>>> c.setJobName(NAME);
>>> c.setInputPath(new Path(args[0]));
>>> c.setMapperClass(this.getClass());
>>> c.setMapOutputKeyClass(ImmutableBytesWritable.class);
>>> c.setMapOutputValueClass(BatchUpdate.class);
>>> c.setReducerClass(TableUploader.class);
>>> TableReduce.initJob(args[1], TableUploader.class, c);
>>> return c;
>>> }
>>>
>>> public void map(LongWritable k, Text v,
>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter
>>> r)
>>> throws IOException {
>>> // Lines are space-delimited; first item is row, next the columnname and
>>> // then the third the cell value.
>>> String tmp = v.toString();
>>> if (tmp.length() == 0) {
>>> return;
>>> }
>>> String [] splits = v.toString().split(" ");
>>> String row = splits[0];
>>> BatchUpdate mw = new BatchUpdate(row);
>>>
>>> mw.put( "count:", Bytes.toBytes(splits[1]));
>>> r.setStatus("Map emitting " + row + " for record " + k.toString());
>>> output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
>>> }
>>>
>>> public static class TableUploader extends
>>> TableReduce<ImmutableBytesWritable,
>>> BatchUpdate> {
>>>
>>>
>>> @Override
>>> public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate>
>>> values,
>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
>>> @SuppressWarnings("unused") Reporter reporter)
>>> throws IOException {
>>>
>>> while(values.hasNext()) {
>>> output.collect(key, values.next());
>>> }
>>> }
>>> }
>>>
>>>
>>> static int printUsage() {
>>> System.out.println(NAME + " <input> <table_name>");
>>> return -1;
>>> }
>>>
>>> public int run(@SuppressWarnings("unused") String[] args) throws
>>> Exception {
>>> // Make sure there are exactly 2 parameters left.
>>> if (args.length != 2) {
>>> System.out.println("ERROR: Wrong number of parameters: " +
>>> args.length + " instead of 2.");
>>> return printUsage();
>>> }
>>> JobClient.runJob(createSubmittableJob(args));
>>> return 0;
>>> }
>>>
>>> public Configuration getConf() {
>>> return this.conf;
>>> }
>>>
>>> public void setConf(final Configuration c) {
>>> this.conf = c;
>>> }
>>>
>>> public static void main(String[] args) throws Exception {
>>> int errCode = ToolRunner.run(new Configuration(), new SampleUploader(),
>>> args);
>>> System.exit(errCode);
>>> }
>>> }
>>>
>>>
>>>
>>
>
>
>