Example 12-1. A MapReduce application to count the number of rows in an
HBase table
The code below hangs. Could it be because I am using the 0.89 version of
HBase when running, but 0.20 for the API (that's what this is example is
written for?
Thank you,
Mark
public class RowCounter extends Configured implements Tool {
// Name of this 'program'
static final String NAME = "rowcounter";
static class RowCounterMapper
implements TableMap<ImmutableBytesWritable, RowResult> {
private static enum Counters {ROWS}
public void map(ImmutableBytesWritable row, RowResult value,
OutputCollector<ImmutableBytesWritable, RowResult> output,
Reporter reporter)
throws IOException {
boolean content = false;
for (Map.Entry<byte [], Cell> e: value.entrySet()) {
Cell cell = e.getValue();
if (cell != null && cell.getValue().length > 0) {
content = true;
break;
}
}
if (!content) {
// Don't count rows that are all empty values.
return;
}
// Give out same value every time. We're only interested in the row/key
reporter.incrCounter(Counters.ROWS, 1);
}
public void configure(JobConf jc) {
// Nothing to do.
}
public void close() throws IOException {
// Nothing to do.
}
}
public JobConf createSubmittableJob(String[] args) throws IOException {
JobConf c = new JobConf(getConf(), getClass());
c.setJobName(NAME);
// Columns are space delimited
StringBuilder sb = new StringBuilder();
final int columnoffset = 2;
for (int i = columnoffset; i < args.length; i++) {
if (i > columnoffset) {
sb.append(" ");
}
sb.append(args[i]);
}
// Second argument is the table name.
TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
RowCounterMapper.class, ImmutableBytesWritable.class, RowResult.class, c);
c.setNumReduceTasks(0);
// First arg is the output directory.
FileOutputFormat.setOutputPath(c, new Path(args[0]));
return c;
}
static int printUsage() {
System.out.println(NAME +
" <outputdir> <tablename> <column1> [<column2>...]");
return -1;
}
public int run(final String[] args) throws Exception {
// Make sure there are at least 3 parameters
if (args.length < 3) {
System.err.println("ERROR: Wrong number of parameters: " + args.length);
return printUsage();
}
JobClient.runJob(createSubmittableJob(args));
return 0;
}
public static void main(String[] args) throws Exception {
HBaseConfiguration c = new HBaseConfiguration();
int errCode = ToolRunner.run(c, new RowCounter(), args);
System.exit(errCode);
}
}