Example 12-1. A MapReduce application to count the number of rows in an
HBase table

The code below hangs. Could it be because I am using the 0.89 version of
HBase when running, but the 0.20 version for the API (which is what this
example is written for)?

Thank you,
Mark

/**
 * Map-only job that counts the rows of an HBase table which hold at least
 * one non-empty cell in the requested columns. The tally is reported through
 * the {@code Counters.ROWS} job counter.
 */
public class RowCounter extends Configured implements Tool {
  /** Job name; also used as the program name in the usage message. */
  static final String NAME = "rowcounter";

  /**
   * Mapper that bumps the {@code ROWS} counter once for every row that has
   * content. It never writes anything to the output collector.
   */
  static class RowCounterMapper
  implements TableMap<ImmutableBytesWritable, RowResult> {
    private enum Counters {ROWS}

    /**
     * Increments the row counter when the supplied row has at least one
     * non-null cell with a non-empty value; otherwise does nothing.
     *
     * @param row      key of the current row (not inspected)
     * @param value    cells of the current row
     * @param output   collector; unused by this mapper
     * @param reporter receives the counter increment
     * @throws IOException declared by the mapper contract
     */
    public void map(ImmutableBytesWritable row, RowResult value,
        OutputCollector<ImmutableBytesWritable, RowResult> output,
        Reporter reporter)
    throws IOException {
      // Rows whose values are all empty are deliberately left uncounted.
      if (hasContent(value)) {
        reporter.incrCounter(Counters.ROWS, 1);
      }
    }

    /** Tells whether any cell in the row is non-null with a non-empty value. */
    private static boolean hasContent(RowResult value) {
      for (Map.Entry<byte [], Cell> entry : value.entrySet()) {
        Cell candidate = entry.getValue();
        if (candidate == null || candidate.getValue().length == 0) {
          continue;
        }
        return true;
      }
      return false;
    }

    /** No per-task configuration is required. */
    public void configure(JobConf jc) {
    }

    /** No resources to release. */
    public void close() throws IOException {
    }
  }

  /**
   * Builds the job configuration from the command-line arguments.
   *
   * @param args {@code args[0]} is the output directory, {@code args[1]} the
   *             table name, and every argument from index 2 on is a column
   * @return a job configuration ready to be submitted
   * @throws IOException if setting up the table map job fails
   */
  public JobConf createSubmittableJob(String[] args) throws IOException {
    JobConf job = new JobConf(getConf(), getClass());
    job.setJobName(NAME);

    // The table input is handed its columns as one space-delimited string.
    String columns = joinColumns(args, 2);
    String tableName = args[1];
    TableMapReduceUtil.initTableMapJob(tableName, columns,
      RowCounterMapper.class, ImmutableBytesWritable.class, RowResult.class,
      job);

    // Map-only job: no reduce tasks are configured.
    job.setNumReduceTasks(0);

    Path outputDir = new Path(args[0]);
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
  }

  /** Joins {@code args[firstColumn..]} with single spaces. */
  private static String joinColumns(String[] args, int firstColumn) {
    StringBuilder joined = new StringBuilder();
    String separator = "";
    for (int idx = firstColumn; idx < args.length; idx++) {
      joined.append(separator).append(args[idx]);
      separator = " ";
    }
    return joined.toString();
  }

  /**
   * Prints the command-line synopsis to standard output.
   *
   * @return always {@code -1}, for use as an error exit code
   */
  static int printUsage() {
    String synopsis = " <outputdir> <tablename> <column1> [<column2>...]";
    System.out.println(NAME + synopsis);
    return -1;
  }

  /**
   * Validates the argument count and, when it is sufficient, runs the job.
   *
   * @param args output directory, table name and at least one column
   * @return {@code 0} after the job has run, or the value of
   *         {@link #printUsage()} when fewer than three arguments are given
   * @throws Exception if building or running the job fails
   */
  public int run(final String[] args) throws Exception {
    final int requiredArgs = 3;
    if (args.length >= requiredArgs) {
      JobClient.runJob(createSubmittableJob(args));
      return 0;
    }
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    return printUsage();
  }

  /** Command-line entry point; exits with the code returned by the tool. */
  public static void main(String[] args) throws Exception {
    int exitCode =
      ToolRunner.run(new HBaseConfiguration(), new RowCounter(), args);
    System.exit(exitCode);
  }
}

Reply via email to