HBASE-20376 RowCounter and CellCounter documentations are incorrect
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/c4ebf666
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/c4ebf666
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/c4ebf666

Branch: refs/heads/HBASE-19064
Commit: c4ebf666b78f92a6d02652eece8dd95360bd0482
Parents: 5a69465
Author: Peter Somogyi <psomo...@apache.org>
Authored: Tue Apr 10 15:16:03 2018 +0200
Committer: Peter Somogyi <psomo...@apache.org>
Committed: Thu Apr 12 10:00:38 2018 +0200

----------------------------------------------------------------------
 bin/hbase                                      |  6 +++
 .../hadoop/hbase/mapreduce/CellCounter.java    | 47 +++++++++++---------
 .../hadoop/hbase/mapreduce/RowCounter.java     |  6 +--
 .../hadoop/hbase/mapreduce/TestRowCounter.java | 22 +++++----
 src/main/asciidoc/_chapters/ops_mgt.adoc       | 31 ++++++++-----
 5 files changed, 64 insertions(+), 48 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/bin/hbase
----------------------------------------------------------------------
diff --git a/bin/hbase b/bin/hbase
index 8e37f5f..f1e2306 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -106,6 +106,8 @@ if [ $# = 0 ]; then
   echo "  backup          Backup tables for recovery"
   echo "  restore         Restore tables from existing backup image"
   echo "  regionsplitter  Run RegionSplitter tool"
+  echo "  rowcounter      Run RowCounter tool"
+  echo "  cellcounter     Run CellCounter tool"
   echo "  CLASSNAME       Run the class named CLASSNAME"
   exit 1
 fi
@@ -465,6 +467,10 @@ elif [ "$COMMAND" = "version" ] ; then
   CLASS='org.apache.hadoop.hbase.util.VersionInfo'
 elif [ "$COMMAND" = "regionsplitter" ] ; then
   CLASS='org.apache.hadoop.hbase.util.RegionSplitter'
+elif [ "$COMMAND" = "rowcounter" ] ; then
+  CLASS='org.apache.hadoop.hbase.mapreduce.RowCounter'
+elif [ "$COMMAND" = "cellcounter" ] ; then
+  CLASS='org.apache.hadoop.hbase.mapreduce.CellCounter'
 else
   CLASS=$COMMAND
 fi
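With these two additions the tools become first-class bin/hbase commands alongside the pre-existing CLASSNAME fallback. A minimal sketch of the two equivalent invocations (the table name `mytable` is invented for illustration):

----
# New shortcut introduced by this patch
$ bin/hbase rowcounter mytable

# Equivalent invocation through the generic CLASSNAME fallback
$ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter mytable
----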
"); - System.err.println(" Additionally, all of the SCAN properties from TableInputFormat"); - System.err.println(" can be specified to get fine grained control on what is counted.."); - System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>"); - System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>"); - System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\""); - System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ..."); - System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>"); - System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>"); - System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>"); - System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>"); - System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>"); - System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>"); - System.err.println(" <reportSeparator> parameter can be used to override the default report separator " + - "string : used to separate the rowId/column family name and qualifier name."); - System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " + - "operation to a limited subset of rows from the table based on regex or prefix pattern."); + printUsage(args.length); return -1; } Job job = createSubmittableJob(getConf(), args); return (job.waitForCompletion(true) ? 0 : 1); } + private void printUsage(int parameterCount) { + System.err.println("ERROR: Wrong number of parameters: " + parameterCount); + System.err.println("Usage: hbase cellcounter <tablename> <outputDir> [reportSeparator] " + + "[^[regex pattern] or [Prefix]] [--starttime=<starttime> --endtime=<endtime>]"); + System.err.println(" Note: -D properties will be applied to the conf used."); + System.err.println(" Additionally, all of the SCAN properties from TableInputFormat can be " + + "specified to get fine grained control on what is counted."); + System.err.println(" -D" + TableInputFormat.SCAN_ROW_START + "=<rowkey>"); + System.err.println(" -D" + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>"); + System.err.println(" -D" + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\""); + System.err.println(" -D" + TableInputFormat.SCAN_COLUMN_FAMILY + + "=<family1>,<family2>, ..."); + System.err.println(" -D" + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>"); + System.err.println(" -D" + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>"); + System.err.println(" -D" + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>"); + System.err.println(" -D" + TableInputFormat.SCAN_MAXVERSIONS + "=<count>"); + System.err.println(" -D" + TableInputFormat.SCAN_CACHEDROWS + "=<count>"); + System.err.println(" -D" + TableInputFormat.SCAN_BATCHSIZE + "=<count>"); + System.err.println(" <reportSeparator> parameter can be used to override the default report " + + "separator string : used to separate the rowId/column family name and qualifier name."); + System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell " + + "counter count operation to a limited subset of rows from the table based on regex or " + + "prefix pattern."); + } + /** * Main entry point. * @param args The command line parameters. 
http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
index 9c7b489..7fa5dec 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -221,9 +221,9 @@ public class RowCounter extends Configured implements Tool {
    * Note that we don't document --expected-count, because it's intended for test.
    */
   private static void printUsage() {
-    System.err.println("Usage: RowCounter [options] <tablename> " +
-      "[--starttime=[start] --endtime=[end] " +
-      "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
+    System.err.println("Usage: hbase rowcounter [options] <tablename> " +
+      "[--starttime=<start> --endtime=<end>] " +
+      "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
     System.err.println("For performance consider the following options:\n" +
       "-Dhbase.client.scanner.caching=100\n" +
       "-Dmapreduce.map.speculative=false");
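A matching sketch for the corrected rowcounter usage with the time-range and key-range options. The table, keys, timestamps, and column are illustrative, and epoch-millisecond timestamps are an assumption of this sketch:

----
# Quote --range when it contains ';', which the shell would otherwise
# treat as a command separator.
$ bin/hbase rowcounter mytable --starttime=1522540800000 --endtime=1523836800000 \
    '--range=a,m;n,z' cf:qual
----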
assertTrue(usage.contains("-Dhbase.client.scanner.caching=100")); + assertTrue(usage.contains("-Dmapreduce.map.speculative=false")); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/src/main/asciidoc/_chapters/ops_mgt.adoc ---------------------------------------------------------------------- diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 82badb4..38a7dff 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -68,8 +68,12 @@ Some commands take arguments. Pass no args or -h for usage. pe Run PerformanceEvaluation ltt Run LoadTestTool canary Run the Canary tool - regionsplitter Run the RegionSplitter tool version Print the version + backup Backup tables for recovery + restore Restore tables from existing backup image + regionsplitter Run RegionSplitter tool + rowcounter Run RowCounter tool + cellcounter Run CellCounter tool CLASSNAME Run the class named CLASSNAME ---- @@ -744,24 +748,28 @@ For performance also consider the following options: ---- [[rowcounter]] -=== RowCounter and CellCounter +=== RowCounter -link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table. +link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table. This is a good utility to use as a sanity check to ensure that HBase can read all the blocks of a table if there are any concerns of metadata inconsistency. -It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit. It is also possible to limit -the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags. +It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit. +It is possible to limit the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags. +The scanned data can be limited based on keys using the `--range=[startKey],[endKey][;[startKey],[endKey]...]` option. ---- -$ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter <tablename> [<column1> <column2>...] +$ bin/hbase rowcounter [options] <tablename> [--starttime=<start> --endtime=<end>] [--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...] ---- RowCounter only counts one version per cell. -Note: caching for the input Scan is configured via `hbase.client.scanner.caching` in the job configuration. +For performance consider to use `-Dhbase.client.scanner.caching=100` and `-Dmapreduce.map.speculative=false` options. + +[[cellcounter]] +=== CellCounter HBase ships another diagnostic mapreduce job called link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/CellCounter.html[CellCounter]. Like RowCounter, it gathers more fine-grained statistics about your table. -The statistics gathered by RowCounter are more fine-grained and include: +The statistics gathered by CellCounter are more fine-grained and include: * Total number of rows in the table. * Total number of CFs across all rows. @@ -772,12 +780,12 @@ The statistics gathered by RowCounter are more fine-grained and include: The program allows you to limit the scope of the run. Provide a row regex or prefix to limit the rows to analyze. 
http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/src/main/asciidoc/_chapters/ops_mgt.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 82badb4..38a7dff 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -68,8 +68,12 @@ Some commands take arguments. Pass no args or -h for usage.
   pe              Run PerformanceEvaluation
   ltt             Run LoadTestTool
   canary          Run the Canary tool
-  regionsplitter  Run the RegionSplitter tool
   version         Print the version
+  backup          Backup tables for recovery
+  restore         Restore tables from existing backup image
+  regionsplitter  Run RegionSplitter tool
+  rowcounter      Run RowCounter tool
+  cellcounter     Run CellCounter tool
   CLASSNAME       Run the class named CLASSNAME
 ----
@@ -744,24 +748,28 @@ For performance also consider the following options:
 ----
 
 [[rowcounter]]
-=== RowCounter and CellCounter
+=== RowCounter
 
-link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table.
+link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table. This is a good utility to use
 as a sanity check to ensure that HBase can read all the blocks of a table if there are any concerns of metadata inconsistency.
-It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit. It is also possible to limit
-the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags.
+It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit.
+It is possible to limit the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags.
+The scanned data can be limited based on keys using the `--range=[startKey],[endKey][;[startKey],[endKey]...]` option.
 
 ----
-$ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter <tablename> [<column1> <column2>...]
+$ bin/hbase rowcounter [options] <tablename> [--starttime=<start> --endtime=<end>] [--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]
 ----
 
 RowCounter only counts one version per cell.
 
-Note: caching for the input Scan is configured via `hbase.client.scanner.caching` in the job configuration.
+For performance consider to use `-Dhbase.client.scanner.caching=100` and `-Dmapreduce.map.speculative=false` options.
+
+[[cellcounter]]
+=== CellCounter
 
 HBase ships another diagnostic mapreduce job called link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/CellCounter.html[CellCounter].
 Like RowCounter, it gathers more fine-grained statistics about your table.
-The statistics gathered by RowCounter are more fine-grained and include:
+The statistics gathered by CellCounter are more fine-grained and include:
 
 * Total number of rows in the table.
 * Total number of CFs across all rows.
 * Total qualifiers across all rows.
 * Total occurrence of each CF.
 * Total occurrence of each qualifier.
 * Total number of versions of each qualifier.
 
 The program allows you to limit the scope of the run.
 Provide a row regex or prefix to limit the rows to analyze.
-Specify a time range to scan the table by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags.
+Specify a time range to scan the table by using the `--starttime=<starttime>` and `--endtime=<endtime>` flags.
 Use `hbase.mapreduce.scan.column.family` to specify scanning a single column family.
 
 ----
-$ bin/hbase org.apache.hadoop.hbase.mapreduce.CellCounter <tablename> <outputDir> [regex or prefix]
+$ bin/hbase cellcounter <tablename> <outputDir> [reportSeparator] [regex or prefix] [--starttime=<starttime> --endtime=<endtime>]
 ----
 
 Note: just like RowCounter, caching for the input Scan is configured via `hbase.client.scanner.caching` in the job configuration.
@@ -785,8 +793,7 @@
 === mlockall
 
 It is possible to optionally pin your servers in physical memory making them less likely to be swapped out in oversubscribed environments by having the servers call link:http://linux.die.net/man/2/mlockall[mlockall] on startup.
-See link:https://issues.apache.org/jira/browse/HBASE-4391[HBASE-4391 Add ability to
- start RS as root and call mlockall] for how to build the optional library and have it run on startup.
+See link:https://issues.apache.org/jira/browse/HBASE-4391[HBASE-4391 Add ability to start RS as root and call mlockall] for how to build the optional library and have it run on startup.
 
 [[compaction.tool]]
 === Offline Compaction Tool
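Tying the refreshed documentation together, one last sketch of CellCounter restricted to a row prefix and a time window. The table name, output path, `,` separator, and `user` prefix are invented, and epoch-millisecond timestamps are an assumption:

----
$ bin/hbase cellcounter mytable /tmp/cellcounter-out ',' user \
    --starttime=1522540800000 --endtime=1523836800000
----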