Repository: hbase
Updated Branches:
  refs/heads/master 3b91ae5b2 -> 8cc56bd18


HBASE-21320 [canary] Cleanup of usage and add commentary

Signed-off-by: Peter Somogyi <psomo...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8cc56bd1
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8cc56bd1
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8cc56bd1

Branch: refs/heads/master
Commit: 8cc56bd18c40ba9a7131336e97c74f8d97d8b2be
Parents: 3b91ae5
Author: Michael Stack <st...@apache.org>
Authored: Mon Oct 15 22:09:17 2018 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Tue Oct 16 22:20:00 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/HConstants.java     |   1 +
 .../org/apache/hadoop/hbase/tool/Canary.java    | 557 ++++++++++---------
 .../hadoop/hbase/tool/TestCanaryTool.java       |  10 +-
 src/main/asciidoc/_chapters/ops_mgt.adoc        | 146 ++---
 4 files changed, 383 insertions(+), 331 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/8cc56bd1/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index beb65fa..fbfab4b 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1325,6 +1325,7 @@ public final class HConstants {
     "hbase.regionserver.region.split.threads.max";
 
   /** Canary config keys */
+  // TODO: Move these defines to Canary Class
   public static final String HBASE_CANARY_WRITE_DATA_TTL_KEY = 
"hbase.canary.write.data.ttl";
 
   public static final String 
HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY =
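
These canary keys are ordinary Configuration entries, so they can be overridden in hbase-site.xml or set programmatically before the tool runs. A minimal sketch (class name and values are illustrative, not part of this patch) using two keys touched by this change and its test:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;

public class CanaryConfigSketch {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // TTL kept on rows the canary writes while write sniffing; assumed to be in
    // seconds, matching column-family TTL semantics. 86400 is just an example value.
    conf.setLong(HConstants.HBASE_CANARY_WRITE_DATA_TTL_KEY, 86400L);
    // Toggle raw scans for the read probe; TestCanaryTool below sets this same key.
    conf.setBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, true);
    System.out.println(conf.get(HConstants.HBASE_CANARY_WRITE_DATA_TTL_KEY));
  }
}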

http://git-wip-us.apache.org/repos/asf/hbase/blob/8cc56bd1/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
index 7a549fc..40f4aa6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
@@ -90,6 +90,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKConfig;
 import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
+import 
org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZooKeeper;
@@ -101,39 +102,45 @@ import org.slf4j.LoggerFactory;
 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
 
 /**
- * HBase Canary Tool, that that can be used to do
- * "canary monitoring" of a running HBase cluster.
+ * HBase Canary Tool for "canary monitoring" of a running HBase cluster.
  *
- * Here are three modes
- * 1. region mode - Foreach region tries to get one row per column family
- * and outputs some information about failure or latency.
+ * There are three modes:
+ * <ol>
+ * <li>region mode (Default): For each region, try to get one row per column 
family outputting
+ * information on failure (ERROR) or else the latency.
+ * </li>
  *
- * 2. regionserver mode - Foreach regionserver tries to get one row from one 
table
- * selected randomly and outputs some information about failure or latency.
+ * <li>regionserver mode: For each regionserver try to get one row from one 
table selected
+ * randomly outputting information on failure (ERROR) or else the latency.
+ * </li>
  *
- * 3. zookeeper mode - for each zookeeper instance, selects a zNode and
- * outputs some information about failure or latency.
+ * <li>zookeeper mode: for each zookeeper instance, selects a znode outputting 
information on
+ * failure (ERROR) or else the latency.
+ * </li>
+ * </ol>
  */
 @InterfaceAudience.Private
 public final class Canary implements Tool {
-  // Sink interface used by the canary to outputs information
+  /**
+   * Sink interface used by the canary to output information
+   */
   public interface Sink {
-    public long getReadFailureCount();
-    public long incReadFailureCount();
-    public Map<String,String> getReadFailures();
-    public void updateReadFailures(String regionName, String serverName);
-    public long getWriteFailureCount();
-    public long incWriteFailureCount();
-    public Map<String,String> getWriteFailures();
-    public void updateWriteFailures(String regionName, String serverName);
+    long getReadFailureCount();
+    long incReadFailureCount();
+    Map<String,String> getReadFailures();
+    void updateReadFailures(String regionName, String serverName);
+    long getWriteFailureCount();
+    long incWriteFailureCount();
+    Map<String,String> getWriteFailures();
+    void updateWriteFailures(String regionName, String serverName);
   }
 
-  // Simple implementation of canary sink that allows to plot on
-  // file or standard output timings or failures.
+  /**
+   * Simple implementation of canary sink that allows plotting to a file or 
standard output.
+   */
   public static class StdOutSink implements Sink {
     private AtomicLong readFailureCount = new AtomicLong(0),
         writeFailureCount = new AtomicLong(0);
-
     private Map<String, String> readFailures = new ConcurrentHashMap<>();
     private Map<String, String> writeFailures = new ConcurrentHashMap<>();
 
@@ -178,67 +185,75 @@ public final class Canary implements Tool {
     }
   }
 
+  /**
+   * By RegionServer, for 'regionserver' mode.
+   */
   public static class RegionServerStdOutSink extends StdOutSink {
-
     public void publishReadFailure(String table, String server) {
       incReadFailureCount();
-      LOG.error(String.format("Read from table:%s on region server:%s", table, 
server));
+      LOG.error("Read from {} on {}", table, server);
     }
 
     public void publishReadTiming(String table, String server, long msTime) {
-      LOG.info(String.format("Read from table:%s on region server:%s in %dms",
-          table, server, msTime));
+      LOG.info("Read from {} on {} in {}ms", table, server, msTime);
     }
   }
 
+  /**
+   * Output for 'zookeeper' mode.
+   */
   public static class ZookeeperStdOutSink extends StdOutSink {
-
-    public void publishReadFailure(String zNode, String server) {
+    public void publishReadFailure(String znode, String server) {
       incReadFailureCount();
-      LOG.error(String.format("Read from zNode:%s on zookeeper instance:%s", 
zNode, server));
+      LOG.error("Read from {} on {}", znode, server);
     }
 
     public void publishReadTiming(String znode, String server, long msTime) {
-      LOG.info(String.format("Read from zNode:%s on zookeeper instance:%s in 
%dms",
-          znode, server, msTime));
+      LOG.info("Read from {} on {} in {}ms", znode, server, msTime);
     }
   }
 
+  /**
+   * By Region, for 'region' mode.
+   */
   public static class RegionStdOutSink extends StdOutSink {
-
     private Map<String, LongAdder> perTableReadLatency = new HashMap<>();
     private LongAdder writeLatency = new LongAdder();
 
     public void publishReadFailure(ServerName serverName, RegionInfo region, 
Exception e) {
       incReadFailureCount();
-      LOG.error(String.format("read from region %s on regionserver %s failed", 
region.getRegionNameAsString(), serverName), e);
+      LOG.error("Read from {} on {} failed", region.getRegionNameAsString(), 
serverName, e);
     }
 
-    public void publishReadFailure(ServerName serverName, RegionInfo region, 
ColumnFamilyDescriptor column, Exception e) {
+    public void publishReadFailure(ServerName serverName, RegionInfo region,
+        ColumnFamilyDescriptor column, Exception e) {
       incReadFailureCount();
-      LOG.error(String.format("read from region %s on regionserver %s column 
family %s failed",
-        region.getRegionNameAsString(), serverName, column.getNameAsString()), 
e);
+      LOG.error("Read from {} on {} {} failed", 
region.getRegionNameAsString(), serverName,
+          column.getNameAsString(), e);
     }
 
-    public void publishReadTiming(ServerName serverName, RegionInfo region, 
ColumnFamilyDescriptor column, long msTime) {
-      LOG.info(String.format("read from region %s on regionserver %s column 
family %s in %dms",
-        region.getRegionNameAsString(), serverName, column.getNameAsString(), 
msTime));
+    public void publishReadTiming(ServerName serverName, RegionInfo region,
+        ColumnFamilyDescriptor column, long msTime) {
+      LOG.info("Read from {} on {} {} in {}ms", 
region.getRegionNameAsString(), serverName,
+          column.getNameAsString(), msTime);
     }
 
     public void publishWriteFailure(ServerName serverName, RegionInfo region, 
Exception e) {
       incWriteFailureCount();
-      LOG.error(String.format("write to region %s on regionserver %s failed", 
region.getRegionNameAsString(), serverName), e);
+      LOG.error("Write to {} on {} failed", region.getRegionNameAsString(), 
serverName, e);
     }
 
-    public void publishWriteFailure(ServerName serverName, RegionInfo region, 
ColumnFamilyDescriptor column, Exception e) {
+    public void publishWriteFailure(ServerName serverName, RegionInfo region,
+        ColumnFamilyDescriptor column, Exception e) {
       incWriteFailureCount();
-      LOG.error(String.format("write to region %s on regionserver %s column 
family %s failed",
-        region.getRegionNameAsString(), serverName, column.getNameAsString()), 
e);
+      LOG.error("Write to {} on {} {} failed", region.getRegionNameAsString(), 
serverName,
+          column.getNameAsString(), e);
     }
 
-    public void publishWriteTiming(ServerName serverName, RegionInfo region, 
ColumnFamilyDescriptor column, long msTime) {
-      LOG.info(String.format("write to region %s on regionserver %s column 
family %s in %dms",
-        region.getRegionNameAsString(), serverName, column.getNameAsString(), 
msTime));
+    public void publishWriteTiming(ServerName serverName, RegionInfo region,
+        ColumnFamilyDescriptor column, long msTime) {
+      LOG.info("Write to {} on {} {} in {}ms",
+        region.getRegionNameAsString(), serverName, column.getNameAsString(), 
msTime);
     }
 
     public Map<String, LongAdder> getReadLatencyMap() {
@@ -260,6 +275,9 @@ public final class Canary implements Tool {
     }
   }
 
+  /**
+   * Run a single zookeeper Task and then exit.
+   */
   static class ZookeeperTask implements Callable<Void> {
     private final Connection connection;
     private final String host;
@@ -298,8 +316,8 @@ public final class Canary implements Tool {
   }
 
   /**
-   * For each column family of the region tries to get one row and outputs the 
latency, or the
-   * failure.
+   * Run a single Region Task and then exit. For each column family of the 
Region, get one row and
+   * output latency or failure.
    */
   static class RegionTask implements Callable<Void> {
     public enum TaskType{
@@ -313,8 +331,8 @@ public final class Canary implements Tool {
     private ServerName serverName;
     private LongAdder readWriteLatency;
 
-    RegionTask(Connection connection, RegionInfo region, ServerName 
serverName, RegionStdOutSink sink,
-        TaskType taskType, boolean rawScanEnabled, LongAdder rwLatency) {
+    RegionTask(Connection connection, RegionInfo region, ServerName serverName,
+        RegionStdOutSink sink, TaskType taskType, boolean rawScanEnabled, 
LongAdder rwLatency) {
       this.connection = connection;
       this.region = region;
       this.serverName = serverName;
@@ -340,14 +358,11 @@ public final class Canary implements Tool {
       Table table = null;
       TableDescriptor tableDesc = null;
       try {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug(String.format("reading table descriptor for table %s",
-            region.getTable()));
-        }
+        LOG.debug("Reading table descriptor for table {}", region.getTable());
         table = connection.getTable(region.getTable());
         tableDesc = table.getDescriptor();
       } catch (IOException e) {
-        LOG.debug("sniffRegion failed", e);
+        LOG.debug("sniffRegion {} of {} failed", region.getEncodedName(), e);
         sink.publishReadFailure(serverName, region, e);
         if (table != null) {
           try {
@@ -375,10 +390,7 @@ public final class Canary implements Tool {
           get.addFamily(column.getName());
         } else {
           scan = new Scan();
-          if (LOG.isDebugEnabled()) {
-            LOG.debug(String.format("rawScan : %s for table: %s", 
rawScanEnabled,
-              tableDesc.getTableName()));
-          }
+          LOG.debug("rawScan {} for {}", rawScanEnabled, 
tableDesc.getTableName());
           scan.setRaw(rawScanEnabled);
           scan.setCaching(1);
           scan.setCacheBlocks(false);
@@ -387,12 +399,9 @@ public final class Canary implements Tool {
           scan.setMaxResultSize(1L);
           scan.setOneRowLimit();
         }
-
-        if (LOG.isDebugEnabled()) {
-          LOG.debug(String.format("reading from table %s region %s column 
family %s and key %s",
-            tableDesc.getTableName(), region.getRegionNameAsString(), 
column.getNameAsString(),
-            Bytes.toStringBinary(startKey)));
-        }
+        LOG.debug("Reading from {} {} {} {}", tableDesc.getTableName(),
+            region.getRegionNameAsString(), column.getNameAsString(),
+            Bytes.toStringBinary(startKey));
         try {
           stopWatch.start();
           if (startKey.length > 0) {
@@ -425,7 +434,6 @@ public final class Canary implements Tool {
 
     /**
      * Check writes for the canary table
-     * @return
      */
     private Void write() {
       Table table = null;
@@ -445,11 +453,9 @@ public final class Canary implements Tool {
           Bytes.random(value);
           put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value);
 
-          if (LOG.isDebugEnabled()) {
-            LOG.debug(String.format("writing to table %s region %s column 
family %s and key %s",
-              tableDesc.getTableName(), region.getRegionNameAsString(), 
column.getNameAsString(),
-              Bytes.toStringBinary(rowToCheck)));
-          }
+          LOG.debug("Writing to {} {} {} {}",
+            tableDesc.getTableName(), region.getRegionNameAsString(), 
column.getNameAsString(),
+            Bytes.toStringBinary(rowToCheck));
           try {
             long startTime = System.currentTimeMillis();
             table.put(put);
@@ -470,7 +476,8 @@ public final class Canary implements Tool {
   }
 
   /**
-   * Get one row from a region on the regionserver and outputs the latency, or 
the failure.
+   * Run a single RegionServer Task and then exit.
+   * Get one row from a region on the regionserver and output latency or the 
failure.
    */
   static class RegionServerTask implements Callable<Void> {
     private Connection connection;
@@ -503,11 +510,9 @@ public final class Canary implements Tool {
         table = connection.getTable(tableName);
         startKey = region.getStartKey();
         // Can't do a get on empty start row so do a Scan of first element if 
any instead.
-        if (LOG.isDebugEnabled()) {
-          LOG.debug(String.format("reading from region server %s table %s 
region %s and key %s",
-            serverName, region.getTable(), region.getRegionNameAsString(),
-            Bytes.toStringBinary(startKey)));
-        }
+        LOG.debug("Reading from {} {} {} {}",
+          serverName, region.getTable(), region.getRegionNameAsString(),
+          Bytes.toStringBinary(startKey));
         if (startKey.length > 0) {
           get = new Get(startKey);
           get.setCacheBlocks(false);
@@ -584,23 +589,43 @@ public final class Canary implements Tool {
   private boolean useRegExp;
   private long timeout = DEFAULT_TIMEOUT;
   private boolean failOnError = true;
+
+  /**
+   * True if we are to run in 'regionserver' mode.
+   */
   private boolean regionServerMode = false;
+
+  /**
+   * True if we are to run in 'zookeeper' mode.
+   */
   private boolean zookeeperMode = false;
+
   private long permittedFailures = 0;
   private boolean regionServerAllRegions = false;
   private boolean writeSniffing = false;
   private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
   private boolean treatFailureAsError = false;
   private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME;
+
+  /**
+   * This is a Map of table to timeout. The timeout is for reading all regions 
in the table; i.e.
+   * we aggregate time to fetch each region and it needs to be less than this 
value else we
+   * log an ERROR.
+   */
   private HashMap<String, Long> configuredReadTableTimeouts = new HashMap<>();
 
   private ExecutorService executor; // threads to retrieve data from 
regionservers
 
   public Canary() {
-    this(new ScheduledThreadPoolExecutor(1), new RegionServerStdOutSink());
+    this(new ScheduledThreadPoolExecutor(1));
+  }
+
+  public Canary(ExecutorService executor) {
+    this(executor, null);
   }
 
-  public Canary(ExecutorService executor, Sink sink) {
+  @VisibleForTesting
+  Canary(ExecutorService executor, Sink sink) {
     this.executor = executor;
     this.sink = sink;
   }
@@ -628,7 +653,7 @@ public final class Canary implements Tool {
           printUsageAndExit();
         }
 
-        if (cmd.equals("-help")) {
+        if (cmd.equals("-help") || cmd.equals("-h")) {
           // user asked for help, print the help and quit.
           printUsageAndExit();
         } else if (cmd.equals("-daemon") && interval == 0) {
@@ -639,7 +664,7 @@ public final class Canary implements Tool {
           i++;
 
           if (i == args.length) {
-            System.err.println("-interval needs a numeric value argument.");
+            System.err.println("-interval takes a numeric seconds value 
argument.");
             printUsageAndExit();
           }
 
@@ -657,7 +682,7 @@ public final class Canary implements Tool {
           this.regionServerAllRegions = true;
         } else if(cmd.equals("-writeSniffing")) {
           this.writeSniffing = true;
-        } else if(cmd.equals("-treatFailureAsError")) {
+        } else if(cmd.equals("-treatFailureAsError") || 
cmd.equals("-failureAsError")) {
           this.treatFailureAsError = true;
         } else if (cmd.equals("-e")) {
           this.useRegExp = true;
@@ -665,35 +690,35 @@ public final class Canary implements Tool {
           i++;
 
           if (i == args.length) {
-            System.err.println("-t needs a numeric value argument.");
+            System.err.println("-t takes a numeric milliseconds value 
argument.");
             printUsageAndExit();
           }
 
           try {
             this.timeout = Long.parseLong(args[i]);
           } catch (NumberFormatException e) {
-            System.err.println("-t needs a numeric value argument.");
+            System.err.println("-t takes a numeric milliseconds value 
argument.");
             printUsageAndExit();
           }
         } else if(cmd.equals("-writeTableTimeout")) {
           i++;
 
           if (i == args.length) {
-            System.err.println("-writeTableTimeout needs a numeric value 
argument.");
+            System.err.println("-writeTableTimeout takes a numeric 
milliseconds value argument.");
             printUsageAndExit();
           }
 
           try {
             this.configuredWriteTableTimeout = Long.parseLong(args[i]);
           } catch (NumberFormatException e) {
-            System.err.println("-writeTableTimeout needs a numeric value 
argument.");
+            System.err.println("-writeTableTimeout takes a numeric 
milliseconds value argument.");
             printUsageAndExit();
           }
         } else if (cmd.equals("-writeTable")) {
           i++;
 
           if (i == args.length) {
-            System.err.println("-writeTable needs a string value argument.");
+            System.err.println("-writeTable takes a string tablename value 
argument.");
             printUsageAndExit();
           }
           this.writeTableName = TableName.valueOf(args[i]);
@@ -711,14 +736,16 @@ public final class Canary implements Tool {
           i++;
 
           if (i == args.length) {
-            System.err.println("-readTableTimeouts needs a comma-separated 
list of read timeouts per table (without spaces).");
+            System.err.println("-readTableTimeouts needs a comma-separated 
list of read " +
+                "millisecond timeouts per table (without spaces).");
             printUsageAndExit();
           }
           String [] tableTimeouts = args[i].split(",");
           for (String tT: tableTimeouts) {
             String [] nameTimeout = tT.split("=");
             if (nameTimeout.length < 2) {
-              System.err.println("Each -readTableTimeouts argument must be of 
the form <tableName>=<read timeout>.");
+              System.err.println("Each -readTableTimeouts argument must be of 
the form " +
+                  "<tableName>=<read timeout> (without spaces).");
               printUsageAndExit();
             }
             long timeoutVal = 0L;
@@ -856,41 +883,56 @@ public final class Canary implements Tool {
 
   private void printUsageAndExit() {
     System.err.println(
-      "Usage: hbase canary [opts] [table1 [table2]...] | [regionserver1 
[regionserver2]..]");
-    System.err.println(" where [opts] are:");
-    System.err.println("   -help          Show this help and exit.");
-    System.err.println("   -regionserver  replace the table argument to 
regionserver,");
-    System.err.println("      which means to enable regionserver mode");
-    System.err.println("   -allRegions    Tries all regions on a 
regionserver,");
-    System.err.println("      only works in regionserver mode.");
-    System.err.println("   -zookeeper    Tries to grab zookeeper.znode.parent 
");
-    System.err.println("      on each zookeeper instance");
-    System.err.println("   -permittedZookeeperFailures <N>    Ignore first N 
failures when attempting to ");
-    System.err.println("      connect to individual zookeeper nodes in the 
ensemble");
-    System.err.println("   -daemon        Continuous check at defined 
intervals.");
-    System.err.println("   -interval <N>  Interval between checks (sec)");
-    System.err.println("   -e             Use table/regionserver as regular 
expression");
-    System.err.println("      which means the table/regionserver is regular 
expression pattern");
-    System.err.println("   -f <B>         stop whole program if first error 
occurs," +
-        " default is true");
-    System.err.println("   -t <N>         timeout for a check, default is 
600000 (millisecs)");
-    System.err.println("   -writeTableTimeout <N>         write timeout for 
the writeTable, default is 600000 (millisecs)");
-    System.err.println("   -readTableTimeouts <tableName>=<read 
timeout>,<tableName>=<read timeout>, ...    "
-        + "comma-separated list of read timeouts per table (no spaces), 
default is 600000 (millisecs)");
-    System.err.println("   -writeSniffing enable the write sniffing in 
canary");
-    System.err.println("   -treatFailureAsError treats read / write failure as 
error");
-    System.err.println("   -writeTable    The table used for write sniffing."
-        + " Default is hbase:canary");
-    System.err.println("   -Dhbase.canary.read.raw.enabled=<true/false> Use 
this flag to enable or disable raw scan during read canary test"
-        + " Default is false and raw is not enabled during scan");
-    System.err
-        .println("   -D<configProperty>=<value> assigning or override the 
configuration params");
+      "Usage: canary [OPTIONS] [<TABLE1> [<TABLE2]...] | [<REGIONSERVER1> 
[<REGIONSERVER2]..]");
+    System.err.println("Where [OPTIONS] are:");
+    System.err.println(" -h,-help        show this help and exit.");
+    System.err.println(" -regionserver   set 'regionserver mode'; gets row 
from random region on " +
+        "server");
+    System.err.println(" -allRegions     get from ALL regions when 
'regionserver mode', not just " +
+        "random one.");
+    System.err.println(" -zookeeper      set 'zookeeper mode'; grab 
zookeeper.znode.parent on " +
+        "each ensemble member");
+    System.err.println(" -daemon         continuous check at defined 
intervals.");
+    System.err.println(" -interval <N>   interval between checks in seconds");
+    System.err.println(" -e              consider table/regionserver argument 
as regular " +
+        "expression");
+    System.err.println(" -f <B>          exit on first error; default=true");
+    System.err.println(" -failureAsError treat read/write failure as error");
+    System.err.println(" -t <N>          timeout for canary-test run; 
default=600000ms");
+    System.err.println(" -writeSniffing  enable write sniffing");
+    System.err.println(" -writeTable     the table used for write sniffing; 
default=hbase:canary");
+    System.err.println(" -writeTableTimeout <N>  timeout for writeTable; 
default=600000ms");
+    System.err.println(" -readTableTimeouts <tableName>=<read timeout>," +
+        "<tableName>=<read timeout>,...");
+    System.err.println("                comma-separated list of table read 
timeouts " +
+        "(no spaces);");
+    System.err.println("                logs 'ERROR' if takes longer. 
default=600000ms");
+    System.err.println(" -permittedZookeeperFailures <N>  Ignore first N 
failures attempting to ");
+    System.err.println("                connect to individual zookeeper nodes 
in ensemble");
+    System.err.println("");
+    System.err.println(" -D<configProperty>=<value> to assign or override 
configuration params");
+    System.err.println(" -Dhbase.canary.read.raw.enabled=<true/false> Set to 
enable/disable " +
+        "raw scan; default=false");
+    System.err.println("");
+    System.err.println("Canary runs in one of three modes: region (default), 
regionserver, or " +
+        "zookeeper.");
+    System.err.println("To sniff/probe all regions, pass no arguments.");
+    System.err.println("To sniff/probe all regions of a table, pass 
tablename.");
+    System.err.println("To sniff/probe regionservers, pass -regionserver, 
etc.");
+    System.err.println("See http://hbase.apache.org/book.html#_canary for 
Canary documentation.");
     System.exit(USAGE_EXIT_CODE);
   }
 
+  Sink getSink(Configuration configuration, Class clazz) {
+    // In test context, this.sink might be set. Use it if non-null. For 
testing.
+    return this.sink != null? this.sink:
+        
(Sink)ReflectionUtils.newInstance(configuration.getClass("hbase.canary.sink.class",
+            clazz, Sink.class));
+  }
+
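
The getSink() lookup above resolves `hbase.canary.sink.class` by reflection, falling back to the per-mode default that is passed in. A hedged sketch of plugging in a custom sink for the default region mode (class names hypothetical, not part of this patch); it extends Canary.RegionStdOutSink so region mode can still use it:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.tool.Canary;
import org.apache.hadoop.util.ToolRunner;

public class CanaryWithCustomSink {
  /** Hypothetical sink; extends the region-mode default so the RegionMonitor accepts it. */
  public static class AlertingRegionSink extends Canary.RegionStdOutSink {
    @Override
    public void publishReadFailure(ServerName serverName, RegionInfo region, Exception e) {
      super.publishReadFailure(serverName, region, e);
      // Hook point: page an operator, bump an external metric, etc.
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.setClass("hbase.canary.sink.class", AlertingRegionSink.class, Canary.Sink.class);
    System.exit(ToolRunner.run(conf, new Canary(), args));
  }
}
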
   /**
    * A Factory method for {@link Monitor}.
-   * Can be overridden by user.
+   * Makes a RegionServerMonitor, or a ZooKeeperMonitor, or a RegionMonitor.
    * @param index a start index for monitor target
    * @param args args passed from user
    * @return a Monitor instance
@@ -899,37 +941,45 @@ public final class Canary implements Tool {
     Monitor monitor = null;
     String[] monitorTargets = null;
 
-    if(index >= 0) {
+    if (index >= 0) {
       int length = args.length - index;
       monitorTargets = new String[length];
       System.arraycopy(args, index, monitorTargets, 0, length);
     }
 
-    if (this.sink instanceof RegionServerStdOutSink || this.regionServerMode) {
+    if (this.regionServerMode) {
       monitor =
           new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
-              (StdOutSink) this.sink, this.executor, 
this.regionServerAllRegions,
+              getSink(connection.getConfiguration(), 
RegionServerStdOutSink.class),
+              this.executor, this.regionServerAllRegions,
               this.treatFailureAsError, this.permittedFailures);
-    } else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) 
{
+    } else if (this.zookeeperMode) {
       monitor =
           new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
-              (StdOutSink) this.sink, this.executor, this.treatFailureAsError,
+              getSink(connection.getConfiguration(), 
ZookeeperStdOutSink.class),
+              this.executor, this.treatFailureAsError,
               this.permittedFailures);
     } else {
       monitor =
           new RegionMonitor(connection, monitorTargets, this.useRegExp,
-              (StdOutSink) this.sink, this.executor, this.writeSniffing,
+              getSink(connection.getConfiguration(), RegionStdOutSink.class),
+              this.executor, this.writeSniffing,
               this.writeTableName, this.treatFailureAsError, 
this.configuredReadTableTimeouts,
               this.configuredWriteTableTimeout, this.permittedFailures);
     }
     return monitor;
   }
 
-  // a Monitor super-class can be extended by users
+  /**
+   * A Monitor super-class can be extended by users
+   */
   public static abstract class Monitor implements Runnable, Closeable {
-
     protected Connection connection;
     protected Admin admin;
+    /**
+     * 'Target' dependent on 'mode'. Could be Tables or RegionServers or 
ZNodes.
+     * Passed on the command-line as arguments.
+     */
     protected String[] targets;
     protected boolean useRegExp;
     protected boolean treatFailureAsError;
@@ -999,7 +1049,9 @@ public final class Canary implements Tool {
     }
   }
 
-  // a monitor for region mode
+  /**
+   * A monitor for region mode.
+   */
   private static class RegionMonitor extends Monitor {
     // 10 minutes
     private static final int DEFAULT_WRITE_TABLE_CHECK_PERIOD = 10 * 60 * 1000;
@@ -1014,14 +1066,22 @@ public final class Canary implements Tool {
     private float regionsUpperLimit;
     private int checkPeriod;
     private boolean rawScanEnabled;
+
+    /**
+     * This is a timeout per table. If the aggregate time to read all regions of the table takes
+     * longer than what is configured here, we log an ERROR rather than just an INFO.
+     */
     private HashMap<String, Long> configuredReadTableTimeouts;
+
     private long configuredWriteTableTimeout;
 
     public RegionMonitor(Connection connection, String[] monitorTargets, 
boolean useRegExp,
-        StdOutSink sink, ExecutorService executor, boolean writeSniffing, 
TableName writeTableName,
-        boolean treatFailureAsError, HashMap<String, Long> 
configuredReadTableTimeouts, long configuredWriteTableTimeout,
+        Sink sink, ExecutorService executor, boolean writeSniffing, TableName 
writeTableName,
+        boolean treatFailureAsError, HashMap<String, Long> 
configuredReadTableTimeouts,
+        long configuredWriteTableTimeout,
         long allowedFailures) {
-      super(connection, monitorTargets, useRegExp, sink, executor, 
treatFailureAsError, allowedFailures);
+      super(connection, monitorTargets, useRegExp, sink, executor, 
treatFailureAsError,
+          allowedFailures);
       Configuration conf = connection.getConfiguration();
       this.writeSniffing = writeSniffing;
       this.writeTableName = writeTableName;
@@ -1054,9 +1114,12 @@ public final class Canary implements Tool {
           RegionStdOutSink regionSink = this.getSink();
           if (this.targets != null && this.targets.length > 0) {
             String[] tables = generateMonitorTables(this.targets);
-            // Check to see that each table name passed in the 
-readTableTimeouts argument is also passed as a monitor target.
-            if (! new 
HashSet<>(Arrays.asList(tables)).containsAll(this.configuredReadTableTimeouts.keySet()))
 {
-              LOG.error("-readTableTimeouts can only specify read timeouts for 
monitor targets passed via command line.");
+            // Check to see that each table name passed in the 
-readTableTimeouts argument is also
+            // passed as a monitor target.
+            if (!new HashSet<>(Arrays.asList(tables)).
+                containsAll(this.configuredReadTableTimeouts.keySet())) {
+              LOG.error("-readTableTimeouts can only specify read timeouts for 
monitor targets " +
+                  "passed via command line.");
               this.errorCode = USAGE_EXIT_CODE;
               return;
             }
@@ -1082,7 +1145,7 @@ public final class Canary implements Tool {
             // sniff canary table with write operation
             regionSink.initializeWriteLatency();
             LongAdder writeTableLatency = regionSink.getWriteLatency();
-            taskFutures.addAll(Canary.sniff(admin, regionSink, 
admin.getTableDescriptor(writeTableName),
+            taskFutures.addAll(Canary.sniff(admin, regionSink, 
admin.getDescriptor(writeTableName),
               executor, TaskType.WRITE, this.rawScanEnabled, 
writeTableLatency));
           }
 
@@ -1099,23 +1162,26 @@ public final class Canary implements Tool {
             if (actualReadTableLatency.containsKey(tableName)) {
               Long actual = actualReadTableLatency.get(tableName).longValue();
               Long configured = entry.getValue();
-              LOG.info("Read operation for " + tableName + " took " + actual +
-                " ms. The configured read timeout was " + configured + " ms.");
               if (actual > configured) {
-                LOG.error("Read operation for " + tableName + " exceeded the 
configured read timeout.");
+                LOG.error("Read operation for {} took {}ms (Configured read 
timeout {}ms.",
+                    tableName, actual, configured);
+              } else {
+                LOG.info("Read operation for {} took {}ms (Configured read 
timeout {}ms.",
+                    tableName, actual, configured);
               }
             } else {
-              LOG.error("Read operation for " + tableName + " failed!");
+              LOG.error("Read operation for {} failed!", tableName);
             }
           }
           if (this.writeSniffing) {
             String writeTableStringName = 
this.writeTableName.getNameAsString();
             long actualWriteLatency = regionSink.getWriteLatency().longValue();
-            LOG.info("Write operation for " + writeTableStringName + " took " 
+ actualWriteLatency + " ms. The configured write timeout was " +
-              this.configuredWriteTableTimeout + " ms.");
+            LOG.info("Write operation for {} took {}ms. Configured write 
timeout {}ms.",
+                writeTableStringName, actualWriteLatency, 
this.configuredWriteTableTimeout);
             // Check that the writeTable write operation latency does not 
exceed the configured timeout.
             if (actualWriteLatency > this.configuredWriteTableTimeout) {
-              LOG.error("Write operation for " + writeTableStringName + " 
exceeded the configured write timeout.");
+              LOG.error("Write operation for {} exceeded the configured write 
timeout.",
+                  writeTableStringName);
             }
           }
         } catch (Exception e) {
@@ -1123,31 +1189,32 @@ public final class Canary implements Tool {
           this.errorCode = ERROR_EXIT_CODE;
         } finally {
           this.done = true;
-       }
+        }
       }
       this.done = true;
     }
 
+    /**
+     * @return List of tables to use in test.
+     */
     private String[] generateMonitorTables(String[] monitorTargets) throws 
IOException {
       String[] returnTables = null;
 
       if (this.useRegExp) {
         Pattern pattern = null;
-        HTableDescriptor[] tds = null;
+        TableDescriptor[] tds = null;
         Set<String> tmpTables = new TreeSet<>();
         try {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug(String.format("reading list of tables"));
-          }
+          LOG.debug(String.format("reading list of tables"));
           tds = this.admin.listTables(pattern);
           if (tds == null) {
-            tds = new HTableDescriptor[0];
+            tds = new TableDescriptor[0];
           }
           for (String monitorTarget : monitorTargets) {
             pattern = Pattern.compile(monitorTarget);
-            for (HTableDescriptor td : tds) {
-              if (pattern.matcher(td.getNameAsString()).matches()) {
-                tmpTables.add(td.getNameAsString());
+            for (TableDescriptor td : tds) {
+              if 
(pattern.matcher(td.getTableName().getNameAsString()).matches()) {
+                tmpTables.add(td.getTableName().getNameAsString());
               }
             }
           }
@@ -1172,18 +1239,19 @@ public final class Canary implements Tool {
     }
 
     /*
-     * canary entry point to monitor all the tables.
+     * Canary entry point to monitor all the tables.
      */
-    private List<Future<Void>> sniff(TaskType taskType, RegionStdOutSink 
regionSink) throws Exception {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format("reading list of tables"));
-      }
+    private List<Future<Void>> sniff(TaskType taskType, RegionStdOutSink 
regionSink)
+        throws Exception {
+      LOG.debug("Reading list of tables");
       List<Future<Void>> taskFutures = new LinkedList<>();
-      for (HTableDescriptor table : admin.listTables()) {
-        if (admin.isTableEnabled(table.getTableName())
-            && (!table.getTableName().equals(writeTableName))) {
-          LongAdder readLatency = 
regionSink.initializeAndGetReadLatencyForTable(table.getNameAsString());
-          taskFutures.addAll(Canary.sniff(admin, sink, table, executor, 
taskType, this.rawScanEnabled, readLatency));
+      for (TableDescriptor td: admin.listTableDescriptors()) {
+        if (admin.isTableEnabled(td.getTableName()) &&
+            (!td.getTableName().equals(writeTableName))) {
+          LongAdder readLatency =
+              
regionSink.initializeAndGetReadLatencyForTable(td.getTableName().getNameAsString());
+          taskFutures.addAll(Canary.sniff(admin, sink, td, executor, taskType, 
this.rawScanEnabled,
+              readLatency));
         }
       }
       return taskFutures;
@@ -1231,11 +1299,10 @@ public final class Canary implements Tool {
 
     private void createWriteTable(int numberOfServers) throws IOException {
       int numberOfRegions = (int)(numberOfServers * regionsLowerLimit);
-      LOG.info("Number of live regionservers: " + numberOfServers + ", "
-          + "pre-splitting the canary table into " + numberOfRegions + " 
regions "
-          + "(current lower limit of regions per server is " + 
regionsLowerLimit
-          + " and you can change it by config: "
-          + HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " 
)");
+      LOG.info("Number of live regionservers {}, pre-splitting the canary 
table into {} regions " +
+        "(current lower limit of regions per server is {} and you can change 
it with config {}).",
+          numberOfServers, numberOfRegions, regionsLowerLimit,
+          HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY);
       HTableDescriptor desc = new HTableDescriptor(writeTableName);
       HColumnDescriptor family = new 
HColumnDescriptor(CANARY_TABLE_FAMILY_NAME);
       family.setMaxVersions(1);
@@ -1252,59 +1319,40 @@ public final class Canary implements Tool {
    * @throws Exception
    */
   private static List<Future<Void>> sniff(final Admin admin, final Sink sink, 
String tableName,
-      ExecutorService executor, TaskType taskType, boolean rawScanEnabled, 
LongAdder readLatency) throws Exception {
-    if (LOG.isDebugEnabled()) {
-      LOG.debug(String.format("checking table is enabled and getting table 
descriptor for table %s",
-        tableName));
-    }
+      ExecutorService executor, TaskType taskType, boolean rawScanEnabled, 
LongAdder readLatency)
+      throws Exception {
+    LOG.debug("Checking table is enabled and getting table descriptor for 
table {}", tableName);
     if (admin.isTableEnabled(TableName.valueOf(tableName))) {
-      return Canary.sniff(admin, sink, 
admin.getTableDescriptor(TableName.valueOf(tableName)),
+      return Canary.sniff(admin, sink, 
admin.getDescriptor(TableName.valueOf(tableName)),
         executor, taskType, rawScanEnabled, readLatency);
     } else {
-      LOG.warn(String.format("Table %s is not enabled", tableName));
+      LOG.warn("Table {} is not enabled", tableName);
     }
     return new LinkedList<>();
   }
 
   /*
-   * Loops over regions that owns this table, and output some information 
about the state.
+   * Loops over regions of this table, and outputs information about the state.
    */
   private static List<Future<Void>> sniff(final Admin admin, final Sink sink,
-      HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType,
+      TableDescriptor tableDesc, ExecutorService executor, TaskType taskType,
       boolean rawScanEnabled, LongAdder rwLatency) throws Exception {
-
-    if (LOG.isDebugEnabled()) {
-      LOG.debug(String.format("reading list of regions for table %s", 
tableDesc.getTableName()));
-    }
-
-    Table table = null;
-    try {
-      table = admin.getConnection().getTable(tableDesc.getTableName());
-    } catch (TableNotFoundException e) {
-      return new ArrayList<>();
-    }
-    finally {
-      if (table !=null) {
-        table.close();
-      }
-    }
-
-    List<RegionTask> tasks = new ArrayList<>();
-    RegionLocator regionLocator = null;
-    try {
-      regionLocator = 
admin.getConnection().getRegionLocator(tableDesc.getTableName());
-      for (HRegionLocation location : regionLocator.getAllRegionLocations()) {
-        ServerName rs = location.getServerName();
-        RegionInfo region = location.getRegionInfo();
-        tasks.add(new RegionTask(admin.getConnection(), region, rs, 
(RegionStdOutSink) sink, taskType, rawScanEnabled,
-          rwLatency));
-      }
-    } finally {
-      if (regionLocator != null) {
-        regionLocator.close();
+    LOG.debug("Reading list of regions for table {}", 
tableDesc.getTableName());
+    try (Table table = 
admin.getConnection().getTable(tableDesc.getTableName())) {
+      List<RegionTask> tasks = new ArrayList<>();
+      try (RegionLocator regionLocator =
+               
admin.getConnection().getRegionLocator(tableDesc.getTableName())) {
+        for (HRegionLocation location: regionLocator.getAllRegionLocations()) {
+          ServerName rs = location.getServerName();
+          RegionInfo region = location.getRegion();
+          tasks.add(new RegionTask(admin.getConnection(), region, rs, 
(RegionStdOutSink)sink,
+              taskType, rawScanEnabled, rwLatency));
+        }
+        return executor.invokeAll(tasks);
       }
+    } catch (TableNotFoundException e) {
+      return Collections.EMPTY_LIST;
     }
-    return executor.invokeAll(tasks);
   }
 
   //  monitor for zookeeper mode
@@ -1314,8 +1362,9 @@ public final class Canary implements Tool {
     private final int timeout;
 
     protected ZookeeperMonitor(Connection connection, String[] monitorTargets, 
boolean useRegExp,
-        StdOutSink sink, ExecutorService executor, boolean 
treatFailureAsError, long allowedFailures)  {
-      super(connection, monitorTargets, useRegExp, sink, executor, 
treatFailureAsError, allowedFailures);
+        Sink sink, ExecutorService executor, boolean treatFailureAsError, long 
allowedFailures)  {
+      super(connection, monitorTargets, useRegExp,
+          sink, executor, treatFailureAsError, allowedFailures);
       Configuration configuration = connection.getConfiguration();
       znode =
           configuration.get(ZOOKEEPER_ZNODE_PARENT,
@@ -1374,15 +1423,17 @@ public final class Canary implements Tool {
   }
 
 
-  // a monitor for regionserver mode
+  /**
+   * A monitor for regionserver mode
+   */
   private static class RegionServerMonitor extends Monitor {
-
     private boolean allRegions;
 
     public RegionServerMonitor(Connection connection, String[] monitorTargets, 
boolean useRegExp,
-        StdOutSink sink, ExecutorService executor, boolean allRegions,
+        Sink sink, ExecutorService executor, boolean allRegions,
         boolean treatFailureAsError, long allowedFailures) {
-      super(connection, monitorTargets, useRegExp, sink, executor, 
treatFailureAsError, allowedFailures);
+      super(connection, monitorTargets, useRegExp, sink, executor, 
treatFailureAsError,
+          allowedFailures);
       this.allRegions = allRegions;
     }
 
@@ -1413,10 +1464,7 @@ public final class Canary implements Tool {
     private boolean checkNoTableNames() {
       List<String> foundTableNames = new ArrayList<>();
       TableName[] tableNames = null;
-
-      if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format("reading list of tables"));
-      }
+      LOG.debug("Reading list of tables");
       try {
         tableNames = this.admin.listTableNames();
       } catch (IOException e) {
@@ -1452,7 +1500,7 @@ public final class Canary implements Tool {
         AtomicLong successes = new AtomicLong(0);
         successMap.put(serverName, successes);
         if (entry.getValue().isEmpty()) {
-          LOG.error(String.format("Regionserver not serving any regions - %s", 
serverName));
+          LOG.error("Regionserver not serving any regions - {}", serverName);
         } else if (this.allRegions) {
           for (RegionInfo region : entry.getValue()) {
             tasks.add(new RegionServerTask(this.connection,
@@ -1483,8 +1531,8 @@ public final class Canary implements Tool {
         if (this.allRegions) {
           for (Map.Entry<String, List<RegionInfo>> entry : 
rsAndRMap.entrySet()) {
             String serverName = entry.getKey();
-            LOG.info("Successfully read " + successMap.get(serverName) + " 
regions out of "
-                    + entry.getValue().size() + " on regionserver:" + 
serverName);
+            LOG.info("Successfully read {} regions out of {} on regionserver 
{}",
+                successMap.get(serverName), entry.getValue().size(), 
serverName);
           }
         }
       } catch (InterruptedException e) {
@@ -1501,36 +1549,30 @@ public final class Canary implements Tool {
 
     private Map<String, List<RegionInfo>> getAllRegionServerByName() {
       Map<String, List<RegionInfo>> rsAndRMap = new HashMap<>();
-      Table table = null;
-      RegionLocator regionLocator = null;
       try {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug(String.format("reading list of tables and locations"));
-        }
-        HTableDescriptor[] tableDescs = this.admin.listTables();
+        LOG.debug("Reading list of tables and locations");
+        List<TableDescriptor> tableDescs = this.admin.listTableDescriptors();
         List<RegionInfo> regions = null;
-        for (HTableDescriptor tableDesc : tableDescs) {
-          table = 
this.admin.getConnection().getTable(tableDesc.getTableName());
-          regionLocator = 
this.admin.getConnection().getRegionLocator(tableDesc.getTableName());
-
-          for (HRegionLocation location : 
regionLocator.getAllRegionLocations()) {
-            ServerName rs = location.getServerName();
-            String rsName = rs.getHostname();
-            RegionInfo r = location.getRegionInfo();
-
-            if (rsAndRMap.containsKey(rsName)) {
-              regions = rsAndRMap.get(rsName);
-            } else {
-              regions = new ArrayList<>();
-              rsAndRMap.put(rsName, regions);
+        for (TableDescriptor tableDesc: tableDescs) {
+          try (RegionLocator regionLocator =
+                   
this.admin.getConnection().getRegionLocator(tableDesc.getTableName())) {
+            for (HRegionLocation location : 
regionLocator.getAllRegionLocations()) {
+              ServerName rs = location.getServerName();
+              String rsName = rs.getHostname();
+              RegionInfo r = location.getRegion();
+              if (rsAndRMap.containsKey(rsName)) {
+                regions = rsAndRMap.get(rsName);
+              } else {
+                regions = new ArrayList<>();
+                rsAndRMap.put(rsName, regions);
+              }
+              regions.add(r);
             }
-            regions.add(r);
           }
-          table.close();
         }
 
         // get any live regionservers not serving any regions
-        for (ServerName rs : 
this.admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
+        for (ServerName rs: 
this.admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
           .getLiveServerMetrics().keySet()) {
           String rsName = rs.getHostname();
           if (!rsAndRMap.containsKey(rsName)) {
@@ -1538,19 +1580,9 @@ public final class Canary implements Tool {
           }
         }
       } catch (IOException e) {
-        String msg = "Get HTables info failed";
-        LOG.error(msg, e);
+        LOG.error("Get HTables info failed", e);
         this.errorCode = INIT_ERROR_EXIT_CODE;
-      } finally {
-        if (table != null) {
-          try {
-            table.close();
-          } catch (IOException e) {
-            LOG.warn("Close table failed", e);
-          }
-        }
       }
-
       return rsAndRMap;
     }
 
@@ -1576,13 +1608,13 @@ public final class Canary implements Tool {
               }
             }
             if (!regExpFound) {
-              LOG.info("No RegionServerInfo found, regionServerPattern:" + 
rsName);
+              LOG.info("No RegionServerInfo found, regionServerPattern {}", 
rsName);
             }
           } else {
             if (fullRsAndRMap.containsKey(rsName)) {
               filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
             } else {
-              LOG.info("No RegionServerInfo found, regionServerName:" + 
rsName);
+              LOG.info("No RegionServerInfo found, regionServerName {}", 
rsName);
             }
           }
         }
@@ -1596,20 +1628,19 @@ public final class Canary implements Tool {
   public static void main(String[] args) throws Exception {
     final Configuration conf = HBaseConfiguration.create();
 
-    // loading the generic options to conf
+    // Loading the generic options to conf
     new GenericOptionsParser(conf, args);
 
     int numThreads = conf.getInt("hbase.canary.threads.num", MAX_THREADS_NUM);
-    LOG.info("Number of execution threads " + numThreads);
+    LOG.info("Execution thread count={}", numThreads);
 
+    int exitCode = 0;
     ExecutorService executor = new ScheduledThreadPoolExecutor(numThreads);
-
-    Class<? extends Sink> sinkClass =
-        conf.getClass("hbase.canary.sink.class", RegionServerStdOutSink.class, 
Sink.class);
-    Sink sink = ReflectionUtils.newInstance(sinkClass);
-
-    int exitCode = ToolRunner.run(conf, new Canary(executor, sink), args);
-    executor.shutdown();
+    try {
+      exitCode = ToolRunner.run(conf, new Canary(executor), args);
+    } finally {
+      executor.shutdown();
+    }
     System.exit(exitCode);
   }
 }
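
As a reading aid (not part of the patch): the -readTableTimeouts value handled above is a comma-separated list of <tableName>=<milliseconds> pairs with no spaces, each named table must also be passed as a monitor target, and the timeout is compared against the aggregate time to read all regions of that table. A tiny sketch of the accepted format (table names hypothetical):

public class ReadTableTimeoutsFormatSketch {
  public static void main(String[] args) {
    // Mirrors the parsing in Canary: split entries on ',', then name/timeout on '='.
    String value = "usertable1=60000,usertable2=90000";
    for (String entry : value.split(",")) {
      String[] nameTimeout = entry.split("=");
      System.out.println(nameTimeout[0] + " -> " + Long.parseLong(nameTimeout[1]) + "ms");
    }
  }
}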

http://git-wip-us.apache.org/repos/asf/hbase/blob/8cc56bd1/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
index cdbf426..6c23764 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -114,11 +115,11 @@ public class TestCanaryTool {
     ExecutorService executor = new ScheduledThreadPoolExecutor(1);
     Canary.RegionStdOutSink sink = spy(new Canary.RegionStdOutSink());
     Canary canary = new Canary(executor, sink);
-    String[] args = { "-writeSniffing", "-t", "10000", name.getMethodName() };
+    String[] args = { "-writeSniffing", "-t", "10000", 
tableName.getNameAsString() };
     assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, 
args));
     assertEquals("verify no read error count", 0, 
canary.getReadFailures().size());
     assertEquals("verify no write error count", 0, 
canary.getWriteFailures().size());
-    verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), 
isA(HRegionInfo.class),
+    verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), 
isA(RegionInfo.class),
       isA(ColumnFamilyDescriptor.class), anyLong());
   }
 
@@ -144,7 +145,8 @@ public class TestCanaryTool {
     Canary canary = new Canary(executor, sink);
     String configuredTimeoutStr = tableNames[0].getNameAsString() + "=" + 
Long.MAX_VALUE + "," +
       tableNames[1].getNameAsString() + "=0";
-    String[] args = { "-readTableTimeouts", configuredTimeoutStr, 
name.getMethodName() + "1", name.getMethodName() + "2"};
+    String[] args = {"-readTableTimeouts", configuredTimeoutStr, 
name.getMethodName() + "1",
+      name.getMethodName() + "2"};
     assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, 
args));
     verify(sink, 
times(tableNames.length)).initializeAndGetReadLatencyForTable(isA(String.class));
     for (int i=0; i<2; i++) {
@@ -231,7 +233,7 @@ public class TestCanaryTool {
     conf.setBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, true);
     assertEquals(0, ToolRunner.run(conf, canary, args));
     verify(sink, atLeastOnce())
-        .publishReadTiming(isA(ServerName.class), isA(HRegionInfo.class),
+        .publishReadTiming(isA(ServerName.class), isA(RegionInfo.class),
         isA(ColumnFamilyDescriptor.class), anyLong());
     assertEquals("verify no read error count", 0, 
canary.getReadFailures().size());
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/8cc56bd1/src/main/asciidoc/_chapters/ops_mgt.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc 
b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 443d895..0f5e8b0 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -82,45 +82,54 @@ Others, such as `hbase shell` (<<shell>>), `hbase upgrade` 
(<<upgrading>>), and
 
 === Canary
 
-There is a Canary class can help users to canary-test the HBase cluster 
status, with every column-family for every regions or RegionServer's 
granularity.
-To see the usage, use the `-help` parameter.
-
-----
-$ ${HBASE_HOME}/bin/hbase canary -help
-
-Usage: hbase canary [opts] [table1 [table2]...] | [regionserver1 
[regionserver2]..]
- where [opts] are:
-   -help          Show this help and exit.
-   -regionserver  replace the table argument to regionserver,
-      which means to enable regionserver mode
-   -allRegions    Tries all regions on a regionserver,
-      only works in regionserver mode.
-   -zookeeper    Tries to grab zookeeper.znode.parent
-      on each zookeeper instance
-   -daemon        Continuous check at defined intervals.
-   -interval <N>  Interval between checks (sec)
-   -e             Use table/regionserver as regular expression
-      which means the table/regionserver is regular expression pattern
-   -f <B>         stop whole program if first error occurs, default is true
-   -t <N>         timeout for a check, default is 600000 (millisecs)
-   -writeTableTimeout <N>         write timeout for the writeTable, default is 
600000 (millisecs)
-   -readTableTimeouts <tableName>=<read timeout>,<tableName>=<read timeout>, 
...    comma-separated list of read timeouts per table (no spaces), default is 
600000 (millisecs)
-   -writeSniffing enable the write sniffing in canary
-   -treatFailureAsError treats read / write failure as error
-   -writeTable    The table used for write sniffing. Default is hbase:canary
-   -Dhbase.canary.read.raw.enabled=<true/false> Use this flag to enable or 
disable raw scan during read canary test Default is false and raw is not 
enabled during scan
-   -D<configProperty>=<value> assigning or override the configuration params
+The Canary tool can help users "canary-test" the HBase cluster status.
+The default "region mode" fetches a row from every column-family of every
+region.
+In "regionserver mode", the Canary tool will fetch a row from a random
+region on each of the cluster's RegionServers. In "zookeeper mode", the
+Canary will read the root znode on each member of the zookeeper ensemble.
+
+To see usage, pass the `-help` parameter (if you pass no parameters, the
+Canary tool starts executing in the default "region mode", fetching a row
+from every region in the cluster).
+
+----
+2018-10-16 13:11:27,037 INFO  [main] tool.Canary: Execution thread count=16
+Usage: canary [OPTIONS] [<TABLE1> [<TABLE2]...] | [<REGIONSERVER1> 
[<REGIONSERVER2]..]
+Where [OPTIONS] are:
+ -h,-help        show this help and exit.
+ -regionserver   set 'regionserver mode'; gets row from random region on server
+ -allRegions     get from ALL regions when 'regionserver mode', not just 
random one.
+ -zookeeper      set 'zookeeper mode'; grab zookeeper.znode.parent on each 
ensemble member
+ -daemon         continuous check at defined intervals.
+ -interval <N>   interval between checks in seconds
+ -e              consider table/regionserver argument as regular expression
+ -f <B>          exit on first error; default=true
+ -failureAsError treat read/write failure as error
+ -t <N>          timeout for canary-test run; default=600000ms
+ -writeSniffing  enable write sniffing
+ -writeTable     the table used for write sniffing; default=hbase:canary
+ -writeTableTimeout <N>  timeout for writeTable; default=600000ms
+ -readTableTimeouts <tableName>=<read timeout>,<tableName>=<read timeout>,...
+                comma-separated list of table read timeouts (no spaces);
+                logs 'ERROR' if takes longer. default=600000ms
+ -permittedZookeeperFailures <N>  Ignore first N failures attempting to
+                connect to individual zookeeper nodes in ensemble
+
+ -D<configProperty>=<value> to assign or override configuration params
+ -Dhbase.canary.read.raw.enabled=<true/false> Set to enable/disable raw scan; 
default=false
+
+Canary runs in one of three modes: region (default), regionserver, or 
zookeeper.
+To sniff/probe all regions, pass no arguments.
+To sniff/probe all regions of a table, pass tablename.
+To sniff/probe regionservers, pass -regionserver, etc.
+See http://hbase.apache.org/book.html#_canary for Canary documentation.
 ----
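+
+For example, the following invocations probe the ZooKeeper ensemble while
+tolerating two failed connection attempts to individual ensemble members, and
+probe two tables with per-table read timeouts (the timeout and failure-count
+values shown are only illustrative):
+
+----
+$ ${HBASE_HOME}/bin/hbase canary -zookeeper -permittedZookeeperFailures 2
+$ ${HBASE_HOME}/bin/hbase canary -readTableTimeouts test-01=10000,test-02=5000 test-01 test-02
+----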
 
 [NOTE]
-The `Sink` class is instantiated using the `hbase.canary.sink.class` 
configuration property which
-will also determine the used Monitor class. In the absence of this property 
RegionServerStdOutSink
-will be used. You need to use the Sink according to the passed parameters to 
the _canary_ command.
-As an example you have to set `hbase.canary.sink.class` property to
-`org.apache.hadoop.hbase.tool.Canary$RegionStdOutSink` for using table 
parameters.
+The `Sink` class is instantiated using the `hbase.canary.sink.class` 
configuration property.
 
-This tool will return non zero error codes to user for collaborating with 
other monitoring tools, such as Nagios.
-The error code definitions are:
+This tool will return non-zero error codes to the user so that it can be used
+in concert with other monitoring tools, such as Nagios. The error code
+definitions are:
 
 [source,java]
 ----
@@ -131,9 +140,9 @@ private static final int ERROR_EXIT_CODE = 4;
 private static final int FAILURE_EXIT_CODE = 5;
 ----
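+
+Because the exit code is the canary's primary signal to external monitoring, a
+minimal check can simply run the canary and then inspect the shell exit status
+(the table name and flag combination below are only illustrative):
+
+----
+$ ${HBASE_HOME}/bin/hbase canary -treatFailureAsError test-01
+$ echo $?
+----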
 
-Here are some examples based on the following given case.
-There are two Table objects called test-01 and test-02, they have two column 
family cf1 and cf2 respectively, and deployed on the 3 RegionServers.
-see following table.
+Here are some examples based on the following case: two tables, test-01 and
+test-02, each with two column families, cf1 and cf2, deployed on 3
+RegionServers. See the following table.
 
 [cols="1,1,1", options="header"]
 |===
@@ -145,7 +154,7 @@ see following table.
 | rs3 | r2 | r1
 |===
 
-Following are some examples based on the previous given case.
+Following are some example outputs based on the case given above.
 
 ==== Canary test for every column family (store) of every region of every table
 
@@ -163,12 +172,13 @@ $ ${HBASE_HOME}/bin/hbase canary
 13/12/09 03:26:32 INFO tool.Canary: read from region 
test-02,0004883,1386559511167.cbda32d5e2e276520712d84eaaa29d84. column family 
cf2 in 8ms
 ----
 
-So you can see, table test-01 has two regions and two column families, so the 
Canary tool will pick 4 small piece of data from 4 (2 region * 2 store) 
different stores.
-This is a default behavior of the this tool does.
+As you can see, table test-01 has two regions and two column families, so the
+Canary tool in the default "region mode" will pick 4 small pieces of data from
+4 (2 regions * 2 stores) different stores. This is the default behavior.
 
-==== Canary test for every column family (store) of every region of specific 
table(s)
+==== Canary test for every column family (store) of every region of specific tables
 
-You can also test one or more specific tables.
+You can also test one or more specific tables by passing table names.
 
 ----
 $ ${HBASE_HOME}/bin/hbase canary test-01 test-02
@@ -176,7 +186,9 @@ $ ${HBASE_HOME}/bin/hbase canary test-01 test-02
 
 ==== Canary test with RegionServer granularity
 
-This will pick one small piece of data from each RegionServer, and can also 
put your RegionServer name as input options for canary-test specific 
RegionServer.
+In "regionserver mode", the Canary tool will pick one small piece of data from
+each RegionServer (you can also pass one or more RegionServer names as
+arguments to restrict the check to those servers), as in the examples below.
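+
+For example, to probe every region hosted by one particular RegionServer (the
+hostname below is only illustrative):
+
+----
+$ ${HBASE_HOME}/bin/hbase canary -regionserver -allRegions rs1.example.com
+----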
 
 ----
 $ ${HBASE_HOME}/bin/hbase canary -regionserver
@@ -188,22 +200,25 @@ $ ${HBASE_HOME}/bin/hbase canary -regionserver
 
 ==== Canary test with regular expression pattern
 
-This will test both table test-01 and test-02.
+You can pass regular expressions for table names when in "region mode", or for
+server names when in "regionserver mode". The example below tests both tables
+test-01 and test-02.
 
 ----
 $ ${HBASE_HOME}/bin/hbase canary -e test-0[1-2]
 ----
 
-==== Run canary test as daemon mode
+==== Run canary test as a "daemon"
 
-Run repeatedly with interval defined in option `-interval` whose default value 
is 60 seconds.
-This daemon will stop itself and return non-zero error code if any error 
occurs, due to the default value of option -f is true.
+Run repeatedly with an interval defined via the option `-interval` (default
+value is 60 seconds). This daemon will stop itself and return a non-zero error
+code if any error occurs. To have the daemon keep running across errors, pass
+the `-f` flag with its value set to false (see usage above).
 
 ----
 $ ${HBASE_HOME}/bin/hbase canary -daemon
 ----
 
-Run repeatedly with 5 second intervals and will not stop itself even if errors 
occur in the test.
+To run repeatedly at 5-second intervals and not stop on errors, do the following.
 
 ----
 $ ${HBASE_HOME}/bin/hbase canary -daemon -interval 5 -f false
@@ -211,9 +226,11 @@ $ ${HBASE_HOME}/bin/hbase canary -daemon -interval 5 -f 
false
 
 ==== Force timeout if canary test stuck
 
-In some cases the request is stuck and no response is sent back to the client. 
This can happen with dead RegionServers which the master has not yet noticed.
-Because of this we provide a timeout option to kill the canary test and return 
a non-zero error code.
-This run sets the timeout value to 60 seconds, the default value is 600 
seconds.
+In some cases the request is stuck and no response is sent back to the client.
+This can happen with dead RegionServers which the master has not yet noticed.
+Because of this we provide a timeout option to kill the canary test and return
+a non-zero error code. The example below sets the timeout value to 60 seconds
+(the default value is 600 seconds).
 
 ----
 $ ${HBASE_HOME}/bin/hbase canary -t 60000
@@ -221,36 +238,37 @@ $ ${HBASE_HOME}/bin/hbase canary -t 60000
 
 ==== Enable write sniffing in canary
 
-By default, the canary tool only check the read operations, it's hard to find 
the problem in the
-write path. To enable the write sniffing, you can run canary with the 
`-writeSniffing` option.
-When the write sniffing is enabled, the canary tool will create an hbase table 
and make sure the
-regions of the table distributed on all region servers. In each sniffing 
period, the canary will
-try to put data to these regions to check the write availability of each 
region server.
+By default, the canary tool only checks read operations. To enable write
+sniffing, you can run the canary with the `-writeSniffing` option set. When
+write sniffing is enabled, the canary tool will create an HBase table and make
+sure the regions of the table are distributed to all region servers. In each
+sniffing period, the canary will try to put data to these regions to check the
+write availability of each region server.
 ----
 $ ${HBASE_HOME}/bin/hbase canary -writeSniffing
 ----
 
-The default write table is `hbase:canary` and can be specified by the option 
`-writeTable`.
+The default write table is `hbase:canary` and can be specified with the option 
`-writeTable`.
 ----
 $ ${HBASE_HOME}/bin/hbase canary -writeSniffing -writeTable ns:canary
 ----
 
-The default value size of each put is 10 bytes and you can set it by the 
config key:
+The default size of the value written by each put is 10 bytes. You can set it
+via the config key:
 `hbase.canary.write.value.size`.
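+
+For example, write sniffing can be run with a larger put value by overriding
+the property on the command line (the 64-byte size is only illustrative):
+
+----
+$ ${HBASE_HOME}/bin/hbase canary -Dhbase.canary.write.value.size=64 -writeSniffing
+----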
 
 ==== Treat read / write failure as error
 
-By default, the canary tool only logs read failure, due to e.g. 
RetriesExhaustedException,
-while returning normal exit code. To treat read / write failure as error, you 
can run canary
-with the `-treatFailureAsError` option. When enabled, read / write failure 
would result in error
-exit code.
+By default, the canary tool only logs read failures (due to, for example, a
+RetriesExhaustedException) and will return the 'normal' exit code. To treat
+read/write failures as errors, you can run the canary with the
+`-treatFailureAsError` option. When enabled, read/write failures will result
+in an error exit code.
 ----
 $ ${HBASE_HOME}/bin/hbase canary -treatFailureAsError
 ----
 
 ==== Running Canary in a Kerberos-enabled Cluster
 
-To run Canary in a Kerberos-enabled cluster, configure the following two 
properties in _hbase-site.xml_:
+To run the Canary in a Kerberos-enabled cluster, configure the following two 
properties in _hbase-site.xml_:
 
 * `hbase.client.keytab.file`
 * `hbase.client.kerberos.principal`
