http://git-wip-us.apache.org/repos/asf/hbase/blob/48d9d27d/src/main/docbkx/book.xml ---------------------------------------------------------------------- diff --git a/src/main/docbkx/book.xml b/src/main/docbkx/book.xml index d275984..f835dc7 100644 --- a/src/main/docbkx/book.xml +++ b/src/main/docbkx/book.xml @@ -39,10 +39,18 @@ <imageobject> <imagedata align="center" - valign="middle" + valign="left" fileref="hbase_logo.png" /> </imageobject> </inlinemediaobject> + <inlinemediaobject> + <imageobject> + <imagedata + align="center" + valign="right" + fileref="jumping-orca_rotated_25percent.png" /> + </imageobject> + </inlinemediaobject> </link> </subtitle> <copyright> @@ -438,25 +446,25 @@ <para> A namespace can be created, removed or altered. Namespace membership is determined during table creation by specifying a fully-qualified table name of the form:</para> - <programlisting><![CDATA[<table namespace>:<table qualifier>]]></programlisting> + <programlisting language="xml"><![CDATA[<table namespace>:<table qualifier>]]></programlisting> <example> <title>Examples</title> - <programlisting> + <programlisting language="bourne"> #Create a namespace create_namespace 'my_ns' </programlisting> - <programlisting> + <programlisting language="bourne"> #create my_table in my_ns namespace create 'my_ns:my_table', 'fam' </programlisting> - <programlisting> + <programlisting language="bourne"> #drop namespace drop_namespace 'my_ns' </programlisting> - <programlisting> + <programlisting language="bourne"> #alter namespace alter_namespace 'my_ns', {METHOD => 'set', 'PROPERTY_NAME' => 'PROPERTY_VALUE'} </programlisting> @@ -478,7 +486,7 @@ alter_namespace 'my_ns', {METHOD => 'set', 'PROPERTY_NAME' => 'PROPERTY_VALUE'} <example> <title>Examples</title> - <programlisting> + <programlisting language="bourne"> #namespace=foo and table qualifier=bar create 'foo:bar', 'fam' @@ -534,16 +542,17 @@ create 'bar', 'fam' <title>Data Model Operations</title> <para>The four primary data model operations are Get, Put, Scan, and Delete. Operations are applied via <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html">HTable</link> - instances. </para> + xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html">Table</link> + instances. + </para> <section xml:id="get"> <title>Get</title> <para><link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html">Get</link> returns attributes for a specified row. Gets are executed via <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#get%28org.apache.hadoop.hbase.client.Get%29"> - HTable.get</link>. </para> + xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#get(org.apache.hadoop.hbase.client.Get)"> + Table.get</link>. </para> </section> <section xml:id="put"> @@ -552,10 +561,10 @@ create 'bar', 'fam' xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Put.html">Put</link> either adds new rows to a table (if the key is new) or can update existing rows (if the key already exists). Puts are executed via <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#put%28org.apache.hadoop.hbase.client.Put%29"> - HTable.put</link> (writeBuffer) or <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#batch%28java.util.List%29"> - HTable.batch</link> (non-writeBuffer). 
</para>
+ xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#put(org.apache.hadoop.hbase.client.Put)">
+ Table.put</link> (writeBuffer) or <link
+ xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#batch(java.util.List, java.lang.Object[])">
+ Table.batch</link> (non-writeBuffer). </para>
</section>
<section xml:id="scan">
@@ -563,27 +572,26 @@ create 'bar', 'fam'
<para><link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html">Scan</link> allows iteration over multiple rows for specified attributes. </para>
- <para>The following is an example of a on an HTable table instance. Assume that a table is
+ <para>The following is an example of a Scan on a Table instance. Assume that a table is
populated with rows with keys "row1", "row2", "row3", and then another set of rows with
- the keys "abc1", "abc2", and "abc3". The following example shows how startRow and stopRow
- can be applied to a Scan instance to return the rows beginning with "row".</para>
- <programlisting>
+ the keys "abc1", "abc2", and "abc3". The following example shows how to set a Scan
+ instance to return the rows beginning with "row".</para>
+<programlisting language="java">
public static final byte[] CF = "cf".getBytes();
public static final byte[] ATTR = "attr".getBytes();
...
-HTable htable = ...      // instantiate HTable
+Table table = ...      // instantiate a Table instance
Scan scan = new Scan();
scan.addColumn(CF, ATTR);
scan.setRowPrefixFilter(Bytes.toBytes("row"));
-ResultScanner rs = htable.getScanner(scan);
+ResultScanner rs = table.getScanner(scan);
try {
  for (Result r = rs.next(); r != null; r = rs.next()) {
    // process result...
  }
} finally {
  rs.close();  // always close the ResultScanner!
}
</programlisting>
<para>Note that generally the easiest way to specify a specific stop point for a scan is by using the <link
@@ -596,7 +604,7 @@ try {
<para><link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Delete.html">Delete</link> removes a row from a table. Deletes are executed via <link
- xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#delete%28org.apache.hadoop.hbase.client.Delete%29">
+ xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#delete(org.apache.hadoop.hbase.client.Delete)">
Table.delete</link>. </para>
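+ <para>For instance, a single-row delete looks roughly like the following sketch. It assumes
+ the <code>table</code> variable is a Table instance as in the Scan example above; the row
+ key is made up for illustration.</para>
+ <programlisting language="java">
+// A minimal sketch of a single-row delete; "row1" is an illustrative row key.
+Delete delete = new Delete(Bytes.toBytes("row1"));
+table.delete(delete);
+</programlisting>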
<para>HBase does not modify data in place, and so deletes are handled by creating new
markers called <emphasis>tombstones</emphasis>. These tombstones, along with the dead
@@ -724,12 +732,12 @@ try {
xml:id="default_get_example">
<title>Default Get Example</title>
<para>The following Get will only retrieve the current version of the row</para>
- <programlisting>
+ <programlisting language="java">
public static final byte[] CF = "cf".getBytes();
public static final byte[] ATTR = "attr".getBytes();
...
Get get = new Get(Bytes.toBytes("row1"));
-Result r = htable.get(get);
+Result r = table.get(get);
byte[] b = r.getValue(CF, ATTR);  // returns current version of value
</programlisting>
</section>
@@ -737,13 +745,13 @@ byte[] b = r.getValue(CF, ATTR); // returns current version of value
xml:id="versioned_get_example">
<title>Versioned Get Example</title>
<para>The following Get will return the last 3 versions of the row.</para>
- <programlisting>
+ <programlisting language="java">
public static final byte[] CF = "cf".getBytes();
public static final byte[] ATTR = "attr".getBytes();
...
Get get = new Get(Bytes.toBytes("row1"));
get.setMaxVersions(3);  // will return last 3 versions of row
-Result r = htable.get(get);
+Result r = table.get(get);
byte[] b = r.getValue(CF, ATTR);  // returns current version of value
List<KeyValue> kv = r.getColumn(CF, ATTR);  // returns all versions of this column
</programlisting>
@@ -765,27 +773,27 @@ List<KeyValue> kv = r.getColumn(CF, ATTR); // returns all versions of thi
<title>Implicit Version Example</title>
<para>The following Put will be implicitly versioned by HBase with the current time.</para>
- <programlisting>
+ <programlisting language="java">
public static final byte[] CF = "cf".getBytes();
public static final byte[] ATTR = "attr".getBytes();
...
Put put = new Put(Bytes.toBytes(row));
put.add(CF, ATTR, Bytes.toBytes( data));
-htable.put(put);
+table.put(put);
</programlisting>
</section>
<section xml:id="explicit_version_example">
<title>Explicit Version Example</title>
<para>The following Put has the version timestamp explicitly set.</para>
- <programlisting>
+ <programlisting language="java">
public static final byte[] CF = "cf".getBytes();
public static final byte[] ATTR = "attr".getBytes();
...
Put put = new Put( Bytes.toBytes(row));
long explicitTimeInMs = 555;  // just an example
put.add(CF, ATTR, explicitTimeInMs, Bytes.toBytes(data));
-htable.put(put);
+table.put(put);
</programlisting>
<para>Caution: the version timestamp is used internally by HBase for things like time-to-live
calculations. It's usually best to avoid setting this timestamp yourself. Prefer using
@@ -981,7 +989,7 @@ htable.put(put);
Be sure to use the correct version of the HBase JAR for your system. The backticks
(<literal>`</literal> symbols) cause the shell to execute the sub-commands, setting the
CLASSPATH as part of the command. This example assumes you use a BASH-compatible shell. </para>
- <screen>$ <userinput>HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-server-VERSION.jar rowcounter usertable</userinput></screen>
+ <screen language="bourne">$ <userinput>HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-server-VERSION.jar rowcounter usertable</userinput></screen>
<para>When the command runs, internally, the HBase JAR finds the dependencies it needs for
zookeeper, guava, and its other dependencies on the passed <envar>HADOOP_CLASSPATH</envar>
and adds the JARs to the MapReduce job configuration.
See the source at
@@ -992,7 +1000,7 @@ htable.put(put);
<screen>java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.mapreduce.RowCounter$RowCounterMapper</screen>
<para>If this occurs, try modifying the command as follows, so that it uses the HBase JARs
from the <filename>target/</filename> directory within the build environment.</para>
- <screen>$ <userinput>HADOOP_CLASSPATH=${HBASE_HOME}/target/hbase-server-VERSION.jar:`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/target/hbase-server-VERSIION.jar rowcounter usertable</userinput></screen>
+ <screen language="bourne">$ <userinput>HADOOP_CLASSPATH=${HBASE_HOME}/hbase-server/target/hbase-server-VERSION-SNAPSHOT.jar:`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-server/target/hbase-server-VERSION-SNAPSHOT.jar rowcounter usertable</userinput></screen>
</note>
<caution>
<title>Notice to MapReduce users of HBase 0.96.1 and above</title>
@@ -1042,14 +1050,14 @@ Exception in thread "main" java.lang.IllegalAccessError: class
<code>HADOOP_CLASSPATH</code> environment variable at job submission time. When launching
jobs that package their dependencies, all three of the following job launching commands
satisfy this requirement:</para>
- <screen>
+ <screen language="bourne">
$ <userinput>HADOOP_CLASSPATH=/path/to/hbase-protocol.jar:/path/to/hbase/conf hadoop jar MyJob.jar MyJobMainClass</userinput>
$ <userinput>HADOOP_CLASSPATH=$(hbase mapredcp):/path/to/hbase/conf hadoop jar MyJob.jar MyJobMainClass</userinput>
$ <userinput>HADOOP_CLASSPATH=$(hbase classpath) hadoop jar MyJob.jar MyJobMainClass</userinput>
</screen>
<para>For jars that do not package their dependencies, the following command structure is
necessary:</para>
- <screen>
+ <screen language="bourne">
$ <userinput>HADOOP_CLASSPATH=$(hbase mapredcp):/etc/hbase/conf hadoop jar MyApp.jar MyJobMainClass -libjars $(hbase mapredcp | tr ':' ',')</userinput> ...
</screen>
<para>See also <link
@@ -1100,7 +1108,7 @@ $ <userinput>HADOOP_CLASSPATH=$(hbase mapredcp):/etc/hbase/conf hadoop jar MyApp
<para>The HBase JAR also serves as a Driver for some bundled MapReduce jobs. To learn about
the bundled MapReduce jobs, run the following command.</para>
- <screen>$ <userinput>${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-server-VERSION.jar</userinput>
+ <screen language="bourne">$ <userinput>${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-server-VERSION.jar</userinput>
<computeroutput>An example program must be given as the first argument.
Valid program names are:
copytable: Export a table from local cluster to peer cluster
@@ -1112,7 +1120,7 @@ Valid program names are:
</screen>
<para>Each of the valid program names is a bundled MapReduce job. To run one of the jobs,
model your command after the following example.</para>
- <screen>$ <userinput>${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-server-VERSION.jar rowcounter myTable</userinput></screen>
+ <screen language="bourne">$ <userinput>${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-server-VERSION.jar rowcounter myTable</userinput></screen>
</section>
<section>
@@ -1174,7 +1182,7 @@ Valid program names are:
xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html">RowCounter</link>
MapReduce job uses <code>TableInputFormat</code> and does a count of all rows in the
specified table.
To run it, use the following command: </para>
- <screen>$ <userinput>./bin/hadoop jar hbase-X.X.X.jar</userinput></screen>
+ <screen language="bourne">$ <userinput>./bin/hadoop jar hbase-X.X.X.jar</userinput></screen>
<para>This will invoke the HBase MapReduce Driver class. Select <literal>rowcounter</literal>
from the choice of jobs offered. This will print rowcounter usage advice to standard output.
Specify the tablename,
@@ -1213,7 +1221,7 @@ Valid program names are:
<para>The following is an example of using HBase as a MapReduce source in a read-only manner.
Specifically, there is a Mapper instance but no Reducer, and nothing is being emitted from
the Mapper. The job would be defined as follows...</para>
- <programlisting>
+ <programlisting language="java">
Configuration config = HBaseConfiguration.create();
Job job = new Job(config, "ExampleRead");
job.setJarByClass(MyReadJob.class);     // class that contains mapper
@@ -1240,7 +1248,7 @@ if (!b) {
</programlisting>
<para>...and the mapper instance would extend <link
xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableMapper.html">TableMapper</link>...</para>
- <programlisting>
+ <programlisting language="java">
public static class MyMapper extends TableMapper<Text, Text> {
  public void map(ImmutableBytesWritable row, Result value, Context context) throws InterruptedException, IOException {
@@ -1254,7 +1262,7 @@ public static class MyMapper extends TableMapper<Text, Text> {
<title>HBase MapReduce Read/Write Example</title>
<para>The following is an example of using HBase both as a source and as a sink with
MapReduce. This example will simply copy data from one table to another.</para>
- <programlisting>
+ <programlisting language="java">
Configuration config = HBaseConfiguration.create();
Job job = new Job(config,"ExampleReadWrite");
job.setJarByClass(MyReadWriteJob.class);    // class that contains mapper
@@ -1293,7 +1301,7 @@ if (!b) {
<para>The following is the example mapper, which will create a <classname>Put</classname>
matching the input <classname>Result</classname> and emit it. Note: this is what the
CopyTable utility does. </para>
- <programlisting>
+ <programlisting language="java">
public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put>  {
  public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
@@ -1327,7 +1335,7 @@ public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put&
<para>The following example uses HBase as a MapReduce source and sink with a summarization
step. This example will count the number of distinct instances of a value in a table and
write those summarized counts in another table.
- <programlisting>
+ <programlisting language="java">
Configuration config = HBaseConfiguration.create();
Job job = new Job(config,"ExampleSummary");
job.setJarByClass(MySummaryJob.class);     // class that contains mapper and reducer
@@ -1358,7 +1366,7 @@ if (!b) {
In this example mapper a column with a String-value is chosen as the value to summarize
upon. This value is used as the key to emit from the mapper, and an
<classname>IntWritable</classname> represents an instance counter.
- <programlisting>
+ <programlisting language="java">
public static class MyMapper extends TableMapper<Text, IntWritable>  {
  public static final byte[] CF = "cf".getBytes();
  public static final byte[] ATTR1 = "attr1".getBytes();
@@ -1376,7 +1384,7 @@ public static class MyMapper extends TableMapper<Text, IntWritable> {
</programlisting>
In the reducer, the "ones" are counted (just like any other MR example that does this), and
then a <classname>Put</classname> is emitted.
- <programlisting>
+ <programlisting language="java">
public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>  {
  public static final byte[] CF = "cf".getBytes();
  public static final byte[] COUNT = "count".getBytes();
@@ -1401,7 +1409,7 @@ public static class MyTableReducer extends TableReducer<Text, IntWritable, Im
<para>This is very similar to the summary example above, except that it uses
HBase as a MapReduce source and HDFS as the sink. The differences are in the job setup and
in the reducer. The mapper remains the same. </para>
- <programlisting>
+ <programlisting language="java">
Configuration config = HBaseConfiguration.create();
Job job = new Job(config,"ExampleSummaryToFile");
job.setJarByClass(MySummaryFileJob.class);     // class that contains mapper and reducer
@@ -1430,7 +1438,7 @@ if (!b) {
<para>As stated above, the previous Mapper can run unchanged with this example. As for the
Reducer, it is a "generic" Reducer instead of extending TableMapper and emitting
Puts.</para>
- <programlisting>
+ <programlisting language="java">
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable>  {
  public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
@@ -1448,7 +1456,7 @@ if (!b) {
<title>HBase MapReduce Summary to HBase Without Reducer</title>
<para>It is also possible to perform summaries without a reducer - if you use HBase as the
reducer. </para>
- <para>An HBase target table would need to exist for the job summary. The HTable method
+ <para>An HBase target table would need to exist for the job summary. The Table method
<code>incrementColumnValue</code> would be used to atomically increment values. From a
performance perspective, it might make sense to keep a Map of values with their values to
be incremented for each map-task, and make one update per key during the <code>
@@ -1470,7 +1478,7 @@ if (!b) {
reducers. Neither is right or wrong, it depends on your use-case. Recognize that the more
reducers that are assigned to the job, the more simultaneous connections to the RDBMS will
be created - this will scale, but only to a point. </para>
- <programlisting>
+ <programlisting language="java">
public static class MyRdbmsReducer extends Reducer<Text, IntWritable, Text, IntWritable>  {
  private Connection c = null;
@@ -1500,12 +1508,12 @@ if (!b) {
<title>Accessing Other HBase Tables in a MapReduce Job</title>
<para>Although the framework currently allows one HBase table as input to a MapReduce job,
other HBase tables can be accessed as lookup tables, etc., in a MapReduce job via creating
- an HTable instance in the setup method of the Mapper.
- <programlisting>public class MyMapper extends TableMapper<Text, LongWritable> {
- private HTable myOtherTable;
+ a Table instance in the setup method of the Mapper.
+ <programlisting language="java">public class MyMapper extends TableMapper<Text, LongWritable> {
+ private Table myOtherTable;
  public void setup(Context context) {
- myOtherTable = new HTable("myOtherTable");
+ // In here create a Connection to the cluster and save it or use the Connection
+ // from the existing table
+ myOtherTable = connection.getTable("myOtherTable");
  }
  public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
@@ -1693,9 +1703,7 @@ if (!b) {
<section xml:id="client">
<title>Client</title>
- <para>The HBase client <link
- xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html">HTable</link>
- is responsible for finding RegionServers that are serving the particular row range of
+ <para>The HBase client finds the RegionServers that are serving the particular row range of
interest. It does this by querying the <code>hbase:meta</code> table. See <xref
linkend="arch.catalog.meta" /> for details. After locating the required region(s), the
client contacts the RegionServer serving that region, rather than going through the master,
@@ -1703,29 +1711,41 @@ subsequent requests need not go through the lookup process. Should a region be
reassigned either by the master load balancer or because a RegionServer has died, the
client will requery the catalog tables to determine the new location of the user region. </para>
+ <para>See <xref linkend="master.runtime" /> for more information about the impact of the
Master on HBase Client communication. </para>
- <para>Administrative functions are handled through <link
- xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html">HBaseAdmin</link>
+ <para>Administrative functions are done via an instance of <link
+ xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Admin.html">Admin</link>
</para>
+ <section xml:id="client.connections">
- <title>Connections</title>
- <para>For connection configuration information, see <xref
- linkend="client_dependencies" />. </para>
+ <title>Cluster Connections</title>
+ <para>The API changed in HBase 1.0. It has been cleaned up, and users are returned
+ interfaces to work against rather than particular types. In HBase 1.0,
+ obtain a cluster Connection from ConnectionFactory and thereafter, get from it
+ instances of Table, Admin, and RegionLocator on an as-needed basis. When done, close
+ obtained instances. Finally, be sure to clean up your Connection instance before
+ exiting. Connections are heavyweight objects. Create once and keep an instance around.
+ Table, Admin and RegionLocator instances are lightweight. Create as you go and then
+ let go as soon as you are done by closing them. See the
+ <link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/package-summary.html">Client Package Javadoc Description</link> for example usage of the new HBase 1.0 API.</para>
+
+ <para>For connection configuration information, see <xref linkend="client_dependencies" />. </para>
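+ <para>In code, the lifecycle just described looks roughly like the following sketch against
+ the HBase 1.0 interfaces; the table name and row key are made up for illustration.</para>
+ <programlisting language="java">
+Configuration conf = HBaseConfiguration.create();
+// Heavyweight; create once and reuse. try-with-resources closes it on exit.
+try (Connection connection = ConnectionFactory.createConnection(conf)) {
+  // Lightweight; create on demand and close when done.
+  try (Table table = connection.getTable(TableName.valueOf("myTable"));
+       Admin admin = connection.getAdmin();
+       RegionLocator locator = connection.getRegionLocator(TableName.valueOf("myTable"))) {
+    Result result = table.get(new Get(Bytes.toBytes("row1")));
+    HRegionLocation location = locator.getRegionLocation(Bytes.toBytes("row1"));
+  }
+} // closing the Connection releases shared resources (ZooKeeper, socket pools)
+</programlisting>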
+ <para><emphasis><link
- xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html">HTable</link>
- instances are not thread-safe</emphasis>. Only one thread use an instance of HTable at
- any given time. When creating HTable instances, it is advisable to use the same <link
+ xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html">Table</link>
+ instances are not thread-safe</emphasis>. Only one thread can use an instance of Table at
+ any given time. When creating Table instances, it is advisable to use the same <link
xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HBaseConfiguration">HBaseConfiguration</link>
instance. This will ensure sharing of ZooKeeper and socket instances to the RegionServers,
which is usually what you want. For example, this is preferred:</para>
- <programlisting>HBaseConfiguration conf = HBaseConfiguration.create();
+ <programlisting language="java">HBaseConfiguration conf = HBaseConfiguration.create();
HTable table1 = new HTable(conf, "myTable");
HTable table2 = new HTable(conf, "myTable");</programlisting>
<para>as opposed to this:</para>
- <programlisting>HBaseConfiguration conf1 = HBaseConfiguration.create();
+ <programlisting language="java">HBaseConfiguration conf1 = HBaseConfiguration.create();
HTable table1 = new HTable(conf1, "myTable");
HBaseConfiguration conf2 = HBaseConfiguration.create();
HTable table2 = new HTable(conf2, "myTable");</programlisting>
@@ -1739,7 +1759,7 @@ HTable table2 = new HTable(conf2, "myTable");</programlisting>
the following example:</para>
<example>
<title>Pre-Creating an <code>HConnection</code></title>
- <programlisting>// Create a connection to the cluster.
+ <programlisting language="java">// Create a connection to the cluster.
HConnection connection = HConnectionManager.createConnection(Configuration);
HTableInterface table = connection.getTable("myTable");
// use table as needed, the table returned is lightweight
@@ -1796,7 +1816,7 @@ connection.close();</programlisting>
represents a list of Filters with a relationship of <code>FilterList.Operator.MUST_PASS_ALL</code> or
<code>FilterList.Operator.MUST_PASS_ONE</code> between the Filters. The following example shows an 'or' between two
Filters (checking for either 'my value' or 'my other value' on the same attribute).</para>
-<programlisting>
+<programlisting language="java">
FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE);
SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
  cf,
@@ -1829,7 +1849,7 @@ scan.setFilter(list);
</code>), inequality (<code>CompareOp.NOT_EQUAL</code>), or ranges (e.g.,
<code>CompareOp.GREATER</code>). The following is an example of testing a column for
equivalence to the String value "my value"...</para>
- <programlisting>
+ <programlisting language="java">
SingleColumnValueFilter filter = new SingleColumnValueFilter(
  cf,
  column,
@@ -1852,7 +1872,7 @@ scan.setFilter(filter);
<para><link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RegexStringComparator.html">RegexStringComparator</link>
supports regular expressions for value comparisons.</para>
- <programlisting>
+ <programlisting language="java">
RegexStringComparator comp = new RegexStringComparator("my.");   // any value that starts with 'my'
SingleColumnValueFilter filter = new SingleColumnValueFilter(
  cf,
@@ -1873,7 +1893,7 @@ scan.setFilter(filter);
xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/SubstringComparator.html">SubstringComparator</link>
can be used to determine if a given substring exists in a value. The comparison is
case-insensitive.
</para>
- <programlisting>
+ <programlisting language="java">
SubstringComparator comp = new SubstringComparator("y val");   // looking for 'my value'
SingleColumnValueFilter filter = new SingleColumnValueFilter(
  cf,
@@ -1930,7 +1950,7 @@ scan.setFilter(filter);
<para>Note: The same column qualifier can be used in different column families. This filter
returns all matching columns. </para>
<para>Example: Find all columns in a row and family that start with "abc"</para>
- <programlisting>
+ <programlisting language="java">
HTableInterface t = ...;
byte[] row = ...;
byte[] family = ...;
@@ -1960,7 +1980,7 @@ rs.close();
prefixes. It can be used to efficiently get discontinuous sets of columns from very wide
rows. </para>
<para>Example: Find all columns in a row and family that start with "abc" or "xyz"</para>
- <programlisting>
+ <programlisting language="java">
HTableInterface t = ...;
byte[] row = ...;
byte[] family = ...;
@@ -1993,7 +2013,7 @@ rs.close();
filter returns all matching columns. </para>
<para>Example: Find all columns in a row and family between "bbbb" (inclusive) and "bbdd"
(inclusive)</para>
- <programlisting>
+ <programlisting language="java">
HTableInterface t = ...;
byte[] row = ...;
byte[] family = ...;
@@ -2145,46 +2165,56 @@ rs.close();
xml:id="block.cache">
<title>Block Cache</title>
- <para>HBase provides three different BlockCache implementations: the default onheap
- LruBlockCache, BucketCache, and SlabCache, which are both (usually) offheap. This section
+ <para>HBase provides two different BlockCache implementations: the default onheap
+ LruBlockCache and BucketCache, which is (usually) offheap. This section
discusses benefits and drawbacks of each implementation, how to choose the appropriate
option, and configuration options for each.</para>
+
+ <note><title>Block Cache Reporting: UI</title>
+ <para>See the RegionServer UI for details on the caching deploy. Since HBase 0.98.4, the
+ Block Cache detail has been significantly extended, showing configurations,
+ sizings, current usage, time-in-the-cache, and even details on block counts and types.</para>
+ </note>
+ <section>
+ <title>Cache Choices</title>
<para><classname>LruBlockCache</classname> is the original implementation, and is
- entirely within the Java heap. <classname>SlabCache</classname> and
- <classname>BucketCache</classname> are mainly intended for keeping blockcache
- data offheap, although BucketCache can also keep data onheap and in files.</para>
- <para><emphasis>SlabCache is deprecated and will be removed in 1.0!</emphasis></para>
- <para>BucketCache has seen more production deploys and has more deploy options. Fetching
- will always be slower when fetching from BucketCache or SlabCache, as compared with the
- native onheap LruBlockCache. However, latencies tend to be less erratic over time,
- because there is less garbage collection.</para>
- <para>Anecdotal evidence indicates that BucketCache requires less garbage collection than
- SlabCache so should be even less erratic (than SlabCache or LruBlockCache).</para>
- <para>SlabCache tends to do more garbage collections, because blocks are always moved
- between L1 and L2, at least given the way <classname>DoubleBlockCache</classname>
- currently works. When you enable SlabCache, you are enabling a two tier caching
- system, an L1 cache which is implemented by an instance of LruBlockCache and
- an offheap L2 cache which is implemented by SlabCache.
Management of these
- two tiers and how blocks move between them is done by <classname>DoubleBlockCache</classname>
- when you are using SlabCache. DoubleBlockCache works by caching all blocks in L1
- AND L2. When blocks are evicted from L1, they are moved to L2. See
- <xref linkend="offheap.blockcache.slabcache" /> for more detail on how DoubleBlockCache works.
+ intended for keeping blockcache data offheap, although BucketCache can also
+ keep data onheap and serve from a file-backed cache.
+ <note><title>BucketCache is production ready as of hbase-0.98.6</title>
+ <para>To run with BucketCache, you need HBASE-11678. This was included in
+ hbase-0.98.6.
+ </para>
+ </note>
</para>
- <para>The hosting class for BucketCache is <classname>CombinedBlockCache</classname>.
- It keeps all DATA blocks in the BucketCache and meta blocks -- INDEX and BLOOM blocks --
+
+ <para>Fetching will always be slower when fetching from BucketCache,
+ as compared to the native onheap LruBlockCache. However, latencies tend to be
+ less erratic over time, because there is less garbage collection when you use
+ BucketCache since it is managing BlockCache allocations, not the GC. If the
+ BucketCache is deployed in offheap mode, this memory is not managed by the
+ GC at all. This is why you would use BucketCache: so your latencies are less erratic,
+ and to mitigate GCs and heap fragmentation. See Nick Dimiduk's <link
+ xlink:href="http://www.n10k.com/blog/blockcache-101/">BlockCache 101</link> for
+ comparisons running onheap vs offheap tests. Also see
+ <link xlink:href="http://people.apache.org/~stack/bc/">Comparing BlockCache Deploys</link>,
+ which finds that if your dataset fits inside your LruBlockCache deploy, use it; otherwise,
+ if you are experiencing cache churn (or you want your cache to exist beyond the
+ vagaries of java GC), use BucketCache.
+ </para>
+
+ <para>When you enable BucketCache, you are enabling a two-tier caching
+ system, an L1 cache which is implemented by an instance of LruBlockCache and
+ an offheap L2 cache which is implemented by BucketCache. Management of these
+ two tiers and the policy that dictates how blocks move between them is done by
+ <classname>CombinedBlockCache</classname>. It keeps all DATA blocks in the L2
+ BucketCache and meta blocks -- INDEX and BLOOM blocks --
onheap in the L1 <classname>LruBlockCache</classname>.
- </para>
- <para>Because the hosting class for each implementation
- (<classname>DoubleBlockCache</classname> vs <classname>CombinedBlockCache</classname>)
- works so differently, it is difficult to do a fair comparison between BucketCache and SlabCache.
- See Nick Dimiduk's <link
- xlink:href="http://www.n10k.com/blog/blockcache-101/">BlockCache 101</link> for some
- numbers.</para>
- <para>For more information about the off heap cache options, see <xref
- linkend="offheap.blockcache" />.</para>
+ See <xref linkend="offheap.blockcache" /> for more detail on going offheap.</para>
</section>
+
<section xml:id="cache.configurations">
<title>General Cache Configurations</title>
<para>Apart from the cache implementation itself, you can set some general configuration
@@ -2196,6 +2226,7 @@ rs.close();
introduced in <link xlink:href="https://issues.apache.org/jira/browse/HBASE-9857"
>HBASE-9857</link>.</para>
</section>
+
<section xml:id="block.cache.design">
<title>LruBlockCache Design</title>
@@ -2219,7 +2250,7 @@ rs.close();
was accessed. Catalog tables are configured like this.
This group is the last one considered during evictions.</para>
<para>To mark a column family as in-memory, call
- <programlisting>HColumnDescriptor.setInMemory(true);</programlisting> if creating a table from java,
+ <programlisting language="java">HColumnDescriptor.setInMemory(true);</programlisting> if creating a table from Java,
or set <command>IN_MEMORY => true</command> when creating or altering a table in the shell: e.g.
<programlisting>hbase(main):003:0> create 't', {NAME => 'f', IN_MEMORY => 'true'}</programlisting></para>
</listitem>
@@ -2334,58 +2365,58 @@ rs.close();
and would surely evict data that's currently in use. </para>
</listitem>
</itemizedlist>
+ <section xml:id="data.blocks.in.fscache">
+ <title>Caching META blocks only (DATA blocks in fscache)</title>
+ <para>An interesting setup is one where we cache META blocks only and we read DATA
+ blocks in on each access. If the DATA blocks fit inside fscache, this alternative
+ may make sense when access is completely random across a very large dataset.
+ To enable this setup, alter your table and for each column family
+ set <varname>BLOCKCACHE => 'false'</varname>. You are 'disabling' the
+ BlockCache for this column family only; you can never disable the caching of
+ META blocks. Since
+ <link xlink:href="https://issues.apache.org/jira/browse/HBASE-4683">HBASE-4683 Always cache index and bloom blocks</link>,
+ we will cache META blocks even if the BlockCache is disabled.
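+ From the Java API, the rough equivalent is the sketch below; the column family name
+ is made up for illustration.
+ <programlisting language="java">
+// Disable caching of DATA blocks for one column family; INDEX and BLOOM
+// (META) blocks are always cached regardless of this setting.
+HColumnDescriptor hcd = new HColumnDescriptor("cf");
+hcd.setBlockCacheEnabled(false);
+</programlisting>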
+ </para>
+ </section>
</section>
<section xml:id="offheap.blockcache">
<title>Offheap Block Cache</title>
- <section xml:id="offheap.blockcache.slabcache">
- <title>Enable SlabCache</title>
- <para><emphasis>SlabCache is deprecated and will be removed in 1.0!</emphasis></para>
- <para> SlabCache is originally described in <link
- xlink:href="http://blog.cloudera.com/blog/2012/01/caching-in-hbase-slabcache/">Caching
- in Apache HBase: SlabCache</link>. Quoting from the API documentation for <link
- xlink:href="http://hbase.apache.org/0.94/apidocs/org/apache/hadoop/hbase/io/hfile/DoubleBlockCache.html">DoubleBlockCache</link>,
- the hosting class for SlabCache deploys,
- DoubleBlockCache is an abstraction layer that combines two caches, the smaller onHeapCache and the
- larger offHeapCache. CacheBlock attempts to cache the block in both caches, while
- readblock reads first from the faster on heap cache before looking for the block in
- the off heap cache. Metrics are the combined size and hits and misses of both
- caches.</para>
- <para>To enable SlabCache, set the float
- <varname>hbase.offheapcache.percentage</varname> to some value between 0 and 1 in
- the <filename>hbase-site.xml</filename> file on the RegionServer. The value will be multiplied by the
- setting for <varname>-XX:MaxDirectMemorySize</varname> in the RegionServer's
- <filename>hbase-env.sh</filename> configuration file and the result is used by
- SlabCache as its offheap store. The onheap store will be the value of the float
- <varname>HConstants.HFILE_BLOCK_CACHE_SIZE_KEY</varname> setting (some value between
- 0 and 1) multiplied by the size of the allocated Java heap.</para>
- <para>Restart (or rolling restart) your cluster for the configurations to take effect.
- Check logs for errors or unexpected behavior.</para>
- </section>
<section xml:id="enable.bucketcache">
- <title>Enable BucketCache</title>
- <para>The usual deploy of BucketCache is via a
- managing class that sets up two caching tiers: an L1 onheap cache
- implemented by LruBlockCache and a second L2 cache implemented
- with BucketCache. The managing class is <link
- xlink:href="http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.html">CombinedBlockCache</link> by default. The just-previous link describes the mechanism of CombinedBlockCache. In short, it works
+ <title>How to Enable BucketCache</title>
+ <para>The usual deploy of BucketCache is via a managing class that sets up two caching tiers: an L1 onheap cache
+ implemented by LruBlockCache and a second L2 cache implemented with BucketCache. The managing class is <link
+ xlink:href="http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.html">CombinedBlockCache</link> by default.
+ The just-previous link describes the caching 'policy' implemented by CombinedBlockCache. In short, it works
by keeping meta blocks -- INDEX and BLOOM in the L1, onheap LruBlockCache tier -- and DATA
blocks are kept in the L2, BucketCache tier. It is possible to amend this behavior in
HBase since version 1.0 and ask that a column family have both its meta and DATA blocks hosted onheap in the L1 tier by
- setting <varname>cacheDataInL1</varname> via <programlisting>(HColumnDescriptor.setCacheDataInL1(true)</programlisting>
+ setting <varname>cacheDataInL1</varname> via
+ <code>HColumnDescriptor.setCacheDataInL1(true)</code>
or in the shell, creating or amending column families setting <varname>CACHE_DATA_IN_L1</varname>
to true: e.g. <programlisting>hbase(main):003:0> create 't', {NAME => 't', CONFIGURATION => {CACHE_DATA_IN_L1 => 'true'}}</programlisting></para>
- <para>The BucketCache deploy can be onheap, offheap, or file based. You set which via the
- <varname>hbase.bucketcache.ioengine</varname> setting it to
- <varname>heap</varname> for BucketCache running as part of the java heap,
- <varname>offheap</varname> for BucketCache to make allocations offheap,
- and <varname>file:PATH_TO_FILE</varname> for BucketCache to use a file
- (Useful in particular if you have some fast i/o attached to the box such as SSDs). </para>
+
+ <para>The BucketCache Block Cache can be deployed onheap, offheap, or file based.
+ You set which via the
+ <varname>hbase.bucketcache.ioengine</varname> setting. Setting it to
+ <varname>heap</varname> will have BucketCache deployed inside the
+ allocated java heap. Setting it to <varname>offheap</varname> will have
+ BucketCache make its allocations offheap,
+ and an ioengine setting of <varname>file:PATH_TO_FILE</varname> will direct
+ BucketCache to use file caching (useful in particular if you have some fast i/o attached to the box such
+ as SSDs). </para>
- <para>To disable CombinedBlockCache, and use the BucketCache as a strict L2 cache to the L1
- LruBlockCache, set <varname>CacheConfig.BUCKET_CACHE_COMBINED_KEY</varname> to
- <literal>false</literal>. In this mode, on eviction from L1, blocks go to L2.</para>
+ <para xml:id="raw.l1.l2">It is possible to deploy an L1+L2 setup where we bypass the CombinedBlockCache
+ policy and have BucketCache working as a strict L2 cache to the L1
+ LruBlockCache. For such a setup, set <varname>CacheConfig.BUCKET_CACHE_COMBINED_KEY</varname> to
+ <literal>false</literal>. In this mode, on eviction from L1, blocks go to L2.
+ When a block is cached, it is cached first in L1. When we go to look for a cached block,
+ we look first in L1 and if none found, then search L2. Let us call this deploy format,
+ <emphasis><indexterm><primary>Raw L1+L2</primary></indexterm></emphasis>.</para>
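+ <para>A sketch of such a setup follows. <varname>CacheConfig.BUCKET_CACHE_COMBINED_KEY</varname>
+ resolves to a hbase-site.xml key; setting it programmatically here is for illustration only,
+ and you would normally set it in the site file instead.</para>
+ <programlisting language="java">
+Configuration conf = HBaseConfiguration.create();
+// Bypass the CombinedBlockCache policy: BucketCache becomes a strict
+// victim-style L2 behind the L1 LruBlockCache.
+conf.setBoolean(CacheConfig.BUCKET_CACHE_COMBINED_KEY, false);
+</programlisting>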
+ <para>Other BucketCache configs include: specifying a location to persist cache to across
+ restarts, how many threads to use writing the cache, etc. See the
+ <link xlink:href="https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/io/hfile/CacheConfig.html">CacheConfig.html</link>
+ class for configuration options and descriptions.</para>
<procedure>
<title>BucketCache Example Configuration</title>
@@ -2408,7 +2439,7 @@ rs.close();
<step>
<para>Next, add the following configuration to the RegionServer's
<filename>hbase-site.xml</filename>.</para>
- <programlisting>
+ <programlisting language="xml">
<![CDATA[<property>
  <name>hbase.bucketcache.ioengine</name>
  <value>offheap</value>
</property>
@@ -2433,8 +2464,89 @@ rs.close();
In other words, you configure the L1 LruBlockCache as you would normally,
as you would when there is no L2 BucketCache present.
</para>
+ <para><link xlink:href="https://issues.apache.org/jira/browse/HBASE-10641"
+ >HBASE-10641</link> introduced the ability to configure multiple sizes for the
+ buckets of the bucketcache, in HBase 0.98 and newer. To configure multiple bucket
+ sizes, configure the new property <option>hfile.block.cache.sizes</option> (instead of
+ <option>hfile.block.cache.size</option>) to a comma-separated list of block sizes,
+ ordered from smallest to largest, with no spaces. The goal is to optimize the bucket
+ sizes based on your data access patterns. The following example configures buckets of
+ size 4096 and 8192.</para>
+ <screen language="xml"><![CDATA[
+<property>
+  <name>hfile.block.cache.sizes</name>
+  <value>4096,8192</value>
+</property>
+ ]]></screen>
+ <note xml:id="direct.memory">
+ <title>Direct Memory Usage In HBase</title>
+ <para>The default maximum direct memory varies by JVM. Traditionally it is 64M
+ or some relation to allocated heap size (-Xmx) or no limit at all (JDK7 apparently).
+ HBase servers use direct memory; in particular, with short-circuit reading, the hosted
+ DFSClient will allocate direct memory buffers. If you do offheap block caching, you'll
+ be making use of direct memory. Starting your JVM, make sure
+ the <varname>-XX:MaxDirectMemorySize</varname> setting in
+ <filename>conf/hbase-env.sh</filename> is set to some value that is
+ higher than what you have allocated to your offheap blockcache
+ (<varname>hbase.bucketcache.size</varname>). It should be larger than your offheap block
+ cache and then some for DFSClient usage (How much the DFSClient uses is not
+ easy to quantify; it is the number of open hfiles * <varname>hbase.dfs.client.read.shortcircuit.buffer.size</varname>
+ where hbase.dfs.client.read.shortcircuit.buffer.size is set to 128k in HBase -- see <filename>hbase-default.xml</filename>
+ default configurations).
+ Direct memory is part of the Java process memory but is separate from the object
+ heap allocated by -Xmx. The value allocated by MaxDirectMemorySize must not exceed
+ physical RAM, and is likely to be less than the total available RAM due to other
+ memory requirements and system constraints.
+ </para>
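+ <para>As an illustration only, with made-up numbers: if your offheap blockcache
+ (<varname>hbase.bucketcache.size</varname>) is 4 GB and the RegionServer has on the
+ order of 1000 hfiles open, the short-circuit read buffers alone can account for
+ roughly 1000 * 128k, or about 125 MB, so a <varname>-XX:MaxDirectMemorySize</varname>
+ of 5 GB would leave headroom for both.</para>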
+ <para>You can see how much memory -- onheap and offheap/direct -- a RegionServer is
+ configured to use and how much it is using at any one time by looking at the
+ <emphasis>Server Metrics: Memory</emphasis> tab in the UI. It can also be obtained
+ via JMX. In particular the direct memory currently used by the server can be found
+ on the <varname>java.nio.type=BufferPool,name=direct</varname> bean. Terracotta has
+ a <link
+ xlink:href="http://terracotta.org/documentation/4.0/bigmemorygo/configuration/storage-options"
+ >good write up</link> on using offheap memory in java. It is for their product
+ BigMemory but a lot of the issues noted apply in general to any attempt at going
+ offheap. Check it out.</para>
+ </note>
+ <note xml:id="hbase.bucketcache.percentage.in.combinedcache"><title>hbase.bucketcache.percentage.in.combinedcache</title>
+ <para>This is a pre-HBase 1.0 configuration removed because it
+ was confusing. It was a float that you would set to some value
+ between 0.0 and 1.0. Its default was 0.9. If the deploy was using
+ CombinedBlockCache, then the LruBlockCache L1 size was calculated to
+ be (1 - <varname>hbase.bucketcache.percentage.in.combinedcache</varname>) * <varname>size-of-bucketcache</varname>
+ and the BucketCache size was <varname>hbase.bucketcache.percentage.in.combinedcache</varname> * size-of-bucket-cache,
+ where size-of-bucket-cache itself is EITHER the value of the configuration hbase.bucketcache.size
+ IF it was specified as megabytes OR <varname>hbase.bucketcache.size</varname> * <varname>-XX:MaxDirectMemorySize</varname> if
+ <varname>hbase.bucketcache.size</varname> is between 0 and 1.0.
+ </para>
+ <para>In 1.0, it should be more straightforward. L1 LruBlockCache size
+ is set as a fraction of java heap using the hfile.block.cache.size setting
+ (not the best name) and L2 is set as above either in absolute
+ megabytes or as a fraction of allocated maximum direct memory.
+ </para>
+ </note>
</section>
</section>
+ <section>
+ <title>Compressed Blockcache</title>
+ <para><link xlink:href="https://issues.apache.org/jira/browse/HBASE-11331"
+ >HBASE-11331</link> introduced lazy blockcache decompression, more simply referred to
+ as compressed blockcache. When compressed blockcache is enabled, data and encoded data
+ blocks are cached in the blockcache in their on-disk format, rather than being
+ decompressed and decrypted before caching.</para>
+ <para xlink:href="https://issues.apache.org/jira/browse/HBASE-11331">For a RegionServer
+ hosting more data than can fit into cache, enabling this feature with SNAPPY compression
+ has been shown to result in a 50% increase in throughput and a 30% improvement in mean
+ latency, while increasing garbage collection by 80% and increasing overall CPU load by
+ 2%. See HBASE-11331 for more details about how performance was measured and achieved.
+ For a RegionServer hosting data that can comfortably fit into cache, or if your workload
+ is sensitive to extra CPU or garbage-collection load, you may receive less
+ benefit.</para>
+ <para>Compressed blockcache is disabled by default. To enable it, set
+ <code>hbase.block.data.cachecompressed</code> to <code>true</code> in
+ <filename>hbase-site.xml</filename> on all RegionServers.</para>
+ </section>
</section>
<section
@@ -2618,7 +2730,7 @@ rs.close();
ZooKeeper splitlog node (<filename>/hbase/splitlog</filename>) as tasks. You can view the
contents of the splitlog by issuing the following <command>zkcli</command> command.
Example output is shown.</para> - <screen>ls /hbase/splitlog + <screen language="bourne">ls /hbase/splitlog [hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost8.sample.com%2C57020%2C1340474893275-splitting%2Fhost8.sample.com%253A57020.1340474893900, hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost3.sample.com%2C57020%2C1340474893299-splitting%2Fhost3.sample.com%253A57020.1340474893931, hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost4.sample.com%2C57020%2C1340474893287-splitting%2Fhost4.sample.com%253A57020.1340474893946] @@ -2969,6 +3081,205 @@ ctime = Sat Jun 23 11:13:40 PDT 2012 </para> </section> + <section xml:id="regions.arch.states"> + <title>Region State Transition</title> + <para> HBase maintains a state for each region and persists the state in META. The state + of the META region itself is persisted in ZooKeeper. You can see the states of regions + in transition in the Master web UI. Following is the list of possible region + states.</para> + + <itemizedlist> + <title>Possible Region States</title> + <listitem> + <para>OFFLINE: the region is offline and not opening</para> + </listitem> + <listitem> + <para>OPENING: the region is in the process of being opened</para> + </listitem> + <listitem> + <para>OPEN: the region is open and the region server has notified the master</para> + </listitem> + <listitem> + <para>FAILED_OPEN: the region server failed to open the region</para> + </listitem> + <listitem> + <para>CLOSING: the region is in the process of being closed</para> + </listitem> + <listitem> + <para>CLOSED: the region server has closed the region and notified the master</para> + </listitem> + <listitem> + <para>FAILED_CLOSE: the region server failed to close the region</para> + </listitem> + <listitem> + <para>SPLITTING: the region server notified the master that the region is + splitting</para> + </listitem> + <listitem> + <para>SPLIT: the region server notified the master that the region has finished + splitting</para> + </listitem> + <listitem> + <para>SPLITTING_NEW: this region is being created by a split which is in + progress</para> + </listitem> + <listitem> + <para>MERGING: the region server notified the master that this region is being merged + with another region</para> + </listitem> + <listitem> + <para>MERGED: the region server notified the master that this region has been + merged</para> + </listitem> + <listitem> + <para>MERGING_NEW: this region is being created by a merge of two regions</para> + </listitem> + </itemizedlist> + + <figure> + <title>Region State Transitions</title> + <mediaobject> + <imageobject> + <imagedata align="center" valign="middle" fileref="region_states.png"/> + </imageobject> + <caption> + <para>This graph shows all allowed transitions a region can undergo. In the graph, + each node is a state. A node has a color based on the state type, for readability. 
A directed line in the graph is a possible state transition.</para>
</caption>
</mediaobject>
</figure>
<itemizedlist>
<title>Graph Legend</title>
<listitem>
<para>Brown: Offline state, a special state that can be transient (after closed before
opening), terminal (regions of disabled tables), or initial (regions of newly
created tables)</para></listitem>
<listitem>
<para>Palegreen: Online state in which regions can serve requests</para></listitem>
<listitem>
<para>Lightblue: Transient states</para></listitem>
<listitem>
<para>Red: Failure states that need OPS attention</para></listitem>
<listitem>
<para>Gold: Terminal states of regions split/merged</para></listitem>
<listitem>
<para>Grey: Initial states of regions created through split/merge</para></listitem>
</itemizedlist>
<orderedlist>
<title>Region State Transitions Explained</title>
<listitem>
<para>The master moves a region from <literal>OFFLINE</literal> to
<literal>OPENING</literal> state and tries to assign the region to a region
server. The region server may or may not have received the open region request. The
master retries sending the open region request to the region server until the RPC
goes through or the master runs out of retries. After the region server receives the
open region request, the region server begins opening the region.</para>
</listitem>
<listitem>
<para>If the master runs out of retries, the master prevents the region server
from opening the region by moving the region to <literal>CLOSING</literal> state and
trying to close it, even if the region server is starting to open the region.</para>
</listitem>
<listitem>
<para>After the region server opens the region, it continues to try to notify the
master until the master moves the region to <literal>OPEN</literal> state and
notifies the region server. The region is now open.</para>
</listitem>
<listitem>
<para>If the region server cannot open the region, it notifies the master. The master
moves the region to <literal>CLOSED</literal> state and tries to open the region on
a different region server.</para>
</listitem>
<listitem>
<para>If the master cannot open the region on any region server after a certain number
of retries, it moves the region to <literal>FAILED_OPEN</literal> state, and takes no
further action until an operator intervenes from the HBase shell, or the server is
dead.</para>
</listitem>
<listitem>
<para>The master moves a region from <literal>OPEN</literal> to
<literal>CLOSING</literal> state. The region server holding the region may or may
not have received the close region request. The master retries sending the close
request to the server until the RPC goes through or the master runs out of
retries.</para>
</listitem>
<listitem>
<para>If the region server is not online, or throws
<code>NotServingRegionException</code>, the master moves the region to
<literal>OFFLINE</literal> state and re-assigns it to a different region
server.</para>
</listitem>
<listitem>
<para>If the region server is online, but not reachable after the master runs out of
retries, the master moves the region to <literal>FAILED_CLOSE</literal> state and
takes no further action until an operator intervenes from the HBase shell, or the
server is dead.</para>
</listitem>
<listitem>
<para>If the region server gets the close region request, it closes the region and
notifies the master.
The master moves the region to <literal>CLOSED</literal> state
+ and re-assigns it to a different region server.</para>
+ </listitem>
+ <listitem>
+ <para>Before assigning a region, the master moves the region to
+ <literal>OFFLINE</literal> state automatically if it is in
+ <literal>CLOSED</literal> state.</para>
+ </listitem>
+ <listitem>
+ <para>When a region server is about to split a region, it notifies the master. The
+ master moves the region to be split from <literal>OPEN</literal> to
+ <literal>SPLITTING</literal> state and adds the two new regions to be created to
+ the region server. These two regions are in <literal>SPLITTING_NEW</literal> state
+ initially.</para>
+ </listitem>
+ <listitem>
+ <para>After notifying the master, the region server starts to split the region. Once
+ past the point of no return, the region server notifies the master again so the
+ master can update the META. However, the master does not update the region states
+ until it is notified by the server that the split is done. If the split is
+ successful, the splitting region is moved from <literal>SPLITTING</literal> to
+ <literal>SPLIT</literal> state and the two new regions are moved from
+ <literal>SPLITTING_NEW</literal> to <literal>OPEN</literal> state.</para>
+ </listitem>
+ <listitem>
+ <para>If the split fails, the splitting region is moved from
+ <literal>SPLITTING</literal> back to <literal>OPEN</literal> state, and the two
+ new regions which were created are moved from <literal>SPLITTING_NEW</literal> to
+ <literal>OFFLINE</literal> state.</para>
+ </listitem>
+ <listitem>
+ <para>When a region server is about to merge two regions, it notifies the master
+ first. The master moves the two regions to be merged from <literal>OPEN</literal> to
+ <literal>MERGING</literal> state, and adds the new region which will hold the
+ contents of the merged regions to the region server. The new region is in
+ <literal>MERGING_NEW</literal> state initially.</para>
+ </listitem>
+ <listitem>
+ <para>After notifying the master, the region server starts to merge the two regions.
+ Once past the point of no return, the region server notifies the master again so the
+ master can update the META. However, the master does not update the region states
+ until it is notified by the region server that the merge has completed. If the merge
+ is successful, the two merging regions are moved from <literal>MERGING</literal> to
+ <literal>MERGED</literal> state and the new region is moved from
+ <literal>MERGING_NEW</literal> to <literal>OPEN</literal> state.</para>
+ </listitem>
+ <listitem>
+ <para>If the merge fails, the two merging regions are moved from
+ <literal>MERGING</literal> back to <literal>OPEN</literal> state, and the new
+ region which was created to hold the contents of the merged regions is moved from
+ <literal>MERGING_NEW</literal> to <literal>OFFLINE</literal> state.</para>
+ </listitem>
+ <listitem>
+ <para>For regions in <literal>FAILED_OPEN</literal> or <literal>FAILED_CLOSE</literal>
+ states, the master tries to close them again when they are reassigned by an
+ operator via HBase Shell.
</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
</section>
<!-- assignment -->
<section xml:id="regions.arch.locality">
@@ -3018,7 +3329,7 @@ ctime = Sat Jun 23 11:13:40 PDT 2012
Typically a custom split policy should extend HBase's default split policy: <link
xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.html">ConstantSizeRegionSplitPolicy</link>. </para>
<para>The policy can be set globally through the HBaseConfiguration used or on a per table basis:
-<programlisting>
+<programlisting language="java">
HTableDescriptor myHtd = ...;
myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName());
</programlisting>
@@ -3125,7 +3436,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
finally, it opens the merged region on the RegionServer and reports the merge to the Master. </para>
<para>An example of region merges in the hbase shell
- <programlisting>$ hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME'
+ <programlisting language="bourne">$ hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME'
hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME', true
</programlisting>
It is an asynchronous operation; the call returns immediately, without waiting for the merge to complete.
@@ -3227,10 +3538,10 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
<para>To view a textualized version of hfile content, you can use the <classname>org.apache.hadoop.hbase.io.hfile.HFile
- </classname>tool. Type the following to see usage:<programlisting><code>$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile </code> </programlisting>For
+ </classname>tool. Type the following to see usage:<programlisting language="bourne"><code>$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile </code> </programlisting>For
example, to view the content of the file
<filename>hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475</filename>,
- type the following:<programlisting> <code>$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile -v -f hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475 </code> </programlisting>If
+ type the following:<programlisting language="bourne"> <code>$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile -v -f hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475 </code> </programlisting>If
you leave off the option -v, you will see just a summary of the hfile. See
usage for other things to do with the <classname>HFile</classname> tool.</para>
@@ -3315,307 +3626,514 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
</section>
</section>
- <section
<section xml:id="compaction">
<title>Compaction</title>
- <para><firstterm>Compaction</firstterm> is an operation which reduces the number of
- StoreFiles, by merging them together, in order to increase performance on read
- operations. Compactions can be resource-intensive to perform, and can either help or
- hinder performance depending on many factors. </para>
- <para>Compactions fall into two categories: minor and major.</para>
- <para><firstterm>Minor compactions</firstterm> usually pick up a small number of small,
- adjacent <systemitem>StoreFiles</systemitem> and rewrite them as a single
- <systemitem>StoreFile</systemitem>. Minor compactions do not drop deletes or expired
- cells.
If a minor compaction picks up all the <systemitem>StoreFiles</systemitem> in a - <systemitem>Store</systemitem>, it promotes itself from a minor to a major compaction. - If there are a lot of small files to be compacted, the algorithm tends to favor minor - compactions to "clean up" those small files.</para> - <para>The goal of a <firstterm>major compaction</firstterm> is to end up with a single - StoreFile per store. Major compactions also process delete markers and max versions. - Attempting to process these during a minor compaction could cause side effects. </para> - - <formalpara> + <itemizedlist> + <title>Ambiguous Terminology</title> + <listitem><para>A <firstterm>StoreFile</firstterm> is a facade of HFile. In terms of compaction, use of + StoreFile seems to have prevailed in the past.</para></listitem> + <listitem><para>A <firstterm>Store</firstterm> is the same thing as a ColumnFamily. + StoreFiles are related to a Store, or ColumnFamily.</para></listitem> + <listitem> + <para>If you want to read more about StoreFiles versus HFiles and Stores versus + ColumnFamilies, see <link + xlink:href="https://issues.apache.org/jira/browse/HBASE-11316">HBASE-11316</link>.</para> + </listitem> + </itemizedlist> + <para>When the MemStore reaches a given size + (<code>hbase.hregion.memstore.flush.size</code>), it flushes its contents to a + StoreFile. The number of StoreFiles in a Store increases over time. + <firstterm>Compaction</firstterm> is an operation which reduces the number of + StoreFiles in a Store, by merging them together, in order to increase performance on + read operations. Compactions can be resource-intensive to perform, and can either help + or hinder performance depending on many factors. </para> + <para>Compactions fall into two categories: minor and major. Minor and major compactions + differ in the following ways.</para> + <para><firstterm>Minor compactions</firstterm> usually select a small number of small, + adjacent StoreFiles and rewrite them as a single StoreFile. Minor compactions do not + drop (filter out) deletes or expired versions, because of potential side effects. See <xref + linkend="compaction.and.deletes" /> and <xref + linkend="compaction.and.versions" /> for information on how deletes and versions are + handled in relation to compactions. The end result of a minor compaction is fewer, + larger StoreFiles for a given Store.</para> + <para>The end result of a <firstterm>major compaction</firstterm> is a single StoreFile + per Store. Major compactions also process delete markers and max versions. See <xref + linkend="compaction.and.deletes" /> and <xref + linkend="compaction.and.versions" /> for information on how deletes and versions are + handled in relation to compactions.</para> + + <formalpara + xml:id="compaction.and.deletes"> <title>Compaction and Deletions</title> <para> When an explicit deletion occurs in HBase, the data is not actually deleted. Instead, a <firstterm>tombstone</firstterm> marker is written. The tombstone marker prevents the data from being returned with queries. During a major compaction, the data is actually deleted, and the tombstone marker is removed from the StoreFile. If - the deletion happens because of an expired TTL, no tombstone is created. Instead, the + expired data is filtered out and is not written back to the compacted + StoreFile.</para> + </formalpara>
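+ <para>For illustration, here is a minimal sketch (assuming an open connection, and
+ using a hypothetical table name and row key) of an explicit delete followed by a
+ manually requested major compaction:</para>
+<programlisting language="java">
+Connection connection = ...; // an open Connection, as in the other examples
+Table table = connection.getTable(TableName.valueOf("my_table"));
+
+// The delete writes a tombstone; the deleted cells still exist in the StoreFiles.
+table.delete(new Delete(Bytes.toBytes("row1")));
+
+// Request a major compaction (an asynchronous call). Once it has run, both the
+// dead cells and the tombstone marker are gone from the rewritten StoreFile.
+Admin admin = connection.getAdmin();
+admin.majorCompact(TableName.valueOf("my_table"));
+</programlisting>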
- - <formalpara> + + <formalpara + xml:id="compaction.and.versions"> <title>Compaction and Versions</title> - <para> When you create a column family, you can specify the maximum number of versions + <para> When you create a Column Family, you can specify the maximum number of versions to keep, by specifying <varname>HColumnDescriptor.setMaxVersions(int versions)</varname>. The default value is <literal>3</literal>. If more versions than the specified maximum exist, the excess versions are filtered out and not written - back to the compacted StoreFile.</para> + back to the compacted StoreFile.</para> </formalpara>
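+ <para>For example, a minimal sketch (with hypothetical table and column family
+ names, and assuming an <classname>Admin</classname> instance as in earlier
+ examples) of setting the maximum number of versions at table-creation time:</para>
+<programlisting language="java">
+Admin admin = ...; // as above
+HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("my_table"));
+HColumnDescriptor hcd = new HColumnDescriptor("cf");
+hcd.setMaxVersions(5);  // keep at most five versions of each cell
+htd.addFamily(hcd);
+admin.createTable(htd);
+// During compactions, versions in excess of the maximum are filtered out and
+// not written back to the compacted StoreFile.
+</programlisting>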
- + <note> <title>Major Compactions Can Impact Query Results</title> - <para> In some situations, older versions can be inadvertently - resurrected if a newer version is explicitly deleted. See <xref - linkend="major.compactions.change.query.results" /> for a more in-depth explanation. This - situation is only possible before the compaction finishes. - </para> + <para> In some situations, older versions can be inadvertently resurrected if a newer + version is explicitly deleted. See <xref + linkend="major.compactions.change.query.results" /> for a more in-depth explanation. + This situation is only possible before the compaction finishes. </para> </note> - + <para>In theory, major compactions improve performance. However, on a highly loaded system, major compactions can require an inappropriate number of resources and adversely affect performance. In a default configuration, major compactions are scheduled - automatically to run once in a 7-day period. This is usually inappropriate for systems + automatically to run once in a 7-day period. This is sometimes inappropriate for systems in production. You can manage major compactions manually. See <xref linkend="managed.compactions" />. </para> <para>Compactions do not perform region merges. See <xref - linkend="ops.regionmgt.merge" /> for more information on region merging. </para> + linkend="ops.regionmgt.merge" /> for more information on region merging. </para> <section xml:id="compaction.file.selection"> - <title>Algorithm for Compaction File Selection - HBase 0.96.x and newer</title> - <para>The compaction algorithms used by HBase have evolved over time. HBase 0.96 - introduced new algorithms for compaction file selection. To find out about the old - algorithms, see <xref - linkend="compaction" />. The rest of this section describes the new algorithm. File - selection happens in several phases and is controlled by several configurable - parameters. These parameters will be explained in context, and then will be given in a - table which shows their descriptions, defaults, and implications of changing - them.</para> - - <formalpara xml:id="exploringcompaction.policy"> - <title>The<link - xlink:href="https://issues.apache.org/jira/browse/HBASE-7842">ExploringCompaction Policy</link></title> - <para><link - xlink:href="https://issues.apache.org/jira/browse/HBASE-7842">HBASE-7842</link> - was introduced in HBase 0.96 and represents a major change in the algorithms for - file selection for compactions. Its goal is to do the most impactful compaction with - the lowest cost, in situations where a lot of files need compaction. In such a - situation, the list of all eligible files is "explored", and files are grouped by - size before any ratio-based algorithms are run. This favors clean-up of large - numbers of small files before larger files are considered. For more details, refer - to the link to the JIRA. Most of the code for this change can be reviewed in - <filename>hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/ExploringCompactionPolicy.java</filename>.</para> - </formalpara> - - <variablelist> - <title>Algorithms for Determining File List and Compaction Type</title> - <varlistentry> - <term>Create a list of all files which can possibly be compacted, ordered by - sequence ID.</term> - <listitem> + <title>Compaction Policy - HBase 0.96.x and newer</title> + <para>Compacting large StoreFiles, or too many StoreFiles at once, can cause more IO + load than your cluster is able to handle without causing performance problems. The + method by which HBase selects which StoreFiles to include in a compaction (and whether + the compaction is a minor or major compaction) is called the <firstterm>compaction + policy</firstterm>.</para> + <para>Prior to HBase 0.96.x, there was only one compaction policy. That original + compaction policy is still available as + <systemitem>RatioBasedCompactionPolicy</systemitem>. The new default compaction + policy, called <systemitem>ExploringCompactionPolicy</systemitem>, was subsequently + backported to HBase 0.94 and HBase 0.95, and is the default in HBase 0.96 and newer. + It was implemented in <link + xlink:href="https://issues.apache.org/jira/browse/HBASE-7842">HBASE-7842</link>. In + short, <systemitem>ExploringCompactionPolicy</systemitem> attempts to select the best + possible set of StoreFiles to compact with the least amount of work, while the + <systemitem>RatioBasedCompactionPolicy</systemitem> selects the first set that meets + the criteria.</para> + <para>Regardless of the compaction policy used, file selection is controlled by several + configurable parameters and happens in a multi-step approach. These parameters will be + explained in context, and then will be given in a table which shows their + descriptions, defaults, and implications of changing them.</para> + + <section + xml:id="compaction.being.stuck"> + <title>Being Stuck</title> + <para>When the MemStore gets too large, it needs to flush its contents to a StoreFile. + However, a Store can only have <varname>hbase.hstore.blockingStoreFiles</varname> + files, so the MemStore needs to wait for the number of StoreFiles to be reduced by + one or more compactions. While the number of StoreFiles is at this limit, the + MemStore cannot flush its contents to a StoreFile, even after it has grown larger + than <varname>hbase.hregion.memstore.flush.size</varname>. If the MemStore is too + large and the number of StoreFiles is also too high, the algorithm is said to be + "stuck". The compaction algorithm checks for this "stuck" situation and provides + mechanisms to alleviate it.</para>
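+ <para>For illustration only, the two parameters involved in the "stuck" situation
+ are shown together below. The values are arbitrary examples, not recommendations,
+ and in practice these settings belong in <filename>hbase-site.xml</filename> on
+ the cluster rather than in client code.</para>
+<programlisting language="java">
+Configuration conf = HBaseConfiguration.create();
+// Upper bound on StoreFiles per Store before MemStore flushes are blocked.
+conf.setInt("hbase.hstore.blockingStoreFiles", 10);
+// MemStore size, in bytes, at which a flush to a new StoreFile is requested.
+conf.setLong("hbase.hregion.memstore.flush.size", 128 * 1024 * 1024L);
+</programlisting>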
+ </section> + + <section + xml:id="exploringcompaction.policy"> + <title>The ExploringCompactionPolicy Algorithm</title> + <para>The ExploringCompactionPolicy algorithm considers each possible set of + adjacent StoreFiles before choosing the set where compaction will have the most + benefit. </para> + <para>One situation where the ExploringCompactionPolicy works especially well is when + you are bulk-loading data and the bulk loads create larger StoreFiles than the + StoreFiles which are holding data older than the bulk-loaded data. This can "trick" + HBase into choosing to perform a major compaction each time a compaction is needed, + and cause a lot of extra overhead. With the ExploringCompactionPolicy, major + compactions happen much less frequently because minor compactions are more + efficient.</para> + <para>In general, ExploringCompactionPolicy is the right choice for most situations, + and thus is the default compaction policy. You can also use + ExploringCompactionPolicy along with <xref + linkend="ops.stripe" />.</para> + <para>The logic of this policy can be examined in + <filename>hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/ExploringCompactionPolicy.java</filename>. + The following is a walk-through of the logic of the + ExploringCompactionPolicy.</para> + <procedure> + <step> + <para>Make a list of all existing StoreFiles in the Store. The rest of the + algorithm filters this list to come up with the subset of HFiles which will be + chosen for compaction.</para> + </step> + <step> + <para>If this was a user-requested compaction, attempt to perform the requested + compaction type, regardless of what would normally be chosen. Note that even if + the user requests a major compaction, it may not be possible to perform a major + compaction. This may be because not all StoreFiles in the Column Family are + available to compact or because there are too many Stores in the Column + Family.</para> + </step> + <step> + <para>Some StoreFiles are automatically excluded from consideration. These + include:</para> + <itemizedlist> + <listitem> + <para>StoreFiles that are larger than + <varname>hbase.hstore.compaction.max.size</varname></para> + </listitem> + <listitem> + <para>StoreFiles that were created by a bulk-load operation which explicitly + excluded compaction. You may decide to exclude StoreFiles resulting from + bulk loads from compaction. To do this, specify the + <varname>hbase.mapreduce.hfileoutputformat.compaction.exclude</varname> + parameter during the bulk load operation.</para> + </listitem> + </itemizedlist> + </step> + <step> + <para>Iterate through the list from step 1, and make a list of all potential sets + of StoreFiles to compact together. A potential set is a grouping of + <varname>hbase.hstore.compaction.min</varname> contiguous StoreFiles in the + list. For each set, perform some sanity-checking and figure out whether this is + the best compaction that could be done:</para> + <itemizedlist> + <listitem> + <para>If the number of StoreFiles in this set (not the size of the StoreFiles) + is fewer than <varname>hbase.hstore.compaction.min</varname> or more than + <varname>hbase.hstore.compaction.max</varname>, take it out of + consideration.</para> + </listitem> + <listitem> + <para>Compare the size of this set of StoreFiles with the size of the smallest + possible compaction that has been found in the list so far. If the size of + this set of StoreFiles represents the smallest compaction that could be + done, store it to be used as a fall-back if the algorithm is "stuck" and no + StoreFiles would otherwise be chosen. See <xref + linkend="compaction.being.stuck" />.</para> + </listitem> + <listitem> + <para>Do size-based sanity checks against each StoreFile in this set of + StoreFiles.</para> + <itemizedlist> + <listitem> + <para>If the size of this StoreFile is larger than + <varname>hbase.hstore.compaction.max.size</varname>, take it out of + consideration.</para> + </listitem> + <listitem> + <para>If the size is greater than or equal to + <varname>hbase.hstore.compaction.min.size</varname>, sanity-check it + against the file-based ratio to see whether it is too large to be + considered.
The sanity-checking is successful if:</para> + <itemizedlist> + <listitem> + <para>There is only one StoreFile in this set, or</para> + </listitem> + <listitem> + <para>For each StoreFile, its size multiplied by + <varname>hbase.hstore.compaction.ratio</varname> (or + <varname>hbase.hstore.compaction.ratio.offpeak</varname> if + off-peak hours are configured and it is during off-peak hours) is + less than the sum of the sizes of the other HFiles in the + set.</para> + </listitem> + </itemizedlist> + </listitem> + </itemizedlist> + </listitem> + </itemizedlist> + </step> + <step> + <para>If this set of StoreFiles is still in consideration, compare it to the + previously-selected best compaction. If it is better, replace the + previously-selected best compaction with this one.</para> + </step> + <step> + <para>When the entire list of potential compactions has been processed, perform + the best compaction that was found. If no StoreFiles were selected for + compaction, but there are multiple StoreFiles, assume the algorithm is stuck + (see <xref + linkend="compaction.being.stuck" />) and if so, perform the smallest + compaction t
<TRUNCATED>