http://git-wip-us.apache.org/repos/asf/hbase/blob/e80b3092/src/main/docbkx/orca.xml ---------------------------------------------------------------------- diff --git a/src/main/docbkx/orca.xml b/src/main/docbkx/orca.xml deleted file mode 100644 index 29d8727..0000000 --- a/src/main/docbkx/orca.xml +++ /dev/null @@ -1,47 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<appendix - xml:id="orca" - version="5.0" - xmlns="http://docbook.org/ns/docbook" - xmlns:xlink="http://www.w3.org/1999/xlink" - xmlns:xi="http://www.w3.org/2001/XInclude" - xmlns:svg="http://www.w3.org/2000/svg" - xmlns:m="http://www.w3.org/1998/Math/MathML" - xmlns:html="http://www.w3.org/1999/xhtml" - xmlns:db="http://docbook.org/ns/docbook"> - <!--/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ ---> - <title>Apache HBase Orca</title> - <figure> - <title>Apache HBase Orca</title> - <mediaobject> - <imageobject> - <imagedata align="center" valign="right" - fileref="jumping-orca_rotated_25percent.png"/> - </imageobject> - </mediaobject> - </figure> - <para><link xlink:href="https://issues.apache.org/jira/browse/HBASE-4920">An Orca is the Apache - HBase mascot.</link> - See NOTICES.txt. 
We got our Orca logo here: http://www.vectorfree.com/jumping-orca - It is licensed under Creative Commons Attribution 3.0. See https://creativecommons.org/licenses/by/3.0/us/ - We changed the logo by stripping the colored background, inverting - it, and rotating it slightly. - </para> -</appendix>
http://git-wip-us.apache.org/repos/asf/hbase/blob/e80b3092/src/main/docbkx/other_info.xml ---------------------------------------------------------------------- diff --git a/src/main/docbkx/other_info.xml b/src/main/docbkx/other_info.xml deleted file mode 100644 index 72ff274..0000000 --- a/src/main/docbkx/other_info.xml +++ /dev/null @@ -1,83 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<appendix - xml:id="other.info" - version="5.0" - xmlns="http://docbook.org/ns/docbook" - xmlns:xlink="http://www.w3.org/1999/xlink" - xmlns:xi="http://www.w3.org/2001/XInclude" - xmlns:svg="http://www.w3.org/2000/svg" - xmlns:m="http://www.w3.org/1998/Math/MathML" - xmlns:html="http://www.w3.org/1999/xhtml" - xmlns:db="http://docbook.org/ns/docbook"> - <!--/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ ---> - <title>Other Information About HBase</title> - <section xml:id="other.info.videos"><title>HBase Videos</title> - <para>Introduction to HBase - <itemizedlist> - <listitem><para><link xlink:href="http://www.cloudera.com/content/cloudera/en/resources/library/presentation/chicago_data_summit_apache_hbase_an_introduction_todd_lipcon.html">Introduction to HBase</link> by Todd Lipcon (Chicago Data Summit 2011). 
- </para></listitem> - <listitem><para><link xlink:href="http://www.cloudera.com/videos/intorduction-hbase-todd-lipcon">Introduction to HBase</link> by Todd Lipcon (2010). - </para></listitem> - </itemizedlist> - </para> - <para><link xlink:href="http://www.cloudera.com/videos/hadoop-world-2011-presentation-video-building-realtime-big-data-services-at-facebook-with-hadoop-and-hbase">Building Real Time Services at Facebook with HBase</link> by Jonathan Gray (Hadoop World 2011). - </para> - <para><link xlink:href="http://www.cloudera.com/videos/hw10_video_how_stumbleupon_built_and_advertising_platform_using_hbase_and_hadoop">HBase and Hadoop, Mixing Real-Time and Batch Processing at StumbleUpon</link> by JD Cryans (Hadoop World 2010). - </para> - </section> - <section xml:id="other.info.pres"><title>HBase Presentations (Slides)</title> - <para><link xlink:href="http://www.cloudera.com/content/cloudera/en/resources/library/hadoopworld/hadoop-world-2011-presentation-video-advanced-hbase-schema-design.html">Advanced HBase Schema Design</link> by Lars George (Hadoop World 2011). - </para> - <para><link xlink:href="http://www.slideshare.net/cloudera/chicago-data-summit-apache-hbase-an-introduction">Introduction to HBase</link> by Todd Lipcon (Chicago Data Summit 2011). - </para> - <para><link xlink:href="http://www.slideshare.net/cloudera/hw09-practical-h-base-getting-the-most-from-your-h-base-install">Getting The Most From Your HBase Install</link> by Ryan Rawson, Jonathan Gray (Hadoop World 2009). - </para> - </section> - <section xml:id="other.info.papers"><title>HBase Papers</title> - <para><link xlink:href="http://research.google.com/archive/bigtable.html">BigTable</link> by Google (2006). - </para> - <para><link xlink:href="http://www.larsgeorge.com/2010/05/hbase-file-locality-in-hdfs.html">HBase and HDFS Locality</link> by Lars George (2010). 
- </para> - <para><link xlink:href="http://ianvarley.com/UT/MR/Varley_MastersReport_Full_2009-08-07.pdf">No Relation: The Mixed Blessings of Non-Relational Databases</link> by Ian Varley (2009). - </para> - </section> - <section xml:id="other.info.sites"><title>HBase Sites</title> - <para><link xlink:href="http://www.cloudera.com/blog/category/hbase/">Cloudera's HBase Blog</link> has a lot of links to useful HBase information. - <itemizedlist> - <listitem><para><link xlink:href="http://www.cloudera.com/blog/2010/04/cap-confusion-problems-with-partition-tolerance/">CAP Confusion</link> is a relevant entry for background information on - distributed storage systems.</para> - </listitem> - </itemizedlist> - </para> - <para><link xlink:href="http://wiki.apache.org/hadoop/HBase/HBasePresentations">HBase Wiki</link> has a page with a number of presentations. - </para> - <para><link xlink:href="http://refcardz.dzone.com/refcardz/hbase">HBase RefCard</link> from DZone. - </para> - </section> - <section xml:id="other.info.books"><title>HBase Books</title> - <para><link xlink:href="http://shop.oreilly.com/product/0636920014348.do">HBase: The Definitive Guide</link> by Lars George. - </para> - </section> - <section xml:id="other.info.books.hadoop"><title>Hadoop Books</title> - <para><link xlink:href="http://shop.oreilly.com/product/9780596521981.do">Hadoop: The Definitive Guide</link> by Tom White. 
- </para> - </section> - -</appendix> http://git-wip-us.apache.org/repos/asf/hbase/blob/e80b3092/src/main/docbkx/performance.xml ---------------------------------------------------------------------- diff --git a/src/main/docbkx/performance.xml b/src/main/docbkx/performance.xml deleted file mode 100644 index 42ed79b..0000000 --- a/src/main/docbkx/performance.xml +++ /dev/null @@ -1,1207 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<chapter - version="5.0" - xml:id="performance" - xmlns="http://docbook.org/ns/docbook" - xmlns:xlink="http://www.w3.org/1999/xlink" - xmlns:xi="http://www.w3.org/2001/XInclude" - xmlns:svg="http://www.w3.org/2000/svg" - xmlns:m="http://www.w3.org/1998/Math/MathML" - xmlns:html="http://www.w3.org/1999/xhtml" - xmlns:db="http://docbook.org/ns/docbook"> - <!-- -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ ---> - <title>Apache HBase Performance Tuning</title> - - <section - xml:id="perf.os"> - <title>Operating System</title> - <section - xml:id="perf.os.ram"> - <title>Memory</title> - <para>RAM, RAM, RAM. 
Don't starve HBase.</para> - </section> - <section - xml:id="perf.os.64"> - <title>64-bit</title> - <para>Use a 64-bit platform (and 64-bit JVM).</para> - </section> - <section - xml:id="perf.os.swap"> - <title>Swapping</title> - <para>Watch out for swapping. Set swappiness to 0.</para> - </section> - </section> - <section - xml:id="perf.network"> - <title>Network</title> - <para> Perhaps the most important factor in avoiding network issues degrading Hadoop and HBase - performance is the switching hardware that is used. Decisions made early in the scope of the - project can cause major problems when you double or triple the size of your cluster (or more). </para> - <para> Important items to consider: <itemizedlist> - <listitem> - <para>Switching capacity of the device</para> - </listitem> - <listitem> - <para>Number of systems connected</para> - </listitem> - <listitem> - <para>Uplink capacity</para> - </listitem> - </itemizedlist> - </para> - <section - xml:id="perf.network.1switch"> - <title>Single Switch</title> - <para>The single most important factor in this configuration is whether the switching capacity of - the hardware can handle the traffic generated by all systems - connected to the switch. Some lower-priced commodity hardware has less switching - capacity than a fully loaded switch can demand. </para> - </section> - <section - xml:id="perf.network.2switch"> - <title>Multiple Switches</title> - <para>Multiple switches are a potential pitfall in the architecture. The most common - configuration of lower-priced hardware is a simple 1Gbps uplink from one switch to another. - This often-overlooked pinch point can easily become a bottleneck for cluster communication. - Especially with MapReduce jobs that both read and write a lot of data, the - communication across this uplink can become saturated.
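To make the uplink pinch point concrete, a rough worst-case oversubscription calculation (the node count and link speeds are illustrative, not recommendations):

```java
// Illustrative only: 20 nodes with 1Gbps NICs on one switch,
// reaching a second switch over a single 1Gbps uplink.
public class Oversubscription {
    public static void main(String[] args) {
        int nodes = 20;
        double nicGbps = 1.0;     // per-node link speed
        double uplinkGbps = 1.0;  // switch-to-switch uplink
        // Worst case: every node pushes at line rate across the uplink.
        double ratio = (nodes * nicGbps) / uplinkGbps;
        System.out.println("Worst-case oversubscription: " + ratio + ":1");
    }
}
```

A shuffle-heavy MapReduce job approaches this worst case, which is why the mitigation list that follows emphasizes bigger switches or trunked uplinks.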
</para> - <para>Mitigating this issue is fairly simple and can be accomplished in multiple ways: </para> - <itemizedlist> - <listitem> - <para>Use appropriate hardware for the scale of the cluster you are attempting to - build.</para> - </listitem> - <listitem> - <para>Use larger single-switch configurations, i.e. a single 48-port switch as opposed to - two 24-port switches.</para> - </listitem> - <listitem> - <para>Configure port trunking for uplinks to utilize multiple interfaces to increase cross-switch - bandwidth.</para> - </listitem> - </itemizedlist> - </section> - <section - xml:id="perf.network.multirack"> - <title>Multiple Racks</title> - <para>Multiple-rack configurations carry the same potential issues as multiple switches, and - can suffer performance degradation in two main areas: </para> - <itemizedlist> - <listitem> - <para>Poor switch capacity performance</para> - </listitem> - <listitem> - <para>Insufficient uplink to another rack</para> - </listitem> - </itemizedlist> - <para>If the switches in your rack have appropriate switching capacity to handle all the - hosts at full speed, the next most likely issue will be caused by homing more of your - cluster across racks. The easiest way to avoid issues when spanning multiple racks is to use - port trunking to create a bonded uplink to other racks. The downside of this method, however, - is the overhead of the ports consumed. For example, creating an 8Gbps port channel from rack A - to rack B uses 8 of your 24 ports for inter-rack communication, which gives a poor ROI; - using too few ports, however, can mean you're not getting the most out - of your cluster. </para> - <para>Using 10GbE links between racks will greatly increase performance, and, assuming your - switches support a 10GbE uplink or allow for an expansion card, will allow you to save your - ports for machines as opposed to uplinks.
</para> - </section> - <section - xml:id="perf.network.ints"> - <title>Network Interfaces</title> - <para>Are all the network interfaces functioning correctly? Are you sure? See the - Troubleshooting Case Study in <xref - linkend="casestudies.slownode" />. </para> - </section> - </section> - <!-- network --> - - <section - xml:id="jvm"> - <title>Java</title> - - <section - xml:id="gc"> - <title>The Garbage Collector and Apache HBase</title> - - <section - xml:id="gcpause"> - <title>Long GC pauses</title> - - <para xml:id="mslab">In his presentation, <link - xlink:href="http://www.slideshare.net/cloudera/hbase-hug-presentation">Avoiding Full GCs - with MemStore-Local Allocation Buffers</link>, Todd Lipcon describes two cases of - stop-the-world garbage collections common in HBase, especially during loading: CMS failure - modes, and old-generation heap fragmentation. To address the first, start the CMS - earlier than default by adding <code>-XX:CMSInitiatingOccupancyFraction</code> and setting - it down from its default. Start at 60 or 70 percent (the lower you bring the threshold, - the more GC is done and the more CPU is used). To address the second issue, fragmentation, - Todd added an experimental facility, <indexterm><primary>MSLAB</primary></indexterm>, that - must be explicitly enabled in Apache HBase 0.90.x (it is on by default in Apache HBase - 0.92.x). Set <code>hbase.hregion.memstore.mslab.enabled</code> to true in your - <classname>Configuration</classname>. See the cited slides for background and detail. - The latest JVMs do better with regard to fragmentation, so make sure you are running a recent - release. Read down in the message, <link - xlink:href="http://osdir.com/ml/hotspot-gc-use/2011-11/msg00002.html">Identifying - concurrent mode failures caused by fragmentation</link>. Be aware that when enabled, - each MemStore instance will occupy at least an MSLAB instance of memory.
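To see how that per-MemStore cost adds up, a back-of-the-envelope calculation (the 2MB figure is the default <varname>hbase.hregion.memstore.mslab.chunksize</varname>; the region and family counts are hypothetical):

```java
// Rough lower bound on heap consumed by MSLAB chunks alone.
// There is one MemStore (and thus at least one MSLAB chunk) per
// column family per region.
public class MslabFootprint {
    public static void main(String[] args) {
        long chunkBytes = 2L * 1024 * 1024; // default MSLAB chunk size: 2MB
        int regions = 1000;                 // hypothetical regions on one server
        int familiesPerRegion = 3;          // hypothetical families per region
        long minHeapBytes = chunkBytes * regions * familiesPerRegion;
        System.out.println("Minimum MSLAB heap: " + (minHeapBytes >> 20) + " MB");
    }
}
```

With these made-up numbers the floor is already 6000 MB of heap, which illustrates why dense deployments may need MSLAB disabled or tuned down.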
If you have - thousands of regions, or lots of regions each with many column families, this MSLAB - allocation may be responsible for a good portion of your heap and in an extreme case - cause you to OOME. In this case, disable MSLAB, lower the amount of memory it uses, or - host fewer regions per server. </para> - <para>If you have a write-heavy workload, check out <link - xlink:href="https://issues.apache.org/jira/browse/HBASE-8163">HBASE-8163 - MemStoreChunkPool: An improvement for JAVA GC when using MSLAB</link>. It describes - configurations to lower the amount of young GC during write-heavy loads. If you do not - have HBASE-8163 installed, and you are trying to improve your young GC times, one trick to - consider -- courtesy of our Liang Xie -- is to set the GC config - <varname>-XX:PretenureSizeThreshold</varname> in <filename>hbase-env.sh</filename> to be - just smaller than the size of <varname>hbase.hregion.memstore.mslab.chunksize</varname> so - MSLAB allocations happen in the tenured space directly rather than first in the young gen. - These MSLAB allocations are likely to make it to the old gen - anyway; rather than pay the price of copies between the s0 and s1 survivor spaces followed - by the promotion from young to old gen after the MSLABs have achieved sufficient tenure, - you save a bit of YGC churn by allocating in the old gen directly. </para> - <para>For more information about GC logs, see <xref - linkend="trouble.log.gc" />. </para> - <para>Consider also enabling the offheap Block Cache. This has been shown to mitigate - GC pause times.
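Pulling the GC suggestions above together, a hypothetical <filename>hbase-env.sh</filename> fragment (a sketch only; the occupancy fraction and pretenure threshold are illustrative values to tune for your heap and HBase version):

```shell
# Sketch: start CMS at 70% old-gen occupancy and pin the threshold
# so the JVM does not adapt it upward again.
export HBASE_OPTS="$HBASE_OPTS -XX:+UseConcMarkSweepGC \
  -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly"

# Optional trick from the text: pretenure MSLAB chunks by setting the
# threshold just under the default 2MB (2097152-byte) mslab chunksize.
export HBASE_OPTS="$HBASE_OPTS -XX:PretenureSizeThreshold=2097088"
```

As the text notes, only reach for the pretenure trick if you lack HBASE-8163 and young-GC times are a measured problem.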
See <xref linkend="block.cache" /></para> - </section> - </section> - </section> - - <section - xml:id="perf.configurations"> - <title>HBase Configurations</title> - - <para>See <xref - linkend="recommended_configurations" />.</para> - - <section - xml:id="perf.compactions.and.splits"> - <title>Managing Compactions</title> - - <para>For larger systems, managing <link - linkend="disable.splitting">compactions and splits</link> may be - something you want to consider.</para> - </section> - - <section xml:id="perf.handlers"> - <title><varname>hbase.regionserver.handler.count</varname></title> - <para>See <xref linkend="hbase.regionserver.handler.count"/>. - </para> - </section> - - - - <section xml:id="perf.hfile.block.cache.size"> - <title><varname>hfile.block.cache.size</varname></title> - <para>See <xref linkend="hfile.block.cache.size"/>. - A memory setting for the RegionServer process. - </para> - </section> - <section xml:id="blockcache.prefetch"> - <title>Prefetch Option for Blockcache</title> - <para><link xlink:href="https://issues.apache.org/jira/browse/HBASE-9857">HBASE-9857</link> - adds a new option to prefetch HFile contents when opening the blockcache, if a columnfamily - or regionserver property is set. This option is available for HBase 0.98.3 and later. The - purpose is to warm the blockcache as rapidly as possible after the cache is opened, using - in-memory table data, and not counting the prefetching as cache misses. This is great for - fast reads, but is not a good idea if the data to be preloaded will not fit into the - blockcache. It is useful for tuning the IO impact of prefetching versus the time before all - data blocks are in cache. 
</para> - <para>To enable prefetching on a given column family, you can use HBase Shell or use the - API.</para> - <example> - <title>Enable Prefetch Using HBase Shell</title> - <screen>hbase> create 'MyTable', { NAME => 'myCF', PREFETCH_BLOCKS_ON_OPEN => 'true' }</screen> - </example> - <example> - <title>Enable Prefetch Using the API</title> - <programlisting language="java"> -// ... -HTableDescriptor tableDesc = new HTableDescriptor("myTable"); -HColumnDescriptor cfDesc = new HColumnDescriptor("myCF"); -cfDesc.setPrefetchBlocksOnOpen(true); -tableDesc.addFamily(cfDesc); -// ... - </programlisting> - </example> - <para>See the API documentation for <link - xlink:href="https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/io/hfile/CacheConfig.html" - >CacheConfig</link>.</para> - </section> - <section xml:id="perf.rs.memstore.size"> - <title><varname>hbase.regionserver.global.memstore.size</varname></title> - <para>See <xref linkend="hbase.regionserver.global.memstore.size"/>. - This memory setting is often adjusted for the RegionServer process depending on needs. - </para> - </section> - <section xml:id="perf.rs.memstore.size.lower.limit"> - <title><varname>hbase.regionserver.global.memstore.size.lower.limit</varname></title> - <para>See <xref linkend="hbase.regionserver.global.memstore.size.lower.limit"/>. - This memory setting is often adjusted for the RegionServer process depending on needs. - </para> - </section> - <section xml:id="perf.hstore.blockingstorefiles"> - <title><varname>hbase.hstore.blockingStoreFiles</varname></title> - <para>See <xref linkend="hbase.hstore.blockingStoreFiles"/>. - If there is blocking in the RegionServer logs, increasing this can help. - </para> - </section> - <section xml:id="perf.hregion.memstore.block.multiplier"> - <title><varname>hbase.hregion.memstore.block.multiplier</varname></title> - <para>See <xref linkend="hbase.hregion.memstore.block.multiplier"/>. - If there is enough RAM, increasing this can help. 
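The RegionServer memory and storefile settings discussed in the surrounding sections might be collected in <filename>hbase-site.xml</filename> like so (a sketch; the values shown are illustrative assumptions, not recommendations):

```xml
<!-- Sketch only; tune values against your own RAM and workload. -->
<property>
  <name>hbase.hstore.blockingStoreFiles</name>
  <value>16</value> <!-- raise if RegionServer logs show blocking -->
</property>
<property>
  <name>hbase.hregion.memstore.block.multiplier</name>
  <value>4</value> <!-- raise only if there is enough RAM -->
</property>
```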
- </para> - </section> - <section xml:id="hbase.regionserver.checksum.verify.performance"> - <title><varname>hbase.regionserver.checksum.verify</varname></title> - <para>Have HBase write the checksum into the datablock, saving - a checksum seek whenever you read.</para> - - <para>See <xref linkend="hbase.regionserver.checksum.verify"/>, - <xref linkend="hbase.hstore.bytes.per.checksum"/> and <xref linkend="hbase.hstore.checksum.algorithm"/>. - For more information, see the - release note on <link xlink:href="https://issues.apache.org/jira/browse/HBASE-5074">HBASE-5074 support checksums in HBase block cache</link>. - </para> - </section> - <section> - <title>Tuning <code>callQueue</code> Options</title> - <para><link xlink:href="https://issues.apache.org/jira/browse/HBASE-11355">HBASE-11355</link> - introduces several callQueue tuning mechanisms which can increase performance. See the JIRA - for some benchmarking information.</para> - <itemizedlist> - <listitem> - <para>To increase the number of callqueues, set - <option>hbase.ipc.server.num.callqueue</option> to a value greater than - <literal>1</literal>.</para> - </listitem> - <listitem> - <para>To split the callqueue into separate read and write queues, set - <code>hbase.ipc.server.callqueue.read.ratio</code> to a value between - <literal>0</literal> and <literal>1</literal>. This factor weights the queues toward - writes (if below .5) or reads (if above .5). Another way to say this is that the factor - determines what percentage of the split queues are used for reads. The following - examples illustrate some of the possibilities. Note that you always have at least one - write queue, no matter what setting you use.</para> - <itemizedlist> - <listitem> - <para>The default value of <literal>0</literal> does not split the queue.</para> - </listitem> - <listitem> - <para>A value of <literal>.3</literal> uses 30% of the queues for reading and 70% for - writing.
Given a value of <literal>10</literal> for - <option>hbase.ipc.server.num.callqueue</option>, 3 queues would be used for reads - and 7 for writes.</para> - </listitem> - <listitem> - <para>A value of <literal>.5</literal> uses the same number of read queues and write - queues. Given a value of <literal>10</literal> for - <option>hbase.ipc.server.num.callqueue</option>, 5 queues would be used for reads - and 5 for writes.</para> - </listitem> - <listitem> - <para>A value of <literal>.6</literal> uses 70% of the queues for reading and 30% for - writing. Given a value of <literal>10</literal> for - <option>hbase.ipc.server.num.callqueue</option>, 7 queues would be used for reads - and 3 for writes.</para> - </listitem> - <listitem> - <para>A value of <literal>1.0</literal> uses one queue to process write requests, and - all other queues process read requests. A value higher than <literal>1.0</literal> - has the same effect as a value of <literal>1.0</literal>. Given a value of - <literal>10</literal> for <option>hbase.ipc.server.num.callqueue</option>, 9 - queues would be used for reads and 1 for writes.</para> - </listitem> - </itemizedlist> - </listitem> - <listitem> - <para>You can also split the read queues so that separate queues are used for short reads - (from Get operations) and long reads (from Scan operations), by setting the - <option>hbase.ipc.server.callqueue.scan.ratio</option> option. This option is a factor - between 0 and 1, which determines the ratio of read queues used for Gets and Scans. More - queues are used for Gets if the value is below <literal>.5</literal> and more are used - for Scans if the value is above <literal>.5</literal>.
No matter what setting you use, - at least one read queue is used for Get operations.</para> - <itemizedlist> - <listitem> - <para>A value of <literal>0</literal> does not split the read queue.</para> - </listitem> - <listitem> - <para>A value of <literal>.3</literal> uses 70% of the read queues for Gets and 30% - for Scans. Given a value of <literal>20</literal> for - <option>hbase.ipc.server.num.callqueue</option> and a value of <literal>.5 - </literal> for <option>hbase.ipc.server.callqueue.read.ratio</option>, 10 queues - would be used for reads; of those 10, 7 would be used for Gets and 3 for - Scans.</para> - </listitem> - <listitem> - <para>A value of <literal>.5</literal> uses half the read queues for Gets and half for - Scans. Given a value of <literal>20</literal> for - <option>hbase.ipc.server.num.callqueue</option> and a value of <literal>.5 - </literal> for <option>hbase.ipc.server.callqueue.read.ratio</option>, 10 queues - would be used for reads; of those 10, 5 would be used for Gets and 5 for - Scans.</para> - </listitem> - <listitem> - <para>A value of <literal>.6</literal> uses 30% of the read queues for Gets and 70% - for Scans. Given a value of <literal>20</literal> for - <option>hbase.ipc.server.num.callqueue</option> and a value of <literal>.5 - </literal> for <option>hbase.ipc.server.callqueue.read.ratio</option>, 10 queues - would be used for reads; of those 10, 3 would be used for Gets and 7 for - Scans.</para> - </listitem> - <listitem> - <para>A value of <literal>1.0</literal> uses all but one of the read queues for Scans.
- Given a value of <literal>20</literal> for - <option>hbase.ipc.server.num.callqueue</option> and a value of <literal>.5 - </literal> for <option>hbase.ipc.server.callqueue.read.ratio</option>, 10 queues - would be used for reads, out of those 10, 1 would be used for Gets and 9 for - Scans.</para> - </listitem> - </itemizedlist> - </listitem> - <listitem> - <para>You can use the new option - <option>hbase.ipc.server.callqueue.handler.factor</option> to programmatically tune - the number of queues:</para> - <itemizedlist> - <listitem> - <para>A value of <literal>0</literal> uses a single shared queue between all the - handlers.</para> - </listitem> - <listitem> - <para>A value of <literal>1</literal> uses a separate queue for each handler.</para> - </listitem> - <listitem> - <para>A value between <literal>0</literal> and <literal>1</literal> tunes the number - of queues against the number of handlers. For instance, a value of - <literal>.5</literal> shares one queue between each two handlers.</para> - </listitem> - </itemizedlist> - <para>Having more queues, such as in a situation where you have one queue per handler, - reduces contention when adding a task to a queue or selecting it from a queue. The - trade-off is that if you have some queues with long-running tasks, a handler may end up - waiting to execute from that queue rather than processing another queue which has - waiting tasks.</para> - </listitem> - </itemizedlist> - <para>For these values to take effect on a given Region Server, the Region Server must be - restarted. These parameters are intended for testing purposes and should be used - carefully.</para> - </section> - </section> - - - - - <section - xml:id="perf.zookeeper"> - <title>ZooKeeper</title> - <para>See <xref - linkend="zookeeper" /> for information on configuring ZooKeeper, and see the part about - having a dedicated disk. 
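Stepping back to the callQueue section above, the way the two ratios compose can be sketched in a few lines; this is only the arithmetic implied by the worked examples, and the exact rounding inside HBase may differ:

```java
// Sketch of how read.ratio and scan.ratio partition the call queues,
// following the worked example: 20 queues, read.ratio .5, scan.ratio .3.
public class CallQueueSplit {
    public static void main(String[] args) {
        int numQueues = 20;       // hbase.ipc.server.num.callqueue
        double readRatio = 0.5;   // hbase.ipc.server.callqueue.read.ratio
        double scanRatio = 0.3;   // hbase.ipc.server.callqueue.scan.ratio
        int readQueues = (int) Math.round(numQueues * readRatio);
        int writeQueues = numQueues - readQueues;
        int scanQueues = (int) Math.round(readQueues * scanRatio);
        int getQueues = readQueues - scanQueues;
        System.out.println(readQueues + " read / " + writeQueues + " write; "
            + getQueues + " get / " + scanQueues + " scan");
    }
}
```

For the example values this yields 10 read and 10 write queues, with the read queues split 7 for Gets and 3 for Scans, matching the example in the text.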
</para> - </section> - <section - xml:id="perf.schema"> - <title>Schema Design</title> - - <section - xml:id="perf.number.of.cfs"> - <title>Number of Column Families</title> - <para>See <xref - linkend="number.of.cfs" />.</para> - </section> - <section - xml:id="perf.schema.keys"> - <title>Key and Attribute Lengths</title> - <para>See <xref - linkend="keysize" />. See also <xref - linkend="perf.compression.however" /> for compression caveats.</para> - </section> - <section - xml:id="schema.regionsize"> - <title>Table RegionSize</title> - <para>The regionsize can be set on a per-table basis via <code>setFileSize</code> on <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html">HTableDescriptor</link> - in the event where certain tables require different regionsizes than the configured default - regionsize. </para> - <para>See <xref - linkend="ops.capacity.regions" /> for more information. </para> - </section> - <section - xml:id="schema.bloom"> - <title>Bloom Filters</title> - <para>A Bloom filter, named for its creator, Burton Howard Bloom, is a data structure which is - designed to predict whether a given element is a member of a set of data. A positive result - from a Bloom filter is not always accurate, but a negative result is guaranteed to be - accurate. Bloom filters are designed to be "accurate enough" for sets of data which are so - large that conventional hashing mechanisms would be impractical. For more information about - Bloom filters in general, refer to <link - xlink:href="http://en.wikipedia.org/wiki/Bloom_filter" />.</para> - <para>In terms of HBase, Bloom filters provide a lightweight in-memory structure to reduce the - number of disk reads for a given Get operation (Bloom filters do not work with Scans) to only the StoreFiles likely to - contain the desired Row. The potential performance gain increases with the number of - parallel reads. 
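For intuition about the trade-off between a Bloom filter's false positive rate and its size, standard Bloom filter math (not HBase-specific) gives the optimal bits per key as -ln(p)/(ln 2)^2 for a target false positive rate p:

```java
// Space cost of a Bloom filter at a target false positive rate.
// Standard Bloom filter formula; p = 0.01 mirrors the default
// io.hfile.bloom.error.rate of .01 discussed later in this section.
public class BloomSizing {
    public static void main(String[] args) {
        double p = 0.01; // target false positive rate
        double bitsPerKey = -Math.log(p) / (Math.log(2) * Math.log(2));
        System.out.printf("~%.1f bits (%.2f bytes) per key for p=%.2f%n",
            bitsPerKey, bitsPerKey / 8, p);
    }
}
```

At a 1% false positive rate this works out to roughly 9.6 bits, just over one byte, per key, which is why Bloom filters stay cheap relative to data entries that are kilobytes in size.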
</para> - <para>The Bloom filters themselves are stored in the metadata of each HFile and never need to - be updated. When an HFile is opened because a region is deployed to a RegionServer, the - Bloom filter is loaded into memory. </para> - <para>HBase includes some tuning mechanisms for folding the Bloom filter to reduce the size - and keep the false positive rate within a desired range.</para> - <para>Bloom filters were introduced in <link - xlink:href="https://issues.apache.org/jira/browse/HBASE-1200">HBASE-1200</link>. Since - HBase 0.96, row-based Bloom filters are enabled by default. (<link - xlink:href="https://issues.apache.org/jira/browse/HBASE-8450">HBASE-8450</link>)</para> - <para>For more information on Bloom filters in relation to HBase, see <xref - linkend="blooms" /> or the following Quora discussion: <link - xlink:href="http://www.quora.com/How-are-bloom-filters-used-in-HBase">How are bloom - filters used in HBase?</link>. </para> - - <section xml:id="bloom.filters.when"> - <title>When To Use Bloom Filters</title> - <para>Since HBase 0.96, row-based Bloom filters are enabled by default. You may choose to - disable them or to change some tables to use row+column Bloom filters, depending on the - characteristics of your data and how it is loaded into HBase.</para> - - <para>To determine whether Bloom filters could have a positive impact, check the value of - <code>blockCacheHitRatio</code> in the RegionServer metrics. If Bloom filters are enabled, the value of - <code>blockCacheHitRatio</code> should increase, because the Bloom filter is filtering out blocks that - are definitely not needed. </para> - <para>You can choose to enable Bloom filters for a row or for a row+column combination. If - you generally scan entire rows, the row+column combination will not provide any benefit. A - row-based Bloom filter can operate on a row+column Get, but not the other way around.
- However, if you have a large number of column-level Puts, such that a row may be present - in every StoreFile, a row-based filter will always return a positive result and provide no - benefit. Unless you have one column per row, row+column Bloom filters require more space, - in order to store more keys. Bloom filters work best when the size of each data entry is - at least a few kilobytes. </para> - <para>Overhead will be reduced when your data is stored in a few larger StoreFiles, avoiding - extra disk IO during low-level scans to find a specific row. </para> - <para>Bloom filters need to be rebuilt upon deletion, so they may not be appropriate in - environments with a large number of deletions.</para> - </section> - - <section> - <title>Enabling Bloom Filters</title> - <para>Bloom filters are enabled on a Column Family. You can do this by using HBase Shell or by - calling the setBloomFilterType method of HColumnDescriptor via the API. Valid values are - <literal>NONE</literal>, <literal>ROW</literal> (the default since HBase 0.96), or - <literal>ROWCOL</literal>. See <xref - linkend="bloom.filters.when" /> for more information on <literal>ROW</literal> versus - <literal>ROWCOL</literal>. See also the API documentation for <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html">HColumnDescriptor</link>.</para> - <para>The following example creates a table and enables a ROWCOL Bloom filter on the - <literal>colfam1</literal> column family.</para> - <screen> -hbase> <userinput>create 'mytable',{NAME => 'colfam1', BLOOMFILTER => 'ROWCOL'}</userinput> - </screen> - </section> - - <section> - <title>Configuring Server-Wide Behavior of Bloom Filters</title> - <para>You can configure the following settings in <filename>hbase-site.xml</filename>.
- </para> - <informaltable> - <tgroup cols="3"> - <thead> - <row> - <entry>Parameter</entry> - <entry>Default</entry> - <entry>Description</entry> - </row> - </thead> - <tbody> - <row> - <entry><para><code>io.hfile.bloom.enabled</code></para></entry> - <entry><para><literal>yes</literal></para></entry> - <entry><para>Set to <literal>no</literal> to kill bloom filters server-wide if - something goes wrong</para></entry> - </row> - <row> - <entry><para><code>io.hfile.bloom.error.rate</code></para></entry> - <entry><para><literal>.01</literal></para></entry> - <entry><para>The average false positive rate for bloom filters. Folding is used to - maintain the false positive rate. Expressed as a decimal representation of a - percentage.</para></entry> - </row> - <row> - <entry><para><code>io.hfile.bloom.max.fold</code></para></entry> - <entry><para><literal>7</literal></para></entry> - <entry><para>The guaranteed maximum fold rate. Changing this setting should not be - necessary and is not recommended.</para></entry> - </row> - <row> - <entry><para><code>io.storefile.bloom.max.keys</code></para></entry> - <entry><para><literal>128000000</literal></para></entry> - <entry><para>For default (single-block) Bloom filters, this specifies the maximum - number of keys.</para></entry> - </row> - <row> - <entry><para><code>io.storefile.delete.family.bloom.enabled</code></para></entry> - <entry><para><literal>true</literal></para></entry> - <entry><para>Master switch to enable Delete Family Bloom filters and store them in - the StoreFile.</para></entry> - </row> - <row> - <entry><para><code>io.storefile.bloom.block.size</code></para></entry> - <entry><para><literal>65536</literal></para></entry> - <entry><para>Target Bloom block size. 
Bloom filter blocks of approximately this size - are interleaved with data blocks.</para></entry> - </row> - <row> - <entry><para><code>hfile.block.bloom.cacheonwrite</code></para></entry> - <entry><para><literal>false</literal></para></entry> - <entry><para>Enables cache-on-write for inline blocks of a compound Bloom filter.</para></entry> - </row> - </tbody> - </tgroup> - </informaltable> - </section> - </section> - <section - xml:id="schema.cf.blocksize"> - <title>ColumnFamily BlockSize</title> - <para>The blocksize can be configured for each ColumnFamily in a table, and this defaults to - 64k. Larger cell values require larger blocksizes. There is an inverse relationship between - blocksize and the resulting StoreFile indexes (i.e., if the blocksize is doubled then the - resulting indexes should be roughly halved). </para> - <para>See <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html">HColumnDescriptor</link> - and <xref - linkend="store" /> for more information. </para> - </section> - <section - xml:id="cf.in.memory"> - <title>In-Memory ColumnFamilies</title> - <para>ColumnFamilies can optionally be defined as in-memory. Data is still persisted to disk, - just like any other ColumnFamily. In-memory blocks have the highest priority in the <xref - linkend="block.cache" />, but it is not a guarantee that the entire table will be in - memory. </para> - <para>See <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html">HColumnDescriptor</link> - for more information. </para> - </section> - <section - xml:id="perf.compression"> - <title>Compression</title> - <para>Production systems should use compression with their ColumnFamily definitions. See <xref - linkend="compression" /> for more information. </para> - <section - xml:id="perf.compression.however"> - <title>However...</title> - <para>Compression deflates data <emphasis>on disk</emphasis>.
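The at-rest-only nature of compression can be sketched with java.util.zip from the standard library (HBase's actual codecs are pluggable and unrelated to this example): a repetitive value is small on disk, but the byte array the application handles after decompression is full size.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

// Compress a repetitive "cell value" as it would be at rest, then inflate
// it back to the size the MemStore / client actually sees.
public class CompressAtRest {
    static byte[] deflate(byte[] data) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (DeflaterOutputStream dos = new DeflaterOutputStream(bos)) {
            dos.write(data);
        }
        return bos.toByteArray();
    }

    static byte[] inflate(byte[] data) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data))) {
            byte[] buf = new byte[4096];
            int n;
            while ((n = iis.read(buf)) != -1) {
                bos.write(buf, 0, n);
            }
        }
        return bos.toByteArray();
    }

    public static void main(String[] args) throws Exception {
        byte[] value = new String(new char[10000]).replace('\0', 'x').getBytes();
        byte[] onDisk = deflate(value);       // small at rest
        byte[] inMemory = inflate(onDisk);    // full size again in memory
        System.out.println("raw=" + value.length + " onDisk=" + onDisk.length
            + " inMemory=" + inMemory.length);
    }
}
```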
When it's in-memory (e.g., in - the MemStore) or on the wire (e.g., transferring between RegionServer and Client) it's - inflated. So while using ColumnFamily compression is a best practice, it's not going - to completely eliminate the impact of over-sized Keys, over-sized ColumnFamily names, or - over-sized Column names. </para> - <para>See <xref - linkend="keysize" /> for schema design tips, and <xref - linkend="keyvalue" /> for more information on how HBase stores data internally. </para> - </section> - </section> - </section> - <!-- perf schema --> - - <section - xml:id="perf.general"> - <title>HBase General Patterns</title> - <section - xml:id="perf.general.constants"> - <title>Constants</title> - <para>When people get started with HBase they have a tendency to write code that looks like - this:</para> - <programlisting language="java"> -Get get = new Get(rowkey); -Result r = htable.get(get); -byte[] b = r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr")); // returns current version of value - </programlisting> - <para>But especially when inside loops (and MapReduce jobs), converting the columnFamily and - column-names to byte-arrays repeatedly is surprisingly expensive. It's better to use - constants for the byte-arrays, like this:</para> - <programlisting language="java"> -public static final byte[] CF = "cf".getBytes(); -public static final byte[] ATTR = "attr".getBytes(); -... -Get get = new Get(rowkey); -Result r = htable.get(get); -byte[] b = r.getValue(CF, ATTR); // returns current version of value - </programlisting> - </section> - - </section> - <section - xml:id="perf.writing"> - <title>Writing to HBase</title> - - <section - xml:id="perf.batch.loading"> - <title>Batch Loading</title> - <para>Use the bulk load tool if you can. See <xref - linkend="arch.bulk.load" />. Otherwise, pay attention to the below.
</para> - </section> - <!-- batch loading --> - - <section - xml:id="precreate.regions"> - <title> Table Creation: Pre-Creating Regions </title> - <para> Tables in HBase are initially created with one region by default. For bulk imports, - this means that all clients will write to the same region until it is large enough to split - and become distributed across the cluster. A useful pattern to speed up the bulk import - process is to pre-create empty regions. Be somewhat conservative in this, because too many - regions can actually degrade performance. </para> - <para>There are two different approaches to pre-creating splits. The first approach is to rely - on the default <code>HBaseAdmin</code> strategy (which is implemented in - <code>Bytes.split</code>)... </para> - <programlisting language="java"> -byte[] startKey = ...; // your lowest key -byte[] endKey = ...; // your highest key -int numberOfRegions = ...; // # of regions to create -admin.createTable(table, startKey, endKey, numberOfRegions); - </programlisting> - <para>And the other approach is to define the splits yourself... </para> - <programlisting language="java"> -byte[][] splits = ...; // create your own splits -admin.createTable(table, splits); -</programlisting> - <para> See <xref linkend="rowkey.regionsplits"/> for issues related to understanding your - keyspace and pre-creating regions. See <xref linkend="manual_region_splitting_decisions"/> - for discussion on manually pre-splitting regions.</para> - </section> - <section - xml:id="def.log.flush"> - <title> Table Creation: Deferred Log Flush </title> - <para> The default behavior for Puts using the Write Ahead Log (WAL) is that - <classname>WAL</classname> edits will be written immediately. If deferred log flush is - used, WAL edits are kept in memory until the flush period.
The benefit is aggregated and - asynchronous <classname>WAL</classname> writes, but the potential downside is that if the - RegionServer goes down the yet-to-be-flushed edits are lost. This is safer, however, than - not using WAL at all with Puts. </para> - <para> Deferred log flush can be configured on tables via <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html">HTableDescriptor</link>. - The default value of <varname>hbase.regionserver.optionallogflushinterval</varname> is - 1000ms. </para> - </section> - - <section - xml:id="perf.hbase.client.autoflush"> - <title>HBase Client: AutoFlush</title> - - <para>When performing a lot of Puts, make sure that setAutoFlush is set to false on your <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html">HTable</link> - instance. Otherwise, the Puts will be sent one at a time to the RegionServer. Puts added via - <code>htable.add(Put)</code> and <code>htable.add(List&lt;Put&gt;)</code> wind up in - the same write buffer. If <code>autoFlush = false</code>, these messages are not sent until - the write-buffer is filled. To explicitly flush the messages, call - <methodname>flushCommits</methodname>. Calling <methodname>close</methodname> on the - <classname>HTable</classname> instance will invoke - <methodname>flushCommits</methodname>.</para> - </section> - <section - xml:id="perf.hbase.client.putwal"> - <title>HBase Client: Turn off WAL on Puts</title> - <para>A frequent request is to disable the WAL to increase performance of Puts. This is only - appropriate for bulk loads, as it puts your data at risk by removing the protection of the - WAL in the event of a region server crash.
Bulk loads can be re-run in the event of a crash, - with little risk of data loss.</para> - <warning> - <para>If you disable the WAL for anything other than bulk loads, your data is at - risk.</para></warning> - <para>In general, it is best to use WAL for Puts, and where loading throughput is a concern to - use <link linkend="perf.batch.loading">bulk loading</link> techniques instead. For normal - Puts, you are not likely to see a performance improvement which would outweigh the risk. To - disable the WAL, see <xref linkend="wal.disable"/>.</para> - </section> - <section - xml:id="perf.hbase.client.regiongroup"> - <title>HBase Client: Group Puts by RegionServer</title> - <para>In addition to using the writeBuffer, grouping <classname>Put</classname>s by - RegionServer can reduce the number of client RPC calls per writeBuffer flush. There is a - utility <classname>HTableUtil</classname> currently on TRUNK that does this, but you can - either copy that or implement your own version for those still on 0.90.x or earlier. </para> - </section> - <section - xml:id="perf.hbase.write.mr.reducer"> - <title>MapReduce: Skip The Reducer</title> - <para>When writing a lot of data to an HBase table from a MR job (e.g., with <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.html">TableOutputFormat</link>), - and specifically where Puts are being emitted from the Mapper, skip the Reducer step. When a - Reducer step is used, all of the output (Puts) from the Mapper will get spooled to disk, - then sorted/shuffled to other Reducers that will most likely be off-node. It's far more - efficient to just write directly to HBase. </para> - <para>For summary jobs where HBase is used as a source and a sink, writes will be coming - from the Reducer step (e.g., summarize values then write out result). This is a different - processing problem from the case above.
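The grouping idea can be sketched without any HBase classes. Here regionServerFor is an invented stand-in for looking up a row's hosting server (the role HTable.getRegionLocation plays); the point is only that batching per server bounds RPCs by the number of servers rather than the number of rows.

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Conceptual sketch: batching puts per hosting RegionServer means the
// client issues one RPC per server, not one per row.
public class GroupPutsByServer {
    // Hypothetical stand-in for a region location lookup: hash each row
    // key onto one of 3 servers.
    static String regionServerFor(String rowKey) {
        return "rs-" + Math.floorMod(rowKey.hashCode(), 3);
    }

    static Map<String, List<String>> group(List<String> rowKeys) {
        Map<String, List<String>> batches = new HashMap<>();
        for (String row : rowKeys) {
            batches.computeIfAbsent(regionServerFor(row), s -> new ArrayList<>()).add(row);
        }
        return batches;
    }

    public static void main(String[] args) {
        List<String> rows = new ArrayList<>();
        for (int i = 0; i < 1000; i++) {
            rows.add("row-" + i);
        }
        // 1000 ungrouped puts -> at most 3 grouped RPCs per flush.
        System.out.println("rpcs=" + group(rows).size());
    }
}
```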
</para> - </section> - - <section - xml:id="perf.one.region"> - <title>Anti-Pattern: One Hot Region</title> - <para>If all your data is being written to one region at a time, then re-read the section on - processing <link - linkend="timeseries">timeseries</link> data.</para> - <para>Also, if you are pre-splitting regions and all your data is <emphasis>still</emphasis> - winding up in a single region even though your keys aren't monotonically increasing, confirm - that your keyspace actually works with the split strategy. There are a variety of reasons - that regions may appear "well split" but won't work with your data. As the HBase client - communicates directly with the RegionServers, the region location for a given row key can be - obtained via <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#getRegionLocation%28byte[]%29">HTable.getRegionLocation</link>. </para> - <para>See <xref - linkend="precreate.regions" />, as well as <xref - linkend="perf.configurations" /> - </para> - </section> - - </section> - <!-- writing --> - - <section - xml:id="perf.reading"> - <title>Reading from HBase</title> - <para>The mailing list can help if you are having performance issues. For example, here is a - good general thread on what to look at when addressing read-time issues: <link - xlink:href="http://search-hadoop.com/m/qOo2yyHtCC1">HBase Random Read latency > - 100ms</link></para> - <section - xml:id="perf.hbase.client.caching"> - <title>Scan Caching</title> - - <para>If HBase is used as an input source for a MapReduce job, for example, make sure that the - input <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html">Scan</link> - instance to the MapReduce job has <methodname>setCaching</methodname> set to something - greater than the default (which is 1). Using the default value means that the map-task will - make a call back to the region-server for every record processed.
Setting this value to 500, - for example, will transfer 500 rows at a time to the client to be processed. There is a - cost/benefit to having a large cache value, because it costs more memory for both the - client and the RegionServer, so bigger isn't always better.</para> - <section - xml:id="perf.hbase.client.caching.mr"> - <title>Scan Caching in MapReduce Jobs</title> - <para>Scan settings in MapReduce jobs deserve special attention. Timeouts can result (e.g., - UnknownScannerException) in Map tasks if it takes too long to process a batch of records - before the client goes back to the RegionServer for the next set of data. This problem can - occur because there is non-trivial processing occurring per row. If you process rows - quickly, set caching higher. If you process rows more slowly (e.g., lots of - transformations per row, writes), then set caching lower. </para> - <para>Timeouts can also happen in a non-MapReduce use case (i.e., single threaded HBase - client doing a Scan), but the processing that is often performed in MapReduce jobs tends - to exacerbate this issue. </para> - </section> - </section> - <section - xml:id="perf.hbase.client.selection"> - <title>Scan Attribute Selection</title> - - <para>Whenever a Scan is used to process large numbers of rows (and especially when used as a - MapReduce source), be aware of which attributes are selected. If <code>scan.addFamily</code> - is called then <emphasis>all</emphasis> of the attributes in the specified ColumnFamily will - be returned to the client. If only a small number of the available attributes are to be - processed, then only those attributes should be specified in the input scan because - attribute over-selection is a non-trivial performance penalty over large datasets.
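The scan-caching trade-off discussed above reduces to simple arithmetic: each batch of `caching` rows costs roughly one round trip, so (illustratively, ignoring partial batches and result-size limits):

```java
// Back-of-envelope scanner-caching arithmetic (illustrative only): each
// batch of `caching` rows fetched by the scanner costs roughly one round
// trip (RPC) to the RegionServer.
public class ScanCachingMath {
    static long rpcs(long totalRows, long caching) {
        return (totalRows + caching - 1) / caching;  // ceiling division
    }

    public static void main(String[] args) {
        long rows = 1_000_000L;
        System.out.println("caching=1   -> " + rpcs(rows, 1) + " RPCs");   // 1000000
        System.out.println("caching=500 -> " + rpcs(rows, 500) + " RPCs"); // 2000
    }
}
```

Three orders of magnitude fewer round trips is why the default of 1 is so painful for MapReduce sources, while the memory cost of each larger batch is why bigger isn't always better.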
</para> - </section> - <section - xml:id="perf.hbase.client.seek"> - <title>Avoid scan seeks</title> - <para>When columns are selected explicitly with <code>scan.addColumn</code>, HBase will - schedule seek operations to seek between the selected columns. When rows have few columns - and each column has only a few versions this can be inefficient. A seek operation is - generally slower if it does not seek at least past 5-10 columns/versions or 512-1024 - bytes.</para> - <para>In order to opportunistically look ahead a few columns/versions to see if the next - column/version can be found that way before a seek operation is scheduled, a new attribute - <code>Scan.HINT_LOOKAHEAD</code> can be set on the Scan object. The following code - instructs the RegionServer to attempt two iterations of next before a seek is - scheduled:</para> - <programlisting language="java"> -Scan scan = new Scan(); -scan.addColumn(...); -scan.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2)); -table.getScanner(scan); - </programlisting> - </section> - <section - xml:id="perf.hbase.mr.input"> - <title>MapReduce - Input Splits</title> - <para>For MapReduce jobs that use HBase tables as a source, if there is a pattern where the - "slow" map tasks seem to have the same Input Split (i.e., the RegionServer serving the - data), see the Troubleshooting Case Study in <xref - linkend="casestudies.slownode" />. </para> - </section> - - <section - xml:id="perf.hbase.client.scannerclose"> - <title>Close ResultScanners</title> - - <para>This isn't so much about improving performance but rather <emphasis>avoiding</emphasis> - performance problems. If you forget to close <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/ResultScanner.html">ResultScanners</link> - you can cause problems on the RegionServers. Always have ResultScanner processing enclosed - in try/finally blocks...</para> - <programlisting language="java"> -Scan scan = new Scan(); -// set attrs...
-ResultScanner rs = htable.getScanner(scan); -try { - for (Result r = rs.next(); r != null; r = rs.next()) { - // process result... - } -} finally { - rs.close(); // always close the ResultScanner! -} -htable.close(); - </programlisting> - </section> - - <section - xml:id="perf.hbase.client.blockcache"> - <title>Block Cache</title> - - <para><link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html">Scan</link> - instances can be set to use the block cache in the RegionServer via the - <methodname>setCacheBlocks</methodname> method. For input Scans to MapReduce jobs, this - should be <varname>false</varname>. For frequently accessed rows, it is advisable to use the - block cache.</para> - - <para>Cache more data by moving your Block Cache offheap. See <xref linkend="offheap.blockcache" />.</para> - </section> - <section - xml:id="perf.hbase.client.rowkeyonly"> - <title>Optimal Loading of Row Keys</title> - <para>When performing a table <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html">scan</link> - where only the row keys are needed (no families, qualifiers, values or timestamps), add a - FilterList with a <varname>MUST_PASS_ALL</varname> operator to the scanner using - <methodname>setFilter</methodname>. The filter list should include both a <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FirstKeyOnlyFilter.html">FirstKeyOnlyFilter</link> - and a <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/KeyOnlyFilter.html">KeyOnlyFilter</link>. - Using this filter combination will result in a worst case scenario of a RegionServer reading - a single value from disk and minimal network traffic to the client for a single row.
</para> - </section> - <section - xml:id="perf.hbase.read.dist"> - <title>Concurrency: Monitor Data Spread</title> - <para>When performing a high number of concurrent reads, monitor the data spread of the target - tables. If the target table(s) have too few regions then the reads could likely be served - from too few nodes. </para> - <para>See <xref - linkend="precreate.regions" />, as well as <xref - linkend="perf.configurations" /> - </para> - </section> - <section - xml:id="blooms"> - <title>Bloom Filters</title> - <para>Enabling Bloom Filters can save you from having to go to disk and can help improve read - latencies.</para> - <para><link - xlink:href="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filters</link> were developed - in <link xlink:href="https://issues.apache.org/jira/browse/HBASE-1200">HBase-1200 Add - bloomfilters</link>. For a description of the development process -- why static blooms rather than dynamic - -- and for an overview of the unique properties that pertain to blooms in HBase, as well - as possible future directions, see the <emphasis>Development Process</emphasis> section - of the document <link - xlink:href="https://issues.apache.org/jira/secure/attachment/12444007/Bloom_Filters_in_HBase.pdf">BloomFilters - in HBase</link> attached to <link - xlink:href="https://issues.apache.org/jira/browse/HBASE-1200">HBase-1200</link>. The bloom filters described here are actually version two of blooms in HBase. In - versions up to 0.19.x, HBase had a dynamic bloom option based on work done by the <link - xlink:href="http://www.one-lab.org">European Commission One-Lab Project 034819</link>. - The core of the HBase bloom work was later pulled up into Hadoop to implement - org.apache.hadoop.io.BloomMapFile. Version 1 of HBase blooms never worked that well. - Version 2 is a rewrite from scratch, though again it starts with the one-lab work.</para> - - <para>See also <xref - linkend="schema.bloom" />.
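The space/accuracy trade-off follows the textbook Bloom filter sizing formula (this is the standard math, not necessarily HBase's exact accounting): a target false-positive rate p needs about -ln(p)/(ln 2)² bits per key, so each halving of p costs roughly 1.44 additional bits per key.

```java
// Textbook Bloom filter sizing: bits per key for a target false-positive
// rate p is -ln(p) / (ln 2)^2; the optimal hash count is (bits/key) * ln 2.
public class BloomSizing {
    static double bitsPerKey(double p) {
        return -Math.log(p) / (Math.log(2) * Math.log(2));
    }

    public static void main(String[] args) {
        double one = bitsPerKey(0.01);    // the default 1% error rate
        double half = bitsPerKey(0.005);  // error rate halved
        System.out.printf("p=1%%:   %.2f bits/key%n", one);
        System.out.printf("p=0.5%%: %.2f bits/key%n", half);
        System.out.printf("cost of halving p: %.2f bits/key%n", half - one);
    }
}
```

At the default 1% rate this works out to roughly 9.6 bits (just over one byte) per key, which is why the per-key footprint matters when rows are small and keys are many.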
</para> - - <section - xml:id="bloom_footprint"> - <title>Bloom StoreFile footprint</title> - - <para>Bloom filters add an entry to the <classname>StoreFile</classname> general - <classname>FileInfo</classname> data structure and then two extra entries to the - <classname>StoreFile</classname> metadata section.</para> - - <section> - <title>BloomFilter in the <classname>StoreFile</classname> - <classname>FileInfo</classname> data structure</title> - - <para><classname>FileInfo</classname> has a <varname>BLOOM_FILTER_TYPE</varname> entry - which is set to <varname>NONE</varname>, <varname>ROW</varname> or - <varname>ROWCOL</varname>.</para> - </section> - - <section> - <title>BloomFilter entries in <classname>StoreFile</classname> metadata</title> - - <para><varname>BLOOM_FILTER_META</varname> holds Bloom Size, Hash Function used, etc. It is - small and is cached on <classname>StoreFile.Reader</classname> load.</para> - <para><varname>BLOOM_FILTER_DATA</varname> is the actual bloomfilter data. Obtained - on-demand. Stored in the LRU cache, if it is enabled (it is enabled by default).</para> - </section> - </section> - <section - xml:id="config.bloom"> - <title>Bloom Filter Configuration</title> - <section> - <title><varname>io.hfile.bloom.enabled</varname> global kill switch</title> - - <para><code>io.hfile.bloom.enabled</code> in <classname>Configuration</classname> serves - as the kill switch in case something goes wrong. Default = - <varname>true</varname>.</para> - </section> - - <section> - <title><varname>io.hfile.bloom.error.rate</varname></title> - - <para><varname>io.hfile.bloom.error.rate</varname> = average false positive rate. Default - = 1%. Decrease rate by ½ (e.g. to .5%) == +1 bit per bloom entry.</para> - </section> - - <section> - <title><varname>io.hfile.bloom.max.fold</varname></title> - - <para><varname>io.hfile.bloom.max.fold</varname> = guaranteed minimum fold rate. Most - people should leave this alone.
Default = 7, or can collapse to at least 1/128th of - original size. See the <emphasis>Development Process</emphasis> section of the document <link - xlink:href="https://issues.apache.org/jira/secure/attachment/12444007/Bloom_Filters_in_HBase.pdf">BloomFilters - in HBase</link> for more on what this option means.</para> - </section> - </section> - </section> - <!-- bloom --> - <section> - <title>Hedged Reads</title> - <para>Hedged reads are a feature of HDFS, introduced in <link - xlink:href="https://issues.apache.org/jira/browse/HDFS-5776">HDFS-5776</link>. Normally, a - single thread is spawned for each read request. However, if hedged reads are enabled, the - client waits some configurable amount of time, and if the read does not return, the client - spawns a second read request, against a different block replica of the same data. Whichever - read returns first is used, and the other read request is discarded. Hedged reads can be - helpful for times where a rare slow read is caused by a transient error such as a failing - disk or flaky network connection.</para> - <para> Because a HBase RegionServer is a HDFS client, you can enable hedged reads in HBase, by - adding the following properties to the RegionServer's hbase-site.xml and tuning the values - to suit your environment.</para> - <itemizedlist> - <title>Configuration for Hedged Reads</title> - <listitem> - <para><code>dfs.client.hedged.read.threadpool.size</code> - the number of threads - dedicated to servicing hedged reads. 
If this is set to 0 (the default), hedged reads are - disabled.</para> - </listitem> - <listitem> - <para><code>dfs.client.hedged.read.threshold.millis</code> - the number of milliseconds to - wait before spawning a second read thread.</para> - </listitem> - </itemizedlist> - <example> - <title>Hedged Reads Configuration Example</title> - <screen><![CDATA[<property> - <name>dfs.client.hedged.read.threadpool.size</name> - <value>20</value> <!-- 20 threads --> -</property> -<property> - <name>dfs.client.hedged.read.threshold.millis</name> - <value>10</value> <!-- 10 milliseconds --> -</property>]]></screen> - </example> - <para>Use the following metrics to tune the settings for hedged reads on - your cluster. See <xref linkend="hbase_metrics"/> for more information.</para> - <itemizedlist> - <title>Metrics for Hedged Reads</title> - <listitem> - <para>hedgedReadOps - the number of times hedged read threads have been triggered. This - could indicate that read requests are often slow, or that hedged reads are triggered too - quickly.</para> - </listitem> - <listitem> - <para>hedgeReadOpsWin - the number of times the hedged read thread was faster than the - original thread. This could indicate that a given RegionServer is having trouble - servicing requests.</para> - </listitem> - </itemizedlist> - </section> - - </section> - <!-- reading --> - - <section - xml:id="perf.deleting"> - <title>Deleting from HBase</title> - <section - xml:id="perf.deleting.queue"> - <title>Using HBase Tables as Queues</title> - <para>HBase tables are sometimes used as queues. In this case, special care must be taken to - regularly perform major compactions on tables used in this manner. As is documented in <xref - linkend="datamodel" />, marking rows as deleted creates additional StoreFiles which then - need to be processed on reads. Tombstones only get cleaned up with major compactions. 
</para> - <para>See also <xref - linkend="compaction" /> and <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html#majorCompact%28java.lang.String%29">HBaseAdmin.majorCompact</link>. - </para> - </section> - <section - xml:id="perf.deleting.rpc"> - <title>Delete RPC Behavior</title> - <para>Be aware that <code>htable.delete(Delete)</code> doesn't use the writeBuffer. It will - execute a RegionServer RPC with each invocation. For a large number of deletes, consider - <code>htable.delete(List)</code>. </para> - <para>See <link - xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#delete%28org.apache.hadoop.hbase.client.Delete%29" /> - </para> - </section> - </section> - <!-- deleting --> - - <section - xml:id="perf.hdfs"> - <title>HDFS</title> - <para>Because HBase runs on <xref - linkend="arch.hdfs" /> it is important to understand how it works and how it affects HBase. </para> - <section - xml:id="perf.hdfs.curr"> - <title>Current Issues With Low-Latency Reads</title> - <para>The original use-case for HDFS was batch processing. As such, low-latency reads - were historically not a priority. With the increased adoption of Apache HBase this is - changing, and several improvements are already in development. See the <link - xlink:href="https://issues.apache.org/jira/browse/HDFS-1599">Umbrella Jira Ticket for HDFS - Improvements for HBase</link>. </para> - </section> - <section - xml:id="perf.hdfs.configs.localread"> - <title>Leveraging local data</title> - <para>Since Hadoop 1.0.0 (also 0.22.1, 0.23.1, CDH3u3 and HDP 1.0) via <link - xlink:href="https://issues.apache.org/jira/browse/HDFS-2246">HDFS-2246</link>, it is - possible for the DFSClient to take a "short circuit" and read directly from the disk instead - of going through the DataNode when the data is local.
What this means for HBase is that the - RegionServers can read directly off their machine's disks instead of having to open a socket - to talk to the DataNode, the former being generally much faster. See JD's <link - xlink:href="http://files.meetup.com/1350427/hug_ebay_jdcryans.pdf">Performance - Talk</link>. Also see <link - xlink:href="http://search-hadoop.com/m/zV6dKrLCVh1">HBase, mail # dev - read short - circuit</link> thread for more discussion around short circuit reads. </para> - <para>How you enable "short circuit" reads depends on your version of Hadoop. The original - shortcircuit read patch was much improved upon in Hadoop 2 in <link - xlink:href="https://issues.apache.org/jira/browse/HDFS-347">HDFS-347</link>. See <link - xlink:href="http://blog.cloudera.com/blog/2013/08/how-improved-short-circuit-local-reads-bring-better-performance-and-security-to-hadoop/" /> - for details on the difference between the old and new implementations. See <link - xlink:href="http://archive.cloudera.com/cdh4/cdh/4/hadoop/hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Hadoop - shortcircuit reads configuration page</link> for how to enable the latter, better version - of shortcircuit. For example, here is a minimal config enabling short-circuit reads, added - to <filename>hbase-site.xml</filename>: </para> - <programlisting language="xml"><![CDATA[<property> - <name>dfs.client.read.shortcircuit</name> - <value>true</value> - <description> - This configuration parameter turns on short-circuit local reads. - </description> -</property> -<property> - <name>dfs.domain.socket.path</name> - <value>/home/stack/sockets/short_circuit_read_socket_PORT</value> - <description> - Optional. This is a path to a UNIX domain socket that will be used for - communication between the DataNode and local HDFS clients. - If the string "_PORT" is present in this path, it will be replaced by the - TCP port of the DataNode.
- </description> -</property>]]></programlisting> - <para>Be careful about permissions for the directory that hosts the shared domain socket; - the DFSClient will complain if the directory is open to users other than the hbase user. </para> - <para>If you are running on an old Hadoop, one that is without <link - xlink:href="https://issues.apache.org/jira/browse/HDFS-347">HDFS-347</link> but that has - <link xlink:href="https://issues.apache.org/jira/browse/HDFS-2246">HDFS-2246</link>, you - must set two configurations. First, the hdfs-site.xml needs to be amended. Set the property - <varname>dfs.block.local-path-access.user</varname> to be the <emphasis>only</emphasis> - user that can use the shortcut. This has to be the user that started HBase. Then in - hbase-site.xml, set <varname>dfs.client.read.shortcircuit</varname> to be - <varname>true</varname>. - </para> - - <para> Services -- at least the HBase RegionServers -- will need to be restarted in order to - pick up the new configurations. </para> - <note - xml:id="dfs.client.read.shortcircuit.buffer.size"> - <title>dfs.client.read.shortcircuit.buffer.size</title> - <para>The default for this value is too high when running on a highly trafficked HBase. In - HBase, if this value has not been set, we set it down from the default of 1M to 128k - (since HBase 0.98.0 and 0.96.1). See <link - xlink:href="https://issues.apache.org/jira/browse/HBASE-8143">HBASE-8143 HBase on Hadoop - 2 with local short circuit reads (ssr) causes OOM</link>. The Hadoop DFSClient in HBase - will allocate a direct byte buffer of this size for <emphasis>each</emphasis> block it has - open; given HBase keeps its HDFS files open all the time, this can add up quickly.</para> - </note> - </section> - - <section - xml:id="perf.hdfs.comp"> - <title>Performance Comparisons of HBase vs. HDFS</title> - <para>A fairly common question on the dist-list is why HBase isn't as performant as HDFS files - in a batch context (e.g., as a MapReduce source or sink).
The short answer is that HBase is - doing a lot more than HDFS (e.g., reading the KeyValues, returning the most current row or - specified timestamps, etc.), and as such HBase is 4-5 times slower than HDFS in this - processing context. There is room for improvement and this gap will, over time, be reduced, - but HDFS will always be faster in this use-case. </para> - </section> - </section> - - <section - xml:id="perf.ec2"> - <title>Amazon EC2</title> - <para>Performance questions are common in Amazon EC2 environments because it is a shared - environment. You will not see the same throughput as a dedicated server. In terms of running - tests on EC2, run them several times for the same reason (i.e., it's a shared environment and - you don't know what else is happening on the server). </para> - <para>If you are running on EC2 and post performance questions on the dist-list, please state - this fact up-front, because EC2 issues are practically a separate class of performance - issues. </para> - </section> - - <section - xml:id="perf.hbase.mr.cluster"> - <title>Collocating HBase and MapReduce</title> - <para>It is often recommended to have different clusters for HBase and MapReduce. A better - qualification of this is: don't collocate a HBase that serves live requests with a heavy MR - workload. OLTP and OLAP-optimized systems have conflicting requirements and one will lose to - the other, usually the former. For example, short latency-sensitive disk reads will have to - wait in line behind longer reads that are trying to squeeze out as much throughput as - possible. MR jobs that write to HBase will also generate flushes and compactions, which will - in turn invalidate blocks in the <xref - linkend="block.cache" />. </para> - <para>If you need to process the data from your live HBase cluster in MR, you can ship the - deltas with <xref - linkend="copy.table" /> or use replication to get the new data in real time on the OLAP - cluster.
In the worst case, if you really need to collocate both, set MR to use fewer Map and - Reduce slots than you'd normally configure, possibly just one. </para> - <para>When HBase is used for OLAP operations, it's preferable to set it up in a hardened way, - such as configuring a higher ZooKeeper session timeout and giving more memory to the MemStores - (the argument being that the Block Cache won't be used much since the workloads are usually - long scans). </para> - </section> - - <section - xml:id="perf.casestudy"> - <title>Case Studies</title> - <para>For Performance and Troubleshooting Case Studies, see <xref - linkend="casestudies" />. </para> - </section> -</chapter> http://git-wip-us.apache.org/repos/asf/hbase/blob/e80b3092/src/main/docbkx/preface.xml ---------------------------------------------------------------------- diff --git a/src/main/docbkx/preface.xml b/src/main/docbkx/preface.xml deleted file mode 100644 index a8f6895..0000000 --- a/src/main/docbkx/preface.xml +++ /dev/null @@ -1,83 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<preface - version="5.0" - xml:id="preface" - xmlns="http://docbook.org/ns/docbook" - xmlns:xlink="http://www.w3.org/1999/xlink" - xmlns:xi="http://www.w3.org/2001/XInclude" - xmlns:svg="http://www.w3.org/2000/svg" - xmlns:m="http://www.w3.org/1998/Math/MathML" - xmlns:html="http://www.w3.org/1999/xhtml" - xmlns:db="http://docbook.org/ns/docbook"> - <!-- -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ ---> - <title>Preface</title> - - <para>This is the official reference guide for the <link - xlink:href="http://hbase.apache.org/">HBase</link> version it ships with. Herein you - will find either the definitive documentation on an HBase topic as of its standing when the - referenced HBase version shipped, or it will point to the location in <link - xlink:href="http://hbase.apache.org/apidocs/index.html">javadoc</link>, <link - xlink:href="https://issues.apache.org/jira/browse/HBASE">JIRA</link> or <link - xlink:href="http://wiki.apache.org/hadoop/Hbase">wiki</link> where the pertinent - information can be found.</para> - - <formalpara> - <title>About This Guide</title> - <para>This reference guide is a work in progress. The source for this guide can be found in - the <filename>src/main/docbkx</filename> directory of the HBase source. This reference - guide is marked up using <link xlink:href="http://www.docbook.org/">DocBook</link>, from - which the finished guide is generated as part of the 'site' build target. Run - <programlisting language="bourne">mvn site</programlisting> to generate this documentation. Amendments and - improvements to the documentation are welcomed.
Click <link - xlink:href="https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12310753&issuetype=1&components=12312132&summary=SHORT+DESCRIPTION" - >this link</link> to file a new documentation bug against Apache HBase with some - values pre-selected.</para> - </formalpara> - <formalpara> - <title>Contributing to the Documentation</title> - <para>For an overview of DocBook and suggestions to get started contributing to the documentation, see <xref linkend="appendix_contributing_to_documentation" />.</para> - </formalpara> - <formalpara> - <title>Providing Feedback</title> - <para>This guide allows you to leave comments or questions on any page, using Disqus. Look - for the Comments area at the bottom of the page. Answering these questions is a - volunteer effort, and may be delayed.</para> - </formalpara> - - <note - xml:id="headsup"> - <title>Heads-up if this is your first foray into the world of distributed - computing...</title> - <para> If this is your first foray into the wonderful world of Distributed Computing, then - you are in for some interesting times. First off, distributed systems are hard; making a - distributed system hum requires a disparate skillset that spans systems (hardware and - software) and networking. Your cluster's operation can hiccup because of any of a myriad - of reasons, from bugs in HBase itself through misconfigurations -- of HBase - itself, but also of the operating system -- through to hardware problems, - whether it be a bug in your network card drivers or an underprovisioned RAM bus (to - mention two recent examples of hardware issues that manifested as "HBase is slow"). You - will also need to do a recalibration if up to this point your computing has been bound to a - single box. Here is one good starting point: <link - xlink:href="http://en.wikipedia.org/wiki/Fallacies_of_Distributed_Computing">Fallacies - of Distributed Computing</link>. That said, you are welcome. It's a fun place to be.
- Yours, the HBase Community. </para> - </note> -</preface>
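The two-property setup for short-circuit reads on older Hadoops (HDFS-2246 but not HDFS-347), described in the performance chapter above, might be sketched as follows. The user name <varname>hbase</varname> is an assumption; substitute whatever user actually starts your HBase processes.

```xml
<!-- hdfs-site.xml sketch: the single user allowed to use the shortcut.
     "hbase" is an assumed user name; it must be the user that started HBase. -->
<property>
  <name>dfs.block.local-path-access.user</name>
  <value>hbase</value>
</property>

<!-- hbase-site.xml sketch: enable short-circuit reads in HBase's DFSClient. -->
<property>
  <name>dfs.client.read.shortcircuit</name>
  <value>true</value>
</property>

<!-- hbase-site.xml, optional: the 128k buffer HBase defaults to since
     0.98.0/0.96.1 (per HBASE-8143), set explicitly here for illustration. -->
<property>
  <name>dfs.client.read.shortcircuit.buffer.size</name>
  <value>131072</value>
</property>
```

As noted above, the HBase RegionServers (at least) must be restarted for these settings to take effect.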

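The OLAP hardening suggestions in the collocation section above (a higher ZooKeeper session timeout, more heap to the MemStores at the expense of the little-used Block Cache) could look roughly like the following hbase-site.xml fragment. The specific values are illustrative assumptions, not tested recommendations; tune them for your own cluster.

```xml
<!-- hbase-site.xml sketch for an OLAP-leaning cluster; values are assumptions. -->
<!-- Tolerate longer GC/IO pauses before ZooKeeper expires the session. -->
<property>
  <name>zookeeper.session.timeout</name>
  <value>120000</value>
</property>
<!-- Shift heap toward the MemStores... -->
<property>
  <name>hbase.regionserver.global.memstore.upperLimit</name>
  <value>0.5</value>
</property>
<!-- ...and away from the Block Cache, which long scans rarely benefit from. -->
<property>
  <name>hfile.block.cache.size</name>
  <value>0.2</value>
</property>
```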