Repository: hbase-site Updated Branches: refs/heads/asf-site dbfeb6d66 -> 36e5b7d69
http://git-wip-us.apache.org/repos/asf/hbase-site/blob/f17356a7/xref/org/apache/hadoop/hbase/util/RegionSplitter.html ---------------------------------------------------------------------- diff --git a/xref/org/apache/hadoop/hbase/util/RegionSplitter.html b/xref/org/apache/hadoop/hbase/util/RegionSplitter.html index 5cae107..c705d7f 100644 --- a/xref/org/apache/hadoop/hbase/util/RegionSplitter.html +++ b/xref/org/apache/hadoop/hbase/util/RegionSplitter.html @@ -30,1103 +30,1096 @@ <a class="jxr_linenumber" name="20" href="#20">20</a> <a class="jxr_linenumber" name="21" href="#21">21</a> <strong class="jxr_keyword">import</strong> java.io.IOException; <a class="jxr_linenumber" name="22" href="#22">22</a> <strong class="jxr_keyword">import</strong> java.math.BigInteger; -<a class="jxr_linenumber" name="23" href="#23">23</a> <strong class="jxr_keyword">import</strong> java.util.Arrays; -<a class="jxr_linenumber" name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> java.util.Collection; -<a class="jxr_linenumber" name="25" href="#25">25</a> <strong class="jxr_keyword">import</strong> java.util.Collections; -<a class="jxr_linenumber" name="26" href="#26">26</a> <strong class="jxr_keyword">import</strong> java.util.Comparator; -<a class="jxr_linenumber" name="27" href="#27">27</a> <strong class="jxr_keyword">import</strong> java.util.LinkedList; -<a class="jxr_linenumber" name="28" href="#28">28</a> <strong class="jxr_keyword">import</strong> java.util.List; +<a class="jxr_linenumber" name="23" href="#23">23</a> +<a class="jxr_linenumber" name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> java.util.Arrays; +<a class="jxr_linenumber" name="25" href="#25">25</a> <strong class="jxr_keyword">import</strong> java.util.Collection; +<a class="jxr_linenumber" name="26" href="#26">26</a> <strong class="jxr_keyword">import</strong> java.util.LinkedList; +<a class="jxr_linenumber" name="27" href="#27">27</a> <strong class="jxr_keyword">import</strong> java.util.List; +<a class="jxr_linenumber" name="28" href="#28">28</a> <strong class="jxr_keyword">import</strong> java.util.Map; <a class="jxr_linenumber" name="29" href="#29">29</a> <strong class="jxr_keyword">import</strong> java.util.Set; <a class="jxr_linenumber" name="30" href="#30">30</a> <strong class="jxr_keyword">import</strong> java.util.TreeMap; -<a class="jxr_linenumber" name="31" href="#31">31</a> -<a class="jxr_linenumber" name="32" href="#32">32</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.CommandLine; -<a class="jxr_linenumber" name="33" href="#33">33</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.GnuParser; -<a class="jxr_linenumber" name="34" href="#34">34</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.HelpFormatter; -<a class="jxr_linenumber" name="35" href="#35">35</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.OptionBuilder; -<a class="jxr_linenumber" name="36" href="#36">36</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.Options; -<a class="jxr_linenumber" name="37" href="#37">37</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.ParseException; -<a class="jxr_linenumber" name="38" href="#38">38</a> <strong class="jxr_keyword">import</strong> org.apache.commons.lang.ArrayUtils; -<a class="jxr_linenumber" name="39" href="#39">39</a> <strong class="jxr_keyword">import</strong> org.apache.commons.lang.StringUtils; -<a class="jxr_linenumber" name="40" href="#40">40</a> <strong class="jxr_keyword">import</strong> org.apache.commons.logging.Log; -<a class="jxr_linenumber" name="41" href="#41">41</a> <strong class="jxr_keyword">import</strong> org.apache.commons.logging.LogFactory; -<a class="jxr_linenumber" name="42" href="#42">42</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.conf.Configuration; -<a class="jxr_linenumber" name="43" href="#43">43</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataInputStream; -<a class="jxr_linenumber" name="44" href="#44">44</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataOutputStream; -<a class="jxr_linenumber" name="45" href="#45">45</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FileSystem; -<a class="jxr_linenumber" name="46" href="#46">46</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.Path; -<a class="jxr_linenumber" name="47" href="#47">47</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ClusterStatus; -<a class="jxr_linenumber" name="48" href="#48">48</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HBaseConfiguration; -<a class="jxr_linenumber" name="49" href="#49">49</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HColumnDescriptor; -<a class="jxr_linenumber" name="50" href="#50">50</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionInfo; -<a class="jxr_linenumber" name="51" href="#51">51</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionLocation; -<a class="jxr_linenumber" name="52" href="#52">52</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HTableDescriptor; -<a class="jxr_linenumber" name="53" href="#53">53</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.MetaTableAccessor; -<a class="jxr_linenumber" name="54" href="#54">54</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ServerName; -<a class="jxr_linenumber" name="55" href="#55">55</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.TableName; -<a class="jxr_linenumber" name="56" href="#56">56</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.classification.InterfaceAudience; -<a class="jxr_linenumber" name="57" href="#57">57</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Admin; -<a class="jxr_linenumber" name="58" href="#58">58</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ClusterConnection; -<a class="jxr_linenumber" name="59" href="#59">59</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Connection; -<a class="jxr_linenumber" name="60" href="#60">60</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ConnectionFactory; -<a class="jxr_linenumber" name="61" href="#61">61</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.NoServerForRegionException; -<a class="jxr_linenumber" name="62" href="#62">62</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.RegionLocator; -<a class="jxr_linenumber" name="63" href="#63">63</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Table; -<a class="jxr_linenumber" name="64" href="#64">64</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.regionserver.HRegionFileSystem; -<a class="jxr_linenumber" name="65" href="#65">65</a> -<a class="jxr_linenumber" name="66" href="#66">66</a> <strong class="jxr_keyword">import</strong> com.google.common.base.Preconditions; -<a class="jxr_linenumber" name="67" href="#67">67</a> <strong class="jxr_keyword">import</strong> com.google.common.collect.Lists; -<a class="jxr_linenumber" name="68" href="#68">68</a> <strong class="jxr_keyword">import</strong> com.google.common.collect.Maps; -<a class="jxr_linenumber" name="69" href="#69">69</a> <strong class="jxr_keyword">import</strong> com.google.common.collect.Sets; -<a class="jxr_linenumber" name="70" href="#70">70</a> -<a class="jxr_linenumber" name="71" href="#71">71</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="72" href="#72">72</a> <em class="jxr_javadoccomment"> * The {@link RegionSplitter} class provides several utilities to help in the</em> -<a class="jxr_linenumber" name="73" href="#73">73</a> <em class="jxr_javadoccomment"> * administration lifecycle for developers who choose to manually split regions</em> -<a class="jxr_linenumber" name="74" href="#74">74</a> <em class="jxr_javadoccomment"> * instead of having HBase handle that automatically. The most useful utilities</em> -<a class="jxr_linenumber" name="75" href="#75">75</a> <em class="jxr_javadoccomment"> * are:</em> -<a class="jxr_linenumber" name="76" href="#76">76</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="77" href="#77">77</a> <em class="jxr_javadoccomment"> * <ul></em> -<a class="jxr_linenumber" name="78" href="#78">78</a> <em class="jxr_javadoccomment"> * <li>Create a table with a specified number of pre-split regions</em> -<a class="jxr_linenumber" name="79" href="#79">79</a> <em class="jxr_javadoccomment"> * <li>Execute a rolling split of all regions on an existing table</em> -<a class="jxr_linenumber" name="80" href="#80">80</a> <em class="jxr_javadoccomment"> * </ul></em> -<a class="jxr_linenumber" name="81" href="#81">81</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="82" href="#82">82</a> <em class="jxr_javadoccomment"> * Both operations can be safely done on a live server.</em> -<a class="jxr_linenumber" name="83" href="#83">83</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="84" href="#84">84</a> <em class="jxr_javadoccomment"> * <b>Question:</b> How do I turn off automatic splitting? <br></em> -<a class="jxr_linenumber" name="85" href="#85">85</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Automatic splitting is determined by the configuration value</em> -<a class="jxr_linenumber" name="86" href="#86">86</a> <em class="jxr_javadoccomment"> * <i>HConstants.HREGION_MAX_FILESIZE</i>. It is not recommended that you set this</em> -<a class="jxr_linenumber" name="87" href="#87">87</a> <em class="jxr_javadoccomment"> * to Long.MAX_VALUE in case you forget about manual splits. A suggested setting</em> -<a class="jxr_linenumber" name="88" href="#88">88</a> <em class="jxr_javadoccomment"> * is 100GB, which would result in &gt; 1hr major compactions if reached.</em> -<a class="jxr_linenumber" name="89" href="#89">89</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="90" href="#90">90</a> <em class="jxr_javadoccomment"> * <b>Question:</b> Why did the original authors decide to manually split? <br></em> -<a class="jxr_linenumber" name="91" href="#91">91</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Specific workload characteristics of our use case allowed us</em> -<a class="jxr_linenumber" name="92" href="#92">92</a> <em class="jxr_javadoccomment"> * to benefit from a manual split system.</em> -<a class="jxr_linenumber" name="93" href="#93">93</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="94" href="#94">94</a> <em class="jxr_javadoccomment"> * <ul></em> -<a class="jxr_linenumber" name="95" href="#95">95</a> <em class="jxr_javadoccomment"> * <li>Data (~1k) that would grow instead of being replaced</em> -<a class="jxr_linenumber" name="96" href="#96">96</a> <em class="jxr_javadoccomment"> * <li>Data growth was roughly uniform across all regions</em> -<a class="jxr_linenumber" name="97" href="#97">97</a> <em class="jxr_javadoccomment"> * <li>OLTP workload. Data loss is a big deal.</em> -<a class="jxr_linenumber" name="98" href="#98">98</a> <em class="jxr_javadoccomment"> * </ul></em> -<a class="jxr_linenumber" name="99" href="#99">99</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="100" href="#100">100</a> <em class="jxr_javadoccomment"> * <b>Question:</b> Why is manual splitting good for this workload? <br></em> -<a class="jxr_linenumber" name="101" href="#101">101</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Although automated splitting is not a bad option, there are</em> -<a class="jxr_linenumber" name="102" href="#102">102</a> <em class="jxr_javadoccomment"> * benefits to manual splitting.</em> -<a class="jxr_linenumber" name="103" href="#103">103</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="104" href="#104">104</a> <em class="jxr_javadoccomment"> * <ul></em> -<a class="jxr_linenumber" name="105" href="#105">105</a> <em class="jxr_javadoccomment"> * <li>With growing amounts of data, splits will continually be needed. Since</em> -<a class="jxr_linenumber" name="106" href="#106">106</a> <em class="jxr_javadoccomment"> * you always know exactly what regions you have, long-term debugging and</em> -<a class="jxr_linenumber" name="107" href="#107">107</a> <em class="jxr_javadoccomment"> * profiling is much easier with manual splits. It is hard to trace the logs to</em> -<a class="jxr_linenumber" name="108" href="#108">108</a> <em class="jxr_javadoccomment"> * understand region level problems if it keeps splitting and getting renamed.</em> -<a class="jxr_linenumber" name="109" href="#109">109</a> <em class="jxr_javadoccomment"> * <li>Data offlining bugs + unknown number of split regions == oh crap! If an</em> -<a class="jxr_linenumber" name="110" href="#110">110</a> <em class="jxr_javadoccomment"> * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and</em> -<a class="jxr_linenumber" name="111" href="#111">111</a> <em class="jxr_javadoccomment"> * you notice it a day or so later, you can be assured that the regions</em> -<a class="jxr_linenumber" name="112" href="#112">112</a> <em class="jxr_javadoccomment"> * specified in these files are the same as the current regions and you have</em> -<a class="jxr_linenumber" name="113" href="#113">113</a> <em class="jxr_javadoccomment"> * less headaches trying to restore/replay your data.</em> -<a class="jxr_linenumber" name="114" href="#114">114</a> <em class="jxr_javadoccomment"> * <li>You can finely tune your compaction algorithm. With roughly uniform data</em> -<a class="jxr_linenumber" name="115" href="#115">115</a> <em class="jxr_javadoccomment"> * growth, it's easy to cause split / compaction storms as the regions all</em> -<a class="jxr_linenumber" name="116" href="#116">116</a> <em class="jxr_javadoccomment"> * roughly hit the same data size at the same time. With manual splits, you can</em> -<a class="jxr_linenumber" name="117" href="#117">117</a> <em class="jxr_javadoccomment"> * let staggered, time-based major compactions spread out your network IO load.</em> -<a class="jxr_linenumber" name="118" href="#118">118</a> <em class="jxr_javadoccomment"> * </ul></em> -<a class="jxr_linenumber" name="119" href="#119">119</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="120" href="#120">120</a> <em class="jxr_javadoccomment"> * <b>Question:</b> What's the optimal number of pre-split regions to create? <br></em> -<a class="jxr_linenumber" name="121" href="#121">121</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Mileage will vary depending upon your application.</em> -<a class="jxr_linenumber" name="122" href="#122">122</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="123" href="#123">123</a> <em class="jxr_javadoccomment"> * The short answer for our application is that we started with 10 pre-split</em> -<a class="jxr_linenumber" name="124" href="#124">124</a> <em class="jxr_javadoccomment"> * regions / server and watched our data growth over time. It's better to err on</em> -<a class="jxr_linenumber" name="125" href="#125">125</a> <em class="jxr_javadoccomment"> * the side of too little regions and rolling split later.</em> -<a class="jxr_linenumber" name="126" href="#126">126</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="127" href="#127">127</a> <em class="jxr_javadoccomment"> * The more complicated answer is that this depends upon the largest storefile</em> -<a class="jxr_linenumber" name="128" href="#128">128</a> <em class="jxr_javadoccomment"> * in your region. With a growing data size, this will get larger over time. You</em> -<a class="jxr_linenumber" name="129" href="#129">129</a> <em class="jxr_javadoccomment"> * want the largest region to be just big enough that the</em> -<a class="jxr_linenumber" name="130" href="#130">130</a> <em class="jxr_javadoccomment"> * {@link org.apache.hadoop.hbase.regionserver.HStore} compact</em> -<a class="jxr_linenumber" name="131" href="#131">131</a> <em class="jxr_javadoccomment"> * selection algorithm only compacts it due to a timed major. If you don't, your</em> -<a class="jxr_linenumber" name="132" href="#132">132</a> <em class="jxr_javadoccomment"> * cluster can be prone to compaction storms as the algorithm decides to run</em> -<a class="jxr_linenumber" name="133" href="#133">133</a> <em class="jxr_javadoccomment"> * major compactions on a large series of regions all at once. Note that</em> -<a class="jxr_linenumber" name="134" href="#134">134</a> <em class="jxr_javadoccomment"> * compaction storms are due to the uniform data growth, not the manual split</em> -<a class="jxr_linenumber" name="135" href="#135">135</a> <em class="jxr_javadoccomment"> * decision.</em> -<a class="jxr_linenumber" name="136" href="#136">136</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="137" href="#137">137</a> <em class="jxr_javadoccomment"> * If you pre-split your regions too thin, you can increase the major compaction</em> -<a class="jxr_linenumber" name="138" href="#138">138</a> <em class="jxr_javadoccomment"> * interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size</em> -<a class="jxr_linenumber" name="139" href="#139">139</a> <em class="jxr_javadoccomment"> * grows too large, use this script to perform a network IO safe rolling split</em> -<a class="jxr_linenumber" name="140" href="#140">140</a> <em class="jxr_javadoccomment"> * of all regions.</em> -<a class="jxr_linenumber" name="141" href="#141">141</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="142" href="#142">142</a> @InterfaceAudience.Private -<a class="jxr_linenumber" name="143" href="#143">143</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">RegionSplitter</a> { -<a class="jxr_linenumber" name="144" href="#144">144</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> Log LOG = LogFactory.getLog(RegionSplitter.<strong class="jxr_keyword">class</strong>); -<a class="jxr_linenumber" name="145" href="#145">145</a> -<a class="jxr_linenumber" name="146" href="#146">146</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="147" href="#147">147</a> <em class="jxr_javadoccomment"> * A generic interface for the RegionSplitter code to use for all it's</em> -<a class="jxr_linenumber" name="148" href="#148">148</a> <em class="jxr_javadoccomment"> * functionality. Note that the original authors of this code use</em> -<a class="jxr_linenumber" name="149" href="#149">149</a> <em class="jxr_javadoccomment"> * {@link HexStringSplit} to partition their table and set it as default, but</em> -<a class="jxr_linenumber" name="150" href="#150">150</a> <em class="jxr_javadoccomment"> * provided this for your custom algorithm. To use, create a new derived class</em> -<a class="jxr_linenumber" name="151" href="#151">151</a> <em class="jxr_javadoccomment"> * from this interface and call {@link RegionSplitter#createPresplitTable} or</em> -<a class="jxr_linenumber" name="152" href="#152">152</a> <em class="jxr_javadoccomment"> * RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with the</em> -<a class="jxr_linenumber" name="153" href="#153">153</a> <em class="jxr_javadoccomment"> * argument splitClassName giving the name of your class.</em> -<a class="jxr_linenumber" name="154" href="#154">154</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="155" href="#155">155</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">interface</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> { -<a class="jxr_linenumber" name="156" href="#156">156</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="157" href="#157">157</a> <em class="jxr_javadoccomment"> * Split a pre-existing region into 2 regions.</em> -<a class="jxr_linenumber" name="158" href="#158">158</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="159" href="#159">159</a> <em class="jxr_javadoccomment"> * @param start</em> -<a class="jxr_linenumber" name="160" href="#160">160</a> <em class="jxr_javadoccomment"> * first row (inclusive)</em> -<a class="jxr_linenumber" name="161" href="#161">161</a> <em class="jxr_javadoccomment"> * @param end</em> -<a class="jxr_linenumber" name="162" href="#162">162</a> <em class="jxr_javadoccomment"> * last row (exclusive)</em> -<a class="jxr_linenumber" name="163" href="#163">163</a> <em class="jxr_javadoccomment"> * @return the split row to use</em> -<a class="jxr_linenumber" name="164" href="#164">164</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="165" href="#165">165</a> byte[] split(byte[] start, byte[] end); -<a class="jxr_linenumber" name="166" href="#166">166</a> -<a class="jxr_linenumber" name="167" href="#167">167</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="168" href="#168">168</a> <em class="jxr_javadoccomment"> * Split an entire table.</em> -<a class="jxr_linenumber" name="169" href="#169">169</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="170" href="#170">170</a> <em class="jxr_javadoccomment"> * @param numRegions</em> -<a class="jxr_linenumber" name="171" href="#171">171</a> <em class="jxr_javadoccomment"> * number of regions to split the table into</em> -<a class="jxr_linenumber" name="172" href="#172">172</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="173" href="#173">173</a> <em class="jxr_javadoccomment"> * @throws RuntimeException</em> -<a class="jxr_linenumber" name="174" href="#174">174</a> <em class="jxr_javadoccomment"> * user input is validated at this time. may throw a runtime</em> -<a class="jxr_linenumber" name="175" href="#175">175</a> <em class="jxr_javadoccomment"> * exception in response to a parse failure</em> -<a class="jxr_linenumber" name="176" href="#176">176</a> <em class="jxr_javadoccomment"> * @return array of split keys for the initial regions of the table. The</em> -<a class="jxr_linenumber" name="177" href="#177">177</a> <em class="jxr_javadoccomment"> * length of the returned array should be numRegions-1.</em> -<a class="jxr_linenumber" name="178" href="#178">178</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="179" href="#179">179</a> byte[][] split(<strong class="jxr_keyword">int</strong> numRegions); -<a class="jxr_linenumber" name="180" href="#180">180</a> -<a class="jxr_linenumber" name="181" href="#181">181</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="182" href="#182">182</a> <em class="jxr_javadoccomment"> * In HBase, the first row is represented by an empty byte array. This might</em> -<a class="jxr_linenumber" name="183" href="#183">183</a> <em class="jxr_javadoccomment"> * cause problems with your split algorithm or row printing. All your APIs</em> -<a class="jxr_linenumber" name="184" href="#184">184</a> <em class="jxr_javadoccomment"> * will be passed firstRow() instead of empty array.</em> -<a class="jxr_linenumber" name="185" href="#185">185</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="186" href="#186">186</a> <em class="jxr_javadoccomment"> * @return your representation of your first row</em> -<a class="jxr_linenumber" name="187" href="#187">187</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="188" href="#188">188</a> byte[] firstRow(); -<a class="jxr_linenumber" name="189" href="#189">189</a> -<a class="jxr_linenumber" name="190" href="#190">190</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="191" href="#191">191</a> <em class="jxr_javadoccomment"> * In HBase, the last row is represented by an empty byte array. This might</em> -<a class="jxr_linenumber" name="192" href="#192">192</a> <em class="jxr_javadoccomment"> * cause problems with your split algorithm or row printing. All your APIs</em> -<a class="jxr_linenumber" name="193" href="#193">193</a> <em class="jxr_javadoccomment"> * will be passed firstRow() instead of empty array.</em> -<a class="jxr_linenumber" name="194" href="#194">194</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="195" href="#195">195</a> <em class="jxr_javadoccomment"> * @return your representation of your last row</em> -<a class="jxr_linenumber" name="196" href="#196">196</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="197" href="#197">197</a> byte[] lastRow(); -<a class="jxr_linenumber" name="198" href="#198">198</a> -<a class="jxr_linenumber" name="199" href="#199">199</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="200" href="#200">200</a> <em class="jxr_javadoccomment"> * In HBase, the last row is represented by an empty byte array. Set this</em> -<a class="jxr_linenumber" name="201" href="#201">201</a> <em class="jxr_javadoccomment"> * value to help the split code understand how to evenly divide the first</em> -<a class="jxr_linenumber" name="202" href="#202">202</a> <em class="jxr_javadoccomment"> * region.</em> -<a class="jxr_linenumber" name="203" href="#203">203</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="204" href="#204">204</a> <em class="jxr_javadoccomment"> * @param userInput</em> -<a class="jxr_linenumber" name="205" href="#205">205</a> <em class="jxr_javadoccomment"> * raw user input (may throw RuntimeException on parse failure)</em> -<a class="jxr_linenumber" name="206" href="#206">206</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="207" href="#207">207</a> <strong class="jxr_keyword">void</strong> setFirstRow(String userInput); -<a class="jxr_linenumber" name="208" href="#208">208</a> -<a class="jxr_linenumber" name="209" href="#209">209</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="210" href="#210">210</a> <em class="jxr_javadoccomment"> * In HBase, the last row is represented by an empty byte array. Set this</em> -<a class="jxr_linenumber" name="211" href="#211">211</a> <em class="jxr_javadoccomment"> * value to help the split code understand how to evenly divide the last</em> -<a class="jxr_linenumber" name="212" href="#212">212</a> <em class="jxr_javadoccomment"> * region. Note that this last row is inclusive for all rows sharing the</em> -<a class="jxr_linenumber" name="213" href="#213">213</a> <em class="jxr_javadoccomment"> * same prefix.</em> -<a class="jxr_linenumber" name="214" href="#214">214</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="215" href="#215">215</a> <em class="jxr_javadoccomment"> * @param userInput</em> -<a class="jxr_linenumber" name="216" href="#216">216</a> <em class="jxr_javadoccomment"> * raw user input (may throw RuntimeException on parse failure)</em> -<a class="jxr_linenumber" name="217" href="#217">217</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="218" href="#218">218</a> <strong class="jxr_keyword">void</strong> setLastRow(String userInput); -<a class="jxr_linenumber" name="219" href="#219">219</a> -<a class="jxr_linenumber" name="220" href="#220">220</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="221" href="#221">221</a> <em class="jxr_javadoccomment"> * @param input</em> -<a class="jxr_linenumber" name="222" href="#222">222</a> <em class="jxr_javadoccomment"> * user or file input for row</em> -<a class="jxr_linenumber" name="223" href="#223">223</a> <em class="jxr_javadoccomment"> * @return byte array representation of this row for HBase</em> -<a class="jxr_linenumber" name="224" href="#224">224</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="225" href="#225">225</a> byte[] strToRow(String input); -<a class="jxr_linenumber" name="226" href="#226">226</a> -<a class="jxr_linenumber" name="227" href="#227">227</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="228" href="#228">228</a> <em class="jxr_javadoccomment"> * @param row</em> -<a class="jxr_linenumber" name="229" href="#229">229</a> <em class="jxr_javadoccomment"> * byte array representing a row in HBase</em> -<a class="jxr_linenumber" name="230" href="#230">230</a> <em class="jxr_javadoccomment"> * @return String to use for debug &amp; file printing</em> -<a class="jxr_linenumber" name="231" href="#231">231</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="232" href="#232">232</a> String rowToStr(byte[] row); -<a class="jxr_linenumber" name="233" href="#233">233</a> -<a class="jxr_linenumber" name="234" href="#234">234</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="235" href="#235">235</a> <em class="jxr_javadoccomment"> * @return the separator character to use when storing / printing the row</em> -<a class="jxr_linenumber" name="236" href="#236">236</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="237" href="#237">237</a> String separator(); -<a class="jxr_linenumber" name="238" href="#238">238</a> -<a class="jxr_linenumber" name="239" href="#239">239</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="240" href="#240">240</a> <em class="jxr_javadoccomment"> * Set the first row</em> -<a class="jxr_linenumber" name="241" href="#241">241</a> <em class="jxr_javadoccomment"> * @param userInput byte array of the row key.</em> -<a class="jxr_linenumber" name="242" href="#242">242</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="243" href="#243">243</a> <strong class="jxr_keyword">void</strong> setFirstRow(byte[] userInput); -<a class="jxr_linenumber" name="244" href="#244">244</a> -<a class="jxr_linenumber" name="245" href="#245">245</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="246" href="#246">246</a> <em class="jxr_javadoccomment"> * Set the last row</em> -<a class="jxr_linenumber" name="247" href="#247">247</a> <em class="jxr_javadoccomment"> * @param userInput byte array of the row key.</em> -<a class="jxr_linenumber" name="248" href="#248">248</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="249" href="#249">249</a> <strong class="jxr_keyword">void</strong> setLastRow(byte[] userInput); -<a class="jxr_linenumber" name="250" href="#250">250</a> } -<a class="jxr_linenumber" name="251" href="#251">251</a> -<a class="jxr_linenumber" name="252" href="#252">252</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="253" href="#253">253</a> <em class="jxr_javadoccomment"> * The main function for the RegionSplitter application. Common uses:</em> -<a class="jxr_linenumber" name="254" href="#254">254</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="255" href="#255">255</a> <em class="jxr_javadoccomment"> * <ul></em> -<a class="jxr_linenumber" name="256" href="#256">256</a> <em class="jxr_javadoccomment"> * <li>create a table named 'myTable' with 60 pre-split regions containing 2</em> -<a class="jxr_linenumber" name="257" href="#257">257</a> <em class="jxr_javadoccomment"> * column families 'test' &amp; 'rs', assuming the keys are hex-encoded ASCII:</em> -<a class="jxr_linenumber" name="258" href="#258">258</a> <em class="jxr_javadoccomment"> * <ul></em> -<a class="jxr_linenumber" name="259" href="#259">259</a> <em class="jxr_javadoccomment"> * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs</em> -<a class="jxr_linenumber" name="260" href="#260">260</a> <em class="jxr_javadoccomment"> * myTable HexStringSplit</em> -<a class="jxr_linenumber" name="261" href="#261">261</a> <em class="jxr_javadoccomment"> * </ul></em> -<a class="jxr_linenumber" name="262" href="#262">262</a> <em class="jxr_javadoccomment"> * <li>perform a rolling split of 'myTable' (i.e. 60 =&gt; 120 regions), # 2</em> -<a class="jxr_linenumber" name="263" href="#263">263</a> <em class="jxr_javadoccomment"> * outstanding splits at a time, assuming keys are uniformly distributed</em> -<a class="jxr_linenumber" name="264" href="#264">264</a> <em class="jxr_javadoccomment"> * bytes:</em> -<a class="jxr_linenumber" name="265" href="#265">265</a> <em class="jxr_javadoccomment"> * <ul></em> -<a class="jxr_linenumber" name="266" href="#266">266</a> <em class="jxr_javadoccomment"> * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable</em> -<a class="jxr_linenumber" name="267" href="#267">267</a> <em class="jxr_javadoccomment"> * UniformSplit</em> +<a class="jxr_linenumber" name="31" href="#31">31</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.CommandLine; +<a class="jxr_linenumber" name="32" href="#32">32</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.GnuParser; +<a class="jxr_linenumber" name="33" href="#33">33</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.HelpFormatter; +<a class="jxr_linenumber" name="34" href="#34">34</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.OptionBuilder; +<a class="jxr_linenumber" name="35" href="#35">35</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.Options; +<a class="jxr_linenumber" name="36" href="#36">36</a> <strong class="jxr_keyword">import</strong> org.apache.commons.cli.ParseException; +<a class="jxr_linenumber" name="37" href="#37">37</a> <strong class="jxr_keyword">import</strong> org.apache.commons.lang.ArrayUtils; +<a class="jxr_linenumber" name="38" href="#38">38</a> <strong class="jxr_keyword">import</strong> org.apache.commons.lang.StringUtils; +<a class="jxr_linenumber" name="39" href="#39">39</a> <strong class="jxr_keyword">import</strong> org.apache.commons.logging.Log; +<a class="jxr_linenumber" name="40" href="#40">40</a> <strong class="jxr_keyword">import</strong> org.apache.commons.logging.LogFactory; +<a class="jxr_linenumber" name="41" href="#41">41</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.conf.Configuration; +<a class="jxr_linenumber" name="42" href="#42">42</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataInputStream; +<a class="jxr_linenumber" name="43" href="#43">43</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataOutputStream; +<a class="jxr_linenumber" name="44" href="#44">44</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FileSystem; +<a class="jxr_linenumber" name="45" href="#45">45</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.Path; +<a class="jxr_linenumber" name="46" href="#46">46</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ClusterStatus; +<a class="jxr_linenumber" name="47" href="#47">47</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HBaseConfiguration; +<a class="jxr_linenumber" name="48" href="#48">48</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HColumnDescriptor; +<a class="jxr_linenumber" name="49" href="#49">49</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionInfo; +<a class="jxr_linenumber" name="50" href="#50">50</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionLocation; +<a class="jxr_linenumber" name="51" href="#51">51</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HTableDescriptor; +<a class="jxr_linenumber" name="52" href="#52">52</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.MetaTableAccessor; +<a class="jxr_linenumber" name="53" href="#53">53</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ServerName; +<a class="jxr_linenumber" name="54" href="#54">54</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.TableName; +<a class="jxr_linenumber" name="55" href="#55">55</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.classification.InterfaceAudience; +<a class="jxr_linenumber" name="56" href="#56">56</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Admin; +<a class="jxr_linenumber" name="57" href="#57">57</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ClusterConnection; +<a class="jxr_linenumber" name="58" href="#58">58</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Connection; +<a class="jxr_linenumber" name="59" href="#59">59</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ConnectionFactory; +<a class="jxr_linenumber" name="60" href="#60">60</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.NoServerForRegionException; +<a class="jxr_linenumber" name="61" href="#61">61</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.RegionLocator; +<a class="jxr_linenumber" name="62" href="#62">62</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Table; +<a class="jxr_linenumber" name="63" href="#63">63</a> <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.regionserver.HRegionFileSystem; +<a class="jxr_linenumber" name="64" href="#64">64</a> +<a class="jxr_linenumber" name="65" href="#65">65</a> <strong class="jxr_keyword">import</strong> com.google.common.base.Preconditions; +<a class="jxr_linenumber" name="66" href="#66">66</a> <strong class="jxr_keyword">import</strong> com.google.common.collect.Lists; +<a class="jxr_linenumber" name="67" href="#67">67</a> <strong class="jxr_keyword">import</strong> com.google.common.collect.Maps; +<a class="jxr_linenumber" name="68" href="#68">68</a> <strong class="jxr_keyword">import</strong> com.google.common.collect.Sets; +<a class="jxr_linenumber" name="69" href="#69">69</a> +<a class="jxr_linenumber" name="70" href="#70">70</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="71" href="#71">71</a> <em class="jxr_javadoccomment"> * The {@link RegionSplitter} class provides several utilities to help in the</em> +<a class="jxr_linenumber" name="72" href="#72">72</a> <em class="jxr_javadoccomment"> * administration lifecycle for developers who choose to manually split regions</em> +<a class="jxr_linenumber" name="73" href="#73">73</a> <em class="jxr_javadoccomment"> * instead of having HBase handle that automatically. The most useful utilities</em> +<a class="jxr_linenumber" name="74" href="#74">74</a> <em class="jxr_javadoccomment"> * are:</em> +<a class="jxr_linenumber" name="75" href="#75">75</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="76" href="#76">76</a> <em class="jxr_javadoccomment"> * <ul></em> +<a class="jxr_linenumber" name="77" href="#77">77</a> <em class="jxr_javadoccomment"> * <li>Create a table with a specified number of pre-split regions</em> +<a class="jxr_linenumber" name="78" href="#78">78</a> <em class="jxr_javadoccomment"> * <li>Execute a rolling split of all regions on an existing table</em> +<a class="jxr_linenumber" name="79" href="#79">79</a> <em class="jxr_javadoccomment"> * </ul></em> +<a class="jxr_linenumber" name="80" href="#80">80</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="81" href="#81">81</a> <em class="jxr_javadoccomment"> * Both operations can be safely done on a live server.</em> +<a class="jxr_linenumber" name="82" href="#82">82</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="83" href="#83">83</a> <em class="jxr_javadoccomment"> * <b>Question:</b> How do I turn off automatic splitting? <br></em> +<a class="jxr_linenumber" name="84" href="#84">84</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Automatic splitting is determined by the configuration value</em> +<a class="jxr_linenumber" name="85" href="#85">85</a> <em class="jxr_javadoccomment"> * <i>HConstants.HREGION_MAX_FILESIZE</i>. It is not recommended that you set this</em> +<a class="jxr_linenumber" name="86" href="#86">86</a> <em class="jxr_javadoccomment"> * to Long.MAX_VALUE in case you forget about manual splits. A suggested setting</em> +<a class="jxr_linenumber" name="87" href="#87">87</a> <em class="jxr_javadoccomment"> * is 100GB, which would result in &gt; 1hr major compactions if reached.</em> +<a class="jxr_linenumber" name="88" href="#88">88</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="89" href="#89">89</a> <em class="jxr_javadoccomment"> * <b>Question:</b> Why did the original authors decide to manually split? <br></em> +<a class="jxr_linenumber" name="90" href="#90">90</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Specific workload characteristics of our use case allowed us</em> +<a class="jxr_linenumber" name="91" href="#91">91</a> <em class="jxr_javadoccomment"> * to benefit from a manual split system.</em> +<a class="jxr_linenumber" name="92" href="#92">92</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="93" href="#93">93</a> <em class="jxr_javadoccomment"> * <ul></em> +<a class="jxr_linenumber" name="94" href="#94">94</a> <em class="jxr_javadoccomment"> * <li>Data (~1k) that would grow instead of being replaced</em> +<a class="jxr_linenumber" name="95" href="#95">95</a> <em class="jxr_javadoccomment"> * <li>Data growth was roughly uniform across all regions</em> +<a class="jxr_linenumber" name="96" href="#96">96</a> <em class="jxr_javadoccomment"> * <li>OLTP workload. Data loss is a big deal.</em> +<a class="jxr_linenumber" name="97" href="#97">97</a> <em class="jxr_javadoccomment"> * </ul></em> +<a class="jxr_linenumber" name="98" href="#98">98</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="99" href="#99">99</a> <em class="jxr_javadoccomment"> * <b>Question:</b> Why is manual splitting good for this workload? <br></em> +<a class="jxr_linenumber" name="100" href="#100">100</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Although automated splitting is not a bad option, there are</em> +<a class="jxr_linenumber" name="101" href="#101">101</a> <em class="jxr_javadoccomment"> * benefits to manual splitting.</em> +<a class="jxr_linenumber" name="102" href="#102">102</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="103" href="#103">103</a> <em class="jxr_javadoccomment"> * <ul></em> +<a class="jxr_linenumber" name="104" href="#104">104</a> <em class="jxr_javadoccomment"> * <li>With growing amounts of data, splits will continually be needed. Since</em> +<a class="jxr_linenumber" name="105" href="#105">105</a> <em class="jxr_javadoccomment"> * you always know exactly what regions you have, long-term debugging and</em> +<a class="jxr_linenumber" name="106" href="#106">106</a> <em class="jxr_javadoccomment"> * profiling is much easier with manual splits. It is hard to trace the logs to</em> +<a class="jxr_linenumber" name="107" href="#107">107</a> <em class="jxr_javadoccomment"> * understand region level problems if it keeps splitting and getting renamed.</em> +<a class="jxr_linenumber" name="108" href="#108">108</a> <em class="jxr_javadoccomment"> * <li>Data offlining bugs + unknown number of split regions == oh crap! If an</em> +<a class="jxr_linenumber" name="109" href="#109">109</a> <em class="jxr_javadoccomment"> * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and</em> +<a class="jxr_linenumber" name="110" href="#110">110</a> <em class="jxr_javadoccomment"> * you notice it a day or so later, you can be assured that the regions</em> +<a class="jxr_linenumber" name="111" href="#111">111</a> <em class="jxr_javadoccomment"> * specified in these files are the same as the current regions and you have</em> +<a class="jxr_linenumber" name="112" href="#112">112</a> <em class="jxr_javadoccomment"> * less headaches trying to restore/replay your data.</em> +<a class="jxr_linenumber" name="113" href="#113">113</a> <em class="jxr_javadoccomment"> * <li>You can finely tune your compaction algorithm. With roughly uniform data</em> +<a class="jxr_linenumber" name="114" href="#114">114</a> <em class="jxr_javadoccomment"> * growth, it's easy to cause split / compaction storms as the regions all</em> +<a class="jxr_linenumber" name="115" href="#115">115</a> <em class="jxr_javadoccomment"> * roughly hit the same data size at the same time. With manual splits, you can</em> +<a class="jxr_linenumber" name="116" href="#116">116</a> <em class="jxr_javadoccomment"> * let staggered, time-based major compactions spread out your network IO load.</em> +<a class="jxr_linenumber" name="117" href="#117">117</a> <em class="jxr_javadoccomment"> * </ul></em> +<a class="jxr_linenumber" name="118" href="#118">118</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="119" href="#119">119</a> <em class="jxr_javadoccomment"> * <b>Question:</b> What's the optimal number of pre-split regions to create? <br></em> +<a class="jxr_linenumber" name="120" href="#120">120</a> <em class="jxr_javadoccomment"> * <b>Answer:</b> Mileage will vary depending upon your application.</em> +<a class="jxr_linenumber" name="121" href="#121">121</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="122" href="#122">122</a> <em class="jxr_javadoccomment"> * The short answer for our application is that we started with 10 pre-split</em> +<a class="jxr_linenumber" name="123" href="#123">123</a> <em class="jxr_javadoccomment"> * regions / server and watched our data growth over time. It's better to err on</em> +<a class="jxr_linenumber" name="124" href="#124">124</a> <em class="jxr_javadoccomment"> * the side of too little regions and rolling split later.</em> +<a class="jxr_linenumber" name="125" href="#125">125</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="126" href="#126">126</a> <em class="jxr_javadoccomment"> * The more complicated answer is that this depends upon the largest storefile</em> +<a class="jxr_linenumber" name="127" href="#127">127</a> <em class="jxr_javadoccomment"> * in your region. With a growing data size, this will get larger over time. You</em> +<a class="jxr_linenumber" name="128" href="#128">128</a> <em class="jxr_javadoccomment"> * want the largest region to be just big enough that the</em> +<a class="jxr_linenumber" name="129" href="#129">129</a> <em class="jxr_javadoccomment"> * {@link org.apache.hadoop.hbase.regionserver.HStore} compact</em> +<a class="jxr_linenumber" name="130" href="#130">130</a> <em class="jxr_javadoccomment"> * selection algorithm only compacts it due to a timed major. If you don't, your</em> +<a class="jxr_linenumber" name="131" href="#131">131</a> <em class="jxr_javadoccomment"> * cluster can be prone to compaction storms as the algorithm decides to run</em> +<a class="jxr_linenumber" name="132" href="#132">132</a> <em class="jxr_javadoccomment"> * major compactions on a large series of regions all at once. Note that</em> +<a class="jxr_linenumber" name="133" href="#133">133</a> <em class="jxr_javadoccomment"> * compaction storms are due to the uniform data growth, not the manual split</em> +<a class="jxr_linenumber" name="134" href="#134">134</a> <em class="jxr_javadoccomment"> * decision.</em> +<a class="jxr_linenumber" name="135" href="#135">135</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="136" href="#136">136</a> <em class="jxr_javadoccomment"> * If you pre-split your regions too thin, you can increase the major compaction</em> +<a class="jxr_linenumber" name="137" href="#137">137</a> <em class="jxr_javadoccomment"> * interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size</em> +<a class="jxr_linenumber" name="138" href="#138">138</a> <em class="jxr_javadoccomment"> * grows too large, use this script to perform a network IO safe rolling split</em> +<a class="jxr_linenumber" name="139" href="#139">139</a> <em class="jxr_javadoccomment"> * of all regions.</em> +<a class="jxr_linenumber" name="140" href="#140">140</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="141" href="#141">141</a> @InterfaceAudience.Private +<a class="jxr_linenumber" name="142" href="#142">142</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">RegionSplitter</a> { +<a class="jxr_linenumber" name="143" href="#143">143</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> Log LOG = LogFactory.getLog(RegionSplitter.<strong class="jxr_keyword">class</strong>); +<a class="jxr_linenumber" name="144" href="#144">144</a> +<a class="jxr_linenumber" name="145" href="#145">145</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="146" href="#146">146</a> <em class="jxr_javadoccomment"> * A generic interface for the RegionSplitter code to use for all it's</em> +<a class="jxr_linenumber" name="147" href="#147">147</a> <em class="jxr_javadoccomment"> * functionality. Note that the original authors of this code use</em> +<a class="jxr_linenumber" name="148" href="#148">148</a> <em class="jxr_javadoccomment"> * {@link HexStringSplit} to partition their table and set it as default, but</em> +<a class="jxr_linenumber" name="149" href="#149">149</a> <em class="jxr_javadoccomment"> * provided this for your custom algorithm. To use, create a new derived class</em> +<a class="jxr_linenumber" name="150" href="#150">150</a> <em class="jxr_javadoccomment"> * from this interface and call {@link RegionSplitter#createPresplitTable} or</em> +<a class="jxr_linenumber" name="151" href="#151">151</a> <em class="jxr_javadoccomment"> * RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with the</em> +<a class="jxr_linenumber" name="152" href="#152">152</a> <em class="jxr_javadoccomment"> * argument splitClassName giving the name of your class.</em> +<a class="jxr_linenumber" name="153" href="#153">153</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="154" href="#154">154</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">interface</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> { +<a class="jxr_linenumber" name="155" href="#155">155</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="156" href="#156">156</a> <em class="jxr_javadoccomment"> * Split a pre-existing region into 2 regions.</em> +<a class="jxr_linenumber" name="157" href="#157">157</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="158" href="#158">158</a> <em class="jxr_javadoccomment"> * @param start</em> +<a class="jxr_linenumber" name="159" href="#159">159</a> <em class="jxr_javadoccomment"> * first row (inclusive)</em> +<a class="jxr_linenumber" name="160" href="#160">160</a> <em class="jxr_javadoccomment"> * @param end</em> +<a class="jxr_linenumber" name="161" href="#161">161</a> <em class="jxr_javadoccomment"> * last row (exclusive)</em> +<a class="jxr_linenumber" name="162" href="#162">162</a> <em class="jxr_javadoccomment"> * @return the split row to use</em> +<a class="jxr_linenumber" name="163" href="#163">163</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="164" href="#164">164</a> byte[] split(byte[] start, byte[] end); +<a class="jxr_linenumber" name="165" href="#165">165</a> +<a class="jxr_linenumber" name="166" href="#166">166</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="167" href="#167">167</a> <em class="jxr_javadoccomment"> * Split an entire table.</em> +<a class="jxr_linenumber" name="168" href="#168">168</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="169" href="#169">169</a> <em class="jxr_javadoccomment"> * @param numRegions</em> +<a class="jxr_linenumber" name="170" href="#170">170</a> <em class="jxr_javadoccomment"> * number of regions to split the table into</em> +<a class="jxr_linenumber" name="171" href="#171">171</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="172" href="#172">172</a> <em class="jxr_javadoccomment"> * @throws RuntimeException</em> +<a class="jxr_linenumber" name="173" href="#173">173</a> <em class="jxr_javadoccomment"> * user input is validated at this time. may throw a runtime</em> +<a class="jxr_linenumber" name="174" href="#174">174</a> <em class="jxr_javadoccomment"> * exception in response to a parse failure</em> +<a class="jxr_linenumber" name="175" href="#175">175</a> <em class="jxr_javadoccomment"> * @return array of split keys for the initial regions of the table. The</em> +<a class="jxr_linenumber" name="176" href="#176">176</a> <em class="jxr_javadoccomment"> * length of the returned array should be numRegions-1.</em> +<a class="jxr_linenumber" name="177" href="#177">177</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="178" href="#178">178</a> byte[][] split(<strong class="jxr_keyword">int</strong> numRegions); +<a class="jxr_linenumber" name="179" href="#179">179</a> +<a class="jxr_linenumber" name="180" href="#180">180</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="181" href="#181">181</a> <em class="jxr_javadoccomment"> * In HBase, the first row is represented by an empty byte array. This might</em> +<a class="jxr_linenumber" name="182" href="#182">182</a> <em class="jxr_javadoccomment"> * cause problems with your split algorithm or row printing. All your APIs</em> +<a class="jxr_linenumber" name="183" href="#183">183</a> <em class="jxr_javadoccomment"> * will be passed firstRow() instead of empty array.</em> +<a class="jxr_linenumber" name="184" href="#184">184</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="185" href="#185">185</a> <em class="jxr_javadoccomment"> * @return your representation of your first row</em> +<a class="jxr_linenumber" name="186" href="#186">186</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="187" href="#187">187</a> byte[] firstRow(); +<a class="jxr_linenumber" name="188" href="#188">188</a> +<a class="jxr_linenumber" name="189" href="#189">189</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="190" href="#190">190</a> <em class="jxr_javadoccomment"> * In HBase, the last row is represented by an empty byte array. This might</em> +<a class="jxr_linenumber" name="191" href="#191">191</a> <em class="jxr_javadoccomment"> * cause problems with your split algorithm or row printing. All your APIs</em> +<a class="jxr_linenumber" name="192" href="#192">192</a> <em class="jxr_javadoccomment"> * will be passed firstRow() instead of empty array.</em> +<a class="jxr_linenumber" name="193" href="#193">193</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="194" href="#194">194</a> <em class="jxr_javadoccomment"> * @return your representation of your last row</em> +<a class="jxr_linenumber" name="195" href="#195">195</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="196" href="#196">196</a> byte[] lastRow(); +<a class="jxr_linenumber" name="197" href="#197">197</a> +<a class="jxr_linenumber" name="198" href="#198">198</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="199" href="#199">199</a> <em class="jxr_javadoccomment"> * In HBase, the last row is represented by an empty byte array. Set this</em> +<a class="jxr_linenumber" name="200" href="#200">200</a> <em class="jxr_javadoccomment"> * value to help the split code understand how to evenly divide the first</em> +<a class="jxr_linenumber" name="201" href="#201">201</a> <em class="jxr_javadoccomment"> * region.</em> +<a class="jxr_linenumber" name="202" href="#202">202</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="203" href="#203">203</a> <em class="jxr_javadoccomment"> * @param userInput</em> +<a class="jxr_linenumber" name="204" href="#204">204</a> <em class="jxr_javadoccomment"> * raw user input (may throw RuntimeException on parse failure)</em> +<a class="jxr_linenumber" name="205" href="#205">205</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="206" href="#206">206</a> <strong class="jxr_keyword">void</strong> setFirstRow(String userInput); +<a class="jxr_linenumber" name="207" href="#207">207</a> +<a class="jxr_linenumber" name="208" href="#208">208</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="209" href="#209">209</a> <em class="jxr_javadoccomment"> * In HBase, the last row is represented by an empty byte array. Set this</em> +<a class="jxr_linenumber" name="210" href="#210">210</a> <em class="jxr_javadoccomment"> * value to help the split code understand how to evenly divide the last</em> +<a class="jxr_linenumber" name="211" href="#211">211</a> <em class="jxr_javadoccomment"> * region. Note that this last row is inclusive for all rows sharing the</em> +<a class="jxr_linenumber" name="212" href="#212">212</a> <em class="jxr_javadoccomment"> * same prefix.</em> +<a class="jxr_linenumber" name="213" href="#213">213</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="214" href="#214">214</a> <em class="jxr_javadoccomment"> * @param userInput</em> +<a class="jxr_linenumber" name="215" href="#215">215</a> <em class="jxr_javadoccomment"> * raw user input (may throw RuntimeException on parse failure)</em> +<a class="jxr_linenumber" name="216" href="#216">216</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="217" href="#217">217</a> <strong class="jxr_keyword">void</strong> setLastRow(String userInput); +<a class="jxr_linenumber" name="218" href="#218">218</a> +<a class="jxr_linenumber" name="219" href="#219">219</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="220" href="#220">220</a> <em class="jxr_javadoccomment"> * @param input</em> +<a class="jxr_linenumber" name="221" href="#221">221</a> <em class="jxr_javadoccomment"> * user or file input for row</em> +<a class="jxr_linenumber" name="222" href="#222">222</a> <em class="jxr_javadoccomment"> * @return byte array representation of this row for HBase</em> +<a class="jxr_linenumber" name="223" href="#223">223</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="224" href="#224">224</a> byte[] strToRow(String input); +<a class="jxr_linenumber" name="225" href="#225">225</a> +<a class="jxr_linenumber" name="226" href="#226">226</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="227" href="#227">227</a> <em class="jxr_javadoccomment"> * @param row</em> +<a class="jxr_linenumber" name="228" href="#228">228</a> <em class="jxr_javadoccomment"> * byte array representing a row in HBase</em> +<a class="jxr_linenumber" name="229" href="#229">229</a> <em class="jxr_javadoccomment"> * @return String to use for debug &amp; file printing</em> +<a class="jxr_linenumber" name="230" href="#230">230</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="231" href="#231">231</a> String rowToStr(byte[] row); +<a class="jxr_linenumber" name="232" href="#232">232</a> +<a class="jxr_linenumber" name="233" href="#233">233</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="234" href="#234">234</a> <em class="jxr_javadoccomment"> * @return the separator character to use when storing / printing the row</em> +<a class="jxr_linenumber" name="235" href="#235">235</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="236" href="#236">236</a> String separator(); +<a class="jxr_linenumber" name="237" href="#237">237</a> +<a class="jxr_linenumber" name="238" href="#238">238</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="239" href="#239">239</a> <em class="jxr_javadoccomment"> * Set the first row</em> +<a class="jxr_linenumber" name="240" href="#240">240</a> <em class="jxr_javadoccomment"> * @param userInput byte array of the row key.</em> +<a class="jxr_linenumber" name="241" href="#241">241</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="242" href="#242">242</a> <strong class="jxr_keyword">void</strong> setFirstRow(byte[] userInput); +<a class="jxr_linenumber" name="243" href="#243">243</a> +<a class="jxr_linenumber" name="244" href="#244">244</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="245" href="#245">245</a> <em class="jxr_javadoccomment"> * Set the last row</em> +<a class="jxr_linenumber" name="246" href="#246">246</a> <em class="jxr_javadoccomment"> * @param userInput byte array of the row key.</em> +<a class="jxr_linenumber" name="247" href="#247">247</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="248" href="#248">248</a> <strong class="jxr_keyword">void</strong> setLastRow(byte[] userInput); +<a class="jxr_linenumber" name="249" href="#249">249</a> } +<a class="jxr_linenumber" name="250" href="#250">250</a> +<a class="jxr_linenumber" name="251" href="#251">251</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="252" href="#252">252</a> <em class="jxr_javadoccomment"> * The main function for the RegionSplitter application. Common uses:</em> +<a class="jxr_linenumber" name="253" href="#253">253</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="254" href="#254">254</a> <em class="jxr_javadoccomment"> * <ul></em> +<a class="jxr_linenumber" name="255" href="#255">255</a> <em class="jxr_javadoccomment"> * <li>create a table named 'myTable' with 60 pre-split regions containing 2</em> +<a class="jxr_linenumber" name="256" href="#256">256</a> <em class="jxr_javadoccomment"> * column families 'test' &amp; 'rs', assuming the keys are hex-encoded ASCII:</em> +<a class="jxr_linenumber" name="257" href="#257">257</a> <em class="jxr_javadoccomment"> * <ul></em> +<a class="jxr_linenumber" name="258" href="#258">258</a> <em class="jxr_javadoccomment"> * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs</em> +<a class="jxr_linenumber" name="259" href="#259">259</a> <em class="jxr_javadoccomment"> * myTable HexStringSplit</em> +<a class="jxr_linenumber" name="260" href="#260">260</a> <em class="jxr_javadoccomment"> * </ul></em> +<a class="jxr_linenumber" name="261" href="#261">261</a> <em class="jxr_javadoccomment"> * <li>perform a rolling split of 'myTable' (i.e. 60 =&gt; 120 regions), # 2</em> +<a class="jxr_linenumber" name="262" href="#262">262</a> <em class="jxr_javadoccomment"> * outstanding splits at a time, assuming keys are uniformly distributed</em> +<a class="jxr_linenumber" name="263" href="#263">263</a> <em class="jxr_javadoccomment"> * bytes:</em> +<a class="jxr_linenumber" name="264" href="#264">264</a> <em class="jxr_javadoccomment"> * <ul></em> +<a class="jxr_linenumber" name="265" href="#265">265</a> <em class="jxr_javadoccomment"> * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable</em> +<a class="jxr_linenumber" name="266" href="#266">266</a> <em class="jxr_javadoccomment"> * UniformSplit</em> +<a class="jxr_linenumber" name="267" href="#267">267</a> <em class="jxr_javadoccomment"> * </ul></em> <a class="jxr_linenumber" name="268" href="#268">268</a> <em class="jxr_javadoccomment"> * </ul></em> -<a class="jxr_linenumber" name="269" href="#269">269</a> <em class="jxr_javadoccomment"> * </ul></em> -<a class="jxr_linenumber" name="270" href="#270">270</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="271" href="#271">271</a> <em class="jxr_javadoccomment"> * There are two SplitAlgorithms built into RegionSplitter, HexStringSplit</em> -<a class="jxr_linenumber" name="272" href="#272">272</a> <em class="jxr_javadoccomment"> * and UniformSplit. These are different strategies for choosing region</em> -<a class="jxr_linenumber" name="273" href="#273">273</a> <em class="jxr_javadoccomment"> * boundaries. See their source code for details.</em> -<a class="jxr_linenumber" name="274" href="#274">274</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="275" href="#275">275</a> <em class="jxr_javadoccomment"> * @param args</em> -<a class="jxr_linenumber" name="276" href="#276">276</a> <em class="jxr_javadoccomment"> * Usage: RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt;</em> -<a class="jxr_linenumber" name="277" href="#277">277</a> <em class="jxr_javadoccomment"> * &lt;-c &lt;# regions&gt; -f &lt;family:family:...&gt; | -r</em> -<a class="jxr_linenumber" name="278" href="#278">278</a> <em class="jxr_javadoccomment"> * [-o &lt;# outstanding splits&gt;]&gt;</em> -<a class="jxr_linenumber" name="279" href="#279">279</a> <em class="jxr_javadoccomment"> * [-D &lt;conf.param=value&gt;]</em> -<a class="jxr_linenumber" name="280" href="#280">280</a> <em class="jxr_javadoccomment"> * @throws IOException</em> -<a class="jxr_linenumber" name="281" href="#281">281</a> <em class="jxr_javadoccomment"> * HBase IO problem</em> -<a class="jxr_linenumber" name="282" href="#282">282</a> <em class="jxr_javadoccomment"> * @throws InterruptedException</em> -<a class="jxr_linenumber" name="283" href="#283">283</a> <em class="jxr_javadoccomment"> * user requested exit</em> -<a class="jxr_linenumber" name="284" href="#284">284</a> <em class="jxr_javadoccomment"> * @throws ParseException</em> -<a class="jxr_linenumber" name="285" href="#285">285</a> <em class="jxr_javadoccomment"> * problem parsing user input</em> -<a class="jxr_linenumber" name="286" href="#286">286</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="287" href="#287">287</a> @SuppressWarnings(<span class="jxr_string">"static-access"</span>) -<a class="jxr_linenumber" name="288" href="#288">288</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">void</strong> main(String[] args) <strong class="jxr_keyword">throws</strong> IOException, -<a class="jxr_linenumber" name="289" href="#289">289</a> InterruptedException, ParseException { -<a class="jxr_linenumber" name="290" href="#290">290</a> Configuration conf = HBaseConfiguration.create(); -<a class="jxr_linenumber" name="291" href="#291">291</a> -<a class="jxr_linenumber" name="292" href="#292">292</a> <em class="jxr_comment">// parse user input</em> -<a class="jxr_linenumber" name="293" href="#293">293</a> Options opt = <strong class="jxr_keyword">new</strong> Options(); -<a class="jxr_linenumber" name="294" href="#294">294</a> opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"property=value"</span>).hasArg() -<a class="jxr_linenumber" name="295" href="#295">295</a> .withDescription(<span class="jxr_string">"Override HBase Configuration Settings"</span>).create(<span class="jxr_string">"D"</span>)); -<a class="jxr_linenumber" name="296" href="#296">296</a> opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"region count"</span>).hasArg() -<a class="jxr_linenumber" name="297" href="#297">297</a> .withDescription( -<a class="jxr_linenumber" name="298" href="#298">298</a> <span class="jxr_string">"Create a new table with a pre-split number of regions"</span>) -<a class="jxr_linenumber" name="299" href="#299">299</a> .create(<span class="jxr_string">"c"</span>)); -<a class="jxr_linenumber" name="300" href="#300">300</a> opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"family:family:..."</span>).hasArg() -<a class="jxr_linenumber" name="301" href="#301">301</a> .withDescription( -<a class="jxr_linenumber" name="302" href="#302">302</a> <span class="jxr_string">"Column Families to create with new table. Required with -c"</span>) -<a class="jxr_linenumber" name="303" href="#303">303</a> .create(<span class="jxr_string">"f"</span>)); -<a class="jxr_linenumber" name="304" href="#304">304</a> opt.addOption(<span class="jxr_string">"h"</span>, false, <span class="jxr_string">"Print this usage help"</span>); -<a class="jxr_linenumber" name="305" href="#305">305</a> opt.addOption(<span class="jxr_string">"r"</span>, false, <span class="jxr_string">"Perform a rolling split of an existing region"</span>); -<a class="jxr_linenumber" name="306" href="#306">306</a> opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"count"</span>).hasArg().withDescription( -<a class="jxr_linenumber" name="307" href="#307">307</a> <span class="jxr_string">"Max outstanding splits that have unfinished major compactions"</span>) -<a class="jxr_linenumber" name="308" href="#308">308</a> .create(<span class="jxr_string">"o"</span>)); -<a class="jxr_linenumber" name="309" href="#309">309</a> opt.addOption(<strong class="jxr_keyword">null</strong>, <span class="jxr_string">"firstrow"</span>, <strong class="jxr_keyword">true</strong>, -<a class="jxr_linenumber" name="310" href="#310">310</a> <span class="jxr_string">"First Row in Table for Split Algorithm"</span>); -<a class="jxr_linenumber" name="311" href="#311">311</a> opt.addOption(<strong class="jxr_keyword">null</strong>, <span class="jxr_string">"lastrow"</span>, <strong class="jxr_keyword">true</strong>, -<a class="jxr_linenumber" name="312" href="#312">312</a> <span class="jxr_string">"Last Row in Table for Split Algorithm"</span>); -<a class="jxr_linenumber" name="313" href="#313">313</a> opt.addOption(<strong class="jxr_keyword">null</strong>, <span class="jxr_string">"risky"</span>, false, -<a class="jxr_linenumber" name="314" href="#314">314</a> <span class="jxr_string">"Skip verification steps to complete quickly."</span> -<a class="jxr_linenumber" name="315" href="#315">315</a> + <span class="jxr_string">"STRONGLY DISCOURAGED for production systems. "</span>); -<a class="jxr_linenumber" name="316" href="#316">316</a> CommandLine cmd = <strong class="jxr_keyword">new</strong> GnuParser().parse(opt, args); -<a class="jxr_linenumber" name="317" href="#317">317</a> -<a class="jxr_linenumber" name="318" href="#318">318</a> <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"D"</span>)) { -<a class="jxr_linenumber" name="319" href="#319">319</a> <strong class="jxr_keyword">for</strong> (String confOpt : cmd.getOptionValues(<span class="jxr_string">"D"</span>)) { -<a class="jxr_linenumber" name="320" href="#320">320</a> String[] kv = confOpt.split(<span class="jxr_string">"="</span>, 2); -<a class="jxr_linenumber" name="321" href="#321">321</a> <strong class="jxr_keyword">if</strong> (kv.length == 2) { -<a class="jxr_linenumber" name="322" href="#322">322</a> conf.set(kv[0], kv[1]); -<a class="jxr_linenumber" name="323" href="#323">323</a> LOG.debug(<span class="jxr_string">"-D configuration override: "</span> + kv[0] + <span class="jxr_string">"="</span> + kv[1]); -<a class="jxr_linenumber" name="324" href="#324">324</a> } <strong class="jxr_keyword">else</strong> { -<a class="jxr_linenumber" name="325" href="#325">325</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> ParseException(<span class="jxr_string">"-D option format invalid: "</span> + confOpt); -<a class="jxr_linenumber" name="326" href="#326">326</a> } -<a class="jxr_linenumber" name="327" href="#327">327</a> } -<a class="jxr_linenumber" name="328" href="#328">328</a> } -<a class="jxr_linenumber" name="329" href="#329">329</a> -<a class="jxr_linenumber" name="330" href="#330">330</a> <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"risky"</span>)) { -<a class="jxr_linenumber" name="331" href="#331">331</a> conf.setBoolean(<span class="jxr_string">"split.verify"</span>, false); -<a class="jxr_linenumber" name="332" href="#332">332</a> } -<a class="jxr_linenumber" name="333" href="#333">333</a> -<a class="jxr_linenumber" name="334" href="#334">334</a> <strong class="jxr_keyword">boolean</strong> createTable = cmd.hasOption(<span class="jxr_string">"c"</span>) && cmd.hasOption(<span class="jxr_string">"f"</span>); -<a class="jxr_linenumber" name="335" href="#335">335</a> <strong class="jxr_keyword">boolean</strong> rollingSplit = cmd.hasOption(<span class="jxr_string">"r"</span>); -<a class="jxr_linenumber" name="336" href="#336">336</a> <strong class="jxr_keyword">boolean</strong> oneOperOnly = createTable ^ rollingSplit; -<a class="jxr_linenumber" name="337" href="#337">337</a> -<a class="jxr_linenumber" name="338" href="#338">338</a> <strong class="jxr_keyword">if</strong> (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption(<span class="jxr_string">"h"</span>)) { -<a class="jxr_linenumber" name="339" href="#339">339</a> <strong class="jxr_keyword">new</strong> HelpFormatter().printHelp(<span class="jxr_string">"RegionSplitter <TABLE> <SPLITALGORITHM>\n"</span>+ -<a class="jxr_linenumber" name="340" href="#340">340</a> <span class="jxr_string">"SPLITALGORITHM is a java class name of a class implementing "</span> + -<a class="jxr_linenumber" name="341" href="#341">341</a> <span class="jxr_string">"SplitAlgorithm, or one of the special strings HexStringSplit "</span> + -<a class="jxr_linenumber" name="342" href="#342">342</a> <span class="jxr_string">"or UniformSplit, which are built-in split algorithms. "</span> + -<a class="jxr_linenumber" name="343" href="#343">343</a> <span class="jxr_string">"HexStringSplit treats keys as hexadecimal ASCII, and "</span> + -<a class="jxr_linenumber" name="344" href="#344">344</a> <span class="jxr_string">"UniformSplit treats keys as arbitrary bytes."</span>, opt); -<a class="jxr_linenumber" name="345" href="#345">345</a> <strong class="jxr_keyword">return</strong>; -<a class="jxr_linenumber" name="346" href="#346">346</a> } -<a class="jxr_linenumber" name="347" href="#347">347</a> <a href="../../../../../org/apache/hadoop/hbase/TableName.html">TableName</a> tableName = TableName.valueOf(cmd.getArgs()[0]); -<a class="jxr_linenumber" name="348" href="#348">348</a> String splitClass = cmd.getArgs()[1]; -<a class="jxr_linenumber" name="349" href="#349">349</a> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> splitAlgo = newSplitAlgoInstance(conf, splitClass); -<a class="jxr_linenumber" name="350" href="#350">350</a> -<a class="jxr_linenumber" name="351" href="#351">351</a> <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"firstrow"</span>)) { -<a class="jxr_linenumber" name="352" href="#352">352</a> splitAlgo.setFirstRow(cmd.getOptionValue(<span class="jxr_string">"firstrow"</span>)); -<a class="jxr_linenumber" name="353" href="#353">353</a> } -<a class="jxr_linenumber" name="354" href="#354">354</a> <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"lastrow"</span>)) { -<a class="jxr_linenumber" name="355" href="#355">355</a> splitAlgo.setLastRow(cmd.getOptionValue(<span class="jxr_string">"lastrow"</span>)); -<a class="jxr_linenumber" name="356" href="#356">356</a> } -<a class="jxr_linenumber" name="357" href="#357">357</a> -<a class="jxr_linenumber" name="358" href="#358">358</a> <strong class="jxr_keyword">if</strong> (createTable) { -<a class="jxr_linenumber" name="359" href="#359">359</a> conf.set(<span class="jxr_string">"split.count"</span>, cmd.getOptionValue(<span class="jxr_string">"c"</span>)); -<a class="jxr_linenumber" name="360" href="#360">360</a> createPresplitTable(tableName, splitAlgo, cmd.getOptionValue(<span class="jxr_string">"f"</span>).split(<span class="jxr_string">":"</span>), conf); -<a class="jxr_linenumber" name="361" href="#361">361</a> } -<a class="jxr_linenumber" name="362" href="#362">362</a> -<a class="jxr_linenumber" name="363" href="#363">363</a> <strong class="jxr_keyword">if</strong> (rollingSplit) { -<a class="jxr_linenumber" name="364" href="#364">364</a> <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"o"</span>)) { -<a class="jxr_linenumber" name="365" href="#365">365</a> conf.set(<span class="jxr_string">"split.outstanding"</span>, cmd.getOptionValue(<span class="jxr_string">"o"</span>)); -<a class="jxr_linenumber" name="366" href="#366">366</a> } -<a class="jxr_linenumber" name="367" href="#367">367</a> rollingSplit(tableName, splitAlgo, conf); -<a class="jxr_linenumber" name="368" href="#368">368</a> } -<a class="jxr_linenumber" name="369" href="#369">369</a> } -<a class="jxr_linenumber" name="370" href="#370">370</a> -<a class="jxr_linenumber" name="371" href="#371">371</a> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">void</strong> createPresplitTable(<a href="../../../../../org/apache/hadoop/hbase/TableName.html">TableName</a> tableName, <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> splitAlgo, -<a class="jxr_linenumber" name="372" href="#372">372</a> String[] columnFamilies, Configuration conf) -<a class="jxr_linenumber" name="373" href="#373">373</a> <strong class="jxr_keyword">throws</strong> IOException, InterruptedException { -<a class="jxr_linenumber" name="374" href="#374">374</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> splitCount = conf.getInt(<span class="jxr_string">"split.count"</span>, 0); -<a class="jxr_linenumber" name="375" href="#375">375</a> Preconditions.checkArgument(splitCount > 1, <span class="jxr_string">"Split count must be > 1"</span>); -<a class="jxr_linenumber" name="376" href="#376">376</a> -<a class="jxr_linenumber" name="377" href="#377">377</a> Preconditions.checkArgument(columnFamilies.length > 0, -<a class="jxr_linenumber" name="378" href="#378">378</a> <span class="jxr_string">"Must specify at least one column family. "</span>); -<a class="jxr_linenumber" name="379" href="#379">379</a> LOG.debug(<span class="jxr_string">"Creating table "</span> + tableName + <span class="jxr_string">" with "</span> + columnFamilies.length -<a class="jxr_linenumber" name="380" href="#380">380</a> + <span class="jxr_string">" column families. Presplitting to "</span> + splitCount + <span class="jxr_string">" regions"</span>); -<a class="jxr_linenumber" name="381" href="#381">381</a> -<a class="jxr_linenumber" name="382" href="#382">382</a> <a href="../../../../../org/apache/hadoop/hbase/HTableDescriptor.html">HTableDescriptor</a> desc = <strong class="jxr_keyword">new</strong> <a href="../../../../../org/apache/hadoop/hbase/HTableDescriptor.html">HTableDescriptor</a>(tableName); -<a class="jxr_linenumber" name="383" href="#383">383</a> <strong class="jxr_keyword">for</strong> (String cf : columnFamilies) { -<a class="jxr_linenumber" name="384" href="#384">384</a> desc.addFamily(<strong class="jxr_keyword">new</strong> <a href="../../../../../org/apache/hadoop/hbase/HColumnDescriptor.html">HColumnDescriptor</a>(Bytes.toBytes(cf))); -<a class="jxr_linenumber" name="385" href="#385">385</a> } -<a class="jxr_linenumber" name="386" href="#386">386</a> <strong class="jxr_keyword">try</strong> (Connection connection = ConnectionFactory.createConnection(conf)) { -<a class="jxr_linenumber" name="387" href="#387">387</a> <a href="../../../../../org/apache/hadoop/hbase/client/Admin.html">Admin</a> admin = connection.getAdmin(); -<a class="jxr_linenumber" name="388" href="#388">388</a> <strong class="jxr_keyword">try</strong> { -<a class="jxr_linenumber" name="389" href="#389">389</a> Preconditions.checkArgument(!admin.tableExists(tableName), -<a class="jxr_linenumber" name="390" href="#390">390</a> <span class="jxr_string">"Table already exists: "</span> + tableName); -<a class="jxr_linenumber" name="391" href="#391">391</a> admin.createTable(desc, splitAlgo.split(splitCount)); -<a class="jxr_linenumber" name="392" href="#392">392</a> } <strong class="jxr_keyword">finally</strong> { -<a class="jxr_linenumber" name="393" href="#393">393</a> admin.close(); -<a class="jxr_linenumber" name="394" href="#394">394</a> } -<a class="jxr_linenumber" name="395" href="#395">395</a> LOG.debug(<span class="jxr_string">"Table created! Waiting for regions to show online in META..."</span>); -<a class="jxr_linenumber" name="396" href="#396">396</a> <strong class="jxr_keyword">if</strong> (!conf.getBoolean(<span class="jxr_string">"split.verify"</span>, <strong class="jxr_keyword">true</strong>)) { -<a class="jxr_linenumber" name="397" href="#397">397</a> <em class="jxr_comment">// NOTE: createTable is synchronous on the table, but not on the regions</em> -<a class="jxr_linenumber" name="398" href="#398">398</a> <strong class="jxr_keyword">int</strong> onlineRegions = 0; -<a class="jxr_linenumber" name="399" href="#399">399</a> <strong class="jxr_keyword">while</strong> (onlineRegions < splitCount) { -<a class="jxr_linenumber" name="400" href="#400">400</a> onlineRegions = MetaTableAccessor.getRegionCount(connection, tableName); -<a class="jxr_linenumber" name="401" href="#401">401</a> LOG.debug(onlineRegions + <span class="jxr_string">" of "</span> + splitCount + <span class="jxr_string">" regions online..."</span>); -<a class="jxr_linenumber" name="402" href="#402">402</a> <strong class="jxr_keyword">if</strong> (onlineRegions < splitCount) { -<a class="jxr_linenumber" name="403" href="#403">403</a> Thread.sleep(10 * 1000); <em class="jxr_comment">// sleep</em> -<a class="jxr_linenumber" name="404" href="#404">404</a> } -<a class="jxr_linenumber" name="405" href="#405">405</a> } -<a class="jxr_linenumber" name="406" href="#406">406</a> } -<a class="jxr_linenumber" name="407" href="#407">407</a> LOG.debug(<span class="jxr_string">"Finished creating table with "</span> + splitCount + <span class="jxr_string">" regions"</span>); -<a class="jxr_linenumber" name="408" href="#408">408</a> } -<a class="jxr_linenumber" name="409" href="#409">409</a> } -<a class="jxr_linenumber" name="410" href="#410">410</a> -<a class="jxr_linenumber" name="411" href="#411">411</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="412" href="#412">412</a> <em class="jxr_javadoccomment"> * Alternative getCurrentNrHRS which is no longer available.</em> -<a class="jxr_linenumber" name="413" href="#413">413</a> <em class="jxr_javadoccomment"> * @param connection</em> -<a class="jxr_linenumber" name="414" href="#414">414</a> <em class="jxr_javadoccomment"> * @return Rough count of regionservers out on cluster.</em> -<a class="jxr_linenumber" name="415" href="#415">415</a> <em class="jxr_javadoccomment"> * @throws IOException </em> -<a class="jxr_linenumber" name="416" href="#416">416</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="417" href="#417">417</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> getRegionServerCount(<strong class="jxr_keyword">final</strong> <a href="../../../../../org/apache/hadoop/hbase/client/Connection.html">Connection</a> connection) <strong class="jxr_keyword">throws</strong> IOException { -<a class="jxr_linenumber" name="418" href="#418">418</a> <strong class="jxr_keyword">try</strong> (Admin admin = connection.getAdmin()) { -<a class="jxr_linenumber" name="419" href="#419">419</a> <a href="../../../../../org/apache/hadoop/hbase/ClusterStatus.html">ClusterStatus</a> status = admin.getClusterStatus(); -<a class="jxr_linenumber" name="420" href="#420">420</a> Collection<ServerName> servers = status.getServers(); -<a class="jxr_linenumber" name="421" href="#421">421</a> <strong class="jxr_keyword">return</strong> servers == <strong class="jxr_keyword">null</strong> || servers.isEmpty()? 0: servers.size(); -<a class="jxr_linenumber" name="422" href="#422">422</a> } -<a class="jxr_linenumber" name="423" href="#423">423</a> } -<a class="jxr_linenumber" name="424" href="#424">424</a> -<a class="jxr_linenumber" name="425" href="#425">425</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> byte [] readFile(<strong class="jxr_keyword">final</strong> FileSystem fs, <strong class="jxr_keyword">final</strong> Path path) <strong class="jxr_keyword">throws</strong> IOException { -<a class="jxr_linenumber" name="426" href="#426">426</a> FSDataInputStream tmpIn = fs.open(path); -<a class="jxr_linenumber" name="427" href="#427">427</a> <strong class="jxr_keyword">try</strong> { -<a class="jxr_linenumber" name="428" href="#428">428</a> byte [] rawData = <strong class="jxr_keyword">new</strong> byte[tmpIn.available()]; -<a class="jxr_linenumber" name="429" href="#429">429</a> tmpIn.readFully(rawData); -<a class="jxr_linenumber" name="430" href="#430">430</a> <strong class="jxr_keyword">return</strong> rawData; -<a class="jxr_linenumber" name="431" href="#431">431</a> } <strong class="jxr_keyword">finally</strong> { -<a class="jxr_linenumber" name="432" href="#432">432</a> tmpIn.close(); -<a class="jxr_linenumber" name="433" href="#433">433</a> } -<a class="jxr_linenumber" name="434" href="#434">434</a> } -<a class="jxr_linenumber" name="435" href="#435">435</a> -<a class="jxr_linenumber" name="436" href="#436">436</a> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">void</strong> rollingSplit(<a href="../../../../../org/apache/hadoop/hbase/TableName.html">TableName</a> tableName, <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> splitAlgo, Configuration conf) -<a class="jxr_linenumber" name="437" href="#437">437</a> <strong class="jxr_keyword">throws</strong> IOException, InterruptedException { -<a class="jxr_linenumber" name="438" href="#438">438</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> minOS = conf.getInt(<span class="jxr_string">"split.outstanding"</span>, 2); -<a class="jxr_linenumber" name="439" href="#439">439</a> <strong cla <TRUNCATED>