http://git-wip-us.apache.org/repos/asf/hbase-site/blob/c7e84622/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.html
----------------------------------------------------------------------
diff --git 
a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.html 
b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.html
index ef7c1e1..a541dfa 100644
--- a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.html
+++ b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.html
@@ -26,51 +26,51 @@
 <span class="sourceLineNo">018</span> */<a name="line.18"></a>
 <span class="sourceLineNo">019</span>package 
org.apache.hadoop.hbase.mapreduce;<a name="line.19"></a>
 <span class="sourceLineNo">020</span><a name="line.20"></a>
-<span class="sourceLineNo">021</span>import 
com.google.protobuf.InvalidProtocolBufferException;<a name="line.21"></a>
-<span class="sourceLineNo">022</span>import 
com.codahale.metrics.MetricRegistry;<a name="line.22"></a>
-<span class="sourceLineNo">023</span>import org.apache.commons.logging.Log;<a 
name="line.23"></a>
-<span class="sourceLineNo">024</span>import 
org.apache.commons.logging.LogFactory;<a name="line.24"></a>
-<span class="sourceLineNo">025</span>import 
org.apache.hadoop.conf.Configuration;<a name="line.25"></a>
-<span class="sourceLineNo">026</span>import org.apache.hadoop.fs.FileSystem;<a 
name="line.26"></a>
-<span class="sourceLineNo">027</span>import org.apache.hadoop.fs.Path;<a 
name="line.27"></a>
-<span class="sourceLineNo">028</span>import 
org.apache.hadoop.hbase.HBaseConfiguration;<a name="line.28"></a>
-<span class="sourceLineNo">029</span>import 
org.apache.hadoop.hbase.HConstants;<a name="line.29"></a>
-<span class="sourceLineNo">030</span>import 
org.apache.hadoop.hbase.MetaTableAccessor;<a name="line.30"></a>
-<span class="sourceLineNo">031</span>import 
org.apache.hadoop.hbase.TableName;<a name="line.31"></a>
-<span class="sourceLineNo">032</span>import 
org.apache.hadoop.hbase.classification.InterfaceAudience;<a name="line.32"></a>
-<span class="sourceLineNo">033</span>import 
org.apache.hadoop.hbase.classification.InterfaceStability;<a name="line.33"></a>
-<span class="sourceLineNo">034</span>import 
org.apache.hadoop.hbase.client.Connection;<a name="line.34"></a>
-<span class="sourceLineNo">035</span>import 
org.apache.hadoop.hbase.client.ConnectionFactory;<a name="line.35"></a>
-<span class="sourceLineNo">036</span>import 
org.apache.hadoop.hbase.client.Put;<a name="line.36"></a>
-<span class="sourceLineNo">037</span>import 
org.apache.hadoop.hbase.client.Scan;<a name="line.37"></a>
-<span class="sourceLineNo">038</span>import 
org.apache.hadoop.hbase.io.ImmutableBytesWritable;<a name="line.38"></a>
-<span class="sourceLineNo">039</span>import 
org.apache.hadoop.hbase.protobuf.ProtobufUtil;<a name="line.39"></a>
-<span class="sourceLineNo">040</span>import 
org.apache.hadoop.hbase.protobuf.generated.ClientProtos;<a name="line.40"></a>
-<span class="sourceLineNo">041</span>import 
org.apache.hadoop.hbase.security.User;<a name="line.41"></a>
-<span class="sourceLineNo">042</span>import 
org.apache.hadoop.hbase.security.UserProvider;<a name="line.42"></a>
-<span class="sourceLineNo">043</span>import 
org.apache.hadoop.hbase.security.token.TokenUtil;<a name="line.43"></a>
-<span class="sourceLineNo">044</span>import 
org.apache.hadoop.hbase.util.Base64;<a name="line.44"></a>
-<span class="sourceLineNo">045</span>import 
org.apache.hadoop.hbase.util.Bytes;<a name="line.45"></a>
-<span class="sourceLineNo">046</span>import 
org.apache.hadoop.hbase.zookeeper.ZKConfig;<a name="line.46"></a>
-<span class="sourceLineNo">047</span>import org.apache.hadoop.io.Writable;<a 
name="line.47"></a>
-<span class="sourceLineNo">048</span>import 
org.apache.hadoop.mapreduce.InputFormat;<a name="line.48"></a>
-<span class="sourceLineNo">049</span>import org.apache.hadoop.mapreduce.Job;<a 
name="line.49"></a>
-<span class="sourceLineNo">050</span>import 
org.apache.hadoop.util.StringUtils;<a name="line.50"></a>
-<span class="sourceLineNo">051</span><a name="line.51"></a>
-<span class="sourceLineNo">052</span>import java.io.File;<a name="line.52"></a>
-<span class="sourceLineNo">053</span>import java.io.IOException;<a 
name="line.53"></a>
-<span class="sourceLineNo">054</span>import java.net.URL;<a name="line.54"></a>
-<span class="sourceLineNo">055</span>import java.net.URLDecoder;<a 
name="line.55"></a>
-<span class="sourceLineNo">056</span>import java.util.ArrayList;<a 
name="line.56"></a>
-<span class="sourceLineNo">057</span>import java.util.Collection;<a 
name="line.57"></a>
-<span class="sourceLineNo">058</span>import java.util.Enumeration;<a 
name="line.58"></a>
-<span class="sourceLineNo">059</span>import java.util.HashMap;<a 
name="line.59"></a>
-<span class="sourceLineNo">060</span>import java.util.HashSet;<a 
name="line.60"></a>
-<span class="sourceLineNo">061</span>import java.util.List;<a 
name="line.61"></a>
-<span class="sourceLineNo">062</span>import java.util.Map;<a 
name="line.62"></a>
-<span class="sourceLineNo">063</span>import java.util.Set;<a 
name="line.63"></a>
-<span class="sourceLineNo">064</span>import java.util.zip.ZipEntry;<a 
name="line.64"></a>
-<span class="sourceLineNo">065</span>import java.util.zip.ZipFile;<a 
name="line.65"></a>
+<span class="sourceLineNo">021</span>import java.io.File;<a name="line.21"></a>
+<span class="sourceLineNo">022</span>import java.io.IOException;<a 
name="line.22"></a>
+<span class="sourceLineNo">023</span>import java.net.URL;<a name="line.23"></a>
+<span class="sourceLineNo">024</span>import java.net.URLDecoder;<a 
name="line.24"></a>
+<span class="sourceLineNo">025</span>import java.util.ArrayList;<a 
name="line.25"></a>
+<span class="sourceLineNo">026</span>import java.util.Collection;<a 
name="line.26"></a>
+<span class="sourceLineNo">027</span>import java.util.Enumeration;<a 
name="line.27"></a>
+<span class="sourceLineNo">028</span>import java.util.HashMap;<a 
name="line.28"></a>
+<span class="sourceLineNo">029</span>import java.util.HashSet;<a 
name="line.29"></a>
+<span class="sourceLineNo">030</span>import java.util.List;<a 
name="line.30"></a>
+<span class="sourceLineNo">031</span>import java.util.Map;<a 
name="line.31"></a>
+<span class="sourceLineNo">032</span>import java.util.Set;<a 
name="line.32"></a>
+<span class="sourceLineNo">033</span>import java.util.zip.ZipEntry;<a 
name="line.33"></a>
+<span class="sourceLineNo">034</span>import java.util.zip.ZipFile;<a 
name="line.34"></a>
+<span class="sourceLineNo">035</span><a name="line.35"></a>
+<span class="sourceLineNo">036</span>import org.apache.commons.logging.Log;<a 
name="line.36"></a>
+<span class="sourceLineNo">037</span>import 
org.apache.commons.logging.LogFactory;<a name="line.37"></a>
+<span class="sourceLineNo">038</span>import 
org.apache.hadoop.conf.Configuration;<a name="line.38"></a>
+<span class="sourceLineNo">039</span>import org.apache.hadoop.fs.FileSystem;<a 
name="line.39"></a>
+<span class="sourceLineNo">040</span>import org.apache.hadoop.fs.Path;<a 
name="line.40"></a>
+<span class="sourceLineNo">041</span>import 
org.apache.hadoop.hbase.HBaseConfiguration;<a name="line.41"></a>
+<span class="sourceLineNo">042</span>import 
org.apache.hadoop.hbase.HConstants;<a name="line.42"></a>
+<span class="sourceLineNo">043</span>import 
org.apache.hadoop.hbase.MetaTableAccessor;<a name="line.43"></a>
+<span class="sourceLineNo">044</span>import 
org.apache.hadoop.hbase.TableName;<a name="line.44"></a>
+<span class="sourceLineNo">045</span>import 
org.apache.hadoop.hbase.classification.InterfaceAudience;<a name="line.45"></a>
+<span class="sourceLineNo">046</span>import 
org.apache.hadoop.hbase.classification.InterfaceStability;<a name="line.46"></a>
+<span class="sourceLineNo">047</span>import 
org.apache.hadoop.hbase.client.Connection;<a name="line.47"></a>
+<span class="sourceLineNo">048</span>import 
org.apache.hadoop.hbase.client.ConnectionFactory;<a name="line.48"></a>
+<span class="sourceLineNo">049</span>import 
org.apache.hadoop.hbase.client.Put;<a name="line.49"></a>
+<span class="sourceLineNo">050</span>import 
org.apache.hadoop.hbase.client.Scan;<a name="line.50"></a>
+<span class="sourceLineNo">051</span>import 
org.apache.hadoop.hbase.io.ImmutableBytesWritable;<a name="line.51"></a>
+<span class="sourceLineNo">052</span>import 
org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;<a name="line.52"></a>
+<span class="sourceLineNo">053</span>import 
org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;<a 
name="line.53"></a>
+<span class="sourceLineNo">054</span>import 
org.apache.hadoop.hbase.security.User;<a name="line.54"></a>
+<span class="sourceLineNo">055</span>import 
org.apache.hadoop.hbase.security.UserProvider;<a name="line.55"></a>
+<span class="sourceLineNo">056</span>import 
org.apache.hadoop.hbase.security.token.TokenUtil;<a name="line.56"></a>
+<span class="sourceLineNo">057</span>import 
org.apache.hadoop.hbase.util.Base64;<a name="line.57"></a>
+<span class="sourceLineNo">058</span>import 
org.apache.hadoop.hbase.util.Bytes;<a name="line.58"></a>
+<span class="sourceLineNo">059</span>import 
org.apache.hadoop.hbase.zookeeper.ZKConfig;<a name="line.59"></a>
+<span class="sourceLineNo">060</span>import org.apache.hadoop.io.Writable;<a 
name="line.60"></a>
+<span class="sourceLineNo">061</span>import 
org.apache.hadoop.mapreduce.InputFormat;<a name="line.61"></a>
+<span class="sourceLineNo">062</span>import org.apache.hadoop.mapreduce.Job;<a 
name="line.62"></a>
+<span class="sourceLineNo">063</span>import 
org.apache.hadoop.util.StringUtils;<a name="line.63"></a>
+<span class="sourceLineNo">064</span><a name="line.64"></a>
+<span class="sourceLineNo">065</span>import 
com.codahale.metrics.MetricRegistry;<a name="line.65"></a>
 <span class="sourceLineNo">066</span><a name="line.66"></a>
 <span class="sourceLineNo">067</span>/**<a name="line.67"></a>
 <span class="sourceLineNo">068</span> * Utility for {@link TableMapper} and 
{@link TableReducer}<a name="line.68"></a>
@@ -583,461 +583,454 @@
 <span class="sourceLineNo">575</span>   */<a name="line.575"></a>
 <span class="sourceLineNo">576</span>  public static Scan 
convertStringToScan(String base64) throws IOException {<a name="line.576"></a>
 <span class="sourceLineNo">577</span>    byte [] decoded = 
Base64.decode(base64);<a name="line.577"></a>
-<span class="sourceLineNo">578</span>    ClientProtos.Scan scan;<a 
name="line.578"></a>
-<span class="sourceLineNo">579</span>    try {<a name="line.579"></a>
-<span class="sourceLineNo">580</span>      scan = 
ClientProtos.Scan.parseFrom(decoded);<a name="line.580"></a>
-<span class="sourceLineNo">581</span>    } catch 
(InvalidProtocolBufferException ipbe) {<a name="line.581"></a>
-<span class="sourceLineNo">582</span>      throw new IOException(ipbe);<a 
name="line.582"></a>
-<span class="sourceLineNo">583</span>    }<a name="line.583"></a>
-<span class="sourceLineNo">584</span><a name="line.584"></a>
-<span class="sourceLineNo">585</span>    return ProtobufUtil.toScan(scan);<a 
name="line.585"></a>
-<span class="sourceLineNo">586</span>  }<a name="line.586"></a>
-<span class="sourceLineNo">587</span><a name="line.587"></a>
-<span class="sourceLineNo">588</span>  /**<a name="line.588"></a>
-<span class="sourceLineNo">589</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.589"></a>
-<span class="sourceLineNo">590</span>   * appropriately set up the JobConf.<a 
name="line.590"></a>
-<span class="sourceLineNo">591</span>   *<a name="line.591"></a>
-<span class="sourceLineNo">592</span>   * @param table  The output table.<a 
name="line.592"></a>
-<span class="sourceLineNo">593</span>   * @param reducer  The reducer class to 
use.<a name="line.593"></a>
-<span class="sourceLineNo">594</span>   * @param job  The current job to 
adjust.<a name="line.594"></a>
-<span class="sourceLineNo">595</span>   * @throws IOException When determining 
the region count fails.<a name="line.595"></a>
-<span class="sourceLineNo">596</span>   */<a name="line.596"></a>
-<span class="sourceLineNo">597</span>  public static void 
initTableReducerJob(String table,<a name="line.597"></a>
-<span class="sourceLineNo">598</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job)<a name="line.598"></a>
-<span class="sourceLineNo">599</span>  throws IOException {<a 
name="line.599"></a>
-<span class="sourceLineNo">600</span>    initTableReducerJob(table, reducer, 
job, null);<a name="line.600"></a>
-<span class="sourceLineNo">601</span>  }<a name="line.601"></a>
-<span class="sourceLineNo">602</span><a name="line.602"></a>
-<span class="sourceLineNo">603</span>  /**<a name="line.603"></a>
-<span class="sourceLineNo">604</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.604"></a>
-<span class="sourceLineNo">605</span>   * appropriately set up the JobConf.<a 
name="line.605"></a>
-<span class="sourceLineNo">606</span>   *<a name="line.606"></a>
-<span class="sourceLineNo">607</span>   * @param table  The output table.<a 
name="line.607"></a>
-<span class="sourceLineNo">608</span>   * @param reducer  The reducer class to 
use.<a name="line.608"></a>
-<span class="sourceLineNo">609</span>   * @param job  The current job to 
adjust.<a name="line.609"></a>
-<span class="sourceLineNo">610</span>   * @param partitioner  Partitioner to 
use. Pass &lt;code&gt;null&lt;/code&gt; to use<a name="line.610"></a>
-<span class="sourceLineNo">611</span>   * default partitioner.<a 
name="line.611"></a>
-<span class="sourceLineNo">612</span>   * @throws IOException When determining 
the region count fails.<a name="line.612"></a>
-<span class="sourceLineNo">613</span>   */<a name="line.613"></a>
-<span class="sourceLineNo">614</span>  public static void 
initTableReducerJob(String table,<a name="line.614"></a>
-<span class="sourceLineNo">615</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job,<a name="line.615"></a>
-<span class="sourceLineNo">616</span>    Class partitioner) throws IOException 
{<a name="line.616"></a>
-<span class="sourceLineNo">617</span>    initTableReducerJob(table, reducer, 
job, partitioner, null, null, null);<a name="line.617"></a>
-<span class="sourceLineNo">618</span>  }<a name="line.618"></a>
-<span class="sourceLineNo">619</span><a name="line.619"></a>
-<span class="sourceLineNo">620</span>  /**<a name="line.620"></a>
-<span class="sourceLineNo">621</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.621"></a>
-<span class="sourceLineNo">622</span>   * appropriately set up the JobConf.<a 
name="line.622"></a>
-<span class="sourceLineNo">623</span>   *<a name="line.623"></a>
-<span class="sourceLineNo">624</span>   * @param table  The output table.<a 
name="line.624"></a>
-<span class="sourceLineNo">625</span>   * @param reducer  The reducer class to 
use.<a name="line.625"></a>
-<span class="sourceLineNo">626</span>   * @param job  The current job to 
adjust.  Make sure the passed job is<a name="line.626"></a>
-<span class="sourceLineNo">627</span>   * carrying all necessary HBase 
configuration.<a name="line.627"></a>
-<span class="sourceLineNo">628</span>   * @param partitioner  Partitioner to 
use. Pass &lt;code&gt;null&lt;/code&gt; to use<a name="line.628"></a>
-<span class="sourceLineNo">629</span>   * default partitioner.<a 
name="line.629"></a>
-<span class="sourceLineNo">630</span>   * @param quorumAddress Distant cluster 
to write to; default is null for<a name="line.630"></a>
-<span class="sourceLineNo">631</span>   * output to the cluster that is 
designated in &lt;code&gt;hbase-site.xml&lt;/code&gt;.<a name="line.631"></a>
-<span class="sourceLineNo">632</span>   * Set this String to the zookeeper 
ensemble of an alternate remote cluster<a name="line.632"></a>
-<span class="sourceLineNo">633</span>   * when you would have the reduce write 
a cluster that is other than the<a name="line.633"></a>
-<span class="sourceLineNo">634</span>   * default; e.g. copying tables between 
clusters, the source would be<a name="line.634"></a>
-<span class="sourceLineNo">635</span>   * designated by 
&lt;code&gt;hbase-site.xml&lt;/code&gt; and this param would have the<a 
name="line.635"></a>
-<span class="sourceLineNo">636</span>   * ensemble address of the remote 
cluster.  The format to pass is particular.<a name="line.636"></a>
-<span class="sourceLineNo">637</span>   * Pass &lt;code&gt; 
&amp;lt;hbase.zookeeper.quorum&amp;gt;:&amp;lt;<a name="line.637"></a>
-<span class="sourceLineNo">638</span>   *             
hbase.zookeeper.client.port&amp;gt;:&amp;lt;zookeeper.znode.parent&amp;gt;<a 
name="line.638"></a>
-<span class="sourceLineNo">639</span>   * &lt;/code&gt; such as 
&lt;code&gt;server,server2,server3:2181:/hbase&lt;/code&gt;.<a 
name="line.639"></a>
-<span class="sourceLineNo">640</span>   * @param serverClass redefined 
hbase.regionserver.class<a name="line.640"></a>
-<span class="sourceLineNo">641</span>   * @param serverImpl redefined 
hbase.regionserver.impl<a name="line.641"></a>
-<span class="sourceLineNo">642</span>   * @throws IOException When determining 
the region count fails.<a name="line.642"></a>
-<span class="sourceLineNo">643</span>   */<a name="line.643"></a>
-<span class="sourceLineNo">644</span>  public static void 
initTableReducerJob(String table,<a name="line.644"></a>
-<span class="sourceLineNo">645</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job,<a name="line.645"></a>
-<span class="sourceLineNo">646</span>    Class partitioner, String 
quorumAddress, String serverClass,<a name="line.646"></a>
-<span class="sourceLineNo">647</span>    String serverImpl) throws IOException 
{<a name="line.647"></a>
-<span class="sourceLineNo">648</span>    initTableReducerJob(table, reducer, 
job, partitioner, quorumAddress,<a name="line.648"></a>
-<span class="sourceLineNo">649</span>        serverClass, serverImpl, true);<a 
name="line.649"></a>
-<span class="sourceLineNo">650</span>  }<a name="line.650"></a>
-<span class="sourceLineNo">651</span><a name="line.651"></a>
-<span class="sourceLineNo">652</span>  /**<a name="line.652"></a>
-<span class="sourceLineNo">653</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.653"></a>
-<span class="sourceLineNo">654</span>   * appropriately set up the JobConf.<a 
name="line.654"></a>
-<span class="sourceLineNo">655</span>   *<a name="line.655"></a>
-<span class="sourceLineNo">656</span>   * @param table  The output table.<a 
name="line.656"></a>
-<span class="sourceLineNo">657</span>   * @param reducer  The reducer class to 
use.<a name="line.657"></a>
-<span class="sourceLineNo">658</span>   * @param job  The current job to 
adjust.  Make sure the passed job is<a name="line.658"></a>
-<span class="sourceLineNo">659</span>   * carrying all necessary HBase 
configuration.<a name="line.659"></a>
-<span class="sourceLineNo">660</span>   * @param partitioner  Partitioner to 
use. Pass &lt;code&gt;null&lt;/code&gt; to use<a name="line.660"></a>
-<span class="sourceLineNo">661</span>   * default partitioner.<a 
name="line.661"></a>
-<span class="sourceLineNo">662</span>   * @param quorumAddress Distant cluster 
to write to; default is null for<a name="line.662"></a>
-<span class="sourceLineNo">663</span>   * output to the cluster that is 
designated in &lt;code&gt;hbase-site.xml&lt;/code&gt;.<a name="line.663"></a>
-<span class="sourceLineNo">664</span>   * Set this String to the zookeeper 
ensemble of an alternate remote cluster<a name="line.664"></a>
-<span class="sourceLineNo">665</span>   * when you would have the reduce write 
a cluster that is other than the<a name="line.665"></a>
-<span class="sourceLineNo">666</span>   * default; e.g. copying tables between 
clusters, the source would be<a name="line.666"></a>
-<span class="sourceLineNo">667</span>   * designated by 
&lt;code&gt;hbase-site.xml&lt;/code&gt; and this param would have the<a 
name="line.667"></a>
-<span class="sourceLineNo">668</span>   * ensemble address of the remote 
cluster.  The format to pass is particular.<a name="line.668"></a>
-<span class="sourceLineNo">669</span>   * Pass &lt;code&gt; 
&amp;lt;hbase.zookeeper.quorum&amp;gt;:&amp;lt;<a name="line.669"></a>
-<span class="sourceLineNo">670</span>   *             
hbase.zookeeper.client.port&amp;gt;:&amp;lt;zookeeper.znode.parent&amp;gt;<a 
name="line.670"></a>
-<span class="sourceLineNo">671</span>   * &lt;/code&gt; such as 
&lt;code&gt;server,server2,server3:2181:/hbase&lt;/code&gt;.<a 
name="line.671"></a>
-<span class="sourceLineNo">672</span>   * @param serverClass redefined 
hbase.regionserver.class<a name="line.672"></a>
-<span class="sourceLineNo">673</span>   * @param serverImpl redefined 
hbase.regionserver.impl<a name="line.673"></a>
-<span class="sourceLineNo">674</span>   * @param addDependencyJars upload 
HBase jars and jars for any of the configured<a name="line.674"></a>
-<span class="sourceLineNo">675</span>   *           job classes via the 
distributed cache (tmpjars).<a name="line.675"></a>
-<span class="sourceLineNo">676</span>   * @throws IOException When determining 
the region count fails.<a name="line.676"></a>
-<span class="sourceLineNo">677</span>   */<a name="line.677"></a>
-<span class="sourceLineNo">678</span>  public static void 
initTableReducerJob(String table,<a name="line.678"></a>
-<span class="sourceLineNo">679</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job,<a name="line.679"></a>
-<span class="sourceLineNo">680</span>    Class partitioner, String 
quorumAddress, String serverClass,<a name="line.680"></a>
-<span class="sourceLineNo">681</span>    String serverImpl, boolean 
addDependencyJars) throws IOException {<a name="line.681"></a>
-<span class="sourceLineNo">682</span><a name="line.682"></a>
-<span class="sourceLineNo">683</span>    Configuration conf = 
job.getConfiguration();<a name="line.683"></a>
-<span class="sourceLineNo">684</span>    HBaseConfiguration.merge(conf, 
HBaseConfiguration.create(conf));<a name="line.684"></a>
-<span class="sourceLineNo">685</span>    
job.setOutputFormatClass(TableOutputFormat.class);<a name="line.685"></a>
-<span class="sourceLineNo">686</span>    if (reducer != null) 
job.setReducerClass(reducer);<a name="line.686"></a>
-<span class="sourceLineNo">687</span>    
conf.set(TableOutputFormat.OUTPUT_TABLE, table);<a name="line.687"></a>
-<span class="sourceLineNo">688</span>    conf.setStrings("io.serializations", 
conf.get("io.serializations"),<a name="line.688"></a>
-<span class="sourceLineNo">689</span>        
MutationSerialization.class.getName(), ResultSerialization.class.getName());<a 
name="line.689"></a>
-<span class="sourceLineNo">690</span>    // If passed a quorum/ensemble 
address, pass it on to TableOutputFormat.<a name="line.690"></a>
-<span class="sourceLineNo">691</span>    if (quorumAddress != null) {<a 
name="line.691"></a>
-<span class="sourceLineNo">692</span>      // Calling this will validate the 
format<a name="line.692"></a>
-<span class="sourceLineNo">693</span>      
ZKConfig.validateClusterKey(quorumAddress);<a name="line.693"></a>
-<span class="sourceLineNo">694</span>      
conf.set(TableOutputFormat.QUORUM_ADDRESS,quorumAddress);<a name="line.694"></a>
-<span class="sourceLineNo">695</span>    }<a name="line.695"></a>
-<span class="sourceLineNo">696</span>    if (serverClass != null &amp;&amp; 
serverImpl != null) {<a name="line.696"></a>
-<span class="sourceLineNo">697</span>      
conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);<a 
name="line.697"></a>
-<span class="sourceLineNo">698</span>      
conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);<a 
name="line.698"></a>
-<span class="sourceLineNo">699</span>    }<a name="line.699"></a>
-<span class="sourceLineNo">700</span>    
job.setOutputKeyClass(ImmutableBytesWritable.class);<a name="line.700"></a>
-<span class="sourceLineNo">701</span>    
job.setOutputValueClass(Writable.class);<a name="line.701"></a>
-<span class="sourceLineNo">702</span>    if (partitioner == 
HRegionPartitioner.class) {<a name="line.702"></a>
-<span class="sourceLineNo">703</span>      
job.setPartitionerClass(HRegionPartitioner.class);<a name="line.703"></a>
-<span class="sourceLineNo">704</span>      int regions = 
MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));<a 
name="line.704"></a>
-<span class="sourceLineNo">705</span>      if (job.getNumReduceTasks() &gt; 
regions) {<a name="line.705"></a>
-<span class="sourceLineNo">706</span>        job.setNumReduceTasks(regions);<a 
name="line.706"></a>
-<span class="sourceLineNo">707</span>      }<a name="line.707"></a>
-<span class="sourceLineNo">708</span>    } else if (partitioner != null) {<a 
name="line.708"></a>
-<span class="sourceLineNo">709</span>      
job.setPartitionerClass(partitioner);<a name="line.709"></a>
-<span class="sourceLineNo">710</span>    }<a name="line.710"></a>
+<span class="sourceLineNo">578</span>    return 
ProtobufUtil.toScan(ClientProtos.Scan.parseFrom(decoded));<a 
name="line.578"></a>
+<span class="sourceLineNo">579</span>  }<a name="line.579"></a>
+<span class="sourceLineNo">580</span><a name="line.580"></a>
+<span class="sourceLineNo">581</span>  /**<a name="line.581"></a>
+<span class="sourceLineNo">582</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.582"></a>
+<span class="sourceLineNo">583</span>   * appropriately set up the JobConf.<a 
name="line.583"></a>
+<span class="sourceLineNo">584</span>   *<a name="line.584"></a>
+<span class="sourceLineNo">585</span>   * @param table  The output table.<a 
name="line.585"></a>
+<span class="sourceLineNo">586</span>   * @param reducer  The reducer class to 
use.<a name="line.586"></a>
+<span class="sourceLineNo">587</span>   * @param job  The current job to 
adjust.<a name="line.587"></a>
+<span class="sourceLineNo">588</span>   * @throws IOException When determining 
the region count fails.<a name="line.588"></a>
+<span class="sourceLineNo">589</span>   */<a name="line.589"></a>
+<span class="sourceLineNo">590</span>  public static void 
initTableReducerJob(String table,<a name="line.590"></a>
+<span class="sourceLineNo">591</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job)<a name="line.591"></a>
+<span class="sourceLineNo">592</span>  throws IOException {<a 
name="line.592"></a>
+<span class="sourceLineNo">593</span>    initTableReducerJob(table, reducer, 
job, null);<a name="line.593"></a>
+<span class="sourceLineNo">594</span>  }<a name="line.594"></a>
+<span class="sourceLineNo">595</span><a name="line.595"></a>
+<span class="sourceLineNo">596</span>  /**<a name="line.596"></a>
+<span class="sourceLineNo">597</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.597"></a>
+<span class="sourceLineNo">598</span>   * appropriately set up the JobConf.<a 
name="line.598"></a>
+<span class="sourceLineNo">599</span>   *<a name="line.599"></a>
+<span class="sourceLineNo">600</span>   * @param table  The output table.<a 
name="line.600"></a>
+<span class="sourceLineNo">601</span>   * @param reducer  The reducer class to 
use.<a name="line.601"></a>
+<span class="sourceLineNo">602</span>   * @param job  The current job to 
adjust.<a name="line.602"></a>
+<span class="sourceLineNo">603</span>   * @param partitioner  Partitioner to 
use. Pass &lt;code&gt;null&lt;/code&gt; to use<a name="line.603"></a>
+<span class="sourceLineNo">604</span>   * default partitioner.<a 
name="line.604"></a>
+<span class="sourceLineNo">605</span>   * @throws IOException When determining 
the region count fails.<a name="line.605"></a>
+<span class="sourceLineNo">606</span>   */<a name="line.606"></a>
+<span class="sourceLineNo">607</span>  public static void 
initTableReducerJob(String table,<a name="line.607"></a>
+<span class="sourceLineNo">608</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job,<a name="line.608"></a>
+<span class="sourceLineNo">609</span>    Class partitioner) throws IOException 
{<a name="line.609"></a>
+<span class="sourceLineNo">610</span>    initTableReducerJob(table, reducer, 
job, partitioner, null, null, null);<a name="line.610"></a>
+<span class="sourceLineNo">611</span>  }<a name="line.611"></a>
+<span class="sourceLineNo">612</span><a name="line.612"></a>
+<span class="sourceLineNo">613</span>  /**<a name="line.613"></a>
+<span class="sourceLineNo">614</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.614"></a>
+<span class="sourceLineNo">615</span>   * appropriately set up the JobConf.<a 
name="line.615"></a>
+<span class="sourceLineNo">616</span>   *<a name="line.616"></a>
+<span class="sourceLineNo">617</span>   * @param table  The output table.<a 
name="line.617"></a>
+<span class="sourceLineNo">618</span>   * @param reducer  The reducer class to 
use.<a name="line.618"></a>
+<span class="sourceLineNo">619</span>   * @param job  The current job to 
adjust.  Make sure the passed job is<a name="line.619"></a>
+<span class="sourceLineNo">620</span>   * carrying all necessary HBase 
configuration.<a name="line.620"></a>
+<span class="sourceLineNo">621</span>   * @param partitioner  Partitioner to 
use. Pass &lt;code&gt;null&lt;/code&gt; to use<a name="line.621"></a>
+<span class="sourceLineNo">622</span>   * default partitioner.<a 
name="line.622"></a>
+<span class="sourceLineNo">623</span>   * @param quorumAddress Distant cluster 
to write to; default is null for<a name="line.623"></a>
+<span class="sourceLineNo">624</span>   * output to the cluster that is 
designated in &lt;code&gt;hbase-site.xml&lt;/code&gt;.<a name="line.624"></a>
+<span class="sourceLineNo">625</span>   * Set this String to the zookeeper 
ensemble of an alternate remote cluster<a name="line.625"></a>
+<span class="sourceLineNo">626</span>   * when you would have the reduce write 
a cluster that is other than the<a name="line.626"></a>
+<span class="sourceLineNo">627</span>   * default; e.g. copying tables between 
clusters, the source would be<a name="line.627"></a>
+<span class="sourceLineNo">628</span>   * designated by 
&lt;code&gt;hbase-site.xml&lt;/code&gt; and this param would have the<a 
name="line.628"></a>
+<span class="sourceLineNo">629</span>   * ensemble address of the remote 
cluster.  The format to pass is particular.<a name="line.629"></a>
+<span class="sourceLineNo">630</span>   * Pass &lt;code&gt; 
&amp;lt;hbase.zookeeper.quorum&amp;gt;:&amp;lt;<a name="line.630"></a>
+<span class="sourceLineNo">631</span>   *             
hbase.zookeeper.client.port&amp;gt;:&amp;lt;zookeeper.znode.parent&amp;gt;<a 
name="line.631"></a>
+<span class="sourceLineNo">632</span>   * &lt;/code&gt; such as 
&lt;code&gt;server,server2,server3:2181:/hbase&lt;/code&gt;.<a 
name="line.632"></a>
+<span class="sourceLineNo">633</span>   * @param serverClass redefined 
hbase.regionserver.class<a name="line.633"></a>
+<span class="sourceLineNo">634</span>   * @param serverImpl redefined 
hbase.regionserver.impl<a name="line.634"></a>
+<span class="sourceLineNo">635</span>   * @throws IOException When determining 
the region count fails.<a name="line.635"></a>
+<span class="sourceLineNo">636</span>   */<a name="line.636"></a>
+<span class="sourceLineNo">637</span>  public static void 
initTableReducerJob(String table,<a name="line.637"></a>
+<span class="sourceLineNo">638</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job,<a name="line.638"></a>
+<span class="sourceLineNo">639</span>    Class partitioner, String 
quorumAddress, String serverClass,<a name="line.639"></a>
+<span class="sourceLineNo">640</span>    String serverImpl) throws IOException 
{<a name="line.640"></a>
+<span class="sourceLineNo">641</span>    initTableReducerJob(table, reducer, 
job, partitioner, quorumAddress,<a name="line.641"></a>
+<span class="sourceLineNo">642</span>        serverClass, serverImpl, true);<a 
name="line.642"></a>
+<span class="sourceLineNo">643</span>  }<a name="line.643"></a>
+<span class="sourceLineNo">644</span><a name="line.644"></a>
+<span class="sourceLineNo">645</span>  /**<a name="line.645"></a>
+<span class="sourceLineNo">646</span>   * Use this before submitting a 
TableReduce job. It will<a name="line.646"></a>
+<span class="sourceLineNo">647</span>   * appropriately set up the JobConf.<a 
name="line.647"></a>
+<span class="sourceLineNo">648</span>   *<a name="line.648"></a>
+<span class="sourceLineNo">649</span>   * @param table  The output table.<a 
name="line.649"></a>
+<span class="sourceLineNo">650</span>   * @param reducer  The reducer class to 
use.<a name="line.650"></a>
+<span class="sourceLineNo">651</span>   * @param job  The current job to 
adjust.  Make sure the passed job is<a name="line.651"></a>
+<span class="sourceLineNo">652</span>   * carrying all necessary HBase 
configuration.<a name="line.652"></a>
+<span class="sourceLineNo">653</span>   * @param partitioner  Partitioner to 
use. Pass &lt;code&gt;null&lt;/code&gt; to use<a name="line.653"></a>
+<span class="sourceLineNo">654</span>   * default partitioner.<a 
name="line.654"></a>
+<span class="sourceLineNo">655</span>   * @param quorumAddress Distant cluster 
to write to; default is null for<a name="line.655"></a>
+<span class="sourceLineNo">656</span>   * output to the cluster that is 
designated in &lt;code&gt;hbase-site.xml&lt;/code&gt;.<a name="line.656"></a>
+<span class="sourceLineNo">657</span>   * Set this String to the zookeeper 
ensemble of an alternate remote cluster<a name="line.657"></a>
+<span class="sourceLineNo">658</span>   * when you would have the reduce write 
a cluster that is other than the<a name="line.658"></a>
+<span class="sourceLineNo">659</span>   * default; e.g. copying tables between 
clusters, the source would be<a name="line.659"></a>
+<span class="sourceLineNo">660</span>   * designated by 
&lt;code&gt;hbase-site.xml&lt;/code&gt; and this param would have the<a 
name="line.660"></a>
+<span class="sourceLineNo">661</span>   * ensemble address of the remote 
cluster.  The format to pass is particular.<a name="line.661"></a>
+<span class="sourceLineNo">662</span>   * Pass &lt;code&gt; 
&amp;lt;hbase.zookeeper.quorum&amp;gt;:&amp;lt;<a name="line.662"></a>
+<span class="sourceLineNo">663</span>   *             
hbase.zookeeper.client.port&amp;gt;:&amp;lt;zookeeper.znode.parent&amp;gt;<a 
name="line.663"></a>
+<span class="sourceLineNo">664</span>   * &lt;/code&gt; such as 
&lt;code&gt;server,server2,server3:2181:/hbase&lt;/code&gt;.<a 
name="line.664"></a>
+<span class="sourceLineNo">665</span>   * @param serverClass redefined 
hbase.regionserver.class<a name="line.665"></a>
+<span class="sourceLineNo">666</span>   * @param serverImpl redefined 
hbase.regionserver.impl<a name="line.666"></a>
+<span class="sourceLineNo">667</span>   * @param addDependencyJars upload 
HBase jars and jars for any of the configured<a name="line.667"></a>
+<span class="sourceLineNo">668</span>   *           job classes via the 
distributed cache (tmpjars).<a name="line.668"></a>
+<span class="sourceLineNo">669</span>   * @throws IOException When determining 
the region count fails.<a name="line.669"></a>
+<span class="sourceLineNo">670</span>   */<a name="line.670"></a>
+<span class="sourceLineNo">671</span>  public static void 
initTableReducerJob(String table,<a name="line.671"></a>
+<span class="sourceLineNo">672</span>    Class&lt;? extends TableReducer&gt; 
reducer, Job job,<a name="line.672"></a>
+<span class="sourceLineNo">673</span>    Class partitioner, String 
quorumAddress, String serverClass,<a name="line.673"></a>
+<span class="sourceLineNo">674</span>    String serverImpl, boolean 
addDependencyJars) throws IOException {<a name="line.674"></a>
+<span class="sourceLineNo">675</span><a name="line.675"></a>
+<span class="sourceLineNo">676</span>    Configuration conf = 
job.getConfiguration();<a name="line.676"></a>
+<span class="sourceLineNo">677</span>    HBaseConfiguration.merge(conf, 
HBaseConfiguration.create(conf));<a name="line.677"></a>
+<span class="sourceLineNo">678</span>    
job.setOutputFormatClass(TableOutputFormat.class);<a name="line.678"></a>
+<span class="sourceLineNo">679</span>    if (reducer != null) 
job.setReducerClass(reducer);<a name="line.679"></a>
+<span class="sourceLineNo">680</span>    
conf.set(TableOutputFormat.OUTPUT_TABLE, table);<a name="line.680"></a>
+<span class="sourceLineNo">681</span>    conf.setStrings("io.serializations", 
conf.get("io.serializations"),<a name="line.681"></a>
+<span class="sourceLineNo">682</span>        
MutationSerialization.class.getName(), ResultSerialization.class.getName());<a 
name="line.682"></a>
+<span class="sourceLineNo">683</span>    // If passed a quorum/ensemble 
address, pass it on to TableOutputFormat.<a name="line.683"></a>
+<span class="sourceLineNo">684</span>    if (quorumAddress != null) {<a 
name="line.684"></a>
+<span class="sourceLineNo">685</span>      // Calling this will validate the 
format<a name="line.685"></a>
+<span class="sourceLineNo">686</span>      
ZKConfig.validateClusterKey(quorumAddress);<a name="line.686"></a>
+<span class="sourceLineNo">687</span>      
conf.set(TableOutputFormat.QUORUM_ADDRESS,quorumAddress);<a name="line.687"></a>
+<span class="sourceLineNo">688</span>    }<a name="line.688"></a>
+<span class="sourceLineNo">689</span>    if (serverClass != null &amp;&amp; 
serverImpl != null) {<a name="line.689"></a>
+<span class="sourceLineNo">690</span>      
conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);<a 
name="line.690"></a>
+<span class="sourceLineNo">691</span>      
conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);<a 
name="line.691"></a>
+<span class="sourceLineNo">692</span>    }<a name="line.692"></a>
+<span class="sourceLineNo">693</span>    
job.setOutputKeyClass(ImmutableBytesWritable.class);<a name="line.693"></a>
+<span class="sourceLineNo">694</span>    
job.setOutputValueClass(Writable.class);<a name="line.694"></a>
+<span class="sourceLineNo">695</span>    if (partitioner == 
HRegionPartitioner.class) {<a name="line.695"></a>
+<span class="sourceLineNo">696</span>      
job.setPartitionerClass(HRegionPartitioner.class);<a name="line.696"></a>
+<span class="sourceLineNo">697</span>      int regions = 
MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));<a 
name="line.697"></a>
+<span class="sourceLineNo">698</span>      if (job.getNumReduceTasks() &gt; 
regions) {<a name="line.698"></a>
+<span class="sourceLineNo">699</span>        job.setNumReduceTasks(regions);<a 
name="line.699"></a>
+<span class="sourceLineNo">700</span>      }<a name="line.700"></a>
+<span class="sourceLineNo">701</span>    } else if (partitioner != null) {<a 
name="line.701"></a>
+<span class="sourceLineNo">702</span>      
job.setPartitionerClass(partitioner);<a name="line.702"></a>
+<span class="sourceLineNo">703</span>    }<a name="line.703"></a>
+<span class="sourceLineNo">704</span><a name="line.704"></a>
+<span class="sourceLineNo">705</span>    if (addDependencyJars) {<a 
name="line.705"></a>
+<span class="sourceLineNo">706</span>      addDependencyJars(job);<a 
name="line.706"></a>
+<span class="sourceLineNo">707</span>    }<a name="line.707"></a>
+<span class="sourceLineNo">708</span><a name="line.708"></a>
+<span class="sourceLineNo">709</span>    initCredentials(job);<a 
name="line.709"></a>
+<span class="sourceLineNo">710</span>  }<a name="line.710"></a>
 <span class="sourceLineNo">711</span><a name="line.711"></a>
-<span class="sourceLineNo">712</span>    if (addDependencyJars) {<a 
name="line.712"></a>
-<span class="sourceLineNo">713</span>      addDependencyJars(job);<a 
name="line.713"></a>
-<span class="sourceLineNo">714</span>    }<a name="line.714"></a>
-<span class="sourceLineNo">715</span><a name="line.715"></a>
-<span class="sourceLineNo">716</span>    initCredentials(job);<a 
name="line.716"></a>
-<span class="sourceLineNo">717</span>  }<a name="line.717"></a>
-<span class="sourceLineNo">718</span><a name="line.718"></a>
-<span class="sourceLineNo">719</span>  /**<a name="line.719"></a>
-<span class="sourceLineNo">720</span>   * Ensures that the given number of 
reduce tasks for the given job<a name="line.720"></a>
-<span class="sourceLineNo">721</span>   * configuration does not exceed the 
number of regions for the given table.<a name="line.721"></a>
-<span class="sourceLineNo">722</span>   *<a name="line.722"></a>
-<span class="sourceLineNo">723</span>   * @param table  The table to get the 
region count for.<a name="line.723"></a>
-<span class="sourceLineNo">724</span>   * @param job  The current job to 
adjust.<a name="line.724"></a>
-<span class="sourceLineNo">725</span>   * @throws IOException When retrieving 
the table details fails.<a name="line.725"></a>
-<span class="sourceLineNo">726</span>   */<a name="line.726"></a>
-<span class="sourceLineNo">727</span>  public static void 
limitNumReduceTasks(String table, Job job)<a name="line.727"></a>
-<span class="sourceLineNo">728</span>  throws IOException {<a 
name="line.728"></a>
-<span class="sourceLineNo">729</span>    int regions =<a name="line.729"></a>
-<span class="sourceLineNo">730</span>      
MetaTableAccessor.getRegionCount(job.getConfiguration(), 
TableName.valueOf(table));<a name="line.730"></a>
-<span class="sourceLineNo">731</span>    if (job.getNumReduceTasks() &gt; 
regions)<a name="line.731"></a>
-<span class="sourceLineNo">732</span>      job.setNumReduceTasks(regions);<a 
name="line.732"></a>
-<span class="sourceLineNo">733</span>  }<a name="line.733"></a>
-<span class="sourceLineNo">734</span><a name="line.734"></a>
-<span class="sourceLineNo">735</span>  /**<a name="line.735"></a>
-<span class="sourceLineNo">736</span>   * Sets the number of reduce tasks for 
the given job configuration to the<a name="line.736"></a>
-<span class="sourceLineNo">737</span>   * number of regions the given table 
has.<a name="line.737"></a>
-<span class="sourceLineNo">738</span>   *<a name="line.738"></a>
-<span class="sourceLineNo">739</span>   * @param table  The table to get the 
region count for.<a name="line.739"></a>
-<span class="sourceLineNo">740</span>   * @param job  The current job to 
adjust.<a name="line.740"></a>
-<span class="sourceLineNo">741</span>   * @throws IOException When retrieving 
the table details fails.<a name="line.741"></a>
-<span class="sourceLineNo">742</span>   */<a name="line.742"></a>
-<span class="sourceLineNo">743</span>  public static void 
setNumReduceTasks(String table, Job job)<a name="line.743"></a>
-<span class="sourceLineNo">744</span>  throws IOException {<a 
name="line.744"></a>
-<span class="sourceLineNo">745</span>    
job.setNumReduceTasks(MetaTableAccessor.getRegionCount(job.getConfiguration(),<a
 name="line.745"></a>
-<span class="sourceLineNo">746</span>       TableName.valueOf(table)));<a 
name="line.746"></a>
-<span class="sourceLineNo">747</span>  }<a name="line.747"></a>
-<span class="sourceLineNo">748</span><a name="line.748"></a>
-<span class="sourceLineNo">749</span>  /**<a name="line.749"></a>
-<span class="sourceLineNo">750</span>   * Sets the number of rows to return 
and cache with each scanner iteration.<a name="line.750"></a>
-<span class="sourceLineNo">751</span>   * Higher caching values will enable 
faster mapreduce jobs at the expense of<a name="line.751"></a>
-<span class="sourceLineNo">752</span>   * requiring more heap to contain the 
cached rows.<a name="line.752"></a>
-<span class="sourceLineNo">753</span>   *<a name="line.753"></a>
-<span class="sourceLineNo">754</span>   * @param job The current job to 
adjust.<a name="line.754"></a>
-<span class="sourceLineNo">755</span>   * @param batchSize The number of rows 
to return in batch with each scanner<a name="line.755"></a>
-<span class="sourceLineNo">756</span>   * iteration.<a name="line.756"></a>
-<span class="sourceLineNo">757</span>   */<a name="line.757"></a>
-<span class="sourceLineNo">758</span>  public static void 
setScannerCaching(Job job, int batchSize) {<a name="line.758"></a>
-<span class="sourceLineNo">759</span>    
job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);<a 
name="line.759"></a>
-<span class="sourceLineNo">760</span>  }<a name="line.760"></a>
-<span class="sourceLineNo">761</span><a name="line.761"></a>
-<span class="sourceLineNo">762</span>  /**<a name="line.762"></a>
-<span class="sourceLineNo">763</span>   * Add HBase and its dependencies 
(only) to the job configuration.<a name="line.763"></a>
-<span class="sourceLineNo">764</span>   * &lt;p&gt;<a name="line.764"></a>
-<span class="sourceLineNo">765</span>   * This is intended as a low-level API, 
facilitating code reuse between this<a name="line.765"></a>
-<span class="sourceLineNo">766</span>   * class and its mapred counterpart. It 
also of use to external tools that<a name="line.766"></a>
-<span class="sourceLineNo">767</span>   * need to build a MapReduce job that 
interacts with HBase but want<a name="line.767"></a>
-<span class="sourceLineNo">768</span>   * fine-grained control over the jars 
shipped to the cluster.<a name="line.768"></a>
-<span class="sourceLineNo">769</span>   * &lt;/p&gt;<a name="line.769"></a>
-<span class="sourceLineNo">770</span>   * @param conf The Configuration object 
to extend with dependencies.<a name="line.770"></a>
-<span class="sourceLineNo">771</span>   * @see 
org.apache.hadoop.hbase.mapred.TableMapReduceUtil<a name="line.771"></a>
-<span class="sourceLineNo">772</span>   * @see &lt;a 
href="https://issues.apache.org/jira/browse/PIG-3285"&gt;PIG-3285&lt;/a&gt;<a 
name="line.772"></a>
-<span class="sourceLineNo">773</span>   */<a name="line.773"></a>
-<span class="sourceLineNo">774</span>  public static void 
addHBaseDependencyJars(Configuration conf) throws IOException {<a 
name="line.774"></a>
-<span class="sourceLineNo">775</span><a name="line.775"></a>
-<span class="sourceLineNo">776</span>    // PrefixTreeCodec is part of the 
hbase-prefix-tree module. If not included in MR jobs jar<a name="line.776"></a>
-<span class="sourceLineNo">777</span>    // dependencies, MR jobs that write 
encoded hfiles will fail.<a name="line.777"></a>
-<span class="sourceLineNo">778</span>    // We used reflection here so to 
prevent a circular module dependency.<a name="line.778"></a>
-<span class="sourceLineNo">779</span>    // TODO - if we extract the MR into a 
module, make it depend on hbase-prefix-tree.<a name="line.779"></a>
-<span class="sourceLineNo">780</span>    Class prefixTreeCodecClass = null;<a 
name="line.780"></a>
-<span class="sourceLineNo">781</span>    try {<a name="line.781"></a>
-<span class="sourceLineNo">782</span>      prefixTreeCodecClass =<a 
name="line.782"></a>
-<span class="sourceLineNo">783</span>          
Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");<a 
name="line.783"></a>
-<span class="sourceLineNo">784</span>    } catch (ClassNotFoundException e) 
{<a name="line.784"></a>
-<span class="sourceLineNo">785</span>      // this will show up in unit tests 
but should not show in real deployments<a name="line.785"></a>
-<span class="sourceLineNo">786</span>      LOG.warn("The hbase-prefix-tree 
module jar containing PrefixTreeCodec is not present." +<a name="line.786"></a>
-<span class="sourceLineNo">787</span>          "  Continuing without it.");<a 
name="line.787"></a>
-<span class="sourceLineNo">788</span>    }<a name="line.788"></a>
-<span class="sourceLineNo">789</span><a name="line.789"></a>
-<span class="sourceLineNo">790</span>    addDependencyJarsForClasses(conf,<a 
name="line.790"></a>
-<span class="sourceLineNo">791</span>      // explicitly pull a class from 
each module<a name="line.791"></a>
-<span class="sourceLineNo">792</span>      
org.apache.hadoop.hbase.HConstants.class,                      // 
hbase-common<a name="line.792"></a>
-<span class="sourceLineNo">793</span>      
org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // 
hbase-protocol<a name="line.793"></a>
-<span class="sourceLineNo">794</span>      
org.apache.hadoop.hbase.client.Put.class,                      // 
hbase-client<a name="line.794"></a>
-<span class="sourceLineNo">795</span>      
org.apache.hadoop.hbase.CompatibilityFactory.class,            // 
hbase-hadoop-compat<a name="line.795"></a>
-<span class="sourceLineNo">796</span>      
org.apache.hadoop.hbase.mapreduce.TableMapper.class,           // 
hbase-server<a name="line.796"></a>
-<span class="sourceLineNo">797</span>      prefixTreeCodecClass, //  
hbase-prefix-tree (if null will be skipped)<a name="line.797"></a>
-<span class="sourceLineNo">798</span>      // pull necessary dependencies<a 
name="line.798"></a>
-<span class="sourceLineNo">799</span>      
org.apache.zookeeper.ZooKeeper.class,<a name="line.799"></a>
-<span class="sourceLineNo">800</span>      io.netty.channel.Channel.class,<a 
name="line.800"></a>
-<span class="sourceLineNo">801</span>      
com.google.protobuf.Message.class,<a name="line.801"></a>
-<span class="sourceLineNo">802</span>      
com.google.common.collect.Lists.class,<a name="line.802"></a>
-<span class="sourceLineNo">803</span>      org.apache.htrace.Trace.class,<a 
name="line.803"></a>
-<span class="sourceLineNo">804</span>      
com.codahale.metrics.MetricRegistry.class);<a name="line.804"></a>
-<span class="sourceLineNo">805</span>  }<a name="line.805"></a>
-<span class="sourceLineNo">806</span><a name="line.806"></a>
-<span class="sourceLineNo">807</span>  /**<a name="line.807"></a>
-<span class="sourceLineNo">808</span>   * Returns a classpath string built 
from the content of the "tmpjars" value in {@code conf}.<a name="line.808"></a>
-<span class="sourceLineNo">809</span>   * Also exposed to shell scripts via 
`bin/hbase mapredcp`.<a name="line.809"></a>
-<span class="sourceLineNo">810</span>   */<a name="line.810"></a>
-<span class="sourceLineNo">811</span>  public static String 
buildDependencyClasspath(Configuration conf) {<a name="line.811"></a>
-<span class="sourceLineNo">812</span>    if (conf == null) {<a 
name="line.812"></a>
-<span class="sourceLineNo">813</span>      throw new 
IllegalArgumentException("Must provide a configuration object.");<a 
name="line.813"></a>
-<span class="sourceLineNo">814</span>    }<a name="line.814"></a>
-<span class="sourceLineNo">815</span>    Set&lt;String&gt; paths = new 
HashSet&lt;String&gt;(conf.getStringCollection("tmpjars"));<a 
name="line.815"></a>
-<span class="sourceLineNo">816</span>    if (paths.size() == 0) {<a 
name="line.816"></a>
-<span class="sourceLineNo">817</span>      throw new 
IllegalArgumentException("Configuration contains no tmpjars.");<a 
name="line.817"></a>
-<span class="sourceLineNo">818</span>    }<a name="line.818"></a>
-<span class="sourceLineNo">819</span>    StringBuilder sb = new 
StringBuilder();<a name="line.819"></a>
-<span class="sourceLineNo">820</span>    for (String s : paths) {<a 
name="line.820"></a>
-<span class="sourceLineNo">821</span>      // entries can take the form 
'file:/path/to/file.jar'.<a name="line.821"></a>
-<span class="sourceLineNo">822</span>      int idx = s.indexOf(":");<a 
name="line.822"></a>
-<span class="sourceLineNo">823</span>      if (idx != -1) s = s.substring(idx 
+ 1);<a name="line.823"></a>
-<span class="sourceLineNo">824</span>      if (sb.length() &gt; 0) 
sb.append(File.pathSeparator);<a name="line.824"></a>
-<span class="sourceLineNo">825</span>      sb.append(s);<a name="line.825"></a>
-<span class="sourceLineNo">826</span>    }<a name="line.826"></a>
-<span class="sourceLineNo">827</span>    return sb.toString();<a 
name="line.827"></a>
-<span class="sourceLineNo">828</span>  }<a name="line.828"></a>
-<span class="sourceLineNo">829</span><a name="line.829"></a>
-<span class="sourceLineNo">830</span>  /**<a name="line.830"></a>
-<span class="sourceLineNo">831</span>   * Add the HBase dependency jars as 
well as jars for any of the configured<a name="line.831"></a>
-<span class="sourceLineNo">832</span>   * job classes to the job 
configuration, so that JobClient will ship them<a name="line.832"></a>
-<span class="sourceLineNo">833</span>   * to the cluster and add them to the 
DistributedCache.<a name="line.833"></a>
-<span class="sourceLineNo">834</span>   */<a name="line.834"></a>
-<span class="sourceLineNo">835</span>  public static void 
addDependencyJars(Job job) throws IOException {<a name="line.835"></a>
-<span class="sourceLineNo">836</span>    
addHBaseDependencyJars(job.getConfiguration());<a name="line.836"></a>
-<span class="sourceLineNo">837</span>    try {<a name="line.837"></a>
-<span class="sourceLineNo">838</span>      
addDependencyJarsForClasses(job.getConfiguration(),<a name="line.838"></a>
-<span class="sourceLineNo">839</span>          // when making changes here, 
consider also mapred.TableMapReduceUtil<a name="line.839"></a>
-<span class="sourceLineNo">840</span>          // pull job classes<a 
name="line.840"></a>
-<span class="sourceLineNo">841</span>          job.getMapOutputKeyClass(),<a 
name="line.841"></a>
-<span class="sourceLineNo">842</span>          job.getMapOutputValueClass(),<a 
name="line.842"></a>
-<span class="sourceLineNo">843</span>          job.getInputFormatClass(),<a 
name="line.843"></a>
-<span class="sourceLineNo">844</span>          job.getOutputKeyClass(),<a 
name="line.844"></a>
-<span class="sourceLineNo">845</span>          job.getOutputValueClass(),<a 
name="line.845"></a>
-<span class="sourceLineNo">846</span>          job.getOutputFormatClass(),<a 
name="line.846"></a>
-<span class="sourceLineNo">847</span>          job.getPartitionerClass(),<a 
name="line.847"></a>
-<span class="sourceLineNo">848</span>          job.getCombinerClass());<a 
name="line.848"></a>
-<span class="sourceLineNo">849</span>    } catch (ClassNotFoundException e) 
{<a name="line.849"></a>
-<span class="sourceLineNo">850</span>      throw new IOException(e);<a 
name="line.850"></a>
-<span class="sourceLineNo">851</span>    }<a name="line.851"></a>
-<span class="sourceLineNo">852</span>  }<a name="line.852"></a>
-<span class="sourceLineNo">853</span><a name="line.853"></a>
-<span class="sourceLineNo">854</span>  /**<a name="line.854"></a>
-<span class="sourceLineNo">855</span>   * Add the jars containing the given 
classes to the job's configuration<a name="line.855"></a>
-<span class="sourceLineNo">856</span>   * such that JobClient will ship them 
to the cluster and add them to<a name="line.856"></a>
-<span class="sourceLineNo">857</span>   * the DistributedCache.<a 
name="line.857"></a>
-<span class="sourceLineNo">858</span>   * @deprecated rely on {@link 
#addDependencyJars(Job)} instead.<a name="line.858"></a>
-<span class="sourceLineNo">859</span>   */<a name="line.859"></a>
-<span class="sourceLineNo">860</span>  @Deprecated<a name="line.860"></a>
-<span class="sourceLineNo">861</span>  public static void 
addDependencyJars(Configuration conf,<a name="line.861"></a>
-<span class="sourceLineNo">862</span>      Class&lt;?&gt;... classes) throws 
IOException {<a name="line.862"></a>
-<span class="sourceLineNo">863</span>    LOG.warn("The 
addDependencyJars(Configuration, Class&lt;?&gt;...) method has been deprecated 
since it"<a name="line.863"></a>
-<span class="sourceLineNo">864</span>             + " is easy to use 
incorrectly. Most users should rely on addDependencyJars(Job) " +<a 
name="line.864"></a>
-<span class="sourceLineNo">865</span>             "instead. See HBASE-8386 for 
more details.");<a name="line.865"></a>
-<span class="sourceLineNo">866</span>    addDependencyJarsForClasses(conf, 
classes);<a name="line.866"></a>
-<span class="sourceLineNo">867</span>  }<a name="line.867"></a>
-<span class="sourceLineNo">868</span><a name="line.868"></a>
-<span class="sourceLineNo">869</span>  /**<a name="line.869"></a>
-<span class="sourceLineNo">870</span>   * Add the jars containing the given 
classes to the job's configuration<a name="line.870"></a>
-<span class="sourceLineNo">871</span>   * such that JobClient will ship them 
to the cluster and add them to<a name="line.871"></a>
-<span class="sourceLineNo">872</span>   * the DistributedCache.<a 
name="line.872"></a>
-<span class="sourceLineNo">873</span>   *<a name="line.873"></a>
-<span class="sourceLineNo">874</span>   * N.B. that this method at most adds 
one jar per class given. If there is more than one<a name="line.874"></a>
-<span class="sourceLineNo">875</span>   * jar available containing a class 
with the same name as a given class, we don't define<a name="line.875"></a>
-<span class="sourceLineNo">876</span>   * which of those jars might be 
chosen.<a name="line.876"></a>
-<span class="sourceLineNo">877</span>   *<a name="line.877"></a>
-<span class="sourceLineNo">878</span>   * @param conf The Hadoop Configuration 
to modify<a name="line.878"></a>
-<span class="sourceLineNo">879</span>   * @param classes will add just those 
dependencies needed to find the given classes<a name="line.879"></a>
-<span class="sourceLineNo">880</span>   * @throws IOException if an underlying 
library call fails.<a name="line.880"></a>
-<span class="sourceLineNo">881</span>   */<a name="line.881"></a>
-<span class="sourceLineNo">882</span>  @InterfaceAudience.Private<a 
name="line.882"></a>
-<span class="sourceLineNo">883</span>  public static void 
addDependencyJarsForClasses(Configuration conf,<a name="line.883"></a>
-<span class="sourceLineNo">884</span>      Class&lt;?&gt;... classes) throws 
IOException {<a name="line.884"></a>
-<span class="sourceLineNo">885</span><a name="line.885"></a>
-<span class="sourceLineNo">886</span>    FileSystem localFs = 
FileSystem.getLocal(conf);<a name="line.886"></a>
-<span class="sourceLineNo">887</span>    Set&lt;String&gt; jars = new 
HashSet&lt;String&gt;();<a name="line.887"></a>
-<span class="sourceLineNo">888</span>    // Add jars that are already in the 
tmpjars variable<a name="line.888"></a>
-<span class="sourceLineNo">889</span>    
jars.addAll(conf.getStringCollection("tmpjars"));<a name="line.889"></a>
-<span class="sourceLineNo">890</span><a name="line.890"></a>
-<span class="sourceLineNo">891</span>    // add jars as we find them to a map 
of contents jar name so that we can avoid<a name="line.891"></a>
-<span class="sourceLineNo">892</span>    // creating new jars for classes that 
have already been packaged.<a name="line.892"></a>
-<span class="sourceLineNo">893</span>    Map&lt;String, String&gt; 
packagedClasses = new HashMap&lt;String, String&gt;();<a name="line.893"></a>
-<span class="sourceLineNo">894</span><a name="line.894"></a>
-<span class="sourceLineNo">895</span>    // Add jars containing the specified 
classes<a name="line.895"></a>
-<span class="sourceLineNo">896</span>    for (Class&lt;?&gt; clazz : classes) 
{<a name="line.896"></a>
-<span class="sourceLineNo">897</span>      if (clazz == null) continue;<a 
name="line.897"></a>
-<span class="sourceLineNo">898</span><a name="line.898"></a>
-<span class="sourceLineNo">899</span>      Path path = findOrCreateJar(clazz, 
localFs, packagedClasses);<a name="line.899"></a>
-<span class="sourceLineNo">900</span>      if (path == null) {<a 
name="line.900"></a>
-<span class="sourceLineNo">901</span>        LOG.warn("Could not find jar for 
class " + clazz +<a name="line.901"></a>
-<span class="sourceLineNo">902</span>                 " in order to ship it to 
the cluster.");<a name="line.902"></a>
-<span class="sourceLineNo">903</span>        continue;<a name="line.903"></a>
-<span class="sourceLineNo">904</span>      }<a name="line.904"></a>
-<span class="sourceLineNo">905</span>      if (!localFs.exists(path)) {<a 
name="line.905"></a>
-<span class="sourceLineNo">906</span>        LOG.warn("Could not validate jar 
file " + path + " for class "<a name="line.906"></a>
-<span class="sourceLineNo">907</span>                 + clazz);<a 
name="line.907"></a>
-<span class="sourceLineNo">908</span>        continue;<a name="line.908"></a>
-<span class="sourceLineNo">909</span>      }<a name="line.909"></a>
-<span class="sourceLineNo">910</span>      jars.add(path.toString());<a 
name="line.910"></a>
-<span class="sourceLineNo">911</span>    }<a name="line.911"></a>
-<span class="sourceLineNo">912</span>    if (jars.isEmpty()) return;<a 
name="line.912"></a>
-<span class="sourceLineNo">913</span><a name="line.913"></a>
-<span class="sourceLineNo">914</span>    conf.set("tmpjars", 
StringUtils.arrayToString(jars.toArray(new String[jars.size()])));<a 
name="line.914"></a>
-<span class="sourceLineNo">915</span>  }<a name="line.915"></a>
-<span class="sourceLineNo">916</span><a name="line.916"></a>
-<span class="sourceLineNo">917</span>  /**<a name="line.917"></a>
-<span class="sourceLineNo">918</span>   * Finds the Jar for a class or creates 
it if it doesn't exist. If the class is in<a name="line.918"></a>
-<span class="sourceLineNo">919</span>   * a directory in the classpath, it 
creates a Jar on the fly with the<a name="line.919"></a>
-<span class="sourceLineNo">920</span>   * contents of the directory and 
returns the path to that Jar. If a Jar is<a name="line.920"></a>
-<span class="sourceLineNo">921</span>   * created, it is created in the system 
temporary directory. Otherwise,<a name="line.921"></a>
-<span class="sourceLineNo">922</span>   * returns an existing jar that 
contains a class of the same name. Maintains<a name="line.922"></a>
-<span class="sourceLineNo">923</span>   * a mapping from jar contents to the 
tmp jar created.<a name="line.923"></a>
-<span class="sourceLineNo">924</span>   * @param my_class the class to find.<a 
name="line.924"></a>
-<span class="sourceLineNo">925</span>   * @param fs the FileSystem with which 
to qualify the returned path.<a name="line.925"></a>
-<span class="sourceLineNo">926</span>   * @param packagedClasses a map of 
class name to path.<a name="line.926"></a>
-<span class="sourceLineNo">927</span>   * @return a jar file that contains the 
class.<a name="line.927"></a>
-<span class="sourceLineNo">928</span>   * @throws IOException<a 
name="line.928"></a>
-<span class="sourceLineNo">929</span>   */<a name="line.929"></a>
-<span class="sourceLineNo">930</span>  private static Path 
findOrCreateJar(Class&lt;?&gt; my_class, FileSystem fs,<a name="line.930"></a>
-<span class="sourceLineNo">931</span>      Map&lt;String, String&gt; 
packagedClasses)<a name="line.931"></a>
-<span class="sourceLineNo">932</span>  throws IOException {<a 
name="line.932"></a>
-<span class="sourceLineNo">933</span>    // attempt to locate an existing jar 
for the class.<a name="line.933"></a>
-<span class="sourceLineNo">934</span>    String jar = 
findContainingJar(my_class, packagedClasses);<a name="line.934"></a>
-<span class="sourceLineNo">935</span>    if (null == jar || jar.isEmpty()) {<a 
name="line.935"></a>
-<span class="sourceLineNo">936</span>      jar = getJar(my_class);<a 
name="line.936"></a>
-<span class="sourceLineNo">937</span>      updateMap(jar, packagedClasses);<a 
name="line.937"></a>
-<span class="sourceLineNo">938</span>    }<a name="line.938"></a>
-<span class="sourceLineNo">939</span><a name="line.939"></a>
-<span class="sourceLineNo">940</span>    if (null == jar || jar.isEmpty()) {<a 
name="line.940"></a>
-<span class="sourceLineNo">941</span>      return null;<a name="line.941"></a>
-<span class="sourceLineNo">942</span>    }<a name="line.942"></a>
-<span class="sourceLineNo">943</span><a name="line.943"></a>
-<span class="sourceLineNo">944</span>    LOG.debug(String.format("For class 
%s, using jar %s", my_class.getName(), jar));<a name="line.944"></a>
-<span class="sourceLineNo">945</span>    return new 
Path(jar).makeQualified(fs);<a name="line.945"></a>
-<span class="sourceLineNo">946</span>  }<a name="line.946"></a>
-<span class="sourceLineNo">947</span><a name="line.947"></a>
-<span class="sourceLineNo">948</span>  /**<a name="line.948"></a>
-<span class="sourceLineNo">949</span>   * Add entries to 
&lt;code&gt;packagedClasses&lt;/code&gt; corresponding to class files<a 
name="line.949"></a>
-<span class="sourceLineNo">950</span>   * contained in 
&lt;code&gt;jar&lt;/code&gt;.<a name="line.950"></a>
-<span class="sourceLineNo">951</span>   * @param jar The jar who's content to 
list.<a name="line.951"></a>
-<span class="sourceLineNo">952</span>   * @param packagedClasses map[class 
-&gt; jar]<a name="line.952"></a>
-<span class="sourceLineNo">953</span>   */<a name="line.953"></a>
-<span class="sourceLineNo">954</span>  private static void updateMap(String 
jar, Map&lt;String, String&gt; packagedClasses) throws IOException {<a 
name="line.954"></a>
-<span class="sourceLineNo">955</span>    if (null == jar || jar.isEmpty()) {<a 
name="line.955"></a>
-<span class="sourceLineNo">956</span>      return;<a name="line.956"></a>
-<span class="sourceLineNo">957</span>    }<a name="line.957"></a>
-<span class="sourceLineNo">958</span>    ZipFile zip = null;<a 
name="line.958"></a>
-<span class="sourceLineNo">959</span>    try {<a name="line.959"></a>
-<span class="sourceLineNo">960</span>      zip = new ZipFile(jar);<a 
name="line.960"></a>
-<span class="sourceLineNo">961</span>      for (Enumeration&lt;? extends 
ZipEntry&gt; iter = zip.entries(); iter.hasMoreElements();) {<a 
name="line.961"></a>
-<span class="sourceLineNo">962</span>        ZipEntry entry = 
iter.nextElement();<a name="line.962"></a>
-<span class="sourceLineNo">963</span>        if 
(entry.getName().endsWith("class")) {<a name="line.963"></a>
-<span class="sourceLineNo">964</span>          
packagedClasses.put(entry.getName(), jar);<a name="line.964"></a>
-<span class="sourceLineNo">965</span>        }<a name="line.965"></a>
-<span class="sourceLineNo">966</span>      }<a name="line.966"></a>
-<span class="sourceLineNo">967</span>    } finally {<a name="line.967"></a>
-<span class="sourceLineNo">968</span>      if (null != zip) zip.close();<a 
name="line.968"></a>
-<span class="sourceLineNo">969</span>    }<a name="line.969"></a>
-<span class="sourceLineNo">970</span>  }<a name="line.970"></a>
-<span class="sourceLineNo">971</span><a name="line.971"></a>
-<span class="sourceLineNo">972</span>  /**<a name="line.972"></a>
-<span class="sourceLineNo">973</span>   * Find a jar that contains a class of 
the same name, if any. It will return<a name="line.973"></a>
-<span class="sourceLineNo">974</span>   * a jar file, even if that is not the 
first thing on the class path that<a name="line.974"></a>
-<span class="sourceLineNo">975</span>   * has a class with the same name. 
Looks first on the classpath and then in<a name="line.975"></a>
-<span class="sourceLineNo">976</span>   * the 
&lt;code&gt;packagedClasses&lt;/code&gt; map.<a name="line.976"></a>
-<span class="sourceLineNo">977</span>   * @param my_class the class to find.<a 
name="line.977"></a>
-<span class="sourceLineNo">978</span>   * @return a jar file that contains the 
class, or null.<a name="line.978"></a>
-<span class="sourceLineNo">979</span>   * @throws IOException<a 
name="line.979"></a>
-<span class="sourceLineNo">980</span>   */<a name="line.980"></a>
-<span class="sourceLineNo">981</span>  private static String 
findContainingJar(Class&lt;?&gt; my_class, Map&lt;String, String&gt; 
packagedClasses)<a name="line.981"></a>
-<span class="sourceLineNo">982</span>      throws IOException {<a 
name="line.982"></a>
-<span class="sourceLineNo">983</span>    ClassLoader loader = 
my_class.getClassLoader();<a name="line.983"></a>
-<span class="sourceLineNo">984</span><a name="line.984"></a>
-<span class="sourceLineNo">985</span>    String class_file = 
my_class.getName().replaceAll("\\.", "/") + ".class";<a name="line.985"></a>
-<span class="sourceLineNo">986</span><a name="line.986"></a>
-<span class="sourceLineNo">987</span>    if (loader != null) {<a 
name="line.987"></a>
-<span class="sourceLineNo">988</span>      // first search the classpath<a 
name="line.988"></a>
-<span class="sourceLineNo">989</span>      for (Enumeration&lt;URL&gt; itr = 
loader.getResources(class_file); itr.hasMoreElements();) {<a 
name="line.989"></a>
-<span class="sourceLineNo">990</span>        URL url = itr.nextElement();<a 
name="line.990"></a>
-<span class="sourceLineNo">991</span>        if 
("jar".equals(url.getProtocol())) {<a name="line.991"></a>
-<span class="sourceLineNo">992</span>          String toReturn = 
url.getPath();<a name="line.992"></a>
-<span class="sourceLineNo">993</span>          if 
(toReturn.startsWith("file:")) {<a name="line.993"></a>
-<span class="sourceLineNo">994</span>            toReturn = 
toReturn.substring("file:".length());<a name="line.994"></a>
-<span class="sourceLineNo">995</span>          }<a name="line.995"></a>
-<span class="sourceLineNo">996</span>          // URLDecoder is a misnamed 
class, since it actually decodes<a name="line.996"></a>
-<span class="sourceLineNo">997</span>          // x-www-form-urlencoded MIME 
type rather than actual<a name="line.997"></a>
-<span class="sourceLineNo">998</span>          // URL encoding (which the file 
path has). Therefore it would<a name="line.998"></a>
-<span class="sourceLineNo">999</span>          // decode +s to ' 's which is 
incorrect (spaces are actually<a name="line.999"></a>
-<span class="sourceLineNo">1000</span>          // either unencoded or encoded 
as "%20"). Replace +s first, so<a name="line.1000"></a>
-<span class="sourceLineNo">1001</span>          // that they are kept sacred 
during the decoding process.<a name="line.1001"></a>
-<span class="sourceLineNo">1002</span>          toReturn = 
toReturn.replaceAll("\\+", "%2B");<a name="line.1002"></a>
-<span class="sourceLineNo">1003</span>          toReturn = 
URLDecoder.decode(toReturn, "UTF-8");<a name="line.1003"></a>
-<span class="sourceLineNo">1004</span>          return 
toReturn.replaceAll("!.*$", "");<a name="line.1004"></a>
-<span class="sourceLineNo">1005</span>        }<a name="line.1005"></a>
-<span class="sourceLineNo">1006</span>      }<a name="line.1006"></a>
-<span class="sourceLineNo">1007</span>    }<a name="line.1007"></a>
-<span class="sourceLineNo">1008</span><a name="line.1008"></a>
-<span class="sourceLineNo">1009</span>    // now look in any jars we've 
packaged using JarFinder. Returns null when<a name="line.1009"></a>
-<span class="sourceLineNo">1010</span>    // no jar is found.<a 
name="line.1010"></a>
-<span class="sourceLineNo">1011</span>    return 
packagedClasses.get(class_file);<a name="line.1011"></a>
-<span class="sourceLineNo">1012</span>  }<a name="line.1012"></a>
-<span class="sourceLineNo">1013</span><a name="line.1013"></a>
-<span class="sourceLineNo">1014</span>  /**<a name="line.1014"></a>
-<span class="sourceLineNo">1015</span>   * Invoke 'getJar' on a custom 
JarFinder implementation. Useful for some job<a name="line.1015"></a>
-<span class="sourceLineNo">1016</span>   * configuration contexts (HBASE-8140) 
and also for testing on MRv2.<a name="line.1016"></a>
-<span class="sourceLineNo">1017</span>   * check if we have HADOOP-9426.<a 
name="line.1017"></a>
-<span class="sourceLineNo">1018</span>   * @param my_class the class to 
find.<a name="line.1018"></a>
-<span class="sourceLineNo">1019</span>   * @return a jar file that contains 
the class, or null.<a name="line.1019"></a>
-<span class="sourceLineNo">1020</span>   */<a name="line.1020"></a>
-<span class="sourceLineNo">1021</span>  private static String 
getJar(Class&lt;?&gt; my_class) {<a name="line.1021"></a>
-<span class="sourceLineNo">1022</span>    String ret = null;<a 
name="line.1022"></a>
-<span class="sourceLineNo">1023</span>    try {<a name="line.1023"></a>
-<span class="sourceLineNo">1024</span>      ret = 
JarFinder.getJar(my_class);<a name="line.1024"></a>
-<span class="sourceLineNo">1025</span>    } catch (Exception e) {<a 
name="line.1025"></a>
-<span class="sourceLineNo">1026</span>      // toss all other exceptions, 
related to reflection failure<a name="line.1026"></a>
-<span class="sourceLineNo">1027</span>      throw new RuntimeException("getJar 
invocation failed.", e);<a name="line.1027"></a>
-<span class="sourceLineNo">1028</span>    }<a name="line.1028"></a>
-<span class="sourceLineNo">1029</span><a name="line.1029"></a>
-<span class="sourceLineNo">1030</span>    return ret;<a name="line.1030"></a>
-<span class="sourceLineNo">1031</span>  }<a name="line.1031"></a>
-<span class="sourceLineNo">1032</span>}<a name="line.1032"></a>
+<span class="sourceLineNo">712</span>  /**<a name="line.712"></a>
+<span class="sourceLineNo">713</span>   * Ensures that the given number of 
reduce tasks for the given job<a name="line.713"></a>
+<span class="sourceLineNo">714</span>   * configuration does not exceed the 
number of regions for the given table.<a name="line.714"></a>
+<span class="sourceLineNo">715</span>   *<a name="line.715"></a>
+<span class="sourceLineNo">716</span>   * @param table  The table to get the 
region count for.<a name="line.716"></a>
+<span class="sourceLineNo">717</span>   * @param job  The current job to 
adjust.<a name="line.717"></a>
+<span class="sourceLineNo">718</span>   * @throws IOException When retrieving 
the table details fails.<a name="line.718"></a>
+<span class="sourceLineNo">719</span>   */<a name="line.719"></a>
+<span class="sourceLineNo">720</span>  public static void 
limitNumReduceTasks(String table, Job job)<a name="line.720"></a>
+<span class="sourceLineNo">721</span>  throws IOException {<a 
name="line.721"></a>
+<span class="sourceLineNo">722</span>    int regions =<a name="line.722"></a>
+<span class="sourceLineNo">723</span>      
MetaTableAccessor.getRegionCount(job.getConfiguration(), 
TableName.valueOf(table));<a name="line.723"></a>
+<span class="sourceLineNo">724</span>    if (job.getNumReduceTasks() &gt; 
regions)<a name="line.724"></a>
+<span class="sourceLineNo">725</span>      job.setNumReduceTasks(regions);<a 
name="line.725"></a>
+<span class="sourceLineNo">726</span>  }<a name="line.726"></a>
+<span class="sourceLineNo">727</span><a name="line.727"></a>
+<span class="sourceLineNo">728</span>  /**<a name="line.728"></a>
+<span class="sourceLineNo">729</span>   * Sets the number of reduce tasks for 
the given job configuration to the<a name="line.729"></a>
+<span class="sourceLineNo">730</span>   * number of regions the given table 
has.<a name="line.730"></a>
+<span class="sourceLineNo">731</span>   *<a name="line.731"></a>
+<span class="sourceLineNo">732</span>   * @param table  The table to get the 
region count for.<a name="line.732"></a>
+<span class="sourceLineNo">733</span>   * @param job  The current job to 
adjust.<a name="line.733"></a>
+<span class="sourceLineNo">734</span>   * @throws IOException When retrieving 
the table details fails.<a name="line.734"></a>
+<span class="sourceLineNo">735</span>   */<a name="line.735"></a>
+<span class="sourceLineNo">736</span>  public static void 
setNumReduceTasks(String table, Job job)<a name="line.736"></a>
+<span class="sourceLineNo">737</span>  throws IOException {<a 
name="line.737"></a>
+<span class="sourceLineNo">738</span>    
job.setNumReduceTasks(MetaTableAccessor.getRegionCount(job.getConfiguration(),<a
 name="line.738"></a>
+<span class="sourceLineNo">739</span>       TableName.valueOf(table)));<a 
name="line.739"></a>
+<span class="sourceLineNo">740</span>  }<a name="line.740"></a>
+<span class="sourceLineNo">741</span><a name="line.741"></a>
+<span class="sourceLineNo">742</span>  /**<a name="line.742"></a>
+<span class="sourceLineNo">743</span>   * Sets the number of rows to return 
and cache with each scanner iteration.<a name="line.743"></a>
+<span class="sourceLineNo">744</span>   * Higher caching values will enable 
faster mapreduce jobs at the expense of<a name="line.744"></a>
+<span class="sourceLineNo">745</span>   * requiring more heap to contain the 
cached rows.<a name="line.745"></a>
+<span class="sourceLineNo">746</span>   *<a name="line.746"></a>
+<span class="sourceLineNo">747</span>   * @param job The current job to 
adjust.<a name="line.747"></a>
+<span class="sourceLineNo">748</span>   * @param batchSize The number of rows 
to return in batch with each scanner<a name="line.748"></a>
+<span class="sourceLineNo">749</span>   * iteration.<a name="line.749"></a>
+<span class="sourceLineNo">750</span>   */<a name="line.750"></a>
+<span class="sourceLineNo">751</span>  public static void 
setScannerCaching(Job job, int batchSize) {<a name="line.751"></a>
+<span class="sourceLineNo">752</span>    
job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);<a 
name="line.752"></a>
+<span class="sourceLineNo">753</span>  }<a name="line.753"></a>
+<span class="sourceLineNo">754</span><a name="line.754"></a>
+<span class="sourceLineNo">755</span>  /**<a name="line.755"></a>
+<span class="sourceLineNo">756</span>   * Add HBase and its dependencies 
(only) to the job configuration.<a name="line.756"></a>
+<span class="sourceLineNo">757</span>   * &lt;p&gt;<a name="line.757"></a>
+<span class="sourceLineNo">758</span>   * This is intended as a low-level API, 
facilitating code reuse between this<a name="line.758"></a>
+<span class="sourceLineNo">759</span>   * class and its mapred counterpart. It 
also of use to external tools that<a name="line.759"></a>
+<span class="sourceLineNo">760</span>   * need to build a MapReduce job that 
interacts with HBase but want<a name="line.760"></a>
+<span class="sourceLineNo">761</span>   * fine-grained control over the jars 
shipped to the cluster.<a name="line.761"></a>
+<span class="sourceLineNo">762</span>   * &lt;/p&gt;<a name="line.762"></a>
+<span class="sourceLineNo">763</span>   * @param conf The Configuration object 
to extend with dependencies.<a name="line.763"></a>
+<span class="sourceLineNo">764</span>   * @see 
org.apache.hadoop.hbase.mapred.TableMapReduceUtil<a name="line.764"></a>
+<span class="sourceLineNo">765</span>   * @see &lt;a 
href="https://issues.apache.org/jira/browse/PIG-3285"&gt;PIG-3285&lt;/a&gt;<a 
name="line.765"></a>
+<span class="sourceLineNo">766</span>   */<a name="line.766"></a>
+<span class="sourceLineNo">767</span>  public static void 
addHBaseDependencyJars(Configuration conf) throws IOException {<a 
name="line.767"></a>
+<span class="sourceLineNo">768</span><a name="line.768"></a>
+<span class="sourceLineNo">769</span>    // PrefixTreeCodec is part of the 
hbase-prefix-tree module. If not included in MR jobs jar<a name="line.769"></a>
+<span class="sourceLineNo">770</span>    // dependencies, MR jobs that write 
encoded hfiles will fail.<a name="line.770"></a>
+<span class="sourceLineNo">771</span>    // We used reflection here so to 
prevent a circular module dependency.<a name="line.771"></a>
+<span class="sourceLineNo">772</span>    // TODO - if we extract the MR into a 
module, make it depend on hbase-prefix-tree.<a name="line.772"></a>
+<span class="sourceLineNo">773</span>    Class prefixTreeCodecClass = null;<a 
name="line.773"></a>
+<span class="sourceLineNo">774</span>    try {<a name="line.774"></a>
+<span class="sourceLineNo">775</span>      prefixTreeCodecClass =<a 
name="line.775"></a>
+<span class="sourceLineNo">776</span>          
Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");<a 
name="line.776"></a>
+<span class="sourceLineNo">777</span>    } catch (ClassNotFoundException e) 
{<a name="line.777"></a>
+<span class="sourceLineNo">778</span>      // this will show up in unit tests 
but should not show in real deployments<a name="line.778"></a>
+<span class="sourceLineNo">779</span>      LOG.warn("The hbase-prefix-tree 
module jar containing PrefixTreeCodec is not present." +<a name="line.779"></a>
+<span class="sourceLineNo">780</span>          "  Continuing without it.");<a 
name="line.780"></a>
+<span class="sourceLineNo">781</span>    }<a name="line.781"></a>
+<span class="sourceLineNo">782</span><a name="line.782"></a>
+<span class="sourceLineNo">783</span>    addDependencyJarsForClasses(conf,<a 
name="line.783"></a>
+<span class="sourceLineNo">784</span>      // explicitly pull a class from 
each module<a name="line.784"></a>
+<span class="sourceLineNo">785</span>      
org.apache.hadoop.hbase.HConstants.class,                      // 
hbase-common<a name="line.785"></a>
+<span class="sourceLineNo">786</span>      
org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // 
hbase-protocol<a name="line.786"></a>
+<span class="sourceLineNo">787</span>      
org.apache.hadoop.hbase.client.Put.class,                      // 
hbase-client<a name="line.787"></a>
+<span class="sourceLineNo">788</span>      
org.apache.hadoop.hbase.CompatibilityFactory.class,            // 
hbase-hadoop-compat<a name="line.788"></a>
+<span class="sourceLineNo">789</span>      
org.apache.hadoop.hbase.mapreduce.TableMapper.class,           // 
hbase-server<a name="line.789"></a>
+<span class="sourceLineNo">790</span>      prefixTreeCodecClass, //  
hbase-prefix-tree (if null will be skipped)<a name="line.790"></a>
+<span class="sourceLineNo">791</span>      // pull necessary dependencies<a 
name="line.791"></a>
+<span class="sourceLineNo">792</span>      
org.apache.zookeeper.ZooKeeper.class,<a name="line.792"></a>
+<span class="sourceLineNo">793</span>      io.netty.channel.Channel.class,<a 
name="line.793"></a>
+<span class="sourceLineNo">794</span>      
com.google.protobuf.Message.class,<a name="line.794"></a>
+<span class="sourceLineNo">795</span>      
com.google.common.collect.Lists.class,<a name="line.795"></a>
+<span class="sourceLineNo">796</span>      org.apache.htrace.Trace.class,<a 
name="line.796"></a>
+<span class="sourceLineNo">797</span>      
com.codahale.metrics.MetricRegistry.class);<a name="line.797"></a>
+<span class="sourceLineNo">798</span>  }<a name="line.798"></a>
+<span class="sourceLineNo">799</span><a name="line.799"></a>
+<span class="sourceLineNo">800</span>  /**<a name="line.800"></a>
+<span class="sourceLineNo">801</span>   * Returns a classpath string built 
from the content of the "tmpjars" value in {@code conf}.<a name="line.801"></a>
+<span class="sourceLineNo">802</span>   * Also exposed to shell scripts via 
`bin/hbase mapredcp`.<a name="line.802"></a>
+<span class="sourceLineNo">803</span>   */<a name="line.803"></a>
+<span class="sourceLineNo">804</span>  public static String 
buildDependencyClasspath(Configuration conf) {<a name="line.804"></a>
+<span class="sourceLineNo">805</span>    if (conf == null) {<a 
name="line.805"></a>
+<span class="sourceLineNo">806</span>      throw new 
IllegalArgumentException("Must provide a configuration object.");<a 
name="line.806"></a>
+<span class="sourceLineNo">807</span>    }<a name="line.807"></a>
+<span class="sourceLineNo">808</span>    Set&lt;String&gt; paths = new 
HashSet&lt;String&gt;(conf.getStringCollection("tmpjars"));<a 
name="line.808"></a>
+<span class="sourceLineNo">809</span>    if (paths.size() == 0) {<a 
name="line.809"></a>
+<span class="sourceLineNo">810</span>      throw new 
IllegalArgumentException("Configuration contains no tmpjars.");<a 
name="line.810"></a>
+<span class="sourceLineNo">811</span>    }<a name="line.811"></a>
+<span class="sourceLineNo">812</span>    StringBuilder sb = new 
StringBuilder();<a name="line.812"></a>
+<span class="sourceLineNo">813</span>    for (String s : paths) {<a 
name="line.813"></a>
+<span class="sourceLineNo">814</span>      // entries can take the form 
'file:/path/to/file.jar'.<a name="line.814"></a>
+<span class="sourceLineNo">815</span>      int idx = s.indexOf(":");<a 
name="line.815"></a>
+<span class="sourceLineNo">816</span>      if (idx != -1) s = s.substring(idx 
+ 1);<a name="line.816"></a>
+<span class="sourceLineNo">817</span>      if (sb.length() &gt; 0) 
sb.append(File.pathSeparator);<a name="line.817"></a>
+<span class="sourceLineNo">818</span>      sb.append(s);<a name="line.818"></a>
+<span class="sourceLineNo">819</span>    }<a name="line.819"></a>
+<span class="sourceLineNo">820</span>    return sb.toString();<a 
name="line.820"></a>
+<span class="sourceLineNo">821</span>  }<a name="line.821"></a>
+<span class="sourceLineNo">822</span><a name="line.822"></a>
+<span class="sourceLineNo">823</span>  /**<a name="line.823"></a>
+<span class="sourceLineNo">824</span>   * Add the HBase dependency jars as 
well as jars for any of the configured<a name="line.824"></a>
+<span class="sourceLineNo">825</span>   * job classes to the job 
configuration, so that JobClient will ship them<a name="line.825"></a>
+<span class="sourceLineNo">826</span>   * to the cluster and add them to the 
DistributedCache.<a name="line.826"></a>
+<span class="sourceLineNo">827</span>   */<a name="line.827"></a>
+<span class="sourceLineNo">828</span>  public static void 
addDependencyJars(Job job) throws IOException {<a name="line.828"></a>
+<span class="sourceLineNo">829</span>    
addHBaseDependencyJars(job.getConfiguration());<a name="line.829"></a>
+<span class="sourceLineNo">830</span>    try {<a name="line.830"></a>
+<span class="sourceLineNo">831</span>      
addDependencyJarsForClasses(job.getConfiguration(),<a name="line.831"></a>
+<span class="sourceLineNo">832</span>          // when making changes here, 
consider also mapred.TableMapReduceUtil<a name="line.832"></a>
+<span class="sourceLineNo">833</span>          // pull job classes<a 
name="line.833"></a>
+<span class="sourceLineNo">834</span>          job.getMapOutputKeyClass(),<a 
name="line.834"></a>
+<span class="sourceLineNo">835</span>          job.getMapOutputValueClass(),<a 
name="line.835"></a>
+<span class="sourceLineNo">836</span>          job.getInputFormatClass(),<a 
name="line.836"></a>
+<span class="sourceLineNo">837</span>          job.getOutputKeyClass(),<a 
name="line.837"></a>
+<span class="sourceLineNo">838</span>          job.getOutputValueClass(),<a 
name="line.838"></a>
+<span class="sourceLineNo">839</span>          job.getOutputFormatClass(),<a 
name="line.839"></a>
+<span class="sourceLineNo">840</span>          job.getPartitionerClass(),<a 
name="line.840"></a>
+<span class="sourceLineNo">841</span>          job.getCombinerClass());<a 
name="line.841"></a>
+<span class="sourceLineNo">842</span>    } catch (ClassNotFoundException e) 
{<a name="line.842"></a>
+<span class="sourceLineNo">843</span>      throw new IOException(e);<a 
name="line.843"></a>
+<span class="sourceLineNo">844</span>    }<a name="line.844"></a>
+<span class="sourceLineNo">845</span>  }<a name="line.845"></a>
+<span class="sourceLineNo">846</span><a name="line.846"></a>
+<span class="sourceLineNo">847</span>  /**<a name="line.847"></a>
+<span class="sourceLineNo">848</span>   * Add the jars containing the given 
classes to the job's configuration<a name="line.848"></a>
+<span class="sourceLineNo">849</span>   * such that JobClient will ship them 
to the cluster and add them to<a name="line.849"></a>
+<span class="sourceLineNo">850</span>   * the DistributedCache.<a 
name="line.850"></a>
+<span class="sourceLineNo">851</span>   * @deprecated rely on {@link 
#addDependencyJars(Job)} instead.<a name="line.851"></a>
+<span class="sourceLineNo">852</span>   */<a name="line.852"></a>
+<span class="sourceLineNo">853</span>  @Deprecated<a name="line.853"></a>
+<span class="sourceLineNo">854</span>  public static void 
addDependencyJars(Configuration conf,<a name="line.854"></a>
+<span class="sourceLineNo">855</span>      Class&lt;?&gt;... classes) throws 
IOException {<a name="line.855"></a>
+<span class="sourceLineNo">856</span>    LOG.warn("The 
addDependencyJars(Configuration, Class&lt;?&gt;...) method has been deprecated 
since it"<a name="line.856"></a>
+<span class="sourceLineNo">857</span>             + " is easy to use 
incorrectly. Most users should rely on addDependencyJars(Job) " +<a 
name="line.857"></a>
+<span class="sourceLineNo">858</span>             "instead. See HBASE-8386 for 
more details.");<a name="line.858"></a>
+<span class="sourceLineNo">859</span>    addDependencyJarsForClasses(conf, 
classes);<a name="line.859"></a>
+<span class="sourceLineNo">860</span>  }<a name="line.860"></a>
+<span class="sourceLineNo">861</span><a name="line.861"></a>
+<span class="sourceLineNo">862</span>  /**<a name="line.862"></a>
+<span class="sourceLineNo">863</span>   * Add the jars containing the given 
classes to the job's configuration<a name="line.863"></a>
+<span class="sourceLineNo">864</span>   * such that JobClient will ship them 
to the cluster and add them to<a name="line.864"></a>
+<span class="sourceLineNo">865</span>   * the DistributedCache.<a 
name="line.865"></a>
+<span class="sourceLineNo">866</span>   *<a name="line.866"></a>
+<span class="sourceLineNo">867</span>   * N.B. that this method at most adds 
one jar per class given. If there is more than one<a name="line.867"></a>
+<span class="sourceLineNo">868</span>   * jar available containing a class 
with the same name as a given class, we don't define<a name="line.868"></a>
+<span class="sourceLineNo">869</span>   * which of those jars might be 
chosen.<a name="line.869"></a>
+<span class="sourceLineNo">870</span>   *<a name="line.870"></a>
+<span class="sourceLineNo">871</span>   * @param conf The Hadoop Configuration 
to modify<a name="line.871"></a>
+<span class="sourceLineNo">872</span>   * @param classes will add just those 
dependencies needed to find the given classes<a name="line.872"></a>
+<span class="sourceLineNo">873</span>   * @throws IOException if an underlying 
library call fails.<a name="line.873"></a>
+<span class="sourceLineNo">874</span>   */<a name="line.874"></a>
+<span class="sourceLineNo">875</span>  @InterfaceAudience.Private<a 
name="line.875"></a>
+<span class="sourceLineNo">876</span>  public static void 
addDependencyJarsForClasses(Configuration conf,<a name="line.876"></a>
+<span class="sourceLineNo">877</span>      Class&lt;?&gt;... classes) throws 
IOException {<a name="line.877"></a>
+<span class="

<TRUNCATED>

Reply via email to