http://git-wip-us.apache.org/repos/asf/hbase/blob/fc29f802/apidocs/src-html/org/apache/hadoop/hbase/mapred/TableInputFormatBase.html ---------------------------------------------------------------------- diff --git a/apidocs/src-html/org/apache/hadoop/hbase/mapred/TableInputFormatBase.html b/apidocs/src-html/org/apache/hadoop/hbase/mapred/TableInputFormatBase.html index eae5051..94c93f4 100644 --- a/apidocs/src-html/org/apache/hadoop/hbase/mapred/TableInputFormatBase.html +++ b/apidocs/src-html/org/apache/hadoop/hbase/mapred/TableInputFormatBase.html @@ -36,312 +36,291 @@ <span class="sourceLineNo">028</span>import org.apache.hadoop.hbase.HConstants;<a name="line.28"></a> <span class="sourceLineNo">029</span>import org.apache.hadoop.hbase.TableName;<a name="line.29"></a> <span class="sourceLineNo">030</span>import org.apache.hadoop.hbase.client.Connection;<a name="line.30"></a> -<span class="sourceLineNo">031</span>import org.apache.hadoop.hbase.client.HTable;<a name="line.31"></a> -<span class="sourceLineNo">032</span>import org.apache.hadoop.hbase.client.RegionLocator;<a name="line.32"></a> -<span class="sourceLineNo">033</span>import org.apache.hadoop.hbase.client.Result;<a name="line.33"></a> -<span class="sourceLineNo">034</span>import org.apache.hadoop.hbase.client.Table;<a name="line.34"></a> -<span class="sourceLineNo">035</span>import org.apache.hadoop.hbase.filter.Filter;<a name="line.35"></a> -<span class="sourceLineNo">036</span>import org.apache.hadoop.hbase.io.ImmutableBytesWritable;<a name="line.36"></a> -<span class="sourceLineNo">037</span>import org.apache.hadoop.mapred.InputFormat;<a name="line.37"></a> -<span class="sourceLineNo">038</span>import org.apache.hadoop.mapred.InputSplit;<a name="line.38"></a> -<span class="sourceLineNo">039</span>import org.apache.hadoop.mapred.JobConf;<a name="line.39"></a> -<span class="sourceLineNo">040</span>import org.apache.hadoop.mapred.RecordReader;<a name="line.40"></a> -<span class="sourceLineNo">041</span>import org.apache.hadoop.mapred.Reporter;<a name="line.41"></a> -<span class="sourceLineNo">042</span><a name="line.42"></a> -<span class="sourceLineNo">043</span>/**<a name="line.43"></a> -<span class="sourceLineNo">044</span> * A Base for {@link TableInputFormat}s. Receives a {@link Table}, a<a name="line.44"></a> -<span class="sourceLineNo">045</span> * byte[] of input columns and optionally a {@link Filter}.<a name="line.45"></a> -<span class="sourceLineNo">046</span> * Subclasses may use other TableRecordReader implementations.<a name="line.46"></a> -<span class="sourceLineNo">047</span> *<a name="line.47"></a> -<span class="sourceLineNo">048</span> * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to<a name="line.48"></a> -<span class="sourceLineNo">049</span> * function properly. Each of the entry points to this class used by the MapReduce framework,<a name="line.49"></a> -<span class="sourceLineNo">050</span> * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},<a name="line.50"></a> -<span class="sourceLineNo">051</span> * will call {@link #initialize(JobConf)} as a convenient centralized location to handle<a name="line.51"></a> -<span class="sourceLineNo">052</span> * retrieving the necessary configuration information. If your subclass overrides either of these<a name="line.52"></a> -<span class="sourceLineNo">053</span> * methods, either call the parent version or call initialize yourself.<a name="line.53"></a> -<span class="sourceLineNo">054</span> *<a name="line.54"></a> -<span class="sourceLineNo">055</span> * <p><a name="line.55"></a> -<span class="sourceLineNo">056</span> * An example of a subclass:<a name="line.56"></a> -<span class="sourceLineNo">057</span> * <pre><a name="line.57"></a> -<span class="sourceLineNo">058</span> * class ExampleTIF extends TableInputFormatBase {<a name="line.58"></a> -<span class="sourceLineNo">059</span> *<a name="line.59"></a> -<span class="sourceLineNo">060</span> * {@literal @}Override<a name="line.60"></a> -<span class="sourceLineNo">061</span> * protected void initialize(JobConf context) throws IOException {<a name="line.61"></a> -<span class="sourceLineNo">062</span> * // We are responsible for the lifecycle of this connection until we hand it over in<a name="line.62"></a> -<span class="sourceLineNo">063</span> * // initializeTable.<a name="line.63"></a> -<span class="sourceLineNo">064</span> * Connection connection =<a name="line.64"></a> -<span class="sourceLineNo">065</span> * ConnectionFactory.createConnection(HBaseConfiguration.create(job));<a name="line.65"></a> -<span class="sourceLineNo">066</span> * TableName tableName = TableName.valueOf("exampleTable");<a name="line.66"></a> -<span class="sourceLineNo">067</span> * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.<a name="line.67"></a> -<span class="sourceLineNo">068</span> * initializeTable(connection, tableName);<a name="line.68"></a> -<span class="sourceLineNo">069</span> * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),<a name="line.69"></a> -<span class="sourceLineNo">070</span> * Bytes.toBytes("columnB") };<a name="line.70"></a> -<span class="sourceLineNo">071</span> * // mandatory<a name="line.71"></a> -<span class="sourceLineNo">072</span> * setInputColumns(inputColumns);<a name="line.72"></a> -<span class="sourceLineNo">073</span> * // optional, by default we'll get everything for the given columns.<a name="line.73"></a> -<span class="sourceLineNo">074</span> * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));<a name="line.74"></a> -<span class="sourceLineNo">075</span> * setRowFilter(exampleFilter);<a name="line.75"></a> -<span class="sourceLineNo">076</span> * }<a name="line.76"></a> -<span class="sourceLineNo">077</span> * }<a name="line.77"></a> -<span class="sourceLineNo">078</span> * </pre><a name="line.78"></a> -<span class="sourceLineNo">079</span> */<a name="line.79"></a> -<span class="sourceLineNo">080</span><a name="line.80"></a> -<span class="sourceLineNo">081</span>@InterfaceAudience.Public<a name="line.81"></a> -<span class="sourceLineNo">082</span>@InterfaceStability.Stable<a name="line.82"></a> -<span class="sourceLineNo">083</span>public abstract class TableInputFormatBase<a name="line.83"></a> -<span class="sourceLineNo">084</span>implements InputFormat<ImmutableBytesWritable, Result> {<a name="line.84"></a> -<span class="sourceLineNo">085</span> private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);<a name="line.85"></a> -<span class="sourceLineNo">086</span> private byte [][] inputColumns;<a name="line.86"></a> -<span class="sourceLineNo">087</span> private Table table;<a name="line.87"></a> -<span class="sourceLineNo">088</span> private RegionLocator regionLocator;<a name="line.88"></a> -<span class="sourceLineNo">089</span> private Connection connection;<a name="line.89"></a> -<span class="sourceLineNo">090</span> private TableRecordReader tableRecordReader;<a name="line.90"></a> -<span class="sourceLineNo">091</span> private Filter rowFilter;<a name="line.91"></a> -<span class="sourceLineNo">092</span><a name="line.92"></a> -<span class="sourceLineNo">093</span> private static final String NOT_INITIALIZED = "The input format instance has not been properly " +<a name="line.93"></a> -<span class="sourceLineNo">094</span> "initialized. Ensure you call initializeTable either in your constructor or initialize " +<a name="line.94"></a> -<span class="sourceLineNo">095</span> "method";<a name="line.95"></a> -<span class="sourceLineNo">096</span> private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +<a name="line.96"></a> -<span class="sourceLineNo">097</span> " previous error. Please look at the previous logs lines from" +<a name="line.97"></a> -<span class="sourceLineNo">098</span> " the task's full log for more details.";<a name="line.98"></a> -<span class="sourceLineNo">099</span><a name="line.99"></a> -<span class="sourceLineNo">100</span> /**<a name="line.100"></a> -<span class="sourceLineNo">101</span> * Builds a TableRecordReader. If no TableRecordReader was provided, uses<a name="line.101"></a> -<span class="sourceLineNo">102</span> * the default.<a name="line.102"></a> -<span class="sourceLineNo">103</span> *<a name="line.103"></a> -<span class="sourceLineNo">104</span> * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,<a name="line.104"></a> -<span class="sourceLineNo">105</span> * JobConf, Reporter)<a name="line.105"></a> -<span class="sourceLineNo">106</span> */<a name="line.106"></a> -<span class="sourceLineNo">107</span> public RecordReader<ImmutableBytesWritable, Result> getRecordReader(<a name="line.107"></a> -<span class="sourceLineNo">108</span> InputSplit split, JobConf job, Reporter reporter)<a name="line.108"></a> -<span class="sourceLineNo">109</span> throws IOException {<a name="line.109"></a> -<span class="sourceLineNo">110</span> // In case a subclass uses the deprecated approach or calls initializeTable directly<a name="line.110"></a> -<span class="sourceLineNo">111</span> if (table == null) {<a name="line.111"></a> -<span class="sourceLineNo">112</span> initialize(job);<a name="line.112"></a> -<span class="sourceLineNo">113</span> }<a name="line.113"></a> -<span class="sourceLineNo">114</span> // null check in case our child overrides getTable to not throw.<a name="line.114"></a> -<span class="sourceLineNo">115</span> try {<a name="line.115"></a> -<span class="sourceLineNo">116</span> if (getTable() == null) {<a name="line.116"></a> -<span class="sourceLineNo">117</span> // initialize() must not have been implemented in the subclass.<a name="line.117"></a> -<span class="sourceLineNo">118</span> throw new IOException(INITIALIZATION_ERROR);<a name="line.118"></a> -<span class="sourceLineNo">119</span> }<a name="line.119"></a> -<span class="sourceLineNo">120</span> } catch (IllegalStateException exception) {<a name="line.120"></a> -<span class="sourceLineNo">121</span> throw new IOException(INITIALIZATION_ERROR, exception);<a name="line.121"></a> -<span class="sourceLineNo">122</span> }<a name="line.122"></a> -<span class="sourceLineNo">123</span><a name="line.123"></a> -<span class="sourceLineNo">124</span> TableSplit tSplit = (TableSplit) split;<a name="line.124"></a> -<span class="sourceLineNo">125</span> // if no table record reader was provided use default<a name="line.125"></a> -<span class="sourceLineNo">126</span> final TableRecordReader trr = this.tableRecordReader == null ? new TableRecordReader() :<a name="line.126"></a> -<span class="sourceLineNo">127</span> this.tableRecordReader;<a name="line.127"></a> -<span class="sourceLineNo">128</span> trr.setStartRow(tSplit.getStartRow());<a name="line.128"></a> -<span class="sourceLineNo">129</span> trr.setEndRow(tSplit.getEndRow());<a name="line.129"></a> -<span class="sourceLineNo">130</span> trr.setHTable(this.table);<a name="line.130"></a> -<span class="sourceLineNo">131</span> trr.setInputColumns(this.inputColumns);<a name="line.131"></a> -<span class="sourceLineNo">132</span> trr.setRowFilter(this.rowFilter);<a name="line.132"></a> -<span class="sourceLineNo">133</span> trr.init();<a name="line.133"></a> -<span class="sourceLineNo">134</span> return new RecordReader<ImmutableBytesWritable, Result>() {<a name="line.134"></a> -<span class="sourceLineNo">135</span><a name="line.135"></a> -<span class="sourceLineNo">136</span> @Override<a name="line.136"></a> -<span class="sourceLineNo">137</span> public void close() throws IOException {<a name="line.137"></a> -<span class="sourceLineNo">138</span> trr.close();<a name="line.138"></a> -<span class="sourceLineNo">139</span> closeTable();<a name="line.139"></a> -<span class="sourceLineNo">140</span> }<a name="line.140"></a> -<span class="sourceLineNo">141</span><a name="line.141"></a> -<span class="sourceLineNo">142</span> @Override<a name="line.142"></a> -<span class="sourceLineNo">143</span> public ImmutableBytesWritable createKey() {<a name="line.143"></a> -<span class="sourceLineNo">144</span> return trr.createKey();<a name="line.144"></a> -<span class="sourceLineNo">145</span> }<a name="line.145"></a> -<span class="sourceLineNo">146</span><a name="line.146"></a> -<span class="sourceLineNo">147</span> @Override<a name="line.147"></a> -<span class="sourceLineNo">148</span> public Result createValue() {<a name="line.148"></a> -<span class="sourceLineNo">149</span> return trr.createValue();<a name="line.149"></a> -<span class="sourceLineNo">150</span> }<a name="line.150"></a> -<span class="sourceLineNo">151</span><a name="line.151"></a> -<span class="sourceLineNo">152</span> @Override<a name="line.152"></a> -<span class="sourceLineNo">153</span> public long getPos() throws IOException {<a name="line.153"></a> -<span class="sourceLineNo">154</span> return trr.getPos();<a name="line.154"></a> -<span class="sourceLineNo">155</span> }<a name="line.155"></a> -<span class="sourceLineNo">156</span><a name="line.156"></a> -<span class="sourceLineNo">157</span> @Override<a name="line.157"></a> -<span class="sourceLineNo">158</span> public float getProgress() throws IOException {<a name="line.158"></a> -<span class="sourceLineNo">159</span> return trr.getProgress();<a name="line.159"></a> -<span class="sourceLineNo">160</span> }<a name="line.160"></a> -<span class="sourceLineNo">161</span><a name="line.161"></a> -<span class="sourceLineNo">162</span> @Override<a name="line.162"></a> -<span class="sourceLineNo">163</span> public boolean next(ImmutableBytesWritable key, Result value) throws IOException {<a name="line.163"></a> -<span class="sourceLineNo">164</span> return trr.next(key, value);<a name="line.164"></a> -<span class="sourceLineNo">165</span> }<a name="line.165"></a> -<span class="sourceLineNo">166</span> };<a name="line.166"></a> -<span class="sourceLineNo">167</span> }<a name="line.167"></a> -<span class="sourceLineNo">168</span><a name="line.168"></a> -<span class="sourceLineNo">169</span> /**<a name="line.169"></a> -<span class="sourceLineNo">170</span> * Calculates the splits that will serve as input for the map tasks.<a name="line.170"></a> -<span class="sourceLineNo">171</span> *<a name="line.171"></a> -<span class="sourceLineNo">172</span> * Splits are created in number equal to the smallest between numSplits and<a name="line.172"></a> -<span class="sourceLineNo">173</span> * the number of {@link org.apache.hadoop.hbase.regionserver.HRegion}s in the table. <a name="line.173"></a> -<span class="sourceLineNo">174</span> * If the number of splits is smaller than the number of <a name="line.174"></a> -<span class="sourceLineNo">175</span> * {@link org.apache.hadoop.hbase.regionserver.HRegion}s then splits are spanned across<a name="line.175"></a> -<span class="sourceLineNo">176</span> * multiple {@link org.apache.hadoop.hbase.regionserver.HRegion}s <a name="line.176"></a> -<span class="sourceLineNo">177</span> * and are grouped the most evenly possible. In the<a name="line.177"></a> -<span class="sourceLineNo">178</span> * case splits are uneven the bigger splits are placed first in the<a name="line.178"></a> -<span class="sourceLineNo">179</span> * {@link InputSplit} array.<a name="line.179"></a> -<span class="sourceLineNo">180</span> *<a name="line.180"></a> -<span class="sourceLineNo">181</span> * @param job the map task {@link JobConf}<a name="line.181"></a> -<span class="sourceLineNo">182</span> * @param numSplits a hint to calculate the number of splits (mapred.map.tasks).<a name="line.182"></a> -<span class="sourceLineNo">183</span> *<a name="line.183"></a> -<span class="sourceLineNo">184</span> * @return the input splits<a name="line.184"></a> -<span class="sourceLineNo">185</span> *<a name="line.185"></a> -<span class="sourceLineNo">186</span> * @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)<a name="line.186"></a> -<span class="sourceLineNo">187</span> */<a name="line.187"></a> -<span class="sourceLineNo">188</span> public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {<a name="line.188"></a> -<span class="sourceLineNo">189</span> if (this.table == null) {<a name="line.189"></a> -<span class="sourceLineNo">190</span> initialize(job);<a name="line.190"></a> -<span class="sourceLineNo">191</span> }<a name="line.191"></a> -<span class="sourceLineNo">192</span> // null check in case our child overrides getTable to not throw.<a name="line.192"></a> -<span class="sourceLineNo">193</span> try {<a name="line.193"></a> -<span class="sourceLineNo">194</span> if (getTable() == null) {<a name="line.194"></a> -<span class="sourceLineNo">195</span> // initialize() must not have been implemented in the subclass.<a name="line.195"></a> -<span class="sourceLineNo">196</span> throw new IOException(INITIALIZATION_ERROR);<a name="line.196"></a> -<span class="sourceLineNo">197</span> }<a name="line.197"></a> -<span class="sourceLineNo">198</span> } catch (IllegalStateException exception) {<a name="line.198"></a> -<span class="sourceLineNo">199</span> throw new IOException(INITIALIZATION_ERROR, exception);<a name="line.199"></a> -<span class="sourceLineNo">200</span> }<a name="line.200"></a> -<span class="sourceLineNo">201</span><a name="line.201"></a> -<span class="sourceLineNo">202</span> byte [][] startKeys = this.regionLocator.getStartKeys();<a name="line.202"></a> -<span class="sourceLineNo">203</span> if (startKeys == null || startKeys.length == 0) {<a name="line.203"></a> -<span class="sourceLineNo">204</span> throw new IOException("Expecting at least one region");<a name="line.204"></a> -<span class="sourceLineNo">205</span> }<a name="line.205"></a> -<span class="sourceLineNo">206</span> if (this.inputColumns == null || this.inputColumns.length == 0) {<a name="line.206"></a> -<span class="sourceLineNo">207</span> throw new IOException("Expecting at least one column");<a name="line.207"></a> -<span class="sourceLineNo">208</span> }<a name="line.208"></a> -<span class="sourceLineNo">209</span> int realNumSplits = numSplits > startKeys.length? startKeys.length:<a name="line.209"></a> -<span class="sourceLineNo">210</span> numSplits;<a name="line.210"></a> -<span class="sourceLineNo">211</span> InputSplit[] splits = new InputSplit[realNumSplits];<a name="line.211"></a> -<span class="sourceLineNo">212</span> int middle = startKeys.length / realNumSplits;<a name="line.212"></a> -<span class="sourceLineNo">213</span> int startPos = 0;<a name="line.213"></a> -<span class="sourceLineNo">214</span> for (int i = 0; i < realNumSplits; i++) {<a name="line.214"></a> -<span class="sourceLineNo">215</span> int lastPos = startPos + middle;<a name="line.215"></a> -<span class="sourceLineNo">216</span> lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;<a name="line.216"></a> -<span class="sourceLineNo">217</span> String regionLocation = regionLocator.getRegionLocation(startKeys[startPos]).<a name="line.217"></a> -<span class="sourceLineNo">218</span> getHostname();<a name="line.218"></a> -<span class="sourceLineNo">219</span> splits[i] = new TableSplit(this.table.getName(),<a name="line.219"></a> -<span class="sourceLineNo">220</span> startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:<a name="line.220"></a> -<span class="sourceLineNo">221</span> HConstants.EMPTY_START_ROW, regionLocation);<a name="line.221"></a> -<span class="sourceLineNo">222</span> LOG.info("split: " + i + "->" + splits[i]);<a name="line.222"></a> -<span class="sourceLineNo">223</span> startPos = lastPos;<a name="line.223"></a> -<span class="sourceLineNo">224</span> }<a name="line.224"></a> -<span class="sourceLineNo">225</span> return splits;<a name="line.225"></a> -<span class="sourceLineNo">226</span> }<a name="line.226"></a> -<span class="sourceLineNo">227</span><a name="line.227"></a> -<span class="sourceLineNo">228</span> /**<a name="line.228"></a> -<span class="sourceLineNo">229</span> * Allows subclasses to initialize the table information.<a name="line.229"></a> -<span class="sourceLineNo">230</span> *<a name="line.230"></a> -<span class="sourceLineNo">231</span> * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.<a name="line.231"></a> -<span class="sourceLineNo">232</span> * @param tableName The {@link TableName} of the table to process.<a name="line.232"></a> -<span class="sourceLineNo">233</span> * @throws IOException<a name="line.233"></a> -<span class="sourceLineNo">234</span> */<a name="line.234"></a> -<span class="sourceLineNo">235</span> protected void initializeTable(Connection connection, TableName tableName) throws IOException {<a name="line.235"></a> -<span class="sourceLineNo">236</span> if (this.table != null || this.connection != null) {<a name="line.236"></a> -<span class="sourceLineNo">237</span> LOG.warn("initializeTable called multiple times. Overwriting connection and table " +<a name="line.237"></a> -<span class="sourceLineNo">238</span> "reference; TableInputFormatBase will not close these old references when done.");<a name="line.238"></a> -<span class="sourceLineNo">239</span> }<a name="line.239"></a> -<span class="sourceLineNo">240</span> this.table = connection.getTable(tableName);<a name="line.240"></a> -<span class="sourceLineNo">241</span> this.regionLocator = connection.getRegionLocator(tableName);<a name="line.241"></a> -<span class="sourceLineNo">242</span> this.connection = connection;<a name="line.242"></a> -<span class="sourceLineNo">243</span> }<a name="line.243"></a> -<span class="sourceLineNo">244</span><a name="line.244"></a> -<span class="sourceLineNo">245</span> /**<a name="line.245"></a> -<span class="sourceLineNo">246</span> * @param inputColumns to be passed in {@link Result} to the map task.<a name="line.246"></a> -<span class="sourceLineNo">247</span> */<a name="line.247"></a> -<span class="sourceLineNo">248</span> protected void setInputColumns(byte [][] inputColumns) {<a name="line.248"></a> -<span class="sourceLineNo">249</span> this.inputColumns = inputColumns;<a name="line.249"></a> -<span class="sourceLineNo">250</span> }<a name="line.250"></a> -<span class="sourceLineNo">251</span><a name="line.251"></a> -<span class="sourceLineNo">252</span> /**<a name="line.252"></a> -<span class="sourceLineNo">253</span> * Allows subclasses to get the {@link HTable}.<a name="line.253"></a> -<span class="sourceLineNo">254</span> * @deprecated use {@link #getTable()}<a name="line.254"></a> -<span class="sourceLineNo">255</span> */<a name="line.255"></a> -<span class="sourceLineNo">256</span> @Deprecated<a name="line.256"></a> -<span class="sourceLineNo">257</span> protected HTable getHTable() {<a name="line.257"></a> -<span class="sourceLineNo">258</span> return (HTable) getTable();<a name="line.258"></a> +<span class="sourceLineNo">031</span>import org.apache.hadoop.hbase.client.RegionLocator;<a name="line.31"></a> +<span class="sourceLineNo">032</span>import org.apache.hadoop.hbase.client.Result;<a name="line.32"></a> +<span class="sourceLineNo">033</span>import org.apache.hadoop.hbase.client.Table;<a name="line.33"></a> +<span class="sourceLineNo">034</span>import org.apache.hadoop.hbase.filter.Filter;<a name="line.34"></a> +<span class="sourceLineNo">035</span>import org.apache.hadoop.hbase.io.ImmutableBytesWritable;<a name="line.35"></a> +<span class="sourceLineNo">036</span>import org.apache.hadoop.mapred.InputFormat;<a name="line.36"></a> +<span class="sourceLineNo">037</span>import org.apache.hadoop.mapred.InputSplit;<a name="line.37"></a> +<span class="sourceLineNo">038</span>import org.apache.hadoop.mapred.JobConf;<a name="line.38"></a> +<span class="sourceLineNo">039</span>import org.apache.hadoop.mapred.RecordReader;<a name="line.39"></a> +<span class="sourceLineNo">040</span>import org.apache.hadoop.mapred.Reporter;<a name="line.40"></a> +<span class="sourceLineNo">041</span><a name="line.41"></a> +<span class="sourceLineNo">042</span>/**<a name="line.42"></a> +<span class="sourceLineNo">043</span> * A Base for {@link TableInputFormat}s. Receives a {@link Table}, a<a name="line.43"></a> +<span class="sourceLineNo">044</span> * byte[] of input columns and optionally a {@link Filter}.<a name="line.44"></a> +<span class="sourceLineNo">045</span> * Subclasses may use other TableRecordReader implementations.<a name="line.45"></a> +<span class="sourceLineNo">046</span> *<a name="line.46"></a> +<span class="sourceLineNo">047</span> * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to<a name="line.47"></a> +<span class="sourceLineNo">048</span> * function properly. Each of the entry points to this class used by the MapReduce framework,<a name="line.48"></a> +<span class="sourceLineNo">049</span> * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},<a name="line.49"></a> +<span class="sourceLineNo">050</span> * will call {@link #initialize(JobConf)} as a convenient centralized location to handle<a name="line.50"></a> +<span class="sourceLineNo">051</span> * retrieving the necessary configuration information. If your subclass overrides either of these<a name="line.51"></a> +<span class="sourceLineNo">052</span> * methods, either call the parent version or call initialize yourself.<a name="line.52"></a> +<span class="sourceLineNo">053</span> *<a name="line.53"></a> +<span class="sourceLineNo">054</span> * <p><a name="line.54"></a> +<span class="sourceLineNo">055</span> * An example of a subclass:<a name="line.55"></a> +<span class="sourceLineNo">056</span> * <pre><a name="line.56"></a> +<span class="sourceLineNo">057</span> * class ExampleTIF extends TableInputFormatBase {<a name="line.57"></a> +<span class="sourceLineNo">058</span> *<a name="line.58"></a> +<span class="sourceLineNo">059</span> * {@literal @}Override<a name="line.59"></a> +<span class="sourceLineNo">060</span> * protected void initialize(JobConf context) throws IOException {<a name="line.60"></a> +<span class="sourceLineNo">061</span> * // We are responsible for the lifecycle of this connection until we hand it over in<a name="line.61"></a> +<span class="sourceLineNo">062</span> * // initializeTable.<a name="line.62"></a> +<span class="sourceLineNo">063</span> * Connection connection =<a name="line.63"></a> +<span class="sourceLineNo">064</span> * ConnectionFactory.createConnection(HBaseConfiguration.create(job));<a name="line.64"></a> +<span class="sourceLineNo">065</span> * TableName tableName = TableName.valueOf("exampleTable");<a name="line.65"></a> +<span class="sourceLineNo">066</span> * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.<a name="line.66"></a> +<span class="sourceLineNo">067</span> * initializeTable(connection, tableName);<a name="line.67"></a> +<span class="sourceLineNo">068</span> * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),<a name="line.68"></a> +<span class="sourceLineNo">069</span> * Bytes.toBytes("columnB") };<a name="line.69"></a> +<span class="sourceLineNo">070</span> * // mandatory<a name="line.70"></a> +<span class="sourceLineNo">071</span> * setInputColumns(inputColumns);<a name="line.71"></a> +<span class="sourceLineNo">072</span> * // optional, by default we'll get everything for the given columns.<a name="line.72"></a> +<span class="sourceLineNo">073</span> * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));<a name="line.73"></a> +<span class="sourceLineNo">074</span> * setRowFilter(exampleFilter);<a name="line.74"></a> +<span class="sourceLineNo">075</span> * }<a name="line.75"></a> +<span class="sourceLineNo">076</span> * }<a name="line.76"></a> +<span class="sourceLineNo">077</span> * </pre><a name="line.77"></a> +<span class="sourceLineNo">078</span> */<a name="line.78"></a> +<span class="sourceLineNo">079</span><a name="line.79"></a> +<span class="sourceLineNo">080</span>@InterfaceAudience.Public<a name="line.80"></a> +<span class="sourceLineNo">081</span>@InterfaceStability.Stable<a name="line.81"></a> +<span class="sourceLineNo">082</span>public abstract class TableInputFormatBase<a name="line.82"></a> +<span class="sourceLineNo">083</span>implements InputFormat<ImmutableBytesWritable, Result> {<a name="line.83"></a> +<span class="sourceLineNo">084</span> private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);<a name="line.84"></a> +<span class="sourceLineNo">085</span> private byte [][] inputColumns;<a name="line.85"></a> +<span class="sourceLineNo">086</span> private Table table;<a name="line.86"></a> +<span class="sourceLineNo">087</span> private RegionLocator regionLocator;<a name="line.87"></a> +<span class="sourceLineNo">088</span> private Connection connection;<a name="line.88"></a> +<span class="sourceLineNo">089</span> private TableRecordReader tableRecordReader;<a name="line.89"></a> +<span class="sourceLineNo">090</span> private Filter rowFilter;<a name="line.90"></a> +<span class="sourceLineNo">091</span><a name="line.91"></a> +<span class="sourceLineNo">092</span> private static final String NOT_INITIALIZED = "The input format instance has not been properly " +<a name="line.92"></a> +<span class="sourceLineNo">093</span> "initialized. Ensure you call initializeTable either in your constructor or initialize " +<a name="line.93"></a> +<span class="sourceLineNo">094</span> "method";<a name="line.94"></a> +<span class="sourceLineNo">095</span> private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +<a name="line.95"></a> +<span class="sourceLineNo">096</span> " previous error. Please look at the previous logs lines from" +<a name="line.96"></a> +<span class="sourceLineNo">097</span> " the task's full log for more details.";<a name="line.97"></a> +<span class="sourceLineNo">098</span><a name="line.98"></a> +<span class="sourceLineNo">099</span> /**<a name="line.99"></a> +<span class="sourceLineNo">100</span> * Builds a TableRecordReader. If no TableRecordReader was provided, uses<a name="line.100"></a> +<span class="sourceLineNo">101</span> * the default.<a name="line.101"></a> +<span class="sourceLineNo">102</span> *<a name="line.102"></a> +<span class="sourceLineNo">103</span> * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,<a name="line.103"></a> +<span class="sourceLineNo">104</span> * JobConf, Reporter)<a name="line.104"></a> +<span class="sourceLineNo">105</span> */<a name="line.105"></a> +<span class="sourceLineNo">106</span> public RecordReader<ImmutableBytesWritable, Result> getRecordReader(<a name="line.106"></a> +<span class="sourceLineNo">107</span> InputSplit split, JobConf job, Reporter reporter)<a name="line.107"></a> +<span class="sourceLineNo">108</span> throws IOException {<a name="line.108"></a> +<span class="sourceLineNo">109</span> // In case a subclass uses the deprecated approach or calls initializeTable directly<a name="line.109"></a> +<span class="sourceLineNo">110</span> if (table == null) {<a name="line.110"></a> +<span class="sourceLineNo">111</span> initialize(job);<a name="line.111"></a> +<span class="sourceLineNo">112</span> }<a name="line.112"></a> +<span class="sourceLineNo">113</span> // null check in case our child overrides getTable to not throw.<a name="line.113"></a> +<span class="sourceLineNo">114</span> try {<a name="line.114"></a> +<span class="sourceLineNo">115</span> if (getTable() == null) {<a name="line.115"></a> +<span class="sourceLineNo">116</span> // initialize() must not have been implemented in the subclass.<a name="line.116"></a> +<span class="sourceLineNo">117</span> throw new IOException(INITIALIZATION_ERROR);<a name="line.117"></a> +<span class="sourceLineNo">118</span> }<a name="line.118"></a> +<span class="sourceLineNo">119</span> } catch (IllegalStateException exception) {<a name="line.119"></a> +<span class="sourceLineNo">120</span> throw new IOException(INITIALIZATION_ERROR, exception);<a name="line.120"></a> +<span class="sourceLineNo">121</span> }<a name="line.121"></a> +<span class="sourceLineNo">122</span><a name="line.122"></a> +<span class="sourceLineNo">123</span> TableSplit tSplit = (TableSplit) split;<a name="line.123"></a> +<span class="sourceLineNo">124</span> // if no table record reader was provided use default<a name="line.124"></a> +<span class="sourceLineNo">125</span> final TableRecordReader trr = this.tableRecordReader == null ? new TableRecordReader() :<a name="line.125"></a> +<span class="sourceLineNo">126</span> this.tableRecordReader;<a name="line.126"></a> +<span class="sourceLineNo">127</span> trr.setStartRow(tSplit.getStartRow());<a name="line.127"></a> +<span class="sourceLineNo">128</span> trr.setEndRow(tSplit.getEndRow());<a name="line.128"></a> +<span class="sourceLineNo">129</span> trr.setHTable(this.table);<a name="line.129"></a> +<span class="sourceLineNo">130</span> trr.setInputColumns(this.inputColumns);<a name="line.130"></a> +<span class="sourceLineNo">131</span> trr.setRowFilter(this.rowFilter);<a name="line.131"></a> +<span class="sourceLineNo">132</span> trr.init();<a name="line.132"></a> +<span class="sourceLineNo">133</span> return new RecordReader<ImmutableBytesWritable, Result>() {<a name="line.133"></a> +<span class="sourceLineNo">134</span><a name="line.134"></a> +<span class="sourceLineNo">135</span> @Override<a name="line.135"></a> +<span class="sourceLineNo">136</span> public void close() throws IOException {<a name="line.136"></a> +<span class="sourceLineNo">137</span> trr.close();<a name="line.137"></a> +<span class="sourceLineNo">138</span> closeTable();<a name="line.138"></a> +<span class="sourceLineNo">139</span> }<a name="line.139"></a> +<span class="sourceLineNo">140</span><a name="line.140"></a> +<span class="sourceLineNo">141</span> @Override<a name="line.141"></a> +<span class="sourceLineNo">142</span> public ImmutableBytesWritable createKey() {<a name="line.142"></a> +<span class="sourceLineNo">143</span> return trr.createKey();<a name="line.143"></a> +<span class="sourceLineNo">144</span> }<a name="line.144"></a> +<span class="sourceLineNo">145</span><a name="line.145"></a> +<span class="sourceLineNo">146</span> @Override<a name="line.146"></a> +<span class="sourceLineNo">147</span> public Result createValue() {<a name="line.147"></a> +<span class="sourceLineNo">148</span> return trr.createValue();<a name="line.148"></a> +<span class="sourceLineNo">149</span> }<a name="line.149"></a> +<span class="sourceLineNo">150</span><a name="line.150"></a> +<span class="sourceLineNo">151</span> @Override<a name="line.151"></a> +<span class="sourceLineNo">152</span> public long getPos() throws IOException {<a name="line.152"></a> +<span class="sourceLineNo">153</span> return trr.getPos();<a name="line.153"></a> +<span class="sourceLineNo">154</span> }<a name="line.154"></a> +<span class="sourceLineNo">155</span><a name="line.155"></a> +<span class="sourceLineNo">156</span> @Override<a name="line.156"></a> +<span class="sourceLineNo">157</span> public float getProgress() throws IOException {<a name="line.157"></a> +<span class="sourceLineNo">158</span> return trr.getProgress();<a name="line.158"></a> +<span class="sourceLineNo">159</span> }<a name="line.159"></a> +<span class="sourceLineNo">160</span><a name="line.160"></a> +<span class="sourceLineNo">161</span> @Override<a name="line.161"></a> +<span class="sourceLineNo">162</span> public boolean next(ImmutableBytesWritable key, Result value) throws IOException {<a name="line.162"></a> +<span class="sourceLineNo">163</span> return trr.next(key, value);<a name="line.163"></a> +<span class="sourceLineNo">164</span> }<a name="line.164"></a> +<span class="sourceLineNo">165</span> };<a name="line.165"></a> +<span class="sourceLineNo">166</span> }<a name="line.166"></a> +<span class="sourceLineNo">167</span><a name="line.167"></a> +<span class="sourceLineNo">168</span> /**<a name="line.168"></a> +<span class="sourceLineNo">169</span> * Calculates the splits that will serve as input for the map tasks.<a name="line.169"></a> +<span class="sourceLineNo">170</span> *<a name="line.170"></a> +<span class="sourceLineNo">171</span> * Splits are created in number equal to the smallest between numSplits and<a name="line.171"></a> +<span class="sourceLineNo">172</span> * the number of {@link org.apache.hadoop.hbase.regionserver.HRegion}s in the table. <a name="line.172"></a> +<span class="sourceLineNo">173</span> * If the number of splits is smaller than the number of <a name="line.173"></a> +<span class="sourceLineNo">174</span> * {@link org.apache.hadoop.hbase.regionserver.HRegion}s then splits are spanned across<a name="line.174"></a> +<span class="sourceLineNo">175</span> * multiple {@link org.apache.hadoop.hbase.regionserver.HRegion}s <a name="line.175"></a> +<span class="sourceLineNo">176</span> * and are grouped the most evenly possible. In the<a name="line.176"></a> +<span class="sourceLineNo">177</span> * case splits are uneven the bigger splits are placed first in the<a name="line.177"></a> +<span class="sourceLineNo">178</span> * {@link InputSplit} array.<a name="line.178"></a> +<span class="sourceLineNo">179</span> *<a name="line.179"></a> +<span class="sourceLineNo">180</span> * @param job the map task {@link JobConf}<a name="line.180"></a> +<span class="sourceLineNo">181</span> * @param numSplits a hint to calculate the number of splits (mapred.map.tasks).<a name="line.181"></a> +<span class="sourceLineNo">182</span> *<a name="line.182"></a> +<span class="sourceLineNo">183</span> * @return the input splits<a name="line.183"></a> +<span class="sourceLineNo">184</span> *<a name="line.184"></a> +<span class="sourceLineNo">185</span> * @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)<a name="line.185"></a> +<span class="sourceLineNo">186</span> */<a name="line.186"></a> +<span class="sourceLineNo">187</span> public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {<a name="line.187"></a> +<span class="sourceLineNo">188</span> if (this.table == null) {<a name="line.188"></a> +<span class="sourceLineNo">189</span> initialize(job);<a name="line.189"></a> +<span class="sourceLineNo">190</span> }<a name="line.190"></a> +<span class="sourceLineNo">191</span> // null check in case our child overrides getTable to not throw.<a name="line.191"></a> +<span class="sourceLineNo">192</span> try {<a name="line.192"></a> +<span class="sourceLineNo">193</span> if (getTable() == null) {<a name="line.193"></a> +<span class="sourceLineNo">194</span> // initialize() must not have been implemented in the subclass.<a name="line.194"></a> +<span class="sourceLineNo">195</span> throw new IOException(INITIALIZATION_ERROR);<a name="line.195"></a> +<span class="sourceLineNo">196</span> }<a name="line.196"></a> +<span class="sourceLineNo">197</span> } catch (IllegalStateException exception) {<a name="line.197"></a> +<span class="sourceLineNo">198</span> throw new IOException(INITIALIZATION_ERROR, exception);<a name="line.198"></a> +<span class="sourceLineNo">199</span> }<a name="line.199"></a> +<span class="sourceLineNo">200</span><a name="line.200"></a> +<span class="sourceLineNo">201</span> byte [][] startKeys = this.regionLocator.getStartKeys();<a name="line.201"></a> +<span class="sourceLineNo">202</span> if (startKeys == null || startKeys.length == 0) {<a name="line.202"></a> +<span class="sourceLineNo">203</span> throw new IOException("Expecting at least one region");<a name="line.203"></a> +<span class="sourceLineNo">204</span> }<a name="line.204"></a> +<span class="sourceLineNo">205</span> if (this.inputColumns == null || this.inputColumns.length == 0) {<a name="line.205"></a> +<span class="sourceLineNo">206</span> throw new IOException("Expecting at least one column");<a name="line.206"></a> +<span class="sourceLineNo">207</span> }<a name="line.207"></a> +<span class="sourceLineNo">208</span> int realNumSplits = numSplits > startKeys.length? startKeys.length:<a name="line.208"></a> +<span class="sourceLineNo">209</span> numSplits;<a name="line.209"></a> +<span class="sourceLineNo">210</span> InputSplit[] splits = new InputSplit[realNumSplits];<a name="line.210"></a> +<span class="sourceLineNo">211</span> int middle = startKeys.length / realNumSplits;<a name="line.211"></a> +<span class="sourceLineNo">212</span> int startPos = 0;<a name="line.212"></a> +<span class="sourceLineNo">213</span> for (int i = 0; i < realNumSplits; i++) {<a name="line.213"></a> +<span class="sourceLineNo">214</span> int lastPos = startPos + middle;<a name="line.214"></a> +<span class="sourceLineNo">215</span> lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;<a name="line.215"></a> +<span class="sourceLineNo">216</span> String regionLocation = regionLocator.getRegionLocation(startKeys[startPos]).<a name="line.216"></a> +<span class="sourceLineNo">217</span> getHostname();<a name="line.217"></a> +<span class="sourceLineNo">218</span> splits[i] = new TableSplit(this.table.getName(),<a name="line.218"></a> +<span class="sourceLineNo">219</span> startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:<a name="line.219"></a> +<span class="sourceLineNo">220</span> HConstants.EMPTY_START_ROW, regionLocation);<a name="line.220"></a> +<span class="sourceLineNo">221</span> LOG.info("split: " + i + "->" + splits[i]);<a name="line.221"></a> +<span class="sourceLineNo">222</span> startPos = lastPos;<a name="line.222"></a> +<span class="sourceLineNo">223</span> }<a name="line.223"></a> +<span class="sourceLineNo">224</span> return splits;<a name="line.224"></a> +<span class="sourceLineNo">225</span> }<a name="line.225"></a> +<span class="sourceLineNo">226</span><a name="line.226"></a> +<span class="sourceLineNo">227</span> /**<a name="line.227"></a> +<span class="sourceLineNo">228</span> * Allows subclasses to initialize the table information.<a name="line.228"></a> +<span class="sourceLineNo">229</span> *<a name="line.229"></a> +<span class="sourceLineNo">230</span> * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.<a name="line.230"></a> +<span class="sourceLineNo">231</span> * @param tableName The {@link TableName} of the table to process.<a name="line.231"></a> +<span class="sourceLineNo">232</span> * @throws IOException<a name="line.232"></a> +<span class="sourceLineNo">233</span> */<a name="line.233"></a> +<span class="sourceLineNo">234</span> protected void initializeTable(Connection connection, TableName tableName) throws IOException {<a name="line.234"></a> +<span class="sourceLineNo">235</span> if (this.table != null || this.connection != null) {<a name="line.235"></a> +<span class="sourceLineNo">236</span> LOG.warn("initializeTable called multiple times. Overwriting connection and table " +<a name="line.236"></a> +<span class="sourceLineNo">237</span> "reference; TableInputFormatBase will not close these old references when done.");<a name="line.237"></a> +<span class="sourceLineNo">238</span> }<a name="line.238"></a> +<span class="sourceLineNo">239</span> this.table = connection.getTable(tableName);<a name="line.239"></a> +<span class="sourceLineNo">240</span> this.regionLocator = connection.getRegionLocator(tableName);<a name="line.240"></a> +<span class="sourceLineNo">241</span> this.connection = connection;<a name="line.241"></a> +<span class="sourceLineNo">242</span> }<a name="line.242"></a> +<span class="sourceLineNo">243</span><a name="line.243"></a> +<span class="sourceLineNo">244</span> /**<a name="line.244"></a> +<span class="sourceLineNo">245</span> * @param inputColumns to be passed in {@link Result} to the map task.<a name="line.245"></a> +<span class="sourceLineNo">246</span> */<a name="line.246"></a> +<span class="sourceLineNo">247</span> protected void setInputColumns(byte [][] inputColumns) {<a name="line.247"></a> +<span class="sourceLineNo">248</span> this.inputColumns = inputColumns;<a name="line.248"></a> +<span class="sourceLineNo">249</span> }<a name="line.249"></a> +<span class="sourceLineNo">250</span><a name="line.250"></a> +<span class="sourceLineNo">251</span> /**<a name="line.251"></a> +<span class="sourceLineNo">252</span> * Allows subclasses to get the {@link Table}.<a name="line.252"></a> +<span class="sourceLineNo">253</span> */<a name="line.253"></a> +<span class="sourceLineNo">254</span> protected Table getTable() {<a name="line.254"></a> +<span class="sourceLineNo">255</span> if (table == null) {<a name="line.255"></a> +<span class="sourceLineNo">256</span> throw new IllegalStateException(NOT_INITIALIZED);<a name="line.256"></a> +<span class="sourceLineNo">257</span> }<a name="line.257"></a> +<span class="sourceLineNo">258</span> return this.table;<a name="line.258"></a> <span class="sourceLineNo">259</span> }<a name="line.259"></a> <span class="sourceLineNo">260</span><a name="line.260"></a> <span class="sourceLineNo">261</span> /**<a name="line.261"></a> -<span class="sourceLineNo">262</span> * Allows subclasses to get the {@link Table}.<a name="line.262"></a> -<span class="sourceLineNo">263</span> */<a name="line.263"></a> -<span class="sourceLineNo">264</span> protected Table getTable() {<a name="line.264"></a> -<span class="sourceLineNo">265</span> if (table == null) {<a name="line.265"></a> -<span class="sourceLineNo">266</span> throw new IllegalStateException(NOT_INITIALIZED);<a name="line.266"></a> -<span class="sourceLineNo">267</span> }<a name="line.267"></a> -<span class="sourceLineNo">268</span> return this.table;<a name="line.268"></a> +<span class="sourceLineNo">262</span> * Allows subclasses to set the {@link TableRecordReader}.<a name="line.262"></a> +<span class="sourceLineNo">263</span> *<a name="line.263"></a> +<span class="sourceLineNo">264</span> * @param tableRecordReader<a name="line.264"></a> +<span class="sourceLineNo">265</span> * to provide other {@link TableRecordReader} implementations.<a name="line.265"></a> +<span class="sourceLineNo">266</span> */<a name="line.266"></a> +<span class="sourceLineNo">267</span> protected void setTableRecordReader(TableRecordReader tableRecordReader) {<a name="line.267"></a> +<span class="sourceLineNo">268</span> this.tableRecordReader = tableRecordReader;<a name="line.268"></a> <span class="sourceLineNo">269</span> }<a name="line.269"></a> <span class="sourceLineNo">270</span><a name="line.270"></a> <span class="sourceLineNo">271</span> /**<a name="line.271"></a> -<span class="sourceLineNo">272</span> * Allows subclasses to set the {@link HTable}.<a name="line.272"></a> +<span class="sourceLineNo">272</span> * Allows subclasses to set the {@link Filter} to be used.<a name="line.272"></a> <span class="sourceLineNo">273</span> *<a name="line.273"></a> -<span class="sourceLineNo">274</span> * @param table to get the data from<a name="line.274"></a> -<span class="sourceLineNo">275</span> * @deprecated use {@link #initializeTable(Connection,TableName)}<a name="line.275"></a> -<span class="sourceLineNo">276</span> */<a name="line.276"></a> -<span class="sourceLineNo">277</span> @Deprecated<a name="line.277"></a> -<span class="sourceLineNo">278</span> protected void setHTable(HTable table) {<a name="line.278"></a> -<span class="sourceLineNo">279</span> this.table = table;<a name="line.279"></a> -<span class="sourceLineNo">280</span> }<a name="line.280"></a> -<span class="sourceLineNo">281</span><a name="line.281"></a> -<span class="sourceLineNo">282</span> /**<a name="line.282"></a> -<span class="sourceLineNo">283</span> * Allows subclasses to set the {@link TableRecordReader}.<a name="line.283"></a> -<span class="sourceLineNo">284</span> *<a name="line.284"></a> -<span class="sourceLineNo">285</span> * @param tableRecordReader<a name="line.285"></a> -<span class="sourceLineNo">286</span> * to provide other {@link TableRecordReader} implementations.<a name="line.286"></a> -<span class="sourceLineNo">287</span> */<a name="line.287"></a> -<span class="sourceLineNo">288</span> protected void setTableRecordReader(TableRecordReader tableRecordReader) {<a name="line.288"></a> -<span class="sourceLineNo">289</span> this.tableRecordReader = tableRecordReader;<a name="line.289"></a> -<span class="sourceLineNo">290</span> }<a name="line.290"></a> -<span class="sourceLineNo">291</span><a name="line.291"></a> -<span class="sourceLineNo">292</span> /**<a name="line.292"></a> -<span class="sourceLineNo">293</span> * Allows subclasses to set the {@link Filter} to be used.<a name="line.293"></a> -<span class="sourceLineNo">294</span> *<a name="line.294"></a> -<span class="sourceLineNo">295</span> * @param rowFilter<a name="line.295"></a> -<span class="sourceLineNo">296</span> */<a name="line.296"></a> -<span class="sourceLineNo">297</span> protected void setRowFilter(Filter rowFilter) {<a name="line.297"></a> -<span class="sourceLineNo">298</span> this.rowFilter = rowFilter;<a name="line.298"></a> -<span class="sourceLineNo">299</span> }<a name="line.299"></a> -<span class="sourceLineNo">300</span><a name="line.300"></a> -<span class="sourceLineNo">301</span> /**<a name="line.301"></a> -<span class="sourceLineNo">302</span> * Handle subclass specific set up.<a name="line.302"></a> -<span class="sourceLineNo">303</span> * Each of the entry points used by the MapReduce framework,<a name="line.303"></a> -<span class="sourceLineNo">304</span> * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},<a name="line.304"></a> -<span class="sourceLineNo">305</span> * will call {@link #initialize(JobConf)} as a convenient centralized location to handle<a name="line.305"></a> -<span class="sourceLineNo">306</span> * retrieving the necessary configuration information and calling<a name="line.306"></a> -<span class="sourceLineNo">307</span> * {@link #initializeTable(Connection, TableName)}.<a name="line.307"></a> -<span class="sourceLineNo">308</span> *<a name="line.308"></a> -<span class="sourceLineNo">309</span> * Subclasses should implement their initialize call such that it is safe to call multiple times.<a name="line.309"></a> -<span class="sourceLineNo">310</span> * The current TableInputFormatBase implementation relies on a non-null table reference to decide<a name="line.310"></a> -<span class="sourceLineNo">311</span> * if an initialize call is needed, but this behavior may change in the future. In particular,<a name="line.311"></a> -<span class="sourceLineNo">312</span> * it is critical that initializeTable not be called multiple times since this will leak<a name="line.312"></a> -<span class="sourceLineNo">313</span> * Connection instances.<a name="line.313"></a> -<span class="sourceLineNo">314</span> *<a name="line.314"></a> -<span class="sourceLineNo">315</span> */<a name="line.315"></a> -<span class="sourceLineNo">316</span> protected void initialize(JobConf job) throws IOException {<a name="line.316"></a> -<span class="sourceLineNo">317</span> }<a name="line.317"></a> -<span class="sourceLineNo">318</span><a name="line.318"></a> -<span class="sourceLineNo">319</span> /**<a name="line.319"></a> -<span class="sourceLineNo">320</span> * Close the Table and related objects that were initialized via<a name="line.320"></a> -<span class="sourceLineNo">321</span> * {@link #initializeTable(Connection, TableName)}.<a name="line.321"></a> -<span class="sourceLineNo">322</span> *<a name="line.322"></a> -<span class="sourceLineNo">323</span> * @throws IOException<a name="line.323"></a> -<span class="sourceLineNo">324</span> */<a name="line.324"></a> -<span class="sourceLineNo">325</span> protected void closeTable() throws IOException {<a name="line.325"></a> -<span class="sourceLineNo">326</span> close(table, connection);<a name="line.326"></a> -<span class="sourceLineNo">327</span> table = null;<a name="line.327"></a> -<span class="sourceLineNo">328</span> connection = null;<a name="line.328"></a> -<span class="sourceLineNo">329</span> }<a name="line.329"></a> -<span class="sourceLineNo">330</span><a name="line.330"></a> -<span class="sourceLineNo">331</span> private void close(Closeable... closables) throws IOException {<a name="line.331"></a> -<span class="sourceLineNo">332</span> for (Closeable c : closables) {<a name="line.332"></a> -<span class="sourceLineNo">333</span> if(c != null) { c.close(); }<a name="line.333"></a> -<span class="sourceLineNo">334</span> }<a name="line.334"></a> -<span class="sourceLineNo">335</span> }<a name="line.335"></a> -<span class="sourceLineNo">336</span>}<a name="line.336"></a> +<span class="sourceLineNo">274</span> * @param rowFilter<a name="line.274"></a> +<span class="sourceLineNo">275</span> */<a name="line.275"></a> +<span class="sourceLineNo">276</span> protected void setRowFilter(Filter rowFilter) {<a name="line.276"></a> +<span class="sourceLineNo">277</span> this.rowFilter = rowFilter;<a name="line.277"></a> +<span class="sourceLineNo">278</span> }<a name="line.278"></a> +<span class="sourceLineNo">279</span><a name="line.279"></a> +<span class="sourceLineNo">280</span> /**<a name="line.280"></a> +<span class="sourceLineNo">281</span> * Handle subclass specific set up.<a name="line.281"></a> +<span class="sourceLineNo">282</span> * Each of the entry points used by the MapReduce framework,<a name="line.282"></a> +<span class="sourceLineNo">283</span> * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},<a name="line.283"></a> +<span class="sourceLineNo">284</span> * will call {@link #initialize(JobConf)} as a convenient centralized location to handle<a name="line.284"></a> +<span class="sourceLineNo">285</span> * retrieving the necessary configuration information and calling<a name="line.285"></a> +<span class="sourceLineNo">286</span> * {@link #initializeTable(Connection, TableName)}.<a name="line.286"></a> +<span class="sourceLineNo">287</span> *<a name="line.287"></a> +<span class="sourceLineNo">288</span> * Subclasses should implement their initialize call such that it is safe to call multiple times.<a name="line.288"></a> +<span class="sourceLineNo">289</span> * The current TableInputFormatBase implementation relies on a non-null table reference to decide<a name="line.289"></a> +<span class="sourceLineNo">290</span> * if an initialize call is needed, but this behavior may change in the future. In particular,<a name="line.290"></a> +<span class="sourceLineNo">291</span> * it is critical that initializeTable not be called multiple times since this will leak<a name="line.291"></a> +<span class="sourceLineNo">292</span> * Connection instances.<a name="line.292"></a> +<span class="sourceLineNo">293</span> *<a name="line.293"></a> +<span class="sourceLineNo">294</span> */<a name="line.294"></a> +<span class="sourceLineNo">295</span> protected void initialize(JobConf job) throws IOException {<a name="line.295"></a> +<span class="sourceLineNo">296</span> }<a name="line.296"></a> +<span class="sourceLineNo">297</span><a name="line.297"></a> +<span class="sourceLineNo">298</span> /**<a name="line.298"></a> +<span class="sourceLineNo">299</span> * Close the Table and related objects that were initialized via<a name="line.299"></a> +<span class="sourceLineNo">300</span> * {@link #initializeTable(Connection, TableName)}.<a name="line.300"></a> +<span class="sourceLineNo">301</span> *<a name="line.301"></a> +<span class="sourceLineNo">302</span> * @throws IOException<a name="line.302"></a> +<span class="sourceLineNo">303</span> */<a name="line.303"></a> +<span class="sourceLineNo">304</span> protected void closeTable() throws IOException {<a name="line.304"></a> +<span class="sourceLineNo">305</span> close(table, connection);<a name="line.305"></a> +<span class="sourceLineNo">306</span> table = null;<a name="line.306"></a> +<span class="sourceLineNo">307</span> connection = null;<a name="line.307"></a> +<span class="sourceLineNo">308</span> }<a name="line.308"></a> +<span class="sourceLineNo">309</span><a name="line.309"></a> +<span class="sourceLineNo">310</span> private void close(Closeable... closables) throws IOException {<a name="line.310"></a> +<span class="sourceLineNo">311</span> for (Closeable c : closables) {<a name="line.311"></a> +<span class="sourceLineNo">312</span> if(c != null) { c.close(); }<a name="line.312"></a> +<span class="sourceLineNo">313</span> }<a name="line.313"></a> +<span class="sourceLineNo">314</span> }<a name="line.314"></a> +<span class="sourceLineNo">315</span>}<a name="line.315"></a>
http://git-wip-us.apache.org/repos/asf/hbase/blob/fc29f802/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.html ---------------------------------------------------------------------- diff --git a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.html b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.html deleted file mode 100644 index 440a490..0000000 --- a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.html +++ /dev/null @@ -1,274 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> -<html lang="en"> -<head> -<title>Source code</title> -<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style"> -</head> -<body> -<div class="sourceContainer"> -<pre><span class="sourceLineNo">001</span>/**<a name="line.1"></a> -<span class="sourceLineNo">002</span> *<a name="line.2"></a> -<span class="sourceLineNo">003</span> * Licensed to the Apache Software Foundation (ASF) under one<a name="line.3"></a> -<span class="sourceLineNo">004</span> * or more contributor license agreements. See the NOTICE file<a name="line.4"></a> -<span class="sourceLineNo">005</span> * distributed with this work for additional information<a name="line.5"></a> -<span class="sourceLineNo">006</span> * regarding copyright ownership. The ASF licenses this file<a name="line.6"></a> -<span class="sourceLineNo">007</span> * to you under the Apache License, Version 2.0 (the<a name="line.7"></a> -<span class="sourceLineNo">008</span> * "License"); you may not use this file except in compliance<a name="line.8"></a> -<span class="sourceLineNo">009</span> * with the License. You may obtain a copy of the License at<a name="line.9"></a> -<span class="sourceLineNo">010</span> *<a name="line.10"></a> -<span class="sourceLineNo">011</span> * http://www.apache.org/licenses/LICENSE-2.0<a name="line.11"></a> -<span class="sourceLineNo">012</span> *<a name="line.12"></a> -<span class="sourceLineNo">013</span> * Unless required by applicable law or agreed to in writing, software<a name="line.13"></a> -<span class="sourceLineNo">014</span> * distributed under the License is distributed on an "AS IS" BASIS,<a name="line.14"></a> -<span class="sourceLineNo">015</span> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<a name="line.15"></a> -<span class="sourceLineNo">016</span> * See the License for the specific language governing permissions and<a name="line.16"></a> -<span class="sourceLineNo">017</span> * limitations under the License.<a name="line.17"></a> -<span class="sourceLineNo">018</span> */<a name="line.18"></a> -<span class="sourceLineNo">019</span>package org.apache.hadoop.hbase.mapreduce;<a name="line.19"></a> -<span class="sourceLineNo">020</span><a name="line.20"></a> -<span class="sourceLineNo">021</span>import java.io.IOException;<a name="line.21"></a> -<span class="sourceLineNo">022</span>import java.util.List;<a name="line.22"></a> -<span class="sourceLineNo">023</span>import java.util.Map;<a name="line.23"></a> -<span class="sourceLineNo">024</span><a name="line.24"></a> -<span class="sourceLineNo">025</span>import org.apache.commons.logging.Log;<a name="line.25"></a> -<span class="sourceLineNo">026</span>import org.apache.commons.logging.LogFactory;<a name="line.26"></a> -<span class="sourceLineNo">027</span>import org.apache.hadoop.hbase.classification.InterfaceAudience;<a name="line.27"></a> -<span class="sourceLineNo">028</span>import org.apache.hadoop.hbase.classification.InterfaceStability;<a name="line.28"></a> -<span class="sourceLineNo">029</span>import org.apache.hadoop.conf.Configuration;<a name="line.29"></a> -<span class="sourceLineNo">030</span>import org.apache.hadoop.hbase.HTableDescriptor;<a name="line.30"></a> -<span class="sourceLineNo">031</span>import org.apache.hadoop.hbase.KeyValue;<a name="line.31"></a> -<span class="sourceLineNo">032</span>import org.apache.hadoop.hbase.client.HTable;<a name="line.32"></a> -<span class="sourceLineNo">033</span>import org.apache.hadoop.hbase.client.Table;<a name="line.33"></a> -<span class="sourceLineNo">034</span>import org.apache.hadoop.hbase.io.ImmutableBytesWritable;<a name="line.34"></a> -<span class="sourceLineNo">035</span>import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;<a name="line.35"></a> -<span class="sourceLineNo">036</span>import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;<a name="line.36"></a> -<span class="sourceLineNo">037</span>import org.apache.hadoop.hbase.regionserver.BloomType;<a name="line.37"></a> -<span class="sourceLineNo">038</span>import org.apache.hadoop.mapreduce.Job;<a name="line.38"></a> -<span class="sourceLineNo">039</span>import org.apache.hadoop.mapreduce.RecordWriter;<a name="line.39"></a> -<span class="sourceLineNo">040</span>import org.apache.hadoop.mapreduce.TaskAttemptContext;<a name="line.40"></a> -<span class="sourceLineNo">041</span>import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;<a name="line.41"></a> -<span class="sourceLineNo">042</span><a name="line.42"></a> -<span class="sourceLineNo">043</span>import com.google.common.annotations.VisibleForTesting;<a name="line.43"></a> -<span class="sourceLineNo">044</span><a name="line.44"></a> -<span class="sourceLineNo">045</span>/**<a name="line.45"></a> -<span class="sourceLineNo">046</span> * Writes HFiles. Passed KeyValues must arrive in order.<a name="line.46"></a> -<span class="sourceLineNo">047</span> * Writes current time as the sequence id for the file. Sets the major compacted<a name="line.47"></a> -<span class="sourceLineNo">048</span> * attribute on created hfiles. Calling write(null,null) will forcibly roll<a name="line.48"></a> -<span class="sourceLineNo">049</span> * all HFiles being written.<a name="line.49"></a> -<span class="sourceLineNo">050</span> * <p><a name="line.50"></a> -<span class="sourceLineNo">051</span> * Using this class as part of a MapReduce job is best done<a name="line.51"></a> -<span class="sourceLineNo">052</span> * using {@link #configureIncrementalLoad(Job, HTable)}.<a name="line.52"></a> -<span class="sourceLineNo">053</span> * @see KeyValueSortReducer<a name="line.53"></a> -<span class="sourceLineNo">054</span> * @deprecated use {@link HFileOutputFormat2} instead.<a name="line.54"></a> -<span class="sourceLineNo">055</span> */<a name="line.55"></a> -<span class="sourceLineNo">056</span>@Deprecated<a name="line.56"></a> -<span class="sourceLineNo">057</span>@InterfaceAudience.Public<a name="line.57"></a> -<span class="sourceLineNo">058</span>@InterfaceStability.Stable<a name="line.58"></a> -<span class="sourceLineNo">059</span>public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {<a name="line.59"></a> -<span class="sourceLineNo">060</span> private static final Log LOG = LogFactory.getLog(HFileOutputFormat.class);<a name="line.60"></a> -<span class="sourceLineNo">061</span><a name="line.61"></a> -<span class="sourceLineNo">062</span> // This constant is public since the client can modify this when setting<a name="line.62"></a> -<span class="sourceLineNo">063</span> // up their conf object and thus refer to this symbol.<a name="line.63"></a> -<span class="sourceLineNo">064</span> // It is present for backwards compatibility reasons. Use it only to<a name="line.64"></a> -<span class="sourceLineNo">065</span> // override the auto-detection of datablock encoding.<a name="line.65"></a> -<span class="sourceLineNo">066</span> public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =<a name="line.66"></a> -<span class="sourceLineNo">067</span> HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;<a name="line.67"></a> -<span class="sourceLineNo">068</span><a name="line.68"></a> -<span class="sourceLineNo">069</span> @Override<a name="line.69"></a> -<span class="sourceLineNo">070</span> public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(<a name="line.70"></a> -<span class="sourceLineNo">071</span> final TaskAttemptContext context) throws IOException, InterruptedException {<a name="line.71"></a> -<span class="sourceLineNo">072</span> return HFileOutputFormat2.createRecordWriter(context);<a name="line.72"></a> -<span class="sourceLineNo">073</span> }<a name="line.73"></a> -<span class="sourceLineNo">074</span><a name="line.74"></a> -<span class="sourceLineNo">075</span> /**<a name="line.75"></a> -<span class="sourceLineNo">076</span> * Configure a MapReduce Job to perform an incremental load into the given<a name="line.76"></a> -<span class="sourceLineNo">077</span> * table. This<a name="line.77"></a> -<span class="sourceLineNo">078</span> * <ul><a name="line.78"></a> -<span class="sourceLineNo">079</span> * <li>Inspects the table to configure a total order partitioner</li><a name="line.79"></a> -<span class="sourceLineNo">080</span> * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li><a name="line.80"></a> -<span class="sourceLineNo">081</span> * <li>Sets the number of reduce tasks to match the current number of regions</li><a name="line.81"></a> -<span class="sourceLineNo">082</span> * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li><a name="line.82"></a> -<span class="sourceLineNo">083</span> * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or<a name="line.83"></a> -<span class="sourceLineNo">084</span> * PutSortReducer)</li><a name="line.84"></a> -<span class="sourceLineNo">085</span> * </ul><a name="line.85"></a> -<span class="sourceLineNo">086</span> * The user should be sure to set the map output value class to either KeyValue or Put before<a name="line.86"></a> -<span class="sourceLineNo">087</span> * running this function.<a name="line.87"></a> -<span class="sourceLineNo">088</span> */<a name="line.88"></a> -<span class="sourceLineNo">089</span> public static void configureIncrementalLoad(Job job, HTable table)<a name="line.89"></a> -<span class="sourceLineNo">090</span> throws IOException {<a name="line.90"></a> -<span class="sourceLineNo">091</span> HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),<a name="line.91"></a> -<span class="sourceLineNo">092</span> table.getRegionLocator());<a name="line.92"></a> -<span class="sourceLineNo">093</span> }<a name="line.93"></a> -<span class="sourceLineNo">094</span><a name="line.94"></a> -<span class="sourceLineNo">095</span> /**<a name="line.95"></a> -<span class="sourceLineNo">096</span> * Runs inside the task to deserialize column family to compression algorithm<a name="line.96"></a> -<span class="sourceLineNo">097</span> * map from the configuration.<a name="line.97"></a> -<span class="sourceLineNo">098</span> *<a name="line.98"></a> -<span class="sourceLineNo">099</span> * @param conf to read the serialized values from<a name="line.99"></a> -<span class="sourceLineNo">100</span> * @return a map from column family to the configured compression algorithm<a name="line.100"></a> -<span class="sourceLineNo">101</span> */<a name="line.101"></a> -<span class="sourceLineNo">102</span> @VisibleForTesting<a name="line.102"></a> -<span class="sourceLineNo">103</span> static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration<a name="line.103"></a> -<span class="sourceLineNo">104</span> conf) {<a name="line.104"></a> -<span class="sourceLineNo">105</span> return HFileOutputFormat2.createFamilyCompressionMap(conf);<a name="line.105"></a> -<span class="sourceLineNo">106</span> }<a name="line.106"></a> -<span class="sourceLineNo">107</span><a name="line.107"></a> -<span class="sourceLineNo">108</span> /**<a name="line.108"></a> -<span class="sourceLineNo">109</span> * Runs inside the task to deserialize column family to bloom filter type<a name="line.109"></a> -<span class="sourceLineNo">110</span> * map from the configuration.<a name="line.110"></a> -<span class="sourceLineNo">111</span> *<a name="line.111"></a> -<span class="sourceLineNo">112</span> * @param conf to read the serialized values from<a name="line.112"></a> -<span class="sourceLineNo">113</span> * @return a map from column family to the the configured bloom filter type<a name="line.113"></a> -<span class="sourceLineNo">114</span> */<a name="line.114"></a> -<span class="sourceLineNo">115</span> @VisibleForTesting<a name="line.115"></a> -<span class="sourceLineNo">116</span> static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {<a name="line.116"></a> -<span class="sourceLineNo">117</span> return HFileOutputFormat2.createFamilyBloomTypeMap(conf);<a name="line.117"></a> -<span class="sourceLineNo">118</span> }<a name="line.118"></a> -<span class="sourceLineNo">119</span><a name="line.119"></a> -<span class="sourceLineNo">120</span> /**<a name="line.120"></a> -<span class="sourceLineNo">121</span> * Runs inside the task to deserialize column family to block size<a name="line.121"></a> -<span class="sourceLineNo">122</span> * map from the configuration.<a name="line.122"></a> -<span class="sourceLineNo">123</span> *<a name="line.123"></a> -<span class="sourceLineNo">124</span> * @param conf to read the serialized values from<a name="line.124"></a> -<span class="sourceLineNo">125</span> * @return a map from column family to the configured block size<a name="line.125"></a> -<span class="sourceLineNo">126</span> */<a name="line.126"></a> -<span class="sourceLineNo">127</span> @VisibleForTesting<a name="line.127"></a> -<span class="sourceLineNo">128</span> static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {<a name="line.128"></a> -<span class="sourceLineNo">129</span> return HFileOutputFormat2.createFamilyBlockSizeMap(conf);<a name="line.129"></a> -<span class="sourceLineNo">130</span> }<a name="line.130"></a> -<span class="sourceLineNo">131</span><a name="line.131"></a> -<span class="sourceLineNo">132</span> /**<a name="line.132"></a> -<span class="sourceLineNo">133</span> * Runs inside the task to deserialize column family to data block encoding<a name="line.133"></a> -<span class="sourceLineNo">134</span> * type map from the configuration.<a name="line.134"></a> -<span class="sourceLineNo">135</span> *<a name="line.135"></a> -<span class="sourceLineNo">136</span> * @param conf to read the serialized values from<a name="line.136"></a> -<span class="sourceLineNo">137</span> * @return a map from column family to HFileDataBlockEncoder for the<a name="line.137"></a> -<span class="sourceLineNo">138</span> * configured data block type for the family<a name="line.138"></a> -<span class="sourceLineNo">139</span> */<a name="line.139"></a> -<span class="sourceLineNo">140</span> @VisibleForTesting<a name="line.140"></a> -<span class="sourceLineNo">141</span> static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(<a name="line.141"></a> -<span class="sourceLineNo">142</span> Configuration conf) {<a name="line.142"></a> -<span class="sourceLineNo">143</span> return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);<a name="line.143"></a> -<span class="sourceLineNo">144</span> }<a name="line.144"></a> -<span class="sourceLineNo">145</span><a name="line.145"></a> -<span class="sourceLineNo">146</span> /**<a name="line.146"></a> -<span class="sourceLineNo">147</span> * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against<a name="line.147"></a> -<span class="sourceLineNo">148</span> * <code>splitPoints</code>. Cleans up the partitions file after job exists.<a name="line.148"></a> -<span class="sourceLineNo">149</span> */<a name="line.149"></a> -<span class="sourceLineNo">150</span> static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)<a name="line.150"></a> -<span class="sourceLineNo">151</span> throws IOException {<a name="line.151"></a> -<span class="sourceLineNo">152</span> HFileOutputFormat2.configurePartitioner(job, splitPoints);<a name="line.152"></a> -<span class="sourceLineNo">153</span> }<a name="line.153"></a> -<span class="sourceLineNo">154</span><a name="line.154"></a> -<span class="sourceLineNo">155</span> static void configureCompression(Table table, Configuration conf) throws IOException {<a name="line.155"></a> -<span class="sourceLineNo">156</span> HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());<a name="line.156"></a> -<span class="sourceLineNo">157</span> }<a name="line.157"></a> -<span class="sourceLineNo">158</span><a name="line.158"></a> -<span class="sourceLineNo">159</span> /**<a name="line.159"></a> -<span class="sourceLineNo">160</span> * Serialize column family to block size map to configuration.<a name="line.160"></a> -<span class="sourceLineNo">161</span> * Invoked while configuring the MR job for incremental load.<a name="line.161"></a> -<span class="sourceLineNo">162</span> *<a name="line.162"></a> -<span class="sourceLineNo">163</span> * @param table to read the properties from<a name="line.163"></a> -<span class="sourceLineNo">164</span> * @param conf to persist serialized values into<a name="line.164"></a> -<span class="sourceLineNo">165</span> * @throws IOException<a name="line.165"></a> -<span class="sourceLineNo">166</span> * on failure to read column family descriptors<a name="line.166"></a> -<span class="sourceLineNo">167</span> */<a name="line.167"></a> -<span class="sourceLineNo">168</span> @VisibleForTesting<a name="line.168"></a> -<span class="sourceLineNo">169</span> static void configureBlockSize(Table table, Configuration conf) throws IOException {<a name="line.169"></a> -<span class="sourceLineNo">170</span> HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);<a name="line.170"></a> -<span class="sourceLineNo">171</span> }<a name="line.171"></a> -<span class="sourceLineNo">172</span><a name="line.172"></a> -<span class="sourceLineNo">173</span> /**<a name="line.173"></a> -<span class="sourceLineNo">174</span> * Serialize column family to bloom type map to configuration.<a name="line.174"></a> -<span class="sourceLineNo">175</span> * Invoked while configuring the MR job for incremental load.<a name="line.175"></a> -<span class="sourceLineNo">176</span> *<a name="line.176"></a> -<span class="sourceLineNo">177</span> * @param table to read the properties from<a name="line.177"></a> -<span class="sourceLineNo">178</span> * @param conf to persist serialized values into<a name="line.178"></a> -<span class="sourceLineNo">179</span> * @throws IOException<a name="line.179"></a> -<span class="sourceLineNo">180</span> * on failure to read column family descriptors<a name="line.180"></a> -<span class="sourceLineNo">181</span> */<a name="line.181"></a> -<span class="sourceLineNo">182</span> @VisibleForTesting<a name="line.182"></a> -<span class="sourceLineNo">183</span> static void configureBloomType(Table table, Configuration conf) throws IOException {<a name="line.183"></a> -<span class="sourceLineNo">184</span> HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);<a name="line.184"></a> -<span class="sourceLineNo">185</span> }<a name="line.185"></a> -<span class="sourceLineNo">186</span><a name="line.186"></a> -<span class="sourceLineNo">187</span> /**<a name="line.187"></a> -<span class="sourceLineNo">188</span> * Serialize column family to data block encoding map to configuration.<a name="line.188"></a> -<span class="sourceLineNo">189</span> * Invoked while configuring the MR job for incremental load.<a name="line.189"></a> -<span class="sourceLineNo">190</span> *<a name="line.190"></a> -<span class="sourceLineNo">191</span> * @param table to read the properties from<a name="line.191"></a> -<span class="sourceLineNo">192</span> * @param conf to persist serialized values into<a name="line.192"></a> -<span class="sourceLineNo">193</span> * @throws IOException<a name="line.193"></a> -<span class="sourceLineNo">194</span> * on failure to read column family descriptors<a name="line.194"></a> -<span class="sourceLineNo">195</span> */<a name="line.195"></a> -<span class="sourceLineNo">196</span> @VisibleForTesting<a name="line.196"></a> -<span class="sourceLineNo">197</span> static void configureDataBlockEncoding(Table table,<a name="line.197"></a> -<span class="sourceLineNo">198</span> Configuration conf) throws IOException {<a name="line.198"></a> -<span class="sourceLineNo">199</span> HTableDescriptor tableDescriptor = table.getTableDescriptor();<a name="line.199"></a> -<span class="sourceLineNo">200</span> HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);<a name="line.200"></a> -<span class="sourceLineNo">201</span> }<a name="line.201"></a> -<span class="sourceLineNo">202</span>}<a name="line.202"></a> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -</pre> -</div> -</body> -</html>
