http://git-wip-us.apache.org/repos/asf/hbase-site/blob/62e361eb/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CellCounter.html ---------------------------------------------------------------------- diff --git a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CellCounter.html b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CellCounter.html index 8ca5756..aae49fe 100644 --- a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CellCounter.html +++ b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CellCounter.html @@ -139,206 +139,208 @@ <span class="sourceLineNo">131</span> */<a name="line.131"></a> <span class="sourceLineNo">132</span><a name="line.132"></a> <span class="sourceLineNo">133</span> @Override<a name="line.133"></a> -<span class="sourceLineNo">134</span> public void map(ImmutableBytesWritable row, Result values,<a name="line.134"></a> -<span class="sourceLineNo">135</span> Context context)<a name="line.135"></a> -<span class="sourceLineNo">136</span> throws IOException {<a name="line.136"></a> -<span class="sourceLineNo">137</span> Preconditions.checkState(values != null,<a name="line.137"></a> -<span class="sourceLineNo">138</span> "values passed to the map is null");<a name="line.138"></a> -<span class="sourceLineNo">139</span><a name="line.139"></a> -<span class="sourceLineNo">140</span> try {<a name="line.140"></a> -<span class="sourceLineNo">141</span> byte[] currentRow = values.getRow();<a name="line.141"></a> -<span class="sourceLineNo">142</span> if (lastRow == null || !Bytes.equals(lastRow, currentRow)) {<a name="line.142"></a> -<span class="sourceLineNo">143</span> lastRow = currentRow;<a name="line.143"></a> -<span class="sourceLineNo">144</span> currentRowKey = Bytes.toStringBinary(currentRow);<a name="line.144"></a> -<span class="sourceLineNo">145</span> currentFamily = null;<a name="line.145"></a> -<span class="sourceLineNo">146</span> currentQualifier = null;<a name="line.146"></a> -<span class="sourceLineNo">147</span> context.getCounter(Counters.ROWS).increment(1);<a name="line.147"></a> -<span class="sourceLineNo">148</span> context.write(new Text("Total ROWS"), new IntWritable(1));<a name="line.148"></a> -<span class="sourceLineNo">149</span> }<a name="line.149"></a> -<span class="sourceLineNo">150</span> if (!values.isEmpty()) {<a name="line.150"></a> -<span class="sourceLineNo">151</span> int cellCount = 0;<a name="line.151"></a> -<span class="sourceLineNo">152</span> for (Cell value : values.listCells()) {<a name="line.152"></a> -<span class="sourceLineNo">153</span> cellCount++;<a name="line.153"></a> -<span class="sourceLineNo">154</span> if (currentFamily == null || !CellUtil.matchingFamily(value, currentFamily)) {<a name="line.154"></a> -<span class="sourceLineNo">155</span> currentFamily = CellUtil.cloneFamily(value);<a name="line.155"></a> -<span class="sourceLineNo">156</span> currentFamilyName = Bytes.toStringBinary(currentFamily);<a name="line.156"></a> -<span class="sourceLineNo">157</span> currentQualifier = null;<a name="line.157"></a> -<span class="sourceLineNo">158</span> context.getCounter("CF", currentFamilyName).increment(1);<a name="line.158"></a> -<span class="sourceLineNo">159</span> if (1 == context.getCounter("CF", currentFamilyName).getValue()) {<a name="line.159"></a> -<span class="sourceLineNo">160</span> context.write(new Text("Total Families Across all Rows"), new IntWritable(1));<a name="line.160"></a> -<span class="sourceLineNo">161</span> context.write(new Text(currentFamily), new IntWritable(1));<a name="line.161"></a> -<span class="sourceLineNo">162</span> }<a name="line.162"></a> -<span class="sourceLineNo">163</span> }<a name="line.163"></a> -<span class="sourceLineNo">164</span> if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)) {<a name="line.164"></a> -<span class="sourceLineNo">165</span> currentQualifier = CellUtil.cloneQualifier(value);<a name="line.165"></a> -<span class="sourceLineNo">166</span> currentQualifierName = currentFamilyName + separator +<a name="line.166"></a> -<span class="sourceLineNo">167</span> Bytes.toStringBinary(currentQualifier);<a name="line.167"></a> -<span class="sourceLineNo">168</span> currentRowQualifierName = currentRowKey + separator + currentQualifierName;<a name="line.168"></a> -<span class="sourceLineNo">169</span><a name="line.169"></a> -<span class="sourceLineNo">170</span> context.write(new Text("Total Qualifiers across all Rows"),<a name="line.170"></a> -<span class="sourceLineNo">171</span> new IntWritable(1));<a name="line.171"></a> -<span class="sourceLineNo">172</span> context.write(new Text(currentQualifierName), new IntWritable(1));<a name="line.172"></a> -<span class="sourceLineNo">173</span> }<a name="line.173"></a> -<span class="sourceLineNo">174</span> // Increment versions<a name="line.174"></a> -<span class="sourceLineNo">175</span> context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));<a name="line.175"></a> -<span class="sourceLineNo">176</span> }<a name="line.176"></a> -<span class="sourceLineNo">177</span> context.getCounter(Counters.CELLS).increment(cellCount);<a name="line.177"></a> -<span class="sourceLineNo">178</span> }<a name="line.178"></a> -<span class="sourceLineNo">179</span> } catch (InterruptedException e) {<a name="line.179"></a> -<span class="sourceLineNo">180</span> e.printStackTrace();<a name="line.180"></a> -<span class="sourceLineNo">181</span> }<a name="line.181"></a> -<span class="sourceLineNo">182</span> }<a name="line.182"></a> -<span class="sourceLineNo">183</span> }<a name="line.183"></a> -<span class="sourceLineNo">184</span><a name="line.184"></a> -<span class="sourceLineNo">185</span> static class IntSumReducer<Key> extends Reducer<Key, IntWritable,<a name="line.185"></a> -<span class="sourceLineNo">186</span> Key, IntWritable> {<a name="line.186"></a> -<span class="sourceLineNo">187</span><a name="line.187"></a> -<span class="sourceLineNo">188</span> private IntWritable result = new IntWritable();<a name="line.188"></a> -<span class="sourceLineNo">189</span> public void reduce(Key key, Iterable<IntWritable> values,<a name="line.189"></a> -<span class="sourceLineNo">190</span> Context context)<a name="line.190"></a> -<span class="sourceLineNo">191</span> throws IOException, InterruptedException {<a name="line.191"></a> -<span class="sourceLineNo">192</span> int sum = 0;<a name="line.192"></a> -<span class="sourceLineNo">193</span> for (IntWritable val : values) {<a name="line.193"></a> -<span class="sourceLineNo">194</span> sum += val.get();<a name="line.194"></a> -<span class="sourceLineNo">195</span> }<a name="line.195"></a> -<span class="sourceLineNo">196</span> result.set(sum);<a name="line.196"></a> -<span class="sourceLineNo">197</span> context.write(key, result);<a name="line.197"></a> -<span class="sourceLineNo">198</span> }<a name="line.198"></a> -<span class="sourceLineNo">199</span> }<a name="line.199"></a> -<span class="sourceLineNo">200</span><a name="line.200"></a> -<span class="sourceLineNo">201</span> /**<a name="line.201"></a> -<span class="sourceLineNo">202</span> * Sets up the actual job.<a name="line.202"></a> -<span class="sourceLineNo">203</span> *<a name="line.203"></a> -<span class="sourceLineNo">204</span> * @param conf The current configuration.<a name="line.204"></a> -<span class="sourceLineNo">205</span> * @param args The command line parameters.<a name="line.205"></a> -<span class="sourceLineNo">206</span> * @return The newly created job.<a name="line.206"></a> -<span class="sourceLineNo">207</span> * @throws IOException When setting up the job fails.<a name="line.207"></a> -<span class="sourceLineNo">208</span> */<a name="line.208"></a> -<span class="sourceLineNo">209</span> public static Job createSubmittableJob(Configuration conf, String[] args)<a name="line.209"></a> -<span class="sourceLineNo">210</span> throws IOException {<a name="line.210"></a> -<span class="sourceLineNo">211</span> String tableName = args[0];<a name="line.211"></a> -<span class="sourceLineNo">212</span> Path outputDir = new Path(args[1]);<a name="line.212"></a> -<span class="sourceLineNo">213</span> String reportSeparatorString = (args.length > 2) ? args[2]: ":";<a name="line.213"></a> -<span class="sourceLineNo">214</span> conf.set("ReportSeparator", reportSeparatorString);<a name="line.214"></a> -<span class="sourceLineNo">215</span> Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));<a name="line.215"></a> -<span class="sourceLineNo">216</span> job.setJarByClass(CellCounter.class);<a name="line.216"></a> -<span class="sourceLineNo">217</span> Scan scan = getConfiguredScanForJob(conf, args);<a name="line.217"></a> -<span class="sourceLineNo">218</span> TableMapReduceUtil.initTableMapperJob(tableName, scan,<a name="line.218"></a> -<span class="sourceLineNo">219</span> CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);<a name="line.219"></a> -<span class="sourceLineNo">220</span> job.setNumReduceTasks(1);<a name="line.220"></a> -<span class="sourceLineNo">221</span> job.setMapOutputKeyClass(Text.class);<a name="line.221"></a> -<span class="sourceLineNo">222</span> job.setMapOutputValueClass(IntWritable.class);<a name="line.222"></a> -<span class="sourceLineNo">223</span> job.setOutputFormatClass(TextOutputFormat.class);<a name="line.223"></a> -<span class="sourceLineNo">224</span> job.setOutputKeyClass(Text.class);<a name="line.224"></a> -<span class="sourceLineNo">225</span> job.setOutputValueClass(IntWritable.class);<a name="line.225"></a> -<span class="sourceLineNo">226</span> FileOutputFormat.setOutputPath(job, outputDir);<a name="line.226"></a> -<span class="sourceLineNo">227</span> job.setReducerClass(IntSumReducer.class);<a name="line.227"></a> -<span class="sourceLineNo">228</span> return job;<a name="line.228"></a> -<span class="sourceLineNo">229</span> }<a name="line.229"></a> -<span class="sourceLineNo">230</span><a name="line.230"></a> -<span class="sourceLineNo">231</span> private static Scan getConfiguredScanForJob(Configuration conf, String[] args)<a name="line.231"></a> -<span class="sourceLineNo">232</span> throws IOException {<a name="line.232"></a> -<span class="sourceLineNo">233</span> // create scan with any properties set from TableInputFormat<a name="line.233"></a> -<span class="sourceLineNo">234</span> Scan s = TableInputFormat.createScanFromConfiguration(conf);<a name="line.234"></a> -<span class="sourceLineNo">235</span> // Set Scan Versions<a name="line.235"></a> -<span class="sourceLineNo">236</span> if (conf.get(TableInputFormat.SCAN_MAXVERSIONS) == null) {<a name="line.236"></a> -<span class="sourceLineNo">237</span> // default to all versions unless explicitly set<a name="line.237"></a> -<span class="sourceLineNo">238</span> s.setMaxVersions(Integer.MAX_VALUE);<a name="line.238"></a> -<span class="sourceLineNo">239</span> }<a name="line.239"></a> -<span class="sourceLineNo">240</span> s.setCacheBlocks(false);<a name="line.240"></a> -<span class="sourceLineNo">241</span> // Set RowFilter or Prefix Filter if applicable.<a name="line.241"></a> -<span class="sourceLineNo">242</span> Filter rowFilter = getRowFilter(args);<a name="line.242"></a> -<span class="sourceLineNo">243</span> if (rowFilter!= null) {<a name="line.243"></a> -<span class="sourceLineNo">244</span> LOG.info("Setting Row Filter for counter.");<a name="line.244"></a> -<span class="sourceLineNo">245</span> s.setFilter(rowFilter);<a name="line.245"></a> -<span class="sourceLineNo">246</span> }<a name="line.246"></a> -<span class="sourceLineNo">247</span> // Set TimeRange if defined<a name="line.247"></a> -<span class="sourceLineNo">248</span> long timeRange[] = getTimeRange(args);<a name="line.248"></a> -<span class="sourceLineNo">249</span> if (timeRange != null) {<a name="line.249"></a> -<span class="sourceLineNo">250</span> LOG.info("Setting TimeRange for counter.");<a name="line.250"></a> -<span class="sourceLineNo">251</span> s.setTimeRange(timeRange[0], timeRange[1]);<a name="line.251"></a> -<span class="sourceLineNo">252</span> }<a name="line.252"></a> -<span class="sourceLineNo">253</span> return s;<a name="line.253"></a> -<span class="sourceLineNo">254</span> }<a name="line.254"></a> -<span class="sourceLineNo">255</span><a name="line.255"></a> -<span class="sourceLineNo">256</span><a name="line.256"></a> -<span class="sourceLineNo">257</span> private static Filter getRowFilter(String[] args) {<a name="line.257"></a> -<span class="sourceLineNo">258</span> Filter rowFilter = null;<a name="line.258"></a> -<span class="sourceLineNo">259</span> String filterCriteria = (args.length > 3) ? args[3]: null;<a name="line.259"></a> -<span class="sourceLineNo">260</span> if (filterCriteria == null) return null;<a name="line.260"></a> -<span class="sourceLineNo">261</span> if (filterCriteria.startsWith("^")) {<a name="line.261"></a> -<span class="sourceLineNo">262</span> String regexPattern = filterCriteria.substring(1, filterCriteria.length());<a name="line.262"></a> -<span class="sourceLineNo">263</span> rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));<a name="line.263"></a> -<span class="sourceLineNo">264</span> } else {<a name="line.264"></a> -<span class="sourceLineNo">265</span> rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));<a name="line.265"></a> -<span class="sourceLineNo">266</span> }<a name="line.266"></a> -<span class="sourceLineNo">267</span> return rowFilter;<a name="line.267"></a> -<span class="sourceLineNo">268</span> }<a name="line.268"></a> -<span class="sourceLineNo">269</span><a name="line.269"></a> -<span class="sourceLineNo">270</span> private static long[] getTimeRange(String[] args) throws IOException {<a name="line.270"></a> -<span class="sourceLineNo">271</span> final String startTimeArgKey = "--starttime=";<a name="line.271"></a> -<span class="sourceLineNo">272</span> final String endTimeArgKey = "--endtime=";<a name="line.272"></a> -<span class="sourceLineNo">273</span> long startTime = 0L;<a name="line.273"></a> -<span class="sourceLineNo">274</span> long endTime = 0L;<a name="line.274"></a> -<span class="sourceLineNo">275</span><a name="line.275"></a> -<span class="sourceLineNo">276</span> for (int i = 1; i < args.length; i++) {<a name="line.276"></a> -<span class="sourceLineNo">277</span> System.out.println("i:" + i + "arg[i]" + args[i]);<a name="line.277"></a> -<span class="sourceLineNo">278</span> if (args[i].startsWith(startTimeArgKey)) {<a name="line.278"></a> -<span class="sourceLineNo">279</span> startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));<a name="line.279"></a> -<span class="sourceLineNo">280</span> }<a name="line.280"></a> -<span class="sourceLineNo">281</span> if (args[i].startsWith(endTimeArgKey)) {<a name="line.281"></a> -<span class="sourceLineNo">282</span> endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));<a name="line.282"></a> -<span class="sourceLineNo">283</span> }<a name="line.283"></a> -<span class="sourceLineNo">284</span> }<a name="line.284"></a> -<span class="sourceLineNo">285</span><a name="line.285"></a> -<span class="sourceLineNo">286</span> if (startTime == 0 && endTime == 0)<a name="line.286"></a> -<span class="sourceLineNo">287</span> return null;<a name="line.287"></a> -<span class="sourceLineNo">288</span><a name="line.288"></a> -<span class="sourceLineNo">289</span> endTime = endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime;<a name="line.289"></a> -<span class="sourceLineNo">290</span> return new long [] {startTime, endTime};<a name="line.290"></a> -<span class="sourceLineNo">291</span> }<a name="line.291"></a> -<span class="sourceLineNo">292</span><a name="line.292"></a> -<span class="sourceLineNo">293</span> @Override<a name="line.293"></a> -<span class="sourceLineNo">294</span> public int run(String[] args) throws Exception {<a name="line.294"></a> -<span class="sourceLineNo">295</span> if (args.length < 2) {<a name="line.295"></a> -<span class="sourceLineNo">296</span> System.err.println("ERROR: Wrong number of parameters: " + args.length);<a name="line.296"></a> -<span class="sourceLineNo">297</span> System.err.println("Usage: CellCounter ");<a name="line.297"></a> -<span class="sourceLineNo">298</span> System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +<a name="line.298"></a> -<span class="sourceLineNo">299</span> "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");<a name="line.299"></a> -<span class="sourceLineNo">300</span> System.err.println(" Note: -D properties will be applied to the conf used. ");<a name="line.300"></a> -<span class="sourceLineNo">301</span> System.err.println(" Additionally, all of the SCAN properties from TableInputFormat");<a name="line.301"></a> -<span class="sourceLineNo">302</span> System.err.println(" can be specified to get fine grained control on what is counted..");<a name="line.302"></a> -<span class="sourceLineNo">303</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>");<a name="line.303"></a> -<span class="sourceLineNo">304</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");<a name="line.304"></a> -<span class="sourceLineNo">305</span> System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");<a name="line.305"></a> -<span class="sourceLineNo">306</span> System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");<a name="line.306"></a> -<span class="sourceLineNo">307</span> System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");<a name="line.307"></a> -<span class="sourceLineNo">308</span> System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");<a name="line.308"></a> -<span class="sourceLineNo">309</span> System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");<a name="line.309"></a> -<span class="sourceLineNo">310</span> System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");<a name="line.310"></a> -<span class="sourceLineNo">311</span> System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>");<a name="line.311"></a> -<span class="sourceLineNo">312</span> System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>");<a name="line.312"></a> -<span class="sourceLineNo">313</span> System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +<a name="line.313"></a> -<span class="sourceLineNo">314</span> "string : used to separate the rowId/column family name and qualifier name.");<a name="line.314"></a> -<span class="sourceLineNo">315</span> System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +<a name="line.315"></a> -<span class="sourceLineNo">316</span> "operation to a limited subset of rows from the table based on regex or prefix pattern.");<a name="line.316"></a> -<span class="sourceLineNo">317</span> return -1;<a name="line.317"></a> -<span class="sourceLineNo">318</span> }<a name="line.318"></a> -<span class="sourceLineNo">319</span> Job job = createSubmittableJob(getConf(), args);<a name="line.319"></a> -<span class="sourceLineNo">320</span> return (job.waitForCompletion(true) ? 0 : 1);<a name="line.320"></a> -<span class="sourceLineNo">321</span> }<a name="line.321"></a> -<span class="sourceLineNo">322</span><a name="line.322"></a> -<span class="sourceLineNo">323</span> /**<a name="line.323"></a> -<span class="sourceLineNo">324</span> * Main entry point.<a name="line.324"></a> -<span class="sourceLineNo">325</span> * @param args The command line parameters.<a name="line.325"></a> -<span class="sourceLineNo">326</span> * @throws Exception When running the job fails.<a name="line.326"></a> -<span class="sourceLineNo">327</span> */<a name="line.327"></a> -<span class="sourceLineNo">328</span> public static void main(String[] args) throws Exception {<a name="line.328"></a> -<span class="sourceLineNo">329</span> int errCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args);<a name="line.329"></a> -<span class="sourceLineNo">330</span> System.exit(errCode);<a name="line.330"></a> -<span class="sourceLineNo">331</span> }<a name="line.331"></a> -<span class="sourceLineNo">332</span><a name="line.332"></a> -<span class="sourceLineNo">333</span>}<a name="line.333"></a> +<span class="sourceLineNo">134</span> @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",<a name="line.134"></a> +<span class="sourceLineNo">135</span> justification="Findbugs is blind to the Precondition null check")<a name="line.135"></a> +<span class="sourceLineNo">136</span> public void map(ImmutableBytesWritable row, Result values,<a name="line.136"></a> +<span class="sourceLineNo">137</span> Context context)<a name="line.137"></a> +<span class="sourceLineNo">138</span> throws IOException {<a name="line.138"></a> +<span class="sourceLineNo">139</span> Preconditions.checkState(values != null,<a name="line.139"></a> +<span class="sourceLineNo">140</span> "values passed to the map is null");<a name="line.140"></a> +<span class="sourceLineNo">141</span><a name="line.141"></a> +<span class="sourceLineNo">142</span> try {<a name="line.142"></a> +<span class="sourceLineNo">143</span> byte[] currentRow = values.getRow();<a name="line.143"></a> +<span class="sourceLineNo">144</span> if (lastRow == null || !Bytes.equals(lastRow, currentRow)) {<a name="line.144"></a> +<span class="sourceLineNo">145</span> lastRow = currentRow;<a name="line.145"></a> +<span class="sourceLineNo">146</span> currentRowKey = Bytes.toStringBinary(currentRow);<a name="line.146"></a> +<span class="sourceLineNo">147</span> currentFamily = null;<a name="line.147"></a> +<span class="sourceLineNo">148</span> currentQualifier = null;<a name="line.148"></a> +<span class="sourceLineNo">149</span> context.getCounter(Counters.ROWS).increment(1);<a name="line.149"></a> +<span class="sourceLineNo">150</span> context.write(new Text("Total ROWS"), new IntWritable(1));<a name="line.150"></a> +<span class="sourceLineNo">151</span> }<a name="line.151"></a> +<span class="sourceLineNo">152</span> if (!values.isEmpty()) {<a name="line.152"></a> +<span class="sourceLineNo">153</span> int cellCount = 0;<a name="line.153"></a> +<span class="sourceLineNo">154</span> for (Cell value : values.listCells()) {<a name="line.154"></a> +<span class="sourceLineNo">155</span> cellCount++;<a name="line.155"></a> +<span class="sourceLineNo">156</span> if (currentFamily == null || !CellUtil.matchingFamily(value, currentFamily)) {<a name="line.156"></a> +<span class="sourceLineNo">157</span> currentFamily = CellUtil.cloneFamily(value);<a name="line.157"></a> +<span class="sourceLineNo">158</span> currentFamilyName = Bytes.toStringBinary(currentFamily);<a name="line.158"></a> +<span class="sourceLineNo">159</span> currentQualifier = null;<a name="line.159"></a> +<span class="sourceLineNo">160</span> context.getCounter("CF", currentFamilyName).increment(1);<a name="line.160"></a> +<span class="sourceLineNo">161</span> if (1 == context.getCounter("CF", currentFamilyName).getValue()) {<a name="line.161"></a> +<span class="sourceLineNo">162</span> context.write(new Text("Total Families Across all Rows"), new IntWritable(1));<a name="line.162"></a> +<span class="sourceLineNo">163</span> context.write(new Text(currentFamily), new IntWritable(1));<a name="line.163"></a> +<span class="sourceLineNo">164</span> }<a name="line.164"></a> +<span class="sourceLineNo">165</span> }<a name="line.165"></a> +<span class="sourceLineNo">166</span> if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)) {<a name="line.166"></a> +<span class="sourceLineNo">167</span> currentQualifier = CellUtil.cloneQualifier(value);<a name="line.167"></a> +<span class="sourceLineNo">168</span> currentQualifierName = currentFamilyName + separator +<a name="line.168"></a> +<span class="sourceLineNo">169</span> Bytes.toStringBinary(currentQualifier);<a name="line.169"></a> +<span class="sourceLineNo">170</span> currentRowQualifierName = currentRowKey + separator + currentQualifierName;<a name="line.170"></a> +<span class="sourceLineNo">171</span><a name="line.171"></a> +<span class="sourceLineNo">172</span> context.write(new Text("Total Qualifiers across all Rows"),<a name="line.172"></a> +<span class="sourceLineNo">173</span> new IntWritable(1));<a name="line.173"></a> +<span class="sourceLineNo">174</span> context.write(new Text(currentQualifierName), new IntWritable(1));<a name="line.174"></a> +<span class="sourceLineNo">175</span> }<a name="line.175"></a> +<span class="sourceLineNo">176</span> // Increment versions<a name="line.176"></a> +<span class="sourceLineNo">177</span> context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));<a name="line.177"></a> +<span class="sourceLineNo">178</span> }<a name="line.178"></a> +<span class="sourceLineNo">179</span> context.getCounter(Counters.CELLS).increment(cellCount);<a name="line.179"></a> +<span class="sourceLineNo">180</span> }<a name="line.180"></a> +<span class="sourceLineNo">181</span> } catch (InterruptedException e) {<a name="line.181"></a> +<span class="sourceLineNo">182</span> e.printStackTrace();<a name="line.182"></a> +<span class="sourceLineNo">183</span> }<a name="line.183"></a> +<span class="sourceLineNo">184</span> }<a name="line.184"></a> +<span class="sourceLineNo">185</span> }<a name="line.185"></a> +<span class="sourceLineNo">186</span><a name="line.186"></a> +<span class="sourceLineNo">187</span> static class IntSumReducer<Key> extends Reducer<Key, IntWritable,<a name="line.187"></a> +<span class="sourceLineNo">188</span> Key, IntWritable> {<a name="line.188"></a> +<span class="sourceLineNo">189</span><a name="line.189"></a> +<span class="sourceLineNo">190</span> private IntWritable result = new IntWritable();<a name="line.190"></a> +<span class="sourceLineNo">191</span> public void reduce(Key key, Iterable<IntWritable> values,<a name="line.191"></a> +<span class="sourceLineNo">192</span> Context context)<a name="line.192"></a> +<span class="sourceLineNo">193</span> throws IOException, InterruptedException {<a name="line.193"></a> +<span class="sourceLineNo">194</span> int sum = 0;<a name="line.194"></a> +<span class="sourceLineNo">195</span> for (IntWritable val : values) {<a name="line.195"></a> +<span class="sourceLineNo">196</span> sum += val.get();<a name="line.196"></a> +<span class="sourceLineNo">197</span> }<a name="line.197"></a> +<span class="sourceLineNo">198</span> result.set(sum);<a name="line.198"></a> +<span class="sourceLineNo">199</span> context.write(key, result);<a name="line.199"></a> +<span class="sourceLineNo">200</span> }<a name="line.200"></a> +<span class="sourceLineNo">201</span> }<a name="line.201"></a> +<span class="sourceLineNo">202</span><a name="line.202"></a> +<span class="sourceLineNo">203</span> /**<a name="line.203"></a> +<span class="sourceLineNo">204</span> * Sets up the actual job.<a name="line.204"></a> +<span class="sourceLineNo">205</span> *<a name="line.205"></a> +<span class="sourceLineNo">206</span> * @param conf The current configuration.<a name="line.206"></a> +<span class="sourceLineNo">207</span> * @param args The command line parameters.<a name="line.207"></a> +<span class="sourceLineNo">208</span> * @return The newly created job.<a name="line.208"></a> +<span class="sourceLineNo">209</span> * @throws IOException When setting up the job fails.<a name="line.209"></a> +<span class="sourceLineNo">210</span> */<a name="line.210"></a> +<span class="sourceLineNo">211</span> public static Job createSubmittableJob(Configuration conf, String[] args)<a name="line.211"></a> +<span class="sourceLineNo">212</span> throws IOException {<a name="line.212"></a> +<span class="sourceLineNo">213</span> String tableName = args[0];<a name="line.213"></a> +<span class="sourceLineNo">214</span> Path outputDir = new Path(args[1]);<a name="line.214"></a> +<span class="sourceLineNo">215</span> String reportSeparatorString = (args.length > 2) ? args[2]: ":";<a name="line.215"></a> +<span class="sourceLineNo">216</span> conf.set("ReportSeparator", reportSeparatorString);<a name="line.216"></a> +<span class="sourceLineNo">217</span> Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));<a name="line.217"></a> +<span class="sourceLineNo">218</span> job.setJarByClass(CellCounter.class);<a name="line.218"></a> +<span class="sourceLineNo">219</span> Scan scan = getConfiguredScanForJob(conf, args);<a name="line.219"></a> +<span class="sourceLineNo">220</span> TableMapReduceUtil.initTableMapperJob(tableName, scan,<a name="line.220"></a> +<span class="sourceLineNo">221</span> CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);<a name="line.221"></a> +<span class="sourceLineNo">222</span> job.setNumReduceTasks(1);<a name="line.222"></a> +<span class="sourceLineNo">223</span> job.setMapOutputKeyClass(Text.class);<a name="line.223"></a> +<span class="sourceLineNo">224</span> job.setMapOutputValueClass(IntWritable.class);<a name="line.224"></a> +<span class="sourceLineNo">225</span> job.setOutputFormatClass(TextOutputFormat.class);<a name="line.225"></a> +<span class="sourceLineNo">226</span> job.setOutputKeyClass(Text.class);<a name="line.226"></a> +<span class="sourceLineNo">227</span> job.setOutputValueClass(IntWritable.class);<a name="line.227"></a> +<span class="sourceLineNo">228</span> FileOutputFormat.setOutputPath(job, outputDir);<a name="line.228"></a> +<span class="sourceLineNo">229</span> job.setReducerClass(IntSumReducer.class);<a name="line.229"></a> +<span class="sourceLineNo">230</span> return job;<a name="line.230"></a> +<span class="sourceLineNo">231</span> }<a name="line.231"></a> +<span class="sourceLineNo">232</span><a name="line.232"></a> +<span class="sourceLineNo">233</span> private static Scan getConfiguredScanForJob(Configuration conf, String[] args)<a name="line.233"></a> +<span class="sourceLineNo">234</span> throws IOException {<a name="line.234"></a> +<span class="sourceLineNo">235</span> // create scan with any properties set from TableInputFormat<a name="line.235"></a> +<span class="sourceLineNo">236</span> Scan s = TableInputFormat.createScanFromConfiguration(conf);<a name="line.236"></a> +<span class="sourceLineNo">237</span> // Set Scan Versions<a name="line.237"></a> +<span class="sourceLineNo">238</span> if (conf.get(TableInputFormat.SCAN_MAXVERSIONS) == null) {<a name="line.238"></a> +<span class="sourceLineNo">239</span> // default to all versions unless explicitly set<a name="line.239"></a> +<span class="sourceLineNo">240</span> s.setMaxVersions(Integer.MAX_VALUE);<a name="line.240"></a> +<span class="sourceLineNo">241</span> }<a name="line.241"></a> +<span class="sourceLineNo">242</span> s.setCacheBlocks(false);<a name="line.242"></a> +<span class="sourceLineNo">243</span> // Set RowFilter or Prefix Filter if applicable.<a name="line.243"></a> +<span class="sourceLineNo">244</span> Filter rowFilter = getRowFilter(args);<a name="line.244"></a> +<span class="sourceLineNo">245</span> if (rowFilter!= null) {<a name="line.245"></a> +<span class="sourceLineNo">246</span> LOG.info("Setting Row Filter for counter.");<a name="line.246"></a> +<span class="sourceLineNo">247</span> s.setFilter(rowFilter);<a name="line.247"></a> +<span class="sourceLineNo">248</span> }<a name="line.248"></a> +<span class="sourceLineNo">249</span> // Set TimeRange if defined<a name="line.249"></a> +<span class="sourceLineNo">250</span> long timeRange[] = getTimeRange(args);<a name="line.250"></a> +<span class="sourceLineNo">251</span> if (timeRange != null) {<a name="line.251"></a> +<span class="sourceLineNo">252</span> LOG.info("Setting TimeRange for counter.");<a name="line.252"></a> +<span class="sourceLineNo">253</span> s.setTimeRange(timeRange[0], timeRange[1]);<a name="line.253"></a> +<span class="sourceLineNo">254</span> }<a name="line.254"></a> +<span class="sourceLineNo">255</span> return s;<a name="line.255"></a> +<span class="sourceLineNo">256</span> }<a name="line.256"></a> +<span class="sourceLineNo">257</span><a name="line.257"></a> +<span class="sourceLineNo">258</span><a name="line.258"></a> +<span class="sourceLineNo">259</span> private static Filter getRowFilter(String[] args) {<a name="line.259"></a> +<span class="sourceLineNo">260</span> Filter rowFilter = null;<a name="line.260"></a> +<span class="sourceLineNo">261</span> String filterCriteria = (args.length > 3) ? args[3]: null;<a name="line.261"></a> +<span class="sourceLineNo">262</span> if (filterCriteria == null) return null;<a name="line.262"></a> +<span class="sourceLineNo">263</span> if (filterCriteria.startsWith("^")) {<a name="line.263"></a> +<span class="sourceLineNo">264</span> String regexPattern = filterCriteria.substring(1, filterCriteria.length());<a name="line.264"></a> +<span class="sourceLineNo">265</span> rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));<a name="line.265"></a> +<span class="sourceLineNo">266</span> } else {<a name="line.266"></a> +<span class="sourceLineNo">267</span> rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));<a name="line.267"></a> +<span class="sourceLineNo">268</span> }<a name="line.268"></a> +<span class="sourceLineNo">269</span> return rowFilter;<a name="line.269"></a> +<span class="sourceLineNo">270</span> }<a name="line.270"></a> +<span class="sourceLineNo">271</span><a name="line.271"></a> +<span class="sourceLineNo">272</span> private static long[] getTimeRange(String[] args) throws IOException {<a name="line.272"></a> +<span class="sourceLineNo">273</span> final String startTimeArgKey = "--starttime=";<a name="line.273"></a> +<span class="sourceLineNo">274</span> final String endTimeArgKey = "--endtime=";<a name="line.274"></a> +<span class="sourceLineNo">275</span> long startTime = 0L;<a name="line.275"></a> +<span class="sourceLineNo">276</span> long endTime = 0L;<a name="line.276"></a> +<span class="sourceLineNo">277</span><a name="line.277"></a> +<span class="sourceLineNo">278</span> for (int i = 1; i < args.length; i++) {<a name="line.278"></a> +<span class="sourceLineNo">279</span> System.out.println("i:" + i + "arg[i]" + args[i]);<a name="line.279"></a> +<span class="sourceLineNo">280</span> if (args[i].startsWith(startTimeArgKey)) {<a name="line.280"></a> +<span class="sourceLineNo">281</span> startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));<a name="line.281"></a> +<span class="sourceLineNo">282</span> }<a name="line.282"></a> +<span class="sourceLineNo">283</span> if (args[i].startsWith(endTimeArgKey)) {<a name="line.283"></a> +<span class="sourceLineNo">284</span> endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));<a name="line.284"></a> +<span class="sourceLineNo">285</span> }<a name="line.285"></a> +<span class="sourceLineNo">286</span> }<a name="line.286"></a> +<span class="sourceLineNo">287</span><a name="line.287"></a> +<span class="sourceLineNo">288</span> if (startTime == 0 && endTime == 0)<a name="line.288"></a> +<span class="sourceLineNo">289</span> return null;<a name="line.289"></a> +<span class="sourceLineNo">290</span><a name="line.290"></a> +<span class="sourceLineNo">291</span> endTime = endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime;<a name="line.291"></a> +<span class="sourceLineNo">292</span> return new long [] {startTime, endTime};<a name="line.292"></a> +<span class="sourceLineNo">293</span> }<a name="line.293"></a> +<span class="sourceLineNo">294</span><a name="line.294"></a> +<span class="sourceLineNo">295</span> @Override<a name="line.295"></a> +<span class="sourceLineNo">296</span> public int run(String[] args) throws Exception {<a name="line.296"></a> +<span class="sourceLineNo">297</span> if (args.length < 2) {<a name="line.297"></a> +<span class="sourceLineNo">298</span> System.err.println("ERROR: Wrong number of parameters: " + args.length);<a name="line.298"></a> +<span class="sourceLineNo">299</span> System.err.println("Usage: CellCounter ");<a name="line.299"></a> +<span class="sourceLineNo">300</span> System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +<a name="line.300"></a> +<span class="sourceLineNo">301</span> "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");<a name="line.301"></a> +<span class="sourceLineNo">302</span> System.err.println(" Note: -D properties will be applied to the conf used. ");<a name="line.302"></a> +<span class="sourceLineNo">303</span> System.err.println(" Additionally, all of the SCAN properties from TableInputFormat");<a name="line.303"></a> +<span class="sourceLineNo">304</span> System.err.println(" can be specified to get fine grained control on what is counted..");<a name="line.304"></a> +<span class="sourceLineNo">305</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>");<a name="line.305"></a> +<span class="sourceLineNo">306</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");<a name="line.306"></a> +<span class="sourceLineNo">307</span> System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");<a name="line.307"></a> +<span class="sourceLineNo">308</span> System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");<a name="line.308"></a> +<span class="sourceLineNo">309</span> System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");<a name="line.309"></a> +<span class="sourceLineNo">310</span> System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");<a name="line.310"></a> +<span class="sourceLineNo">311</span> System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");<a name="line.311"></a> +<span class="sourceLineNo">312</span> System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");<a name="line.312"></a> +<span class="sourceLineNo">313</span> System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>");<a name="line.313"></a> +<span class="sourceLineNo">314</span> System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>");<a name="line.314"></a> +<span class="sourceLineNo">315</span> System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +<a name="line.315"></a> +<span class="sourceLineNo">316</span> "string : used to separate the rowId/column family name and qualifier name.");<a name="line.316"></a> +<span class="sourceLineNo">317</span> System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +<a name="line.317"></a> +<span class="sourceLineNo">318</span> "operation to a limited subset of rows from the table based on regex or prefix pattern.");<a name="line.318"></a> +<span class="sourceLineNo">319</span> return -1;<a name="line.319"></a> +<span class="sourceLineNo">320</span> }<a name="line.320"></a> +<span class="sourceLineNo">321</span> Job job = createSubmittableJob(getConf(), args);<a name="line.321"></a> +<span class="sourceLineNo">322</span> return (job.waitForCompletion(true) ? 0 : 1);<a name="line.322"></a> +<span class="sourceLineNo">323</span> }<a name="line.323"></a> +<span class="sourceLineNo">324</span><a name="line.324"></a> +<span class="sourceLineNo">325</span> /**<a name="line.325"></a> +<span class="sourceLineNo">326</span> * Main entry point.<a name="line.326"></a> +<span class="sourceLineNo">327</span> * @param args The command line parameters.<a name="line.327"></a> +<span class="sourceLineNo">328</span> * @throws Exception When running the job fails.<a name="line.328"></a> +<span class="sourceLineNo">329</span> */<a name="line.329"></a> +<span class="sourceLineNo">330</span> public static void main(String[] args) throws Exception {<a name="line.330"></a> +<span class="sourceLineNo">331</span> int errCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args);<a name="line.331"></a> +<span class="sourceLineNo">332</span> System.exit(errCode);<a name="line.332"></a> +<span class="sourceLineNo">333</span> }<a name="line.333"></a> +<span class="sourceLineNo">334</span><a name="line.334"></a> +<span class="sourceLineNo">335</span>}<a name="line.335"></a>
http://git-wip-us.apache.org/repos/asf/hbase-site/blob/62e361eb/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CopyTable.html ---------------------------------------------------------------------- diff --git a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CopyTable.html b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CopyTable.html index 28bc12a..fa09065 100644 --- a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CopyTable.html +++ b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/CopyTable.html @@ -219,7 +219,7 @@ <span class="sourceLineNo">211</span> System.err.println();<a name="line.211"></a> <span class="sourceLineNo">212</span> System.err.println("Examples:");<a name="line.212"></a> <span class="sourceLineNo">213</span> System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");<a name="line.213"></a> -<span class="sourceLineNo">214</span> System.err.println(" $ bin/hbase " +<a name="line.214"></a> +<span class="sourceLineNo">214</span> System.err.println(" $ hbase " +<a name="line.214"></a> <span class="sourceLineNo">215</span> "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +<a name="line.215"></a> <span class="sourceLineNo">216</span> "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");<a name="line.216"></a> <span class="sourceLineNo">217</span> System.err.println("For performance consider the following general option:\n"<a name="line.217"></a> http://git-wip-us.apache.org/repos/asf/hbase-site/blob/62e361eb/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/Export.html ---------------------------------------------------------------------- diff --git a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/Export.html b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/Export.html index e687bb9..703000b 100644 --- a/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/Export.html +++ b/apidocs/src-html/org/apache/hadoop/hbase/mapreduce/Export.html @@ -118,94 +118,93 @@ <span class="sourceLineNo">110</span> if (raw) {<a name="line.110"></a> <span class="sourceLineNo">111</span> s.setRaw(raw);<a name="line.111"></a> <span class="sourceLineNo">112</span> }<a name="line.112"></a> -<span class="sourceLineNo">113</span> <a name="line.113"></a> -<span class="sourceLineNo">114</span> if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {<a name="line.114"></a> -<span class="sourceLineNo">115</span> s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));<a name="line.115"></a> -<span class="sourceLineNo">116</span> }<a name="line.116"></a> -<span class="sourceLineNo">117</span> // Set RowFilter or Prefix Filter if applicable.<a name="line.117"></a> -<span class="sourceLineNo">118</span> Filter exportFilter = getExportFilter(args);<a name="line.118"></a> -<span class="sourceLineNo">119</span> if (exportFilter!= null) {<a name="line.119"></a> -<span class="sourceLineNo">120</span> LOG.info("Setting Scan Filter for Export.");<a name="line.120"></a> -<span class="sourceLineNo">121</span> s.setFilter(exportFilter);<a name="line.121"></a> -<span class="sourceLineNo">122</span> }<a name="line.122"></a> -<span class="sourceLineNo">123</span><a name="line.123"></a> -<span class="sourceLineNo">124</span> int batching = conf.getInt(EXPORT_BATCHING, -1);<a name="line.124"></a> -<span class="sourceLineNo">125</span> if (batching != -1){<a name="line.125"></a> -<span class="sourceLineNo">126</span> try {<a name="line.126"></a> -<span class="sourceLineNo">127</span> s.setBatch(batching);<a name="line.127"></a> -<span class="sourceLineNo">128</span> } catch (IncompatibleFilterException e) {<a name="line.128"></a> -<span class="sourceLineNo">129</span> LOG.error("Batching could not be set", e);<a name="line.129"></a> -<span class="sourceLineNo">130</span> }<a name="line.130"></a> -<span class="sourceLineNo">131</span> }<a name="line.131"></a> -<span class="sourceLineNo">132</span> LOG.info("versions=" + versions + ", starttime=" + startTime +<a name="line.132"></a> -<span class="sourceLineNo">133</span> ", endtime=" + endTime + ", keepDeletedCells=" + raw);<a name="line.133"></a> -<span class="sourceLineNo">134</span> return s;<a name="line.134"></a> -<span class="sourceLineNo">135</span> }<a name="line.135"></a> -<span class="sourceLineNo">136</span><a name="line.136"></a> -<span class="sourceLineNo">137</span> private static Filter getExportFilter(String[] args) {<a name="line.137"></a> -<span class="sourceLineNo">138</span> Filter exportFilter = null;<a name="line.138"></a> -<span class="sourceLineNo">139</span> String filterCriteria = (args.length > 5) ? args[5]: null;<a name="line.139"></a> -<span class="sourceLineNo">140</span> if (filterCriteria == null) return null;<a name="line.140"></a> -<span class="sourceLineNo">141</span> if (filterCriteria.startsWith("^")) {<a name="line.141"></a> -<span class="sourceLineNo">142</span> String regexPattern = filterCriteria.substring(1, filterCriteria.length());<a name="line.142"></a> -<span class="sourceLineNo">143</span> exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));<a name="line.143"></a> -<span class="sourceLineNo">144</span> } else {<a name="line.144"></a> -<span class="sourceLineNo">145</span> exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));<a name="line.145"></a> -<span class="sourceLineNo">146</span> }<a name="line.146"></a> -<span class="sourceLineNo">147</span> return exportFilter;<a name="line.147"></a> -<span class="sourceLineNo">148</span> }<a name="line.148"></a> -<span class="sourceLineNo">149</span><a name="line.149"></a> -<span class="sourceLineNo">150</span> /*<a name="line.150"></a> -<span class="sourceLineNo">151</span> * @param errorMsg Error message. Can be null.<a name="line.151"></a> -<span class="sourceLineNo">152</span> */<a name="line.152"></a> -<span class="sourceLineNo">153</span> private static void usage(final String errorMsg) {<a name="line.153"></a> -<span class="sourceLineNo">154</span> if (errorMsg != null && errorMsg.length() > 0) {<a name="line.154"></a> -<span class="sourceLineNo">155</span> System.err.println("ERROR: " + errorMsg);<a name="line.155"></a> -<span class="sourceLineNo">156</span> }<a name="line.156"></a> -<span class="sourceLineNo">157</span> System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +<a name="line.157"></a> -<span class="sourceLineNo">158</span> "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");<a name="line.158"></a> -<span class="sourceLineNo">159</span> System.err.println(" Note: -D properties will be applied to the conf used. ");<a name="line.159"></a> -<span class="sourceLineNo">160</span> System.err.println(" For example: ");<a name="line.160"></a> -<span class="sourceLineNo">161</span> System.err.println(" -D mapreduce.output.fileoutputformat.compress=true");<a name="line.161"></a> -<span class="sourceLineNo">162</span> System.err.println(" -D mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec");<a name="line.162"></a> -<span class="sourceLineNo">163</span> System.err.println(" -D mapreduce.output.fileoutputformat.compress.type=BLOCK");<a name="line.163"></a> -<span class="sourceLineNo">164</span> System.err.println(" Additionally, the following SCAN properties can be specified");<a name="line.164"></a> -<span class="sourceLineNo">165</span> System.err.println(" to control/limit what is exported..");<a name="line.165"></a> -<span class="sourceLineNo">166</span> System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");<a name="line.166"></a> -<span class="sourceLineNo">167</span> System.err.println(" -D " + RAW_SCAN + "=true");<a name="line.167"></a> -<span class="sourceLineNo">168</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<ROWSTART>");<a name="line.168"></a> -<span class="sourceLineNo">169</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<ROWSTOP>");<a name="line.169"></a> -<span class="sourceLineNo">170</span> System.err.println(" -D " + JOB_NAME_CONF_KEY<a name="line.170"></a> -<span class="sourceLineNo">171</span> + "=jobName - use the specified mapreduce job name for the export");<a name="line.171"></a> -<span class="sourceLineNo">172</span> System.err.println("For performance consider the following properties:\n"<a name="line.172"></a> -<span class="sourceLineNo">173</span> + " -Dhbase.client.scanner.caching=100\n"<a name="line.173"></a> -<span class="sourceLineNo">174</span> + " -Dmapreduce.map.speculative=false\n"<a name="line.174"></a> -<span class="sourceLineNo">175</span> + " -Dmapreduce.reduce.speculative=false");<a name="line.175"></a> -<span class="sourceLineNo">176</span> System.err.println("For tables with very wide rows consider setting the batch size as below:\n"<a name="line.176"></a> -<span class="sourceLineNo">177</span> + " -D" + EXPORT_BATCHING + "=10");<a name="line.177"></a> -<span class="sourceLineNo">178</span> }<a name="line.178"></a> +<span class="sourceLineNo">113</span> for (String columnFamily : conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY)) {<a name="line.113"></a> +<span class="sourceLineNo">114</span> s.addFamily(Bytes.toBytes(columnFamily));<a name="line.114"></a> +<span class="sourceLineNo">115</span> }<a name="line.115"></a> +<span class="sourceLineNo">116</span> // Set RowFilter or Prefix Filter if applicable.<a name="line.116"></a> +<span class="sourceLineNo">117</span> Filter exportFilter = getExportFilter(args);<a name="line.117"></a> +<span class="sourceLineNo">118</span> if (exportFilter!= null) {<a name="line.118"></a> +<span class="sourceLineNo">119</span> LOG.info("Setting Scan Filter for Export.");<a name="line.119"></a> +<span class="sourceLineNo">120</span> s.setFilter(exportFilter);<a name="line.120"></a> +<span class="sourceLineNo">121</span> }<a name="line.121"></a> +<span class="sourceLineNo">122</span><a name="line.122"></a> +<span class="sourceLineNo">123</span> int batching = conf.getInt(EXPORT_BATCHING, -1);<a name="line.123"></a> +<span class="sourceLineNo">124</span> if (batching != -1){<a name="line.124"></a> +<span class="sourceLineNo">125</span> try {<a name="line.125"></a> +<span class="sourceLineNo">126</span> s.setBatch(batching);<a name="line.126"></a> +<span class="sourceLineNo">127</span> } catch (IncompatibleFilterException e) {<a name="line.127"></a> +<span class="sourceLineNo">128</span> LOG.error("Batching could not be set", e);<a name="line.128"></a> +<span class="sourceLineNo">129</span> }<a name="line.129"></a> +<span class="sourceLineNo">130</span> }<a name="line.130"></a> +<span class="sourceLineNo">131</span> LOG.info("versions=" + versions + ", starttime=" + startTime +<a name="line.131"></a> +<span class="sourceLineNo">132</span> ", endtime=" + endTime + ", keepDeletedCells=" + raw);<a name="line.132"></a> +<span class="sourceLineNo">133</span> return s;<a name="line.133"></a> +<span class="sourceLineNo">134</span> }<a name="line.134"></a> +<span class="sourceLineNo">135</span><a name="line.135"></a> +<span class="sourceLineNo">136</span> private static Filter getExportFilter(String[] args) {<a name="line.136"></a> +<span class="sourceLineNo">137</span> Filter exportFilter = null;<a name="line.137"></a> +<span class="sourceLineNo">138</span> String filterCriteria = (args.length > 5) ? args[5]: null;<a name="line.138"></a> +<span class="sourceLineNo">139</span> if (filterCriteria == null) return null;<a name="line.139"></a> +<span class="sourceLineNo">140</span> if (filterCriteria.startsWith("^")) {<a name="line.140"></a> +<span class="sourceLineNo">141</span> String regexPattern = filterCriteria.substring(1, filterCriteria.length());<a name="line.141"></a> +<span class="sourceLineNo">142</span> exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));<a name="line.142"></a> +<span class="sourceLineNo">143</span> } else {<a name="line.143"></a> +<span class="sourceLineNo">144</span> exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));<a name="line.144"></a> +<span class="sourceLineNo">145</span> }<a name="line.145"></a> +<span class="sourceLineNo">146</span> return exportFilter;<a name="line.146"></a> +<span class="sourceLineNo">147</span> }<a name="line.147"></a> +<span class="sourceLineNo">148</span><a name="line.148"></a> +<span class="sourceLineNo">149</span> /*<a name="line.149"></a> +<span class="sourceLineNo">150</span> * @param errorMsg Error message. Can be null.<a name="line.150"></a> +<span class="sourceLineNo">151</span> */<a name="line.151"></a> +<span class="sourceLineNo">152</span> private static void usage(final String errorMsg) {<a name="line.152"></a> +<span class="sourceLineNo">153</span> if (errorMsg != null && errorMsg.length() > 0) {<a name="line.153"></a> +<span class="sourceLineNo">154</span> System.err.println("ERROR: " + errorMsg);<a name="line.154"></a> +<span class="sourceLineNo">155</span> }<a name="line.155"></a> +<span class="sourceLineNo">156</span> System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +<a name="line.156"></a> +<span class="sourceLineNo">157</span> "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");<a name="line.157"></a> +<span class="sourceLineNo">158</span> System.err.println(" Note: -D properties will be applied to the conf used. ");<a name="line.158"></a> +<span class="sourceLineNo">159</span> System.err.println(" For example: ");<a name="line.159"></a> +<span class="sourceLineNo">160</span> System.err.println(" -D mapreduce.output.fileoutputformat.compress=true");<a name="line.160"></a> +<span class="sourceLineNo">161</span> System.err.println(" -D mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec");<a name="line.161"></a> +<span class="sourceLineNo">162</span> System.err.println(" -D mapreduce.output.fileoutputformat.compress.type=BLOCK");<a name="line.162"></a> +<span class="sourceLineNo">163</span> System.err.println(" Additionally, the following SCAN properties can be specified");<a name="line.163"></a> +<span class="sourceLineNo">164</span> System.err.println(" to control/limit what is exported..");<a name="line.164"></a> +<span class="sourceLineNo">165</span> System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");<a name="line.165"></a> +<span class="sourceLineNo">166</span> System.err.println(" -D " + RAW_SCAN + "=true");<a name="line.166"></a> +<span class="sourceLineNo">167</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<ROWSTART>");<a name="line.167"></a> +<span class="sourceLineNo">168</span> System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<ROWSTOP>");<a name="line.168"></a> +<span class="sourceLineNo">169</span> System.err.println(" -D " + JOB_NAME_CONF_KEY<a name="line.169"></a> +<span class="sourceLineNo">170</span> + "=jobName - use the specified mapreduce job name for the export");<a name="line.170"></a> +<span class="sourceLineNo">171</span> System.err.println("For performance consider the following properties:\n"<a name="line.171"></a> +<span class="sourceLineNo">172</span> + " -Dhbase.client.scanner.caching=100\n"<a name="line.172"></a> +<span class="sourceLineNo">173</span> + " -Dmapreduce.map.speculative=false\n"<a name="line.173"></a> +<span class="sourceLineNo">174</span> + " -Dmapreduce.reduce.speculative=false");<a name="line.174"></a> +<span class="sourceLineNo">175</span> System.err.println("For tables with very wide rows consider setting the batch size as below:\n"<a name="line.175"></a> +<span class="sourceLineNo">176</span> + " -D" + EXPORT_BATCHING + "=10");<a name="line.176"></a> +<span class="sourceLineNo">177</span> }<a name="line.177"></a> +<span class="sourceLineNo">178</span><a name="line.178"></a> <span class="sourceLineNo">179</span><a name="line.179"></a> -<span class="sourceLineNo">180</span><a name="line.180"></a> -<span class="sourceLineNo">181</span> @Override<a name="line.181"></a> -<span class="sourceLineNo">182</span> public int run(String[] args) throws Exception {<a name="line.182"></a> -<span class="sourceLineNo">183</span> if (args.length < 2) {<a name="line.183"></a> -<span class="sourceLineNo">184</span> usage("Wrong number of arguments: " + args.length);<a name="line.184"></a> -<span class="sourceLineNo">185</span> return -1;<a name="line.185"></a> -<span class="sourceLineNo">186</span> }<a name="line.186"></a> -<span class="sourceLineNo">187</span> Job job = createSubmittableJob(getConf(), args);<a name="line.187"></a> -<span class="sourceLineNo">188</span> return (job.waitForCompletion(true) ? 0 : 1);<a name="line.188"></a> -<span class="sourceLineNo">189</span> }<a name="line.189"></a> -<span class="sourceLineNo">190</span><a name="line.190"></a> -<span class="sourceLineNo">191</span> /**<a name="line.191"></a> -<span class="sourceLineNo">192</span> * Main entry point.<a name="line.192"></a> -<span class="sourceLineNo">193</span> * @param args The command line parameters.<a name="line.193"></a> -<span class="sourceLineNo">194</span> * @throws Exception When running the job fails.<a name="line.194"></a> -<span class="sourceLineNo">195</span> */<a name="line.195"></a> -<span class="sourceLineNo">196</span> public static void main(String[] args) throws Exception {<a name="line.196"></a> -<span class="sourceLineNo">197</span> int errCode = ToolRunner.run(HBaseConfiguration.create(), new Export(), args);<a name="line.197"></a> -<span class="sourceLineNo">198</span> System.exit(errCode);<a name="line.198"></a> -<span class="sourceLineNo">199</span> }<a name="line.199"></a> -<span class="sourceLineNo">200</span>}<a name="line.200"></a> +<span class="sourceLineNo">180</span> @Override<a name="line.180"></a> +<span class="sourceLineNo">181</span> public int run(String[] args) throws Exception {<a name="line.181"></a> +<span class="sourceLineNo">182</span> if (args.length < 2) {<a name="line.182"></a> +<span class="sourceLineNo">183</span> usage("Wrong number of arguments: " + args.length);<a name="line.183"></a> +<span class="sourceLineNo">184</span> return -1;<a name="line.184"></a> +<span class="sourceLineNo">185</span> }<a name="line.185"></a> +<span class="sourceLineNo">186</span> Job job = createSubmittableJob(getConf(), args);<a name="line.186"></a> +<span class="sourceLineNo">187</span> return (job.waitForCompletion(true) ? 0 : 1);<a name="line.187"></a> +<span class="sourceLineNo">188</span> }<a name="line.188"></a> +<span class="sourceLineNo">189</span><a name="line.189"></a> +<span class="sourceLineNo">190</span> /**<a name="line.190"></a> +<span class="sourceLineNo">191</span> * Main entry point.<a name="line.191"></a> +<span class="sourceLineNo">192</span> * @param args The command line parameters.<a name="line.192"></a> +<span class="sourceLineNo">193</span> * @throws Exception When running the job fails.<a name="line.193"></a> +<span class="sourceLineNo">194</span> */<a name="line.194"></a> +<span class="sourceLineNo">195</span> public static void main(String[] args) throws Exception {<a name="line.195"></a> +<span class="sourceLineNo">196</span> int errCode = ToolRunner.run(HBaseConfiguration.create(), new Export(), args);<a name="line.196"></a> +<span class="sourceLineNo">197</span> System.exit(errCode);<a name="line.197"></a> +<span class="sourceLineNo">198</span> }<a name="line.198"></a> +<span class="sourceLineNo">199</span>}<a name="line.199"></a>
