HBASE-18640 Move mapreduce out of hbase-server into separate module.
- Moves out o.a.h.h.{mapred, mapreduce} to new hbase-mapreduce module which
depends
on hbase-server because of classes like *Snapshot{Input,Output}Format.java,
WALs, replication, etc
- hbase-backup depends on it for WALPlayer and MR job stuff
- A bunch of tools needed to be pulled into hbase-mapreduce because of their
dependencies on MR.
These are: CompactionTool, LoadTestTool, PerformanceEvaluation, ExportSnapshot
This is a better place for them than hbase-server. But the ideal place would be a
separate hbase-tools module.
- There were some tests in hbase-server which were digging into these tools for
static util functions or
confs. Moved these to better/easily shared place. For eg. security related
stuff to HBaseKerberosUtils.
- Note that hbase-mapreduce has secondPartExecution tests. On my machine they
took like 20 min, so maybe
more on apache jenkins. That's basically equal reduction of runtime of
hbase-server tests, which is a
big win!
Change-Id: Ieeb7235014717ca83ee5cb13b2a27fddfa6838e8
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/59d03410
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/59d03410
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/59d03410
Branch: refs/heads/branch-2
Commit: 59d034106f9309190066eceac794f2d9d61b2575
Parents: 95bc464
Author: Apekshit Sharma <[email protected]>
Authored: Sun Aug 20 14:34:16 2017 -0700
Committer: Apekshit Sharma <[email protected]>
Committed: Sat Aug 26 01:55:33 2017 -0700
----------------------------------------------------------------------
hbase-assembly/pom.xml | 4 +
.../src/main/assembly/hadoop-two-compat.xml | 1 +
hbase-assembly/src/main/assembly/src.xml | 1 +
hbase-backup/pom.xml | 10 +
hbase-examples/pom.xml | 4 +
hbase-it/pom.xml | 16 +
.../hadoop/hbase/IntegrationTestIngest.java | 5 +-
.../IntegrationTestIngestStripeCompactions.java | 4 +-
.../hbase/IntegrationTestIngestWithMOB.java | 5 +-
.../hbase/IntegrationTestRegionReplicaPerf.java | 3 +-
.../mapreduce/IntegrationTestImportTsv.java | 1 -
.../test/IntegrationTestLoadAndVerify.java | 2 +-
hbase-mapreduce/pom.xml | 316 +++
.../org/apache/hadoop/hbase/mapred/Driver.java | 52 +
.../hadoop/hbase/mapred/GroupingTableMap.java | 157 ++
.../hadoop/hbase/mapred/HRegionPartitioner.java | 95 +
.../hadoop/hbase/mapred/IdentityTableMap.java | 76 +
.../hbase/mapred/IdentityTableReduce.java | 61 +
.../mapred/MultiTableSnapshotInputFormat.java | 128 +
.../apache/hadoop/hbase/mapred/RowCounter.java | 121 +
.../hadoop/hbase/mapred/TableInputFormat.java | 90 +
.../hbase/mapred/TableInputFormatBase.java | 313 +++
.../apache/hadoop/hbase/mapred/TableMap.java | 38 +
.../hadoop/hbase/mapred/TableMapReduceUtil.java | 376 +++
.../hadoop/hbase/mapred/TableOutputFormat.java | 134 +
.../hadoop/hbase/mapred/TableRecordReader.java | 139 +
.../hbase/mapred/TableRecordReaderImpl.java | 259 ++
.../apache/hadoop/hbase/mapred/TableReduce.java | 38 +
.../hbase/mapred/TableSnapshotInputFormat.java | 166 ++
.../apache/hadoop/hbase/mapred/TableSplit.java | 154 +
.../hadoop/hbase/mapred/package-info.java | 26 +
.../hadoop/hbase/mapreduce/CellCounter.java | 333 +++
.../hadoop/hbase/mapreduce/CellCreator.java | 134 +
.../hadoop/hbase/mapreduce/CopyTable.java | 386 +++
.../DefaultVisibilityExpressionResolver.java | 144 +
.../apache/hadoop/hbase/mapreduce/Driver.java | 64 +
.../apache/hadoop/hbase/mapreduce/Export.java | 197 ++
.../hbase/mapreduce/GroupingTableMapper.java | 177 ++
.../hbase/mapreduce/HFileInputFormat.java | 174 ++
.../hbase/mapreduce/HFileOutputFormat2.java | 902 ++++++
.../hbase/mapreduce/HRegionPartitioner.java | 140 +
.../hadoop/hbase/mapreduce/HashTable.java | 747 +++++
.../hbase/mapreduce/IdentityTableMapper.java | 67 +
.../hbase/mapreduce/IdentityTableReducer.java | 79 +
.../apache/hadoop/hbase/mapreduce/Import.java | 780 ++++++
.../hadoop/hbase/mapreduce/ImportTsv.java | 793 ++++++
.../hadoop/hbase/mapreduce/JarFinder.java | 186 ++
.../hbase/mapreduce/KeyValueSerialization.java | 88 +
.../hbase/mapreduce/KeyValueSortReducer.java | 57 +
.../mapreduce/MultiTableHFileOutputFormat.java | 122 +
.../hbase/mapreduce/MultiTableInputFormat.java | 104 +
.../mapreduce/MultiTableInputFormatBase.java | 296 ++
.../hbase/mapreduce/MultiTableOutputFormat.java | 176 ++
.../MultiTableSnapshotInputFormat.java | 106 +
.../MultiTableSnapshotInputFormatImpl.java | 252 ++
.../mapreduce/MultithreadedTableMapper.java | 301 ++
.../hbase/mapreduce/MutationSerialization.java | 98 +
.../hadoop/hbase/mapreduce/PutCombiner.java | 98 +
.../hadoop/hbase/mapreduce/PutSortReducer.java | 147 +
.../hbase/mapreduce/RegionSizeCalculator.java | 127 +
.../hbase/mapreduce/ResultSerialization.java | 158 ++
.../hadoop/hbase/mapreduce/RowCounter.java | 265 ++
.../mapreduce/SimpleTotalOrderPartitioner.java | 143 +
.../hadoop/hbase/mapreduce/SyncTable.java | 786 ++++++
.../hbase/mapreduce/TableInputFormat.java | 294 ++
.../hbase/mapreduce/TableInputFormatBase.java | 652 +++++
.../hbase/mapreduce/TableMapReduceUtil.java | 1027 +++++++
.../hadoop/hbase/mapreduce/TableMapper.java | 38 +
.../hbase/mapreduce/TableOutputCommitter.java | 67 +
.../hbase/mapreduce/TableOutputFormat.java | 239 ++
.../hbase/mapreduce/TableRecordReader.java | 147 +
.../hbase/mapreduce/TableRecordReaderImpl.java | 315 +++
.../hadoop/hbase/mapreduce/TableReducer.java | 45 +
.../mapreduce/TableSnapshotInputFormat.java | 209 ++
.../mapreduce/TableSnapshotInputFormatImpl.java | 410 +++
.../hadoop/hbase/mapreduce/TableSplit.java | 395 +++
.../hadoop/hbase/mapreduce/TextSortReducer.java | 213 ++
.../hbase/mapreduce/TsvImporterMapper.java | 232 ++
.../hbase/mapreduce/TsvImporterTextMapper.java | 128 +
.../mapreduce/VisibilityExpressionResolver.java | 45 +
.../hadoop/hbase/mapreduce/WALInputFormat.java | 344 +++
.../hadoop/hbase/mapreduce/WALPlayer.java | 384 +++
.../hadoop/hbase/mapreduce/package-info.java | 26 +
.../replication/VerifyReplication.java | 700 +++++
.../hbase/regionserver/CompactionTool.java | 470 ++++
.../hadoop/hbase/snapshot/ExportSnapshot.java | 1111 ++++++++
.../util/MapreduceDependencyClasspathTool.java | 73 +
.../hadoop/hbase/PerformanceEvaluation.java | 2627 ++++++++++++++++++
.../hadoop/hbase/ScanPerformanceEvaluation.java | 406 +++
.../hadoop/hbase/TestPerformanceEvaluation.java | 218 ++
.../apache/hadoop/hbase/mapred/TestDriver.java | 41 +
.../hbase/mapred/TestGroupingTableMap.java | 181 ++
.../hbase/mapred/TestIdentityTableMap.java | 64 +
.../TestMultiTableSnapshotInputFormat.java | 135 +
.../hadoop/hbase/mapred/TestRowCounter.java | 163 ++
.../hadoop/hbase/mapred/TestSplitTable.java | 116 +
.../hbase/mapred/TestTableInputFormat.java | 460 +++
.../hadoop/hbase/mapred/TestTableMapReduce.java | 103 +
.../hbase/mapred/TestTableMapReduceUtil.java | 272 ++
.../TestTableOutputFormatConnectionExhaust.java | 104 +
.../mapred/TestTableSnapshotInputFormat.java | 271 ++
...opSecurityEnabledUserProviderForTesting.java | 41 +
.../MultiTableInputFormatTestBase.java | 277 ++
.../hadoop/hbase/mapreduce/NMapInputFormat.java | 134 +
.../TableSnapshotInputFormatTestBase.java | 231 ++
.../hadoop/hbase/mapreduce/TestCellCounter.java | 376 +++
.../hadoop/hbase/mapreduce/TestCopyTable.java | 262 ++
.../mapreduce/TestGroupingTableMapper.java | 68 +
.../hbase/mapreduce/TestHFileOutputFormat2.java | 1496 ++++++++++
.../hbase/mapreduce/TestHRegionPartitioner.java | 71 +
.../hadoop/hbase/mapreduce/TestHashTable.java | 194 ++
.../hbase/mapreduce/TestImportExport.java | 726 +++++
.../TestImportTSVWithOperationAttributes.java | 266 ++
.../hbase/mapreduce/TestImportTSVWithTTLs.java | 175 ++
.../TestImportTSVWithVisibilityLabels.java | 495 ++++
.../hadoop/hbase/mapreduce/TestImportTsv.java | 571 ++++
.../hbase/mapreduce/TestImportTsvParser.java | 314 +++
.../hadoop/hbase/mapreduce/TestJarFinder.java | 132 +
.../TestLoadIncrementalHFilesSplitRecovery.java | 669 +++++
.../mapreduce/TestMultiTableInputFormat.java | 49 +
.../TestMultiTableSnapshotInputFormat.java | 92 +
.../TestMultiTableSnapshotInputFormatImpl.java | 186 ++
.../mapreduce/TestMultithreadedTableMapper.java | 264 ++
.../mapreduce/TestRegionSizeCalculator.java | 160 ++
.../hadoop/hbase/mapreduce/TestRowCounter.java | 400 +++
.../TestSecureLoadIncrementalHFiles.java | 70 +
...ecureLoadIncrementalHFilesSplitRecovery.java | 69 +
.../TestSimpleTotalOrderPartitioner.java | 81 +
.../hadoop/hbase/mapreduce/TestSyncTable.java | 339 +++
.../hbase/mapreduce/TestTableInputFormat.java | 481 ++++
.../mapreduce/TestTableInputFormatBase.java | 53 +
.../mapreduce/TestTableInputFormatScan1.java | 200 ++
.../mapreduce/TestTableInputFormatScan2.java | 118 +
.../mapreduce/TestTableInputFormatScanBase.java | 287 ++
.../hbase/mapreduce/TestTableMapReduce.java | 174 ++
.../hbase/mapreduce/TestTableMapReduceBase.java | 233 ++
.../hbase/mapreduce/TestTableMapReduceUtil.java | 99 +
.../mapreduce/TestTableSnapshotInputFormat.java | 373 +++
.../hadoop/hbase/mapreduce/TestTableSplit.java | 129 +
.../hbase/mapreduce/TestTimeRangeMapRed.java | 211 ++
.../hadoop/hbase/mapreduce/TestWALPlayer.java | 231 ++
.../hbase/mapreduce/TestWALRecordReader.java | 276 ++
.../mapreduce/TsvImporterCustomTestMapper.java | 80 +
.../TsvImporterCustomTestMapperForOprAttr.java | 57 +
.../replication/TestReplicationSmallTests.java | 1059 +++++++
.../hbase/snapshot/TestExportSnapshot.java | 381 +++
.../snapshot/TestExportSnapshotHelpers.java | 91 +
.../snapshot/TestExportSnapshotNoCluster.java | 112 +
.../hbase/snapshot/TestMobExportSnapshot.java | 65 +
.../snapshot/TestMobSecureExportSnapshot.java | 59 +
.../snapshot/TestSecureExportSnapshot.java | 64 +
.../apache/hadoop/hbase/util/LoadTestTool.java | 915 ++++++
.../src/test/resources/hbase-site.xml | 161 ++
.../src/test/resources/hbase-site2.xml | 146 +
.../src/test/resources/hdfs-site.xml | 32 +
.../src/test/resources/log4j.properties | 68 +
.../src/test/resources/mapred-queues.xml | 75 +
.../src/test/resources/mapred-site.xml | 34 +
.../PerformanceEvaluation_Counter.properties | 28 +
.../hbase/mapreduce/exportedTableIn94Format | Bin 0 -> 374 bytes
hbase-rest/pom.xml | 10 +
.../hbase/rest/PerformanceEvaluation.java | 6 +-
.../hbase/client/TableSnapshotScanner.java | 4 +-
.../org/apache/hadoop/hbase/mapred/Driver.java | 52 -
.../hadoop/hbase/mapred/GroupingTableMap.java | 157 --
.../hadoop/hbase/mapred/HRegionPartitioner.java | 96 -
.../hadoop/hbase/mapred/IdentityTableMap.java | 76 -
.../hbase/mapred/IdentityTableReduce.java | 61 -
.../mapred/MultiTableSnapshotInputFormat.java | 128 -
.../apache/hadoop/hbase/mapred/RowCounter.java | 121 -
.../hadoop/hbase/mapred/TableInputFormat.java | 90 -
.../hbase/mapred/TableInputFormatBase.java | 313 ---
.../apache/hadoop/hbase/mapred/TableMap.java | 38 -
.../hadoop/hbase/mapred/TableMapReduceUtil.java | 376 ---
.../hadoop/hbase/mapred/TableOutputFormat.java | 134 -
.../hadoop/hbase/mapred/TableRecordReader.java | 139 -
.../hbase/mapred/TableRecordReaderImpl.java | 259 --
.../apache/hadoop/hbase/mapred/TableReduce.java | 38 -
.../hbase/mapred/TableSnapshotInputFormat.java | 166 --
.../apache/hadoop/hbase/mapred/TableSplit.java | 154 -
.../hadoop/hbase/mapred/package-info.java | 26 -
.../hadoop/hbase/mapreduce/CellCounter.java | 333 ---
.../hadoop/hbase/mapreduce/CellCreator.java | 134 -
.../hadoop/hbase/mapreduce/CopyTable.java | 386 ---
.../DefaultVisibilityExpressionResolver.java | 144 -
.../apache/hadoop/hbase/mapreduce/Driver.java | 64 -
.../apache/hadoop/hbase/mapreduce/Export.java | 197 --
.../hbase/mapreduce/GroupingTableMapper.java | 177 --
.../hbase/mapreduce/HFileInputFormat.java | 174 --
.../hbase/mapreduce/HFileOutputFormat2.java | 902 ------
.../hbase/mapreduce/HRegionPartitioner.java | 140 -
.../hadoop/hbase/mapreduce/HashTable.java | 747 -----
.../hbase/mapreduce/IdentityTableMapper.java | 67 -
.../hbase/mapreduce/IdentityTableReducer.java | 79 -
.../apache/hadoop/hbase/mapreduce/Import.java | 780 ------
.../hadoop/hbase/mapreduce/ImportTsv.java | 793 ------
.../hadoop/hbase/mapreduce/JarFinder.java | 186 --
.../hbase/mapreduce/KeyValueSerialization.java | 88 -
.../hbase/mapreduce/KeyValueSortReducer.java | 56 -
.../mapreduce/MultiTableHFileOutputFormat.java | 122 -
.../hbase/mapreduce/MultiTableInputFormat.java | 104 -
.../mapreduce/MultiTableInputFormatBase.java | 297 --
.../hbase/mapreduce/MultiTableOutputFormat.java | 176 --
.../MultiTableSnapshotInputFormat.java | 106 -
.../MultiTableSnapshotInputFormatImpl.java | 252 --
.../mapreduce/MultithreadedTableMapper.java | 301 --
.../hbase/mapreduce/MutationSerialization.java | 98 -
.../hadoop/hbase/mapreduce/PutCombiner.java | 98 -
.../hadoop/hbase/mapreduce/PutSortReducer.java | 147 -
.../hbase/mapreduce/ResultSerialization.java | 158 --
.../hadoop/hbase/mapreduce/RowCounter.java | 265 --
.../mapreduce/SimpleTotalOrderPartitioner.java | 143 -
.../hadoop/hbase/mapreduce/SyncTable.java | 786 ------
.../hbase/mapreduce/TableInputFormat.java | 294 --
.../hbase/mapreduce/TableInputFormatBase.java | 653 -----
.../hbase/mapreduce/TableMapReduceUtil.java | 1027 -------
.../hadoop/hbase/mapreduce/TableMapper.java | 38 -
.../hbase/mapreduce/TableOutputCommitter.java | 67 -
.../hbase/mapreduce/TableOutputFormat.java | 239 --
.../hbase/mapreduce/TableRecordReader.java | 147 -
.../hbase/mapreduce/TableRecordReaderImpl.java | 315 ---
.../hadoop/hbase/mapreduce/TableReducer.java | 45 -
.../mapreduce/TableSnapshotInputFormat.java | 210 --
.../mapreduce/TableSnapshotInputFormatImpl.java | 412 ---
.../hadoop/hbase/mapreduce/TableSplit.java | 395 ---
.../hadoop/hbase/mapreduce/TextSortReducer.java | 213 --
.../hbase/mapreduce/TsvImporterMapper.java | 232 --
.../hbase/mapreduce/TsvImporterTextMapper.java | 128 -
.../mapreduce/VisibilityExpressionResolver.java | 45 -
.../hadoop/hbase/mapreduce/WALInputFormat.java | 344 ---
.../hadoop/hbase/mapreduce/WALPlayer.java | 384 ---
.../hadoop/hbase/mapreduce/package-info.java | 26 -
.../replication/VerifyReplication.java | 700 -----
.../hbase/regionserver/CompactionTool.java | 470 ----
.../hadoop/hbase/snapshot/ExportSnapshot.java | 1111 --------
.../util/MapreduceDependencyClasspathTool.java | 73 -
.../hadoop/hbase/util/RegionSizeCalculator.java | 146 -
.../hadoop/hbase/PerformanceEvaluation.java | 2626 -----------------
.../hadoop/hbase/ScanPerformanceEvaluation.java | 406 ---
.../hadoop/hbase/TestPerformanceEvaluation.java | 218 --
.../hbase/client/TestTableSnapshotScanner.java | 18 +-
.../apache/hadoop/hbase/mapred/TestDriver.java | 41 -
.../hbase/mapred/TestGroupingTableMap.java | 181 --
.../hbase/mapred/TestIdentityTableMap.java | 64 -
.../TestMultiTableSnapshotInputFormat.java | 135 -
.../hadoop/hbase/mapred/TestRowCounter.java | 163 --
.../hadoop/hbase/mapred/TestSplitTable.java | 116 -
.../hbase/mapred/TestTableInputFormat.java | 461 ---
.../hadoop/hbase/mapred/TestTableMapReduce.java | 103 -
.../hbase/mapred/TestTableMapReduceUtil.java | 272 --
.../TestTableOutputFormatConnectionExhaust.java | 104 -
.../mapred/TestTableSnapshotInputFormat.java | 271 --
...opSecurityEnabledUserProviderForTesting.java | 41 -
.../MultiTableInputFormatTestBase.java | 277 --
.../hadoop/hbase/mapreduce/NMapInputFormat.java | 134 -
.../TableSnapshotInputFormatTestBase.java | 231 --
.../hadoop/hbase/mapreduce/TestCellCounter.java | 376 ---
.../hadoop/hbase/mapreduce/TestCopyTable.java | 262 --
.../mapreduce/TestGroupingTableMapper.java | 68 -
.../hbase/mapreduce/TestHFileOutputFormat2.java | 1495 ----------
.../hbase/mapreduce/TestHRegionPartitioner.java | 71 -
.../hadoop/hbase/mapreduce/TestHashTable.java | 194 --
.../hbase/mapreduce/TestImportExport.java | 727 -----
.../TestImportTSVWithOperationAttributes.java | 266 --
.../hbase/mapreduce/TestImportTSVWithTTLs.java | 175 --
.../TestImportTSVWithVisibilityLabels.java | 495 ----
.../hadoop/hbase/mapreduce/TestImportTsv.java | 571 ----
.../hbase/mapreduce/TestImportTsvParser.java | 314 ---
.../hadoop/hbase/mapreduce/TestJarFinder.java | 132 -
.../mapreduce/TestLoadIncrementalHFiles.java | 1 -
.../TestLoadIncrementalHFilesSplitRecovery.java | 669 -----
.../mapreduce/TestMultiTableInputFormat.java | 49 -
.../TestMultiTableSnapshotInputFormat.java | 92 -
.../TestMultiTableSnapshotInputFormatImpl.java | 186 --
.../mapreduce/TestMultithreadedTableMapper.java | 264 --
.../hadoop/hbase/mapreduce/TestRowCounter.java | 400 ---
.../TestSecureLoadIncrementalHFiles.java | 70 -
...ecureLoadIncrementalHFilesSplitRecovery.java | 69 -
.../TestSimpleTotalOrderPartitioner.java | 81 -
.../hadoop/hbase/mapreduce/TestSyncTable.java | 339 ---
.../hbase/mapreduce/TestTableInputFormat.java | 481 ----
.../mapreduce/TestTableInputFormatBase.java | 53 -
.../mapreduce/TestTableInputFormatScan1.java | 200 --
.../mapreduce/TestTableInputFormatScan2.java | 118 -
.../mapreduce/TestTableInputFormatScanBase.java | 287 --
.../hbase/mapreduce/TestTableMapReduce.java | 174 --
.../hbase/mapreduce/TestTableMapReduceBase.java | 233 --
.../hbase/mapreduce/TestTableMapReduceUtil.java | 99 -
.../mapreduce/TestTableSnapshotInputFormat.java | 384 ---
.../hadoop/hbase/mapreduce/TestTableSplit.java | 129 -
.../hbase/mapreduce/TestTimeRangeMapRed.java | 211 --
.../hadoop/hbase/mapreduce/TestWALPlayer.java | 231 --
.../hbase/mapreduce/TestWALRecordReader.java | 276 --
.../mapreduce/TsvImporterCustomTestMapper.java | 80 -
.../TsvImporterCustomTestMapperForOprAttr.java | 58 -
.../hbase/namespace/TestNamespaceAuditor.java | 8 +-
.../regionserver/TestHRegionFileSystem.java | 7 +-
.../replication/TestReplicationSmallTests.java | 1059 -------
.../hbase/security/HBaseKerberosUtils.java | 26 +-
.../hbase/snapshot/TestExportSnapshot.java | 381 ---
.../snapshot/TestExportSnapshotHelpers.java | 91 -
.../snapshot/TestExportSnapshotNoCluster.java | 112 -
.../hbase/snapshot/TestMobExportSnapshot.java | 65 -
.../snapshot/TestMobSecureExportSnapshot.java | 59 -
.../snapshot/TestSecureExportSnapshot.java | 64 -
.../apache/hadoop/hbase/util/HFileTestUtil.java | 14 +
.../util/LoadTestDataGeneratorWithTags.java | 3 +-
.../apache/hadoop/hbase/util/LoadTestTool.java | 968 -------
.../hadoop/hbase/util/MultiThreadedAction.java | 2 +-
.../hbase/util/MultiThreadedReaderWithACL.java | 3 +-
.../hbase/util/MultiThreadedUpdaterWithACL.java | 3 +-
.../hadoop/hbase/util/RestartMetaTest.java | 8 +-
.../hbase/util/TestRegionSizeCalculator.java | 159 --
.../hbase/util/test/LoadTestDataGenerator.java | 23 +
.../PerformanceEvaluation_Counter.properties | 28 -
.../hbase/mapreduce/exportedTableIn94Format | Bin 374 -> 0 bytes
hbase-spark/pom.xml | 4 +
.../hbase/spark/TestJavaHBaseContext.java | 1 -
pom.xml | 13 +
src/main/asciidoc/_chapters/ops_mgt.adoc | 32 +-
320 files changed, 38781 insertions(+), 37899 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/pom.xml b/hbase-assembly/pom.xml
index 84b5676..5935d78 100644
--- a/hbase-assembly/pom.xml
+++ b/hbase-assembly/pom.xml
@@ -195,6 +195,10 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
<!-- To dump tools in hbase-procedure into cached_classpath.txt. -->
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
index 1592a3b..a66237b 100644
--- a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
+++ b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
@@ -50,6 +50,7 @@
<include>org.apache.hbase:hbase-thrift</include>
<include>org.apache.hbase:hbase-external-blockcache</include>
<include>org.apache.hbase:hbase-backup</include>
+ <include>org.apache.hbase:hbase-mapreduce</include>
</includes>
<!-- Binaries for the dependencies also go in the hbase-jars directory
-->
<binaries>
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-assembly/src/main/assembly/src.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/src/main/assembly/src.xml
b/hbase-assembly/src/main/assembly/src.xml
index 3fd7562..a2ca40e 100644
--- a/hbase-assembly/src/main/assembly/src.xml
+++ b/hbase-assembly/src/main/assembly/src.xml
@@ -60,6 +60,7 @@
<include>org.apache.hbase:hbase-testing-util</include>
<include>org.apache.hbase:hbase-thrift</include>
<include>org.apache.hbase:hbase-backup</include>
+ <include>org.apache.hbase:hbase-mapreduce</include>
</includes>
<!-- Include all the sources in the top directory -->
<sources>
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-backup/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-backup/pom.xml b/hbase-backup/pom.xml
index 88e0643..0e3401f 100644
--- a/hbase-backup/pom.xml
+++ b/hbase-backup/pom.xml
@@ -109,6 +109,16 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-examples/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-examples/pom.xml b/hbase-examples/pom.xml
index fad2bf6..7e6539e 100644
--- a/hbase-examples/pom.xml
+++ b/hbase-examples/pom.xml
@@ -146,6 +146,10 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-endpoint</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-it/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-it/pom.xml b/hbase-it/pom.xml
index 2ba3e0e..527de7f 100644
--- a/hbase-it/pom.xml
+++ b/hbase-it/pom.xml
@@ -200,6 +200,22 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-rsgroup</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
----------------------------------------------------------------------
diff --git
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
index 76be4e8a..9bc3131 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.util.StringUtils;
@@ -70,7 +71,7 @@ public class IntegrationTestIngest extends
IntegrationTestBase {
protected String[] LOAD_TEST_TOOL_INIT_ARGS = {
LoadTestTool.OPT_COLUMN_FAMILIES,
LoadTestTool.OPT_COMPRESSION,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
LoadTestTool.OPT_INMEMORY,
LoadTestTool.OPT_ENCRYPTION,
LoadTestTool.OPT_NUM_REGIONS_PER_SERVER,
@@ -138,7 +139,7 @@ public class IntegrationTestIngest extends
IntegrationTestBase {
String familiesString = getConf().get(
String.format("%s.%s", clazz, LoadTestTool.OPT_COLUMN_FAMILIES));
if (familiesString == null) {
- for (byte[] family : LoadTestTool.DEFAULT_COLUMN_FAMILIES) {
+ for (byte[] family : HFileTestUtil.DEFAULT_COLUMN_FAMILIES) {
families.add(Bytes.toString(family));
}
} else {
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
----------------------------------------------------------------------
diff --git
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
index d64fbb0..fc79abb 100644
---
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
+++
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
@@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreEngine;
import org.apache.hadoop.hbase.regionserver.StripeStoreEngine;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
-import org.apache.hadoop.hbase.util.LoadTestTool;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.util.ToolRunner;
import org.junit.experimental.categories.Category;
@@ -41,7 +41,7 @@ public class IntegrationTestIngestStripeCompactions extends
IntegrationTestInges
HTableDescriptor htd = new HTableDescriptor(getTablename());
htd.setConfiguration(StoreEngine.STORE_ENGINE_CLASS_KEY,
StripeStoreEngine.class.getName());
htd.setConfiguration(HStore.BLOCKING_STOREFILES_KEY, "100");
- HColumnDescriptor hcd = new
HColumnDescriptor(LoadTestTool.DEFAULT_COLUMN_FAMILY);
+ HColumnDescriptor hcd = new
HColumnDescriptor(HFileTestUtil.DEFAULT_COLUMN_FAMILY);
HBaseTestingUtility.createPreSplitLoadTestTable(util.getConfiguration(),
htd, hcd);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
----------------------------------------------------------------------
diff --git
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
index 5bbb12b..010e4b9 100644
---
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
+++
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.hbase.util.LoadTestDataGeneratorWithMOB;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.util.ToolRunner;
@@ -44,7 +45,7 @@ import org.junit.experimental.categories.Category;
public class IntegrationTestIngestWithMOB extends IntegrationTestIngest {
private static final char COLON = ':';
- private byte[] mobColumnFamily = LoadTestTool.DEFAULT_COLUMN_FAMILY;
+ private byte[] mobColumnFamily = HFileTestUtil.DEFAULT_COLUMN_FAMILY;
public static final String THRESHOLD = "threshold";
public static final String MIN_MOB_DATA_SIZE = "minMobDataSize";
public static final String MAX_MOB_DATA_SIZE = "maxMobDataSize";
@@ -56,7 +57,7 @@ public class IntegrationTestIngestWithMOB extends
IntegrationTestIngest {
//similar to LOAD_TEST_TOOL_INIT_ARGS except OPT_IN_MEMORY is removed
protected String[] LOAD_TEST_TOOL_MOB_INIT_ARGS = {
LoadTestTool.OPT_COMPRESSION,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
LoadTestTool.OPT_ENCRYPTION,
LoadTestTool.OPT_NUM_REGIONS_PER_SERVER,
LoadTestTool.OPT_REGION_REPLICATION,
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
----------------------------------------------------------------------
diff --git
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
index d649bdb..3135bd0 100644
---
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
+++
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
@@ -72,6 +72,7 @@ public class IntegrationTestRegionReplicaPerf extends
IntegrationTestBase {
private static final String PRIMARY_TIMEOUT_DEFAULT = "" + 10 * 1000; // 10
ms
private static final String NUM_RS_KEY = "numRs";
private static final String NUM_RS_DEFAULT = "" + 3;
+ public static final String FAMILY_NAME = "info";
/** Extract a descriptive statistic from a {@link
com.codahale.metrics.Histogram}. */
private enum Stat {
@@ -236,7 +237,7 @@ public class IntegrationTestRegionReplicaPerf extends
IntegrationTestBase {
@Override
protected Set<String> getColumnFamilies() {
- return Sets.newHashSet(Bytes.toString(PerformanceEvaluation.FAMILY_NAME));
+ return Sets.newHashSet(FAMILY_NAME);
}
/** Compute the mean of the given {@code stat} from a timing results. */
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
----------------------------------------------------------------------
diff --git
a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
index 9d04bf9..fb7acf4 100644
---
a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
+++
b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
@@ -29,7 +29,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
-import java.util.UUID;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
----------------------------------------------------------------------
diff --git
a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
index f042521..b9d16a1 100644
---
a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
+++
b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
@@ -55,7 +56,6 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/pom.xml b/hbase-mapreduce/pom.xml
new file mode 100644
index 0000000..2d3f859
--- /dev/null
+++ b/hbase-mapreduce/pom.xml
@@ -0,0 +1,316 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <!--
+ /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ -->
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hbase</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <version>2.0.0-alpha3-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+ <artifactId>hbase-mapreduce</artifactId>
+ <name>Apache HBase - MapReduce</name>
+ <description>
+ This module contains implementations of InputFormat, OutputFormat, Mapper,
Reducer, etc which
+ are needed for running MR jobs on tables, WALs, HFiles and other HBase
specific constructs.
+ It also contains a bunch of tools: RowCounter, ImportTsv, Import, Export,
CompactionTool,
+ ExportSnapshot, WALPlayer, etc.
+ </description>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-site-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!--Make it so assembly:single does nothing in here-->
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <skipAssembly>true</skipAssembly>
+ </configuration>
+ </plugin>
+ <!-- Testing plugins -->
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <properties>
+ <property>
+ <name>listener</name>
+
<value>org.apache.hadoop.hbase.ServerResourceCheckerJUnitListener</value>
+ </property>
+ </properties>
+ <systemPropertyVariables>
+
<org.apache.hadoop.hbase.shaded.io.netty.packagePrefix>org.apache.hadoop.hbase.shaded.</org.apache.hadoop.hbase.shaded.io.netty.packagePrefix>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+ <!-- Make a jar and put the sources in the jar -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ </plugin>
+ </plugins>
+ <pluginManagement>
+ <plugins>
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself.-->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <versionRange>[3.2,)</versionRange>
+ <goals>
+ <goal>compile</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore></ignore>
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+ <dependencies>
+ <!-- Intra-project dependencies -->
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>jdk.tools</groupId>
+ <artifactId>jdk.tools</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <!-- General dependencies -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <profiles>
+ <!-- Skip the tests in this module -->
+ <profile>
+ <id>skipMapReduceTests</id>
+ <activation>
+ <property>
+ <name>skipMapReduceTests</name>
+ </property>
+ </activation>
+ <properties>
+ <surefire.skipFirstPart>true</surefire.skipFirstPart>
+ <surefire.skipSecondPart>true</surefire.skipSecondPart>
+ </properties>
+ </profile>
+ <!-- profile against Hadoop 2.x: This is the default. -->
+ <profile>
+ <id>hadoop-2.0</id>
+ <activation>
+ <property>
+ <!--Below formatting for dev-support/generate-hadoopX-poms.sh-->
+ <!--h2--><name>!hadoop.profile</name>
+ </property>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>com.github.stephenc.findbugs</groupId>
+ <artifactId>findbugs-annotations</artifactId>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>net.java.dev.jets3t</groupId>
+ <artifactId>jets3t</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-compiler</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-runtime</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ </profile>
+
+ <!--
+ profile for building against Hadoop 3.0.x. Activate using:
+ mvn -Dhadoop.profile=3.0
+ -->
+ <profile>
+ <id>hadoop-3.0</id>
+ <activation>
+ <property>
+ <name>hadoop.profile</name>
+ <value>3.0</value>
+ </property>
+ </activation>
+ <properties>
+ <hadoop.version>${hadoop-three.version}</hadoop.version>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+</project>
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
new file mode 100644
index 0000000..618c14a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
@@ -0,0 +1,52 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.util.ProgramDriver;
+
+import
org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Driver for hbase mapreduce jobs. Select which to run by passing name of job
+ * to this main.
+ */
[email protected](HBaseInterfaceAudience.TOOLS)
[email protected]
+public class Driver {
+
+ private static ProgramDriver pgd = new ProgramDriver();
+
+ @VisibleForTesting
+ static void setProgramDriver(ProgramDriver pgd0) {
+ pgd = pgd0;
+ }
+
+ /**
+ * @param args first argument selects the program by name; the rest are passed to it
+ * @throws Throwable if driver reflection or the selected program fails
+ */
+ public static void main(String[] args) throws Throwable {
+ pgd.addClass(RowCounter.NAME, RowCounter.class, "Count rows in HBase
table");
+ ProgramDriver.class.getMethod("driver", new Class[] { String[].class })
+ .invoke(pgd, new Object[] { args });
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
new file mode 100644
index 0000000..a534224
--- /dev/null
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
@@ -0,0 +1,157 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+
+/**
+ * Extract grouping columns from input record
+ */
[email protected]
+public class GroupingTableMap
+extends MapReduceBase
+implements TableMap<ImmutableBytesWritable,Result> {
+
+ /**
+ * JobConf parameter to specify the columns used to produce the key passed to
+ * collect from the map phase
+ */
+ public static final String GROUP_COLUMNS =
+ "hbase.mapred.groupingtablemap.columns";
+
+ protected byte [][] columns;
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
the
+ * JobConf.
+ *
+ * @param table table to be processed
+ * @param columns space separated list of columns to fetch
+ * @param groupColumns space separated list of columns used to form the key
+ * used in collect
+ * @param mapper map class
+ * @param job job configuration object
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, String columns, String groupColumns,
+ Class<? extends TableMap> mapper, JobConf job) {
+
+ TableMapReduceUtil.initTableMapJob(table, columns, mapper,
+ ImmutableBytesWritable.class, Result.class, job);
+ job.set(GROUP_COLUMNS, groupColumns);
+ }
+
+ @Override
+ public void configure(JobConf job) {
+ super.configure(job);
+ String[] cols = job.get(GROUP_COLUMNS, "").split(" ");
+ columns = new byte[cols.length][];
+ for(int i = 0; i < cols.length; i++) {
+ columns[i] = Bytes.toBytes(cols[i]);
+ }
+ }
+
+ /**
+ * Extract the grouping columns from value to construct a new key.
+ *
+ * Pass the new key and value to reduce.
+ * If any of the grouping columns are not found in the value, the record is
skipped.
+ * @param key the input row key
+ * @param value the row's cells, from which the grouping columns are extracted
+ * @param output collector that receives the (group key, value) pair
+ * @param reporter progress reporter (unused)
+ * @throws IOException if emitting to the collector fails
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable,Result> output,
+ Reporter reporter) throws IOException {
+
+ byte[][] keyVals = extractKeyValues(value);
+ if(keyVals != null) {
+ ImmutableBytesWritable tKey = createGroupKey(keyVals);
+ output.collect(tKey, value);
+ }
+ }
+
+ /**
+ * Extract columns values from the current record. This method returns
+ * null if any of the columns are not found.
+ *
+ * Override this method if you want to deal with nulls differently.
+ *
+ * @param r the current result row
+ * @return array of byte values
+ */
+ protected byte[][] extractKeyValues(Result r) {
+ byte[][] keyVals = null;
+ ArrayList<byte[]> foundList = new ArrayList<>();
+ int numCols = columns.length;
+ if (numCols > 0) {
+ for (Cell value: r.listCells()) {
+ byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
+ CellUtil.cloneQualifier(value));
+ for (int i = 0; i < numCols; i++) {
+ if (Bytes.equals(column, columns[i])) {
+ foundList.add(CellUtil.cloneValue(value));
+ break;
+ }
+ }
+ }
+ if(foundList.size() == numCols) {
+ keyVals = foundList.toArray(new byte[numCols][]);
+ }
+ }
+ return keyVals;
+ }
+
+ /**
+ * Create a key by concatenating multiple column values.
+ * Override this function in order to produce different types of keys.
+ *
+ * @param vals column values to concatenate into the key
+ * @return key generated by concatenating multiple column values
+ */
+ protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
+ if(vals == null) {
+ return null;
+ }
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < vals.length; i++) {
+ if(i > 0) {
+ sb.append(" ");
+ }
+ sb.append(Bytes.toString(vals[i]));
+ }
+ return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
new file mode 100644
index 0000000..4f5323a
--- /dev/null
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
@@ -0,0 +1,95 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+
+/**
+ * This is used to partition the output keys into groups of keys.
+ * Keys are grouped according to the regions that currently exist
+ * so that each reducer fills a single region so load is distributed.
+ *
+ * @param <K2>
+ * @param <V2>
+ */
[email protected]
+public class HRegionPartitioner<K2,V2>
+implements Partitioner<ImmutableBytesWritable, V2> {
+ private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
+ // Connection and locator are not cleaned up; they just die when partitioner
is done.
+ private Connection connection;
+ private RegionLocator locator;
+ private byte[][] startKeys;
+
+ public void configure(JobConf job) {
+ try {
+ this.connection =
ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ TableName tableName =
TableName.valueOf(job.get(TableOutputFormat.OUTPUT_TABLE));
+ this.locator = this.connection.getRegionLocator(tableName);
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+
+ try {
+ this.startKeys = this.locator.getStartKeys();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ }
+
+ public int getPartition(ImmutableBytesWritable key, V2 value, int
numPartitions) {
+ byte[] region = null;
+ // Only one region return 0
+ if (this.startKeys.length == 1){
+ return 0;
+ }
+ try {
+ // Not sure if this is cached after a split so we could have problems
+ // here if a region splits while mapping
+ region =
locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ for (int i = 0; i < this.startKeys.length; i++){
+ if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
+ if (i >= numPartitions-1){
+ // cover the case where we have fewer reducers than regions.
+ return (Integer.toString(i).hashCode()
+ & Integer.MAX_VALUE) % numPartitions;
+ }
+ return i;
+ }
+ }
+ // if above fails to find start key that match we need to return something
+ return 0;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
new file mode 100644
index 0000000..dfacff9
--- /dev/null
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
@@ -0,0 +1,76 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Pass the given key and record as-is to reduce
+ */
[email protected]
+public class IdentityTableMap
+extends MapReduceBase
+implements TableMap<ImmutableBytesWritable, Result> {
+
+ /** constructor */
+ public IdentityTableMap() {
+ super();
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table table name
+ * @param columns columns to scan
+ * @param mapper mapper class
+ * @param job job configuration
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, String columns,
+ Class<? extends TableMap> mapper, JobConf job) {
+ TableMapReduceUtil.initTableMapJob(table, columns, mapper,
+ ImmutableBytesWritable.class,
+ Result.class, job);
+ }
+
+ /**
+ * Pass the key, value to reduce
+ * @param key the input row key, passed through unchanged
+ * @param value the input row value, passed through unchanged
+ * @param output collector that receives the unchanged (key, value) pair
+ * @param reporter progress reporter (unused)
+ * @throws IOException if emitting to the collector fails
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable,Result> output,
+ Reporter reporter) throws IOException {
+
+ // convert
+ output.collect(key, value);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
new file mode 100644
index 0000000..9c2e604
--- /dev/null
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
@@ -0,0 +1,61 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Write to table each key, record pair
+ */
[email protected]
+public class IdentityTableReduce
+extends MapReduceBase
+implements TableReduce<ImmutableBytesWritable, Put> {
+ @SuppressWarnings("unused")
+ private static final Log LOG =
+ LogFactory.getLog(IdentityTableReduce.class.getName());
+
+ /**
+ * No aggregation, output pairs of (key, record)
+ * @param key the row key to write
+ * @param values the Puts to emit for this key, unaggregated
+ * @param output collector that receives each (key, Put) pair
+ * @param reporter progress reporter (unused)
+ * @throws IOException if emitting to the collector fails
+ */
+ public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
+ OutputCollector<ImmutableBytesWritable, Put> output,
+ Reporter reporter)
+ throws IOException {
+
+ while(values.hasNext()) {
+ output.collect(key, values.next());
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..81dbb15
--- /dev/null
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MultiTableSnapshotInputFormat generalizes {@link
org.apache.hadoop.hbase.mapred
+ * .TableSnapshotInputFormat}
+ * allowing a MapReduce job to run over one or more table snapshots, with one
or more scans
+ * configured for each.
+ * Internally, the input format delegates to {@link
org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * and thus has the same performance advantages; see {@link
org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat} for
+ * more details.
+ * Usage is similar to TableSnapshotInputFormat, with the following exception:
+ * initMultiTableSnapshotMapperJob takes in a map
+ * from snapshot name to a collection of scans. For each snapshot in the map,
each corresponding
+ * scan will be applied;
+ * the overall dataset for the job is defined by the concatenation of the
regions and tables
+ * included in each snapshot/scan
+ * pair.
+ * {@link TableMapReduceUtil#initMultiTableSnapshotMapperJob(Map,
+ * Class, Class, Class, JobConf, boolean, Path)}
+ * can be used to configure the job.
+ * <pre>{@code
+ * Job job = new Job(conf);
+ * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
+ * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"),
Bytes.toBytes("b"))),
+ * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"),
Bytes.toBytes("2")))
+ * );
+ * Path restoreDir = new Path("/tmp/snapshot_restore_dir")
+ * TableMapReduceUtil.initTableSnapshotMapperJob(
+ * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
+ * MyMapOutputValueWritable.class, job, true, restoreDir);
+ * }
+ * </pre>
+ * Internally, this input format restores each snapshot into a subdirectory of
the given tmp
+ * directory. Input splits and
+ * record readers are created as described in {@link
org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * (one per region).
+ * See {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for
more notes on
+ * permissioning; the
+ * same caveats apply here.
+ *
+ * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
+ * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
+ */
[email protected]
+public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat
+ implements InputFormat<ImmutableBytesWritable, Result> {
+
+ private final MultiTableSnapshotInputFormatImpl delegate;
+
+ public MultiTableSnapshotInputFormat() {
+ this.delegate = new MultiTableSnapshotInputFormatImpl();
+ }
+
+ @Override
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException
{
+ List<TableSnapshotInputFormatImpl.InputSplit> splits =
delegate.getSplits(job);
+ InputSplit[] results = new InputSplit[splits.size()];
+ for (int i = 0; i < splits.size(); i++) {
+ results[i] = new TableSnapshotRegionSplit(splits.get(i));
+ }
+ return results;
+ }
+
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result>
getRecordReader(InputSplit split, JobConf job,
+ Reporter reporter) throws IOException {
+ return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split,
job);
+ }
+
+ /**
+ * Configure conf to read from snapshotScans, with snapshots restored to a
subdirectory of
+ * restoreDir.
+ * Sets: {@link org.apache.hadoop.hbase.mapreduce
+ * .MultiTableSnapshotInputFormatImpl#RESTORE_DIRS_KEY},
+ * {@link org.apache.hadoop.hbase.mapreduce
+ * .MultiTableSnapshotInputFormatImpl#SNAPSHOT_TO_SCANS_KEY}
+ *
+ * @param conf configuration to update with the snapshot/scan mapping
+ * @param snapshotScans map from snapshot name to the scans to run against it
+ * @param restoreDir base directory under which each snapshot is restored
+ * @throws IOException if restoring a snapshot or writing the config fails
+ */
+ public static void setInput(Configuration conf, Map<String,
Collection<Scan>> snapshotScans,
+ Path restoreDir) throws IOException {
+ new MultiTableSnapshotInputFormatImpl().setInput(conf, snapshotScans,
restoreDir);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
new file mode 100644
index 0000000..43560fd
--- /dev/null
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
@@ -0,0 +1,121 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A job with a map to count rows.
+ * Map outputs table rows IF the input row has columns that have content.
+ * Uses a org.apache.hadoop.mapred.lib.IdentityReducer
+ */
[email protected]
+public class RowCounter extends Configured implements Tool {
+ // Name of this 'program'
+ static final String NAME = "rowcounter";
+
+ /**
+ * Mapper that runs the count.
+ */
+ static class RowCounterMapper
+ implements TableMap<ImmutableBytesWritable, Result> {
+ private static enum Counters {ROWS}
+
+ public void map(ImmutableBytesWritable row, Result values,
+ OutputCollector<ImmutableBytesWritable, Result> output,
+ Reporter reporter)
+ throws IOException {
+ // Count every row containing data, whether it's in qualifiers or
values
+ reporter.incrCounter(Counters.ROWS, 1);
+ }
+
+ public void configure(JobConf jc) {
+ // Nothing to do.
+ }
+
+ public void close() throws IOException {
+ // Nothing to do.
+ }
+ }
+
+ /**
+ * @param args
+ * @return the JobConf
+ * @throws IOException
+ */
+ public JobConf createSubmittableJob(String[] args) throws IOException {
+ JobConf c = new JobConf(getConf(), getClass());
+ c.setJobName(NAME);
+ // Columns are space delimited
+ StringBuilder sb = new StringBuilder();
+ final int columnoffset = 2;
+ for (int i = columnoffset; i < args.length; i++) {
+ if (i > columnoffset) {
+ sb.append(" ");
+ }
+ sb.append(args[i]);
+ }
+ // Second argument is the table name.
+ TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
+ RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
+ c.setNumReduceTasks(0);
+ // First arg is the output directory.
+ FileOutputFormat.setOutputPath(c, new Path(args[0]));
+ return c;
+ }
+
+ static int printUsage() {
+ System.out.println(NAME +
+ " <outputdir> <tablename> <column1> [<column2>...]");
+ return -1;
+ }
+
+ public int run(final String[] args) throws Exception {
+ // Make sure there are at least 3 parameters
+ if (args.length < 3) {
+ System.err.println("ERROR: Wrong number of parameters: " + args.length);
+ return printUsage();
+ }
+ JobClient.runJob(createSubmittableJob(args));
+ return 0;
+ }
+
+ /**
+ * @param args
+ * @throws Exception
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new
RowCounter(), args);
+ System.exit(errCode);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
----------------------------------------------------------------------
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
new file mode 100644
index 0000000..208849a
--- /dev/null
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
@@ -0,0 +1,90 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Convert HBase tabular data into a format that is consumable by Map/Reduce.
+ */
[email protected]
+public class TableInputFormat extends TableInputFormatBase implements
+ JobConfigurable {
+ private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
+
+ /**
+ * space delimited list of columns
+ */
+ public static final String COLUMN_LIST = "hbase.mapred.tablecolumns";
+
+ public void configure(JobConf job) {
+ try {
+ initialize(job);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ }
+ }
+
+ @Override
+ protected void initialize(JobConf job) throws IOException {
+ Path[] tableNames = FileInputFormat.getInputPaths(job);
+ String colArg = job.get(COLUMN_LIST);
+ String[] colNames = colArg.split(" ");
+ byte [][] m_cols = new byte[colNames.length][];
+ for (int i = 0; i < m_cols.length; i++) {
+ m_cols[i] = Bytes.toBytes(colNames[i]);
+ }
+ setInputColumns(m_cols);
+ Connection connection = ConnectionFactory.createConnection(job);
+ initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
+ }
+
+ public void validateInput(JobConf job) throws IOException {
+ // expecting exactly one path
+ Path [] tableNames = FileInputFormat.getInputPaths(job);
+ if (tableNames == null || tableNames.length > 1) {
+ throw new IOException("expecting one table name");
+ }
+
+ // connected to table?
+ if (getTable() == null) {
+ throw new IOException("could not connect to table '" +
+ tableNames[0].getName() + "'");
+ }
+
+ // expecting at least one column
+ String colArg = job.get(COLUMN_LIST);
+ if (colArg == null || colArg.length() == 0) {
+ throw new IOException("expecting at least one column");
+ }
+ }
+}