jinhyukify commented on code in PR #8321:
URL: https://github.com/apache/hbase/pull/8321#discussion_r3367104499
##########
hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java:
##########
@@ -181,4 +183,173 @@ ImmutableMap.<Integer, ImmutableBytesWritable> builder()
TEST_UTIL.deleteTable(tableName);
TEST_UTIL.cleanupDataTestDirOnTestFS();
}
+
+ @Test
+ public void testHashTableWithSha256(TestInfo testInfo) throws Exception {
+ final TableName tableName =
TableName.valueOf(testInfo.getTestMethod().get().getName());
+ final byte[] family = Bytes.toBytes("family");
+ final byte[] column1 = Bytes.toBytes("c1");
+ final byte[] column2 = Bytes.toBytes("c2");
+ final byte[] column3 = Bytes.toBytes("c3");
+
+ int numRows = 100;
+ int numRegions = 10;
+ int numHashFiles = 3;
+
+ byte[][] splitRows = new byte[numRegions - 1][];
+ for (int i = 1; i < numRegions; i++) {
+ splitRows[i - 1] = Bytes.toBytes(numRows * i / numRegions);
+ }
+
+ long timestamp = 1430764183454L;
+ Table t1 = TEST_UTIL.createTable(tableName, family, splitRows);
+ for (int i = 0; i < numRows; i++) {
+ Put p = new Put(Bytes.toBytes(i), timestamp);
+ p.addColumn(family, column1, column1);
+ p.addColumn(family, column2, column2);
+ p.addColumn(family, column3, column3);
+ t1.put(p);
+ }
+ t1.close();
+
+ HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
+ Path testDir =
TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString());
+
+ long batchSize = 300;
+ int code = hashTable.run(
+ new String[] { "--batchsize=" + batchSize, "--numhashfiles=" +
numHashFiles, "--scanbatch=2",
+ "--hashAlgorithm=SHA-256", tableName.getNameAsString(),
testDir.toString() });
+ assertEquals(0, code, "test job failed");
+
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+ HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(),
testDir);
+ assertEquals("SHA-256", tableHash.hashAlgorithm,
+ "manifest must record the algorithm used to produce the digests");
+
+ ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes =
+ ImmutableMap.<Integer, ImmutableBytesWritable> builder()
Review Comment:
The expected SHA-256 hashes were generated using this script:
https://gist.github.com/jinhyukify/705b9b6b81d06973556d5645353efcbb
##########
hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java:
##########
@@ -665,7 +679,7 @@ private boolean doCommandLine(final String[] args) {
for (int i = 0; i < args.length - NUM_ARGS; i++) {
String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ if (cmd.equals("-h") || cmd.equals("--help")) {
Review Comment:
Without this change, any option that starts with `--h`, including
`--hashAlgorithm`, is matched by the old `startsWith("--h")` help check and
cannot be used.
##########
hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java:
##########
@@ -189,6 +197,8 @@ void readPropertiesFile(FileSystem fs, Path path) throws
IOException {
if (endTimeString != null) {
endTime = Long.parseLong(endTimeString);
}
+
+ hashAlgorithm = p.getProperty("hashAlgorithm", DEFAULT_HASH_ALGORITHM);
Review Comment:
For old HashTable output whose properties file has no `hashAlgorithm` entry, we fall back to `DEFAULT_HASH_ALGORITHM`, the original MD5 algorithm.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]