Repository: hbase
Updated Branches:
  refs/heads/HBASE-14850 4fa416223 -> 8afef174d (forced update)
HBASE-15287 mapreduce.RowCounter returns incorrect result with binary row key inputs (Matt Warhaftig) Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e9211e41 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e9211e41 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e9211e41 Branch: refs/heads/HBASE-14850 Commit: e9211e415a208a3f16b1d6a5d9fc730824e0df92 Parents: d815211 Author: tedyu <[email protected]> Authored: Sat Apr 16 12:46:21 2016 -0700 Committer: tedyu <[email protected]> Committed: Sat Apr 16 12:46:21 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/mapred/GroupingTableMap.java | 2 +- .../hadoop/hbase/mapreduce/CellCounter.java | 2 +- .../hadoop/hbase/mapreduce/CopyTable.java | 4 +- .../apache/hadoop/hbase/mapreduce/Export.java | 6 +-- .../hbase/mapreduce/GroupingTableMapper.java | 2 +- .../hadoop/hbase/mapreduce/RowCounter.java | 4 +- .../mapreduce/SimpleTotalOrderPartitioner.java | 2 +- .../hbase/mapreduce/TableInputFormat.java | 4 +- .../hadoop/hbase/mapreduce/TestCellCounter.java | 42 +++++++++++++++++++- .../hadoop/hbase/mapreduce/TestCopyTable.java | 18 ++++----- .../hbase/mapreduce/TestImportExport.java | 16 +++++++- .../hadoop/hbase/mapreduce/TestRowCounter.java | 5 ++- 12 files changed, 79 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java index 6cd0602..ee6da75 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java @@ -154,6 +154,6 @@ implements TableMap<ImmutableBytesWritable,Result> { } sb.append(Bytes.toString(vals[i])); } - return new ImmutableBytesWritable(Bytes.toBytes(sb.toString())); + return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString())); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java index 00f197c..aaa32bd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java @@ -241,7 +241,7 @@ public class CellCounter extends Configured implements Tool { String regexPattern = filterCriteria.substring(1, filterCriteria.length()); rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern)); } else { - rowFilter = new PrefixFilter(Bytes.toBytes(filterCriteria)); + rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria)); } return rowFilter; } http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java index dd8b891..c1e8a82 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java @@ -111,11 +111,11 @@ public class CopyTable extends Configured implements Tool { } if (startRow 
!= null) { - scan.setStartRow(Bytes.toBytes(startRow)); + scan.setStartRow(Bytes.toBytesBinary(startRow)); } if (stopRow != null) { - scan.setStopRow(Bytes.toBytes(stopRow)); + scan.setStopRow(Bytes.toBytesBinary(stopRow)); } if(families != null) { http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java index 66c0057..56d229a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java @@ -100,10 +100,10 @@ public class Export extends Configured implements Tool { s.setCacheBlocks(false); // set Start and Stop row if (conf.get(TableInputFormat.SCAN_ROW_START) != null) { - s.setStartRow(Bytes.toBytes(conf.get(TableInputFormat.SCAN_ROW_START))); + s.setStartRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_START))); } if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) { - s.setStopRow(Bytes.toBytes(conf.get(TableInputFormat.SCAN_ROW_STOP))); + s.setStopRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_STOP))); } // Set Scan Column Family boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN)); @@ -142,7 +142,7 @@ public class Export extends Configured implements Tool { String regexPattern = filterCriteria.substring(1, filterCriteria.length()); exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern)); } else { - exportFilter = new PrefixFilter(Bytes.toBytes(filterCriteria)); + exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria)); } return exportFilter; } http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java 
---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java index e9c8927..8a9fa49 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java @@ -145,7 +145,7 @@ extends TableMapper<ImmutableBytesWritable,Result> implements Configurable { } sb.append(Bytes.toString(vals[i])); } - return new ImmutableBytesWritable(Bytes.toBytes(sb.toString())); + return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString())); } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 8522a61..720d4b1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -147,10 +147,10 @@ public class RowCounter extends Configured implements Tool { Scan scan = new Scan(); scan.setCacheBlocks(false); if (startKey != null && !startKey.equals("")) { - scan.setStartRow(Bytes.toBytes(startKey)); + scan.setStartRow(Bytes.toBytesBinary(startKey)); } if (endKey != null && !endKey.equals("")) { - scan.setStopRow(Bytes.toBytes(endKey)); + scan.setStopRow(Bytes.toBytesBinary(endKey)); } if (sb.length() > 0) { for (String columnName : sb.toString().trim().split(" ")) { http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java 
---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java index fe967ff..2257054 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java @@ -94,7 +94,7 @@ implements Configurable { } LOG.warn("Using deprecated configuration " + deprecatedKey + " - please use static accessor methods instead."); - return Bytes.toBytes(oldStyleVal); + return Bytes.toBytesBinary(oldStyleVal); } @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java index 814d82c..ebeb158 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java @@ -129,11 +129,11 @@ implements Configurable { scan = new Scan(); if (conf.get(SCAN_ROW_START) != null) { - scan.setStartRow(Bytes.toBytes(conf.get(SCAN_ROW_START))); + scan.setStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START))); } if (conf.get(SCAN_ROW_STOP) != null) { - scan.setStopRow(Bytes.toBytes(conf.get(SCAN_ROW_STOP))); + scan.setStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP))); } if (conf.get(SCAN_COLUMNS) != null) { http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java 
---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java index bd2f82a..60e9b33 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java @@ -47,8 +47,8 @@ import static org.junit.Assert.fail; @Category({MapReduceTests.class, LargeTests.class}) public class TestCellCounter { private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); - private static final byte[] ROW1 = Bytes.toBytes("row1"); - private static final byte[] ROW2 = Bytes.toBytes("row2"); + private static final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1"); + private static final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2"); private static final String FAMILY_A_STRING = "a"; private static final String FAMILY_B_STRING = "b"; private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING); @@ -112,6 +112,44 @@ public class TestCellCounter { } /** + * Test CellCounter all data should print to output + */ + @Test(timeout = 300000) + public void testCellCounterPrefix() throws Exception { + TableName sourceTable = TableName.valueOf("testCellCounterPrefix"); + byte[][] families = { FAMILY_A, FAMILY_B }; + Table t = UTIL.createTable(sourceTable, families); + try { + Put p = new Put(ROW1); + p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11")); + p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12")); + p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13")); + t.put(p); + p = new Put(ROW2); + p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21")); + p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22")); + p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23")); + t.put(p); + String[] args = { 
sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "\\x01row1" }; + runCount(args); + FileInputStream inputStream = + new FileInputStream(OUTPUT_DIR + File.separator + "part-r-00000"); + String data = IOUtils.toString(inputStream); + inputStream.close(); + assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2")); + assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2")); + assertTrue(data.contains("Total ROWS" + "\t" + "1")); + assertTrue(data.contains("b;q" + "\t" + "1")); + assertTrue(data.contains("a;q" + "\t" + "1")); + assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1")); + assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1")); + } finally { + t.close(); + FileUtil.fullyDelete(new File(OUTPUT_DIR)); + } + } + + /** * Test CellCounter with time range all data should print to output */ @Test (timeout=300000) http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java index 628ca08..a57b394 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java @@ -119,7 +119,7 @@ public class TestCopyTable { public void testCopyTable() throws Exception { doCopyTableTest(false); } - + /** * Simple end-to-end test with bulkload. 
*/ @@ -127,16 +127,16 @@ public class TestCopyTable { public void testCopyTableWithBulkload() throws Exception { doCopyTableTest(true); } - + @Test public void testStartStopRow() throws Exception { final TableName TABLENAME1 = TableName.valueOf("testStartStopRow1"); final TableName TABLENAME2 = TableName.valueOf("testStartStopRow2"); final byte[] FAMILY = Bytes.toBytes("family"); final byte[] COLUMN1 = Bytes.toBytes("c1"); - final byte[] ROW0 = Bytes.toBytes("row0"); - final byte[] ROW1 = Bytes.toBytes("row1"); - final byte[] ROW2 = Bytes.toBytes("row2"); + final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0"); + final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1"); + final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2"); Table t1 = TEST_UTIL.createTable(TABLENAME1, FAMILY); Table t2 = TEST_UTIL.createTable(TABLENAME2, FAMILY); @@ -156,8 +156,8 @@ public class TestCopyTable { assertEquals( 0, ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()), - copy, new String[] { "--new.name=" + TABLENAME2, "--startrow=row1", - "--stoprow=row2", TABLENAME1.getNameAsString() })); + copy, new String[] { "--new.name=" + TABLENAME2, "--startrow=\\x01row1", + "--stoprow=\\x01row2", TABLENAME1.getNameAsString() })); // verify the data was copied into table 2 // row1 exist, row0, row2 do not exist @@ -169,11 +169,11 @@ public class TestCopyTable { g = new Get(ROW0); r = t2.get(g); assertEquals(0, r.size()); - + g = new Get(ROW2); r = t2.get(g); assertEquals(0, r.size()); - + t1.close(); t2.close(); TEST_UTIL.deleteTable(TABLENAME1); http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java index 094fe1c..50146fd 100644 --- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java @@ -93,8 +93,9 @@ import org.mockito.stubbing.Answer; public class TestImportExport { private static final Log LOG = LogFactory.getLog(TestImportExport.class); private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); - private static final byte[] ROW1 = Bytes.toBytes("row1"); - private static final byte[] ROW2 = Bytes.toBytes("row2"); + private static final byte[] ROW1 = Bytes.toBytesBinary("\\x32row1"); + private static final byte[] ROW2 = Bytes.toBytesBinary("\\x32row2"); + private static final byte[] ROW3 = Bytes.toBytesBinary("\\x32row3"); private static final String FAMILYA_STRING = "a"; private static final String FAMILYB_STRING = "b"; private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING); @@ -181,9 +182,17 @@ public class TestImportExport { p.addColumn(FAMILYA, QUAL, now + 1, QUAL); p.addColumn(FAMILYA, QUAL, now + 2, QUAL); t.put(p); + p = new Put(ROW3); + p.addColumn(FAMILYA, QUAL, now, QUAL); + p.addColumn(FAMILYA, QUAL, now + 1, QUAL); + p.addColumn(FAMILYA, QUAL, now + 2, QUAL); + t.put(p); } String[] args = new String[] { + // Only export row1 & row2. 
+ "-D" + TableInputFormat.SCAN_ROW_START + "=\\x32row1", + "-D" + TableInputFormat.SCAN_ROW_STOP + "=\\x32row3", EXPORT_TABLE, FQ_OUTPUT_DIR, "1000", // max number of key versions per key to export @@ -207,6 +216,9 @@ public class TestImportExport { g.setMaxVersions(); r = t.get(g); assertEquals(3, r.size()); + g = new Get(ROW3); + r = t.get(g); + assertEquals(0, r.size()); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index 6657d0f..4a719dc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -149,7 +149,7 @@ public class TestRowCounter { @Test public void testRowCounterColumnAndRowRange() throws Exception { String[] args = new String[] { - TABLE_NAME, "--range=rov,rox", COL_FAM + ":" + COL1 + TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1 }; runRowCount(args, 8); } @@ -245,7 +245,8 @@ public class TestRowCounter { // write few rows with two columns int i = 0; for (; i < TOTAL_ROWS - ROWS_WITH_ONE_COL; i++) { - byte[] row = Bytes.toBytes("row" + i); + // Use binary rows values to test for HBASE-15287. + byte[] row = Bytes.toBytesBinary("\\x00row" + i); Put put = new Put(row); put.addColumn(family, col1, value); put.addColumn(family, col2, value);
