This is an automated email from the ASF dual-hosted git repository. elserj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hbase-operator-tools.git
The following commit(s) were added to refs/heads/master by this push: new e3a8f96 HBASE-26656 Utility to correct corrupt RegionInfo's in hbase:meta e3a8f96 is described below commit e3a8f96d0b9e9b985d7f6f2952aaa929047c5b08 Author: Josh Elser <els...@apache.org> AuthorDate: Tue Dec 14 18:02:38 2021 -0500 HBASE-26656 Utility to correct corrupt RegionInfo's in hbase:meta A standalone utility which corrects hbase:meta given the problem described by HBASE-23328. Includes the ability to both "report" corrupt regions as well as correct them. This tool will ensure that other HBCK2 utilities continue to work without additional modification. Signed-off-by: Peter Somogyi <psomo...@apache.org> Closes #102 --- .../src/main/java/org/apache/hbase/HBCK2.java | 55 ++++ .../main/java/org/apache/hbase/HBCKRegionInfo.java | 150 ++++++++++ .../org/apache/hbase/RegionInfoMismatchTool.java | 182 +++++++++++++ .../apache/hbase/TestRegionInfoMismatchTool.java | 302 +++++++++++++++++++++ 4 files changed, 689 insertions(+) diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java index a788a83..84dc834 100644 --- a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java +++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java @@ -107,6 +107,7 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool { private static final String RECOVER_UNKNOWN = "recoverUnknown"; private static final String GENERATE_TABLE_INFO = "generateMissingTableDescriptorFile"; private static final String FIX_META = "fixMeta"; + private static final String REGIONINFO_MISMATCH = "regionInfoMismatch"; // TODO update this map in case of the name of a method changes in Hbck interface // in org.apache.hadoop.hbase.client package. Or a new command is added and the hbck command // does not equals to the method name in Hbck interface. 
@@ -422,6 +423,23 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool { return hbck.scheduleSCPsForUnknownServers(); } + /** + * Runs the RegionInfoMismatchTool using CLI options. + */ + void regionInfoMismatch(String[] args) throws Exception { + // CLI Options + Options options = new Options(); + Option dryRunOption = Option.builder("f").longOpt("fix").hasArg(false).build(); + options.addOption(dryRunOption); + // Parse command-line. + CommandLineParser parser = new DefaultParser(); + CommandLine commandLine = parser.parse(options, args, false); + final boolean fix = commandLine.hasOption(dryRunOption.getOpt()); + try (ClusterConnection connection = connect()) { + new RegionInfoMismatchTool(connection).run(fix); + } + } + private HBaseProtos.ServerName parseServerName(String serverName) { ServerName sn = ServerName.parseServerName(serverName); return HBaseProtos.ServerName.newBuilder().setHostName(sn.getHostname()). @@ -472,6 +490,8 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool { writer.println(); usageUnassigns(writer); writer.println(); + usageRegioninfoMismatch(writer); + writer.println(); writer.close(); return sw.toString(); } @@ -728,6 +748,27 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool { writer.println(" hbase:meta tool. See the HBCK2 README for how to use."); } + private static void usageRegioninfoMismatch(PrintWriter writer) { + writer.println(" " + REGIONINFO_MISMATCH); + writer.println(" Options:"); + writer.println(" -f,--fix Update hbase:meta with the corrections"); + writer.println(" It is recommended to first run this utility without the fix"); + writer.println(" option to ensure that the utility is generating the correct"); + writer.println(" serialized RegionInfo data structures. 
Inspect the output to"); + writer.println(" confirm that the hbase:meta rowkey matches the new RegionInfo."); + writer.println(); + writer.println(" This tool will read hbase:meta and report any regions whose rowkey"); + writer.println(" and cell value differ in their encoded region name. HBASE-23328 "); + writer.println(" illustrates a problem for read-replica enabled tables in which "); + writer.println(" the encoded region name (the MD5 hash) does not match between "); + writer.println(" the rowkey and the value. This problem is generally harmless "); + writer.println(" for normal operation, but can break other HBCK2 tools."); + writer.println(); + writer.println(" Run this command to determine if any regions are affected by "); + writer.println(" this bug and use the -f/--fix option to then correct any"); + writer.println(" affected regions."); + } + static void showErrorMessage(String error) { if (error != null) { System.out.println("ERROR: " + error); @@ -1034,6 +1075,20 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool { tableInfoGenerator.generateTableDescriptorFileIfMissing(commands[1].trim()); break; + case REGIONINFO_MISMATCH: + // `commands` includes the `regionInfoMismatch` argument. + if (commands.length > 2) { + showErrorMessage(command + " takes one optional argument, got more than one."); + return EXIT_FAILURE; + } + try { + regionInfoMismatch(commands); + } catch (Exception e) { + e.printStackTrace(); + return EXIT_FAILURE; + } + break; + default: showErrorMessage("Unsupported command: " + command); return EXIT_FAILURE; diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCKRegionInfo.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCKRegionInfo.java new file mode 100644 index 0000000..5119b1c --- /dev/null +++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCKRegionInfo.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hbase; + + +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.util.ByteArrayHashKey; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.HashKey; +import org.apache.hadoop.hbase.util.JenkinsHash; + +/** + * A copy of utilities from {@link org.apache.hadoop.hbase.client.RegionInfo} to + * copy internal methods into HBCK2 for stability to avoid using Private methods. + */ +public final class HBCKRegionInfo { + + /** + * Separator used to demarcate the encodedName in a region name + * in the new format. See description on new format above. + */ + static final int ENC_SEPARATOR = '.'; + + static final int MD5_HEX_LENGTH = 32; + + static final int DEFAULT_REPLICA_ID = 0; + + static final byte REPLICA_ID_DELIMITER = (byte)'_'; + + private HBCKRegionInfo() {} + + /** + * Does region name contain its encoded name? + * @param regionName region name + * @return boolean indicating if this a new format region + * name which contains its encoded name. 
+ */ + public static boolean hasEncodedName(final byte[] regionName) { + // check if region name ends in ENC_SEPARATOR + return (regionName.length >= 1) && + (regionName[regionName.length - 1] == RegionInfo.ENC_SEPARATOR); + } + + /** + * @return the encodedName + */ + public static String encodeRegionName(final byte [] regionName) { + String encodedName; + if (hasEncodedName(regionName)) { + // region is in new format: + // <tableName>,<startKey>,<regionIdTimeStamp>/encodedName/ + encodedName = Bytes.toString(regionName, + regionName.length - MD5_HEX_LENGTH - 1, + MD5_HEX_LENGTH); + } else { + // old format region name. First hbase:meta region also + // use this format.EncodedName is the JenkinsHash value. + HashKey<byte[]> key = new ByteArrayHashKey(regionName, 0, regionName.length); + int hashVal = Math.abs(JenkinsHash.getInstance().hash(key, 0)); + encodedName = String.valueOf(hashVal); + } + return encodedName; + } + + /** + * Separate elements of a regionName. + * Region name is of the format: + * <code>tablename,startkey,regionIdTimestamp[_replicaId][.encodedName.]</code>. + * Startkey can contain the delimiter (',') so we parse from the start and then parse from + * the end. + * @return Array of byte[] containing tableName, startKey and id OR null if not parseable + * as a region name. 
+ */ + public static byte [][] parseRegionNameOrReturnNull(final byte[] regionName) { + int offset = -1; + for (int i = 0; i < regionName.length; i++) { + if (regionName[i] == HConstants.DELIMITER) { + offset = i; + break; + } + } + if (offset == -1) { + return null; + } + byte[] tableName = new byte[offset]; + System.arraycopy(regionName, 0, tableName, 0, offset); + offset = -1; + + int endOffset = regionName.length; + // check whether regionName contains encodedName + if (regionName.length > MD5_HEX_LENGTH + 2 && + regionName[regionName.length-1] == ENC_SEPARATOR && + regionName[regionName.length-MD5_HEX_LENGTH-2] == ENC_SEPARATOR) { + endOffset = endOffset - MD5_HEX_LENGTH - 2; + } + + // parse from end + byte[] replicaId = null; + int idEndOffset = endOffset; + for (int i = endOffset - 1; i > 0; i--) { + if (regionName[i] == REPLICA_ID_DELIMITER) { //replicaId may or may not be present + replicaId = new byte[endOffset - i - 1]; + System.arraycopy(regionName, i + 1, replicaId, 0, + endOffset - i - 1); + idEndOffset = i; + // do not break, continue to search for id + } + if (regionName[i] == HConstants.DELIMITER) { + offset = i; + break; + } + } + if (offset == -1) { + return null; + } + byte [] startKey = HConstants.EMPTY_BYTE_ARRAY; + if(offset != tableName.length + 1) { + startKey = new byte[offset - tableName.length - 1]; + System.arraycopy(regionName, tableName.length + 1, startKey, 0, + offset - tableName.length - 1); + } + byte [] id = new byte[idEndOffset - offset - 1]; + System.arraycopy(regionName, offset + 1, id, 0, + idEndOffset - offset - 1); + byte [][] elements = new byte[replicaId == null ? 
3 : 4][]; + elements[0] = tableName; + elements[1] = startKey; + elements[2] = id; + if (replicaId != null) { + elements[3] = replicaId; + } + return elements; + } +} diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/RegionInfoMismatchTool.java b/hbase-hbck2/src/main/java/org/apache/hbase/RegionInfoMismatchTool.java new file mode 100644 index 0000000..bc37423 --- /dev/null +++ b/hbase-hbck2/src/main/java/org/apache/hbase/RegionInfoMismatchTool.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hbase; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.HBCKMetaTableAccessor.MetaScanner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Standalone utility to correct the bug corrected by HBASE-23328 in which + * the region name in the rowkey of a row in meta does not match the name + * which is stored in the value of the info:regioninfo column of the same row. + */ +public class RegionInfoMismatchTool { + private static final Logger LOG = LoggerFactory.getLogger(RegionInfoMismatchTool.class); + + private final Connection connection; + + public RegionInfoMismatchTool(Connection connection) { + this.connection = connection; + } + + static class MalformedRegion { + byte[] regionName; + RegionInfo regionInfo; + + MalformedRegion(byte[] regionName, RegionInfo regionInfo) { + this.regionName = regionName; + this.regionInfo = regionInfo; + } + + byte[] getRegionName() { + return regionName; + } + + RegionInfo getRegionInfo() { + return regionInfo; + } + + @Override + public String toString() { + return "regionName=" + Bytes.toStringBinary(regionName) + ", regioninfo=" + + regionInfo.toString(); + } + } + + /** + * Returns a list of {@link MalformedRegion}'s which exist in meta. If there are + * no malformed regions, the returned list will be empty. 
+ */ + List<MalformedRegion> getMalformedRegions() throws IOException { + try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) { + MetaScanner<MalformedRegion> scanner = new MetaScanner<>(); + return scanner.scanMeta(connection, + scan -> scan.addFamily(HConstants.CATALOG_FAMILY), + r -> { + Cell riCell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, + HConstants.REGIONINFO_QUALIFIER); + RegionInfo info = RegionInfo.parseFromOrNull(riCell.getValueArray(), + riCell.getValueOffset(), riCell.getValueLength()); + // Get the expected value from the RegionInfo in the cell value + byte[] valueEncodedRegionName = info.getEncodedNameAsBytes(); + // Compare that to what is actually in the rowkey + HBCKMetaTableAccessor.getMetaKeyForRegion(info); + byte[] rowKeyRegionName = CellUtil.cloneRow(riCell); + byte[] rowkeyEncodedRegionName = Bytes.toBytes( + HBCKRegionInfo.encodeRegionName(rowKeyRegionName)); + // If they are equal, we are good. + if (Arrays.equals(rowkeyEncodedRegionName, valueEncodedRegionName)) { + // Returning null will cause `scanMeta` to ignore this row + LOG.debug("Ignoring region {} because rowkey aligns with value", info); + return null; + } + + LOG.debug("Found mismatched region {} and {}", Bytes.toStringBinary(rowKeyRegionName), + Bytes.toStringBinary(valueEncodedRegionName)); + // Only return row/regioninfo pairs that are wrong + return new MalformedRegion(rowKeyRegionName, info); + }); + } + } + + /** + * Run the RegionInfoMistmatchTool. Use the {@code fix} argument to control whether this method + * will report problems or fix problems. + * + * @param fix True if hbase:meta should be updated. False to report on any problems. 
+ */ + public void run(boolean fix) throws IOException, DeserializationException { + run(System.out, fix); + } + + void run(PrintStream out, boolean fix) throws IOException, DeserializationException { + List<MalformedRegion> regionsToFix = getMalformedRegions(); + if (!fix) { + out.println("Fix mode is disabled, printing all malformed regions detected:"); + for (MalformedRegion r : regionsToFix) { + out.println("Rowkey " + HBCKRegionInfo.encodeRegionName(r.getRegionName()) + + " does not match " + r.getRegionInfo()); + } + } + out.println("Found " + regionsToFix.size() + " regions to fix."); + try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) { + for (MalformedRegion regionToFix : regionsToFix) { + final byte[] regionName = regionToFix.getRegionName(); + final RegionInfo wrongRegionInfo = regionToFix.getRegionInfo(); + + // The encoded region name is an MD5 hash, but the regionID is what is actually + // broken by HBASE-23328 + byte[][] regionNameParts = HBCKRegionInfo.parseRegionNameOrReturnNull(regionName); + if (regionNameParts == null) { + throw new RuntimeException("Couldn't parse parts from " + + Bytes.toStringBinary(regionName)); + } + int i = 0; + for (byte[] part : regionNameParts) { + LOG.debug("Region name part[{}]: {}", i++, Bytes.toStringBinary(part)); + } + // Third component of a region name is just a literal numeric (not a binary-encoded long) + long regionId = Long.parseLong(Bytes.toString(regionNameParts[2])); + RegionInfo correctedRegionInfo = RegionInfoBuilder.newBuilder(wrongRegionInfo) + .setRegionId(regionId) + .setReplicaId(0) + .build(); + + String rowkeyEncodedRegionName = HBCKRegionInfo.encodeRegionName(regionName); + String updatedValueEncodedRegionName = correctedRegionInfo.getEncodedName(); + if (!rowkeyEncodedRegionName.equals(updatedValueEncodedRegionName)) { + out.println("Aborting: sanity-check failed on updated RegionInfo. 
Expected encoded " + + "region name " +rowkeyEncodedRegionName + " but got " + + updatedValueEncodedRegionName + "."); + out.println("Incorrectly created RegionInfo was: " + correctedRegionInfo); + throw new RuntimeException("Failed sanity-check on corrected RegionInfo"); + } + + out.println("Updating RegionInfo for " + Bytes.toStringBinary(regionName) + " to " + + correctedRegionInfo); + + // Write the update back to meta. + if (fix) { + meta.put(HBCKMetaTableAccessor.makePutFromRegionInfo(correctedRegionInfo, + System.currentTimeMillis())); + } + } + if (!fix) { + out.println("Fix mode is not enabled, hbase:meta was not updated. See the tool output for" + + " a list of detected problematic regions. Re-run the tool without the dry run option" + + " to persist updates to hbase:meta."); + } + } + } +} diff --git a/hbase-hbck2/src/test/java/org/apache/hbase/TestRegionInfoMismatchTool.java b/hbase-hbck2/src/test/java/org/apache/hbase/TestRegionInfoMismatchTool.java new file mode 100644 index 0000000..7debaf9 --- /dev/null +++ b/hbase-hbck2/src/test/java/org/apache/hbase/TestRegionInfoMismatchTool.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
package org.apache.hbase;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.Cell.Type;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.RegionInfoMismatchTool.MalformedRegion;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests for {@link RegionInfoMismatchTool} against a mini cluster: a table is created, the
 * info:regioninfo value of one or more of its hbase:meta rows is overwritten with a
 * RegionInfo carrying replica id 1, and the tool is expected to report and repair them.
 */
public class TestRegionInfoMismatchTool {
  private static final Logger LOG = LoggerFactory.getLogger(TestRegionInfoMismatchTool.class);
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private RegionInfoMismatchTool tool;
  private TableName tableName;
  private Connection connection;
  private Admin admin;

  @Rule
  public TestName testName = new TestName();

  @BeforeClass
  public static void beforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(1);
  }

  @AfterClass
  public static void afterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @Before
  public void before() throws IOException {
    this.connection = TEST_UTIL.getConnection();
    this.admin = TEST_UTIL.getAdmin();
    this.tool = new RegionInfoMismatchTool(connection);
    // One table per test method, named after the method.
    this.tableName = TableName.valueOf(testName.getMethodName());
  }

  @After
  public void after() throws IOException {
    if (admin.tableExists(tableName)) {
      if (admin.isTableEnabled(tableName)) {
        admin.disableTable(tableName);
      }
      admin.deleteTable(tableName);
    }
  }

  @Test
  public void testNoReportOnHealthy() throws Exception {
    admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
      .setRegionReplication(2)
      .build());
    List<RegionInfo> regions = HBCKMetaTableAccessor.getTableRegions(
      connection, tableName);

    assertEquals(1, regions.size());
    // Should find no malformed regions on a brand new table
    assertEquals(0, tool.getMalformedRegions().size());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // Encode and decode with the same explicit charset instead of the platform default.
    try (PrintStream out = new PrintStream(baos, true, "UTF-8")) {
      // Verify that the report written to the console lists no regions either.
      tool.run(out, false);
    }
    String outputAsString = baos.toString("UTF-8");
    LOG.info("Output from tool: " + outputAsString);
    assertTrue("Expected the tool to report zero regions to fix",
      outputAsString.contains("Found 0 regions to fix"));
  }

  @Test
  public void testReportOneCorruptRegion() throws Exception {
    corruptOneRegionAndVerifyReport();
  }

  @Test
  public void testReportManyCorruptRegions() throws Exception {
    corruptAllRegionsAndVerifyReport();
  }

  @Test
  public void testFixOneCorruptRegion() throws Exception {
    // Validates that there is a corrupt region
    corruptOneRegionAndVerifyReport();

    // Fix meta (fix=true)
    tool.run(true);

    // Validate that we fixed the corrupt region
    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
    assertEquals("Found latent malformed regions: " + malformedRegions, 0, malformedRegions.size());
  }

  @Test
  public void testDryRunDoesntUpdateMeta() throws Exception {
    corruptOneRegionAndVerifyReport();

    // Do not actually fix meta (fix=false)
    tool.run(false);

    // Validate that the region should still be listed as corrupt
    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
    assertEquals("1 malformed region should still be present", 1, malformedRegions.size());
  }

  @Test
  public void testFixManyCorruptRegions() throws Exception {
    corruptAllRegionsAndVerifyReport();

    // Fix meta (fix=true)
    tool.run(true);

    // Validate that we fixed all corrupt regions
    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
    assertEquals("Found latent malformed regions: " + malformedRegions, 0, malformedRegions.size());
  }

  /**
   * Creates the test table pre-split on "a", "b" and "c", and checks that the tool reports
   * nothing for it while it is still healthy.
   *
   * @return the table's regions as returned by HBCKMetaTableAccessor
   */
  private List<RegionInfo> createHealthySplitTable() throws Exception {
    admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
      .setRegionReplication(2)
      .build(), new byte[][] {Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")});
    List<RegionInfo> regions = HBCKMetaTableAccessor.getTableRegions(
      connection, tableName);
    LOG.info("Created regions {}", regions);

    // Log hbase:meta to be helpful
    printMeta(connection);

    assertEquals(4, regions.size());
    // Should find no malformed regions on a brand new table
    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
    assertEquals("Found malformed regions: " + malformedRegions, 0, malformedRegions.size());
    return regions;
  }

  /** Corrupts the first region of a fresh table and checks the tool reports exactly it. */
  private void corruptOneRegionAndVerifyReport() throws Exception {
    List<RegionInfo> regions = createHealthySplitTable();

    // Mess up info:regioninfo for the first region in this table.
    RegionInfo regionToCorrupt = regions.get(0);
    RegionInfo corruptedRegion = corruptRegionInfo(regionToCorrupt);
    try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) {
      meta.put(makePutFromRegionInfo(regionToCorrupt, corruptedRegion));
    }

    // Log hbase:meta to be helpful
    printMeta(connection);

    // Run the tool and validate we get the expected number of regions back
    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
    assertEquals("Found malformed regions: " + malformedRegions, 1, malformedRegions.size());

    assertArrayEquals(regionToCorrupt.getEncodedNameAsBytes(),
      encodeRegionName(malformedRegions.get(0).getRegionName()));
    assertEquals(corruptedRegion, malformedRegions.get(0).getRegionInfo());
  }

  /** Corrupts every region of a fresh table and checks the tool reports all of them. */
  private void corruptAllRegionsAndVerifyReport() throws Exception {
    List<RegionInfo> regions = createHealthySplitTable();

    // For each region in this table, mess up the info:regioninfo
    List<RegionInfo> corruptedRegions = new ArrayList<>();
    try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) {
      for (RegionInfo regionToCorrupt : regions) {
        RegionInfo corruptedRegion = corruptRegionInfo(regionToCorrupt);
        corruptedRegions.add(corruptedRegion);
        meta.put(makePutFromRegionInfo(regionToCorrupt, corruptedRegion));
      }
    }

    // Log hbase:meta to be helpful
    printMeta(connection);

    // Run the tool
    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
    LOG.info("Found malformed regions {}", malformedRegions);
    // Make sure we got back the expected 4 regions
    assertEquals(4, malformedRegions.size());

    // Validate that the tool found the expected regions with the correct data.
    // This compares by position, i.e. it expects the tool to return the rows in the same
    // order as the region list above.
    for (int i = 0; i < regions.size(); i++) {
      RegionInfo originalRegion = regions.get(i);
      RegionInfo corruptedRegion = corruptedRegions.get(i);
      byte[] reportedEncodedName = encodeRegionName(malformedRegions.get(i).getRegionName());
      assertArrayEquals("Comparing "
        + Bytes.toStringBinary(originalRegion.getEncodedNameAsBytes()) + " and "
        + Bytes.toStringBinary(reportedEncodedName),
        originalRegion.getEncodedNameAsBytes(),
        reportedEncodedName);
      assertEquals(corruptedRegion, malformedRegions.get(i).getRegionInfo());
    }
  }

  // Copy from HBCKMetaTableAccessor so we can introduce the "bug" into the cell value
  Put makePutFromRegionInfo(RegionInfo originalRegionInfo, RegionInfo corruptRegionInfo)
      throws IOException {
    LOG.info("Changing {} to {}", originalRegionInfo, corruptRegionInfo);
    // The rowkey stays that of the original region; only the value is replaced.
    Put put = new Put(originalRegionInfo.getRegionName());
    //copied from MetaTableAccessor
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(put.getRow())
      .setFamily(HConstants.CATALOG_FAMILY)
      .setQualifier(HConstants.REGIONINFO_QUALIFIER)
      .setType(Type.Put)
      // Hack in our own encoded name.
      .setValue(RegionInfo.toByteArray(corruptRegionInfo))
      .build());
    return put;
  }

  /**
   * Returns a copy of the given default-replica region with its replica id set to 1, which
   * is how these tests make the cell value disagree with the rowkey.
   */
  RegionInfo corruptRegionInfo(RegionInfo region) {
    if (region.getReplicaId() != 0) {
      throw new IllegalArgumentException("Passed in region should be default replica");
    }
    return RegionInfoBuilder.newBuilder(region).setReplicaId(1).build();
  }

  /** Logs every cell of the catalog and table families of hbase:meta. */
  void printMeta(Connection conn) throws IOException, DeserializationException {
    try (Table meta = conn.getTable(TableName.META_TABLE_NAME)) {
      Scan s = new Scan();
      s.addFamily(HConstants.CATALOG_FAMILY).addFamily(HConstants.TABLE_FAMILY);
      try (ResultScanner scanner = meta.getScanner(s)) {
        Result r = null;
        while ((r = scanner.next()) != null) {
          CellScanner cells = r.cellScanner();
          while (cells.advance()) {
            printCell(cells.current());
          }
        }
      }
    }
  }

  /** Logs the cell and, for info:regioninfo cells, the RegionInfo deserialized from it. */
  void printCell(Cell cell) throws DeserializationException {
    LOG.info(CellUtil.toString(cell, true));
    if (Bytes.equals(CellUtil.cloneFamily(cell), HConstants.CATALOG_FAMILY)
        && Bytes.equals(CellUtil.cloneQualifier(cell), HConstants.REGIONINFO_QUALIFIER)) {
      LOG.info("Deserialized RegionInfo="
        + RegionInfo.parseFrom(CellUtil.cloneValue(cell)));
    }
  }

  /**
   * Encodes the given Region NAME (the rowkey) into the "encoded Region name" (the MD5 hash).
   */
  byte[] encodeRegionName(byte[] regionName) {
    return Bytes.toBytes(HBCKRegionInfo.encodeRegionName(regionName));
  }
}