This is an automated email from the ASF dual-hosted git repository.

elserj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase-operator-tools.git


The following commit(s) were added to refs/heads/master by this push:
     new e3a8f96  HBASE-26656 Utility to correct corrupt RegionInfo's in hbase:meta
e3a8f96 is described below

commit e3a8f96d0b9e9b985d7f6f2952aaa929047c5b08
Author: Josh Elser <els...@apache.org>
AuthorDate: Tue Dec 14 18:02:38 2021 -0500

    HBASE-26656 Utility to correct corrupt RegionInfo's in hbase:meta
    
    A standalone utility which corrects hbase:meta given the problem
    described by HBASE-23328. Includes the ability to both "report" corrupt
    regions as well as correct them. This tool will ensure that other
    HBCK2 utilities continue to work without additional modification.
    
    Signed-off-by: Peter Somogyi <psomo...@apache.org>
    
    Closes #102
---
 .../src/main/java/org/apache/hbase/HBCK2.java      |  55 ++++
 .../main/java/org/apache/hbase/HBCKRegionInfo.java | 150 ++++++++++
 .../org/apache/hbase/RegionInfoMismatchTool.java   | 182 +++++++++++++
 .../apache/hbase/TestRegionInfoMismatchTool.java   | 302 +++++++++++++++++++++
 4 files changed, 689 insertions(+)

diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
index a788a83..84dc834 100644
--- a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
+++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
@@ -107,6 +107,7 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool {
   private static final String RECOVER_UNKNOWN = "recoverUnknown";
   private static final String GENERATE_TABLE_INFO = "generateMissingTableDescriptorFile";
   private static final String FIX_META = "fixMeta";
+  private static final String REGIONINFO_MISMATCH = "regionInfoMismatch";
   // TODO update this map in case of the name of a method changes in Hbck interface
   //  in org.apache.hadoop.hbase.client package. Or a new command is added and the hbck command
   //  does not equals to the method name in Hbck interface.
@@ -422,6 +423,23 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool {
     return hbck.scheduleSCPsForUnknownServers();
   }
 
+  /**
+   * Runs the RegionInfoMismatchTool using CLI options.
+   */
+  void regionInfoMismatch(String[] args) throws Exception {
+    // CLI Options
+    Options options = new Options();
+    Option dryRunOption = Option.builder("f").longOpt("fix").hasArg(false).build();
+    options.addOption(dryRunOption);
+    // Parse command-line.
+    CommandLineParser parser = new DefaultParser();
+    CommandLine commandLine = parser.parse(options, args, false);
+    final boolean fix = commandLine.hasOption(dryRunOption.getOpt());
+    try (ClusterConnection connection = connect()) {
+      new RegionInfoMismatchTool(connection).run(fix);
+    }
+  }
+
   private HBaseProtos.ServerName parseServerName(String serverName) {
     ServerName sn = ServerName.parseServerName(serverName);
     return HBaseProtos.ServerName.newBuilder().setHostName(sn.getHostname()).
@@ -472,6 +490,8 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool {
     writer.println();
     usageUnassigns(writer);
     writer.println();
+    usageRegioninfoMismatch(writer);
+    writer.println();
     writer.close();
     return sw.toString();
   }
@@ -728,6 +748,27 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool {
     writer.println("   hbase:meta tool. See the HBCK2 README for how to use.");
   }
 
+  private static void usageRegioninfoMismatch(PrintWriter writer) {
+    writer.println(" " + REGIONINFO_MISMATCH);
+    writer.println("   Options:");
+    writer.println("   -f,--fix Update hbase:meta with the corrections");
+    writer.println("   It is recommended to first run this utility without the 
fix");
+    writer.println("   option to ensure that the utility is generating the 
correct");
+    writer.println("   serialized RegionInfo data structures. Inspect the 
output to");
+    writer.println("   confirm that the hbase:meta rowkey matches the new 
RegionInfo.");
+    writer.println();
+    writer.println("   This tool will read hbase:meta and report any regions 
whose rowkey");
+    writer.println("   and cell value differ in their encoded region name. 
HBASE-23328 ");
+    writer.println("   illustrates a problem for read-replica enabled tables 
in which ");
+    writer.println("   the encoded region name (the MD5 hash) does not match 
between ");
+    writer.println("   the rowkey and the value. This problem is generally 
harmless ");
+    writer.println("   for normal operation, but can break other HBCK2 
tools.");
+    writer.println();
+    writer.println("   Run this command to determine if any regions are 
affected by ");
+    writer.println("   this bug and use the -f/--fix option to then correct 
any");
+    writer.println("   affected regions.");
+  }
+
   static void showErrorMessage(String error) {
     if (error != null) {
       System.out.println("ERROR: " + error);
@@ -1034,6 +1075,20 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool {
         tableInfoGenerator.generateTableDescriptorFileIfMissing(commands[1].trim());
         break;
 
+      case REGIONINFO_MISMATCH:
+        // `commands` includes the `regionInfoMismatch` argument.
+        if (commands.length > 2) {
+          showErrorMessage(command + " takes one optional argument, got more than one.");
+          return EXIT_FAILURE;
+        }
+        try {
+          regionInfoMismatch(commands);
+        } catch (Exception e) {
+          e.printStackTrace();
+          return EXIT_FAILURE;
+        }
+        break;
+
       default:
         showErrorMessage("Unsupported command: " + command);
         return EXIT_FAILURE;
diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCKRegionInfo.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCKRegionInfo.java
new file mode 100644
index 0000000..5119b1c
--- /dev/null
+++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCKRegionInfo.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hbase;
+
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.util.ByteArrayHashKey;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HashKey;
+import org.apache.hadoop.hbase.util.JenkinsHash;
+
+/**
+ * A copy of utilities from {@link org.apache.hadoop.hbase.client.RegionInfo} to
+ * copy internal methods into HBCK2 for stability to avoid using Private methods.
+ */
+public final class HBCKRegionInfo {
+
+  /**
+   * Separator used to demarcate the encodedName in a region name
+   * in the new format. See description on new format above.
+   */
+  static final int ENC_SEPARATOR = '.';
+
+  static final int MD5_HEX_LENGTH = 32;
+
+  static final int DEFAULT_REPLICA_ID = 0;
+
+  static final byte REPLICA_ID_DELIMITER = (byte)'_';
+
+  private HBCKRegionInfo() {}
+
+  /**
+   * Does region name contain its encoded name?
+   * @param regionName region name
+   * @return boolean indicating if this a new format region
+   *         name which contains its encoded name.
+   */
+  public static boolean hasEncodedName(final byte[] regionName) {
+    // check if region name ends in ENC_SEPARATOR
+    return (regionName.length >= 1) &&
+      (regionName[regionName.length - 1] == RegionInfo.ENC_SEPARATOR);
+  }
+
+  /**
+   * @return the encodedName
+   */
+  public static String encodeRegionName(final byte [] regionName) {
+    String encodedName;
+    if (hasEncodedName(regionName)) {
+      // region is in new format:
+      // <tableName>,<startKey>,<regionIdTimeStamp>/encodedName/
+      encodedName = Bytes.toString(regionName,
+      regionName.length - MD5_HEX_LENGTH - 1,
+      MD5_HEX_LENGTH);
+    } else {
+      // old format region name. First hbase:meta region also
+      // use this format.EncodedName is the JenkinsHash value.
+      HashKey<byte[]> key = new ByteArrayHashKey(regionName, 0, 
regionName.length);
+      int hashVal = Math.abs(JenkinsHash.getInstance().hash(key, 0));
+      encodedName = String.valueOf(hashVal);
+    }
+    return encodedName;
+  }
+
+  /**
+   * Separate elements of a regionName.
+   * Region name is of the format:
+   * <code>tablename,startkey,regionIdTimestamp[_replicaId][.encodedName.]</code>.
+   * Startkey can contain the delimiter (',') so we parse from the start and then parse from
+   * the end.
+   * @return Array of byte[] containing tableName, startKey and id OR null if not parseable
+   *   as a region name.
+   */
+  public static byte [][] parseRegionNameOrReturnNull(final byte[] regionName) {
+    int offset = -1;
+    for (int i = 0; i < regionName.length; i++) {
+      if (regionName[i] == HConstants.DELIMITER) {
+        offset = i;
+        break;
+      }
+    }
+    if (offset == -1) {
+      return null;
+    }
+    byte[] tableName = new byte[offset];
+    System.arraycopy(regionName, 0, tableName, 0, offset);
+    offset = -1;
+
+    int endOffset = regionName.length;
+    // check whether regionName contains encodedName
+    if (regionName.length > MD5_HEX_LENGTH + 2 &&
+        regionName[regionName.length-1] == ENC_SEPARATOR &&
+        regionName[regionName.length-MD5_HEX_LENGTH-2] == ENC_SEPARATOR) {
+      endOffset = endOffset - MD5_HEX_LENGTH - 2;
+    }
+
+    // parse from end
+    byte[] replicaId = null;
+    int idEndOffset = endOffset;
+    for (int i = endOffset - 1; i > 0; i--) {
+      if (regionName[i] == REPLICA_ID_DELIMITER) { //replicaId may or may not be present
+        replicaId = new byte[endOffset - i - 1];
+        System.arraycopy(regionName, i + 1, replicaId, 0,
+          endOffset - i - 1);
+        idEndOffset = i;
+        // do not break, continue to search for id
+      }
+      if (regionName[i] == HConstants.DELIMITER) {
+        offset = i;
+        break;
+      }
+    }
+    if (offset == -1) {
+      return null;
+    }
+    byte [] startKey = HConstants.EMPTY_BYTE_ARRAY;
+    if(offset != tableName.length + 1) {
+      startKey = new byte[offset - tableName.length - 1];
+      System.arraycopy(regionName, tableName.length + 1, startKey, 0,
+        offset - tableName.length - 1);
+    }
+    byte [] id = new byte[idEndOffset - offset - 1];
+    System.arraycopy(regionName, offset + 1, id, 0,
+      idEndOffset - offset - 1);
+    byte [][] elements = new byte[replicaId == null ? 3 : 4][];
+    elements[0] = tableName;
+    elements[1] = startKey;
+    elements[2] = id;
+    if (replicaId != null) {
+      elements[3] = replicaId;
+    }
+    return elements;
+  }
+}
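
For reference (not part of this commit), a minimal sketch of what the helpers above return for a "new format"
region name. The table name, start key, timestamp, and 32-character hex string below are made-up example values,
not data from this change:

    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hbase.HBCKRegionInfo;

    public class RegionNameSketch {
      public static void main(String[] args) {
        // Made-up region name; the 32-char hex stands in for a real MD5 hash.
        byte[] regionName = Bytes.toBytes(
            "t1,aaa,1639520558142.0123456789abcdef0123456789abcdef.");
        // Returns the 32-char encoded name found between the trailing '.' separators.
        String encoded = HBCKRegionInfo.encodeRegionName(regionName);
        // Returns {tableName, startKey, regionId}; a fourth element carries the
        // replicaId when a "_<replicaId>" suffix precedes the encoded name.
        byte[][] parts = HBCKRegionInfo.parseRegionNameOrReturnNull(regionName);
        // parts[0] = "t1", parts[1] = "aaa", parts[2] = "1639520558142"
        System.out.println(encoded + " / " + Bytes.toString(parts[2]));
      }
    }
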
diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/RegionInfoMismatchTool.java b/hbase-hbck2/src/main/java/org/apache/hbase/RegionInfoMismatchTool.java
new file mode 100644
index 0000000..bc37423
--- /dev/null
+++ b/hbase-hbck2/src/main/java/org/apache/hbase/RegionInfoMismatchTool.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hbase;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hbase.HBCKMetaTableAccessor.MetaScanner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Standalone utility to correct the bug corrected by HBASE-23328 in which
+ * the region name in the rowkey of a row in meta does not match the name
+ * which is stored in the value of the info:regioninfo column of the same row.
+ */
+public class RegionInfoMismatchTool {
+  private static final Logger LOG = LoggerFactory.getLogger(RegionInfoMismatchTool.class);
+
+  private final Connection connection;
+
+  public RegionInfoMismatchTool(Connection connection) {
+    this.connection = connection;
+  }
+
+  static class MalformedRegion {
+    byte[] regionName;
+    RegionInfo regionInfo;
+
+    MalformedRegion(byte[] regionName, RegionInfo regionInfo) {
+      this.regionName = regionName;
+      this.regionInfo = regionInfo;
+    }
+
+    byte[] getRegionName() {
+      return regionName;
+    }
+
+    RegionInfo getRegionInfo() {
+      return regionInfo;
+    }
+
+    @Override
+    public String toString() {
+      return "regionName=" + Bytes.toStringBinary(regionName) + ", regioninfo="
+        + regionInfo.toString();
+    }
+  }
+
+  /**
+   * Returns a list of {@link MalformedRegion}'s which exist in meta. If there are
+   * no malformed regions, the returned list will be empty.
+   */
+  List<MalformedRegion> getMalformedRegions() throws IOException {
+    try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) {
+      MetaScanner<MalformedRegion> scanner = new MetaScanner<>();
+      return scanner.scanMeta(connection,
+        scan -> scan.addFamily(HConstants.CATALOG_FAMILY),
+        r -> {
+          Cell riCell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY,
+              HConstants.REGIONINFO_QUALIFIER);
+          RegionInfo info = RegionInfo.parseFromOrNull(riCell.getValueArray(),
+              riCell.getValueOffset(), riCell.getValueLength());
+          // Get the expected value from the RegionInfo in the cell value
+          byte[] valueEncodedRegionName = info.getEncodedNameAsBytes();
+          // Compare that to what is actually in the rowkey
+          HBCKMetaTableAccessor.getMetaKeyForRegion(info);
+          byte[] rowKeyRegionName = CellUtil.cloneRow(riCell);
+          byte[] rowkeyEncodedRegionName = Bytes.toBytes(
+              HBCKRegionInfo.encodeRegionName(rowKeyRegionName));
+          // If they are equal, we are good.
+          if (Arrays.equals(rowkeyEncodedRegionName, valueEncodedRegionName)) {
+            // Returning null will cause `scanMeta` to ignore this row
+            LOG.debug("Ignoring region {} because rowkey aligns with value", 
info);
+            return null;
+          }
+
+          LOG.debug("Found mismatched region {} and {}", 
Bytes.toStringBinary(rowKeyRegionName),
+              Bytes.toStringBinary(valueEncodedRegionName));
+          // Only return row/regioninfo pairs that are wrong
+          return new MalformedRegion(rowKeyRegionName, info);
+        });
+    }
+  }
+
+  /**
+   * Run the RegionInfoMismatchTool. Use the {@code fix} argument to control whether this method
+   * will report problems or fix problems.
+   *
+   * @param fix True if hbase:meta should be updated. False to report on any problems.
+   */
+  public void run(boolean fix) throws IOException, DeserializationException {
+    run(System.out, fix);
+  }
+
+  void run(PrintStream out, boolean fix) throws IOException, DeserializationException {
+    List<MalformedRegion> regionsToFix = getMalformedRegions();
+    if (!fix) {
+      out.println("Fix mode is disabled, printing all malformed regions 
detected:");
+      for (MalformedRegion r : regionsToFix) {
+        out.println("Rowkey " + 
HBCKRegionInfo.encodeRegionName(r.getRegionName())
+            + " does not match " + r.getRegionInfo());
+      }
+    }
+    out.println("Found " + regionsToFix.size() + " regions to fix.");
+    try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) {
+      for (MalformedRegion regionToFix : regionsToFix) {
+        final byte[] regionName = regionToFix.getRegionName();
+        final RegionInfo wrongRegionInfo = regionToFix.getRegionInfo();
+
+        // The encoded region name is an MD5 hash, but the regionID is what is actually
+        // broken by HBASE-23328
+        byte[][] regionNameParts = HBCKRegionInfo.parseRegionNameOrReturnNull(regionName);
+        if (regionNameParts == null) {
+          throw new RuntimeException("Couldn't parse parts from "
+              + Bytes.toStringBinary(regionName));
+        }
+        int i = 0;
+        for (byte[] part : regionNameParts) {
+          LOG.debug("Region name part[{}]: {}", i++, 
Bytes.toStringBinary(part));
+        }
+        // Third component of a region name is just a literal numeric (not a binary-encoded long)
+        long regionId = Long.parseLong(Bytes.toString(regionNameParts[2]));
+        RegionInfo correctedRegionInfo = RegionInfoBuilder.newBuilder(wrongRegionInfo)
+            .setRegionId(regionId)
+            .setReplicaId(0)
+            .build();
+
+        String rowkeyEncodedRegionName = HBCKRegionInfo.encodeRegionName(regionName);
+        String updatedValueEncodedRegionName = correctedRegionInfo.getEncodedName();
+        if (!rowkeyEncodedRegionName.equals(updatedValueEncodedRegionName)) {
+          out.println("Aborting: sanity-check failed on updated RegionInfo. 
Expected encoded "
+              + "region name " +rowkeyEncodedRegionName + " but got "
+              + updatedValueEncodedRegionName + ".");
+          out.println("Incorrectly created RegionInfo was: " + 
correctedRegionInfo);
+          throw new RuntimeException("Failed sanity-check on corrected 
RegionInfo");
+        }
+
+        out.println("Updating RegionInfo for " + 
Bytes.toStringBinary(regionName) + " to "
+            + correctedRegionInfo);
+
+        // Write the update back to meta.
+        if (fix) {
+          meta.put(HBCKMetaTableAccessor.makePutFromRegionInfo(correctedRegionInfo,
+                System.currentTimeMillis()));
+        }
+      }
+      if (!fix) {
+        out.println("Fix mode is not enabled, hbase:meta was not updated. See 
the tool output for"
+            + " a list of detected problematic regions. Re-run the tool 
without the dry run option"
+            + " to persist updates to hbase:meta.");
+      }
+    }
+  }
+}
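
As additional context (again, not part of this commit), a minimal sketch of driving the tool
programmatically, mirroring HBCK2#regionInfoMismatch above. The connection setup is an assumption
for illustration; HBCK2 itself supplies its own ClusterConnection:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hbase.RegionInfoMismatchTool;

    public class RegionInfoMismatchExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf)) {
          RegionInfoMismatchTool tool = new RegionInfoMismatchTool(connection);
          // Report-only pass: lists mismatched regions, leaves hbase:meta untouched.
          tool.run(false);
          // tool.run(true) performs the same scan and rewrites info:regioninfo,
          // which is what the -f/--fix option of the regionInfoMismatch command does.
        }
      }
    }
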
diff --git a/hbase-hbck2/src/test/java/org/apache/hbase/TestRegionInfoMismatchTool.java b/hbase-hbck2/src/test/java/org/apache/hbase/TestRegionInfoMismatchTool.java
new file mode 100644
index 0000000..7debaf9
--- /dev/null
+++ b/hbase-hbck2/src/test/java/org/apache/hbase/TestRegionInfoMismatchTool.java
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hbase;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.Cell.Type;
+import org.apache.hadoop.hbase.CellBuilderFactory;
+import org.apache.hadoop.hbase.CellBuilderType;
+import org.apache.hadoop.hbase.CellScanner;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hbase.RegionInfoMismatchTool.MalformedRegion;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestRegionInfoMismatchTool {
+  private static final Logger LOG = LoggerFactory.getLogger(TestRegionInfoMismatchTool.class);
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private RegionInfoMismatchTool tool;
+  private TableName tableName;
+  private Connection connection;
+  private Admin admin;
+
+  @Rule
+  public TestName testName = new TestName();
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster(1);
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void before() throws IOException {
+    this.connection = TEST_UTIL.getConnection();
+    this.admin = TEST_UTIL.getAdmin();
+    this.tool = new RegionInfoMismatchTool(connection);
+    this.tableName = TableName.valueOf(testName.getMethodName());
+  }
+
+  @After
+  public void after() throws IOException {
+    if (admin.tableExists(tableName)) {
+      if (admin.isTableEnabled(tableName)) {
+        admin.disableTable(tableName);
+      }
+      admin.deleteTable(tableName);
+    }
+  }
+
+  @Test
+  public void testNoReportOnHealthy() throws Exception {
+    admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
+        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
+        .setRegionReplication(2)
+        .build());
+    List<RegionInfo> regions = HBCKMetaTableAccessor.getTableRegions(
+        connection, tableName);
+
+    assertEquals(1, regions.size());
+    // Should find no malformed regions on a brand new table
+    assertEquals(0, tool.getMalformedRegions().size());
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream out = new PrintStream(baos);
+
+    // Verify that nothing would be printed to the console either.
+    tool.run(out, false);
+    out.close();
+    String outputAsString = baos.toString();
+    LOG.info("Output from tool: " + outputAsString);
+    assertTrue("Expected no output to be printed",
+        outputAsString.contains("Found 0 regions to fix"));
+  }
+
+  @Test
+  public void testReportOneCorruptRegion() throws Exception {
+    admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
+        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
+        .setRegionReplication(2)
+        .build(), new byte[][] {Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")});
+    List<RegionInfo> regions = HBCKMetaTableAccessor.getTableRegions(
+        connection, tableName);
+
+    // Log hbase:meta to be helpful
+    printMeta(connection);
+
+    assertEquals(4, regions.size());
+    // Should find no malformed regions on a brand new table
+    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
+    assertEquals("Found malformed regions: " + malformedRegions, 0, 
malformedRegions.size());
+
+    // Mess up info:regioninfo for the first region in this table.
+    RegionInfo regionToCorrupt = regions.get(0);
+    RegionInfo corruptedRegion = corruptRegionInfo(regionToCorrupt);
+    try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) {
+      meta.put(makePutFromRegionInfo(regionToCorrupt, corruptedRegion));
+    }
+
+    // Log hbase:meta to be helpful
+    printMeta(connection);
+
+    // Run the tool and validate we get the expected number of regions back
+    malformedRegions = tool.getMalformedRegions();
+    assertEquals("Found malformed regions: " + malformedRegions, 1, 
malformedRegions.size());
+
+    assertArrayEquals(regionToCorrupt.getEncodedNameAsBytes(),
+        encodeRegionName(malformedRegions.get(0).getRegionName()));
+    assertEquals(corruptedRegion, malformedRegions.get(0).getRegionInfo());
+  }
+
+  @Test
+  public void testReportManyCorruptRegions() throws Exception {
+    admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
+        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
+        .setRegionReplication(2)
+        .build(), new byte[][] {Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")});
+    List<RegionInfo> regions = HBCKMetaTableAccessor.getTableRegions(
+        connection, tableName);
+    LOG.info("Created regions {}", regions);
+
+    assertEquals(4, regions.size());
+    // Should find no malformed regions on a brand new table
+    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
+    assertEquals("Found malformed regions: " + malformedRegions, 0, 
malformedRegions.size());
+
+    // For each region in this table, mess up the info:regioninfo
+    List<RegionInfo> corruptedRegions = new ArrayList<>();
+    for (RegionInfo regionToCorrupt : regions) {
+      RegionInfo corruptedRegion = corruptRegionInfo(regionToCorrupt);
+      corruptedRegions.add(corruptedRegion);
+      try (Table meta = connection.getTable(TableName.META_TABLE_NAME)) {
+        meta.put(makePutFromRegionInfo(regionToCorrupt, corruptedRegion));
+      }
+    }
+
+    // Log hbase:meta to be helpful
+    printMeta(connection);
+
+    // Run the tool
+    malformedRegions = tool.getMalformedRegions();
+    LOG.info("Found malformed regions {}", malformedRegions);
+    // Make sure we got back the expected 4 regions
+    assertEquals(4, malformedRegions.size());
+
+    // Validate that the tool found the expected regions with the correct data.
+    for (int i = 0; i < regions.size(); i++) {
+      RegionInfo originalRegion = regions.get(i);
+      RegionInfo corruptedRegion = corruptedRegions.get(i);
+      assertArrayEquals("Comparing "
+          + Bytes.toStringBinary(originalRegion.getEncodedNameAsBytes()) + " and "
+          + Bytes.toStringBinary(encodeRegionName(malformedRegions.get(i).getRegionName())),
+          originalRegion.getEncodedNameAsBytes(),
+          encodeRegionName(malformedRegions.get(i).getRegionName()));
+      assertEquals(corruptedRegion, malformedRegions.get(i).getRegionInfo());
+    }
+  }
+
+  @Test
+  public void testFixOneCorruptRegion() throws Exception {
+    // Validates that there is a corrupt region
+    testReportOneCorruptRegion();
+
+    // Fix meta (fix=true)
+    tool.run(true);
+
+    // Validate the we fixed the corrupt region
+    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
+    assertEquals("Found latent malformed regions: " + malformedRegions, 0, 
malformedRegions.size());
+  }
+
+  @Test
+  public void testDryRunDoesntUpdateMeta() throws Exception {
+    testReportOneCorruptRegion();
+
+    // Do not actually fix meta (fix=false)
+    tool.run(false);
+
+    // Validate that the region should still be listed as corrupt
+    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
+    assertEquals("1 malformed region should still be present", 1, 
malformedRegions.size());
+  }
+
+  @Test
+  public void testFixManyCorruptRegions() throws Exception {
+    testReportManyCorruptRegions();
+
+    // Fix meta (fix=true)
+    tool.run(true);
+
+    // Validate that we fixed all corrupt regions
+    List<MalformedRegion> malformedRegions = tool.getMalformedRegions();
+    assertEquals("Found latent malformed regions: " + malformedRegions, 0, 
malformedRegions.size());
+  }
+
+  // Copy from HBCKMetaTableAccessor so we can introduce the "bug" into the cell value
+  Put makePutFromRegionInfo(RegionInfo originalRegionInfo, RegionInfo corruptRegionInfo)
+      throws IOException {
+    System.out.println("Changing " + originalRegionInfo + " to " + 
corruptRegionInfo);
+    Put put = new Put(originalRegionInfo.getRegionName());
+    //copied from MetaTableAccessor
+    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
+      .setRow(put.getRow())
+      .setFamily(HConstants.CATALOG_FAMILY)
+      .setQualifier(HConstants.REGIONINFO_QUALIFIER)
+      .setType(Type.Put)
+      // Hack in our own encoded name.
+      .setValue(RegionInfo.toByteArray(corruptRegionInfo))
+      .build());
+    return put;
+  }
+
+  RegionInfo corruptRegionInfo(RegionInfo region) {
+    if (region.getReplicaId() != 0) {
+      throw new IllegalArgumentException("Passed in region should be default 
replica");
+    }
+    return RegionInfoBuilder.newBuilder(region).setReplicaId(1).build();
+  }
+
+  void printMeta(Connection conn) throws IOException, DeserializationException {
+    try (Table meta = conn.getTable(TableName.META_TABLE_NAME)) {
+      Scan s = new Scan();
+      s.addFamily(HConstants.CATALOG_FAMILY).addFamily(HConstants.TABLE_FAMILY);
+      try (ResultScanner scanner = meta.getScanner(s)) {
+        Result r = null;
+        while ((r = scanner.next()) != null) {
+          CellScanner cells = r.cellScanner();
+          while (cells.advance()) {
+            printCell(cells.current());
+          }
+        }
+      }
+    }
+  }
+
+  void printCell(Cell cell) throws DeserializationException {
+    LOG.info(CellUtil.toString(cell, true));
+    if (Bytes.equals(CellUtil.cloneFamily(cell), HConstants.CATALOG_FAMILY) &&
+        Bytes.equals(CellUtil.cloneQualifier(cell), HConstants.REGIONINFO_QUALIFIER)) {
+      LOG.info("Deserialized RegionInfo="
+        + RegionInfo.parseFrom(CellUtil.cloneValue(cell)));
+    }
+  }
+
+  /**
+   * Encodes the given Region NAME (the rowkey) into the "encoded Region name" (the MD5 hash).
+   */
+  byte[] encodeRegionName(byte[] regionName) {
+    return Bytes.toBytes(HBCKRegionInfo.encodeRegionName(regionName));
+  }
+}
