Repository: hbase
Updated Branches:
  refs/heads/master a452487a9 -> 86b35b268


HBASE-21072 Block out HBCK1 in hbase2
Write the hbase-1.x hbck1 lock file to block hbck1 instances from writing
state to an hbase-2.x cluster (they could do damage).
Set hbase.write.hbck1.lock.file to false to disable writing this lock file.
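
A minimal sketch (not part of this patch) of how a test or embedded deploy
might disable the new fencing behavior through configuration; the class name
DisableHbck1LockExample is hypothetical, while the config key is the one this
change introduces:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;

  public class DisableHbck1LockExample {
    public static void main(String[] args) {
      Configuration conf = HBaseConfiguration.create();
      // Defaults to true: the Master writes the legacy hbase-hbck.lock file at startup.
      conf.setBoolean("hbase.write.hbck1.lock.file", false);
      // ... hand 'conf' to whatever starts the Master (e.g. a mini cluster in a test) ...
    }
  }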


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/86b35b26
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/86b35b26
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/86b35b26

Branch: refs/heads/master
Commit: 86b35b26870be3a04304a4483c08fcf7c50d55f5
Parents: a452487
Author: Michael Stack <st...@apache.org>
Authored: Wed Aug 22 22:44:00 2018 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Fri Aug 24 09:26:43 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/master/HMaster.java |   9 ++
 .../org/apache/hadoop/hbase/util/HBaseFsck.java | 109 +++++++++++++------
 .../apache/hadoop/hbase/master/TestMaster.java  |  42 ++++++-
 3 files changed, 126 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/86b35b26/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 50794f4..92afa9c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -193,6 +193,7 @@ import org.apache.hadoop.hbase.util.Addressing;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CompressionTest;
 import org.apache.hadoop.hbase.util.EncryptionTest;
+import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.IdLock;
@@ -915,6 +916,14 @@ public class HMaster extends HRegionServer implements MasterServices {
     ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
     this.clusterId = clusterId.toString();
 
+    // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their
+    // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set
+    // hbase.write.hbck1.lock.file to false.
+    if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
+      HBaseFsck.checkAndMarkRunningHbck(this.conf,
+          HBaseFsck.createLockRetryCounterFactory(this.conf).create());
+    }
+
     status.setStatus("Initialze ServerManager and schedule SCP for crash 
servers");
     this.serverManager = createServerManager(this);
     createProcedureExecutor();

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b35b26/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 820a4e0..b43262d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -144,6 +144,7 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.yetus.audience.InterfaceStability;
@@ -164,7 +165,10 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminServic
 
 /**
  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
- * table integrity problems in a corrupted HBase.
+ * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
+ * work with hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'.
+ * See hbck2 (HBASE-19121) for a hbck tool for hbase2.
+ *
  * <p>
  * Region consistency checks verify that hbase:meta, region deployment on region
  * servers and the state of data in HDFS (.regioninfo files) all are in
@@ -217,7 +221,12 @@ public class HBaseFsck extends Configured implements Closeable {
   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
   private static final int DEFAULT_MAX_MERGE = 5;
   private static final String TO_BE_LOADED = "to_be_loaded";
-  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
+  /**
+   * Here is where hbase-1.x used to default the lock for hbck1.
+   * It puts in place a lock when it goes to write/make changes.
+   */
+  @VisibleForTesting
+  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
   private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
   private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
   private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
@@ -369,40 +378,75 @@ public class HBaseFsck extends Configured implements Closeable {
     super(conf);
     errors = getErrorReporter(getConf());
     this.executor = exec;
-    lockFileRetryCounterFactory = new RetryCounterFactory(
-      getConf().getInt("hbase.hbck.lockfile.attempts", 
DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
-      getConf().getInt(
-        "hbase.hbck.lockfile.attempt.sleep.interval", 
DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
-      getConf().getInt(
-        "hbase.hbck.lockfile.attempt.maxsleeptime", 
DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
-    createZNodeRetryCounterFactory = new RetryCounterFactory(
-      getConf().getInt("hbase.hbck.createznode.attempts", 
DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
-      getConf().getInt(
-        "hbase.hbck.createznode.attempt.sleep.interval",
-        DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
-      getConf().getInt(
-        "hbase.hbck.createznode.attempt.maxsleeptime",
-        DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
+    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
+    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
     zkw = createZooKeeperWatcher();
   }
 
-  private class FileLockCallable implements Callable<FSDataOutputStream> {
+  /**
+   * @return A retry counter factory configured for retrying lock file creation.
+   */
+  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
+    return new RetryCounterFactory(
+        conf.getInt("hbase.hbck.lockfile.attempts", 
DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
+        conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
+            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
+        conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
+            DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
+  }
+
+  /**
+   * @return A retry counter factory configured for retrying znode creation.
+   */
+  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
+    return new RetryCounterFactory(
+        conf.getInt("hbase.hbck.createznode.attempts", 
DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
+        conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
+            DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
+        conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
+            DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
+  }
+
+  /**
+   * @return Return the tmp dir this tool writes too.
+   */
+  @VisibleForTesting
+  public static Path getTmpDir(Configuration conf) throws IOException {
+    return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
+  }
+
+  private static class FileLockCallable implements Callable<FSDataOutputStream> {
     RetryCounter retryCounter;
+    private final Configuration conf;
+    private Path hbckLockPath = null;
 
-    public FileLockCallable(RetryCounter retryCounter) {
+    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
       this.retryCounter = retryCounter;
+      this.conf = conf;
+    }
+
+    /**
+     * @return Will be <code>null</code> unless you call {@link #call()}
+     */
+    Path getHbckLockPath() {
+      return this.hbckLockPath;
     }
+
     @Override
     public FSDataOutputStream call() throws IOException {
       try {
-        FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
-        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
+        FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
+        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
             HConstants.DATA_FILE_UMASK_KEY);
-        Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
+        Path tmpDir = getTmpDir(conf);
+        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
         fs.mkdirs(tmpDir);
-        HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
-        final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
+        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
         out.writeBytes(InetAddress.getLocalHost().toString());
+        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
+        out.writeBytes(" Written by an hbase-2.x Master to block an " +
+            "attempt by an hbase-1.x HBCK tool making modification to state. " +
+            "See 'HBCK must match HBase server version' in the hbase refguide.");
         out.flush();
         return out;
       } catch(RemoteException e) {
@@ -417,7 +461,6 @@ public class HBaseFsck extends Configured implements Closeable {
     private FSDataOutputStream createFileWithRetries(final FileSystem fs,
         final Path hbckLockFilePath, final FsPermission defaultPerms)
         throws IOException {
-
       IOException exception = null;
       do {
         try {
@@ -449,13 +492,13 @@ public class HBaseFsck extends Configured implements Closeable {
   * @return FSDataOutputStream object corresponding to the newly opened lock file
    * @throws IOException if IO failure occurs
    */
-  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
-    RetryCounter retryCounter = lockFileRetryCounterFactory.create();
-    FileLockCallable callable = new FileLockCallable(retryCounter);
+  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
+      RetryCounter retryCounter) throws IOException {
+    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
     ExecutorService executor = Executors.newFixedThreadPool(1);
     FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
     executor.execute(futureTask);
-    final int timeoutInSeconds = getConf().getInt(
+    final int timeoutInSeconds = conf.getInt(
       "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
     FSDataOutputStream stream = null;
     try {
@@ -472,7 +515,7 @@ public class HBaseFsck extends Configured implements Closeable {
     } finally {
       executor.shutdownNow();
     }
-    return stream;
+    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
   }
 
   private void unlockHbck() {
@@ -481,8 +524,7 @@ public class HBaseFsck extends Configured implements Closeable {
       do {
         try {
           IOUtils.closeQuietly(hbckOutFd);
-          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
-              HBCK_LOCK_PATH, true);
+          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
           LOG.info("Finishing hbck");
           return;
         } catch (IOException ioe) {
@@ -511,7 +553,10 @@ public class HBaseFsck extends Configured implements Closeable {
 
     if (isExclusive()) {
       // Grab the lock
-      hbckOutFd = checkAndMarkRunningHbck();
+      Pair<Path, FSDataOutputStream> pair =
+          checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
+      HBCK_LOCK_PATH = pair.getFirst();
+      this.hbckOutFd = pair.getSecond();
       if (hbckOutFd == null) {
         setRetCode(-1);
         LOG.error("Another instance of hbck is fixing HBase, exiting this 
instance. " +

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b35b26/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
index 8abaf60..81bdb02 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master;
 
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -26,6 +27,8 @@ import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -42,10 +45,10 @@ import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableState;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.util.StringUtils;
@@ -222,8 +225,43 @@ public class TestMaster {
         TEST_UTIL.getHBaseCluster().getMaster().getServerManager()
             .getFlushedSequenceIdByRegion();
     assertTrue(regionMapBefore.equals(regionMapAfter));
+  }
 
-
+  @Test
+  public void testBlockingHbkc1WithLockFile() throws IOException {
+    // This is how the path to the lock file is created inside in HBaseFsck. Too hard to use its
+    // actual method without disturbing HBaseFsck... Do the below mimic instead.
+    Path hbckLockPath = new Path(HBaseFsck.getTmpDir(TEST_UTIL.getConfiguration()),
+        HBaseFsck.HBCK_LOCK_FILE);
+    FileSystem fs = TEST_UTIL.getTestFileSystem();
+    assertTrue(fs.exists(hbckLockPath));
+    TEST_UTIL.getMiniHBaseCluster().
+        killMaster(TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName());
+    assertTrue(fs.exists(hbckLockPath));
+    TEST_UTIL.getMiniHBaseCluster().startMaster();
+    TEST_UTIL.waitFor(30000, () -> TEST_UTIL.getMiniHBaseCluster().getMaster() != null &&
+        TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized());
+    assertTrue(fs.exists(hbckLockPath));
+    // Start a second Master. Should be fine.
+    TEST_UTIL.getMiniHBaseCluster().startMaster();
+    assertTrue(fs.exists(hbckLockPath));
+    fs.delete(hbckLockPath, true);
+    assertFalse(fs.exists(hbckLockPath));
+    // Kill all Masters.
+    TEST_UTIL.getMiniHBaseCluster().getLiveMasterThreads().stream().
+        map(sn -> sn.getMaster().getServerName()).forEach(sn -> {
+          try {
+            TEST_UTIL.getMiniHBaseCluster().killMaster(sn);
+          } catch (IOException e) {
+            e.printStackTrace();
+          }
+        });
+    // Start a new one.
+    TEST_UTIL.getMiniHBaseCluster().startMaster();
+    TEST_UTIL.waitFor(30000, () -> TEST_UTIL.getMiniHBaseCluster().getMaster() != null &&
+        TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized());
+    // Assert lock gets put in place again.
+    assertTrue(fs.exists(hbckLockPath));
   }
 }
 
