Repository: hbase
Updated Branches:
  refs/heads/master 5ded29441 -> 5cc845b71


HBASE-21503 Replication normal source can get stuck due potential race 
conditions between source wal reader and wal provider initialization threads.

Found and analysed by Wellington Chevreuil


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5cc845b7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5cc845b7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5cc845b7

Branch: refs/heads/master
Commit: 5cc845b713853645f1e25b29caa556d79cfdc551
Parents: 5ded294
Author: Duo Zhang <zhang...@apache.org>
Authored: Wed Nov 21 09:57:24 2018 +0800
Committer: Duo Zhang <zhang...@apache.org>
Committed: Wed Nov 21 17:14:54 2018 +0800

----------------------------------------------------------------------
 .../hadoop/hbase/wal/AbstractFSWALProvider.java | 64 +++++++++-------
 .../wal/TestRaceBetweenGetWALAndGetWALs.java    | 78 ++++++++++++++++++++
 2 files changed, 117 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5cc845b7/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java
index ccdc95f..1f24548 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java
@@ -24,6 +24,8 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Objects;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -44,6 +46,7 @@ import org.apache.hadoop.hbase.util.CancelableProgressable;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
 import 
org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
+import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
 
 /**
  * Base class of a WAL Provider that returns a single thread safe WAL that 
writes to Hadoop FS. By
@@ -86,9 +89,10 @@ public abstract class AbstractFSWALProvider<T extends 
AbstractFSWAL<?>> implemen
   protected String logPrefix;
 
   /**
-   * we synchronized on walCreateLock to prevent wal recreation in different 
threads
+   * We use walCreateLock to prevent wal recreation in different threads, and 
also prevent getWALs
+   * missing the newly created WAL, see HBASE-21503 for more details.
    */
-  private final Object walCreateLock = new Object();
+  private final ReadWriteLock walCreateLock = new ReentrantReadWriteLock();
 
   /**
    * @param factory factory that made us, identity used for FS layout. may not 
be null
@@ -119,38 +123,48 @@ public abstract class AbstractFSWALProvider<T extends 
AbstractFSWAL<?>> implemen
 
   @Override
   public List<WAL> getWALs() {
-    if (wal == null) {
-      return Collections.emptyList();
+    if (wal != null) {
+      return Lists.newArrayList(wal);
+    }
+    walCreateLock.readLock().lock();
+    try {
+      if (wal == null) {
+        return Collections.emptyList();
+      } else {
+        return Lists.newArrayList(wal);
+      }
+    } finally {
+      walCreateLock.readLock().unlock();
     }
-    List<WAL> wals = new ArrayList<>(1);
-    wals.add(wal);
-    return wals;
   }
 
   @Override
   public T getWAL(RegionInfo region) throws IOException {
     T walCopy = wal;
-    if (walCopy == null) {
-      // only lock when need to create wal, and need to lock since
-      // creating hlog on fs is time consuming
-      synchronized (walCreateLock) {
-        walCopy = wal;
-        if (walCopy == null) {
-          walCopy = createWAL();
-          boolean succ = false;
-          try {
-            walCopy.init();
-            succ = true;
-          } finally {
-            if (!succ) {
-              walCopy.close();
-            }
-          }
-          wal = walCopy;
+    if (walCopy != null) {
+      return walCopy;
+    }
+    walCreateLock.writeLock().lock();
+    try {
+      walCopy = wal;
+      if (walCopy != null) {
+        return walCopy;
+      }
+      walCopy = createWAL();
+      boolean succ = false;
+      try {
+        walCopy.init();
+        succ = true;
+      } finally {
+        if (!succ) {
+          walCopy.close();
         }
       }
+      wal = walCopy;
+      return walCopy;
+    } finally {
+      walCreateLock.writeLock().unlock();
     }
-    return walCopy;
   }
 
   protected abstract T createWAL() throws IOException;

http://git-wip-us.apache.org/repos/asf/hbase/blob/5cc845b7/hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestRaceBetweenGetWALAndGetWALs.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestRaceBetweenGetWALAndGetWALs.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestRaceBetweenGetWALAndGetWALs.java
new file mode 100644
index 0000000..26ff118
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestRaceBetweenGetWALAndGetWALs.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.wal;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertSame;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.Future;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Threads;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
+
+/**
+ * Testcase for HBASE-21503.
+ */
+@Category({ RegionServerTests.class, SmallTests.class })
+public class TestRaceBetweenGetWALAndGetWALs {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestRaceBetweenGetWALAndGetWALs.class);
+
+  private static Future<List<WAL>> GET_WALS_FUTURE;
+
+  private static final class FSWALProvider extends 
AbstractFSWALProvider<AbstractFSWAL<?>> {
+
+    @Override
+    protected AbstractFSWAL<?> createWAL() throws IOException {
+      // just like what may do in the WALListeners, schedule an asynchronous 
task to call the
+      // getWALs method.
+      GET_WALS_FUTURE = ForkJoinPool.commonPool().submit(this::getWALs);
+      // sleep a while to make the getWALs arrive before we return
+      Threads.sleep(2000);
+      return Mockito.mock(AbstractFSWAL.class);
+    }
+
+    @Override
+    protected void doInit(Configuration conf) throws IOException {
+    }
+  }
+
+  @Test
+  public void testRace() throws IOException, InterruptedException, 
ExecutionException {
+    FSWALProvider p = new FSWALProvider();
+    WAL wal = p.getWAL(null);
+    assertNotNull(GET_WALS_FUTURE);
+    List<WAL> wals = GET_WALS_FUTURE.get();
+    assertSame(wal, Iterables.getOnlyElement(wals));
+  }
+}

Reply via email to