This is an automated email from the ASF dual-hosted git repository.

xucang pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 926c113  HBASE-25848: Add flexibility to backup replication in case replication filter throws an exception
926c113 is described below

commit 926c1132c067d7fb3c9140f9df8509fad103a5ed
Author: Sandeep Pal <[email protected]>
AuthorDate: Fri May 21 12:14:26 2021 -0700

    HBASE-25848: Add flexibility to backup replication in case replication filter throws an exception
    
    Signed-off-by: Xu Cang <[email protected]>
---
 .../regionserver/ReplicationSourceWALReader.java   |  7 ++-
 .../WALEntryFilterRetryableException.java          | 40 +++++++++++++
 .../regionserver/TestWALEntryStream.java           | 68 ++++++++++++++++++++++
 3 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
index 57c0a16..57f4cba 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
@@ -152,11 +152,12 @@ class ReplicationSourceWALReader extends Thread {
               addBatchToShippingQueue(batch);
             }
           }
-        } catch (IOException e) { // stream related
+        } catch (WALEntryFilterRetryableException | IOException e) { // stream related
           if (handleEofException(e, batch)) {
             sleepMultiplier = 1;
           } else {
-            LOG.warn("Failed to read stream of replication entries", e);
+            LOG.warn("Failed to read stream of replication entries "
+              + "or replication filter is recovering", e);
             if (sleepMultiplier < maxRetriesMultiplier) {
               sleepMultiplier++;
             }
@@ -281,7 +282,7 @@ class ReplicationSourceWALReader extends Thread {
    * logs from replication queue
    * @return true only the IOE can be handled
    */
-  private boolean handleEofException(IOException e, WALEntryBatch batch)
+  private boolean handleEofException(Exception e, WALEntryBatch batch)
       throws InterruptedException {
     PriorityBlockingQueue<Path> queue = logQueue.getQueue(walGroupId);
     // Dump the log even if logQueue size is 1 if the source is from recovered Source
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryFilterRetryableException.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryFilterRetryableException.java
new file mode 100644
index 0000000..f93f8b0
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryFilterRetryableException.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication.regionserver;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This exception should be thrown from any wal filter when the filter is expected
+ * to recover from the failures and it wants the replication to backup till it fails.
+ * There is special handling in replication wal reader to catch this exception and
+ * retry.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.REPLICATION)
+public class WALEntryFilterRetryableException extends RuntimeException {
+  private static final long serialVersionUID = 1L;
+
+  public WALEntryFilterRetryableException(String m, Throwable t) {
+    super(m, t);
+  }
+
+  public WALEntryFilterRetryableException(String m) {
+    super(m);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java
index ae9bb67..b7d2a08 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java
@@ -121,6 +121,7 @@ public class TestWALEntryStream {
   public static void setUpBeforeClass() throws Exception {
     TEST_UTIL = new HBaseTestingUtility();
     CONF = TEST_UTIL.getConfiguration();
+    CONF.setLong("replication.source.sleepforretries", 10);
     TEST_UTIL.startMiniDFSCluster(3);
 
     cluster = TEST_UTIL.getDFSCluster();
@@ -413,6 +414,17 @@ public class TestWALEntryStream {
     return reader;
   }
 
+  private ReplicationSourceWALReader createReaderWithBadReplicationFilter(int numFailures,
+      Configuration conf) {
+    ReplicationSource source = mockReplicationSource(false, conf);
+    when(source.isPeerEnabled()).thenReturn(true);
+    ReplicationSourceWALReader reader =
+      new ReplicationSourceWALReader(fs, conf, logQueue, 0,
+        getIntermittentFailingFilter(numFailures), source, fakeWalGroupId);
+    reader.start();
+    return reader;
+  }
+
   @Test
   public void testReplicationSourceWALReader() throws Exception {
     appendEntriesToLogAndSync(3);
@@ -446,6 +458,36 @@ public class TestWALEntryStream {
   }
 
   @Test
+  public void testReplicationSourceWALReaderWithFailingFilter() throws Exception {
+    appendEntriesToLogAndSync(3);
+    // get ending position
+    long position;
+    try (WALEntryStream entryStream =
+      new WALEntryStream(logQueue, CONF, 0, log, null,
+        new MetricsSource("1"), fakeWalGroupId)) {
+      entryStream.next();
+      entryStream.next();
+      entryStream.next();
+      position = entryStream.getPosition();
+    }
+
+    // start up a reader
+    Path walPath = getQueue().peek();
+    int numFailuresInFilter = 5;
+    ReplicationSourceWALReader reader = createReaderWithBadReplicationFilter(
+      numFailuresInFilter, CONF);
+    WALEntryBatch entryBatch = reader.take();
+    assertEquals(numFailuresInFilter, FailingWALEntryFilter.numFailures());
+
+    // should've batched up our entries
+    assertNotNull(entryBatch);
+    assertEquals(3, entryBatch.getWalEntries().size());
+    assertEquals(position, entryBatch.getLastWalPosition());
+    assertEquals(walPath, entryBatch.getLastWalPath());
+    assertEquals(3, entryBatch.getNbRowKeys());
+  }
+
+  @Test
   public void testReplicationSourceWALReaderRecovered() throws Exception {
     appendEntriesToLogAndSync(10);
     Path walPath = getQueue().peek();
@@ -636,6 +678,32 @@ public class TestWALEntryStream {
     };
   }
 
+  private WALEntryFilter getIntermittentFailingFilter(int numFailuresInFilter) {
+    return new FailingWALEntryFilter(numFailuresInFilter);
+  }
+
+  public static class FailingWALEntryFilter implements WALEntryFilter {
+    private int numFailures = 0;
+    private static int countFailures = 0;
+
+    public FailingWALEntryFilter(int numFailuresInFilter) {
+      numFailures = numFailuresInFilter;
+    }
+
+    @Override
+    public Entry filter(Entry entry) {
+      if (countFailures == numFailures) {
+        return entry;
+      }
+      countFailures = countFailures + 1;
+      throw new WALEntryFilterRetryableException("failing filter");
+    }
+
+    public static int numFailures(){
+      return countFailures;
+    }
+  }
+
   class PathWatcher implements WALActionsListener {
 
     Path currentPath;

Reply via email to