This is an automated email from the ASF dual-hosted git repository.
xucang pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.3 by this push:
new 73456aa HBASE-25848: Add flexibility to backup replication in case
replication filter throws an exception (#3288)
73456aa is described below
commit 73456aaec9643d2ff284036df47c4e316a4d6306
Author: Sandeep Pal <[email protected]>
AuthorDate: Fri May 21 11:58:05 2021 -0700
HBASE-25848: Add flexibility to backup replication in case replication
filter throws an exception (#3288)
---
.../regionserver/ReplicationSourceWALReader.java | 7 ++-
.../WALEntryFilterRetryableException.java | 40 ++++++++++++
.../regionserver/TestWALEntryStream.java | 71 +++++++++++++++++++++-
3 files changed, 113 insertions(+), 5 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
index d3c44a5..f61babe 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
@@ -146,11 +146,12 @@ class ReplicationSourceWALReader extends Thread {
entryStream.reset(); // reuse stream
}
}
- } catch (IOException e) { // stream related
+ } catch (WALEntryFilterRetryableException | IOException e) { // stream
related
if (handleEofException(e)) {
sleepMultiplier = 1;
} else {
- LOG.warn("Failed to read stream of replication entries", e);
+ LOG.warn("Failed to read stream of replication entries "
+ + "or replication filter is recovering", e);
if (sleepMultiplier < maxRetriesMultiplier) {
sleepMultiplier ++;
}
@@ -248,7 +249,7 @@ class ReplicationSourceWALReader extends Thread {
* enabled, then dump the log
* @return true only the IOE can be handled
*/
- private boolean handleEofException(IOException e) {
+ private boolean handleEofException(Exception e) {
// Dump the log even if logQueue size is 1 if the source is from recovered Source
// since we don't add current log to recovered source queue so it is safe to remove.
if ((e instanceof EOFException || e.getCause() instanceof EOFException) &&
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryFilterRetryableException.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryFilterRetryableException.java
new file mode 100644
index 0000000..f93f8b0
--- /dev/null
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryFilterRetryableException.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication.regionserver;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * This exception should be thrown from any WAL filter when the filter is expected
+ * to recover from its failures and wants replication to back up until it recovers.
+ * The replication WAL reader has special handling that catches this exception and
+ * retries.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.REPLICATION)
+public class WALEntryFilterRetryableException extends RuntimeException {
+ private static final long serialVersionUID = 1L;
+
+ public WALEntryFilterRetryableException(String m, Throwable t) {
+ super(m, t);
+ }
+
+ public WALEntryFilterRetryableException(String m) {
+ super(m);
+ }
+}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java
index 1db9c17..87ae41a 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStream.java
@@ -25,7 +25,6 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -118,6 +117,7 @@ public class TestWALEntryStream {
public static void setUpBeforeClass() throws Exception {
TEST_UTIL = new HBaseTestingUtility();
CONF = TEST_UTIL.getConfiguration();
+ CONF.setLong("replication.source.sleepforretries", 10);
TEST_UTIL.startMiniDFSCluster(3);
cluster = TEST_UTIL.getDFSCluster();
@@ -396,6 +396,17 @@ public class TestWALEntryStream {
return reader;
}
+ private ReplicationSourceWALReader createReaderWithBadReplicationFilter(int
numFailures,
+ Configuration conf) {
+ ReplicationSource source = mockReplicationSource(false, conf);
+ when(source.isPeerEnabled()).thenReturn(true);
+ ReplicationSourceWALReader reader =
+ new ReplicationSourceWALReader(fs, conf, walQueue, 0,
+ getIntermittentFailingFilter(numFailures), source);
+ reader.start();
+ return reader;
+ }
+
@Test
public void testReplicationSourceWALReader() throws Exception {
appendEntriesToLogAndSync(3);
@@ -428,6 +439,36 @@ public class TestWALEntryStream {
}
@Test
+ public void testReplicationSourceWALReaderWithFailingFilter() throws
Exception {
+ appendEntriesToLogAndSync(3);
+ // get ending position
+ long position;
+ try (WALEntryStream entryStream =
+ new WALEntryStream(walQueue, CONF, 0, log, null,
+ new MetricsSource("1"))) {
+ entryStream.next();
+ entryStream.next();
+ entryStream.next();
+ position = entryStream.getPosition();
+ }
+
+ // start up a reader
+ Path walPath = walQueue.peek();
+ int numFailuresInFilter = 5;
+ ReplicationSourceWALReader reader = createReaderWithBadReplicationFilter(
+ numFailuresInFilter, CONF);
+ WALEntryBatch entryBatch = reader.take();
+ assertEquals(numFailuresInFilter, FailingWALEntryFilter.numFailures());
+
+ // should've batched up our entries
+ assertNotNull(entryBatch);
+ assertEquals(3, entryBatch.getWalEntries().size());
+ assertEquals(position, entryBatch.getLastWalPosition());
+ assertEquals(walPath, entryBatch.getLastWalPath());
+ assertEquals(3, entryBatch.getNbRowKeys());
+ }
+
+ @Test
public void testReplicationSourceWALReaderRecovered() throws Exception {
appendEntriesToLogAndSync(10);
Path walPath = walQueue.peek();
@@ -616,6 +657,32 @@ public class TestWALEntryStream {
};
}
+ private WALEntryFilter getIntermittentFailingFilter(int numFailuresInFilter)
{
+ return new FailingWALEntryFilter(numFailuresInFilter);
+ }
+
+ public static class FailingWALEntryFilter implements WALEntryFilter {
+ private int numFailures = 0;
+ private static int countFailures = 0;
+
+ public FailingWALEntryFilter(int numFailuresInFilter) {
+ numFailures = numFailuresInFilter;
+ }
+
+ @Override
+ public Entry filter(Entry entry) {
+ if (countFailures == numFailures) {
+ return entry;
+ }
+ countFailures = countFailures + 1;
+ throw new WALEntryFilterRetryableException("failing filter");
+ }
+
+ public static int numFailures(){
+ return countFailures;
+ }
+ }
+
class PathWatcher implements WALActionsListener {
Path currentPath;
@@ -634,7 +701,7 @@ public class TestWALEntryStream {
long size = log.getLogFileSizeIfBeingWritten(walQueue.peek()).getAsLong();
AtomicLong fileLength = new AtomicLong(size - 1);
try (WALEntryStream entryStream = new WALEntryStream(walQueue, CONF, 0,
- p -> OptionalLong.of(fileLength.get()), null, new MetricsSource("1")))
{
+ p -> OptionalLong.of(fileLength.get()), null, new MetricsSource("1"))) {
assertTrue(entryStream.hasNext());
assertNotNull(entryStream.next());
// can not get log 2