This is an automated email from the ASF dual-hosted git repository.
sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 42aae8d Add printreplicationworkerid option to ListUnderreplicatedCmd.
42aae8d is described below
commit 42aae8d8abe5ee13e5b76283adadd3456cf87558
Author: cguttapalem <[email protected]>
AuthorDate: Sun May 27 22:56:33 2018 -0700
Add printreplicationworkerid option to ListUnderreplicatedCmd.
Descriptions of the changes in this PR:
Add printreplicationworkerid option to ListUnderreplicatedCmd.
This helps in knowing who is holding lock on UnderReplicated ledger,
and diagnosing that ReplicationWorker if we are seeing any issues in
rereplication.
Author: cguttapalem <[email protected]>
Reviewers: Enrico Olivelli <[email protected]>, Sijie Guo
<[email protected]>
This closes #1446 from reddycharan/printreplicationworkerid
---
.../org/apache/bookkeeper/bookie/BookieShell.java | 25 ++++++++++---
.../meta/LedgerUnderreplicationManager.java | 13 +++++++
.../meta/ZkLedgerUnderreplicationManager.java | 28 +++++++++++++++
.../TestLedgerUnderreplicationManager.java | 42 ++++++++++++++++++++++
4 files changed, 104 insertions(+), 4 deletions(-)
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
index 8fc0833..6dc2883 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
@@ -104,6 +104,7 @@ import
org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback;
import org.apache.bookkeeper.replication.AuditorElector;
import org.apache.bookkeeper.replication.ReplicationException;
import
org.apache.bookkeeper.replication.ReplicationException.CompatibilityException;
+import
org.apache.bookkeeper.replication.ReplicationException.UnavailableException;
import org.apache.bookkeeper.stats.NullStatsLogger;
import org.apache.bookkeeper.tools.cli.commands.bookie.LastMarkCommand;
import org.apache.bookkeeper.tools.cli.commands.client.SimpleTestCommand;
@@ -876,6 +877,7 @@ public class BookieShell implements Tool {
opts.addOption("missingreplica", true, "Bookie Id of missing
replica");
opts.addOption("excludingmissingreplica", true, "Bookie Id of
missing replica to ignore");
opts.addOption("printmissingreplica", false, "Whether to print
missingreplicas list?");
+ opts.addOption("printreplicationworkerid", false, "Whether to
print replicationworkerid?");
}
@Override
@@ -892,7 +894,7 @@ public class BookieShell implements Tool {
@Override
String getUsage() {
return "listunderreplicated [[-missingreplica <bookieaddress>]"
- + " [-excludingmissingreplica <bookieaddress>]]
[-printmissingreplica]";
+ + " [-excludingmissingreplica <bookieaddress>]]
[-printmissingreplica] [-printreplicationworkerid]";
}
@Override
@@ -901,6 +903,7 @@ public class BookieShell implements Tool {
final String includingBookieId =
cmdLine.getOptionValue("missingreplica");
final String excludingBookieId =
cmdLine.getOptionValue("excludingmissingreplica");
final boolean printMissingReplica =
cmdLine.hasOption("printmissingreplica");
+ final boolean printReplicationWorkerId =
cmdLine.hasOption("printreplicationworkerid");
final Predicate<List<String>> predicate;
if (!StringUtils.isBlank(includingBookieId) &&
!StringUtils.isBlank(excludingBookieId)) {
@@ -927,12 +930,26 @@ public class BookieShell implements Tool {
Iterator<Map.Entry<Long, List<String>>> iter =
underreplicationManager
.listLedgersToRereplicate(predicate,
printMissingReplica);
while (iter.hasNext()) {
-
System.out.println(ledgerIdFormatter.formatLedgerId(iter.next().getKey()));
+ Map.Entry<Long, List<String>> urLedgerMapEntry =
iter.next();
+ long urLedgerId = urLedgerMapEntry.getKey();
+
System.out.println(ledgerIdFormatter.formatLedgerId(urLedgerId));
if (printMissingReplica) {
- iter.next().getValue().forEach((missingReplica) -> {
- System.out.println("\t" + missingReplica);
+ urLedgerMapEntry.getValue().forEach((missingReplica)
-> {
+ System.out.println("\tMissingReplica : " +
missingReplica);
});
}
+ if (printReplicationWorkerId) {
+ try {
+ String replicationWorkerId =
underreplicationManager
+
.getReplicationWorkerIdRereplicatingLedger(urLedgerId);
+ if (replicationWorkerId != null) {
+ System.out.println("\tReplicationWorkerId : "
+ replicationWorkerId);
+ }
+ } catch (UnavailableException e) {
+ LOG.error("Failed to get ReplicationWorkerId
rereplicating ledger {} -- {}", urLedgerId,
+ e.getMessage());
+ }
+ }
}
return null;
});
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
index 21c6a4f..532f8d2 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
@@ -164,4 +164,17 @@ public interface LedgerUnderreplicationManager extends
AutoCloseable {
*/
void notifyLostBookieRecoveryDelayChanged(GenericCallback<Void> cb)
throws ReplicationException.UnavailableException;
+
+ /**
+ * If a replicationworker has acquired lock on an underreplicated ledger,
+ * then getReplicationWorkerIdRereplicatingLedger should return
+ * ReplicationWorkerId (BookieId) of the ReplicationWorker that is holding
+ * lock. If lock for the underreplicated ledger is not yet acquired or if
it
+ * is released then it is supposed to return null.
+ *
+ * @param ledgerId
+ * @return
+ * @throws ReplicationException.UnavailableException
+ */
+ String getReplicationWorkerIdRereplicatingLedger(long ledgerId) throws
ReplicationException.UnavailableException;
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
index 4289bcd..fbf2839 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
@@ -23,6 +23,7 @@ import static com.google.common.base.Charsets.UTF_8;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.protobuf.TextFormat;
+import com.google.protobuf.TextFormat.ParseException;
import java.net.UnknownHostException;
import java.util.AbstractMap;
@@ -811,4 +812,31 @@ public class ZkLedgerUnderreplicationManager implements
LedgerUnderreplicationMa
throw new ReplicationException.UnavailableException("Interrupted
while contacting zookeeper", ie);
}
}
+
+ @Override
+ public String getReplicationWorkerIdRereplicatingLedger(long ledgerId)
+ throws ReplicationException.UnavailableException {
+ String replicationWorkerId = null;
+ try {
+ byte[] lockData = zkc.getData(getUrLedgerLockZnode(urLockPath,
ledgerId), false, null);
+ LockDataFormat.Builder lockDataBuilder =
LockDataFormat.newBuilder();
+ TextFormat.merge(new String(lockData, UTF_8), lockDataBuilder);
+ LockDataFormat lock = lockDataBuilder.build();
+ replicationWorkerId = lock.getBookieId();
+ } catch (KeeperException.NoNodeException e) {
+ // this is ok.
+ } catch (KeeperException e) {
+ LOG.error("Error while getting ReplicationWorkerId rereplicating
Ledger", e);
+ throw new ReplicationException.UnavailableException(
+ "Error while getting ReplicationWorkerId rereplicating
Ledger", e);
+ } catch (InterruptedException e) {
+ LOG.error("Got interrupted while getting ReplicationWorkerId
rereplicating Ledger", e);
+ Thread.currentThread().interrupt();
+ throw new ReplicationException.UnavailableException("Interrupted
while contacting zookeeper", e);
+ } catch (ParseException e) {
+ LOG.error("Error while parsing ZK data of lock", e);
+ throw new ReplicationException.UnavailableException("Error while
parsing ZK data of lock", e);
+ }
+ return replicationWorkerId;
+ }
}
diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
index 229af16..5c7d5e6 100644
---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
@@ -51,6 +51,7 @@ import
org.apache.bookkeeper.meta.LedgerUnderreplicationManager;
import org.apache.bookkeeper.meta.ZkLayoutManager;
import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager;
import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase;
+import org.apache.bookkeeper.net.DNS;
import org.apache.bookkeeper.proto.DataFormats.UnderreplicatedLedgerFormat;
import
org.apache.bookkeeper.replication.ReplicationException.CompatibilityException;
import
org.apache.bookkeeper.replication.ReplicationException.UnavailableException;
@@ -372,6 +373,47 @@ public class TestLedgerUnderreplicationManager {
}
/**
+ * If replicationworker has acquired lock on it, then
+ * getReplicationWorkerIdRereplicatingLedger should return
+ * ReplicationWorkerId (BookieId) of the ReplicationWorker that is holding
+ * lock. If lock for the underreplicated ledger is not yet acquired or if
it
+ * is released then it is supposed to return null.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testGetReplicationWorkerIdRereplicatingLedger() throws
Exception {
+ String missingReplica1 = "localhost:3181";
+ String missingReplica2 = "localhost:3182";
+
+ LedgerUnderreplicationManager m1 =
lmf1.newLedgerUnderreplicationManager();
+
+ Long ledgerA = 0xfeadeefdacL;
+ m1.markLedgerUnderreplicated(ledgerA, missingReplica1);
+ m1.markLedgerUnderreplicated(ledgerA, missingReplica2);
+
+ // lock is not yet acquired so replicationWorkerIdRereplicatingLedger
+ // should
+ assertEquals("ReplicationWorkerId of the lock", null,
m1.getReplicationWorkerIdRereplicatingLedger(ledgerA));
+
+ Future<Long> fA = getLedgerToReplicate(m1);
+ Long lA = fA.get(5, TimeUnit.SECONDS);
+ assertEquals("Should be the ledger that was just marked", lA, ledgerA);
+
+ /*
+ * ZkLedgerUnderreplicationManager.getLockData uses
+ * DNS.getDefaultHost("default") as the bookieId.
+ *
+ */
+ assertEquals("ReplicationWorkerId of the lock",
DNS.getDefaultHost("default"),
+ m1.getReplicationWorkerIdRereplicatingLedger(ledgerA));
+
+ m1.markLedgerReplicated(lA);
+
+ assertEquals("ReplicationWorkerId of the lock", null,
m1.getReplicationWorkerIdRereplicatingLedger(ledgerA));
+ }
+
+ /**
* Test that when a ledger is marked as underreplicated with
* the same missing replica twice, only marking as replicated
* will be enough to remove it from the list.
--
To stop receiving notification emails like this one, please contact
[email protected].