This is an automated email from the ASF dual-hosted git repository.

sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new 42aae8d  Add printreplicationworkerid option to ListUnderreplicatedCmd.
42aae8d is described below

commit 42aae8d8abe5ee13e5b76283adadd3456cf87558
Author: cguttapalem <[email protected]>
AuthorDate: Sun May 27 22:56:33 2018 -0700

    Add printreplicationworkerid option to ListUnderreplicatedCmd.
    
    Descriptions of the changes in this PR:
    
    Add printreplicationworkerid option to ListUnderreplicatedCmd.
    This helps in knowing who is holding lock on UnderReplicated ledger,
    and diagnosing that ReplicationWorker if we are seeing any issues in
    rereplication.
    
    Author: cguttapalem <[email protected]>
    
    Reviewers: Enrico Olivelli <[email protected]>, Sijie Guo 
<[email protected]>
    
    This closes #1446 from reddycharan/printreplicationworkerid
---
 .../org/apache/bookkeeper/bookie/BookieShell.java  | 25 ++++++++++---
 .../meta/LedgerUnderreplicationManager.java        | 13 +++++++
 .../meta/ZkLedgerUnderreplicationManager.java      | 28 +++++++++++++++
 .../TestLedgerUnderreplicationManager.java         | 42 ++++++++++++++++++++++
 4 files changed, 104 insertions(+), 4 deletions(-)

diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
index 8fc0833..6dc2883 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
@@ -104,6 +104,7 @@ import 
org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback;
 import org.apache.bookkeeper.replication.AuditorElector;
 import org.apache.bookkeeper.replication.ReplicationException;
 import 
org.apache.bookkeeper.replication.ReplicationException.CompatibilityException;
+import 
org.apache.bookkeeper.replication.ReplicationException.UnavailableException;
 import org.apache.bookkeeper.stats.NullStatsLogger;
 import org.apache.bookkeeper.tools.cli.commands.bookie.LastMarkCommand;
 import org.apache.bookkeeper.tools.cli.commands.client.SimpleTestCommand;
@@ -876,6 +877,7 @@ public class BookieShell implements Tool {
             opts.addOption("missingreplica", true, "Bookie Id of missing 
replica");
             opts.addOption("excludingmissingreplica", true, "Bookie Id of 
missing replica to ignore");
             opts.addOption("printmissingreplica", false, "Whether to print 
missingreplicas list?");
+            opts.addOption("printreplicationworkerid", false, "Whether to 
print replicationworkerid?");
         }
 
         @Override
@@ -892,7 +894,7 @@ public class BookieShell implements Tool {
         @Override
         String getUsage() {
             return "listunderreplicated [[-missingreplica <bookieaddress>]"
-                    + " [-excludingmissingreplica <bookieaddress>]] 
[-printmissingreplica]";
+                    + " [-excludingmissingreplica <bookieaddress>]] 
[-printmissingreplica] [-printreplicationworkerid]";
         }
 
         @Override
@@ -901,6 +903,7 @@ public class BookieShell implements Tool {
             final String includingBookieId = 
cmdLine.getOptionValue("missingreplica");
             final String excludingBookieId = 
cmdLine.getOptionValue("excludingmissingreplica");
             final boolean printMissingReplica = 
cmdLine.hasOption("printmissingreplica");
+            final boolean printReplicationWorkerId = 
cmdLine.hasOption("printreplicationworkerid");
 
             final Predicate<List<String>> predicate;
             if (!StringUtils.isBlank(includingBookieId) && 
!StringUtils.isBlank(excludingBookieId)) {
@@ -927,12 +930,26 @@ public class BookieShell implements Tool {
                 Iterator<Map.Entry<Long, List<String>>> iter = 
underreplicationManager
                         .listLedgersToRereplicate(predicate, 
printMissingReplica);
                 while (iter.hasNext()) {
-                    
System.out.println(ledgerIdFormatter.formatLedgerId(iter.next().getKey()));
+                    Map.Entry<Long, List<String>> urLedgerMapEntry = 
iter.next();
+                    long urLedgerId = urLedgerMapEntry.getKey();
+                    
System.out.println(ledgerIdFormatter.formatLedgerId(urLedgerId));
                     if (printMissingReplica) {
-                        iter.next().getValue().forEach((missingReplica) -> {
-                            System.out.println("\t" + missingReplica);
+                        urLedgerMapEntry.getValue().forEach((missingReplica) 
-> {
+                            System.out.println("\tMissingReplica : " + 
missingReplica);
                         });
                     }
+                    if (printReplicationWorkerId) {
+                        try {
+                            String replicationWorkerId = 
underreplicationManager
+                                    
.getReplicationWorkerIdRereplicatingLedger(urLedgerId);
+                            if (replicationWorkerId != null) {
+                                System.out.println("\tReplicationWorkerId : " 
+ replicationWorkerId);
+                            }
+                        } catch (UnavailableException e) {
+                            LOG.error("Failed to get ReplicationWorkerId 
rereplicating ledger {} -- {}", urLedgerId,
+                                    e.getMessage());
+                        }
+                    }
                 }
                 return null;
             });
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
index 21c6a4f..532f8d2 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java
@@ -164,4 +164,17 @@ public interface LedgerUnderreplicationManager extends 
AutoCloseable {
      */
     void notifyLostBookieRecoveryDelayChanged(GenericCallback<Void> cb)
             throws ReplicationException.UnavailableException;
+
+    /**
+     * If a replicationworker has acquired lock on an underreplicated ledger,
+     * then getReplicationWorkerIdRereplicatingLedger should return
+     * ReplicationWorkerId (BookieId) of the ReplicationWorker that is holding
+     * lock. If lock for the underreplicated ledger is not yet acquired or if 
it
+     * is released then it is supposed to return null.
+     *
+     * @param ledgerId
+     * @return
+     * @throws ReplicationException.UnavailableException
+     */
+    String getReplicationWorkerIdRereplicatingLedger(long ledgerId) throws 
ReplicationException.UnavailableException;
 }
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
index 4289bcd..fbf2839 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java
@@ -23,6 +23,7 @@ import static com.google.common.base.Charsets.UTF_8;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.protobuf.TextFormat;
+import com.google.protobuf.TextFormat.ParseException;
 
 import java.net.UnknownHostException;
 import java.util.AbstractMap;
@@ -811,4 +812,31 @@ public class ZkLedgerUnderreplicationManager implements 
LedgerUnderreplicationMa
             throw new ReplicationException.UnavailableException("Interrupted 
while contacting zookeeper", ie);
         }
     }
+
+    @Override
+    public String getReplicationWorkerIdRereplicatingLedger(long ledgerId)
+            throws ReplicationException.UnavailableException {
+        String replicationWorkerId = null;
+        try {
+            byte[] lockData = zkc.getData(getUrLedgerLockZnode(urLockPath, 
ledgerId), false, null);
+            LockDataFormat.Builder lockDataBuilder = 
LockDataFormat.newBuilder();
+            TextFormat.merge(new String(lockData, UTF_8), lockDataBuilder);
+            LockDataFormat lock = lockDataBuilder.build();
+            replicationWorkerId = lock.getBookieId();
+        } catch (KeeperException.NoNodeException e) {
+            // this is ok.
+        } catch (KeeperException e) {
+            LOG.error("Error while getting ReplicationWorkerId rereplicating 
Ledger", e);
+            throw new ReplicationException.UnavailableException(
+                    "Error while getting ReplicationWorkerId rereplicating 
Ledger", e);
+        } catch (InterruptedException e) {
+            LOG.error("Got interrupted while getting ReplicationWorkerId 
rereplicating Ledger", e);
+            Thread.currentThread().interrupt();
+            throw new ReplicationException.UnavailableException("Interrupted 
while contacting zookeeper", e);
+        } catch (ParseException e) {
+            LOG.error("Error while parsing ZK data of lock", e);
+            throw new ReplicationException.UnavailableException("Error while 
parsing ZK data of lock", e);
+        }
+        return replicationWorkerId;
+    }
 }
diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
index 229af16..5c7d5e6 100644
--- 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
+++ 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java
@@ -51,6 +51,7 @@ import 
org.apache.bookkeeper.meta.LedgerUnderreplicationManager;
 import org.apache.bookkeeper.meta.ZkLayoutManager;
 import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager;
 import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase;
+import org.apache.bookkeeper.net.DNS;
 import org.apache.bookkeeper.proto.DataFormats.UnderreplicatedLedgerFormat;
 import 
org.apache.bookkeeper.replication.ReplicationException.CompatibilityException;
 import 
org.apache.bookkeeper.replication.ReplicationException.UnavailableException;
@@ -372,6 +373,47 @@ public class TestLedgerUnderreplicationManager {
     }
 
     /**
+     * If replicationworker has acquired lock on it, then
+     * getReplicationWorkerIdRereplicatingLedger should return
+     * ReplicationWorkerId (BookieId) of the ReplicationWorker that is holding
+     * lock. If lock for the underreplicated ledger is not yet acquired or if 
it
+     * is released then it is supposed to return null.
+     *
+     * @throws Exception
+     */
+    @Test
+    public void testGetReplicationWorkerIdRereplicatingLedger() throws 
Exception {
+        String missingReplica1 = "localhost:3181";
+        String missingReplica2 = "localhost:3182";
+
+        LedgerUnderreplicationManager m1 = 
lmf1.newLedgerUnderreplicationManager();
+
+        Long ledgerA = 0xfeadeefdacL;
+        m1.markLedgerUnderreplicated(ledgerA, missingReplica1);
+        m1.markLedgerUnderreplicated(ledgerA, missingReplica2);
+
+        // lock is not yet acquired so replicationWorkerIdRereplicatingLedger
+        // should
+        assertEquals("ReplicationWorkerId of the lock", null, 
m1.getReplicationWorkerIdRereplicatingLedger(ledgerA));
+
+        Future<Long> fA = getLedgerToReplicate(m1);
+        Long lA = fA.get(5, TimeUnit.SECONDS);
+        assertEquals("Should be the ledger that was just marked", lA, ledgerA);
+
+        /*
+         * ZkLedgerUnderreplicationManager.getLockData uses
+         * DNS.getDefaultHost("default") as the bookieId.
+         *
+         */
+        assertEquals("ReplicationWorkerId of the lock", 
DNS.getDefaultHost("default"),
+                m1.getReplicationWorkerIdRereplicatingLedger(ledgerA));
+
+        m1.markLedgerReplicated(lA);
+
+        assertEquals("ReplicationWorkerId of the lock", null, 
m1.getReplicationWorkerIdRereplicatingLedger(ledgerA));
+    }
+
+    /**
      * Test that when a ledger is marked as underreplicated with
      * the same missing replica twice, only marking as replicated
      * will be enough to remove it from the list.

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to