This is an automated email from the ASF dual-hosted git repository.

yong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new 2aa59c7  Add skip unrecoverable ledger option for bookkeeper shell 
recover command (#2870)
2aa59c7 is described below

commit 2aa59c74cdd1de13c33a593f6dccc888823c4a83
Author: Hang Chen <[email protected]>
AuthorDate: Wed Nov 3 08:54:34 2021 +0800

    Add skip unrecoverable ledger option for bookkeeper shell recover command 
(#2870)
    
    ### Motivation
    When we use `bin/bookkeeper shell recover bookieId` command to recover 
specific bookie's ledgers, the recover process will exit when occurs recover 
ledger failed.
    
    In our production bookkeeper cluster, we found some ledgers in Open state 
and has no entry.  When we call `bin/bookkeeper shell recover bookieId` 
command, it will traverse all the ledgers level by level. In the end, for each 
ledger, it will call the following code to process recover.
    ```Java
    Processor<Long> ledgerProcessor = new Processor<Long>() {
                @Override
                public void process(Long ledgerId, AsyncCallback.VoidCallback 
iterCallback) {
                    recoverLedger(bookiesSrc, ledgerId, dryrun, 
skipOpenLedgers, skipUnrecoverableLedgers, iterCallback);
                }
    };
    ```
    
    In the `recoverLedger` method, it will call `asyncOpenLedgerNoRecovery` to 
open ledger and get LAC if the ledger in `OPEN` state. For the `getLAC` 
request, if the request ledger has no entry, it will return entry = -1 and 
return ERROR for this `getLAC` request.
    
https://github.com/apache/bookkeeper/blob/98ddf8149592572eebcfaf6bdd4916f295ffd9d7/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java#L756-L769
    
    And for the `asyncOpenLedgerNoRecovery` callback, it will return error for 
this process. It will stop the recover process of the following ledgers.
    
    In the end, the recover command runs failed, and the following ledger can't 
be recovered.
    
    ### Changes
    We should expose a flag for user to determine whether to move forward to 
recover the following ledgers when some ledgers recover failed.
    
    So, I provide the parameter `sku` to handle this case.
---
 .../org/apache/bookkeeper/bookie/BookieShell.java  |  3 +
 .../apache/bookkeeper/client/BookKeeperAdmin.java  | 76 ++++++++++++++++++----
 .../tools/cli/commands/bookies/RecoverCommand.java | 11 +++-
 .../apache/bookkeeper/bookie/BookieShellTest.java  | 21 ++++--
 4 files changed, 89 insertions(+), 22 deletions(-)

diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
index 8d00220..ffaa18b 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
@@ -481,6 +481,7 @@ public class BookieShell implements Tool {
             opts.addOption("l", "ledger", true, "Recover a specific ledger");
             opts.addOption("sk", "skipOpenLedgers", false, "Skip recovering 
open ledgers");
             opts.addOption("d", "deleteCookie", false, "Delete cookie node for 
the bookie.");
+            opts.addOption("sku", "skipUnrecoverableLedgers", false, "Skip 
unrecoverable ledgers.");
         }
 
         @Override
@@ -513,6 +514,7 @@ public class BookieShell implements Tool {
             boolean force = cmdLine.hasOption("f");
             boolean skipOpenLedgers = cmdLine.hasOption("sk");
             boolean removeCookies = !dryrun && cmdLine.hasOption("d");
+            boolean skipUnrecoverableLedgers = cmdLine.hasOption("sku");
 
             Long ledgerId = getOptionLedgerIdValue(cmdLine, "ledger", -1);
 
@@ -525,6 +527,7 @@ public class BookieShell implements Tool {
             flags.ledger(ledgerId);
             flags.skipOpenLedgers(skipOpenLedgers);
             flags.query(query);
+            flags.skipUnrecoverableLedgers(skipUnrecoverableLedgers);
             boolean result = cmd.apply(bkConf, flags);
             return (result) ? 0 : -1;
         }
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
index 91e2552..4eadb12 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
@@ -574,10 +574,15 @@ public class BookKeeperAdmin implements AutoCloseable {
     }
 
     public void recoverBookieData(final Set<BookieId> bookiesSrc, boolean 
dryrun, boolean skipOpenLedgers)
-            throws InterruptedException, BKException {
+        throws InterruptedException, BKException {
+        recoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, false);
+    }
+
+    public void recoverBookieData(final Set<BookieId> bookiesSrc, boolean 
dryrun, boolean skipOpenLedgers,
+                                  boolean skipUnrecoverableLedgers) throws 
InterruptedException, BKException {
         SyncObject sync = new SyncObject();
         // Call the async method to recover bookie data.
-        asyncRecoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, new 
RecoverCallback() {
+        asyncRecoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, 
skipUnrecoverableLedgers, new RecoverCallback() {
             @Override
             public void recoverComplete(int rc, Object ctx) {
                 LOG.info("Recover bookie operation completed with rc: {}", 
BKException.codeLogger(rc));
@@ -657,12 +662,13 @@ public class BookKeeperAdmin implements AutoCloseable {
 
     public void asyncRecoverBookieData(final Set<BookieId> bookieSrc,
                                        final RecoverCallback cb, final Object 
context) {
-        asyncRecoverBookieData(bookieSrc, false, false, cb, context);
+        asyncRecoverBookieData(bookieSrc, false, false, false, cb, context);
     }
 
     public void asyncRecoverBookieData(final Set<BookieId> bookieSrc, boolean 
dryrun,
-                                       final boolean skipOpenLedgers, final 
RecoverCallback cb, final Object context) {
-        getActiveLedgers(bookieSrc, dryrun, skipOpenLedgers, cb, context);
+                                       final boolean skipOpenLedgers, final 
boolean skipUnrecoverableLedgers,
+                                       final RecoverCallback cb, final Object 
context) {
+        getActiveLedgers(bookieSrc, dryrun, skipOpenLedgers, 
skipUnrecoverableLedgers, cb, context);
     }
 
     /**
@@ -709,7 +715,8 @@ public class BookKeeperAdmin implements AutoCloseable {
      *            Context for the RecoverCallback to call.
      */
     private void getActiveLedgers(final Set<BookieId> bookiesSrc, final 
boolean dryrun,
-                                  final boolean skipOpenLedgers, final 
RecoverCallback cb, final Object context) {
+                                  final boolean skipOpenLedgers, final boolean 
skipUnrecoverableLedgers,
+                                  final RecoverCallback cb, final Object 
context) {
         // Wrapper class around the RecoverCallback so it can be used
         // as the final VoidCallback to process ledgers
         class RecoverCallbackWrapper implements AsyncCallback.VoidCallback {
@@ -728,7 +735,7 @@ public class BookKeeperAdmin implements AutoCloseable {
         Processor<Long> ledgerProcessor = new Processor<Long>() {
             @Override
             public void process(Long ledgerId, AsyncCallback.VoidCallback 
iterCallback) {
-                recoverLedger(bookiesSrc, ledgerId, dryrun, skipOpenLedgers, 
iterCallback);
+                recoverLedger(bookiesSrc, ledgerId, dryrun, skipOpenLedgers, 
skipUnrecoverableLedgers, iterCallback);
             }
         };
         bkc.getLedgerManager().asyncProcessLedgers(
@@ -755,6 +762,31 @@ public class BookKeeperAdmin implements AutoCloseable {
      */
     private void recoverLedger(final Set<BookieId> bookiesSrc, final long lId, 
final boolean dryrun,
                                final boolean skipOpenLedgers, final 
AsyncCallback.VoidCallback finalLedgerIterCb) {
+        recoverLedger(bookiesSrc, lId, dryrun, skipOpenLedgers, false, 
finalLedgerIterCb);
+    }
+
+    /**
+     * This method asynchronously recovers a given ledger if any of the ledger
+     * entries were stored on the failed bookie.
+     *
+     * @param bookiesSrc
+     *            Source bookies that had a failure. We want to replicate the
+     *            ledger fragments that were stored there.
+     * @param lId
+     *            Ledger id we want to recover.
+     * @param dryrun
+     *            printing the recovery plan without actually recovering 
bookies
+     * @param skipOpenLedgers
+     *            Skip recovering open ledgers.
+     * @param skipUnrecoverableLedgers
+     *            Skip unrecoverable ledgers.
+     * @param finalLedgerIterCb
+     *            IterationCallback to invoke once we've recovered the current
+     *            ledger.
+     */
+    private void recoverLedger(final Set<BookieId> bookiesSrc, final long lId, 
final boolean dryrun,
+                               final boolean skipOpenLedgers, final boolean 
skipUnrecoverableLedgers,
+                               final AsyncCallback.VoidCallback 
finalLedgerIterCb) {
         if (LOG.isDebugEnabled()) {
             LOG.debug("Recovering ledger : {}", lId);
         }
@@ -763,8 +795,13 @@ public class BookKeeperAdmin implements AutoCloseable {
             @Override
             public void openComplete(int rc, final LedgerHandle lh, Object 
ctx) {
                 if (rc != BKException.Code.OK) {
-                    LOG.error("BK error opening ledger: " + lId, 
BKException.create(rc));
-                    finalLedgerIterCb.processResult(rc, null, null);
+                    if (skipUnrecoverableLedgers) {
+                        LOG.warn("BK error opening ledger: {}, skip recover 
it.", lId, BKException.create(rc));
+                        finalLedgerIterCb.processResult(BKException.Code.OK, 
null, null);
+                    } else {
+                        LOG.error("BK error opening ledger: {}", lId, 
BKException.create(rc));
+                        finalLedgerIterCb.processResult(rc, null, null);
+                    }
                     return;
                 }
 
@@ -798,13 +835,20 @@ public class BookKeeperAdmin implements AutoCloseable {
                         @Override
                         public void openComplete(int newrc, final LedgerHandle 
newlh, Object newctx) {
                             if (newrc != BKException.Code.OK) {
-                                LOG.error("BK error close ledger: " + lId, 
BKException.create(newrc));
-                                finalLedgerIterCb.processResult(newrc, null, 
null);
+                                if (skipUnrecoverableLedgers) {
+                                    LOG.warn("BK error opening ledger: {}, 
skip recover it.",
+                                        lId, BKException.create(newrc));
+                                    
finalLedgerIterCb.processResult(BKException.Code.OK, null, null);
+                                } else {
+                                    LOG.error("BK error close ledger: {}", 
lId, BKException.create(newrc));
+                                    finalLedgerIterCb.processResult(newrc, 
null, null);
+                                }
                                 return;
                             }
                             bkc.mainWorkerPool.submit(() -> {
                                 // do recovery
-                                recoverLedger(bookiesSrc, lId, dryrun, 
skipOpenLedgers, finalLedgerIterCb);
+                                recoverLedger(bookiesSrc, lId, dryrun, 
skipOpenLedgers,
+                                    skipUnrecoverableLedgers, 
finalLedgerIterCb);
                             });
                         }
                     }, null);
@@ -815,7 +859,13 @@ public class BookKeeperAdmin implements AutoCloseable {
                     @Override
                     public void processResult(int rc, String path, Object ctx) 
{
                         if (BKException.Code.OK != rc) {
-                            LOG.error("Failed to recover ledger {} : {}", lId, 
BKException.codeLogger(rc));
+                            if (skipUnrecoverableLedgers) {
+                                LOG.warn("Failed to recover ledger: {} : {}, 
skip recover it.", lId,
+                                    BKException.codeLogger(rc));
+                                rc = BKException.Code.OK;
+                            } else {
+                                LOG.error("Failed to recover ledger {} : {}", 
lId, BKException.codeLogger(rc));
+                            }
                         } else {
                             LOG.info("Recovered ledger {}.", lId);
                         }
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
index cd7ede0..ff1339f 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
@@ -100,6 +100,9 @@ public class RecoverCommand extends 
BookieCommand<RecoverCommand.RecoverFlags> {
 
         @Parameter(names = { "-bs", "--bokiesrc" }, description = "Bookie 
address")
         private String bookieAddress;
+
+        @Parameter(names = {"-sku", "--skipunrecoverableledgers"}, description 
= "Skip unrecoverable ledgers")
+        private boolean skipUnrecoverableLedgers;
     }
 
     @Override
@@ -118,6 +121,7 @@ public class RecoverCommand extends 
BookieCommand<RecoverCommand.RecoverFlags> {
         boolean force = flags.force;
         boolean skipOpenLedgers = flags.skipOpenLedgers;
         boolean removeCookies = !dryrun && flags.deleteCookie;
+        boolean skipUnrecoverableLedgers = flags.skipUnrecoverableLedgers;
 
         Long ledgerId = flags.ledger;
 
@@ -153,7 +157,7 @@ public class RecoverCommand extends 
BookieCommand<RecoverCommand.RecoverFlags> {
             if (DEFAULT_ID != ledgerId) {
                 return bkRecoveryLedger(admin, ledgerId, bookieAddrs, dryrun, 
skipOpenLedgers, removeCookies);
             }
-            return bkRecovery(admin, bookieAddrs, dryrun, skipOpenLedgers, 
removeCookies);
+            return bkRecovery(admin, bookieAddrs, dryrun, skipOpenLedgers, 
removeCookies, skipUnrecoverableLedgers);
         } finally {
             admin.close();
         }
@@ -259,9 +263,10 @@ public class RecoverCommand extends 
BookieCommand<RecoverCommand.RecoverFlags> {
                            Set<BookieId> bookieAddrs,
                            boolean dryrun,
                            boolean skipOpenLedgers,
-                           boolean removeCookies)
+                           boolean removeCookies,
+                           boolean skipUnrecoverableLedgers)
         throws InterruptedException, BKException {
-        bkAdmin.recoverBookieData(bookieAddrs, dryrun, skipOpenLedgers);
+        bkAdmin.recoverBookieData(bookieAddrs, dryrun, skipOpenLedgers, 
skipUnrecoverableLedgers);
         if (removeCookies) {
             deleteCookies(bkAdmin.getConf(), bookieAddrs);
         }
diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
index 2c9eb81..2da712f 100644
--- 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
+++ 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
@@ -280,7 +280,7 @@ public class BookieShellTest {
     public void testRecoverCmdRecoverDefault() throws Exception {
         // default behavior
         testRecoverCmdRecover(
-            false, false, false,
+            false, false, false, false,
             "-force", "127.0.0.1:3181");
     }
 
@@ -288,7 +288,7 @@ public class BookieShellTest {
     public void testRecoverCmdRecoverDeleteCookie() throws Exception {
         // dryrun
         testRecoverCmdRecover(
-            false, false, true,
+            false, false, true, false,
             "-force", "-deleteCookie", "127.0.0.1:3181");
     }
 
@@ -296,7 +296,7 @@ public class BookieShellTest {
     public void testRecoverCmdRecoverSkipOpenLedgersDeleteCookie() throws 
Exception {
         // dryrun
         testRecoverCmdRecover(
-            false, true, true,
+            false, true, true, false,
             "-force", "-deleteCookie", "-skipOpenLedgers", "127.0.0.1:3181");
     }
 
@@ -304,7 +304,7 @@ public class BookieShellTest {
     public void testRecoverCmdRecoverDryrun() throws Exception {
         // dryrun
         testRecoverCmdRecover(
-            true, false, false,
+            true, false, false, false,
             "-force", "-dryrun", "127.0.0.1:3181");
     }
 
@@ -312,14 +312,23 @@ public class BookieShellTest {
     public void testRecoverCmdRecoverDryrunDeleteCookie() throws Exception {
         // dryrun & removeCookie : removeCookie should be false
         testRecoverCmdRecover(
-            true, false, false,
+            true, false, false, false,
             "-force", "-dryrun", "-deleteCookie", "127.0.0.1:3181");
     }
 
+    @Test
+    public void testRecoverCmdRecoverSkipUnrecoverableLedgers() throws 
Exception {
+        // skipUnrecoverableLedgers
+        testRecoverCmdRecover(
+            false, false, false, true,
+            "-force", "-sku", "127.0.0.1:3181");
+    }
+
     @SuppressWarnings("unchecked")
     void testRecoverCmdRecover(boolean dryrun,
                                boolean skipOpenLedgers,
                                boolean removeCookies,
+                               boolean skipUnrecoverableLedgers,
                                String... args) throws Exception {
         RecoverCmd cmd = (RecoverCmd) shell.commands.get("recover");
         CommandLine cmdLine = parseCommandLine(cmd, args);
@@ -328,7 +337,7 @@ public class BookieShellTest {
             .verifyNew(BookKeeperAdmin.class, times(1))
             .withArguments(any(ClientConfiguration.class));
         verify(admin, times(1))
-            .recoverBookieData(any(Set.class), eq(dryrun), 
eq(skipOpenLedgers));
+            .recoverBookieData(any(Set.class), eq(dryrun), 
eq(skipOpenLedgers), eq(skipUnrecoverableLedgers));
         verify(admin, times(1)).close();
         if (removeCookies) {
             PowerMockito.verifyStatic(MetadataDrivers.class);

Reply via email to