This is an automated email from the ASF dual-hosted git repository.

sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f8b26d  ISSUE #870: ScanAndCompareGarbageCollector: harden against LM 
bugs
1f8b26d is described below

commit 1f8b26d5f02ae6bccf0d586bcb145f4461f4fde6
Author: Samuel Just <[email protected]>
AuthorDate: Tue Dec 26 22:48:00 2017 -0800

    ISSUE #870: ScanAndCompareGarbageCollector: harden against LM bugs
    
    The idea behind this patch is to make it more likely that more than one
    LedgerManager bug would be required to erroneously believe a ledger to be
    deleted.
    1) Remove the special handling for the completely empty LM.  It's not
    really a common case nor is it much of an optimization even when it
    happens.
    2) Add a config variable to cause the bookie to also check the
    LedgerManager.readLedgerMetadata path before actually removing a ledger.
    The implementations of readLedgerMetadata and the iterator are
    sufficiently different that it seems worth it to have the option of
    checking both to guard somewhat against a bug in either.
    
    (bug W-4292747)
    (bug W-3027938)
    Signed-off-by: Samuel Just <sjustsalesforce.com>
    Signed-off-by: Charan Reddy Guttapalem <cguttapalemsalesforce.com>
    
    Master Issue: #870
    
    Author: Samuel Just <[email protected]>
    
    Reviewers: Ivan Kelly <[email protected]>, Arvin <None>, Sijie Guo 
<[email protected]>
    
    This closes #876 from athanatos/forupstream/issue-870, closes #870
---
 bookkeeper-server/conf/bk_server.conf              |   3 +
 .../bookie/ScanAndCompareGarbageCollector.java     |  55 +++--
 .../bookkeeper/conf/ServerConfiguration.java       |  20 ++
 .../org/apache/bookkeeper/meta/GcLedgersTest.java  | 226 ++++++++++++++++++++-
 4 files changed, 283 insertions(+), 21 deletions(-)

diff --git a/bookkeeper-server/conf/bk_server.conf 
b/bookkeeper-server/conf/bk_server.conf
index ebc0ec7..c7ea6b0 100755
--- a/bookkeeper-server/conf/bk_server.conf
+++ b/bookkeeper-server/conf/bk_server.conf
@@ -161,6 +161,9 @@ journalDirectory=/tmp/bk-txn
 # log files are deleted after GC.
 # isForceGCAllowWhenNoSpace=false
 
+# True if the bookie should double check readMetadata prior to gc
+# verifyMetadataOnGC=false
+
 #############################################################################
 ## TLS settings
 #############################################################################
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
index 7399824..b42d302 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
@@ -28,8 +28,10 @@ import java.util.List;
 import java.util.NavigableSet;
 import java.util.Set;
 import java.util.SortedMap;
+import java.util.TreeSet;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.bookkeeper.client.BKException;
 import org.apache.bookkeeper.client.LedgerMetadata;
 import org.apache.bookkeeper.conf.ServerConfiguration;
@@ -76,6 +78,7 @@ public class ScanAndCompareGarbageCollector implements 
GarbageCollector{
     private final long gcOverReplicatedLedgerIntervalMillis;
     private long lastOverReplicatedLedgerGcTimeMillis;
     private final String zkLedgersRootPath;
+    private final boolean verifyMetadataOnGc;
 
     public ScanAndCompareGarbageCollector(LedgerManager ledgerManager, 
CompactableLedgerStorage ledgerStorage,
             ServerConfiguration conf) throws IOException {
@@ -91,6 +94,7 @@ public class ScanAndCompareGarbageCollector implements 
GarbageCollector{
         this.zkLedgersRootPath = conf.getZkLedgersRootPath();
         LOG.info("Over Replicated Ledger Deletion : enabled=" + 
enableGcOverReplicatedLedger + ", interval="
                 + gcOverReplicatedLedgerIntervalMillis);
+        verifyMetadataOnGc = conf.getVerifyMetadataOnGC();
     }
 
     @Override
@@ -106,18 +110,6 @@ public class ScanAndCompareGarbageCollector implements 
GarbageCollector{
             NavigableSet<Long> bkActiveLedgers = 
Sets.newTreeSet(ledgerStorage.getActiveLedgersInRange(0,
                     Long.MAX_VALUE));
 
-            // Iterate over all the ledger on the metadata store
-            LedgerRangeIterator ledgerRangeIterator = 
ledgerManager.getLedgerRanges();
-
-            if (!ledgerRangeIterator.hasNext()) {
-                // Empty global active ledgers, need to remove all local 
active ledgers.
-                for (long ledgerId : bkActiveLedgers) {
-                    garbageCleaner.clean(ledgerId);
-                }
-            }
-
-            long lastEnd = -1;
-
             long curTime = MathUtils.now();
             boolean checkOverreplicatedLedgers = (enableGcOverReplicatedLedger 
&& curTime
                     - lastOverReplicatedLedgerGcTimeMillis > 
gcOverReplicatedLedgerIntervalMillis);
@@ -134,27 +126,50 @@ public class ScanAndCompareGarbageCollector implements 
GarbageCollector{
                 lastOverReplicatedLedgerGcTimeMillis = MathUtils.now();
             }
 
-            while (ledgerRangeIterator.hasNext()) {
-                LedgerRange lRange = ledgerRangeIterator.next();
-
-                Long start = lastEnd + 1;
-                Long end = lRange.end();
-                if (!ledgerRangeIterator.hasNext()) {
+            // Iterate over all the ledger on the metadata store
+            LedgerRangeIterator ledgerRangeIterator = 
ledgerManager.getLedgerRanges();
+            Set<Long> ledgersInMetadata = null;
+            long start;
+            long end = -1;
+            boolean done = false;
+            while (!done) {
+                start = end + 1;
+                if (ledgerRangeIterator.hasNext()) {
+                    LedgerRange lRange = ledgerRangeIterator.next();
+                    ledgersInMetadata = lRange.getLedgers();
+                    end = lRange.end();
+                } else {
+                    ledgersInMetadata = new TreeSet<>();
                     end = Long.MAX_VALUE;
+                    done = true;
                 }
 
                 Iterable<Long> subBkActiveLedgers = 
bkActiveLedgers.subSet(start, true, end, true);
 
-                Set<Long> ledgersInMetadata = lRange.getLedgers();
                 if (LOG.isDebugEnabled()) {
                     LOG.debug("Active in metadata {}, Active in bookie {}", 
ledgersInMetadata, subBkActiveLedgers);
                 }
                 for (Long bkLid : subBkActiveLedgers) {
                     if (!ledgersInMetadata.contains(bkLid)) {
+                        if (verifyMetadataOnGc) {
+                            CountDownLatch latch = new CountDownLatch(1);
+                            final AtomicInteger metaRC = new AtomicInteger(0);
+                            ledgerManager.readLedgerMetadata(bkLid, (int rc, 
LedgerMetadata x) -> {
+                                metaRC.set(rc);
+                                latch.countDown();
+                            });
+                            latch.await();
+                            if (metaRC.get() != 
BKException.Code.NoSuchLedgerExistsException) {
+                                LOG.warn(
+                                        "Ledger {} Missing in metadata list, 
but ledgerManager returned rc: {}.",
+                                        bkLid,
+                                        metaRC.get());
+                                continue;
+                            }
+                        }
                         garbageCleaner.clean(bkLid);
                     }
                 }
-                lastEnd = end;
             }
         } catch (Throwable t) {
             // ignore exception, collecting garbage next time
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
index 3990e74..a975742 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
@@ -53,6 +53,7 @@ public class ServerConfiguration extends 
AbstractConfiguration<ServerConfigurati
     protected static final String IS_FORCE_GC_ALLOW_WHEN_NO_SPACE = 
"isForceGCAllowWhenNoSpace";
     protected static final String GC_OVERREPLICATED_LEDGER_WAIT_TIME = 
"gcOverreplicatedLedgerWaitTime";
     protected static final String USE_TRANSACTIONAL_COMPACTION = 
"useTransactionalCompaction";
+    protected static final String VERIFY_METADATA_ON_GC = "verifyMetadataOnGC";
     // Sync Parameters
     protected static final String FLUSH_INTERVAL = "flushInterval";
     protected static final String FLUSH_ENTRYLOG_INTERVAL_BYTES = 
"flushEntrylogBytes";
@@ -311,6 +312,25 @@ public class ServerConfiguration extends 
AbstractConfiguration<ServerConfigurati
     }
 
     /**
+     * Get whether the bookie is configured to double check prior to gc.
+     *
+     * @return use transactional compaction
+     */
+    public boolean getVerifyMetadataOnGC() {
+        return this.getBoolean(VERIFY_METADATA_ON_GC, false);
+    }
+
+    /**
+     * Set whether the bookie is configured to double check prior to gc.
+     * @param verifyMetadataOnGC
+     * @return server configuration
+     */
+    public ServerConfiguration setVerifyMetadataOnGc(boolean 
verifyMetadataOnGC) {
+        this.setProperty(VERIFY_METADATA_ON_GC, verifyMetadataOnGC);
+        return this;
+    }
+
+    /**
      * Get flush interval. Default value is 10 second. It isn't useful to 
decrease
      * this value, since ledger storage only checkpoints when an entry logger 
file
      * is rolled.
diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java
index 5c234d1..730490b 100644
--- 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java
+++ 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java
@@ -43,6 +43,7 @@ import java.util.SortedSet;
 import java.util.TreeSet;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.bookkeeper.bookie.BookieException;
 import org.apache.bookkeeper.bookie.CheckpointSource;
@@ -139,7 +140,7 @@ public class GcLedgersTest extends LedgerManagerTestCase {
                     }
                    });
         assertTrue(latch.await(10, TimeUnit.SECONDS));
-        assertEquals("Remove should have succeeded", 0, rc.get());
+        assertEquals("Remove should have succeeded for ledgerId: " + ledgerId, 
0, rc.get());
     }
 
     @Test
@@ -315,6 +316,229 @@ public class GcLedgersTest extends LedgerManagerTestCase {
         assertEquals("Should have cleaned first ledger" + first, (long) first, 
(long) cleaned.get(0));
     }
 
+    /*
+     * in this scenario no ledger is created, so ledgeriterator's hasNext call 
would return false and next would be
+     * null. GarbageCollector.gc is expected to behave normally
+     */
+    @Test
+    public void testGcLedgersWithNoLedgers() throws Exception {
+        final SortedSet<Long> createdLedgers = 
Collections.synchronizedSortedSet(new TreeSet<Long>());
+        final List<Long> cleaned = new ArrayList<Long>();
+
+        // no ledger created
+
+        final GarbageCollector garbageCollector = new 
ScanAndCompareGarbageCollector(getLedgerManager(),
+                new MockLedgerStorage(), baseConf);
+        AtomicBoolean cleanerCalled = new AtomicBoolean(false);
+
+        GarbageCollector.GarbageCleaner cleaner = new 
GarbageCollector.GarbageCleaner() {
+            @Override
+            public void clean(long ledgerId) {
+                LOG.info("Cleaned {}", ledgerId);
+                cleanerCalled.set(true);
+            }
+        };
+
+        validateLedgerRangeIterator(createdLedgers);
+
+        garbageCollector.gc(cleaner);
+        assertFalse("Should have cleaned nothing, since no ledger is created", 
cleanerCalled.get());
+    }
+
+    // in this scenario all the created ledgers are in one single ledger range.
+    @Test
+    public void testGcLedgersWithLedgersInSameLedgerRange() throws Exception {
+        baseConf.setVerifyMetadataOnGc(true);
+        final SortedSet<Long> createdLedgers = 
Collections.synchronizedSortedSet(new TreeSet<Long>());
+        final SortedSet<Long> cleaned = Collections.synchronizedSortedSet(new 
TreeSet<Long>());
+
+        // Create few ledgers which span over just one ledger range in the 
hierarchical ledger manager implementation
+        final int numLedgers = 5;
+
+        createLedgers(numLedgers, createdLedgers);
+
+        final GarbageCollector garbageCollector = new 
ScanAndCompareGarbageCollector(getLedgerManager(),
+                new MockLedgerStorage(), baseConf);
+        GarbageCollector.GarbageCleaner cleaner = new 
GarbageCollector.GarbageCleaner() {
+            @Override
+            public void clean(long ledgerId) {
+                LOG.info("Cleaned {}", ledgerId);
+                cleaned.add(ledgerId);
+            }
+        };
+
+        validateLedgerRangeIterator(createdLedgers);
+
+        garbageCollector.gc(cleaner);
+        assertTrue("Should have cleaned nothing", cleaned.isEmpty());
+
+        for (long ledgerId : createdLedgers) {
+            removeLedger(ledgerId);
+        }
+
+        garbageCollector.gc(cleaner);
+        assertEquals("Should have cleaned all the created ledgers", 
createdLedgers, cleaned);
+    }
+
+    /*
+     * in this test scenario no created ledger is deleted, but ledgeriterator 
is screwed up and returns hasNext to be
+     * false and next to be null. So even in this case it is expected not to 
clean any ledger's data.
+     *
+     * This testcase is needed for validating fix of bug - W-4292747.
+     *
+     * ScanAndCompareGarbageCollector/GC should clean data of ledger only if 
both the LedgerManager.getLedgerRanges says
+     * that ledger is not existing and also ledgerManager.readLedgerMetadata 
fails with error
+     * NoSuchLedgerExistsException.
+     *
+     */
+    @Test
+    public void testGcLedgersIfLedgerManagerIteratorFails() throws Exception {
+        baseConf.setVerifyMetadataOnGc(true);
+        final SortedSet<Long> createdLedgers = 
Collections.synchronizedSortedSet(new TreeSet<Long>());
+        final SortedSet<Long> cleaned = Collections.synchronizedSortedSet(new 
TreeSet<Long>());
+
+        // Create few ledgers
+        final int numLedgers = 5;
+
+        createLedgers(numLedgers, createdLedgers);
+
+        LedgerManager mockLedgerManager = new 
CleanupLedgerManager(getLedgerManager()) {
+            @Override
+            public LedgerRangeIterator getLedgerRanges() {
+                return new LedgerRangeIterator() {
+                    @Override
+                    public LedgerRange next() throws IOException {
+                        return null;
+                    }
+
+                    @Override
+                    public boolean hasNext() throws IOException {
+                        return false;
+                    }
+                };
+            }
+        };
+
+        final GarbageCollector garbageCollector = new 
ScanAndCompareGarbageCollector(mockLedgerManager,
+                new MockLedgerStorage(), baseConf);
+        GarbageCollector.GarbageCleaner cleaner = new 
GarbageCollector.GarbageCleaner() {
+            @Override
+            public void clean(long ledgerId) {
+                LOG.info("Cleaned {}", ledgerId);
+                cleaned.add(ledgerId);
+            }
+        };
+
+        validateLedgerRangeIterator(createdLedgers);
+
+        garbageCollector.gc(cleaner);
+        assertTrue("Should have cleaned nothing", cleaned.isEmpty());
+    }
+
+    /*
+     * In this test scenario no ledger is deleted, but 
LedgerManager.readLedgerMetadata says there is NoSuchLedger. So
+     * even in that case, GarbageCollector.gc shouldn't delete ledgers data.
+     *
+     * Consider the possible scenario - when the LedgerIterator is created 
that ledger is not deleted, so as per
+     * LedgerIterator that is live ledger. But right after the LedgerIterator 
creation that ledger is deleted, so
+     * readLedgerMetadata call would return NoSuchLedger. In this testscenario 
we are validating that as per Iterator if
+     * that ledger is alive though currently that ledger is deleted, we should 
not clean data of that ledger.
+     *
+     * ScanAndCompareGarbageCollector/GC should clean data of ledger only if 
both the LedgerManager.getLedgerRanges says
+     * that ledger is not existing and also ledgerManager.readLedgerMetadata 
fails with error
+     * NoSuchLedgerExistsException.
+     *
+     */
+    @Test
+    public void testGcLedgersIfReadLedgerMetadataSaysNoSuchLedger() throws 
Exception {
+        final SortedSet<Long> createdLedgers = 
Collections.synchronizedSortedSet(new TreeSet<Long>());
+        final SortedSet<Long> cleaned = Collections.synchronizedSortedSet(new 
TreeSet<Long>());
+
+        // Create few ledgers
+        final int numLedgers = 5;
+
+        createLedgers(numLedgers, createdLedgers);
+
+        LedgerManager mockLedgerManager = new 
CleanupLedgerManager(getLedgerManager()) {
+            @Override
+            public void readLedgerMetadata(long ledgerId, 
GenericCallback<LedgerMetadata> readCb) {
+                
readCb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null);
+            }
+        };
+
+        final GarbageCollector garbageCollector = new 
ScanAndCompareGarbageCollector(mockLedgerManager,
+                new MockLedgerStorage(), baseConf);
+        GarbageCollector.GarbageCleaner cleaner = new 
GarbageCollector.GarbageCleaner() {
+            @Override
+            public void clean(long ledgerId) {
+                LOG.info("Cleaned {}", ledgerId);
+                cleaned.add(ledgerId);
+            }
+        };
+
+        validateLedgerRangeIterator(createdLedgers);
+
+        garbageCollector.gc(cleaner);
+        assertTrue("Should have cleaned nothing", cleaned.isEmpty());
+    }
+
+    /*
+     * In this test scenario all the created ledgers are deleted, but 
LedgerManager.readLedgerMetadata fails with
+     * ZKException. So even in this case, GarbageCollector.gc shouldn't delete 
ledgers data.
+     *
+     * ScanAndCompareGarbageCollector/GC should clean data of ledger only if 
both the LedgerManager.getLedgerRanges says
+     * that ledger is not existing and also ledgerManager.readLedgerMetadata 
fails with error
+     * NoSuchLedgerExistsException, but is shouldn't delete if the 
readLedgerMetadata fails with any other error.
+     */
+    @Test
+    public void testGcLedgersIfReadLedgerMetadataFailsForDeletedLedgers() 
throws Exception {
+        baseConf.setVerifyMetadataOnGc(true);
+        final SortedSet<Long> createdLedgers = 
Collections.synchronizedSortedSet(new TreeSet<Long>());
+        final SortedSet<Long> cleaned = Collections.synchronizedSortedSet(new 
TreeSet<Long>());
+
+        // Create few ledgers
+        final int numLedgers = 5;
+
+        createLedgers(numLedgers, createdLedgers);
+
+        LedgerManager mockLedgerManager = new 
CleanupLedgerManager(getLedgerManager()) {
+            @Override
+            public void readLedgerMetadata(long ledgerId, 
GenericCallback<LedgerMetadata> readCb) {
+                readCb.operationComplete(BKException.Code.ZKException, null);
+            }
+        };
+
+        final GarbageCollector garbageCollector = new 
ScanAndCompareGarbageCollector(mockLedgerManager,
+                new MockLedgerStorage(), baseConf);
+        GarbageCollector.GarbageCleaner cleaner = new 
GarbageCollector.GarbageCleaner() {
+            @Override
+            public void clean(long ledgerId) {
+                LOG.info("Cleaned {}", ledgerId);
+                cleaned.add(ledgerId);
+            }
+        };
+
+        validateLedgerRangeIterator(createdLedgers);
+
+        for (long ledgerId : createdLedgers) {
+            removeLedger(ledgerId);
+        }
+
+        garbageCollector.gc(cleaner);
+        assertTrue("Should have cleaned nothing", cleaned.isEmpty());
+    }
+
+    public void validateLedgerRangeIterator(SortedSet<Long> createdLedgers) 
throws IOException {
+        SortedSet<Long> scannedLedgers = new TreeSet<Long>();
+        LedgerRangeIterator iterator = getLedgerManager().getLedgerRanges();
+        while (iterator.hasNext()) {
+            LedgerRange ledgerRange = iterator.next();
+            scannedLedgers.addAll(ledgerRange.getLedgers());
+        }
+
+        assertEquals(createdLedgers, scannedLedgers);
+    }
+
     class MockLedgerStorage implements CompactableLedgerStorage {
 
         @Override

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to