Author: amitj
Date: Wed Jul 20 03:57:50 2016
New Revision: 1753436

URL: http://svn.apache.org/viewvc?rev=1753436&view=rev
Log:
OAK-4200:  [BlobGC] Improve collection times of blobs available

* Added an overloaded JMX method to force retrieval of blob ids from the blob store 
rather than using any local tracking

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
    
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
 Wed Jul 20 03:57:50 2016
@@ -93,6 +93,22 @@ public class  BlobGC extends AnnotatedSt
         return getBlobGCStatus();
     }
 
+    @Override
+    public CompositeData startBlobGC(final boolean markOnly, final boolean 
forceBlobIdRetrieve) {
+        if (gcOp.isDone()) {
+            gcOp = newManagementOperation(OP_NAME, new Callable<String>() {
+                @Override
+                public String call() throws Exception {
+                    long t0 = nanoTime();
+                    blobGarbageCollector.collectGarbage(markOnly, 
forceBlobIdRetrieve);
+                    return "Blob gc completed in " + formatTime(nanoTime() - 
t0);
+                }
+            });
+            executor.execute(gcOp);
+        }
+        return getBlobGCStatus();
+    }
+
     @Nonnull
     @Override
     public CompositeData getBlobGCStatus() {

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
 Wed Jul 20 03:57:50 2016
@@ -46,6 +46,22 @@ public interface BlobGCMBean {
                      "This mode is to be used when the underlying BlobStore is 
shared between multiple " +
                      "different repositories. For all other cases set it to 
false to perform full garbage collection")
                                 boolean markOnly);
+
+    /**
+     * Initiate a data store garbage collection operation.
+     *
+     * @param markOnly whether to only mark references and not sweep in the 
mark and sweep operation.
+     * @param forceBlobIdRetrieve whether to force retrieve blob ids from 
datastore
+     * @return  the status of the operation right after it was initiated
+     */
+    CompositeData startBlobGC(@Name("markOnly")
+    @Description("Set to true to only mark references and not sweep in the 
mark and sweep operation. " +
+        "This mode is to be used when the underlying BlobStore is shared 
between multiple " +
+        "different repositories. For all other cases set it to false to 
perform full garbage collection")
+        boolean markOnly, @Name("forceBlobIdRetrieve")
+    @Description("Set to true to force retrieve all ids from the datastore 
bypassing any local tracking")
+        boolean forceBlobIdRetrieve);
+
     /**
      * Data store garbage collection status
      *

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
 Wed Jul 20 03:57:50 2016
@@ -31,7 +31,18 @@ public interface BlobGarbageCollector {
      * @throws Exception the exception
      */
     void collectGarbage(boolean markOnly) throws Exception;
-    
+
+    /**
+     * Marks garbage blobs from the passed node store instance.
+     * Collects them only if markOnly is false. If forceBlobRetrieve is true,
+     * retrieves blob ids from the blob store rather than using any local tracking.
+     *
+     * @param markOnly whether to only mark references and not sweep in the 
mark and sweep operation.
+     * @param forceBlobRetrieve whether to force retrieval of blob ids from the 
datastore
+     * @throws Exception the exception
+     */
+    void collectGarbage(boolean markOnly, boolean forceBlobRetrieve) throws 
Exception;
+
     /**
      * Retuns the list of stats
      * 

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
 Wed Jul 20 03:57:50 2016
@@ -170,7 +170,12 @@ public class MarkSweepGarbageCollector i
 
     @Override
     public void collectGarbage(boolean markOnly) throws Exception {
-        markAndSweep(markOnly);
+        markAndSweep(markOnly, false);
+    }
+
+    @Override
+    public void collectGarbage(boolean markOnly, boolean forceBlobRetrieve) 
throws Exception {
+        markAndSweep(markOnly, forceBlobRetrieve);
     }
 
     /**
@@ -245,9 +250,10 @@ public class MarkSweepGarbageCollector i
      * Mark and sweep. Main entry method for GC.
      *
      * @param markOnly whether to mark only
+     * @param forceBlobRetrieve force retrieve blob ids
      * @throws Exception the exception
      */
-    protected void markAndSweep(boolean markOnly) throws Exception {
+    protected void markAndSweep(boolean markOnly, boolean forceBlobRetrieve) 
throws Exception {
         boolean threw = true;
         GarbageCollectorFileState fs = new GarbageCollectorFileState(root);
         try {
@@ -257,7 +263,7 @@ public class MarkSweepGarbageCollector i
             long markStart = System.currentTimeMillis();
             mark(fs);
             if (!markOnly) {
-                long deleteCount = sweep(fs, markStart);
+                long deleteCount = sweep(fs, markStart, forceBlobRetrieve);
                 threw = false;
 
                 long maxTime = getLastMaxModifiedTime(markStart) > 0 ? 
getLastMaxModifiedTime(markStart) : markStart;
@@ -348,8 +354,9 @@ public class MarkSweepGarbageCollector i
      * @throws Exception the exception
      * @param fs the garbage collector file state
      * @param markStart the start time of mark to take as reference for 
deletion
+     * @param forceBlobRetrieve force retrieve blob ids
      */
-    protected long sweep(GarbageCollectorFileState fs, long markStart) throws 
Exception {
+    protected long sweep(GarbageCollectorFileState fs, long markStart, boolean 
forceBlobRetrieve) throws Exception {
         long earliestRefAvailTime;
         // Merge all the blob references available from all the reference 
files in the data store meta store
         // Only go ahead if merge succeeded
@@ -363,7 +370,7 @@ public class MarkSweepGarbageCollector i
         }
 
         // Find all blob references after iterating over the whole repository
-        (new BlobIdRetriever(fs)).call();
+        (new BlobIdRetriever(fs, forceBlobRetrieve)).call();
 
         // Calculate the references not used
         difference(fs);
@@ -513,7 +520,8 @@ public class MarkSweepGarbageCollector i
             LOG.info("Starting blob consistency check");
     
             // Find all blobs available in the blob store
-            ListenableFutureTask<Integer> blobIdRetriever = 
ListenableFutureTask.create(new BlobIdRetriever(fs));
+            ListenableFutureTask<Integer> blobIdRetriever = 
ListenableFutureTask.create(new BlobIdRetriever(fs,
+                false));
             executor.execute(blobIdRetriever);
     
             // Mark all used blob references
@@ -552,23 +560,25 @@ public class MarkSweepGarbageCollector i
      */
     private class BlobIdRetriever implements Callable<Integer> {
         private final GarbageCollectorFileState fs;
-    
-        public BlobIdRetriever(GarbageCollectorFileState fs) {
+        private final boolean forceRetrieve;
+
+        public BlobIdRetriever(GarbageCollectorFileState fs, boolean 
forceBlobRetrieve) {
             this.fs = fs;
+            this.forceRetrieve = forceBlobRetrieve;
         }
     
         @Override
         public Integer call() throws Exception {
-            BlobCollectionType.get(blobStore).retrieve(blobStore, fs, 
getBatchCount());
-            long length = fs.getAvailableRefs().length();
-            LOG.info("Length of blob ids file retrieved {}", length);
+            if (!forceRetrieve) {
+                BlobCollectionType.get(blobStore).retrieve(blobStore, fs, 
getBatchCount());
+                LOG.info("Length of blob ids file retrieved from tracker {}", 
fs.getAvailableRefs().length());
+            }
 
             // If the length is 0 then references not available from the 
tracker
             // retrieve from the data store
             if (fs.getAvailableRefs().length() <= 0) {
                 BlobCollectionType.DEFAULT.retrieve(blobStore, fs, 
getBatchCount());
-                length = fs.getAvailableRefs().length();
-                LOG.info("Length of blob ids file retrieved {}", length);
+                LOG.info("Length of blob ids file retrieved {}", 
fs.getAvailableRefs().length());
 
                 BlobCollectionType.get(blobStore).track(blobStore, fs);
             }

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
 Wed Jul 20 03:57:50 2016
@@ -29,6 +29,7 @@ import java.util.Set;
 import java.util.concurrent.ScheduledFuture;
 
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.plugins.blob.BlobTrackingStore;
 import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
@@ -144,6 +145,69 @@ public class DataStoreTrackerGCTest {
         assertEquals(state.blobsPresent, retrieveTracked(tracker));
     }
 
+    private HashSet<String> addNodeSpecialChars(DocumentNodeStore ds) throws 
Exception {
+        List<String> specialCharSets =
+            Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", 
"012\\efg" );
+        HashSet<String> set = new HashSet<String>();
+        NodeBuilder a = ds.getRoot().builder();
+        int toBeDeleted = 0;
+        for (int i = 0; i < specialCharSets.size(); i++) {
+            Blob b = ds.createBlob(randomStream(i, 18432));
+            NodeBuilder n = a.child("cspecial" + i);
+            n.child(specialCharSets.get(i)).setProperty("x", b);
+            Iterator<String> idIter =
+                ((GarbageCollectableBlobStore) ds.getBlobStore())
+                    .resolveChunks(b.toString());
+            List<String> ids = Lists.newArrayList(idIter);
+            if (toBeDeleted != i) {
+                set.addAll(ids);
+            }
+        }
+        ds.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // Delete one node again
+        a = ds.getRoot().builder();
+        a.child("cspecial" + 0).remove();
+        ds.merge(a, INSTANCE, EMPTY);
+        long maxAge = 10; // minutes
+        // Advance the clock past maxAge before running version GC on the deleted node
+        clock.waitUntil(clock.getTime() + MINUTES.toMillis(maxAge));
+
+        VersionGarbageCollector vGC = ds.getVersionGarbageCollector();
+        VersionGarbageCollector.VersionGCStats stats = vGC.gc(0, MILLISECONDS);
+        return set;
+    }
+
+    @Test
+    public void gcForcedRetrieve() throws Exception {
+        Cluster cluster = new Cluster("cluster1");
+        BlobStore s = cluster.blobStore;
+        BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) 
s).getTracker();
+        DataStoreState state = init(cluster.nodeStore, 0);
+        ScheduledFuture<?> scheduledFuture = newSingleThreadScheduledExecutor()
+            .schedule(tracker.new SnapshotJob(), 0, MILLISECONDS);
+        scheduledFuture.get();
+        // All blobs added should be tracked now
+        assertEquals(state.blobsAdded, retrieveTracked(tracker));
+
+        // Do addition and deletion which would not have been tracked as yet
+        Set<String> newBlobs = addNodeSpecialChars(cluster.nodeStore);
+        state.blobsAdded.addAll(newBlobs);
+        state.blobsPresent.addAll(newBlobs);
+
+        // The new blobs should not be found now as new snapshot not done
+        assertEquals(Sets.difference(state.blobsAdded, 
retrieveTracked(tracker)), newBlobs);
+
+        //force gc to retrieve blob ids from datastore
+        cluster.gc.collectGarbage(false, true);
+        Set<String> existingAfterGC = iterate(s);
+
+        // Check the state of the blob store after gc
+        assertEquals(state.blobsPresent, existingAfterGC);
+        // Tracked blobs should reflect deletions after gc and also the 
additions after
+        assertEquals(state.blobsPresent, retrieveTracked(tracker));
+    }
+
     @Test
     public void gcWithInlined() throws Exception {
         Cluster cluster = new Cluster("cluster1");

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
 Wed Jul 20 03:57:50 2016
@@ -460,7 +460,7 @@ public class MongoBlobGCTest extends Abs
         }
         
         @Override
-        protected void markAndSweep(boolean markOnly) throws Exception {
+        protected void markAndSweep(boolean markOnly, boolean 
forceBlobRetrieve) throws Exception {
             boolean threw = true;
             GarbageCollectorFileState fs = new GarbageCollectorFileState(root);
             try {
@@ -481,7 +481,7 @@ public class MongoBlobGCTest extends Abs
                     Thread.sleep(maxLastModifiedInterval + 100);
                     LOG.info("Slept {} to make additional blobs old", 
maxLastModifiedInterval + 100);
     
-                    long deleteCount = sweep(fs, markStart);
+                    long deleteCount = sweep(fs, markStart, forceBlobRetrieve);
                     threw = false;
             
                     LOG.info("Blob garbage collection completed in {}. Number 
of blobs deleted [{}]", sw.toString(),

Modified: 
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
 Wed Jul 20 03:57:50 2016
@@ -436,7 +436,7 @@ public class SegmentDataStoreBlobGCIT {
         }
         
         @Override
-        protected void markAndSweep(boolean markOnly) throws Exception {
+        protected void markAndSweep(boolean markOnly, boolean 
forceBlobRetrieve) throws Exception {
             boolean threw = true;
             GarbageCollectorFileState fs = new GarbageCollectorFileState(root);
             try {
@@ -457,7 +457,7 @@ public class SegmentDataStoreBlobGCIT {
                     Thread.sleep(maxLastModifiedInterval + 100);
                     LOG.info("Slept {} to make additional blobs old", 
maxLastModifiedInterval + 100);
                     
-                    long deleteCount = sweep(fs, markStart);
+                    long deleteCount = sweep(fs, markStart, forceBlobRetrieve);
                     threw = false;
                     
                     LOG.info("Blob garbage collection completed in {}. Number 
of blobs deleted [{}]", sw.toString(),


Reply via email to