Author: amitj
Date: Wed Jul 20 03:57:50 2016
New Revision: 1753436
URL: http://svn.apache.org/viewvc?rev=1753436&view=rev
Log:
OAK-4200: [BlobGC] Improve collection times of blobs available
* Added an overloaded JMX method to force retrieval of blob ids from the blobstore
rather than using any local tracking
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
Wed Jul 20 03:57:50 2016
@@ -93,6 +93,22 @@ public class BlobGC extends AnnotatedSt
return getBlobGCStatus();
}
+ @Override
+ public CompositeData startBlobGC(final boolean markOnly, final boolean
forceBlobIdRetrieve) {
+ if (gcOp.isDone()) {
+ gcOp = newManagementOperation(OP_NAME, new Callable<String>() {
+ @Override
+ public String call() throws Exception {
+ long t0 = nanoTime();
+ blobGarbageCollector.collectGarbage(markOnly,
forceBlobIdRetrieve);
+ return "Blob gc completed in " + formatTime(nanoTime() -
t0);
+ }
+ });
+ executor.execute(gcOp);
+ }
+ return getBlobGCStatus();
+ }
+
@Nonnull
@Override
public CompositeData getBlobGCStatus() {
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
Wed Jul 20 03:57:50 2016
@@ -46,6 +46,22 @@ public interface BlobGCMBean {
"This mode is to be used when the underlying BlobStore is
shared between multiple " +
"different repositories. For all other cases set it to
false to perform full garbage collection")
boolean markOnly);
+
+ /**
+ * Initiate a data store garbage collection operation.
+ *
+ * @param markOnly whether to only mark references and not sweep in the
mark and sweep operation.
+ * @param forceBlobIdRetrieve whether to force retrieve blob ids from
datastore
+ * @return the status of the operation right after it was initiated
+ */
+ CompositeData startBlobGC(@Name("markOnly")
+ @Description("Set to true to only mark references and not sweep in the
mark and sweep operation. " +
+ "This mode is to be used when the underlying BlobStore is shared
between multiple " +
+ "different repositories. For all other cases set it to false to
perform full garbage collection")
+ boolean markOnly, @Name("forceBlobIdRetrieve")
+ @Description("Set to true to force retrieve all ids from the datastore
bypassing any local tracking")
+ boolean forceBlobIdRetrieve);
+
/**
* Data store garbage collection status
*
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
Wed Jul 20 03:57:50 2016
@@ -31,7 +31,18 @@ public interface BlobGarbageCollector {
* @throws Exception the exception
*/
void collectGarbage(boolean markOnly) throws Exception;
-
+
+ /**
+ * Marks garbage blobs from the passed node store instance.
+ * Collects them only if markOnly is false. When forceBlobRetrieve is true,
+ * blob ids are retrieved from the blob store rather than from any local tracking.
+ *
+ * @param markOnly whether to only mark references and not sweep in the
mark and sweep operation.
+ * @param forceBlobRetrieve whether to force retrieve of blob ids from
datastore
+ * @throws Exception the exception
+ */
+ void collectGarbage(boolean markOnly, boolean forceBlobRetrieve) throws
Exception;
+
/**
* Retuns the list of stats
*
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
Wed Jul 20 03:57:50 2016
@@ -170,7 +170,12 @@ public class MarkSweepGarbageCollector i
@Override
public void collectGarbage(boolean markOnly) throws Exception {
- markAndSweep(markOnly);
+ markAndSweep(markOnly, false);
+ }
+
+ @Override
+ public void collectGarbage(boolean markOnly, boolean forceBlobRetrieve)
throws Exception {
+ markAndSweep(markOnly, forceBlobRetrieve);
}
/**
@@ -245,9 +250,10 @@ public class MarkSweepGarbageCollector i
* Mark and sweep. Main entry method for GC.
*
* @param markOnly whether to mark only
+ * @param forceBlobRetrieve force retrieve blob ids
* @throws Exception the exception
*/
- protected void markAndSweep(boolean markOnly) throws Exception {
+ protected void markAndSweep(boolean markOnly, boolean forceBlobRetrieve)
throws Exception {
boolean threw = true;
GarbageCollectorFileState fs = new GarbageCollectorFileState(root);
try {
@@ -257,7 +263,7 @@ public class MarkSweepGarbageCollector i
long markStart = System.currentTimeMillis();
mark(fs);
if (!markOnly) {
- long deleteCount = sweep(fs, markStart);
+ long deleteCount = sweep(fs, markStart, forceBlobRetrieve);
threw = false;
long maxTime = getLastMaxModifiedTime(markStart) > 0 ?
getLastMaxModifiedTime(markStart) : markStart;
@@ -348,8 +354,9 @@ public class MarkSweepGarbageCollector i
* @throws Exception the exception
* @param fs the garbage collector file state
* @param markStart the start time of mark to take as reference for
deletion
+ * @param forceBlobRetrieve whether to force retrieval of blob ids from the blob store
*/
- protected long sweep(GarbageCollectorFileState fs, long markStart) throws
Exception {
+ protected long sweep(GarbageCollectorFileState fs, long markStart, boolean
forceBlobRetrieve) throws Exception {
long earliestRefAvailTime;
// Merge all the blob references available from all the reference
files in the data store meta store
// Only go ahead if merge succeeded
@@ -363,7 +370,7 @@ public class MarkSweepGarbageCollector i
}
// Find all blob references after iterating over the whole repository
- (new BlobIdRetriever(fs)).call();
+ (new BlobIdRetriever(fs, forceBlobRetrieve)).call();
// Calculate the references not used
difference(fs);
@@ -513,7 +520,8 @@ public class MarkSweepGarbageCollector i
LOG.info("Starting blob consistency check");
// Find all blobs available in the blob store
- ListenableFutureTask<Integer> blobIdRetriever =
ListenableFutureTask.create(new BlobIdRetriever(fs));
+ ListenableFutureTask<Integer> blobIdRetriever =
ListenableFutureTask.create(new BlobIdRetriever(fs,
+ false));
executor.execute(blobIdRetriever);
// Mark all used blob references
@@ -552,23 +560,25 @@ public class MarkSweepGarbageCollector i
*/
private class BlobIdRetriever implements Callable<Integer> {
private final GarbageCollectorFileState fs;
-
- public BlobIdRetriever(GarbageCollectorFileState fs) {
+ private final boolean forceRetrieve;
+
+ public BlobIdRetriever(GarbageCollectorFileState fs, boolean
forceBlobRetrieve) {
this.fs = fs;
+ this.forceRetrieve = forceBlobRetrieve;
}
@Override
public Integer call() throws Exception {
- BlobCollectionType.get(blobStore).retrieve(blobStore, fs,
getBatchCount());
- long length = fs.getAvailableRefs().length();
- LOG.info("Length of blob ids file retrieved {}", length);
+ if (!forceRetrieve) {
+ BlobCollectionType.get(blobStore).retrieve(blobStore, fs,
getBatchCount());
+ LOG.info("Length of blob ids file retrieved from tracker {}",
fs.getAvailableRefs().length());
+ }
// If the length is 0 then references not available from the
tracker
// retrieve from the data store
if (fs.getAvailableRefs().length() <= 0) {
BlobCollectionType.DEFAULT.retrieve(blobStore, fs,
getBatchCount());
- length = fs.getAvailableRefs().length();
- LOG.info("Length of blob ids file retrieved {}", length);
+ LOG.info("Length of blob ids file retrieved {}",
fs.getAvailableRefs().length());
BlobCollectionType.get(blobStore).track(blobStore, fs);
}
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
Wed Jul 20 03:57:50 2016
@@ -29,6 +29,7 @@ import java.util.Set;
import java.util.concurrent.ScheduledFuture;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.plugins.blob.BlobTrackingStore;
import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
@@ -144,6 +145,69 @@ public class DataStoreTrackerGCTest {
assertEquals(state.blobsPresent, retrieveTracked(tracker));
}
+ private HashSet<String> addNodeSpecialChars(DocumentNodeStore ds) throws
Exception {
+ List<String> specialCharSets =
+ Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd",
"012\\efg" );
+ HashSet<String> set = new HashSet<String>();
+ NodeBuilder a = ds.getRoot().builder();
+ int toBeDeleted = 0;
+ for (int i = 0; i < specialCharSets.size(); i++) {
+ Blob b = ds.createBlob(randomStream(i, 18432));
+ NodeBuilder n = a.child("cspecial" + i);
+ n.child(specialCharSets.get(i)).setProperty("x", b);
+ Iterator<String> idIter =
+ ((GarbageCollectableBlobStore) ds.getBlobStore())
+ .resolveChunks(b.toString());
+ List<String> ids = Lists.newArrayList(idIter);
+ if (toBeDeleted != i) {
+ set.addAll(ids);
+ }
+ }
+ ds.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ // Delete one node again
+ a = ds.getRoot().builder();
+ a.child("cspecial" + 0).remove();
+ ds.merge(a, INSTANCE, EMPTY);
+ long maxAge = 10; // minutes
+ // 1. Go past GC age and check no GC done as nothing deleted
+ clock.waitUntil(clock.getTime() + MINUTES.toMillis(maxAge));
+
+ VersionGarbageCollector vGC = ds.getVersionGarbageCollector();
+ VersionGarbageCollector.VersionGCStats stats = vGC.gc(0, MILLISECONDS);
+ return set;
+ }
+
+ @Test
+ public void gcForcedRetrieve() throws Exception {
+ Cluster cluster = new Cluster("cluster1");
+ BlobStore s = cluster.blobStore;
+ BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore)
s).getTracker();
+ DataStoreState state = init(cluster.nodeStore, 0);
+ ScheduledFuture<?> scheduledFuture = newSingleThreadScheduledExecutor()
+ .schedule(tracker.new SnapshotJob(), 0, MILLISECONDS);
+ scheduledFuture.get();
+ // All blobs added should be tracked now
+ assertEquals(state.blobsAdded, retrieveTracked(tracker));
+
+ // Do addition and deletion which would not have been tracked as yet
+ Set<String> newBlobs = addNodeSpecialChars(cluster.nodeStore);
+ state.blobsAdded.addAll(newBlobs);
+ state.blobsPresent.addAll(newBlobs);
+
+ // The new blobs should not be found now as new snapshot not done
+ assertEquals(Sets.difference(state.blobsAdded,
retrieveTracked(tracker)), newBlobs);
+
+ //force gc to retrieve blob ids from datastore
+ cluster.gc.collectGarbage(false, true);
+ Set<String> existingAfterGC = iterate(s);
+
+ // Check the state of the blob store after gc
+ assertEquals(state.blobsPresent, existingAfterGC);
+ // Tracked blobs should reflect deletions after gc and also the
additions after
+ assertEquals(state.blobsPresent, retrieveTracked(tracker));
+ }
+
@Test
public void gcWithInlined() throws Exception {
Cluster cluster = new Cluster("cluster1");
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
Wed Jul 20 03:57:50 2016
@@ -460,7 +460,7 @@ public class MongoBlobGCTest extends Abs
}
@Override
- protected void markAndSweep(boolean markOnly) throws Exception {
+ protected void markAndSweep(boolean markOnly, boolean
forceBlobRetrieve) throws Exception {
boolean threw = true;
GarbageCollectorFileState fs = new GarbageCollectorFileState(root);
try {
@@ -481,7 +481,7 @@ public class MongoBlobGCTest extends Abs
Thread.sleep(maxLastModifiedInterval + 100);
LOG.info("Slept {} to make additional blobs old",
maxLastModifiedInterval + 100);
- long deleteCount = sweep(fs, markStart);
+ long deleteCount = sweep(fs, markStart, forceBlobRetrieve);
threw = false;
LOG.info("Blob garbage collection completed in {}. Number
of blobs deleted [{}]", sw.toString(),
Modified:
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java?rev=1753436&r1=1753435&r2=1753436&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
(original)
+++
jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
Wed Jul 20 03:57:50 2016
@@ -436,7 +436,7 @@ public class SegmentDataStoreBlobGCIT {
}
@Override
- protected void markAndSweep(boolean markOnly) throws Exception {
+ protected void markAndSweep(boolean markOnly, boolean
forceBlobRetrieve) throws Exception {
boolean threw = true;
GarbageCollectorFileState fs = new GarbageCollectorFileState(root);
try {
@@ -457,7 +457,7 @@ public class SegmentDataStoreBlobGCIT {
Thread.sleep(maxLastModifiedInterval + 100);
LOG.info("Slept {} to make additional blobs old",
maxLastModifiedInterval + 100);
- long deleteCount = sweep(fs, markStart);
+ long deleteCount = sweep(fs, markStart, forceBlobRetrieve);
threw = false;
LOG.info("Blob garbage collection completed in {}. Number
of blobs deleted [{}]", sw.toString(),