Author: chetanm
Date: Fri Mar 28 17:11:08 2014
New Revision: 1582834
URL: http://svn.apache.org/r1582834
Log:
OAK-1341 - DocumentNodeStore: Implement revision garbage collection (WIP)
GC of split documents
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
Fri Mar 28 17:11:08 2014
@@ -253,6 +253,10 @@ public final class NodeDocument extends
this.type = type;
}
+ public int typeCode() { // exposes the integer code held in this type's 'type' field
+ return type;
+ }
+
static SplitDocType valueOf(Integer type){
if(type == null){
return NONE;
@@ -364,6 +368,19 @@ public final class NodeDocument extends
}
/**
+ * Checks if revision time of all entries in this document is less than
the passed
+ * time
+ *
+ * @param maxRevisionTime timestamp (in millis) of revision to check
+ * @return <tt>true</tt> if timestamp of maximum revision stored in this
document
+ * is less than the passed revision timestamp; <tt>false</tt> otherwise, including when no maximum revision time is stored
+ */
+ public boolean hasAllRevisionLessThan(long maxRevisionTime){
+ Long maxRevTimeStamp = (Long) get(SD_MAX_REV_TIME_IN_SECS); // may be null; compared below against a value in seconds
+ return maxRevTimeStamp != null && maxRevTimeStamp <
TimeUnit.MILLISECONDS.toSeconds(maxRevisionTime); // the millisecond argument is converted to whole seconds before comparing
+ }
+
+ /**
* Determines if this document is a split document
*
* @return <tt>true</tt> if this document is a split document
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
Fri Mar 28 17:11:08 2014
@@ -20,9 +20,13 @@
package org.apache.jackrabbit.oak.plugins.document;
import java.util.List;
+import java.util.Set;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+import static
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
public class VersionGCSupport {
private final DocumentStore store;
@@ -32,14 +36,29 @@ public class VersionGCSupport {
}
public Iterable<NodeDocument> getPossiblyDeletedDocs(final long
lastModifiedTime) {
- //Fetch all documents.
- List<NodeDocument> nodes =
store.query(Collection.NODES,NodeDocument.MIN_ID_VALUE,
- NodeDocument.MAX_ID_VALUE, Integer.MAX_VALUE);
- return Iterables.filter(nodes, new Predicate<NodeDocument>() {
+ return Iterables.filter(getAllDocuments(), new
Predicate<NodeDocument>() {
@Override
public boolean apply(NodeDocument input) {
return input.wasDeletedOnce() &&
!input.hasBeenModifiedSince(lastModifiedTime);
}
});
}
+
+ public int deleteSplitDocuments(Set<SplitDocType> gcTypes, long
oldestRevTimeStamp) { // removes documents whose split type is in gcTypes and whose revisions are all older than oldestRevTimeStamp; returns how many ids were handed to remove()
+ List<String> docsToDelete = Lists.newArrayList();
+ for(NodeDocument doc : getAllDocuments()){ // examines every document returned by getAllDocuments()
+ SplitDocType splitType = doc.getSplitDocType();
+ if(gcTypes.contains(splitType) &&
doc.hasAllRevisionLessThan(oldestRevTimeStamp)){
+ docsToDelete.add(doc.getId());
+ }
+ }
+ store.remove(Collection.NODES, docsToDelete); // one remove call for all collected ids
+ return docsToDelete.size(); // size of the id list, not a count confirmed by the store
+ }
+
+ private List<NodeDocument> getAllDocuments() {
+ //Fetch all documents: queries the NODES collection from MIN_ID_VALUE to MAX_ID_VALUE, passing Integer.MAX_VALUE as the last argument (presumably the result limit - TODO confirm).
+ return store.query(Collection.NODES,NodeDocument.MIN_ID_VALUE,
+ NodeDocument.MAX_ID_VALUE, Integer.MAX_VALUE);
+ }
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
Fri Mar 28 17:11:08 2014
@@ -22,7 +22,9 @@ package org.apache.jackrabbit.oak.plugin
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.EnumSet;
import java.util.List;
+import java.util.Set;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.ImmutableList;
@@ -37,6 +39,14 @@ class VersionGarbageCollector {
private final Logger log = LoggerFactory.getLogger(getClass());
+ /**
+ * Split document types which can be safely garbage collected; collectSplitDocuments passes this set to the version store's deleteSplitDocuments
+ */
+ private static final Set<NodeDocument.SplitDocType> GC_TYPES = EnumSet.of(
+ NodeDocument.SplitDocType.DEFAULT_NO_CHILD,
+ NodeDocument.SplitDocType.PROP_COMMIT_ONLY,
+ NodeDocument.SplitDocType.INTERMEDIATE);
+
private volatile long maxRevisionAge = TimeUnit.DAYS.toMillis(1);
VersionGarbageCollector(DocumentNodeStore nodeStore) {
@@ -67,10 +77,16 @@ class VersionGarbageCollector {
}
collectDeletedDocuments(stats, headRevision, oldestRevTimeStamp);
+ collectSplitDocuments(stats, oldestRevTimeStamp);
return stats;
}
+ private void collectSplitDocuments(VersionGCStats stats, long
oldestRevTimeStamp) { // asks the version store to delete split documents of the GC_TYPES kinds and records the result in stats
+ int count = versionStore.deleteSplitDocuments(GC_TYPES,
oldestRevTimeStamp);
+ stats.splitDocGCCount += count; // accumulated rather than overwritten
+ }
+
private void collectDeletedDocuments(VersionGCStats stats, Revision
headRevision, long oldestRevTimeStamp) {
List<String> docIdsToDelete = new ArrayList<String>();
Iterable<NodeDocument> itr =
versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
@@ -92,7 +108,7 @@ class VersionGarbageCollector {
close(itr);
}
nodeStore.getDocumentStore().remove(Collection.NODES, docIdsToDelete);
- stats.deletedDocCount += docIdsToDelete.size();
+ stats.deletedDocGCCount += docIdsToDelete.size();
}
public void setMaxRevisionAge(long maxRevisionAge) {
@@ -105,7 +121,8 @@ class VersionGarbageCollector {
public static class VersionGCStats {
boolean ignoredGCDueToCheckPoint;
- int deletedDocCount;
+ int deletedDocGCCount; // grows by the number of document ids removed in collectDeletedDocuments
+ int splitDocGCCount; // grows by the count returned from deleteSplitDocuments
}
private void close(Object obj){
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
Fri Mar 28 17:11:08 2014
@@ -19,19 +19,27 @@
package org.apache.jackrabbit.oak.plugins.document.mongo;
+import java.util.Set;
+
import com.google.common.base.Function;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.QueryBuilder;
import com.mongodb.ReadPreference;
+import com.mongodb.WriteConcern;
+import com.mongodb.WriteResult;
import org.apache.jackrabbit.oak.plugins.document.Collection;
import org.apache.jackrabbit.oak.plugins.document.Commit;
import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
import org.apache.jackrabbit.oak.plugins.document.VersionGCSupport;
import org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import static com.google.common.collect.Iterables.transform;
+import static com.mongodb.QueryBuilder.start;
+import static
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
/**
* Mongo specific version of VersionGCSupport which uses mongo queries
@@ -41,6 +49,7 @@ import static com.google.common.collect.
* documents. In such case read from secondaries are preferred</p>
*/
public class MongoVersionGCSupport extends VersionGCSupport {
+ private final Logger log = LoggerFactory.getLogger(getClass());
private final MongoDocumentStore store;
public MongoVersionGCSupport(MongoDocumentStore store) {
@@ -51,8 +60,8 @@ public class MongoVersionGCSupport exten
@Override
public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long
lastModifiedTime) {
//_deletedOnce == true && _modified < lastModifiedTime
- DBObject query = QueryBuilder
-
.start(NodeDocument.DELETED_ONCE).is(Boolean.TRUE)
+ DBObject query =
+ start(NodeDocument.DELETED_ONCE).is(Boolean.TRUE)
.put(NodeDocument.MODIFIED_IN_SECS).lessThan(Commit.getModifiedInSecs(lastModifiedTime))
.get();
DBCursor cursor =
getNodeCollection().find(query).setReadPreference(ReadPreference.secondaryPreferred());
@@ -64,6 +73,28 @@ public class MongoVersionGCSupport exten
}), cursor);
}
+ @Override
+ public int deleteSplitDocuments(Set<SplitDocType> gcTypes, long
oldestRevTimeStamp) { // builds one query (SD_TYPE in gcTypes AND SD_MAX_REV_TIME_IN_SECS below the cutoff) and issues a single remove() on the nodes collection
+ //OR condition has to be first as we have an index for that
+ QueryBuilder orClause = start();
+ for(SplitDocType type : gcTypes){
+ orClause.or(start(NodeDocument.SD_TYPE).is(type.typeCode()).get()); // one SD_TYPE equality clause per collectable type
+ }
+ DBObject query = start()
+ .and(
+ orClause.get(),
+ start(NodeDocument.SD_MAX_REV_TIME_IN_SECS)
+ .lessThan(Commit.getModifiedInSecs(oldestRevTimeStamp)) // NOTE(review): NodeDocument.hasAllRevisionLessThan converts the cutoff with TimeUnit.MILLISECONDS.toSeconds instead - confirm both conversions yield the same value
+ .get()
+ ).get();
+ WriteResult writeResult = getNodeCollection().remove(query,
WriteConcern.SAFE);
+ if (writeResult.getError() != null) {
+ //TODO This might be a temporary error, or we fail fast and let the next
cycle try again
+ log.warn("Error occurred while deleting old split documents from
Mongo {}", writeResult.getError());
+ }
+ return writeResult.getN(); // returned even when an error was logged above
+ }
+
private DBCollection getNodeCollection(){
return store.getDBCollection(Collection.NODES);
}
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
Fri Mar 28 17:11:08 2014
@@ -31,6 +31,9 @@ import org.apache.jackrabbit.oak.commons
import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
import org.apache.jackrabbit.oak.plugins.document.util.Utils;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.junit.Test;
import com.google.common.collect.Iterables;
@@ -325,6 +328,29 @@ public class DocumentSplitTest extends B
}
@Test
+ public void testSplitPropAndCommitOnly() throws Exception{ // expects exactly one previous document of type PROP_COMMIT_ONLY for /test/foo
+ DocumentStore store = mk.getDocumentStore();
+ DocumentNodeStore ns = mk.getNodeStore();
+ NodeBuilder b1 = ns.getRoot().builder();
+ b1.child("test").child("foo").child("bar"); // /test/foo gets a child "bar"
+ ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ //Commit on a node which has a child and where the commit root
+ // is parent
+ for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) { // one merge per iteration, NUM_REVS_THRESHOLD merges in total
+ b1 = ns.getRoot().builder();
+ b1.child("test").child("foo").setProperty("prop",i);
+ b1.child("test").setProperty("prop",i);
+ ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+ }
+ ns.runBackgroundOperations(); // presumably where the document split is carried out - TODO confirm
+ NodeDocument doc = store.find(NODES, Utils.getIdFromPath("/test/foo"));
+ List<NodeDocument> prevDocs =
ImmutableList.copyOf(doc.getAllPreviousDocs());
+ assertEquals(1, prevDocs.size());
+ assertEquals(SplitDocType.PROP_COMMIT_ONLY,
prevDocs.get(0).getSplitDocType());
+ }
+
+ @Test
public void cascadingSplit() {
cascadingSplit("/test/node");
}
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java
Fri Mar 28 17:11:08 2014
@@ -24,12 +24,17 @@ import java.util.*;
import java.util.Collection;
import java.util.concurrent.TimeUnit;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.*;
+import static
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
import static
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
@@ -117,7 +122,7 @@ public class VersionGarbageCollectorTest
//1. Go past GC age and check no GC done as nothing deleted
clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
VersionGCStats stats = gc.gc();
- assertEquals(0, stats.deletedDocCount);
+ assertEquals(0, stats.deletedDocGCCount);
//Remove x/y
NodeBuilder b2 = store.getRoot().builder();
@@ -132,13 +137,13 @@ public class VersionGarbageCollectorTest
gc.setMaxRevisionAge(maxAge*2);
clock.waitUntil(clock.getTime() + delta);
stats = gc.gc();
- assertEquals(0, stats.deletedDocCount);
+ assertEquals(0, stats.deletedDocGCCount);
//3. Check that deleted doc does get collected post maxAge
clock.waitUntil(clock.getTime() + gc.getMaxRevisionAge() + delta);
stats = gc.gc();
- assertEquals(1, stats.deletedDocCount);
+ assertEquals(1, stats.deletedDocGCCount);
//4. Check that a revived doc (deleted and created again) does not get
gc
NodeBuilder b3 = store.getRoot().builder();
@@ -151,9 +156,60 @@ public class VersionGarbageCollectorTest
clock.waitUntil(clock.getTime() + gc.getMaxRevisionAge() + delta);
stats = gc.gc();
- assertEquals(0, stats.deletedDocCount);
+ assertEquals(0, stats.deletedDocGCCount);
- //TODO Add test scenario for deletion along with previous docs
+ }
+
+ @Test
+ public void gcSplitDocs() throws Exception{ // expects gc() to remove the two split documents created below once the max revision age has passed
+ long maxAge = TimeUnit.HOURS.toMillis(1), delta =
TimeUnit.MINUTES.toMillis(10); // one hour max revision age, ten minute margin
+ gc.setMaxRevisionAge(maxAge);
+
+ NodeBuilder b1 = store.getRoot().builder();
+ b1.child("test").child("foo").child("bar");
+ b1.child("test2").child("foo");
+ store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ //Commit on a node which has a child and where the commit root
+ // is parent
+ for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) {
+ b1 = store.getRoot().builder();
+ //This updates a middle node i.e. one which has child bar
+ //Should result in SplitDoc of type PROP_COMMIT_ONLY
+ b1.child("test").child("foo").setProperty("prop",i);
+
+ //This should result in SplitDoc of type DEFAULT_NO_CHILD
+ b1.child("test2").child("foo").setProperty("prop", i);
+ store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+ }
+ store.runBackgroundOperations(); // presumably where the document splits are carried out - TODO confirm
+
+ List<NodeDocument> previousDocTestFoo =
+ ImmutableList.copyOf(getDoc("/test/foo").getAllPreviousDocs());
+ List<NodeDocument> previousDocTestFoo2 =
+
ImmutableList.copyOf(getDoc("/test2/foo").getAllPreviousDocs());
+
+ assertEquals(1, previousDocTestFoo.size());
+ assertEquals(1, previousDocTestFoo2.size());
+
+ assertEquals(SplitDocType.PROP_COMMIT_ONLY,
previousDocTestFoo.get(0).getSplitDocType());
+ assertEquals(SplitDocType.DEFAULT_NO_CHILD,
previousDocTestFoo2.get(0).getSplitDocType());
+
+ clock.waitUntil(clock.getTime() + gc.getMaxRevisionAge() + delta); // wait until the clock is past the max revision age plus the margin
+ VersionGCStats stats = gc.gc();
+ assertEquals(2, stats.splitDocGCCount); // one split document each for /test/foo and /test2/foo
+
+ //Previous docs should be removed
+ assertNull(getDoc(previousDocTestFoo.get(0).getPath()));
+ assertNull(getDoc(previousDocTestFoo2.get(0).getPath()));
+
+ //The following would not work for Mongo as the delete happened on the
server side
+ //and entries from the cache are not evicted
+
//assertTrue(ImmutableList.copyOf(getDoc("/test2/foo").getAllPreviousDocs()).isEmpty());
+ }
+
+ private NodeDocument getDoc(String path){ // test helper: looks up the document for the given path in the node store's DocumentStore
+ return store.getDocumentStore().find(NODES, Utils.getIdFromPath(path),
0); // NOTE(review): the third argument 0 presumably is a max cache age forcing a fresh read - confirm
}
}
\ No newline at end of file