Author: chetanm
Date: Fri Mar 28 17:11:08 2014
New Revision: 1582834

URL: http://svn.apache.org/r1582834
Log:
OAK-1341 - DocumentNodeStore: Implement revision garbage collection (WIP)

GC of split documents

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
 Fri Mar 28 17:11:08 2014
@@ -253,6 +253,10 @@ public final class NodeDocument extends 
             this.type = type;
         }
 
+        public int typeCode() {
+            return type;
+        }
+
         static SplitDocType valueOf(Integer type){
             if(type == null){
                 return NONE;
@@ -364,6 +368,19 @@ public final class NodeDocument extends 
     }
 
     /**
+     * Checks if revision time of all entries in this document is less than the passed
+     * time
+     *
+     * @param maxRevisionTime timestamp (in millis) of revision to check
+     * @return <tt>true</tt> if timestamp of maximum revision stored in this document
+     * is less than the passed revision timestamp
+     */
+    public boolean hasAllRevisionLessThan(long maxRevisionTime){
+        Long maxRevTimeStamp = (Long) get(SD_MAX_REV_TIME_IN_SECS);
+        return maxRevTimeStamp != null && maxRevTimeStamp < 
TimeUnit.MILLISECONDS.toSeconds(maxRevisionTime);
+    }
+
+    /**
      * Determines if this document is a split document
      *
      * @return <tt>true</tt> if this document is a split document

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
 Fri Mar 28 17:11:08 2014
@@ -20,9 +20,13 @@
 package org.apache.jackrabbit.oak.plugins.document;
 
 import java.util.List;
+import java.util.Set;
 
 import com.google.common.base.Predicate;
 import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 
 public class VersionGCSupport {
     private final DocumentStore store;
@@ -32,14 +36,29 @@ public class VersionGCSupport {
     }
 
     public Iterable<NodeDocument> getPossiblyDeletedDocs(final long 
lastModifiedTime) {
-        //Fetch all documents.
-        List<NodeDocument> nodes = 
store.query(Collection.NODES,NodeDocument.MIN_ID_VALUE,
-                NodeDocument.MAX_ID_VALUE, Integer.MAX_VALUE);
-        return Iterables.filter(nodes, new Predicate<NodeDocument>() {
+        return Iterables.filter(getAllDocuments(), new 
Predicate<NodeDocument>() {
             @Override
             public boolean apply(NodeDocument input) {
                 return input.wasDeletedOnce() && 
!input.hasBeenModifiedSince(lastModifiedTime);
             }
         });
     }
+
+    public int deleteSplitDocuments(Set<SplitDocType> gcTypes, long 
oldestRevTimeStamp) {
+        List<String> docsToDelete = Lists.newArrayList();
+        for(NodeDocument doc : getAllDocuments()){
+            SplitDocType splitType = doc.getSplitDocType();
+            if(gcTypes.contains(splitType) && 
doc.hasAllRevisionLessThan(oldestRevTimeStamp)){
+                docsToDelete.add(doc.getId());
+            }
+        }
+        store.remove(Collection.NODES, docsToDelete);
+        return docsToDelete.size();
+    }
+
+    private List<NodeDocument> getAllDocuments() {
+        //Fetch all documents.
+        return store.query(Collection.NODES,NodeDocument.MIN_ID_VALUE,
+                NodeDocument.MAX_ID_VALUE, Integer.MAX_VALUE);
+    }
 }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 Fri Mar 28 17:11:08 2014
@@ -22,7 +22,9 @@ package org.apache.jackrabbit.oak.plugin
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.EnumSet;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import com.google.common.collect.ImmutableList;
@@ -37,6 +39,14 @@ class VersionGarbageCollector {
 
     private final Logger log = LoggerFactory.getLogger(getClass());
 
+    /**
+     * Split document types which can be safely Garbage Collected
+     */
+    private static final Set<NodeDocument.SplitDocType> GC_TYPES = EnumSet.of(
+            NodeDocument.SplitDocType.DEFAULT_NO_CHILD,
+            NodeDocument.SplitDocType.PROP_COMMIT_ONLY,
+            NodeDocument.SplitDocType.INTERMEDIATE);
+
     private volatile long maxRevisionAge = TimeUnit.DAYS.toMillis(1);
 
     VersionGarbageCollector(DocumentNodeStore nodeStore) {
@@ -67,10 +77,16 @@ class VersionGarbageCollector {
         }
 
         collectDeletedDocuments(stats, headRevision, oldestRevTimeStamp);
+        collectSplitDocuments(stats, oldestRevTimeStamp);
 
         return stats;
     }
 
+    private void collectSplitDocuments(VersionGCStats stats, long 
oldestRevTimeStamp) {
+        int count = versionStore.deleteSplitDocuments(GC_TYPES, 
oldestRevTimeStamp);
+        stats.splitDocGCCount += count;
+    }
+
     private void collectDeletedDocuments(VersionGCStats stats, Revision 
headRevision, long oldestRevTimeStamp) {
         List<String> docIdsToDelete = new ArrayList<String>();
         Iterable<NodeDocument> itr = 
versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
@@ -92,7 +108,7 @@ class VersionGarbageCollector {
             close(itr);
         }
         nodeStore.getDocumentStore().remove(Collection.NODES, docIdsToDelete);
-        stats.deletedDocCount += docIdsToDelete.size();
+        stats.deletedDocGCCount += docIdsToDelete.size();
     }
 
     public void setMaxRevisionAge(long maxRevisionAge) {
@@ -105,7 +121,8 @@ class VersionGarbageCollector {
 
     public static class VersionGCStats {
         boolean ignoredGCDueToCheckPoint;
-        int deletedDocCount;
+        int deletedDocGCCount;
+        int splitDocGCCount;
     }
 
     private void close(Object obj){

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 Fri Mar 28 17:11:08 2014
@@ -19,19 +19,27 @@
 
 package org.apache.jackrabbit.oak.plugins.document.mongo;
 
+import java.util.Set;
+
 import com.google.common.base.Function;
 import com.mongodb.DBCollection;
 import com.mongodb.DBCursor;
 import com.mongodb.DBObject;
 import com.mongodb.QueryBuilder;
 import com.mongodb.ReadPreference;
+import com.mongodb.WriteConcern;
+import com.mongodb.WriteResult;
 import org.apache.jackrabbit.oak.plugins.document.Collection;
 import org.apache.jackrabbit.oak.plugins.document.Commit;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
 import org.apache.jackrabbit.oak.plugins.document.VersionGCSupport;
 import org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import static com.google.common.collect.Iterables.transform;
+import static com.mongodb.QueryBuilder.start;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 
 /**
  * Mongo specific version of VersionGCSupport which uses mongo queries
@@ -41,6 +49,7 @@ import static com.google.common.collect.
  * documents. In such case read from secondaries are preferred</p>
  */
 public class MongoVersionGCSupport extends VersionGCSupport {
+    private final Logger log = LoggerFactory.getLogger(getClass());
     private final MongoDocumentStore store;
 
     public MongoVersionGCSupport(MongoDocumentStore store) {
@@ -51,8 +60,8 @@ public class MongoVersionGCSupport exten
     @Override
     public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long 
lastModifiedTime) {
         //_deletedOnce == true && _modified < lastModifiedTime
-        DBObject query = QueryBuilder
-                                
.start(NodeDocument.DELETED_ONCE).is(Boolean.TRUE)
+        DBObject query =
+                start(NodeDocument.DELETED_ONCE).is(Boolean.TRUE)
                                 
.put(NodeDocument.MODIFIED_IN_SECS).lessThan(Commit.getModifiedInSecs(lastModifiedTime))
                         .get();
         DBCursor cursor = 
getNodeCollection().find(query).setReadPreference(ReadPreference.secondaryPreferred());
@@ -64,6 +73,28 @@ public class MongoVersionGCSupport exten
         }), cursor);
     }
 
+    @Override
+    public int deleteSplitDocuments(Set<SplitDocType> gcTypes, long 
oldestRevTimeStamp) {
+        //OR condition has to be first as we have an index for that
+        QueryBuilder orClause = start();
+        for(SplitDocType type : gcTypes){
+            orClause.or(start(NodeDocument.SD_TYPE).is(type.typeCode()).get());
+        }
+        DBObject query = start()
+                .and(
+                    orClause.get(),
+                    start(NodeDocument.SD_MAX_REV_TIME_IN_SECS)
+                        .lessThan(Commit.getModifiedInSecs(oldestRevTimeStamp))
+                        .get()
+                ).get();
+        WriteResult writeResult = getNodeCollection().remove(query, 
WriteConcern.SAFE);
+        if (writeResult.getError() != null) {
+            //TODO This might be temporary error or we fail fast and let next 
cycle try again
+            log.warn("Error occurred while deleting old split documents from 
Mongo {}", writeResult.getError());
+        }
+        return writeResult.getN();
+    }
+
     private DBCollection getNodeCollection(){
         return store.getDBCollection(Collection.NODES);
     }

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
 Fri Mar 28 17:11:08 2014
@@ -31,6 +31,9 @@ import org.apache.jackrabbit.oak.commons
 import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
 import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.junit.Test;
 
 import com.google.common.collect.Iterables;
@@ -325,6 +328,29 @@ public class DocumentSplitTest extends B
     }
 
     @Test
+    public void testSplitPropAndCommitOnly() throws Exception{
+        DocumentStore store = mk.getDocumentStore();
+        DocumentNodeStore ns = mk.getNodeStore();
+        NodeBuilder b1 = ns.getRoot().builder();
+        b1.child("test").child("foo").child("bar");
+        ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Commit on a node which has a child and where the commit root
+        // is parent
+        for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) {
+            b1 = ns.getRoot().builder();
+            b1.child("test").child("foo").setProperty("prop",i);
+            b1.child("test").setProperty("prop",i);
+            ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        }
+        ns.runBackgroundOperations();
+        NodeDocument doc = store.find(NODES, Utils.getIdFromPath("/test/foo"));
+        List<NodeDocument> prevDocs = 
ImmutableList.copyOf(doc.getAllPreviousDocs());
+        assertEquals(1, prevDocs.size());
+        assertEquals(SplitDocType.PROP_COMMIT_ONLY, 
prevDocs.get(0).getSplitDocType());
+    }
+
+    @Test
     public void cascadingSplit() {
         cascadingSplit("/test/node");
     }

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java?rev=1582834&r1=1582833&r2=1582834&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorTest.java
 Fri Mar 28 17:11:08 2014
@@ -24,12 +24,17 @@ import java.util.*;
 import java.util.Collection;
 import java.util.concurrent.TimeUnit;
 
+import static org.apache.jackrabbit.oak.plugins.document.Collection.*;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
 import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
@@ -117,7 +122,7 @@ public class VersionGarbageCollectorTest
         //1. Go past GC age and check no GC done as nothing deleted
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc();
-        assertEquals(0, stats.deletedDocCount);
+        assertEquals(0, stats.deletedDocGCCount);
 
         //Remove x/y
         NodeBuilder b2 = store.getRoot().builder();
@@ -132,13 +137,13 @@ public class VersionGarbageCollectorTest
         gc.setMaxRevisionAge(maxAge*2);
         clock.waitUntil(clock.getTime() + delta);
         stats = gc.gc();
-        assertEquals(0, stats.deletedDocCount);
+        assertEquals(0, stats.deletedDocGCCount);
 
         //3. Check that deleted doc does get collected post maxAge
         clock.waitUntil(clock.getTime() + gc.getMaxRevisionAge() + delta);
 
         stats = gc.gc();
-        assertEquals(1, stats.deletedDocCount);
+        assertEquals(1, stats.deletedDocGCCount);
 
         //4. Check that a revived doc (deleted and created again) does not get 
gc
         NodeBuilder b3 = store.getRoot().builder();
@@ -151,9 +156,60 @@ public class VersionGarbageCollectorTest
 
         clock.waitUntil(clock.getTime() + gc.getMaxRevisionAge() + delta);
         stats = gc.gc();
-        assertEquals(0, stats.deletedDocCount);
+        assertEquals(0, stats.deletedDocGCCount);
 
-        //TODO Add test scenario for deletion along with previous docs
+    }
+
+    @Test
+    public void gcSplitDocs() throws Exception{
+        long maxAge = TimeUnit.HOURS.toMillis(1), delta = 
TimeUnit.MINUTES.toMillis(10);
+        gc.setMaxRevisionAge(maxAge);
+
+        NodeBuilder b1 = store.getRoot().builder();
+        b1.child("test").child("foo").child("bar");
+        b1.child("test2").child("foo");
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Commit on a node which has a child and where the commit root
+        // is parent
+        for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) {
+            b1 = store.getRoot().builder();
+            //This updates a middle node i.e. one which has child bar
+            //Should result in SplitDoc of type PROP_COMMIT_ONLY
+            b1.child("test").child("foo").setProperty("prop",i);
+
+            //This should result in SplitDoc of type DEFAULT_NO_CHILD
+            b1.child("test2").child("foo").setProperty("prop", i);
+            store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        }
+        store.runBackgroundOperations();
+
+        List<NodeDocument> previousDocTestFoo =
+                ImmutableList.copyOf(getDoc("/test/foo").getAllPreviousDocs());
+        List<NodeDocument> previousDocTestFoo2 =
+                
ImmutableList.copyOf(getDoc("/test2/foo").getAllPreviousDocs());
+
+        assertEquals(1, previousDocTestFoo.size());
+        assertEquals(1, previousDocTestFoo2.size());
+
+        assertEquals(SplitDocType.PROP_COMMIT_ONLY, 
previousDocTestFoo.get(0).getSplitDocType());
+        assertEquals(SplitDocType.DEFAULT_NO_CHILD, 
previousDocTestFoo2.get(0).getSplitDocType());
+
+        clock.waitUntil(clock.getTime() + gc.getMaxRevisionAge() + delta);
+        VersionGCStats stats = gc.gc();
+        assertEquals(2, stats.splitDocGCCount);
+
+        //Previous doc should be removed
+        assertNull(getDoc(previousDocTestFoo.get(0).getPath()));
+        assertNull(getDoc(previousDocTestFoo2.get(0).getPath()));
+
+        //Following would not work for Mongo as the delete happened on the 
server side
+        //And entries from cache are not evicted
+        
//assertTrue(ImmutableList.copyOf(getDoc("/test2/foo").getAllPreviousDocs()).isEmpty());
+    }
+
+    private NodeDocument getDoc(String path){
+        return store.getDocumentStore().find(NODES, Utils.getIdFromPath(path), 
0);
     }
 
 }
\ No newline at end of file


Reply via email to