Author: chetanm
Date: Fri Mar 28 09:46:53 2014
New Revision: 1582659

URL: http://svn.apache.org/r1582659
Log:
OAK-1341 - DocumentNodeStore: Implement revision garbage collection (WIP)

GC of split document
-- Mark split docs depending on types
-- Added new prop and created sparse index on _sdType

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
 Fri Mar 28 09:46:53 2014
@@ -20,9 +20,11 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
+import java.util.Queue;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
@@ -34,7 +36,10 @@ import javax.annotation.Nullable;
 
 import com.google.common.base.Function;
 import com.google.common.base.Predicate;
+import com.google.common.collect.AbstractIterator;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Queues;
 import org.apache.jackrabbit.oak.cache.CacheValue;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
@@ -194,6 +199,73 @@ public final class NodeDocument extends 
 
     public static final String HAS_BINARY_FLAG = "_bin";
 
+    //~----------------------------< Split Document Types >
+
+    /**
+     * Defines the type of split document. Its value is an integer that is
+     * defined as per
+     *
+     * @see 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType
+     */
+    public static final String SD_TYPE = "_sdType";
+
+    /**
+     * Property name which refers to timestamp (long) of the latest revision 
kept
+     * in the document
+     */
+    public static final String SD_MAX_REV_TS = "_sdMaxRevTs";
+
+    /**
+     * A document which is created from splitting a main document can be 
classified
+     * into multiple types depending on the content i.e. whether it contains
+     * REVISIONS, COMMIT_ROOT, property history etc
+     */
+    public static enum SplitDocType {
+        /**
+         * Not a split document
+         */
+        NONE(-1),
+        /**
+         * A split document which contains all types of data
+         */
+        DEFAULT(10),
+        /**
+         * A split document which contains all types of data. In addition
+         * when the split document was created the main document did not have
+         * any children.
+         */
+        DEFAULT_NO_CHILD(20),
+        /**
+         * A split document which does not contain REVISIONS history
+         */
+        PROP_COMMIT_ONLY(30),
+        /**
+         * It's an intermediate split document which only contains version ranges
+         * and does not contain any other attributes
+         */
+        INTERMEDIATE(40)
+        ;
+
+        final int type;
+
+        private SplitDocType(int type){
+            this.type = type;
+        }
+
+        static SplitDocType valueOf(Integer type){
+            if(type == null){
+                return NONE;
+            }
+            for(SplitDocType docType : values()){
+                if(docType.type == type){
+                    return docType;
+                }
+            }
+            throw new IllegalArgumentException("Not a valid SplitDocType :" + 
type);
+        }
+    }
+
+
     /**
      * Properties to ignore when a document is split.
      */
@@ -260,7 +332,8 @@ public final class NodeDocument extends 
     }
 
     /**
-     * Returns <tt>true</tt> if this node possibly has children
+     * Returns <tt>true</tt> if this node possibly has children.
+     * If false then that indicates that there are no children
      *
      * @return <tt>true</tt> if this node has children
      */
@@ -290,6 +363,24 @@ public final class NodeDocument extends 
     }
 
     /**
+     * Determines if this document is a split document
+     *
+     * @return <tt>true</tt> if this document is a split document
+     */
+    public boolean isSplitDocument(){
+        return getSplitDocType() != SplitDocType.NONE;
+    }
+
+    /**
+     * Determines the type of split document
+     *
+     * @return type of Split Document
+     */
+    public SplitDocType getSplitDocType(){
+        return SplitDocType.valueOf((Integer) get(SD_TYPE));
+    }
+
+    /**
      * Mark this instance as up-to-date (matches the state in persistence
      * store).
      *
@@ -835,6 +926,7 @@ public final class NodeDocument extends 
             // check size of old document
             NodeDocument oldDoc = new NodeDocument(store);
             UpdateUtils.applyChanges(oldDoc, old, 
context.getRevisionComparator());
+            setSplitDocProps(this, oldDoc, old, high);
             // only split if half of the data can be moved to old document
             if (oldDoc.getMemory() > getMemory() * SPLIT_RATIO) {
                 splitOps.add(old);
@@ -873,6 +965,7 @@ public final class NodeDocument extends 
                 for (Range r : entry.getValue()) {
                     setPrevious(intermediate, r);
                 }
+                setIntermediateDocProps(intermediate, h);
                 splitOps.add(intermediate);
             }
         }
@@ -953,17 +1046,7 @@ public final class NodeDocument extends 
                 @Override
                 public NodeDocument apply(Map.Entry<Revision, Range> input) {
                     if (input.getValue().includes(revision)) {
-                        Revision r = input.getKey();
-                        int h = input.getValue().height;
-                        String prevId = Utils.getPreviousIdFor(mainPath, r, h);
-                        //TODO Use the maxAge variant such that in case of 
Mongo call for
-                        //previous doc are directed towards replicas first
-                        NodeDocument prev = store.find(Collection.NODES, 
prevId);
-                        if (prev != null) {
-                            return prev;
-                        } else {
-                            LOG.warn("Document with previous revisions not 
found: " + prevId);
-                        }
+                       return getPreviousDoc(input.getKey(), input.getValue());
                     }
                     return null;
                 }
@@ -977,35 +1060,43 @@ public final class NodeDocument extends 
     }
 
     @Nonnull
-    Iterable<NodeDocument> getAllPreviousDocs() {
+    Iterator<NodeDocument> getAllPreviousDocs() {
         if (getPreviousRanges().isEmpty()) {
-            return Collections.emptyList();
+            return Iterators.emptyIterator();
         }
-        final String mainPath = getMainPath();
-        return filter(transform(getPreviousRanges().entrySet(),
-                new Function<Map.Entry<Revision, Range>, NodeDocument>() {
-                    @Override
-                    public NodeDocument apply(Map.Entry<Revision, Range> 
input) {
-                        Revision r = input.getKey();
-                        int h = input.getValue().height;
-                        String prevId = Utils.getPreviousIdFor(mainPath, r, h);
-                        //TODO Use the maxAge variant such that in case of 
Mongo call for
-                        //previous doc are directed towards replicas first
-                        NodeDocument prev = store.find(Collection.NODES, 
prevId);
-                        if (prev != null) {
-                            return prev;
-                        } else {
-                            LOG.warn("Document with previous revisions not 
found: " + prevId);
-                        }
-                        return null;
+        //Currently this method would fire one query per previous doc
+        //If that poses a problem we can try to find all prev docs by relying
+        //on the property that all prevDoc ids start with <depth+2>:/path/to/node
+        return new AbstractIterator<NodeDocument>(){
+            private Queue<Map.Entry<Revision, Range>> previousRanges =
+                    Queues.newArrayDeque(getPreviousRanges().entrySet());
+            @Override
+            protected NodeDocument computeNext() {
+                if(!previousRanges.isEmpty()){
+                    Map.Entry<Revision, Range> e = previousRanges.remove();
+                    NodeDocument prev = getPreviousDoc(e.getKey(), 
e.getValue());
+                    if(prev != null){
+                        
previousRanges.addAll(prev.getPreviousRanges().entrySet());
+                        return prev;
                     }
                 }
-        ), new Predicate<NodeDocument>() {
-            @Override
-            public boolean apply(@Nullable NodeDocument input) {
-                return input != null;
+                return endOfData();
             }
-        });
+        };
+    }
+
+    private NodeDocument getPreviousDoc(Revision rev, Range range){
+        int h = range.height;
+        String prevId = Utils.getPreviousIdFor(getMainPath(), rev, h);
+        //TODO Use the maxAge variant such that in case of Mongo call for
+        //previous doc are directed towards replicas first
+        NodeDocument prev = store.find(Collection.NODES, prevId);
+        if (prev != null) {
+            return prev;
+        } else {
+            LOG.warn("Document with previous revisions not found: " + prevId);
+        }
+        return null;
     }
 
     /**
@@ -1141,6 +1232,18 @@ public final class NodeDocument extends 
         checkNotNull(op).set(HAS_BINARY_FLAG, HAS_BINARY_VAL);
     }
 
+    //----------------------------< internal modifiers 
>------------------------
+
+    private static void setSplitDocType(@Nonnull UpdateOp op,
+                                        @Nonnull SplitDocType type) {
+        checkNotNull(op).set(SD_TYPE, type.type);
+    }
+
+    private static void setSplitDocMaxRev(@Nonnull UpdateOp op,
+                                          @Nonnull Revision maxRev) {
+        checkNotNull(op).set(SD_MAX_REV_TS, maxRev.getTimestamp());
+    }
+
     //----------------------------< internal 
>----------------------------------
 
     /**
@@ -1191,6 +1294,39 @@ public final class NodeDocument extends 
     }
 
     /**
+     * Set various split document related flag/properties
+     *
+     * @param mainDoc main document from which split document is being created
+     * @param old updateOp of the old document created via split
+     * @param oldDoc old document created via split
+     * @param maxRev max revision stored in the split document oldDoc
+     */
+    private static void setSplitDocProps(NodeDocument mainDoc, NodeDocument 
oldDoc,
+                                         UpdateOp old, Revision maxRev) {
+        setSplitDocMaxRev(old, maxRev);
+
+        SplitDocType type = SplitDocType.DEFAULT;
+        if(!mainDoc.hasChildren()){
+            type = SplitDocType.DEFAULT_NO_CHILD;
+        } else if (oldDoc.getLocalRevisions().isEmpty()){
+            type = SplitDocType.PROP_COMMIT_ONLY;
+        }
+
+        setSplitDocType(old,type);
+    }
+
+    /**
+     * Set various properties for intermediate split document
+     *
+     * @param intermediate updateOp of the intermediate doc getting created
+     * @param maxRev max revision stored in the intermediate
+     */
+    private static void setIntermediateDocProps(UpdateOp intermediate, 
Revision maxRev) {
+        setSplitDocMaxRev(intermediate, maxRev);
+        setSplitDocType(intermediate,SplitDocType.INTERMEDIATE);
+    }
+
+    /**
      * Checks that revision x is newer than another revision.
      *
      * @param x the revision to check

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 Fri Mar 28 09:46:53 2014
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
 
+import com.google.common.collect.ImmutableList;
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore;
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoVersionGCSupport;
 import org.slf4j.Logger;
@@ -82,7 +83,7 @@ class VersionGarbageCollector {
                 if (doc.getNodeAtRevision(nodeStore, headRevision, null) == 
null) {
                     docIdsToDelete.add(doc.getId());
                     //Collect id of all previous docs also
-                    for (NodeDocument prevDoc : doc.getAllPreviousDocs()) {
+                    for (NodeDocument prevDoc : 
ImmutableList.copyOf(doc.getAllPreviousDocs())) {
                         docIdsToDelete.add(prevDoc.getId());
                     }
                 }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
 Fri Mar 28 09:46:53 2014
@@ -133,7 +133,7 @@ public class MongoDocumentStore implemen
 
         // index on the _bin flag to faster access nodes with binaries for GC
         index = new BasicDBObject();
-        index.put(NodeDocument.HAS_BINARY_FLAG, Integer.valueOf(1));
+        index.put(NodeDocument.HAS_BINARY_FLAG, 1);
         options = new BasicDBObject();
         options.put("unique", Boolean.FALSE);
         options.put("sparse", Boolean.TRUE);
@@ -146,6 +146,13 @@ public class MongoDocumentStore implemen
         options.put("sparse", Boolean.TRUE);
         this.nodes.ensureIndex(index, options);
 
+        index = new BasicDBObject();
+        index.put(NodeDocument.SD_TYPE, 1);
+        options = new BasicDBObject();
+        options.put("unique", Boolean.FALSE);
+        options.put("sparse", Boolean.TRUE);
+        this.nodes.ensureIndex(index, options);
+
 
         // TODO expire entries if the parent was changed
         if (builder.useOffHeapCache()) {

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
 Fri Mar 28 09:46:53 2014
@@ -24,6 +24,9 @@ import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 
+import com.google.common.base.Predicate;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterators;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
 import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
@@ -35,6 +38,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
@@ -72,6 +76,10 @@ public class DocumentSplitTest extends B
         }
         // check if document is still there
         assertNotNull(ns.getNode("/", Revision.fromString(head)));
+
+        NodeDocument prevDoc = 
Iterators.getOnlyElement(doc.getAllPreviousDocs());
+        assertEquals(SplitDocType.DEFAULT, prevDoc.getSplitDocType());
+
         mk.commit("/", "+\"baz\":{}", null, null);
         ns.setAsyncDelay(0);
         mk.backgroundWrite();
@@ -143,7 +151,6 @@ public class DocumentSplitTest extends B
     @Test
     public void splitPropertyRevisions() throws Exception {
         DocumentStore store = mk.getDocumentStore();
-        DocumentNodeStore ns = mk.getNodeStore();
         mk.commit("/", "+\"foo\":{}", null, null);
         NodeDocument doc = store.find(NODES, Utils.getIdFromPath("/foo"));
         assertNotNull(doc);
@@ -302,6 +309,22 @@ public class DocumentSplitTest extends B
     }
 
     @Test
+    public void testSplitDocNoChild() throws Exception{
+        DocumentStore store = mk.getDocumentStore();
+        DocumentNodeStore ns = mk.getNodeStore();
+        mk.commit("/", "+\"test\":{\"node\":{}}", null, null);
+        mk.commit("/test", "+\"foo\":{}+\"bar\":{}", null, null);
+        for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) {
+            mk.commit("/test/foo", "^\"prop\":" + i, null, null);
+        }
+        ns.runBackgroundOperations();
+        NodeDocument doc = store.find(NODES, Utils.getIdFromPath("/test/foo"));
+        List<NodeDocument> prevDocs = 
ImmutableList.copyOf(doc.getAllPreviousDocs());
+        assertEquals(1, prevDocs.size());
+        assertEquals(SplitDocType.DEFAULT_NO_CHILD, 
prevDocs.get(0).getSplitDocType());
+    }
+
+    @Test
     public void cascadingSplit() {
         cascadingSplit("/test/node");
     }
@@ -344,6 +367,17 @@ public class DocumentSplitTest extends B
         NodeDocument doc = store.find(NODES, Utils.getIdFromPath(path));
         assertNotNull(doc);
         assertEquals(2, doc.getPreviousRanges().size());
+
+        List<NodeDocument> prevDocs = 
ImmutableList.copyOf(doc.getAllPreviousDocs());
+        //1 intermediate and 11 previous doc
+        assertEquals(1 + 11, prevDocs.size());
+        assertTrue(Iterables.any(prevDocs, new Predicate<NodeDocument>() {
+            @Override
+            public boolean apply(NodeDocument input) {
+                return input.getSplitDocType() == SplitDocType.INTERMEDIATE;
+            }
+        }));
+
         for (String s : revs) {
             Revision r = Revision.fromString(s);
             if (doc.getLocalRevisions().containsKey(r)) {


Reply via email to