Author: chetanm
Date: Fri Mar 28 09:46:53 2014
New Revision: 1582659
URL: http://svn.apache.org/r1582659
Log:
OAK-1341 - DocumentNodeStore: Implement revision garbage collection (WIP)
GC of split document
-- Mark split docs depending on types
-- Added new prop and created sparse index on _sdType
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
Fri Mar 28 09:46:53 2014
@@ -20,9 +20,11 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
+import java.util.Queue;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
@@ -34,7 +36,10 @@ import javax.annotation.Nullable;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
+import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Queues;
import org.apache.jackrabbit.oak.cache.CacheValue;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
@@ -194,6 +199,73 @@ public final class NodeDocument extends
public static final String HAS_BINARY_FLAG = "_bin";
+ //~----------------------------< Split Document Types >
+
+ /**
+ * Defines the type of split document. Its value is an integer whose
+ * valid values are defined in
+ *
+ * @see
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType
+ */
+ public static final String SD_TYPE = "_sdType";
+
+ /**
+ * Property name which refers to timestamp (long) of the latest revision
kept
+ * in the document
+ */
+ public static final String SD_MAX_REV_TS = "_sdMaxRevTs";
+
+ /**
+ * A document which is created from splitting a main document can be
classified
+ * into multiple types depending on the content i.e. whether it contains
+ * REVISIONS, COMMIT_ROOT, property history etc
+ */
+ public static enum SplitDocType {
+ /**
+ * Not a split document
+ */
+ NONE(-1),
+ /**
+ * A split document which contains all types of data
+ */
+ DEFAULT(10),
+ /**
+ * A split document which contains all types of data. In addition
+ * when the split document was created the main document did not have
+ * any children.
+ */
+ DEFAULT_NO_CHILD(20),
+ /**
+ * A split document which does not contain REVISIONS history
+ */
+ PROP_COMMIT_ONLY(30),
+ /**
+ * It's an intermediate split document which only contains version
ranges
+ * and does not contain any other attributes
+ */
+ INTERMEDIATE(40)
+ ;
+
+ final int type;
+
+ private SplitDocType(int type){
+ this.type = type;
+ }
+
+ static SplitDocType valueOf(Integer type){
+ if(type == null){
+ return NONE;
+ }
+ for(SplitDocType docType : values()){
+ if(docType.type == type){
+ return docType;
+ }
+ }
+ throw new IllegalArgumentException("Not a valid SplitDocType :" +
type);
+ }
+ }
+
+
/**
* Properties to ignore when a document is split.
*/
@@ -260,7 +332,8 @@ public final class NodeDocument extends
}
/**
- * Returns <tt>true</tt> if this node possibly has children
+ * Returns <tt>true</tt> if this node possibly has children.
+ * If false then that indicates that there are no children
*
* @return <tt>true</tt> if this node has children
*/
@@ -290,6 +363,24 @@ public final class NodeDocument extends
}
/**
+ * Determines if this document is a split document
+ *
+ * @return <tt>true</tt> if this document is a split document
+ */
+ public boolean isSplitDocument(){
+ return getSplitDocType() != SplitDocType.NONE;
+ }
+
+ /**
+ * Determines the type of split document
+ *
+ * @return type of Split Document
+ */
+ public SplitDocType getSplitDocType(){
+ return SplitDocType.valueOf((Integer) get(SD_TYPE));
+ }
+
+ /**
* Mark this instance as up-to-date (matches the state in persistence
* store).
*
@@ -835,6 +926,7 @@ public final class NodeDocument extends
// check size of old document
NodeDocument oldDoc = new NodeDocument(store);
UpdateUtils.applyChanges(oldDoc, old,
context.getRevisionComparator());
+ setSplitDocProps(this, oldDoc, old, high);
// only split if half of the data can be moved to old document
if (oldDoc.getMemory() > getMemory() * SPLIT_RATIO) {
splitOps.add(old);
@@ -873,6 +965,7 @@ public final class NodeDocument extends
for (Range r : entry.getValue()) {
setPrevious(intermediate, r);
}
+ setIntermediateDocProps(intermediate, h);
splitOps.add(intermediate);
}
}
@@ -953,17 +1046,7 @@ public final class NodeDocument extends
@Override
public NodeDocument apply(Map.Entry<Revision, Range> input) {
if (input.getValue().includes(revision)) {
- Revision r = input.getKey();
- int h = input.getValue().height;
- String prevId = Utils.getPreviousIdFor(mainPath, r, h);
- //TODO Use the maxAge variant such that in case of
Mongo call for
- //previous doc are directed towards replicas first
- NodeDocument prev = store.find(Collection.NODES,
prevId);
- if (prev != null) {
- return prev;
- } else {
- LOG.warn("Document with previous revisions not
found: " + prevId);
- }
+ return getPreviousDoc(input.getKey(), input.getValue());
}
return null;
}
@@ -977,35 +1060,43 @@ public final class NodeDocument extends
}
@Nonnull
- Iterable<NodeDocument> getAllPreviousDocs() {
+ Iterator<NodeDocument> getAllPreviousDocs() {
if (getPreviousRanges().isEmpty()) {
- return Collections.emptyList();
+ return Iterators.emptyIterator();
}
- final String mainPath = getMainPath();
- return filter(transform(getPreviousRanges().entrySet(),
- new Function<Map.Entry<Revision, Range>, NodeDocument>() {
- @Override
- public NodeDocument apply(Map.Entry<Revision, Range>
input) {
- Revision r = input.getKey();
- int h = input.getValue().height;
- String prevId = Utils.getPreviousIdFor(mainPath, r, h);
- //TODO Use the maxAge variant such that in case of
Mongo call for
- //previous doc are directed towards replicas first
- NodeDocument prev = store.find(Collection.NODES,
prevId);
- if (prev != null) {
- return prev;
- } else {
- LOG.warn("Document with previous revisions not
found: " + prevId);
- }
- return null;
+ //Currently this method would fire one query per previous doc
+ //If that poses a problem we can try to find all prev doc by relying
+ //on property that all prevDoc id would starts <depth+2>:/path/to/node
+ return new AbstractIterator<NodeDocument>(){
+ private Queue<Map.Entry<Revision, Range>> previousRanges =
+ Queues.newArrayDeque(getPreviousRanges().entrySet());
+ @Override
+ protected NodeDocument computeNext() {
+ if(!previousRanges.isEmpty()){
+ Map.Entry<Revision, Range> e = previousRanges.remove();
+ NodeDocument prev = getPreviousDoc(e.getKey(),
e.getValue());
+ if(prev != null){
+
previousRanges.addAll(prev.getPreviousRanges().entrySet());
+ return prev;
}
}
- ), new Predicate<NodeDocument>() {
- @Override
- public boolean apply(@Nullable NodeDocument input) {
- return input != null;
+ return endOfData();
}
- });
+ };
+ }
+
+ private NodeDocument getPreviousDoc(Revision rev, Range range){
+ int h = range.height;
+ String prevId = Utils.getPreviousIdFor(getMainPath(), rev, h);
+ //TODO Use the maxAge variant such that in case of Mongo call for
+ //previous doc are directed towards replicas first
+ NodeDocument prev = store.find(Collection.NODES, prevId);
+ if (prev != null) {
+ return prev;
+ } else {
+ LOG.warn("Document with previous revisions not found: " + prevId);
+ }
+ return null;
}
/**
@@ -1141,6 +1232,18 @@ public final class NodeDocument extends
checkNotNull(op).set(HAS_BINARY_FLAG, HAS_BINARY_VAL);
}
+ //----------------------------< internal modifiers
>------------------------
+
+ private static void setSplitDocType(@Nonnull UpdateOp op,
+ @Nonnull SplitDocType type) {
+ checkNotNull(op).set(SD_TYPE, type.type);
+ }
+
+ private static void setSplitDocMaxRev(@Nonnull UpdateOp op,
+ @Nonnull Revision maxRev) {
+ checkNotNull(op).set(SD_MAX_REV_TS, maxRev.getTimestamp());
+ }
+
//----------------------------< internal
>----------------------------------
/**
@@ -1191,6 +1294,39 @@ public final class NodeDocument extends
}
/**
+ * Set various split document related flag/properties
+ *
+ * @param mainDoc main document from which split document is being created
+ * @param old updateOp of the old document created via split
+ * @param oldDoc old document created via split
+ * @param maxRev max revision stored in the split document oldDoc
+ */
+ private static void setSplitDocProps(NodeDocument mainDoc, NodeDocument
oldDoc,
+ UpdateOp old, Revision maxRev) {
+ setSplitDocMaxRev(old, maxRev);
+
+ SplitDocType type = SplitDocType.DEFAULT;
+ if(!mainDoc.hasChildren()){
+ type = SplitDocType.DEFAULT_NO_CHILD;
+ } else if (oldDoc.getLocalRevisions().isEmpty()){
+ type = SplitDocType.PROP_COMMIT_ONLY;
+ }
+
+ setSplitDocType(old,type);
+ }
+
+ /**
+ * Set various properties for intermediate split document
+ *
+ * @param intermediate updateOp of the intermediate doc getting created
+ * @param maxRev max revision stored in the intermediate
+ */
+ private static void setIntermediateDocProps(UpdateOp intermediate,
Revision maxRev) {
+ setSplitDocMaxRev(intermediate, maxRev);
+ setSplitDocType(intermediate,SplitDocType.INTERMEDIATE);
+ }
+
+ /**
* Checks that revision x is newer than another revision.
*
* @param x the revision to check
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
Fri Mar 28 09:46:53 2014
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
+import com.google.common.collect.ImmutableList;
import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore;
import org.apache.jackrabbit.oak.plugins.document.mongo.MongoVersionGCSupport;
import org.slf4j.Logger;
@@ -82,7 +83,7 @@ class VersionGarbageCollector {
if (doc.getNodeAtRevision(nodeStore, headRevision, null) ==
null) {
docIdsToDelete.add(doc.getId());
//Collect id of all previous docs also
- for (NodeDocument prevDoc : doc.getAllPreviousDocs()) {
+ for (NodeDocument prevDoc :
ImmutableList.copyOf(doc.getAllPreviousDocs())) {
docIdsToDelete.add(prevDoc.getId());
}
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
Fri Mar 28 09:46:53 2014
@@ -133,7 +133,7 @@ public class MongoDocumentStore implemen
// index on the _bin flag to faster access nodes with binaries for GC
index = new BasicDBObject();
- index.put(NodeDocument.HAS_BINARY_FLAG, Integer.valueOf(1));
+ index.put(NodeDocument.HAS_BINARY_FLAG, 1);
options = new BasicDBObject();
options.put("unique", Boolean.FALSE);
options.put("sparse", Boolean.TRUE);
@@ -146,6 +146,13 @@ public class MongoDocumentStore implemen
options.put("sparse", Boolean.TRUE);
this.nodes.ensureIndex(index, options);
+ index = new BasicDBObject();
+ index.put(NodeDocument.SD_TYPE, 1);
+ options = new BasicDBObject();
+ options.put("unique", Boolean.FALSE);
+ options.put("sparse", Boolean.TRUE);
+ this.nodes.ensureIndex(index, options);
+
// TODO expire entries if the parent was changed
if (builder.useOffHeapCache()) {
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java?rev=1582659&r1=1582658&r2=1582659&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentSplitTest.java
Fri Mar 28 09:46:53 2014
@@ -24,6 +24,9 @@ import java.util.Map;
import java.util.Random;
import java.util.Set;
+import com.google.common.base.Predicate;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterators;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
@@ -35,6 +38,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
@@ -72,6 +76,10 @@ public class DocumentSplitTest extends B
}
// check if document is still there
assertNotNull(ns.getNode("/", Revision.fromString(head)));
+
+ NodeDocument prevDoc =
Iterators.getOnlyElement(doc.getAllPreviousDocs());
+ assertEquals(SplitDocType.DEFAULT, prevDoc.getSplitDocType());
+
mk.commit("/", "+\"baz\":{}", null, null);
ns.setAsyncDelay(0);
mk.backgroundWrite();
@@ -143,7 +151,6 @@ public class DocumentSplitTest extends B
@Test
public void splitPropertyRevisions() throws Exception {
DocumentStore store = mk.getDocumentStore();
- DocumentNodeStore ns = mk.getNodeStore();
mk.commit("/", "+\"foo\":{}", null, null);
NodeDocument doc = store.find(NODES, Utils.getIdFromPath("/foo"));
assertNotNull(doc);
@@ -302,6 +309,22 @@ public class DocumentSplitTest extends B
}
@Test
+ public void testSplitDocNoChild() throws Exception{
+ DocumentStore store = mk.getDocumentStore();
+ DocumentNodeStore ns = mk.getNodeStore();
+ mk.commit("/", "+\"test\":{\"node\":{}}", null, null);
+ mk.commit("/test", "+\"foo\":{}+\"bar\":{}", null, null);
+ for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) {
+ mk.commit("/test/foo", "^\"prop\":" + i, null, null);
+ }
+ ns.runBackgroundOperations();
+ NodeDocument doc = store.find(NODES, Utils.getIdFromPath("/test/foo"));
+ List<NodeDocument> prevDocs =
ImmutableList.copyOf(doc.getAllPreviousDocs());
+ assertEquals(1, prevDocs.size());
+ assertEquals(SplitDocType.DEFAULT_NO_CHILD,
prevDocs.get(0).getSplitDocType());
+ }
+
+ @Test
public void cascadingSplit() {
cascadingSplit("/test/node");
}
@@ -344,6 +367,17 @@ public class DocumentSplitTest extends B
NodeDocument doc = store.find(NODES, Utils.getIdFromPath(path));
assertNotNull(doc);
assertEquals(2, doc.getPreviousRanges().size());
+
+ List<NodeDocument> prevDocs =
ImmutableList.copyOf(doc.getAllPreviousDocs());
+ //1 intermediate and 11 previous doc
+ assertEquals(1 + 11, prevDocs.size());
+ assertTrue(Iterables.any(prevDocs, new Predicate<NodeDocument>() {
+ @Override
+ public boolean apply(NodeDocument input) {
+ return input.getSplitDocType() == SplitDocType.INTERMEDIATE;
+ }
+ }));
+
for (String s : revs) {
Revision r = Revision.fromString(s);
if (doc.getLocalRevisions().containsKey(r)) {