Author: thomasm
Date: Tue Mar 18 14:52:12 2014
New Revision: 1578937

URL: http://svn.apache.org/r1578937
Log:
OAK-1510 MongoDB / DocumentNodeStore DataStore GC performance

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
 Tue Mar 18 14:52:12 2014
@@ -70,6 +70,12 @@ public class MarkSweepGarbageCollector i
 
     public static final int DEFAULT_BATCH_COUNT = 2048;
 
+    public static final String NOT_RUNNING = "NotRunning";
+
+    public static final String MARKING = "Running-Marking";
+
+    public static final String SWEEPING = "Running-Sweeping";
+
     /** The max last modified time of blobs to consider for garbage 
collection. */
     private long maxLastModifiedTime;
 
@@ -91,6 +97,12 @@ public class MarkSweepGarbageCollector i
     /** The batch count. */
     private int batchCount = DEFAULT_BATCH_COUNT;
 
+    /** Flag to indicate whether to run in a debug mode **/
+    private boolean debugMode = Boolean.getBoolean("debugModeGC") | 
LOG.isDebugEnabled();
+
+    /** The current state of the gc process **/
+    private String state;
+
     /**
      * Gets the max last modified time considered for garbage collection.
      * 
@@ -146,6 +158,15 @@ public class MarkSweepGarbageCollector i
     }
 
     /**
+     * Gets the state of the gc process.
+     * 
+     * @return the state
+     */
+    protected String getState() {
+        return state;
+    }
+
+    /**
      * @param nodeStore the node store
      * @param root the root
      * @param batchCount the batch count
@@ -206,6 +227,7 @@ public class MarkSweepGarbageCollector i
             LOG.debug("garbage collector finished");
         } finally {
             fs.complete();
+            state = NOT_RUNNING;
         }
     }
 
@@ -216,6 +238,7 @@ public class MarkSweepGarbageCollector i
      *             the exception
      */
     protected void mark() throws Exception {
+        state = MARKING;
         LOG.debug("Starting mark phase of the garbage collector");
 
         // Find all blobs available in the blob store
@@ -287,6 +310,7 @@ public class MarkSweepGarbageCollector i
      *             Signals that an I/O exception has occurred.
      */
     protected void sweep() throws IOException {
+        state = SWEEPING;        
         LOG.debug("Starting sweep phase of the garbage collector");
 
         ConcurrentLinkedQueue<String> exceptionQueue = new 
ConcurrentLinkedQueue<String>();
@@ -428,12 +452,21 @@ public class MarkSweepGarbageCollector i
             int referencesFound = 0;
             while (blobIterator.hasNext()) {
                 Blob blob = blobIterator.next();
+
+                if (debugMode) {
+                    LOG.debug("BlobId : " + blob.toString());
+                }
+
                 if (blob.toString().length() != 0) {
                     Iterator<String> idIter = ((GarbageCollectableBlobStore) 
nodeStore
                             .getBlobStore())
                             .resolveChunks(blob.toString());
                     while (idIter.hasNext()) {
-                        referencedBlobs.add(idIter.next());
+                        String id = idIter.next();
+                        referencedBlobs.add(id);
+                        if (debugMode) {
+                            LOG.debug("chunkId : " + id);
+                        }
                     }
                 }
 

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
 Tue Mar 18 14:52:12 2014
@@ -95,7 +95,10 @@ public class BlobReferenceIterator imple
 
     private boolean loadBatchQuery() {
         // read about BATCH_SIZE documents
-        List<NodeDocument> list = docStore.query(Collection.NODES, fromKey, 
"999999", BATCH_SIZE);
+        List<NodeDocument> list =
+                docStore.query(Collection.NODES, fromKey, ";", 
NodeDocument.HAS_BINARY_FLAG, 
+                        NodeDocument.HAS_BINARY_VAL,
+                        BATCH_SIZE);
         boolean hasMore = false;
         for (NodeDocument doc : list) {
             if (doc.getId().equals(fromKey)) {

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
 Tue Mar 18 14:52:12 2014
@@ -62,7 +62,10 @@ public class Commit {
     
     private HashSet<String> addedNodes = new HashSet<String>();
     private HashSet<String> removedNodes = new HashSet<String>();
-    
+
+    /** Set of all nodes which have binary properties. **/
+    private HashSet<String> nodesWithBinaries = Sets.newHashSet();
+
     Commit(DocumentNodeStore nodeStore, Revision baseRevision, Revision 
revision) {
         this.baseRevision = baseRevision;
         this.revision = revision;
@@ -123,6 +126,10 @@ public class Commit {
         op.setMapEntry(key, revision, value);
     }
 
+    void markNodeHavingBinary(String path) {
+        this.nodesWithBinaries.add(path);
+    }
+
     void addNode(DocumentNodeState n) {
         String path = n.getPath();
         if (operations.containsKey(path)) {
@@ -185,6 +192,7 @@ public class Commit {
     private void applyInternal() {
         if (!operations.isEmpty()) {
             updateParentChildStatus();
+            updateBinaryStatus();
             applyToDocumentStore();
         }
     }
@@ -192,11 +200,28 @@ public class Commit {
     private void prepare(Revision baseRevision) {
         if (!operations.isEmpty()) {
             updateParentChildStatus();
+            updateBinaryStatus();
             applyToDocumentStore(baseRevision);
         }
     }
 
     /**
+     * Update the binary status in the update op.
+     */
+    private void updateBinaryStatus() {
+        DocumentStore store = this.nodeStore.getDocumentStore();
+
+        for (String path : this.nodesWithBinaries) {
+            NodeDocument nd =
+                    (NodeDocument) store.getIfCached(Collection.NODES, 
Utils.getIdFromPath(path));
+            if ((nd == null) || (nd.hasBinary() != 1)) {
+                UpdateOp updateParentOp = getUpdateOperationForNode(path);
+                NodeDocument.setHasBinary(updateParentOp);
+            }
+        }
+    }
+
+    /**
      * Apply the changes to the document store.
      */
     void applyToDocumentStore() {

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
 Tue Mar 18 14:52:12 2014
@@ -20,6 +20,7 @@ import javax.annotation.Nonnull;
 
 import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
 import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.kernel.BlobSerializer;
 import org.apache.jackrabbit.oak.kernel.JsonSerializer;
@@ -111,5 +112,9 @@ class CommitDiff implements NodeStateDif
         JsonSerializer serializer = new JsonSerializer(builder, blobs);
         serializer.serialize(property);
         commit.updateProperty(path, property.getName(), serializer.toString());
+        if ((property.getType() == Type.BINARY) 
+                || (property.getType() == Type.BINARIES)) {
+            this.commit.markNodeHavingBinary(this.path);
+        }
     }
 }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
 Tue Mar 18 14:52:12 2014
@@ -164,11 +164,15 @@ public final class NodeDocument extends 
      */
     public static final String PATH = "_path";
 
+    public static final String HAS_BINARY_FLAG = "_bin";
+    
     /**
      * Properties to ignore when a document is split.
      */
     private static final Set<String> IGNORE_ON_SPLIT = ImmutableSet.of(ID, 
MOD_COUNT, MODIFIED, PREVIOUS,
-            LAST_REV, CHILDREN_FLAG);
+            LAST_REV, CHILDREN_FLAG, HAS_BINARY_FLAG);
+
+    public static final long HAS_BINARY_VAL = 1;
 
     final DocumentStore store;
 
@@ -268,6 +272,11 @@ public final class NodeDocument extends 
         return lastCheckTime.get();
     }
 
+    public int hasBinary() {
+        Integer flag = (Integer) get(HAS_BINARY_FLAG);
+        return flag != null ? flag.intValue() : 0;
+    }
+
     /**
      * @return a map of the last known revision for each clusterId.
      */
@@ -942,6 +951,10 @@ public final class NodeDocument extends 
                 checkNotNull(low).toString());
     }
 
+    public static void setHasBinary(@Nonnull UpdateOp op) {
+        checkNotNull(op).set(HAS_BINARY_FLAG, HAS_BINARY_VAL);
+    }
+
     //----------------------------< internal 
>----------------------------------
 
     /**

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
 Tue Mar 18 14:52:12 2014
@@ -118,7 +118,7 @@ public class MemoryDocumentStore impleme
             for (T doc : sub.values()) {
                 if (indexedProperty != null) {
                     Long value = (Long) doc.get(indexedProperty);
-                    if (value < startValue) {
+                    if (value == null || value < startValue) {
                         continue;
                     }
                 }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
 Tue Mar 18 14:52:12 2014
@@ -125,6 +125,14 @@ public class MongoDocumentStore implemen
         options.put("unique", Boolean.FALSE);
         nodes.ensureIndex(index, options);
 
+        // index on the _bin flag for faster access to nodes with binaries during GC
+        index = new BasicDBObject();
+        index.put(NodeDocument.HAS_BINARY_FLAG, Integer.valueOf(1));
+        options = new BasicDBObject();
+        options.put("unique", Boolean.FALSE);
+        options.put("sparse", Boolean.TRUE);
+        this.nodes.ensureIndex(index, options);
+
         // TODO expire entries if the parent was changed
         if (builder.useOffHeapCache()) {
             nodesCache = createOffHeapCache(builder);


Reply via email to