Author: thomasm
Date: Tue Mar 18 14:52:12 2014
New Revision: 1578937
URL: http://svn.apache.org/r1578937
Log:
OAK-1510 MongoDB / DocumentNodeStore DataStore GC performance
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
Tue Mar 18 14:52:12 2014
@@ -70,6 +70,12 @@ public class MarkSweepGarbageCollector i
public static final int DEFAULT_BATCH_COUNT = 2048;
+ public static final String NOT_RUNNING = "NotRunning";
+
+ public static final String MARKING = "Running-Marking";
+
+ public static final String SWEEPING = "Running-Sweeping";
+
/** The max last modified time of blobs to consider for garbage
collection. */
private long maxLastModifiedTime;
@@ -91,6 +97,12 @@ public class MarkSweepGarbageCollector i
/** The batch count. */
private int batchCount = DEFAULT_BATCH_COUNT;
+    /**
+     * Whether to log every visited blob id and chunk id while collecting
+     * references. Enabled when the "debugModeGC" system property is true or
+     * when debug logging is enabled on LOG at the time this field is
+     * initialized.
+     */
+    private boolean debugMode = Boolean.getBoolean("debugModeGC") || LOG.isDebugEnabled();
+
+    /**
+     * Current phase of the gc: one of NOT_RUNNING, MARKING or SWEEPING.
+     * Initialized to NOT_RUNNING so that the state is never null before the
+     * first collection has been started.
+     */
+    private String state = NOT_RUNNING;
+
/**
* Gets the max last modified time considered for garbage collection.
*
@@ -146,6 +158,15 @@ public class MarkSweepGarbageCollector i
}
/**
+ * Gets the state of the gc process.
+ *
+ * The state is assigned {@link #MARKING} on entry to mark(),
+ * {@link #SWEEPING} on entry to sweep(), and {@link #NOT_RUNNING} in the
+ * finally block that follows the "garbage collector finished" log message.
+ *
+ * @return the most recently assigned state
+ */
+ protected String getState() {
+ return state;
+ }
+
+ /**
* @param nodeStore the node store
* @param root the root
* @param batchCount the batch count
@@ -206,6 +227,7 @@ public class MarkSweepGarbageCollector i
LOG.debug("garbage collector finished");
} finally {
fs.complete();
+ state = NOT_RUNNING;
}
}
@@ -216,6 +238,7 @@ public class MarkSweepGarbageCollector i
* the exception
*/
protected void mark() throws Exception {
+ state = MARKING;
LOG.debug("Starting mark phase of the garbage collector");
// Find all blobs available in the blob store
@@ -287,6 +310,7 @@ public class MarkSweepGarbageCollector i
* Signals that an I/O exception has occurred.
*/
protected void sweep() throws IOException {
+ state = SWEEPING;
LOG.debug("Starting sweep phase of the garbage collector");
ConcurrentLinkedQueue<String> exceptionQueue = new
ConcurrentLinkedQueue<String>();
@@ -428,12 +452,21 @@ public class MarkSweepGarbageCollector i
int referencesFound = 0;
while (blobIterator.hasNext()) {
Blob blob = blobIterator.next();
+
+ if (debugMode) {
+ LOG.debug("BlobId : " + blob.toString());
+ }
+
if (blob.toString().length() != 0) {
Iterator<String> idIter = ((GarbageCollectableBlobStore)
nodeStore
.getBlobStore())
.resolveChunks(blob.toString());
while (idIter.hasNext()) {
- referencedBlobs.add(idIter.next());
+ String id = idIter.next();
+ referencedBlobs.add(id);
+ if (debugMode) {
+ LOG.debug("chunkId : " + id);
+ }
}
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java
Tue Mar 18 14:52:12 2014
@@ -95,7 +95,10 @@ public class BlobReferenceIterator imple
private boolean loadBatchQuery() {
// read about BATCH_SIZE documents
- List<NodeDocument> list = docStore.query(Collection.NODES, fromKey,
"999999", BATCH_SIZE);
+ List<NodeDocument> list =
+ docStore.query(Collection.NODES, fromKey, ";",
NodeDocument.HAS_BINARY_FLAG,
+ NodeDocument.HAS_BINARY_VAL,
+ BATCH_SIZE);
boolean hasMore = false;
for (NodeDocument doc : list) {
if (doc.getId().equals(fromKey)) {
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java
Tue Mar 18 14:52:12 2014
@@ -62,7 +62,10 @@ public class Commit {
private HashSet<String> addedNodes = new HashSet<String>();
private HashSet<String> removedNodes = new HashSet<String>();
-
+
+ /** Set of all nodes which have binary properties. **/
+ private HashSet<String> nodesWithBinaries = Sets.newHashSet();
+
Commit(DocumentNodeStore nodeStore, Revision baseRevision, Revision
revision) {
this.baseRevision = baseRevision;
this.revision = revision;
@@ -123,6 +126,10 @@ public class Commit {
op.setMapEntry(key, revision, value);
}
+ /**
+ * Records that the node at the given path has a binary property. The
+ * recorded paths are read by updateBinaryStatus(), which sets the binary
+ * flag on the update operation of each such node.
+ *
+ * @param path the path of the node
+ */
+ void markNodeHavingBinary(String path) {
+ this.nodesWithBinaries.add(path);
+ }
+
void addNode(DocumentNodeState n) {
String path = n.getPath();
if (operations.containsKey(path)) {
@@ -185,6 +192,7 @@ public class Commit {
private void applyInternal() {
if (!operations.isEmpty()) {
updateParentChildStatus();
+ updateBinaryStatus();
applyToDocumentStore();
}
}
@@ -192,11 +200,28 @@ public class Commit {
private void prepare(Revision baseRevision) {
if (!operations.isEmpty()) {
updateParentChildStatus();
+ updateBinaryStatus();
applyToDocumentStore(baseRevision);
}
}
/**
+ * Update the binary status in the update op.
+ */
+    private void updateBinaryStatus() {
+        final DocumentStore docStore = nodeStore.getDocumentStore();
+
+        for (final String nodePath : nodesWithBinaries) {
+            final String id = Utils.getIdFromPath(nodePath);
+            final NodeDocument cached =
+                    (NodeDocument) docStore.getIfCached(Collection.NODES, id);
+
+            // Nothing to do only when the cached document already carries the flag;
+            // an uncached document is always (re-)flagged.
+            if (cached != null && cached.hasBinary() == 1) {
+                continue;
+            }
+            NodeDocument.setHasBinary(getUpdateOperationForNode(nodePath));
+        }
+    }
+
+ /**
* Apply the changes to the document store.
*/
void applyToDocumentStore() {
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java
Tue Mar 18 14:52:12 2014
@@ -20,6 +20,7 @@ import javax.annotation.Nonnull;
import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.kernel.BlobSerializer;
import org.apache.jackrabbit.oak.kernel.JsonSerializer;
@@ -111,5 +112,9 @@ class CommitDiff implements NodeStateDif
JsonSerializer serializer = new JsonSerializer(builder, blobs);
serializer.serialize(property);
commit.updateProperty(path, property.getName(), serializer.toString());
+ if ((property.getType() == Type.BINARY)
+ || (property.getType() == Type.BINARIES)) {
+ this.commit.markNodeHavingBinary(this.path);
+ }
}
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
Tue Mar 18 14:52:12 2014
@@ -164,11 +164,15 @@ public final class NodeDocument extends
*/
public static final String PATH = "_path";
+ public static final String HAS_BINARY_FLAG = "_bin";
+
/**
* Properties to ignore when a document is split.
*/
private static final Set<String> IGNORE_ON_SPLIT = ImmutableSet.of(ID,
MOD_COUNT, MODIFIED, PREVIOUS,
- LAST_REV, CHILDREN_FLAG);
+ LAST_REV, CHILDREN_FLAG, HAS_BINARY_FLAG);
+
+ public static final long HAS_BINARY_VAL = 1;
final DocumentStore store;
@@ -268,6 +272,11 @@ public final class NodeDocument extends
return lastCheckTime.get();
}
+    /**
+     * Returns the value of the {@link #HAS_BINARY_FLAG} property.
+     * <p>
+     * setHasBinary() writes the flag as the {@code long} constant
+     * {@link #HAS_BINARY_VAL}, so the stored value is a {@code Long}. It is
+     * therefore read as a {@link Number}; casting it to {@code Integer}
+     * would throw a ClassCastException as soon as the flag is set.
+     *
+     * @return the flag value, or 0 if the flag is not set
+     */
+    public int hasBinary() {
+        Number flag = (Number) get(HAS_BINARY_FLAG);
+        return flag != null ? flag.intValue() : 0;
+    }
+
/**
* @return a map of the last known revision for each clusterId.
*/
@@ -942,6 +951,10 @@ public final class NodeDocument extends
checkNotNull(low).toString());
}
+ /**
+ * Sets the {@link #HAS_BINARY_FLAG} property to {@link #HAS_BINARY_VAL} on
+ * the given update operation.
+ *
+ * @param op the update operation to modify; rejected by checkNotNull if null
+ */
+ public static void setHasBinary(@Nonnull UpdateOp op) {
+ checkNotNull(op).set(HAS_BINARY_FLAG, HAS_BINARY_VAL);
+ }
+
//----------------------------< internal
>----------------------------------
/**
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/memory/MemoryDocumentStore.java
Tue Mar 18 14:52:12 2014
@@ -118,7 +118,7 @@ public class MemoryDocumentStore impleme
for (T doc : sub.values()) {
if (indexedProperty != null) {
Long value = (Long) doc.get(indexedProperty);
- if (value < startValue) {
+ if (value == null || value < startValue) {
continue;
}
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java?rev=1578937&r1=1578936&r2=1578937&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
Tue Mar 18 14:52:12 2014
@@ -125,6 +125,14 @@ public class MongoDocumentStore implemen
options.put("unique", Boolean.FALSE);
nodes.ensureIndex(index, options);
+ // index on the _bin flag for faster access to nodes with binaries during GC
+ index = new BasicDBObject();
+ index.put(NodeDocument.HAS_BINARY_FLAG, Integer.valueOf(1));
+ options = new BasicDBObject();
+ options.put("unique", Boolean.FALSE);
+ options.put("sparse", Boolean.TRUE);
+ this.nodes.ensureIndex(index, options);
+
// TODO expire entries if the parent was changed
if (builder.useOffHeapCache()) {
nodesCache = createOffHeapCache(builder);