Author: reschke
Date: Tue Feb 7 15:12:08 2017
New Revision: 1782008
URL: http://svn.apache.org/viewvc?rev=1782008&view=rev
Log:
OAK-5571: VersionGarbageCollector can remove leaf nodes eagerly
(patch by Stefan Eissing)
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1782008&r1=1782007&r2=1782008&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
Tue Feb 7 15:12:08 2017
@@ -38,6 +38,7 @@ import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
@@ -115,6 +116,7 @@ public class VersionGarbageCollector {
boolean ignoredGCDueToCheckPoint;
boolean canceled;
int deletedDocGCCount;
+ int deletedLeafDocGCCount;
int splitDocGCCount;
int intermediateSplitDocGCCount;
final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted();
@@ -127,7 +129,7 @@ public class VersionGarbageCollector {
return "VersionGCStats{" +
"ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
", canceled=" + canceled +
- ", deletedDocGCCount=" + deletedDocGCCount +
+ ", deletedDocGCCount=" + deletedDocGCCount + " (of which
leaf: " + deletedLeafDocGCCount + ")" +
", splitDocGCCount=" + splitDocGCCount +
", intermediateSplitDocGCCount=" +
intermediateSplitDocGCCount +
", timeToCollectDeletedDocs=" + collectDeletedDocs +
@@ -138,6 +140,78 @@ public class VersionGarbageCollector {
}
}
+ private enum GCPhase {
+ NONE,
+ COLLECTING,
+ DELETING,
+ SORTING,
+ SPLITS_CLEANUP
+ }
+
+ /**
+ * Keeps track of timers when switching GC phases.
+ *
+ * Could be merged with VersionGCStats, however this way the public class
is kept unchanged.
+ */
+ private static class GCPhases {
+
+ final VersionGCStats stats;
+ final Stopwatch elapsed;
+ private final List<GCPhase> phases = Lists.newArrayList();
+ private final Map<GCPhase, Stopwatch> watches = Maps.newHashMap();
+
+ GCPhases() {
+ this.stats = new VersionGCStats();
+ this.elapsed = Stopwatch.createStarted();
+ this.watches.put(GCPhase.NONE, Stopwatch.createStarted());
+ this.watches.put(GCPhase.COLLECTING, stats.collectDeletedDocs);
+ this.watches.put(GCPhase.DELETING, stats.deleteDeletedDocs);
+ this.watches.put(GCPhase.SORTING, stats.sortDocIds);
+ this.watches.put(GCPhase.SPLITS_CLEANUP,
stats.collectAndDeleteSplitDocs);
+ }
+
+ public void start(GCPhase started) {
+ suspend(currentWatch());
+ this.phases.add(started);
+ resume(currentWatch());
+ }
+
+ public void stop(GCPhase phase) {
+ if (!phases.isEmpty() && phase == phases.get(phases.size() - 1)) {
+ suspend(currentWatch());
+ phases.remove(phases.size() - 1);
+ resume(currentWatch());
+ }
+ }
+
+ public void close() {
+ while (!phases.isEmpty()) {
+ suspend(currentWatch());
+ phases.remove(phases.size() - 1);
+ }
+ this.elapsed.stop();
+ }
+
+ private GCPhase current() {
+ return phases.isEmpty()? GCPhase.NONE : phases.get(phases.size() -
1);
+ }
+
+ private Stopwatch currentWatch() {
+ return watches.get(current());
+ }
+
+ private void resume(Stopwatch w) {
+ if (!w.isRunning()) {
+ w.start();
+ }
+ }
+ private void suspend(Stopwatch w) {
+ if (w.isRunning()) {
+ w.stop();
+ }
+ }
+ }
+
private class GCJob {
private final long maxRevisionAgeMillis;
@@ -157,8 +231,7 @@ public class VersionGarbageCollector {
}
private VersionGCStats gc(long maxRevisionAgeInMillis) throws
IOException {
- Stopwatch sw = Stopwatch.createStarted();
- VersionGCStats stats = new VersionGCStats();
+ GCPhases phases = new GCPhases();
final long oldestRevTimeStamp = nodeStore.getClock().getTime() -
maxRevisionAgeInMillis;
final RevisionVector headRevision = nodeStore.getHeadRevision();
@@ -173,33 +246,33 @@ public class VersionGarbageCollector {
checkpoint.toReadableString(),
Utils.timestampToString(oldestRevTimeStamp)
);
- stats.ignoredGCDueToCheckPoint = true;
- return stats;
+ phases.stats.ignoredGCDueToCheckPoint = true;
+ return phases.stats;
}
- collectDeletedDocuments(stats, headRevision, oldestRevTimeStamp);
- collectSplitDocuments(stats, oldestRevTimeStamp);
+ collectDeletedDocuments(phases, headRevision, oldestRevTimeStamp);
+ collectSplitDocuments(phases, oldestRevTimeStamp);
- sw.stop();
- stats.canceled = cancel.get();
- log.info("Revision garbage collection finished in {}. {}", sw,
stats);
- return stats;
+ phases.close();
+ phases.stats.canceled = cancel.get();
+ log.info("Revision garbage collection finished in {}. {}",
phases.elapsed, phases.stats);
+ return phases.stats;
}
- private void collectSplitDocuments(VersionGCStats stats, long
oldestRevTimeStamp) {
- stats.collectAndDeleteSplitDocs.start();
- versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp,
stats);
- stats.collectAndDeleteSplitDocs.stop();
+ private void collectSplitDocuments(GCPhases phases, long
oldestRevTimeStamp) {
+ phases.start(GCPhase.SPLITS_CLEANUP);
+ versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp,
phases.stats);
+ phases.stop(GCPhase.SPLITS_CLEANUP);
}
- private void collectDeletedDocuments(VersionGCStats stats,
+ private void collectDeletedDocuments(GCPhases phases,
RevisionVector headRevision,
long oldestRevTimeStamp)
throws IOException {
int docsTraversed = 0;
DeletedDocsGC gc = new DeletedDocsGC(headRevision, cancel);
try {
- stats.collectDeletedDocs.start();
+ phases.start(GCPhase.COLLECTING);
Iterable<NodeDocument> itr =
versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
try {
for (NodeDocument doc : itr) {
@@ -217,23 +290,32 @@ public class VersionGarbageCollector {
docsTraversed, gc.getNumDocuments());
}
gc.possiblyDeleted(doc);
+ if (gc.hasLeafBatch()) {
+ phases.start(GCPhase.DELETING);
+ gc.removeLeafDocuments(phases.stats);
+ phases.stop(GCPhase.DELETING);
+ }
}
} finally {
Utils.closeIfCloseable(itr);
}
- stats.collectDeletedDocs.stop();
+ phases.stop(GCPhase.COLLECTING);
if (gc.getNumDocuments() == 0){
return;
}
- stats.sortDocIds.start();
+ phases.start(GCPhase.DELETING);
+ gc.removeLeafDocuments(phases.stats);
+ phases.stop(GCPhase.DELETING);
+
+ phases.start(GCPhase.SORTING);
gc.ensureSorted();
- stats.sortDocIds.stop();
+ phases.stop(GCPhase.SORTING);
- stats.deleteDeletedDocs.start();
- gc.removeDocuments(stats);
- stats.deleteDeletedDocs.stop();
+ phases.start(GCPhase.DELETING);
+ gc.removeDocuments(phases.stats);
+ phases.stop(GCPhase.DELETING);
} finally {
gc.close();
}
@@ -247,6 +329,7 @@ public class VersionGarbageCollector {
private final RevisionVector headRevision;
private final AtomicBoolean cancel;
+ private final List<String> leafDocIdsToDelete = Lists.newArrayList();
private final StringSort docIdsToDelete = newStringSort();
private final StringSort prevDocIdsToDelete = newStringSort();
private final Set<String> exclude = Sets.newHashSet();
@@ -264,7 +347,7 @@ public class VersionGarbageCollector {
* This number does not include the previous documents.
*/
long getNumDocuments() {
- return docIdsToDelete.getSize();
+ return docIdsToDelete.getSize() + leafDocIdsToDelete.size();
}
/**
@@ -289,9 +372,14 @@ public class VersionGarbageCollector {
return;
}
if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
- addDocument(id);
// Collect id of all previous docs also
- addPreviousDocuments(previousDocIdsFor(doc));
+ Iterator<String> previousDocs = previousDocIdsFor(doc);
+ if (!doc.hasChildren() && !previousDocs.hasNext()) {
+ addLeafDocument(id);
+ } else {
+ addDocument(id);
+ addPreviousDocuments(previousDocs);
+ }
}
}
@@ -304,11 +392,22 @@ public class VersionGarbageCollector {
* @param stats to track the number of removed documents.
*/
void removeDocuments(VersionGCStats stats) throws IOException {
- stats.deletedDocGCCount += removeDeletedDocuments();
+ removeLeafDocuments(stats);
+ stats.deletedDocGCCount +=
removeDeletedDocuments(getDocIdsToDelete(), "(other)");
// FIXME: this is incorrect because that method also removes
intermediate docs
stats.splitDocGCCount += removeDeletedPreviousDocuments();
}
+ boolean hasLeafBatch() {
+ return leafDocIdsToDelete.size() >= DELETE_BATCH_SIZE;
+ }
+
+ void removeLeafDocuments(VersionGCStats stats) throws IOException {
+ int removeCount = removeDeletedDocuments(getLeafDocIdsToDelete(),
"(leaf)");
+ stats.deletedLeafDocGCCount += removeCount;
+ stats.deletedDocGCCount += removeCount;
+ }
+
public void close() {
try {
docIdsToDelete.close();
@@ -357,6 +456,10 @@ public class VersionGarbageCollector {
docIdsToDelete.add(id);
}
+ private void addLeafDocument(String id) throws IOException {
+ leafDocIdsToDelete.add(id);
+ }
+
private long getNumPreviousDocuments() {
return prevDocIdsToDelete.getSize() - exclude.size();
}
@@ -372,6 +475,10 @@ public class VersionGarbageCollector {
return docIdsToDelete.getIds();
}
+ private Iterator<String> getLeafDocIdsToDelete() throws IOException {
+ return leafDocIdsToDelete.iterator();
+ }
+
private void concurrentModification(NodeDocument doc) {
Iterator<NodeDocument> it = doc.getAllPreviousDocs();
while (it.hasNext()) {
@@ -390,9 +497,8 @@ public class VersionGarbageCollector {
});
}
- private int removeDeletedDocuments() throws IOException {
- Iterator<String> docIdsToDelete = getDocIdsToDelete();
- log.info("Proceeding to delete [{}] documents", getNumDocuments());
+ private int removeDeletedDocuments(Iterator<String> docIdsToDelete,
String label) throws IOException {
+ log.info("Proceeding to delete [{}] documents [{}]",
getNumDocuments(), label);
Iterator<List<String>> idListItr = partition(docIdsToDelete,
DELETE_BATCH_SIZE);
int deletedCount = 0;
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1782008&r1=1782007&r2=1782008&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
Tue Feb 7 15:12:08 2017
@@ -156,6 +156,7 @@ public class VersionGCDeletionTest {
VersionGCStats stats = gc.gc(maxAge * 2, HOURS);
assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
+ assertEquals(noOfDocsToDelete, stats.deletedLeafDocGCCount);
assertNull(ts.find(Collection.NODES, "1:/x"));
@@ -204,6 +205,7 @@ public class VersionGCDeletionTest {
VersionGCStats stats = gc.gc(maxAge * 2, HOURS);
assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
+ assertEquals(noOfDocsToDelete, stats.deletedLeafDocGCCount);
}
@Test
@@ -340,6 +342,7 @@ public class VersionGCDeletionTest {
VersionGarbageCollector gc = store.getVersionGarbageCollector();
VersionGCStats stats = gc.gc(30, MINUTES);
assertEquals(90, stats.deletedDocGCCount);
+ assertEquals(90, stats.deletedLeafDocGCCount);
queries.release(2);
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java?rev=1782008&r1=1782007&r2=1782008&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
Tue Feb 7 15:12:08 2017
@@ -180,12 +180,14 @@ public class VersionGarbageCollectorIT {
clock.waitUntil(clock.getTime() + delta);
stats = gc.gc(maxAge*2, HOURS);
assertEquals(0, stats.deletedDocGCCount);
+ assertEquals(0, stats.deletedLeafDocGCCount);
//3. Check that deleted doc does get collected post maxAge
clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
stats = gc.gc(maxAge*2, HOURS);
assertEquals(1, stats.deletedDocGCCount);
+ assertEquals(1, stats.deletedLeafDocGCCount);
//4. Check that a revived doc (deleted and created again) does not get
gc
NodeBuilder b3 = store.getRoot().builder();
@@ -199,6 +201,7 @@ public class VersionGarbageCollectorIT {
clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
stats = gc.gc(maxAge*2, HOURS);
assertEquals(0, stats.deletedDocGCCount);
+ assertEquals(0, stats.deletedLeafDocGCCount);
}
@@ -249,6 +252,7 @@ public class VersionGarbageCollectorIT {
clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge) + delta);
VersionGCStats stats = gc.gc(maxAge, HOURS);
assertEquals(2, stats.splitDocGCCount);
+ assertEquals(0, stats.deletedLeafDocGCCount);
//Previous doc should be removed
assertNull(getDoc(previousDocTestFoo.get(0).getPath()));
@@ -306,6 +310,7 @@ public class VersionGarbageCollectorIT {
clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge) + delta);
VersionGCStats stats = gc.gc(maxAge, HOURS);
assertEquals(10, stats.splitDocGCCount);
+ assertEquals(0, stats.deletedLeafDocGCCount);
DocumentNodeState test = getDoc("/test").getNodeAtRevision(
store, store.getHeadRevision(), null);
@@ -342,6 +347,7 @@ public class VersionGarbageCollectorIT {
VersionGCStats stats = gc.gc(maxAge, HOURS);
assertEquals(1, stats.deletedDocGCCount);
+ assertEquals(1, stats.deletedLeafDocGCCount);
Set<String> children = Sets.newHashSet();
for (ChildNodeEntry entry : store.getRoot().getChildNodeEntries()) {
@@ -369,6 +375,7 @@ public class VersionGarbageCollectorIT {
VersionGCStats stats = gc.gc(maxAge, HOURS);
assertEquals(2, stats.splitDocGCCount);
+ assertEquals(0, stats.deletedLeafDocGCCount);
NodeDocument doc = getDoc("/foo");
assertNotNull(doc);
@@ -500,6 +507,7 @@ public class VersionGarbageCollectorIT {
VersionGCStats stats = f.get();
assertEquals(1, stats.deletedDocGCCount);
assertEquals(2, stats.splitDocGCCount);
+ assertEquals(0, stats.deletedLeafDocGCCount);
}
// OAK-4819
@@ -530,6 +538,7 @@ public class VersionGarbageCollectorIT {
// gc must not fail
VersionGCStats stats = gc.gc(maxAge, HOURS);
assertEquals(1, stats.deletedDocGCCount);
+ assertEquals(1, stats.deletedLeafDocGCCount);
}
@Test