This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch OAK-8646 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 280eaf266669f95e4970b4b2300045b90c6cd4ee Author: Stefan Egli <[email protected]> AuthorDate: Wed Aug 30 19:13:31 2023 +0200 OAK-8646 : Clean up changes from orphaned branch commits, as part of detailed gc --- .../plugins/document/VersionGarbageCollector.java | 226 ++++++++- .../oak/plugins/document/BranchCommitGCTest.java | 550 +++++++++++++++++++-- 2 files changed, 721 insertions(+), 55 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index d20f5683bc..7cd1604ec8 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugins.document; + import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -30,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -45,6 +47,9 @@ import org.apache.jackrabbit.guava.common.collect.Maps; import org.apache.jackrabbit.guava.common.collect.Sets; import org.apache.jackrabbit.oak.commons.sort.StringSort; +import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key; +import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Operation; +import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Operation.Type; import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval; import org.apache.jackrabbit.oak.plugins.document.util.Utils; import org.apache.jackrabbit.oak.spi.gc.DelegatingGCMonitor; @@ -79,6 +84,7 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocTy import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH; import static org.apache.jackrabbit.oak.stats.StatisticsProvider.NOOP; +import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isCommitted; import static org.slf4j.helpers.MessageFormatter.arrayFormat; public class VersionGarbageCollector { @@ -667,7 +673,7 @@ public class VersionGarbageCollector { boolean foundDoc = true; long oldModifiedMs = oldestModifiedMs; - try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) { + try (DetailedGC gc = new DetailedGC(headRevision, toModifiedMs, monitor, cancel)) { long fromModifiedMs = oldestModifiedMs; String fromId = ofNullable(oldestModifiedDocId).orElse(MIN_ID_VALUE); NodeDocument lastDoc; @@ -837,6 +843,7 @@ public class VersionGarbageCollector { private class DetailedGC implements Closeable { private final RevisionVector headRevision; + private final long toModifiedMs; private final GCMonitor monitor; private final AtomicBoolean cancel; private final Stopwatch timer; @@ -855,14 +862,18 @@ public class VersionGarbageCollector { private final Map<String, Integer> deletedPropsCountMap; private int garbageDocsCount; private int totalGarbageDocsCount; + private final Revision revisionForModified; - public DetailedGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) { + public DetailedGC(@NotNull RevisionVector headRevision, long toModifiedMs, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) { this.headRevision = requireNonNull(headRevision); + this.toModifiedMs = toModifiedMs; this.monitor = monitor; this.cancel = cancel; this.updateOpList = new ArrayList<>(); this.deletedPropsCountMap = new HashMap<>(); this.timer = createUnstarted(); + // clusterId is not used + this.revisionForModified = Revision.newRevision(0); } public void collectGarbage(final NodeDocument doc, final GCPhases phases) { @@ -874,7 +885,7 @@ public class VersionGarbageCollector { op.equals(MODIFIED_IN_SECS, doc.getModified()); collectDeletedProperties(doc, phases, op); - collectUnmergedBranchCommitDocument(doc, phases, op); + collectUnmergedBranchCommitDocument(doc, toModifiedMs, phases, op); collectOldRevisions(doc, phases, op); // only add if there are changes for this doc if (op.hasChanges()) { @@ -889,14 +900,6 @@ public class VersionGarbageCollector { return garbageDocsCount > 0; } - private void collectUnmergedBranchCommitDocument(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) { - if (phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)){ - // TODO add unmerged BC collection logic - phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC); - } - - } - private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) { // get Map of all properties along with their values @@ -929,6 +932,206 @@ public class VersionGarbageCollector { } } + private void collectUnmergedBranchCommitDocument(final NodeDocument doc, + long toModifiedMillis, final GCPhases phases, final UpdateOp updateOp) { + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)) { + // GC was cancelled, stop + return; + } + + // from + // https://jackrabbit.apache.org/oak/docs/nodestore/documentmk.html#previous-documents + // "branch commits are not moved to previous documents until the branch is + // merged." + // i.e. if we're looking for unmerged branch commits, they cannot be in + // previous documents, they have to be in the main one - hence we have to use + // getLocalBranchCommits here + final Set<Revision> localBranchCommits = doc.getLocalBranchCommits(); + if (localBranchCommits.isEmpty()) { + // nothing to do then + phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC); + return; + } + + // !Note, the _bc sub-document was introduced with Oak 1.8 and is not present + // in older versions. The branch commit revision is added to _bc whenever a + // change is done on a document with a branch commit. This helps the + // DocumentNodeStore to more easily identify branch commit changes." + // The current implementation of "collectUnmergedBranchCommitDocument" only + // supports branch commits that are created after Oak 1.8 + for (Revision bcRevision : localBranchCommits) { + if (!isRevisionOlderThan(bcRevision, toModifiedMillis)) { + // only even consider revisions that are older than the provided + // timestamp - otherwise skip this + continue; + } + final String commitValue = nodeStore.getCommitValue(bcRevision, doc); + if (isCommitted(commitValue)) { + // obviously don't do anything with merged (committed) branch commits + continue; + } + removeUnmergedBCRevision(bcRevision, doc, updateOp); + } + // now for any of the handled system properties (the normal properties would + // already be cleaned up by cleanupDeletedProperties), the resulting + // subdocument could in theory become empty after removing all unmerged branch + // commit revisions is done later. + // so we need to go through all of them and check if we'd have removed + // the entirety - and then, instead of individually remove revisions, just + // delete the entire property. + if (updateOp.hasChanges()) { + for (Entry<String, Integer> e : getSystemRemoveMapEntryCounts(updateOp) + .entrySet()) { + final String prop = e.getKey(); + final Object d = doc.data.get(prop); + if (!(d instanceof Map)) { + // unexpected and would likely indicate a bug, hence log.error + log.error( + "collectUnmergedBranchCommitDocument: property without subdocument as expected. id={}, prop={}", + doc.getId(), prop); + continue; + } + @SuppressWarnings("rawtypes") + final Map m = (Map) d; + if (m.size() != e.getValue()) { + // then we're not removing all revisions - so cannot cleanup + continue; + } + // then we're removing all revisions - so replace those REMOVE_MAP_ENTRY + // with one whole remove(prop) + final Iterator<Entry<Key, Operation>> it = updateOp.getChanges().entrySet() + .iterator(); + while (it.hasNext()) { + if (it.next().getKey().getName().equals(prop)) { + it.remove(); + } + } + updateOp.remove(prop); + } + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC); + } + + /** small helper to count number of REMOVE_MAP_ENTRY per system property */ + private Map<String, Integer> getSystemRemoveMapEntryCounts(final UpdateOp updateOp) { + final Map<String, Integer> propMap = new HashMap<>(); + for (Entry<Key, Operation> e : updateOp.getChanges().entrySet()) { + if (e.getValue().type != Type.REMOVE_MAP_ENTRY) { + // only count REMOVE_MAP_ENTRY types, skip the rest + continue; + } + final String propName = e.getKey().getName(); + if (!propName.startsWith("_")) { + // only count system properties, skip the rest + continue; + } + Integer count = propMap.getOrDefault(propName, 0); + propMap.put(propName, count + 1); + } + return propMap; + } + + /** small helper to check if the revision is older than the timestamp */ + private boolean isRevisionOlderThan(Revision revision, long toModifiedMillis) { + long time = revision.getTimestamp(); + return time <= toModifiedMillis; + } + + /** + * Clean up one uncommitted branch commit in the provided document. The caller + * establishes whether a branch commit revision is committed or not - this is no + * longer checked in this method. The resulting operations are added to the + * provided updateOp. + * <p/> + * The actions depend on the exacty property key - here's the comprehensive list + * of system properties and how they are handled: + * <ul> + * <li>"_id" and "_path" are not affected</li> + * <li>"_prev" and "stalePrev" are not affected</li> + * <li>"_lastRev" and "_sweepRev" are not affected</li> + * <li>"_bc" is cleaned up</li> + * <li>"_revisions" is cleaned up</li> + * <li>"_commitRoot" is cleaned up</li> + * <li>"_collisions" might be a special case but is also cleaned up</li> + * <li>"_modified" and "_deletedOnce" are just single values and thus not + * generally affected, but they are updated when "_deleted" is cleaned up</li> + * <li>"_deleted" is cleaned up - plus "_modified" and "_deletedOnce" are also + * updated for this, to have classic GC later potentially delete the + * document</li> + * <li>"_children" is only ever set to true, never back - hence no need for + * cleanup/modification by DetailedGC</li> + * <li>"_bin" is only ever set to 1, never back - hence no need for + * cleanup/modification by DetailedGC</li> + * </ul> + * + * @param unmergedBCRevision the unmerged branch commit revision - the caller + * makes sure this revision is indeed not merged and + * old enough to be removed + * @param doc the document from which the uncommittedBCRevision + * should be removed + * @param updateOp the resulting operations yet to be applied + * @param propMap + */ + private void removeUnmergedBCRevision(Revision unmergedBCRevision, + NodeDocument doc, UpdateOp updateOp) { + // caller ensures the provided revision is an unmerged branch commit + NodeDocument.removeBranchCommit(updateOp, unmergedBCRevision); + + // phase 1 : remove unmerged bc revisions from _deleted - unmerged branch + // commits can only be in the local set + final String unmergedDeleted = doc.getLocalDeleted().get(unmergedBCRevision); + if (unmergedDeleted != null) { + NodeDocument.removeDeleted(updateOp, unmergedBCRevision); + + // phase 2: the document could now effectively be "deleted" the actual + // removal from the document from the store is left to DeletedDocsGC (to + // avoid code duplication) + + // if unmergedDeleted is "false", then it was created with this branch + // commit, but that was never merged. when we now remove that, it could be + // that it is then deleted. + + // to know whether or not the node is actually deleted, would potentially + // require several commit value lookups. + // in order to keep the execution time of detailGC in this regard small, + // the code here stops with any further checks and just sets + // "_deletedOnce" to true and updates "_modified" - the DeletedDocsGC + // later would either resurrect or delete the document properly (eg + // including previous docs) + if ("false".equals(unmergedDeleted)) { + if (!doc.wasDeletedOnce()) { + NodeDocument.setDeletedOnce(updateOp); + } + NodeDocument.setModified(updateOp, revisionForModified); + } + } + // phase 3 : go through other system properties + if (doc.getLocalCommitRoot().containsKey(unmergedBCRevision)) { + NodeDocument.removeCommitRoot(updateOp, unmergedBCRevision); + } + if (doc.getLocalRevisions().containsKey(unmergedBCRevision)) { + NodeDocument.removeRevision(updateOp, unmergedBCRevision); + } + if (doc.getLocalMap(NodeDocument.COLLISIONS) + .containsKey(unmergedBCRevision)) { + NodeDocument.removeCollision(updateOp, unmergedBCRevision); + } + // phase 4 : go through normal properties + for (String propName : doc.getPropertyNames()) { + // first check if this property might have been (flagged to be) + // entirely removed by the collectDeletedProperties - in which case + // there is a corresponding UpdateOp + final Operation op = updateOp.getChanges().get(new Key(propName, null)); + if (op != null && op.type == Type.REMOVE) { + // ignore this property then, it will be removed entirely + continue; + } + if (doc.getLocalMap(propName).containsKey(unmergedBCRevision)) { + updateOp.removeMapEntry(propName, unmergedBCRevision); + } + } + } + private void collectOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) { if (phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ @@ -1019,6 +1222,7 @@ public class VersionGarbageCollector { private final Set<String> exclude = Sets.newHashSet(); private boolean sorted = false; private final Stopwatch timer; + @SuppressWarnings("unused") private final VersionGCOptions options; private final GCMonitor monitor; diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java index 2aca59d7af..79d7f9bbae 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java @@ -16,20 +16,32 @@ */ package org.apache.jackrabbit.oak.plugins.document; +import static java.util.concurrent.TimeUnit.HOURS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.function.Consumer; + +import org.apache.jackrabbit.oak.plugins.document.DocumentMK.Builder; +import org.apache.jackrabbit.oak.plugins.document.util.Utils; import org.apache.jackrabbit.oak.spi.commit.CommitInfo; import org.apache.jackrabbit.oak.spi.commit.EmptyHook; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.apache.jackrabbit.oak.stats.Clock; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.Rule; -import org.junit.Before; import org.junit.After; - -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertEquals; -import static java.util.concurrent.TimeUnit.HOURS; -import static org.apache.jackrabbit.oak.plugins.document.TestUtils.persistToBranch; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; public class BranchCommitGCTest { @@ -38,85 +50,535 @@ public class BranchCommitGCTest { private Clock clock; private DocumentNodeStore store; private VersionGarbageCollector gc; + private VersionGarbageCollector.VersionGCStats stats; @Before public void setUp() throws InterruptedException { clock = new Clock.Virtual(); clock.waitUntil(System.currentTimeMillis()); Revision.setClock(clock); - store = builderProvider.newBuilder() - .clock(clock) - .setLeaseCheckMode(LeaseCheckMode.DISABLED) - .setAsyncDelay(0) - .getNodeStore(); + store = builderProvider.newBuilder().clock(clock) + .setLeaseCheckMode(LeaseCheckMode.DISABLED).setDetailedGCEnabled(true) + .setAsyncDelay(0).getNodeStore(); gc = store.getVersionGarbageCollector(); } @After public void tearDown() throws Exception { + assertNoEmptyProperties(); if (store != null) { store.dispose(); } Revision.resetClockToDefault(); } - @Ignore + private void assertNoEmptyProperties() { + for (NodeDocument nd : Utils.getAllDocuments(store.getDocumentStore())) { + for (Entry<String, Object> e : nd.data.entrySet()) { + Object v = e.getValue(); + if (v instanceof Map) { + @SuppressWarnings("rawtypes") + Map m = (Map) v; + if (m.isEmpty() + && (e.getKey().equals("_commitRoot") + || e.getKey().equals("_collisions")) + && nd.getId().equals("0:/")) { + // skip : root document apparently has an empty _commitRoot:{} + continue; + } + assertFalse("document has empty property : id=" + nd.getId() + + ", property=" + e.getKey() + ", document=" + nd.asString(), + m.isEmpty()); + } + } + } + } + + @Test + public void unmergedAddChildren() throws Exception { + RevisionVector br = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + assertExists("1:/a"); + assertExists("1:/b"); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/a").wasDeletedOnce()); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/b").wasDeletedOnce()); + + store.runBackgroundOperations(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hour + VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); + + assertEquals(3, stats.updatedDetailedGCDocsCount); + + assertTrue( + store.getDocumentStore().find(Collection.NODES, "1:/a").wasDeletedOnce()); + assertTrue( + store.getDocumentStore().find(Collection.NODES, "1:/b").wasDeletedOnce()); + + // now do another gc to get those documents actually deleted + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + assertEquals(2, stats.deletedDocGCCount); + assertNotExists("1:/a"); + assertNotExists("1:/b"); + assertBranchRevisionRemovedFromAllDocuments(br); + } + + @Test + public void unmergedAddThenMergedAddAndRemoveChildren() throws Exception { + RevisionVector br = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + assertExists("1:/a"); + assertExists("1:/b"); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/a").wasDeletedOnce()); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/b").wasDeletedOnce()); + + store.runBackgroundOperations(); + + mergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + + store.runBackgroundOperations(); + + mergedBranchCommit(b -> { + b.child("a").remove(); + b.child("b").remove(); + }); + + store.runBackgroundOperations(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hour + VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); + + assertTrue( + "stats.updatedDetailedGCDocsCount expected 1 or less, was: " + + stats.updatedDetailedGCDocsCount, + stats.updatedDetailedGCDocsCount <= 1); + assertEquals(2, stats.deletedDocGCCount); + + assertNotExists("1:/a"); + assertNotExists("1:/b"); + + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + assertEquals(0, stats.deletedDocGCCount); + assertBranchRevisionRemovedFromAllDocuments(br); + } + + @Test + public void unmergedAddTwoChildren() throws Exception { + RevisionVector br1 = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + RevisionVector br2 = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + assertExists("1:/a"); + assertExists("1:/b"); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/a").wasDeletedOnce()); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/b").wasDeletedOnce()); + + store.runBackgroundOperations(); + + Long originalModified = store.getDocumentStore().find(Collection.NODES, "1:/a") + .getModified(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hour + VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); + + Long laterModified = store.getDocumentStore().find(Collection.NODES, "1:/a") + .getModified(); + assertNotEquals(originalModified, laterModified); + + assertEquals(3, stats.updatedDetailedGCDocsCount); + assertEquals(0, stats.deletedDocGCCount); + + assertExists("1:/a"); + assertExists("1:/b"); + + // now do another gc to get those documents actually deleted + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + assertEquals(2, stats.deletedDocGCCount); + assertBranchRevisionRemovedFromAllDocuments(br1); + assertBranchRevisionRemovedFromAllDocuments(br2); + } + @Test - public void orphanedBranchCommitDetect() throws Exception { + public void unmergedAddsThenMergedAddsChildren() throws Exception { + RevisionVector br1 = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + RevisionVector br2 = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + assertExists("1:/a"); + assertExists("1:/b"); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/a").wasDeletedOnce()); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/b").wasDeletedOnce()); - NodeBuilder b1 = store.getRoot().builder(); - b1.child("a"); - b1.child("b"); - persistToBranch(b1); + store.runBackgroundOperations(); - // b1 must see 'a' and 'b' - assertTrue(b1.hasChildNode("a")); - assertTrue(b1.hasChildNode("b")); + mergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); store.runBackgroundOperations(); // wait two hours clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hours - VersionGarbageCollector.VersionGCStats stats= gc.gc(1, HOURS); + VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); - //This will fail as of now but will pass once BranchCommit GC code is merged. - assertEquals(1, stats.deletedDocGCCount); + assertTrue("should have been 2 or more, was: " + stats.updatedDetailedGCDocsCount, + stats.updatedDetailedGCDocsCount >= 2); + assertEquals(0, stats.deletedDocGCCount); + + assertExists("1:/a"); + assertExists("1:/b"); + + // now do another gc to get those documents actually deleted + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + assertEquals(0, stats.deletedDocGCCount); + assertBranchRevisionRemovedFromAllDocuments(br1); + assertBranchRevisionRemovedFromAllDocuments(br2); } - @Ignore @Test - public void orphanedModifiedBranchCommitDetect() throws Exception { + public void unmergedAddsThenMergedAddThenUnmergedRemovesChildren() throws Exception { + RevisionVector br1 = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + RevisionVector br2 = unmergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); + assertExists("1:/a"); + assertExists("1:/b"); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/a").wasDeletedOnce()); + assertFalse( + store.getDocumentStore().find(Collection.NODES, "1:/b").wasDeletedOnce()); - NodeBuilder b = store.getRoot().builder(); - b.child("foo"); - b.child("test"); - persistToBranch(b); + store.runBackgroundOperations(); + + mergedBranchCommit(b -> { + b.child("a"); + b.child("b"); + }); - // b must see 'foo' and 'test' - assertTrue(b.hasChildNode("foo")); - assertTrue(b.hasChildNode("test")); + store.runBackgroundOperations(); - merge(b); + RevisionVector br3 = unmergedBranchCommit(b -> { + b.child("a").remove(); + b.child("b").remove(); + }); + RevisionVector br4 = unmergedBranchCommit(b -> { + b.child("a").remove(); + b.child("b").remove(); + }); + + store.runBackgroundOperations(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hour + VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); - b = store.getRoot().builder(); - b.child("test").remove(); - b.getChildNode("foo").child("childfoo"); - persistToBranch(b); + assertEquals(3, stats.updatedDetailedGCDocsCount); + assertEquals(0, stats.deletedDocGCCount); + + assertExists("1:/a"); + assertExists("1:/b"); + + // now do another gc to get those documents actually deleted + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + assertEquals(0, stats.deletedDocGCCount); + assertBranchRevisionRemovedFromAllDocuments(br1); + assertBranchRevisionRemovedFromAllDocuments(br2); + assertBranchRevisionRemovedFromAllDocuments(br3); + assertBranchRevisionRemovedFromAllDocuments(br4); + } + + private void assertNotExists(String id) { + NodeDocument doc = store.getDocumentStore().find(Collection.NODES, id); + assertNull("doc exists but was expected not to : id=" + id, doc); + } + + private void assertExists(String id) { + NodeDocument doc = store.getDocumentStore().find(Collection.NODES, id); + assertNotNull("doc does not exist : id=" + id, doc); + } + + @Test + public void unmergedAddAndRemoveChild() throws Exception { + mergedBranchCommit(b -> { + b.child("foo"); + b.child("test"); + }); + RevisionVector br = unmergedBranchCommit(b -> { + b.child("test").remove(); + b.getChildNode("foo").child("childfoo"); + }); store.runBackgroundOperations(); // wait two hours clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hours - VersionGarbageCollector.VersionGCStats stats= gc.gc(1, HOURS); + VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); - //This will fail as of now but will pass once BranchCommit GC code is merged. + // first gc round will only mark document for deleting by second round + assertEquals(0, stats.deletedDocGCCount); + assertEquals(4, stats.updatedDetailedGCDocsCount); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // now do second gc round to get those documents actually deleted + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(1, stats.deletedDocGCCount); + assertBranchRevisionRemovedFromAllDocuments(br); + } + + private void assertBranchRevisionRemovedFromAllDocuments(RevisionVector br) { + assertTrue(br.isBranch()); + Revision br1 = br.getRevision(1); + Revision r1 = br1.asTrunkRevision(); + for (NodeDocument nd : Utils.getAllDocuments(store.getDocumentStore())) { + if (nd.getId().equals("0:/")) { + NodeDocument target = new NodeDocument(store.getDocumentStore()); + nd.deepCopy(target); + } + System.out.println("nd=" + nd.asString()); + if (nd.asString().contains(r1.toString())) { + System.out.println("break"); + } + assertFalse("document not fully cleaned up: " + nd.asString(), + nd.asString().contains(r1.toString())); + } + } + + @Test + public void unmergedRemoveProperty() throws Exception { + mergedBranchCommit(b -> b.child("foo")); + mergedBranchCommit(b -> b.child("foo").setProperty("a", "b")); + // do a gc without waiting, to check that works fine + store.runBackgroundOperations(); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + + RevisionVector br = unmergedBranchCommit(b -> b.child("foo").removeProperty("a")); + mergedBranchCommit(b -> b.child("foo").setProperty("c", "d")); + store.runBackgroundOperations(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hours + stats = gc.gc(1, HOURS); + + assertEquals(2, stats.updatedDetailedGCDocsCount); + assertBranchRevisionRemovedFromAllDocuments(br); + } + + @Test + public void unmergedAddProperty() throws Exception { + mergedBranchCommit(b -> b.child("foo")); + RevisionVector br = unmergedBranchCommit( + b -> b.child("foo").setProperty("a", "b")); + store.runBackgroundOperations(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hours + stats = gc.gc(1, HOURS); + + assertEquals(2, stats.updatedDetailedGCDocsCount); + assertBranchRevisionRemovedFromAllDocuments(br); + } + + @Test + public void unmergedRemoveChild() throws Exception { + mergedBranchCommit(b -> { + b.child("foo"); + b.child("bar"); + }); + // do a gc without waiting, to check that works fine + store.runBackgroundOperations(); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + + final List<RevisionVector> brs = new LinkedList<>(); + for (int j = 0; j < 10; j++) { + brs.add(unmergedBranchCommit(b -> b.child("foo").remove())); + } + store.runBackgroundOperations(); + + NodeDocument doc = store.getDocumentStore().find(Collection.NODES, "1:/foo"); + Long originalModified = doc.getModified(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hours + stats = gc.gc(1, HOURS); + + assertEquals(2, stats.updatedDetailedGCDocsCount); + + doc = store.getDocumentStore().find(Collection.NODES, "1:/foo"); + Long finalModified = doc.getModified(); + + assertEquals(originalModified, finalModified); + + for (RevisionVector br : brs) { + assertBranchRevisionRemovedFromAllDocuments(br); + } } - private void merge(NodeBuilder builder) + @Test + public void unmergedMergedRemoveChild() throws Exception { + mergedBranchCommit(b -> { + b.child("foo"); + b.child("bar"); + }); + // do a gc without waiting, to check that works fine + store.runBackgroundOperations(); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + + for (int i = 0; i < 50; i++) { + mergedBranchCommit(b -> b.child("foo").remove()); + mergedBranchCommit(b -> b.child("foo")); + } + final List<RevisionVector> brs = new LinkedList<>(); + for (int j = 0; j < 10; j++) { + brs.add(unmergedBranchCommit(b -> b.child("foo").remove())); + } + store.runBackgroundOperations(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hours + stats = gc.gc(1, HOURS); + + assertEquals(2, stats.updatedDetailedGCDocsCount); + for (RevisionVector br : brs) { + assertBranchRevisionRemovedFromAllDocuments(br); + } + } + + @Test + public void unmergedThenMergedRemoveProperty() throws Exception { + mergedBranchCommit(b -> b.child("foo")); + mergedBranchCommit(b -> b.child("foo").setProperty("a", "b")); + mergedBranchCommit(b -> b.child("foo").setProperty("c", "d")); + // do a gc without waiting, to check that works fine + store.runBackgroundOperations(); + stats = gc.gc(1, HOURS); + assertEquals(0, stats.updatedDetailedGCDocsCount); + + RevisionVector br = unmergedBranchCommit(b -> { + b.setProperty("rootProp", "v"); + b.child("foo").removeProperty("a"); + }); + store.runBackgroundOperations(); + DocumentNodeStore store2 = newStore(2); + mergedBranchCommit(store2, b -> b.child("foo").removeProperty("a")); + store2.runBackgroundOperations(); + store2.dispose(); + store.runBackgroundOperations(); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hours + stats = gc.gc(1, HOURS); + + assertEquals(2, stats.updatedDetailedGCDocsCount); + assertBranchRevisionRemovedFromAllDocuments(br); + } + + private DocumentNodeStore newStore(int clusterId) { + Builder builder = builderProvider.newBuilder().clock(clock) + .setLeaseCheckMode(LeaseCheckMode.DISABLED).setDetailedGCEnabled(true) + .setAsyncDelay(0).setDocumentStore(store.getDocumentStore()); + if (clusterId > 0) { + builder.setClusterId(clusterId); + } + DocumentNodeStore store2 = builder.getNodeStore(); + return store2; + } + + private RevisionVector mergedBranchCommit(Consumer<NodeBuilder> buildFunction) throws Exception { - store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + return build(store, true, true, buildFunction); + } + + private RevisionVector mergedBranchCommit(NodeStore store, + Consumer<NodeBuilder> buildFunction) throws Exception { + return build(store, true, true, buildFunction); + } + + private RevisionVector unmergedBranchCommit(Consumer<NodeBuilder> buildFunction) + throws Exception { + RevisionVector result = build(store, true, false, buildFunction); + assertTrue(result.isBranch()); + return result; + } + + private RevisionVector build(NodeStore store, boolean persistToBranch, boolean merge, + Consumer<NodeBuilder> buildFunction) throws Exception { + if (!persistToBranch && !merge) { + throw new IllegalArgumentException("must either persistToBranch or merge"); + } + NodeBuilder b = store.getRoot().builder(); + buildFunction.accept(b); + RevisionVector result = null; + if (persistToBranch) { + DocumentRootBuilder drb = (DocumentRootBuilder) b; + drb.persist(); + DocumentNodeState ns = (DocumentNodeState) drb.getNodeState(); + result = ns.getLastRevision(); + } + if (merge) { + DocumentNodeState dns = (DocumentNodeState) merge(b); + result = dns.getLastRevision(); + } + return result; + } + + private NodeState merge(NodeBuilder builder) throws Exception { + return store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); } }
