This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch OAK-8646
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 280eaf266669f95e4970b4b2300045b90c6cd4ee
Author: Stefan Egli <[email protected]>
AuthorDate: Wed Aug 30 19:13:31 2023 +0200

    OAK-8646 : Clean up changes from orphaned branch commits, as part of 
detailed gc
---
 .../plugins/document/VersionGarbageCollector.java  | 226 ++++++++-
 .../oak/plugins/document/BranchCommitGCTest.java   | 550 +++++++++++++++++++--
 2 files changed, 721 insertions(+), 55 deletions(-)

diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index d20f5683bc..7cd1604ec8 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -19,6 +19,7 @@
 
 package org.apache.jackrabbit.oak.plugins.document;
 
+
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -30,6 +31,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
+import java.util.Map.Entry;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
@@ -45,6 +47,9 @@ import org.apache.jackrabbit.guava.common.collect.Maps;
 import org.apache.jackrabbit.guava.common.collect.Sets;
 
 import org.apache.jackrabbit.oak.commons.sort.StringSort;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Operation;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Operation.Type;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.spi.gc.DelegatingGCMonitor;
@@ -79,6 +84,7 @@ import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocTy
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
 import static org.apache.jackrabbit.oak.stats.StatisticsProvider.NOOP;
+import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.isCommitted;
 import static org.slf4j.helpers.MessageFormatter.arrayFormat;
 
 public class VersionGarbageCollector {
@@ -667,7 +673,7 @@ public class VersionGarbageCollector {
             boolean foundDoc = true;
             long oldModifiedMs = oldestModifiedMs;
 
-            try (DetailedGC gc = new DetailedGC(headRevision, monitor, 
cancel)) {
+            try (DetailedGC gc = new DetailedGC(headRevision, toModifiedMs, 
monitor, cancel)) {
                 long fromModifiedMs = oldestModifiedMs;
                 String fromId = 
ofNullable(oldestModifiedDocId).orElse(MIN_ID_VALUE);
                 NodeDocument lastDoc;
@@ -837,6 +843,7 @@ public class VersionGarbageCollector {
     private class DetailedGC implements Closeable {
 
         private final RevisionVector headRevision;
+        private final long toModifiedMs;
         private final GCMonitor monitor;
         private final AtomicBoolean cancel;
         private final Stopwatch timer;
@@ -855,14 +862,18 @@ public class VersionGarbageCollector {
         private final Map<String, Integer> deletedPropsCountMap;
         private int garbageDocsCount;
         private int totalGarbageDocsCount;
+        private final Revision revisionForModified;
 
-        public DetailedGC(@NotNull RevisionVector headRevision, @NotNull 
GCMonitor monitor, @NotNull AtomicBoolean cancel) {
+        public DetailedGC(@NotNull RevisionVector headRevision, long 
toModifiedMs, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) {
             this.headRevision = requireNonNull(headRevision);
+            this.toModifiedMs = toModifiedMs;
             this.monitor = monitor;
             this.cancel = cancel;
             this.updateOpList = new ArrayList<>();
             this.deletedPropsCountMap = new HashMap<>();
             this.timer = createUnstarted();
+            // clusterId is not used
+            this.revisionForModified = Revision.newRevision(0);
         }
 
         public void collectGarbage(final NodeDocument doc, final GCPhases 
phases) {
@@ -874,7 +885,7 @@ public class VersionGarbageCollector {
             op.equals(MODIFIED_IN_SECS, doc.getModified());
 
             collectDeletedProperties(doc, phases, op);
-            collectUnmergedBranchCommitDocument(doc, phases, op);
+            collectUnmergedBranchCommitDocument(doc, toModifiedMs, phases, op);
             collectOldRevisions(doc, phases, op);
             // only add if there are changes for this doc
             if (op.hasChanges()) {
@@ -889,14 +900,6 @@ public class VersionGarbageCollector {
             return garbageDocsCount > 0;
         }
 
-        private void collectUnmergedBranchCommitDocument(final NodeDocument 
doc, final GCPhases phases, final UpdateOp updateOp) {
-            if (phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)){
-                // TODO add unmerged BC collection logic
-                phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC);
-            }
-
-        }
-
         private void collectDeletedProperties(final NodeDocument doc, final 
GCPhases phases, final UpdateOp updateOp) {
 
             // get Map of all properties along with their values
@@ -929,6 +932,206 @@ public class VersionGarbageCollector {
             }
         }
 
+        private void collectUnmergedBranchCommitDocument(final NodeDocument 
doc,
+                long toModifiedMillis, final GCPhases phases, final UpdateOp 
updateOp) {
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)) {
+                // GC was cancelled, stop
+                return;
+            }
+
+            // from
+            // 
https://jackrabbit.apache.org/oak/docs/nodestore/documentmk.html#previous-documents
+            // "branch commits are not moved to previous documents until the 
branch is
+            // merged."
+            // i.e. if we're looking for unmerged branch commits, they cannot 
be in
+            // previous documents, they have to be in the main one - hence we 
have to use
+            // getLocalBranchCommits here
+            final Set<Revision> localBranchCommits = 
doc.getLocalBranchCommits();
+            if (localBranchCommits.isEmpty()) {
+                // nothing to do then
+                phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC);
+                return;
+            }
+
+            // !Note, the _bc sub-document was introduced with Oak 1.8 and is 
not present
+            // in older versions. The branch commit revision is added to _bc 
whenever a
+            // change is done on a document with a branch commit. This helps 
the
+            // DocumentNodeStore to more easily identify branch commit 
changes."
+            // The current implementation of 
"collectUnmergedBranchCommitDocument" only
+            // supports branch commits that are created after Oak 1.8
+            for (Revision bcRevision : localBranchCommits) {
+                if (!isRevisionOlderThan(bcRevision, toModifiedMillis)) {
+                    // only even consider revisions that are older than the 
provided
+                    // timestamp - otherwise skip this
+                    continue;
+                }
+                final String commitValue = 
nodeStore.getCommitValue(bcRevision, doc);
+                if (isCommitted(commitValue)) {
+                    // obviously don't do anything with merged (committed) 
branch commits
+                    continue;
+                }
+                removeUnmergedBCRevision(bcRevision, doc, updateOp);
+            }
+            // now for any of the handled system properties (the normal 
properties would
+            // already be cleaned up by cleanupDeletedProperties), the 
resulting
+            // subdocument could in theory become empty after removing all 
unmerged branch
+            // commit revisions is done later.
+            // so we need to go through all of them and check if we'd have 
removed
+            // the entirety - and then, instead of individually remove 
revisions, just
+            // delete the entire property.
+            if (updateOp.hasChanges()) {
+                for (Entry<String, Integer> e : 
getSystemRemoveMapEntryCounts(updateOp)
+                        .entrySet()) {
+                    final String prop = e.getKey();
+                    final Object d = doc.data.get(prop);
+                    if (!(d instanceof Map)) {
+                        // unexpected and would likely indicate a bug, hence 
log.error
+                        log.error(
+                                "collectUnmergedBranchCommitDocument: property 
without subdocument as expected. id={}, prop={}",
+                                doc.getId(), prop);
+                        continue;
+                    }
+                    @SuppressWarnings("rawtypes")
+                    final Map m = (Map) d;
+                    if (m.size() != e.getValue()) {
+                        // then we're not removing all revisions - so cannot 
cleanup
+                        continue;
+                    }
+                    // then we're removing all revisions - so replace those 
REMOVE_MAP_ENTRY
+                    // with one whole remove(prop)
+                    final Iterator<Entry<Key, Operation>> it = 
updateOp.getChanges().entrySet()
+                            .iterator();
+                    while (it.hasNext()) {
+                        if (it.next().getKey().getName().equals(prop)) {
+                            it.remove();
+                        }
+                    }
+                    updateOp.remove(prop);
+                }
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC);
+        }
+
+        /** small helper to count number of REMOVE_MAP_ENTRY per system 
property */
+        private Map<String, Integer> getSystemRemoveMapEntryCounts(final 
UpdateOp updateOp) {
+            final Map<String, Integer> propMap = new HashMap<>();
+            for (Entry<Key, Operation> e : updateOp.getChanges().entrySet()) {
+                if (e.getValue().type != Type.REMOVE_MAP_ENTRY) {
+                    // only count REMOVE_MAP_ENTRY types, skip the rest
+                    continue;
+                }
+                final String propName = e.getKey().getName();
+                if (!propName.startsWith("_")) {
+                    // only count system properties, skip the rest
+                    continue;
+                }
+                Integer count = propMap.getOrDefault(propName, 0);
+                propMap.put(propName, count + 1);
+            }
+            return propMap;
+        }
+
+        /** small helper to check if the revision is older than the timestamp 
*/
+        private boolean isRevisionOlderThan(Revision revision, long 
toModifiedMillis) {
+            long time = revision.getTimestamp();
+            return time <= toModifiedMillis;
+        }
+
+        /**
+         * Clean up one uncommitted branch commit in the provided document. 
The caller
+         * establishes whether a branch commit revision is committed or not - 
this is no
+         * longer checked in this method. The resulting operations are added 
to the
+         * provided updateOp.
+         * <p/>
+         * The actions depend on the exacty property key - here's the 
comprehensive list
+         * of system properties and how they are handled:
+         * <ul>
+         * <li>"_id" and "_path" are not affected</li>
+         * <li>"_prev" and "stalePrev" are not affected</li>
+         * <li>"_lastRev" and "_sweepRev" are not affected</li>
+         * <li>"_bc" is cleaned up</li>
+         * <li>"_revisions" is cleaned up</li>
+         * <li>"_commitRoot" is cleaned up</li>
+         * <li>"_collisions" might be a special case but is also cleaned 
up</li>
+         * <li>"_modified" and "_deletedOnce" are just single values and thus 
not
+         * generally affected, but they are updated when "_deleted" is cleaned 
up</li>
+         * <li>"_deleted" is cleaned up - plus "_modified" and "_deletedOnce" 
are also
+         * updated for this, to have classic GC later potentially delete the
+         * document</li>
+         * <li>"_children" is only ever set to true, never back - hence no 
need for
+         * cleanup/modification by DetailedGC</li>
+         * <li>"_bin" is only ever set to 1, never back - hence no need for
+         * cleanup/modification by DetailedGC</li>
+         * </ul>
+         *
+         * @param unmergedBCRevision the unmerged branch commit revision - the 
caller
+         *                           makes sure this revision is indeed not 
merged and
+         *                           old enough to be removed
+         * @param doc                the document from which the 
uncommittedBCRevision
+         *                           should be removed
+         * @param updateOp           the resulting operations yet to be applied
+         * @param propMap
+         */
+        private void removeUnmergedBCRevision(Revision unmergedBCRevision,
+                NodeDocument doc, UpdateOp updateOp) {
+            // caller ensures the provided revision is an unmerged branch 
commit
+            NodeDocument.removeBranchCommit(updateOp, unmergedBCRevision);
+
+            // phase 1 : remove unmerged bc revisions from _deleted - unmerged 
branch
+            // commits can only be in the local set
+            final String unmergedDeleted = 
doc.getLocalDeleted().get(unmergedBCRevision);
+            if (unmergedDeleted != null) {
+                NodeDocument.removeDeleted(updateOp, unmergedBCRevision);
+
+                // phase 2: the document could now effectively be "deleted" 
the actual
+                // removal from the document from the store is left to 
DeletedDocsGC (to
+                // avoid code duplication)
+
+                // if unmergedDeleted is "false", then it was created with 
this branch
+                // commit, but that was never merged. when we now remove that, 
it could be
+                // that it is then deleted.
+
+                // to know whether or not the node is actually deleted, would 
potentially
+                // require several commit value lookups.
+                // in order to keep the execution time of detailGC in this 
regard small,
+                // the code here stops with any further checks and just sets
+                // "_deletedOnce" to true and updates "_modified" - the 
DeletedDocsGC
+                // later would either resurrect or delete the document 
properly (eg
+                // including previous docs)
+                if ("false".equals(unmergedDeleted)) {
+                    if (!doc.wasDeletedOnce()) {
+                        NodeDocument.setDeletedOnce(updateOp);
+                    }
+                    NodeDocument.setModified(updateOp, revisionForModified);
+                }
+            }
+            // phase 3 : go through other system properties
+            if (doc.getLocalCommitRoot().containsKey(unmergedBCRevision)) {
+                NodeDocument.removeCommitRoot(updateOp, unmergedBCRevision);
+            }
+            if (doc.getLocalRevisions().containsKey(unmergedBCRevision)) {
+                NodeDocument.removeRevision(updateOp, unmergedBCRevision);
+            }
+            if (doc.getLocalMap(NodeDocument.COLLISIONS)
+                    .containsKey(unmergedBCRevision)) {
+                NodeDocument.removeCollision(updateOp, unmergedBCRevision);
+            }
+            // phase 4 : go through normal properties
+            for (String propName : doc.getPropertyNames()) {
+                // first check if this property might have been (flagged to be)
+                // entirely removed by the collectDeletedProperties - in which 
case
+                // there is a corresponding UpdateOp
+                final Operation op = updateOp.getChanges().get(new 
Key(propName, null));
+                if (op != null && op.type == Type.REMOVE) {
+                    // ignore this property then, it will be removed entirely
+                    continue;
+                }
+                if (doc.getLocalMap(propName).containsKey(unmergedBCRevision)) 
{
+                    updateOp.removeMapEntry(propName, unmergedBCRevision);
+                }
+            }
+        }
+
         private void collectOldRevisions(NodeDocument doc, GCPhases phases, 
UpdateOp updateOp) {
 
             if (phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
@@ -1019,6 +1222,7 @@ public class VersionGarbageCollector {
         private final Set<String> exclude = Sets.newHashSet();
         private boolean sorted = false;
         private final Stopwatch timer;
+        @SuppressWarnings("unused")
         private final VersionGCOptions options;
         private final GCMonitor monitor;
 
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java
index 2aca59d7af..79d7f9bbae 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java
@@ -16,20 +16,32 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
+import static java.util.concurrent.TimeUnit.HOURS;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.function.Consumer;
+
+import org.apache.jackrabbit.oak.plugins.document.DocumentMK.Builder;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
 import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
 import org.apache.jackrabbit.oak.stats.Clock;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.Rule;
-import org.junit.Before;
 import org.junit.After;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertEquals;
-import static java.util.concurrent.TimeUnit.HOURS;
-import static 
org.apache.jackrabbit.oak.plugins.document.TestUtils.persistToBranch;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
 
 public class BranchCommitGCTest {
 
@@ -38,85 +50,535 @@ public class BranchCommitGCTest {
     private Clock clock;
     private DocumentNodeStore store;
     private VersionGarbageCollector gc;
+    private VersionGarbageCollector.VersionGCStats stats;
 
     @Before
     public void setUp() throws InterruptedException {
         clock = new Clock.Virtual();
         clock.waitUntil(System.currentTimeMillis());
         Revision.setClock(clock);
-        store = builderProvider.newBuilder()
-                .clock(clock)
-                .setLeaseCheckMode(LeaseCheckMode.DISABLED)
-                .setAsyncDelay(0)
-                .getNodeStore();
+        store = builderProvider.newBuilder().clock(clock)
+                
.setLeaseCheckMode(LeaseCheckMode.DISABLED).setDetailedGCEnabled(true)
+                .setAsyncDelay(0).getNodeStore();
         gc = store.getVersionGarbageCollector();
     }
 
     @After
     public void tearDown() throws Exception {
+        assertNoEmptyProperties();
         if (store != null) {
             store.dispose();
         }
         Revision.resetClockToDefault();
     }
 
-    @Ignore
+    private void assertNoEmptyProperties() {
+        for (NodeDocument nd : 
Utils.getAllDocuments(store.getDocumentStore())) {
+            for (Entry<String, Object> e : nd.data.entrySet()) {
+                Object v = e.getValue();
+                if (v instanceof Map) {
+                    @SuppressWarnings("rawtypes")
+                    Map m = (Map) v;
+                    if (m.isEmpty()
+                            && (e.getKey().equals("_commitRoot")
+                                    || e.getKey().equals("_collisions"))
+                            && nd.getId().equals("0:/")) {
+                        // skip : root document apparently has an empty 
_commitRoot:{}
+                        continue;
+                    }
+                    assertFalse("document has empty property : id=" + 
nd.getId()
+                            + ", property=" + e.getKey() + ", document=" + 
nd.asString(),
+                            m.isEmpty());
+                }
+            }
+        }
+    }
+
+    @Test
+    public void unmergedAddChildren() throws Exception {
+        RevisionVector br = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        assertExists("1:/a");
+        assertExists("1:/b");
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/a").wasDeletedOnce());
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/b").wasDeletedOnce());
+
+        store.runBackgroundOperations();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hour
+        VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS);
+
+        assertEquals(3, stats.updatedDetailedGCDocsCount);
+
+        assertTrue(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/a").wasDeletedOnce());
+        assertTrue(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/b").wasDeletedOnce());
+
+        // now do another gc to get those documents actually deleted
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(2, stats.deletedDocGCCount);
+        assertNotExists("1:/a");
+        assertNotExists("1:/b");
+        assertBranchRevisionRemovedFromAllDocuments(br);
+    }
+
+    @Test
+    public void unmergedAddThenMergedAddAndRemoveChildren() throws Exception {
+        RevisionVector br = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        assertExists("1:/a");
+        assertExists("1:/b");
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/a").wasDeletedOnce());
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/b").wasDeletedOnce());
+
+        store.runBackgroundOperations();
+
+        mergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+
+        store.runBackgroundOperations();
+
+        mergedBranchCommit(b -> {
+            b.child("a").remove();
+            b.child("b").remove();
+        });
+
+        store.runBackgroundOperations();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hour
+        VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS);
+
+        assertTrue(
+                "stats.updatedDetailedGCDocsCount expected 1 or less, was: "
+                        + stats.updatedDetailedGCDocsCount,
+                stats.updatedDetailedGCDocsCount <= 1);
+        assertEquals(2, stats.deletedDocGCCount);
+
+        assertNotExists("1:/a");
+        assertNotExists("1:/b");
+
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(0, stats.deletedDocGCCount);
+        assertBranchRevisionRemovedFromAllDocuments(br);
+    }
+
+    @Test
+    public void unmergedAddTwoChildren() throws Exception {
+        RevisionVector br1 = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        RevisionVector br2 = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        assertExists("1:/a");
+        assertExists("1:/b");
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/a").wasDeletedOnce());
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/b").wasDeletedOnce());
+
+        store.runBackgroundOperations();
+
+        Long originalModified = 
store.getDocumentStore().find(Collection.NODES, "1:/a")
+                .getModified();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hour
+        VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS);
+
+        Long laterModified = store.getDocumentStore().find(Collection.NODES, 
"1:/a")
+                .getModified();
+        assertNotEquals(originalModified, laterModified);
+
+        assertEquals(3, stats.updatedDetailedGCDocsCount);
+        assertEquals(0, stats.deletedDocGCCount);
+
+        assertExists("1:/a");
+        assertExists("1:/b");
+
+        // now do another gc to get those documents actually deleted
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(2, stats.deletedDocGCCount);
+        assertBranchRevisionRemovedFromAllDocuments(br1);
+        assertBranchRevisionRemovedFromAllDocuments(br2);
+    }
+
     @Test
-    public void orphanedBranchCommitDetect() throws Exception {
+    public void unmergedAddsThenMergedAddsChildren() throws Exception {
+        RevisionVector br1 = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        RevisionVector br2 = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        assertExists("1:/a");
+        assertExists("1:/b");
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/a").wasDeletedOnce());
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/b").wasDeletedOnce());
 
-        NodeBuilder b1 = store.getRoot().builder();
-        b1.child("a");
-        b1.child("b");
-        persistToBranch(b1);
+        store.runBackgroundOperations();
 
-        // b1 must see 'a' and 'b'
-        assertTrue(b1.hasChildNode("a"));
-        assertTrue(b1.hasChildNode("b"));
+        mergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
 
         store.runBackgroundOperations();
 
         // wait two hours
         clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
         // clean everything older than one hours
-        VersionGarbageCollector.VersionGCStats stats= gc.gc(1, HOURS);
+        VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS);
 
-        //This will fail as of now but will pass once BranchCommit GC code is 
merged.
-        assertEquals(1, stats.deletedDocGCCount);
+        assertTrue("should have been 2 or more, was: " + 
stats.updatedDetailedGCDocsCount,
+                stats.updatedDetailedGCDocsCount >= 2);
+        assertEquals(0, stats.deletedDocGCCount);
+
+        assertExists("1:/a");
+        assertExists("1:/b");
+
+        // now do another gc to get those documents actually deleted
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(0, stats.deletedDocGCCount);
+        assertBranchRevisionRemovedFromAllDocuments(br1);
+        assertBranchRevisionRemovedFromAllDocuments(br2);
     }
 
-    @Ignore
     @Test
-    public void orphanedModifiedBranchCommitDetect() throws Exception {
+    public void unmergedAddsThenMergedAddThenUnmergedRemovesChildren() throws 
Exception {
+        RevisionVector br1 = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        RevisionVector br2 = unmergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
+        assertExists("1:/a");
+        assertExists("1:/b");
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/a").wasDeletedOnce());
+        assertFalse(
+                store.getDocumentStore().find(Collection.NODES, 
"1:/b").wasDeletedOnce());
 
-        NodeBuilder b = store.getRoot().builder();
-        b.child("foo");
-        b.child("test");
-        persistToBranch(b);
+        store.runBackgroundOperations();
+
+        mergedBranchCommit(b -> {
+            b.child("a");
+            b.child("b");
+        });
 
-        // b must see 'foo' and 'test'
-        assertTrue(b.hasChildNode("foo"));
-        assertTrue(b.hasChildNode("test"));
+        store.runBackgroundOperations();
 
-        merge(b);
+        RevisionVector br3 = unmergedBranchCommit(b -> {
+            b.child("a").remove();
+            b.child("b").remove();
+        });
+        RevisionVector br4 = unmergedBranchCommit(b -> {
+            b.child("a").remove();
+            b.child("b").remove();
+        });
+
+        store.runBackgroundOperations();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hour
+        VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS);
 
-        b = store.getRoot().builder();
-        b.child("test").remove();
-        b.getChildNode("foo").child("childfoo");
-        persistToBranch(b);
+        assertEquals(3, stats.updatedDetailedGCDocsCount);
+        assertEquals(0, stats.deletedDocGCCount);
+
+        assertExists("1:/a");
+        assertExists("1:/b");
+
+        // now do another gc to get those documents actually deleted
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(0, stats.deletedDocGCCount);
+        assertBranchRevisionRemovedFromAllDocuments(br1);
+        assertBranchRevisionRemovedFromAllDocuments(br2);
+        assertBranchRevisionRemovedFromAllDocuments(br3);
+        assertBranchRevisionRemovedFromAllDocuments(br4);
+    }
+
+    private void assertNotExists(String id) {
+        NodeDocument doc = store.getDocumentStore().find(Collection.NODES, id);
+        assertNull("doc exists but was expected not to : id=" + id, doc);
+    }
+
+    private void assertExists(String id) {
+        NodeDocument doc = store.getDocumentStore().find(Collection.NODES, id);
+        assertNotNull("doc does not exist : id=" + id, doc);
+    }
+
+    @Test
+    public void unmergedAddAndRemoveChild() throws Exception {
+        mergedBranchCommit(b -> {
+            b.child("foo");
+            b.child("test");
+        });
+        RevisionVector br = unmergedBranchCommit(b -> {
+            b.child("test").remove();
+            b.getChildNode("foo").child("childfoo");
+        });
         store.runBackgroundOperations();
 
         // wait two hours
         clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
         // clean everything older than one hours
-        VersionGarbageCollector.VersionGCStats stats= gc.gc(1, HOURS);
+        VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS);
 
-        //This will fail as of now but will pass once BranchCommit GC code is 
merged.
+        // first gc round will only mark document for deleting by second round
+        assertEquals(0, stats.deletedDocGCCount);
+        assertEquals(4, stats.updatedDetailedGCDocsCount);
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // now do second gc round to get those documents actually deleted
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
         assertEquals(1, stats.deletedDocGCCount);
+        assertBranchRevisionRemovedFromAllDocuments(br);
+    }
+
+    private void assertBranchRevisionRemovedFromAllDocuments(RevisionVector 
br) {
+        assertTrue(br.isBranch());
+        Revision br1 = br.getRevision(1);
+        Revision r1 = br1.asTrunkRevision();
+        for (NodeDocument nd : 
Utils.getAllDocuments(store.getDocumentStore())) {
+            if (nd.getId().equals("0:/")) {
+                NodeDocument target = new 
NodeDocument(store.getDocumentStore());
+                nd.deepCopy(target);
+            }
+            System.out.println("nd=" + nd.asString());
+            if (nd.asString().contains(r1.toString())) {
+                System.out.println("break");
+            }
+            assertFalse("document not fully cleaned up: " + nd.asString(),
+                    nd.asString().contains(r1.toString()));
+        }
+    }
+
+    @Test
+    public void unmergedRemoveProperty() throws Exception {
+        mergedBranchCommit(b -> b.child("foo"));
+        mergedBranchCommit(b -> b.child("foo").setProperty("a", "b"));
+        // do a gc without waiting, to check that works fine
+        store.runBackgroundOperations();
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        RevisionVector br = unmergedBranchCommit(b -> 
b.child("foo").removeProperty("a"));
+        mergedBranchCommit(b -> b.child("foo").setProperty("c", "d"));
+        store.runBackgroundOperations();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hours
+        stats = gc.gc(1, HOURS);
+
+        assertEquals(2, stats.updatedDetailedGCDocsCount);
+        assertBranchRevisionRemovedFromAllDocuments(br);
+    }
+
+    @Test
+    public void unmergedAddProperty() throws Exception {
+        mergedBranchCommit(b -> b.child("foo"));
+        RevisionVector br = unmergedBranchCommit(
+                b -> b.child("foo").setProperty("a", "b"));
+        store.runBackgroundOperations();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hours
+        stats = gc.gc(1, HOURS);
+
+        assertEquals(2, stats.updatedDetailedGCDocsCount);
+        assertBranchRevisionRemovedFromAllDocuments(br);
+    }
+
+    @Test
+    public void unmergedRemoveChild() throws Exception {
+        mergedBranchCommit(b -> {
+            b.child("foo");
+            b.child("bar");
+        });
+        // do a gc without waiting, to check that works fine
+        store.runBackgroundOperations();
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        final List<RevisionVector> brs = new LinkedList<>();
+        for (int j = 0; j < 10; j++) {
+            brs.add(unmergedBranchCommit(b -> b.child("foo").remove()));
+        }
+        store.runBackgroundOperations();
+
+        NodeDocument doc = store.getDocumentStore().find(Collection.NODES, 
"1:/foo");
+        Long originalModified = doc.getModified();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hours
+        stats = gc.gc(1, HOURS);
+
+        assertEquals(2, stats.updatedDetailedGCDocsCount);
+
+        doc = store.getDocumentStore().find(Collection.NODES, "1:/foo");
+        Long finalModified = doc.getModified();
+
+        assertEquals(originalModified, finalModified);
+
+        for (RevisionVector br : brs) {
+            assertBranchRevisionRemovedFromAllDocuments(br);
+        }
     }
 
-    private void merge(NodeBuilder builder)
+    @Test
+    public void unmergedMergedRemoveChild() throws Exception {
+        mergedBranchCommit(b -> {
+            b.child("foo");
+            b.child("bar");
+        });
+        // do a gc without waiting, to check that works fine
+        store.runBackgroundOperations();
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        for (int i = 0; i < 50; i++) {
+            mergedBranchCommit(b -> b.child("foo").remove());
+            mergedBranchCommit(b -> b.child("foo"));
+        }
+        final List<RevisionVector> brs = new LinkedList<>();
+        for (int j = 0; j < 10; j++) {
+            brs.add(unmergedBranchCommit(b -> b.child("foo").remove()));
+        }
+        store.runBackgroundOperations();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hours
+        stats = gc.gc(1, HOURS);
+
+        assertEquals(2, stats.updatedDetailedGCDocsCount);
+        for (RevisionVector br : brs) {
+            assertBranchRevisionRemovedFromAllDocuments(br);
+        }
+    }
+
+    @Test
+    public void unmergedThenMergedRemoveProperty() throws Exception {
+        mergedBranchCommit(b -> b.child("foo"));
+        mergedBranchCommit(b -> b.child("foo").setProperty("a", "b"));
+        mergedBranchCommit(b -> b.child("foo").setProperty("c", "d"));
+        // do a gc without waiting, to check that works fine
+        store.runBackgroundOperations();
+        stats = gc.gc(1, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        RevisionVector br = unmergedBranchCommit(b -> {
+            b.setProperty("rootProp", "v");
+            b.child("foo").removeProperty("a");
+        });
+        store.runBackgroundOperations();
+        DocumentNodeStore store2 = newStore(2);
+        mergedBranchCommit(store2, b -> b.child("foo").removeProperty("a"));
+        store2.runBackgroundOperations();
+        store2.dispose();
+        store.runBackgroundOperations();
+
+        // wait two hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+        // clean everything older than one hours
+        stats = gc.gc(1, HOURS);
+
+        assertEquals(2, stats.updatedDetailedGCDocsCount);
+        assertBranchRevisionRemovedFromAllDocuments(br);
+    }
+
+    private DocumentNodeStore newStore(int clusterId) {
+        Builder builder = builderProvider.newBuilder().clock(clock)
+                
.setLeaseCheckMode(LeaseCheckMode.DISABLED).setDetailedGCEnabled(true)
+                .setAsyncDelay(0).setDocumentStore(store.getDocumentStore());
+        if (clusterId > 0) {
+            builder.setClusterId(clusterId);
+        }
+        DocumentNodeStore store2 = builder.getNodeStore();
+        return store2;
+    }
+
+    private RevisionVector mergedBranchCommit(Consumer<NodeBuilder> 
buildFunction)
             throws Exception {
-        store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        return build(store, true, true, buildFunction);
+    }
+
+    private RevisionVector mergedBranchCommit(NodeStore store,
+            Consumer<NodeBuilder> buildFunction) throws Exception {
+        return build(store, true, true, buildFunction);
+    }
+
+    private RevisionVector unmergedBranchCommit(Consumer<NodeBuilder> 
buildFunction)
+            throws Exception {
+        RevisionVector result = build(store, true, false, buildFunction);
+        assertTrue(result.isBranch());
+        return result;
+    }
+
+    private RevisionVector build(NodeStore store, boolean persistToBranch, 
boolean merge,
+            Consumer<NodeBuilder> buildFunction) throws Exception {
+        if (!persistToBranch && !merge) {
+            throw new IllegalArgumentException("must either persistToBranch or 
merge");
+        }
+        NodeBuilder b = store.getRoot().builder();
+        buildFunction.accept(b);
+        RevisionVector result = null;
+        if (persistToBranch) {
+            DocumentRootBuilder drb = (DocumentRootBuilder) b;
+            drb.persist();
+            DocumentNodeState ns = (DocumentNodeState) drb.getNodeState();
+            result = ns.getLastRevision();
+        }
+        if (merge) {
+            DocumentNodeState dns = (DocumentNodeState) merge(b);
+            result = dns.getLastRevision();
+        }
+        return result;
+    }
+
+    private NodeState merge(NodeBuilder builder) throws Exception {
+        return store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
     }
 
 }


Reply via email to