rishabhdaim commented on code in PR #1090:
URL: https://github.com/apache/jackrabbit-oak/pull/1090#discussion_r1314932211


##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -902,6 +905,206 @@ private void collectDeletedProperties(final NodeDocument 
doc, final GCPhases pha
             }
         }
 
+        private void collectUnmergedBranchCommitDocument(final NodeDocument 
doc,
+                long toModifiedMillis, final GCPhases phases, final UpdateOp 
updateOp) {
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)) {
+                // GC was cancelled, stop
+                return;
+            }
+
+            // from
+            // 
https://jackrabbit.apache.org/oak/docs/nodestore/documentmk.html#previous-documents
+            // "branch commits are not moved to previous documents until the 
branch is
+            // merged."
+            // i.e. if we're looking for unmerged branch commits, they cannot 
be in
+            // previous documents, they have to be in the main one - hence we 
have to use
+            // getLocalBranchCommits here
+            final Set<Revision> localBranchCommits = 
doc.getLocalBranchCommits();
+            if (localBranchCommits.isEmpty()) {
+                // nothing to do then
+                phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC);
+                return;
+            }
+
+            // !Note, the _bc sub-document was introduced with Oak 1.8 and is 
not present
+            // in older versions. The branch commit revision is added to _bc 
whenever a
+            // change is done on a document with a branch commit. This helps 
the
+            // DocumentNodeStore to more easily identify branch commit 
changes."
+            // The current implementation of 
"collectUnmergedBranchCommitDocument" only
+            // supports branch commits that are created after Oak 1.8
+            for (Revision bcRevision : localBranchCommits) {
+                if (!isRevisionOlderThan(bcRevision, toModifiedMillis)) {
+                    // only even consider revisions that are older than the 
provided
+                    // timestamp - otherwise skip this
+                    continue;
+                }
+                final String commitValue = 
nodeStore.getCommitValue(bcRevision, doc);
+                if (isCommitted(commitValue)) {
+                    // obviously don't do anything with merged (committed) 
branch commits
+                    continue;
+                }
+                removeUnmergedBCRevision(bcRevision, doc, updateOp);
+            }
+            // now for any of the handled system properties (the normal 
properties would
+            // already be cleaned up by cleanupDeletedProperties), the 
resulting
+            // subdocument could in theory become empty after removing all 
unmerged branch
+            // commit revisions is done later.
+            // so we need to go through all of them and check if we'd have 
removed
+            // the entirety - and then, instead of individually remove 
revisions, just
+            // delete the entire property.
+            if (updateOp.hasChanges()) {
+                for (Entry<String, Integer> e : 
getSystemRemoveMapEntryCounts(updateOp)
+                        .entrySet()) {
+                    final String prop = e.getKey();
+                    final Object d = doc.data.get(prop);
+                    if (!(d instanceof Map)) {
+                        // unexpected and would likely indicate a bug, hence 
log.error
+                        log.error(
+                                "collectUnmergedBranchCommitDocument: property 
without subdocument as expected. id={}, prop={}",
+                                doc.getId(), prop);
+                        continue;
+                    }
+                    @SuppressWarnings("rawtypes")
+                    final Map m = (Map) d;
+                    if (m.size() != e.getValue()) {
+                        // then we're not removing all revisions - so cannot 
cleanup
+                        continue;
+                    }
+                    // then we're removing all revisions - so replace those 
REMOVE_MAP_ENTRY
+                    // with one whole remove(prop)
+                    final Iterator<Entry<Key, Operation>> it = 
updateOp.getChanges().entrySet()
+                            .iterator();
+                    while (it.hasNext()) {
+                        if (it.next().getKey().getName().equals(prop)) {
+                            it.remove();
+                        }
+                    }
+                    updateOp.remove(prop);
+                }

Review Comment:
   ```suggestion
   getSystemRemoveMapEntryCounts(updateOp)
                               .entrySet().stream()
                               .filter(e -> filterEmptyProps(doc, e.getKey(), 
e.getValue()))
                               .forEach(e -> {
                                   final String prop = e.getKey();
                                   
updateOp.getChanges().entrySet().removeIf(opEntry -> Objects.equals(prop, 
opEntry.getKey().getName()));
                                   updateOp.remove(prop);
                       });
   
                   /**
            * Filter all the would be empty system properties (after cleanup 
operation).
            * <p>
            * It verfies this by comparing the size of sub-document with given 
<code>value</code>
            *
            * @param doc {@link NodeDocument} on whose properties needs to be 
checked
            * @param prop Name of sub-document which needs to checked whether 
it would be empty after cleanup or not
            * @param value expected no. of entries
            * @return true if sub-document would eventually be empty or not
            */
           private boolean filterEmptyProps(final NodeDocument doc, final 
String prop, final int value) {
               final Object d = doc.data.get(prop);
               if (d instanceof Map) {
                   @SuppressWarnings("rawtypes") final Map m = (Map) d;
                   // then we're not removing all revisions - so cannot cleanup
                   return m.size() == value;
               } else {
                   // unexpected and would likely indicate a bug, hence 
log.error
                   log.error("collectUnmergedBranchCommitDocument: property 
without subdocument as expected. id={}, prop={}", doc.getId(), prop);
                   return false;
               }
           }
           
           
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to