This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch OAK-11875 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit a598c4f6046d7101efe6b683225b706413984330 Author: rishabhdaim <rishabhdaim1...@gmail.com> AuthorDate: Wed Sep 3 13:36:52 2025 +0530 OAK-11875 : avoid removing props/docs if they have empty split props --- .../oak/plugins/document/NodeDocument.java | 25 ++++++++ .../plugins/document/VersionGarbageCollector.java | 19 ++++++ .../oak/plugins/document/NodeDocumentTest.java | 69 ++++++++++++++++++++++ 3 files changed, 113 insertions(+) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java index d21d932806..0ed7a425a2 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java @@ -1850,6 +1850,31 @@ public final class NodeDocument extends Document { .collect(toSet()); } + /** + * Returns names of all the properties that exist in previous/split documents. + * <p> + * Note: property names returned are escaped + * + * @return Set of property names (escaped) that exist in split documents + * @see Utils#unescapePropertyName(String) + * @see Utils#escapePropertyName(String) + */ + @NotNull + Set<String> getSplitPropertyNames() { + // If there are no previous documents, no properties are in split documents + if (getPreviousRanges().isEmpty()) { + return Collections.emptySet(); + } + + return getPropertyNames().stream() + .filter(property -> { + // Check if this property exists in any previous document + Iterator<NodeDocument> prevDocs = getPreviousDocs(property, null).iterator(); + return prevDocs.hasNext(); // True if at least one previous doc has this property + }) + .collect(toSet()); + } + /** * @return the {@link #REVISIONS} stored on this document. */ diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index c41c943c54..a7b92f1c9c 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -44,6 +44,7 @@ import java.util.stream.StreamSupport; import org.apache.jackrabbit.oak.commons.collections.IterableUtils; import org.apache.jackrabbit.oak.commons.collections.IteratorUtils; +import org.apache.jackrabbit.oak.commons.collections.SetUtils; import org.apache.jackrabbit.oak.commons.sort.StringSort; import org.apache.jackrabbit.oak.commons.time.Stopwatch; import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key; @@ -1477,6 +1478,24 @@ public class VersionGarbageCollector { .map(p -> p.stream().map(Utils::escapePropertyName).collect(toSet())) .orElse(emptySet()); + // OAK-11875 + final Set<String> splitProps = doc.getSplitPropertyNames(); + // Skip optimization if there are no split properties + if (!splitProps.isEmpty()) { + // Only calculate difference if we have split properties + Set<String> propsToBeDeleted = SetUtils.difference(properties, retainPropSet); + + // Check for intersection between sets directly + if (!Collections.disjoint(splitProps, propsToBeDeleted)) { + if (AUDIT_LOG.isInfoEnabled()) { + AUDIT_LOG.info("<Skipping> empty props deletion in [{}] due to presence of deleted Split Properties.", doc.getId()); + } + phases.stop(GCPhase.FULL_GC_COLLECT_PROPS); + return; + } + } + + final int deletedPropsCount = properties.stream() .filter(p -> !retainPropSet.contains(p)) .mapToInt(x -> { diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java index bac2076e62..a6deb3ba7e 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java @@ -1309,6 +1309,75 @@ public class NodeDocumentTest { ns.dispose(); } + @Test + public void testGetSplitPropertyNames() { + // Setup a document store + DocumentStore store = new MemoryDocumentStore(); + + // Create a root document with some properties + String rootId = Utils.getIdFromPath(Path.ROOT); + UpdateOp rootOp = new UpdateOp(rootId, true); + + // Add properties to root document + Revision r1 = new Revision(1, 0, 1); + rootOp.setMapEntry("onlyInRoot", r1, "value"); + rootOp.setMapEntry("inBothDocs", r1, "rootValue"); + NodeDocument.setRevision(rootOp, r1, "c"); + + // Create a previous document with some properties + Revision r2 = new Revision(2, 0, 1); + String prevId = Utils.getPreviousIdFor(Path.ROOT, r2, 0); + UpdateOp prevOp = new UpdateOp(prevId, true); + + // Add properties to previous document + prevOp.setMapEntry("onlyInPrev", r2, "value"); + prevOp.setMapEntry("inBothDocs", r2, "prevValue"); + NodeDocument.setRevision(prevOp, r2, "c"); + + // Set up the previous range in the root document + NodeDocument.setPrevious(rootOp, new Range(r2, r2, 0)); + + // Create the documents + store.create(NODES, Arrays.asList(rootOp, prevOp)); + + // Get the root document and check its exclusive properties + NodeDocument rootDoc = store.find(NODES, rootId); + Set<String> splitPropertyNames = rootDoc.getSplitPropertyNames(); + + // Verify only properties exclusive to the root document are returned + assertFalse(splitPropertyNames.contains("onlyInRoot")); + assertFalse(splitPropertyNames.contains("onlyInPrev")); + assertTrue(splitPropertyNames.contains("inBothDocs")); + } + + @Test + public void testGetSplitPropertyNamesNoPreviousDocs() { + // Setup a document store + DocumentStore store = new MemoryDocumentStore(); + + // Create a root document with some properties + String rootId = Utils.getIdFromPath(Path.ROOT); + UpdateOp rootOp = new UpdateOp(rootId, true); + + // Add properties to root document + Revision r1 = new Revision(1, 0, 1); + rootOp.setMapEntry("prop1", r1, "value1"); + rootOp.setMapEntry("prop2", r1, "value2"); + NodeDocument.setRevision(rootOp, r1, "c"); + + // Create the document + store.create(NODES, Collections.singletonList(rootOp)); + + // Get the root document and check its exclusive properties + NodeDocument rootDoc = store.find(NODES, rootId); + Set<String> splitPropertyNames = rootDoc.getSplitPropertyNames(); + Set<String> allProps = rootDoc.getPropertyNames(); + + // When no previous documents, all properties should be exclusive + assertTrue(Collections.disjoint(splitPropertyNames, allProps)); + assertTrue(splitPropertyNames.isEmpty()); + } + private DocumentNodeStore createTestStore(int numChanges) throws Exception { return createTestStore(new MemoryDocumentStore(), 0, numChanges); }