This is an automated email from the ASF dual-hosted git repository.

joscorbe pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 6ca71c76f3 OAK-11444 [full-gc] Save document id and empty properties 
names before deletion (#2038)
6ca71c76f3 is described below

commit 6ca71c76f33326ac7936da02baf6a04cb0af30c9
Author: Daniel Iancu <dan.i.ia...@gmail.com>
AuthorDate: Mon Mar 31 15:33:46 2025 +0300

    OAK-11444 [full-gc] Save document id and empty properties names before 
deletion (#2038)
---
 .../jackrabbit/oak/run/RevisionsCommand.java       |   9 +
 .../oak/plugins/document/Configuration.java        |   5 +
 .../plugins/document/DocumentNodeStoreBuilder.java |  10 +
 .../plugins/document/DocumentNodeStoreService.java |   1 +
 .../oak/plugins/document/FullGcNodeBin.java        |  82 ++++++++
 .../oak/plugins/document/VersionGCSupport.java     |   4 +
 .../plugins/document/VersionGarbageCollector.java  |  12 +-
 .../mongo/MongoDocumentNodeStoreBuilderBase.java   |   2 +-
 .../plugins/document/mongo/MongoDocumentStore.java |  17 ++
 .../plugins/document/mongo/MongoFullGcNodeBin.java | 171 ++++++++++++++++
 .../document/mongo/MongoVersionGCSupport.java      |  22 ++-
 .../document/rdb/RDBDocumentNodeStoreBuilder.java  |  12 ++
 .../DocumentNodeStoreServiceConfigurationTest.java |   8 +
 .../mongo/MongoDocumentNodeStoreBuilderTest.java   |   6 +
 .../document/mongo/MongoFullGcNodeBinTest.java     | 216 +++++++++++++++++++++
 .../rdb/RDBDocumentNodeStoreBuilderTest.java       |   7 +
 16 files changed, 569 insertions(+), 15 deletions(-)

diff --git 
a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java 
b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
index 07ed36ad35..0d9c611d1e 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
@@ -147,6 +147,7 @@ public class RevisionsCommand implements Command {
         final OptionSpec<Boolean> dryRun;
         final OptionSpec<Boolean> embeddedVerification;
         final OptionSpec<Integer> fullGcMode;
+        final OptionSpec<Boolean> fullGCAuditLoggingEnabled;
 
         RevisionsOptions(String usage) {
             super(usage);
@@ -208,6 +209,8 @@ public class RevisionsCommand implements Command {
                             "to be considered for Full GC i.e. Version Garbage 
Collector (Full GC) logic will only consider those " +
                             "nodes for Full GC which are not accessed recently 
(currentTime - lastModifiedTime > fullGcMaxAge). Default: 86400 (one day)")
                     
.withOptionalArg().ofType(Long.class).defaultsTo(TimeUnit.DAYS.toSeconds(1));
+            fullGCAuditLoggingEnabled = 
parser.accepts("fullGCAuditLoggingEnabled", "Enable audit logging for Full GC")
+                    .withOptionalArg().ofType(Boolean.class).defaultsTo(FALSE);
         }
 
         public RevisionsOptions parse(String[] args) {
@@ -306,6 +309,10 @@ public class RevisionsCommand implements Command {
         boolean doCompaction() {
             return options.has(compact);
         }
+
+        Boolean isFullGCAuditLoggingEnabled() {
+            return options.has(fullGCAuditLoggingEnabled);
+        }
     }
 
     @Override
@@ -375,6 +382,7 @@ public class RevisionsCommand implements Command {
         builder.setFullGCBatchSize(options.getFullGcBatchSize());
         builder.setFullGCProgressSize(options.getFullGcProgressSize());
         
builder.setFullGcMaxAgeMillis(SECONDS.toMillis(options.getFullGcMaxAge()));
+        
builder.setFullGCAuditLoggingEnabled(options.isFullGCAuditLoggingEnabled());
 
         // create a VersionGCSupport while builder is read-write
         VersionGCSupport gcSupport = builder.createVersionGCSupport();
@@ -408,6 +416,7 @@ public class RevisionsCommand implements Command {
         System.out.println("FullGcProgressSize is : " + 
options.getFullGcProgressSize());
         System.out.println("FullGcMaxAgeInSecs is : " + 
options.getFullGcMaxAge());
         System.out.println("FullGcMaxAgeMillis is : " + 
builder.getFullGcMaxAgeMillis());
+        System.out.println("FullGCAuditLoggingEnabled is : " + 
options.isFullGCAuditLoggingEnabled());
         VersionGarbageCollector gc = createVersionGC(builder.build(), 
gcSupport, options.isDryRun(), builder);
 
         VersionGCOptions gcOptions = gc.getOptions();
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
index 0c82ea8f2a..eb5e9e1f51 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
@@ -400,4 +400,9 @@ import static 
org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreServic
             name = "Invisible for discovery",
             description = "Boolean value indicating whether the instance 
should be discoverable by the cluster. The default value is " + 
DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY)
     boolean invisibleForDiscovery() default 
DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY;
+
+    @AttributeDefinition(
+        name = "Enable Full GC Persistent Audit Logging",
+        description = "This parameter will enable/disable the saving of 
deleted document IDs and properties during FullGC into a persistent storage, 
e.g Mongo collection")
+    boolean fullGCAuditLoggingEnabled() default false;
 }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
index 7b94796fe9..702cdca0bc 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
@@ -185,6 +185,7 @@ public class DocumentNodeStoreBuilder<T extends 
DocumentNodeStoreBuilder<T>> {
     private int fullGCBatchSize = 
DocumentNodeStoreService.DEFAULT_FGC_BATCH_SIZE;
     private int fullGCProgressSize = 
DocumentNodeStoreService.DEFAULT_FGC_PROGRESS_SIZE;
     private double fullGCDelayFactor = 
DocumentNodeStoreService.DEFAULT_FGC_DELAY_FACTOR;
+    private boolean fullGCAuditLoggingEnabled;
     private long suspendTimeoutMillis = DEFAULT_SUSPEND_TIMEOUT;
 
     /**
@@ -317,6 +318,15 @@ public class DocumentNodeStoreBuilder<T extends 
DocumentNodeStoreBuilder<T>> {
         return this.fullGCEnabled;
     }
 
+    public T setFullGCAuditLoggingEnabled(boolean b) {
+        this.fullGCAuditLoggingEnabled = b;
+        return thisBuilder();
+    }
+
+    public boolean isFullGCAuditLoggingEnabled() {
+        return this.fullGCAuditLoggingEnabled;
+    }
+
     public T setFullGCIncludePaths(@Nullable String[] includePaths) {
         if (isNull(includePaths) || includePaths.length == 0 || 
Arrays.equals(includePaths, new String[]{"/"})) {
             this.fullGCIncludePaths = Set.of();
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
index 780cc3b011..9790267abd 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
@@ -534,6 +534,7 @@ public class DocumentNodeStoreService {
                 setFullGCBatchSize(config.fullGCBatchSize()).
                 setFullGCProgressSize(config.fullGCProgressSize()).
                 setFullGCDelayFactor(config.fullGCDelayFactor()).
+                
setFullGCAuditLoggingEnabled(config.fullGCAuditLoggingEnabled()).
                 setSuspendTimeoutMillis(config.suspendTimeoutMillis()).
                 
setClusterIdReuseDelayAfterRecovery(config.clusterIdReuseDelayAfterRecoveryMillis()).
                 setRecoveryDelayMillis(config.recoveryDelayMillis()).
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java
new file mode 100644
index 0000000000..662b8ebe46
--- /dev/null
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document;
+
+import java.util.List;
+import java.util.Map;
+/**
+ *  This class is as a wrapper around DocumentStore that expose two methods 
used to clean garbage from NODES collection
+ *  public int remove(Map<String, Long> orphanOrDeletedRemovalMap)
+ *  public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList)
+ *  When enabled
+ *  Each method saves the document ID or empty properties names (that will be 
deleted) to a separate _bin collection as a BinDocument then delegates deletion 
to DocumentStore
+ *
+ *  When disabled (default)
+ *  Each method delegates directly to DocumentStore
+ */
+public interface FullGcNodeBin {
+
+    static FullGcNodeBin noBin(DocumentStore store) {
+        return new FullGcNodeBin() {
+            @Override
+            public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
+                return store.remove(Collection.NODES, 
orphanOrDeletedRemovalMap);
+            }
+
+            @Override
+            public List<NodeDocument> findAndUpdate(List<UpdateOp> 
updateOpList) {
+                return store.findAndUpdate(Collection.NODES, updateOpList);
+            }
+
+            @Override
+            public void setEnabled(boolean value) {
+                // no-op
+            }
+        };
+    }
+
+    /**
+     * Remove orphaned or deleted documents from the NODES collection
+     * If bin is enabled, the document IDs are saved to the SETTINGS 
collection with ID prefixed with '/bin/'
+     * If document ID cannot be saved then the removal of the document fails
+     * If the bin is disabled, the document IDs are directly removed from the 
NODES collection
+     *
+     * @param orphanOrDeletedRemovalMap the keys of the documents to remove 
with the corresponding timestamps
+     * @return the number of documents removed
+     * @see DocumentStore#remove(Collection, Map)
+     */
+    int remove(Map<String, Long> orphanOrDeletedRemovalMap);
+
+    /**
+     * Performs a conditional update
+     * If the bin is enabled, the removed properties are saved to the SETTINGS 
collection with ID prefixed with '/bin/' and empty value
+     * If the document ID and properties  cannot be saved then the removal of 
the property fails
+     * If bin is disabled, the removed properties are directly removed from 
the NODES collection
+     *
+     * @param updateOpList the update operation List
+     * @return the list containing old documents
+     * @see DocumentStore#findAndUpdate(Collection, List)
+     */
+    List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList);
+
+    /**
+     * Enable or disable the bin
+     * @param value true to enable, false to disable
+     */
+    void setEnabled(boolean value);
+}
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index fae8af8674..c1735d7369 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -307,4 +307,8 @@ public class VersionGCSupport {
         Revision r = IterableUtils.getFirst(doc.getAllChanges(), null);
         return r != null && sweepRevs.isRevisionNewer(r);
     }
+
+    public FullGcNodeBin getFullGCBin() {
+        return FullGcNodeBin.noBin(store);
+    }
 }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 7bf95c0b76..271604d61f 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -1947,15 +1947,9 @@ public class VersionGarbageCollector {
                 }
                 if (!isFullGCDryRun) {
                     // only delete these in case it is not a dryRun
-
                     if (!orphanOrDeletedRemovalMap.isEmpty()) {
-                        // use remove() with the modified check to rule
-                        // out any further race-condition where this removal
-                        // races with a un-orphan/re-creation as a result of 
which
-                        // the node should now not be removed. The modified 
check
-                        // ensures a node would then not be removed
-                        // (and as a result the removedSize != map.size())
-                        final int removedSize = ds.remove(NODES, 
orphanOrDeletedRemovalMap);
+
+                        final int removedSize = 
versionStore.getFullGCBin().remove(orphanOrDeletedRemovalMap);
                         stats.updatedFullGCDocsCount += removedSize;
                         stats.deletedDocGCCount += removedSize;
                         stats.deletedOrphanNodesCount += removedSize;
@@ -1973,7 +1967,7 @@ public class VersionGarbageCollector {
                     }
 
                     if (!updateOpList.isEmpty()) {
-                        List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, 
updateOpList);
+                        List<NodeDocument> oldDocs = 
versionStore.getFullGCBin().findAndUpdate(updateOpList);
 
 
                         int deletedProps = 
oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> 
deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum();
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
index 46f6f7decd..1f69130b4e 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
@@ -172,7 +172,7 @@ public abstract class MongoDocumentNodeStoreBuilderBase<T 
extends MongoDocumentN
     public VersionGCSupport createVersionGCSupport() {
         DocumentStore store = getDocumentStore();
         if (store instanceof MongoDocumentStore) {
-            return new MongoVersionGCSupport((MongoDocumentStore) store);
+            return new MongoVersionGCSupport((MongoDocumentStore) store, 
isFullGCAuditLoggingEnabled());
         } else {
             return super.createVersionGCSupport();
         }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
index b24014fe1e..060a90d2a6 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
@@ -42,6 +42,7 @@ import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 
 import org.apache.commons.io.IOUtils;
+import com.mongodb.client.model.IndexOptions;
 import org.apache.jackrabbit.guava.common.base.Stopwatch;
 import org.apache.jackrabbit.guava.common.collect.Iterators;
 import org.apache.jackrabbit.guava.common.util.concurrent.AtomicDouble;
@@ -166,6 +167,8 @@ public class MongoDocumentStore implements DocumentStore {
      * which we block any data modification operation when system has been 
throttled.
      */
     public static final long DEFAULT_THROTTLING_TIME_MS = 
Long.getLong("oak.mongo.throttlingTime", 20);
+
+    private static final @NotNull String BIN_COLLECTION = "bin";
     /**
      * nodeNameLimit for node name based on Mongo Version
      */
@@ -348,6 +351,9 @@ public class MongoDocumentStore implements DocumentStore {
 
         if (!readOnly) {
             ensureIndexes(db, status);
+            if (builder.isFullGCAuditLoggingEnabled()) {
+                ensureFullGcTTLIndex();
+            }
         }
 
         this.nodeLocks = new StripedNodeDocumentLocks();
@@ -465,6 +471,13 @@ public class MongoDocumentStore implements DocumentStore {
         createIndex(journal, JournalEntry.MODIFIED, true, false, false);
     }
 
+    private void ensureFullGcTTLIndex() {
+        //TTL index for full GC bin documents to expire after 90 days
+        //see https://issues.apache.org/jira/browse/OAK-11444
+        IndexOptions indexOptions = new 
IndexOptions().expireAfter(TimeUnit.DAYS.toSeconds(90), TimeUnit.SECONDS);
+        connection.getCollection(BIN_COLLECTION).createIndex(new 
org.bson.Document(MongoFullGcNodeBin.GC_COLLECTED_AT, 1), indexOptions);
+    }
+
     private void createCollection(MongoDatabase db, String collectionName, 
MongoStatus mongoStatus) {
         CreateCollectionOptions options = new CreateCollectionOptions();
 
@@ -2011,6 +2024,10 @@ public class MongoDocumentStore implements DocumentStore 
{
         return getDBCollection(collection).withReadPreference(readPreference);
     }
 
+    <T extends Document> MongoCollection<BasicDBObject> getBinCollection() {
+        return this.connection.getCollection(BIN_COLLECTION);
+    }
+
     MongoDatabase getDatabase() {
         return connection.getDatabase();
     }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
new file mode 100644
index 0000000000..8a8d837222
--- /dev/null
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import com.mongodb.BasicDBObject;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStore;
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.slf4j.LoggerFactory.getLogger;
+
+import java.time.Instant;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ *  This class is as a wrapper around DocumentStore that expose two methods 
used to clean garbage from NODES collection
+ *  public int remove(Map<String, Long> orphanOrDeletedRemovalMap)
+ *  public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList)
+ *  When enabled
+ *  Each method saves the document ID or empty properties names (that will be 
deleted) to a separate _bin collection as a BinDocument then delegates deletion 
to DocumentStore
+ *
+ *  When disabled (default)
+ *  Each method delegates directly to DocumentStore
+ */
+public class MongoFullGcNodeBin implements FullGcNodeBin {
+    public static final String GC_COLLECTED_AT = "_gcCollectedAt";
+    private static final Logger LOG = 
LoggerFactory.getLogger(MongoFullGcNodeBin.class);
+
+    private final MongoDocumentStore mongoDocumentStore;
+    private boolean enabled;
+
+    public MongoFullGcNodeBin(MongoDocumentStore ds) {
+        this(ds, false);
+    }
+
+    public MongoFullGcNodeBin(MongoDocumentStore store, boolean 
fullGcBinEnabled) {
+        mongoDocumentStore = store;
+        enabled = fullGcBinEnabled;
+    }
+
+    /**
+     * Remove orphaned or deleted documents from the NODES collection
+     * If bin is enabled, the document IDs are saved to the BIN collection 
with ID prefixed with '/bin/'
+     * If document ID cannot be saved then the removal of the document fails
+     * If the bin is disabled, the document IDs are directly removed from the 
NODES collection
+     *
+     * @param orphanOrDeletedRemovalMap the keys of the documents to remove 
with the corresponding timestamps
+     * @return the number of documents removed
+     * @see DocumentStore#remove(Collection, Map)
+     */
+    @Override
+    public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
+        if (orphanOrDeletedRemovalMap.isEmpty() || 
!addToBin(orphanOrDeletedRemovalMap)) {
+            return 0;
+        }
+
+        // use remove() with the modified check to rule
+        // out any further race-condition where this removal
+        // races with a un-orphan/re-creation as a result of which
+        // the node should now not be removed. The modified check
+        // ensures a node would then not be removed
+        // (and as a result the removedSize != map.size())
+        return mongoDocumentStore.remove(Collection.NODES, 
orphanOrDeletedRemovalMap);
+    }
+
+
+    /**
+     * Performs a conditional update
+     * If the bin is enabled, the removed properties are saved to the BIN 
collection with ID prefixed with '/bin/' and empty value
+     * If the document ID and properties  cannot be saved then the removal of 
the property fails
+     * If bin is disabled, the removed properties are directly removed from 
the NODES collection
+     *
+     * @param updateOpList the update operation List
+     * @return the list containing old documents
+     * @see DocumentStore#findAndUpdate(Collection, List)
+     */
+    @Override
+    public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) {
+        LOG.info("Updating {} documents", updateOpList.size());
+        if (updateOpList.isEmpty() || !addToBin(updateOpList)) {
+            return Collections.emptyList();
+        }
+        return mongoDocumentStore.findAndUpdate(Collection.NODES, 
updateOpList);
+    }
+
+    private boolean addToBin(Map<String, Long> orphanOrDeletedRemovalMap) {
+        if (!enabled) {
+            LOG.info("Bin is disabled, skipping adding delete candidate 
documents to bin");
+            return true;
+        }
+        LOG.info("Adding {} delete candidate documents to bin", 
orphanOrDeletedRemovalMap.size());
+        List<BasicDBObject> docs = orphanOrDeletedRemovalMap.keySet().stream()
+            .map(e -> new UpdateOp(e, true))
+            .map(this::toBasicDBObject)
+            .collect(Collectors.toList());
+        try {
+            return persist(docs);
+        } catch (Exception e) {
+            LOG.error("Error while adding delete candidate documents to bin: 
{}", docs, e);
+        }
+        return false;
+    }
+
+    private boolean addToBin(List<UpdateOp> updateOpList) {
+        if (!enabled) {
+            LOG.info("Bin is disabled, skipping adding removed properties to 
bin");
+            return true;
+        }
+        LOG.info("Adding {} removed properties to bin", updateOpList.size());
+        List<BasicDBObject> binOpList = 
updateOpList.stream().map(this::toBasicDBObject).collect(Collectors.toList());
+        try {
+            return persist(binOpList);
+        } catch (Exception e) {
+            LOG.error("Error while adding removed properties to bin: {}", 
binOpList, e);
+        }
+        return false;
+    }
+
+    private boolean persist(List<BasicDBObject> inserts) {
+        mongoDocumentStore.getBinCollection().insertMany(inserts);
+        return true;
+    }
+
+    private BasicDBObject toBasicDBObject(UpdateOp op) {
+        BasicDBObject doc = new BasicDBObject();
+        doc.put(Document.ID, "/bin/" + op.getId() + "-" + 
Instant.now().toEpochMilli());
+        //copy removed properties to the new document
+        op.getChanges().forEach((k, v) -> {
+            if (v.type == UpdateOp.Operation.Type.REMOVE) {
+                doc.put(k.getName(), "");
+            }
+        });
+        //this property is used to track the time when the document was added 
to the bin
+        //it can be used as a TTL index property to automatically remove the 
document after a certain time
+        //see 
https://www.mongodb.com/docs/manual/core/index-ttl/#std-label-index-feature-ttl
+        doc.put(MongoFullGcNodeBin.GC_COLLECTED_AT, new Date());
+        return doc;
+    }
+
+    @Override
+    public void setEnabled(boolean value) {
+        this.enabled = value;
+        LOG.info("Full GC Bin changed to {}", enabled ? "enabled" : 
"disabled");
+    }
+
+    public boolean isEnabled() {
+        return enabled;
+    }
+}
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index ffc02602fd..931a7b2d19 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -29,8 +29,10 @@ import static java.util.Optional.ofNullable;
 import static com.mongodb.client.model.Filters.and;
 import static com.mongodb.client.model.Filters.lt;
 import static java.util.Collections.emptyList;
+import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
@@ -109,10 +111,15 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
     /**
      * The batch size for the query of possibly deleted docs.
      */
-    private final int batchSize = Integer.getInteger(
-            "oak.mongo.queryDeletedDocsBatchSize", 1000);
+    private final int batchSize = SystemPropertySupplier.create(
+        "oak.mongo.queryDeletedDocsBatchSize", 1000).get();
+    private final MongoFullGcNodeBin fullGcBin;
 
     public MongoVersionGCSupport(MongoDocumentStore store) {
+        this(store, false);
+    }
+
+    public MongoVersionGCSupport(MongoDocumentStore store, boolean 
fullGcBinEnabled) {
         super(store);
         this.store = store;
         if(hasIndex(getNodeCollection(), SD_TYPE, SD_MAX_REV_TIME_IN_SECS)) {
@@ -129,6 +136,7 @@ public class MongoVersionGCSupport extends VersionGCSupport 
{
         } else {
             modifiedIdHint = null;
         }
+        this.fullGcBin = new MongoFullGcNodeBin(store, fullGcBinEnabled);
     }
 
     @Override
@@ -241,9 +249,8 @@ public class MongoVersionGCSupport extends VersionGCSupport 
{
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
                                                   @NotNull final String 
fromId, @NotNull Set<String> includedPathPrefixes,
                                                   @NotNull Set<String> 
excludedPathPrefixes) {
-        LOG.info("getModifiedDocs fromModified: {}, toModified: {}, limit: {}, 
fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}",
-                fromModified, toModified, limit, fromId, includedPathPrefixes, 
excludedPathPrefixes);
-
+        LOG.info("getModifiedDocs fromModified: {} ({}), toModified: {} ({}), 
limit: {}, fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}",
+                fromModified, Utils.timestampToString(fromModified), 
toModified, Utils.timestampToString(toModified), limit, fromId, 
includedPathPrefixes, excludedPathPrefixes);
         final long fromModifiedQuery;
         if (MIN_ID_VALUE.equals(fromId)) {
             // If fromId is MIN_ID_VALUE, round fromModified to 5 second 
resolution
@@ -476,6 +483,11 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
         LOG.debug(sb.toString());
     }
 
+    @Override
+    public FullGcNodeBin getFullGCBin() {
+        return fullGcBin;
+    }
+
     private static String getID(BasicDBObject document) {
         return String.valueOf(document.get(Document.ID));
     }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
index 18cfb61f60..bff7f0ff17 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
@@ -136,6 +136,18 @@ public class RDBDocumentNodeStoreBuilder
         return thisBuilder();
     }
 
+    @Override
+    public boolean isFullGCAuditLoggingEnabled() {
+        // fullGC is non supported for RDB
+        return false;
+    }
+
+    @Override
+    public RDBDocumentNodeStoreBuilder setFullGCAuditLoggingEnabled(boolean b) 
{
+        // fullGC is non supported for RDB
+        return thisBuilder();
+    }
+
     @Override
     public Set<String> getFullGCIncludePaths() {
         return of();
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
index 58a29042f5..2e9429a9ad 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
@@ -104,6 +104,7 @@ public class DocumentNodeStoreServiceConfigurationTest {
         assertEquals(DEFAULT_EMBEDDED_VERIFICATION_ENABLED, 
config.embeddedVerificationEnabled());
         assertEquals(DocumentNodeStoreService.DEFAULT_FULL_GC_MAX_AGE, 
config.fullGcMaxAgeInSecs());
         assertEquals(CommitQueue.DEFAULT_SUSPEND_TIMEOUT, 
config.suspendTimeoutMillis());
+        assertFalse(config.fullGCAuditLoggingEnabled());
     }
 
     @Test
@@ -170,6 +171,13 @@ public class DocumentNodeStoreServiceConfigurationTest {
         assertEquals(batchSize, config.fullGCBatchSize());
     }
 
+    @Test
+    public void fullGCAuditLoggingEnabled() throws Exception {
+        addConfigurationEntry(preset, "fullGCAuditLoggingEnabled", true);
+        Configuration config = createConfiguration();
+        assertTrue(config.fullGCAuditLoggingEnabled());
+    }
+
     @Test
     public void invisibleForDiscoveryFalse() throws Exception {
         boolean batchSize = false;
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
index 0c3f82f36b..d900d10f71 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
@@ -132,4 +132,10 @@ public class MongoDocumentNodeStoreBuilderTest {
         final int fullGcModeNone = 0;
         assertEquals(builder.getFullGCMode(), fullGcModeNone);
     }
+
+    @Test
+    public void isFullGCAuditLoggingEnabled() {
+        MongoDocumentNodeStoreBuilder builder = new 
MongoDocumentNodeStoreBuilder();
+        assertFalse(builder.isFullGCAuditLoggingEnabled());
+    }
 }
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java
new file mode 100644
index 0000000000..f2dfdf9d34
--- /dev/null
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.client.MongoCollection;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+import org.junit.After;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import static org.mockito.ArgumentMatchers.anyList;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.ArgumentMatchers.eq;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoInteractions;
+import static org.mockito.Mockito.when;
+import org.mockito.MockitoAnnotations;
+
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class MongoFullGcNodeBinTest {
+
+    private static final List<NodeDocument> FIND_AND_UPDATE_RETURN_VALUE = 
List.of();
+    @Mock
+    MongoDocumentStore documentStore;
+
+
+    MongoFullGcNodeBin fullGcBin;
+
+    @Mock MongoCollection<BasicDBObject> mockBinCollection;
+
+
+    @Before
+    public void setUp() throws Exception {
+        MockitoAnnotations.openMocks(this);
+        fullGcBin = new MongoFullGcNodeBin(documentStore, true);
+        when(documentStore.remove(eq(Collection.NODES), 
anyMap())).thenAnswer(invocation -> {
+            Map<String, Long> map = invocation.getArgument(1);
+            return map.size();
+        });
+
+        when(documentStore.findAndUpdate(eq(Collection.NODES), 
anyList())).thenAnswer(invocation -> {
+            return FIND_AND_UPDATE_RETURN_VALUE;
+        });
+
+        when(documentStore.getBinCollection()).thenReturn(mockBinCollection);
+    }
+
+    @After
+    public void tearDown() {
+        Mockito.reset(documentStore, mockBinCollection);
+    }
+
+    @Test
+    public void defaultDisabled() {
+        assertFalse(new MongoFullGcNodeBin(this.documentStore).isEnabled());
+    }
+
+    @Test
+    public void enableWithConstructor() {
+        assertTrue(new MongoFullGcNodeBin(this.documentStore, 
true).isEnabled());
+    }
+
+    @Test
+    public void remove() {
+        Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>();
+        orphanOrDeletedRemovalMap.put("key1", 1L);
+        orphanOrDeletedRemovalMap.put("key2", 2L);
+
+
+        int removed = fullGcBin.remove(orphanOrDeletedRemovalMap);
+
+        //verify returned value
+        assertEquals(orphanOrDeletedRemovalMap.size(), removed);
+
+        //verify removed documents are added to bin
+        ArgumentCaptor<List<BasicDBObject>> argumentCaptor = 
ArgumentCaptor.forClass(List.class);
+        verify(mockBinCollection).insertMany(argumentCaptor.capture());
+        assertEquals(orphanOrDeletedRemovalMap.size(), 
argumentCaptor.getValue().size());
+        
assertTrue(argumentCaptor.getValue().get(0).get(Document.ID).toString().matches("^\\/bin\\/key1\\-\\d+$"));
+        
assertTrue(argumentCaptor.getValue().get(1).get(Document.ID).toString().matches("^\\/bin\\/key2\\-\\d+$"));
+
+        //verify documents are removed
+        verify(documentStore).remove(Collection.NODES, 
orphanOrDeletedRemovalMap);
+    }
+
+    @Test
+    public void removeWhenCopyToBinFails() {
+        Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>();
+        orphanOrDeletedRemovalMap.put("key", 1L);
+        doThrow(new RuntimeException("Error while adding documents to 
bin")).when(mockBinCollection).insertMany(anyList());
+
+        int removed = fullGcBin.remove(orphanOrDeletedRemovalMap);
+
+        assertEquals(0, removed);
+        verify(documentStore, never()).remove(Collection.NODES, 
orphanOrDeletedRemovalMap);
+    }
+
+    @Test
+    public void removeEmptyMap() {
+        int removed = fullGcBin.remove(Map.of());
+        assertEquals(0, removed);
+        Mockito.verifyNoInteractions(documentStore);
+    }
+
+    @Test
+    public void removeWhenBinDisabled() {
+        fullGcBin.setEnabled(false);
+        Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>();
+        orphanOrDeletedRemovalMap.put("key", 1L);
+
+        fullGcBin.remove(orphanOrDeletedRemovalMap);
+
+        verify(mockBinCollection, never()).insertMany(anyList());
+    }
+
+    @Test
+    public void findAndUpdate() {
+        UpdateOp doc1 = new UpdateOp("doc1", false);
+        doc1.remove("prop1.1");
+        doc1.set("prop1.2", "value1.2");
+        UpdateOp doc2 = new UpdateOp("doc2", false);
+        doc2.remove("prop2.1");
+        doc2.remove("prop2.2");
+
+        List<UpdateOp> properties = List.of(doc1, doc2);
+        List<NodeDocument> modifiedDocs = fullGcBin.findAndUpdate(properties);
+
+        //verify removed properties are added to bin
+        ArgumentCaptor<List<BasicDBObject>> argumentCaptor = 
ArgumentCaptor.forClass(List.class);
+        verify(mockBinCollection).insertMany(argumentCaptor.capture());
+
+        List<BasicDBObject> binOpList = argumentCaptor.getValue();
+        BasicDBObject binDoc1 = binOpList.get(0);
+
+
+        
assertTrue(binDoc1.get(Document.ID).toString().matches("^\\/bin\\/doc1\\-\\d+$"));
+        assertTrue(binDoc1.containsField("prop1.1"));
+        assertFalse(binDoc1.containsField("prop1.2"));//only removed props are 
saved
+        assertGcTimestampAdded(binDoc1);
+
+        BasicDBObject binDoc2 = binOpList.get(1);
+        
assertTrue(binDoc2.get(Document.ID).toString().matches("^\\/bin\\/doc2\\-\\d+$"));
+        assertTrue(binDoc2.containsField("prop2.1"));
+        assertTrue(binDoc2.containsField("prop2.2"));
+
+        assertGcTimestampAdded(binDoc2);
+
+
+        //verify removed properties are removed from the original document
+        verify(documentStore).findAndUpdate(Collection.NODES, properties);
+
+        //verify returned value
+        assertEquals(FIND_AND_UPDATE_RETURN_VALUE, modifiedDocs);
+    }
+
+    private static void assertGcTimestampAdded(BasicDBObject binDoc2) {
+        assertTrue(binDoc2.containsField("_gcCollectedAt"));
+        assertTrue(binDoc2.get("_gcCollectedAt") instanceof Date);
+    }
+
+    @Test
+    public void findAndUpdateWhenCopyToBinFails() {
+        doThrow(new RuntimeException("Error while adding documents to 
bin")).when(mockBinCollection).insertMany(anyList());
+        UpdateOp doc1 = new UpdateOp("doc1", false);
+        doc1.remove("prop1");
+        fullGcBin.findAndUpdate(List.of(doc1));
+        verify(documentStore, never()).findAndUpdate(eq(Collection.NODES), 
anyList());
+    }
+
+    @Test
+    public void findAndUpdateWhenBinDisabled() {
+        fullGcBin.setEnabled(false);
+        UpdateOp doc1 = new UpdateOp("doc1", false);
+        doc1.remove("prop1");
+        fullGcBin.findAndUpdate(List.of(doc1));
+        verify(mockBinCollection, never()).insertMany(anyList());
+    }
+
+    @Test
+    public void findAndUpdateWhenEmptyList() {
+        fullGcBin.findAndUpdate(List.of());
+        verifyNoInteractions(documentStore);
+    }
+}
\ No newline at end of file
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
index 1aa9d47c6d..642b05e20e 100755
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
@@ -121,4 +121,11 @@ public class RDBDocumentNodeStoreBuilderTest {
         builder.setFullGcMaxAgeMillis(30 * 24 * 60 * 60 * 1000L);
         assertEquals(0, builder.getFullGcMaxAgeMillis());
     }
+
+    @Test
+    public void fullGcAuditLoggingEnabled() {
+        RDBDocumentNodeStoreBuilder builder = new 
RDBDocumentNodeStoreBuilder();
+        builder.setFullGCAuditLoggingEnabled(true);
+        assertFalse(builder.isFullGCAuditLoggingEnabled());
+    }
 }


Reply via email to