This is an automated email from the ASF dual-hosted git repository. joscorbe pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push: new 6ca71c76f3 OAK-11444 [full-gc] Save document id and empty properties names before deletion (#2038) 6ca71c76f3 is described below commit 6ca71c76f33326ac7936da02baf6a04cb0af30c9 Author: Daniel Iancu <dan.i.ia...@gmail.com> AuthorDate: Mon Mar 31 15:33:46 2025 +0300 OAK-11444 [full-gc] Save document id and empty properties names before deletion (#2038) --- .../jackrabbit/oak/run/RevisionsCommand.java | 9 + .../oak/plugins/document/Configuration.java | 5 + .../plugins/document/DocumentNodeStoreBuilder.java | 10 + .../plugins/document/DocumentNodeStoreService.java | 1 + .../oak/plugins/document/FullGcNodeBin.java | 82 ++++++++ .../oak/plugins/document/VersionGCSupport.java | 4 + .../plugins/document/VersionGarbageCollector.java | 12 +- .../mongo/MongoDocumentNodeStoreBuilderBase.java | 2 +- .../plugins/document/mongo/MongoDocumentStore.java | 17 ++ .../plugins/document/mongo/MongoFullGcNodeBin.java | 171 ++++++++++++++++ .../document/mongo/MongoVersionGCSupport.java | 22 ++- .../document/rdb/RDBDocumentNodeStoreBuilder.java | 12 ++ .../DocumentNodeStoreServiceConfigurationTest.java | 8 + .../mongo/MongoDocumentNodeStoreBuilderTest.java | 6 + .../document/mongo/MongoFullGcNodeBinTest.java | 216 +++++++++++++++++++++ .../rdb/RDBDocumentNodeStoreBuilderTest.java | 7 + 16 files changed, 569 insertions(+), 15 deletions(-) diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java index 07ed36ad35..0d9c611d1e 100644 --- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java +++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java @@ -147,6 +147,7 @@ public class RevisionsCommand implements Command { final OptionSpec<Boolean> dryRun; final OptionSpec<Boolean> embeddedVerification; final OptionSpec<Integer> fullGcMode; + final OptionSpec<Boolean> 
fullGCAuditLoggingEnabled; RevisionsOptions(String usage) { super(usage); @@ -208,6 +209,8 @@ public class RevisionsCommand implements Command { "to be considered for Full GC i.e. Version Garbage Collector (Full GC) logic will only consider those " + "nodes for Full GC which are not accessed recently (currentTime - lastModifiedTime > fullGcMaxAge). Default: 86400 (one day)") .withOptionalArg().ofType(Long.class).defaultsTo(TimeUnit.DAYS.toSeconds(1)); + fullGCAuditLoggingEnabled = parser.accepts("fullGCAuditLoggingEnabled", "Enable audit logging for Full GC") + .withOptionalArg().ofType(Boolean.class).defaultsTo(FALSE); } public RevisionsOptions parse(String[] args) { @@ -306,6 +309,10 @@ public class RevisionsCommand implements Command { boolean doCompaction() { return options.has(compact); } + + Boolean isFullGCAuditLoggingEnabled() { + return options.has(fullGCAuditLoggingEnabled); + } } @Override @@ -375,6 +382,7 @@ public class RevisionsCommand implements Command { builder.setFullGCBatchSize(options.getFullGcBatchSize()); builder.setFullGCProgressSize(options.getFullGcProgressSize()); builder.setFullGcMaxAgeMillis(SECONDS.toMillis(options.getFullGcMaxAge())); + builder.setFullGCAuditLoggingEnabled(options.isFullGCAuditLoggingEnabled()); // create a VersionGCSupport while builder is read-write VersionGCSupport gcSupport = builder.createVersionGCSupport(); @@ -408,6 +416,7 @@ public class RevisionsCommand implements Command { System.out.println("FullGcProgressSize is : " + options.getFullGcProgressSize()); System.out.println("FullGcMaxAgeInSecs is : " + options.getFullGcMaxAge()); System.out.println("FullGcMaxAgeMillis is : " + builder.getFullGcMaxAgeMillis()); + System.out.println("FullGCAuditLoggingEnabled is : " + options.isFullGCAuditLoggingEnabled()); VersionGarbageCollector gc = createVersionGC(builder.build(), gcSupport, options.isDryRun(), builder); VersionGCOptions gcOptions = gc.getOptions(); diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java index 0c82ea8f2a..eb5e9e1f51 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java @@ -400,4 +400,9 @@ import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreServic name = "Invisible for discovery", description = "Boolean value indicating whether the instance should be discoverable by the cluster. The default value is " + DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY) boolean invisibleForDiscovery() default DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY; + + @AttributeDefinition( + name = "Enable Full GC Persistent Audit Logging", + description = "This parameter will enable/disable the saving of deleted document IDs and properties during FullGC into a persistent storage, e.g Mongo collection") + boolean fullGCAuditLoggingEnabled() default false; } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java index 7b94796fe9..702cdca0bc 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java @@ -185,6 +185,7 @@ public class DocumentNodeStoreBuilder<T extends DocumentNodeStoreBuilder<T>> { private int fullGCBatchSize = DocumentNodeStoreService.DEFAULT_FGC_BATCH_SIZE; private int fullGCProgressSize = DocumentNodeStoreService.DEFAULT_FGC_PROGRESS_SIZE; private double fullGCDelayFactor = DocumentNodeStoreService.DEFAULT_FGC_DELAY_FACTOR; + private 
boolean fullGCAuditLoggingEnabled; private long suspendTimeoutMillis = DEFAULT_SUSPEND_TIMEOUT; /** @@ -317,6 +318,15 @@ public class DocumentNodeStoreBuilder<T extends DocumentNodeStoreBuilder<T>> { return this.fullGCEnabled; } + public T setFullGCAuditLoggingEnabled(boolean b) { + this.fullGCAuditLoggingEnabled = b; + return thisBuilder(); + } + + public boolean isFullGCAuditLoggingEnabled() { + return this.fullGCAuditLoggingEnabled; + } + public T setFullGCIncludePaths(@Nullable String[] includePaths) { if (isNull(includePaths) || includePaths.length == 0 || Arrays.equals(includePaths, new String[]{"/"})) { this.fullGCIncludePaths = Set.of(); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java index 780cc3b011..9790267abd 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java @@ -534,6 +534,7 @@ public class DocumentNodeStoreService { setFullGCBatchSize(config.fullGCBatchSize()). setFullGCProgressSize(config.fullGCProgressSize()). setFullGCDelayFactor(config.fullGCDelayFactor()). + setFullGCAuditLoggingEnabled(config.fullGCAuditLoggingEnabled()). setSuspendTimeoutMillis(config.suspendTimeoutMillis()). setClusterIdReuseDelayAfterRecovery(config.clusterIdReuseDelayAfterRecoveryMillis()). setRecoveryDelayMillis(config.recoveryDelayMillis()). 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java new file mode 100644 index 0000000000..662b8ebe46 --- /dev/null +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jackrabbit.oak.plugins.document; + +import java.util.List; +import java.util.Map; +/** + * This interface is a wrapper around DocumentStore that exposes two methods used to clean garbage from the NODES collection + * public int remove(Map<String, Long> orphanOrDeletedRemovalMap) + * public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) + * When enabled + * Each method saves the document ID or empty property names (that will be deleted) to a separate _bin collection as a BinDocument then delegates deletion to DocumentStore + * + * When disabled (default) + * Each method delegates directly to DocumentStore + */ +public interface FullGcNodeBin { + + static FullGcNodeBin noBin(DocumentStore store) { + return new FullGcNodeBin() { + @Override + public int remove(Map<String, Long> orphanOrDeletedRemovalMap) { + return store.remove(Collection.NODES, orphanOrDeletedRemovalMap); + } + + @Override + public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) { + return store.findAndUpdate(Collection.NODES, updateOpList); + } + + @Override + public void setEnabled(boolean value) { + // no-op + } + }; + } + + /** + * Remove orphaned or deleted documents from the NODES collection + * If bin is enabled, the document IDs are saved to the bin collection with ID prefixed with '/bin/' + * If document ID cannot be saved then the removal of the document fails + * If the bin is disabled, the document IDs are directly removed from the NODES collection + * + * @param orphanOrDeletedRemovalMap the keys of the documents to remove with the corresponding timestamps + * @return the number of documents removed + * @see DocumentStore#remove(Collection, Map) + */ + int remove(Map<String, Long> orphanOrDeletedRemovalMap); + + /** + * Performs a conditional update + * If the bin is enabled, the removed properties are saved to the bin collection with ID prefixed with '/bin/' and empty value + * If the document ID and properties cannot be 
saved then the removal of the property fails + * If bin is disabled, the removed properties are directly removed from the NODES collection + * + * @param updateOpList the update operation List + * @return the list containing old documents + * @see DocumentStore#findAndUpdate(Collection, List) + */ + List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList); + + /** + * Enable or disable the bin + * @param value true to enable, false to disable + */ + void setEnabled(boolean value); +} diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java index fae8af8674..c1735d7369 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java @@ -307,4 +307,8 @@ public class VersionGCSupport { Revision r = IterableUtils.getFirst(doc.getAllChanges(), null); return r != null && sweepRevs.isRevisionNewer(r); } + + public FullGcNodeBin getFullGCBin() { + return FullGcNodeBin.noBin(store); + } } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index 7bf95c0b76..271604d61f 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -1947,15 +1947,9 @@ public class VersionGarbageCollector { } if (!isFullGCDryRun) { // only delete these in case it is not a dryRun - if (!orphanOrDeletedRemovalMap.isEmpty()) { - // use remove() with the modified check to rule - // out any further race-condition where this removal - // races with a 
un-orphan/re-creation as a result of which - // the node should now not be removed. The modified check - // ensures a node would then not be removed - // (and as a result the removedSize != map.size()) - final int removedSize = ds.remove(NODES, orphanOrDeletedRemovalMap); + + final int removedSize = versionStore.getFullGCBin().remove(orphanOrDeletedRemovalMap); stats.updatedFullGCDocsCount += removedSize; stats.deletedDocGCCount += removedSize; stats.deletedOrphanNodesCount += removedSize; @@ -1973,7 +1967,7 @@ public class VersionGarbageCollector { } if (!updateOpList.isEmpty()) { - List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, updateOpList); + List<NodeDocument> oldDocs = versionStore.getFullGCBin().findAndUpdate(updateOpList); int deletedProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum(); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java index 46f6f7decd..1f69130b4e 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java @@ -172,7 +172,7 @@ public abstract class MongoDocumentNodeStoreBuilderBase<T extends MongoDocumentN public VersionGCSupport createVersionGCSupport() { DocumentStore store = getDocumentStore(); if (store instanceof MongoDocumentStore) { - return new MongoVersionGCSupport((MongoDocumentStore) store); + return new MongoVersionGCSupport((MongoDocumentStore) store, isFullGCAuditLoggingEnabled()); } else { return super.createVersionGCSupport(); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java index b24014fe1e..060a90d2a6 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java @@ -42,6 +42,7 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.apache.commons.io.IOUtils; +import com.mongodb.client.model.IndexOptions; import org.apache.jackrabbit.guava.common.base.Stopwatch; import org.apache.jackrabbit.guava.common.collect.Iterators; import org.apache.jackrabbit.guava.common.util.concurrent.AtomicDouble; @@ -166,6 +167,8 @@ public class MongoDocumentStore implements DocumentStore { * which we block any data modification operation when system has been throttled. */ public static final long DEFAULT_THROTTLING_TIME_MS = Long.getLong("oak.mongo.throttlingTime", 20); + + private static final @NotNull String BIN_COLLECTION = "bin"; /** * nodeNameLimit for node name based on Mongo Version */ @@ -348,6 +351,9 @@ public class MongoDocumentStore implements DocumentStore { if (!readOnly) { ensureIndexes(db, status); + if (builder.isFullGCAuditLoggingEnabled()) { + ensureFullGcTTLIndex(); + } } this.nodeLocks = new StripedNodeDocumentLocks(); @@ -465,6 +471,13 @@ public class MongoDocumentStore implements DocumentStore { createIndex(journal, JournalEntry.MODIFIED, true, false, false); } + private void ensureFullGcTTLIndex() { + //TTL index for full GC bin documents to expire after 90 days + //see https://issues.apache.org/jira/browse/OAK-11444 + IndexOptions indexOptions = new IndexOptions().expireAfter(TimeUnit.DAYS.toSeconds(90), TimeUnit.SECONDS); + connection.getCollection(BIN_COLLECTION).createIndex(new org.bson.Document(MongoFullGcNodeBin.GC_COLLECTED_AT, 1), indexOptions); + } + private void createCollection(MongoDatabase db, String 
collectionName, MongoStatus mongoStatus) { CreateCollectionOptions options = new CreateCollectionOptions(); @@ -2011,6 +2024,10 @@ public class MongoDocumentStore implements DocumentStore { return getDBCollection(collection).withReadPreference(readPreference); } + <T extends Document> MongoCollection<BasicDBObject> getBinCollection() { + return this.connection.getCollection(BIN_COLLECTION); + } + MongoDatabase getDatabase() { return connection.getDatabase(); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java new file mode 100644 index 0000000000..8a8d837222 --- /dev/null +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.document.mongo; + +import com.mongodb.BasicDBObject; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentStore; +import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin; +import org.apache.jackrabbit.oak.plugins.document.NodeDocument; +import org.apache.jackrabbit.oak.plugins.document.UpdateOp; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.slf4j.LoggerFactory.getLogger; + +import java.time.Instant; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * This class is a wrapper around DocumentStore that exposes two methods used to clean garbage from the NODES collection + * public int remove(Map<String, Long> orphanOrDeletedRemovalMap) + * public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) + * When enabled + * Each method saves the document ID or empty property names (that will be deleted) to a separate _bin collection as a BinDocument then delegates deletion to DocumentStore + * + * When disabled (default) + * Each method delegates directly to DocumentStore + */ +public class MongoFullGcNodeBin implements FullGcNodeBin { + public static final String GC_COLLECTED_AT = "_gcCollectedAt"; + private static final Logger LOG = LoggerFactory.getLogger(MongoFullGcNodeBin.class); + + private final MongoDocumentStore mongoDocumentStore; + private boolean enabled; + + public MongoFullGcNodeBin(MongoDocumentStore ds) { + this(ds, false); + } + + public MongoFullGcNodeBin(MongoDocumentStore store, boolean fullGcBinEnabled) { + mongoDocumentStore = store; + enabled = fullGcBinEnabled; + } + + /** + * Remove orphaned or deleted documents from the NODES collection + * If bin is enabled, the document IDs are saved to the BIN collection with ID prefixed with 
'/bin/' + * If document ID cannot be saved then the removal of the document fails + * If the bin is disabled, the document IDs are directly removed from the NODES collection + * + * @param orphanOrDeletedRemovalMap the keys of the documents to remove with the corresponding timestamps + * @return the number of documents removed + * @see DocumentStore#remove(Collection, Map) + */ + @Override + public int remove(Map<String, Long> orphanOrDeletedRemovalMap) { + if (orphanOrDeletedRemovalMap.isEmpty() || !addToBin(orphanOrDeletedRemovalMap)) { + return 0; + } + + // use remove() with the modified check to rule + // out any further race-condition where this removal + // races with a un-orphan/re-creation as a result of which + // the node should now not be removed. The modified check + // ensures a node would then not be removed + // (and as a result the removedSize != map.size()) + return mongoDocumentStore.remove(Collection.NODES, orphanOrDeletedRemovalMap); + } + + + /** + * Performs a conditional update + * If the bin is enabled, the removed properties are saved to the BIN collection with ID prefixed with '/bin/' and empty value + * If the document ID and properties cannot be saved then the removal of the property fails + * If bin is disabled, the removed properties are directly removed from the NODES collection + * + * @param updateOpList the update operation List + * @return the list containing old documents + * @see DocumentStore#findAndUpdate(Collection, List) + */ + @Override + public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) { + LOG.info("Updating {} documents", updateOpList.size()); + if (updateOpList.isEmpty() || !addToBin(updateOpList)) { + return Collections.emptyList(); + } + return mongoDocumentStore.findAndUpdate(Collection.NODES, updateOpList); + } + + private boolean addToBin(Map<String, Long> orphanOrDeletedRemovalMap) { + if (!enabled) { + LOG.info("Bin is disabled, skipping adding delete candidate documents to bin"); + return 
true; + } + LOG.info("Adding {} delete candidate documents to bin", orphanOrDeletedRemovalMap.size()); + List<BasicDBObject> docs = orphanOrDeletedRemovalMap.keySet().stream() + .map(e -> new UpdateOp(e, true)) + .map(this::toBasicDBObject) + .collect(Collectors.toList()); + try { + return persist(docs); + } catch (Exception e) { + LOG.error("Error while adding delete candidate documents to bin: {}", docs, e); + } + return false; + } + + private boolean addToBin(List<UpdateOp> updateOpList) { + if (!enabled) { + LOG.info("Bin is disabled, skipping adding removed properties to bin"); + return true; + } + LOG.info("Adding {} removed properties to bin", updateOpList.size()); + List<BasicDBObject> binOpList = updateOpList.stream().map(this::toBasicDBObject).collect(Collectors.toList()); + try { + return persist(binOpList); + } catch (Exception e) { + LOG.error("Error while adding removed properties to bin: {}", binOpList, e); + } + return false; + } + + private boolean persist(List<BasicDBObject> inserts) { + mongoDocumentStore.getBinCollection().insertMany(inserts); + return true; + } + + private BasicDBObject toBasicDBObject(UpdateOp op) { + BasicDBObject doc = new BasicDBObject(); + doc.put(Document.ID, "/bin/" + op.getId() + "-" + Instant.now().toEpochMilli()); + //copy removed properties to the new document + op.getChanges().forEach((k, v) -> { + if (v.type == UpdateOp.Operation.Type.REMOVE) { + doc.put(k.getName(), ""); + } + }); + //this property is used to track the time when the document was added to the bin + //it can be used as a TTL index property to automatically remove the document after a certain time + //see https://www.mongodb.com/docs/manual/core/index-ttl/#std-label-index-feature-ttl + doc.put(MongoFullGcNodeBin.GC_COLLECTED_AT, new Date()); + return doc; + } + + @Override + public void setEnabled(boolean value) { + this.enabled = value; + LOG.info("Full GC Bin changed to {}", enabled ? 
"enabled" : "disabled"); + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index ffc02602fd..931a7b2d19 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -29,8 +29,10 @@ import static java.util.Optional.ofNullable; import static com.mongodb.client.model.Filters.and; import static com.mongodb.client.model.Filters.lt; import static java.util.Collections.emptyList; +import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.Document.ID; +import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; @@ -109,10 +111,15 @@ public class MongoVersionGCSupport extends VersionGCSupport { /** * The batch size for the query of possibly deleted docs. 
*/ - private final int batchSize = Integer.getInteger( - "oak.mongo.queryDeletedDocsBatchSize", 1000); + private final int batchSize = SystemPropertySupplier.create( + "oak.mongo.queryDeletedDocsBatchSize", 1000).get(); + private final MongoFullGcNodeBin fullGcBin; public MongoVersionGCSupport(MongoDocumentStore store) { + this(store, false); + } + + public MongoVersionGCSupport(MongoDocumentStore store, boolean fullGcBinEnabled) { super(store); this.store = store; if(hasIndex(getNodeCollection(), SD_TYPE, SD_MAX_REV_TIME_IN_SECS)) { @@ -129,6 +136,7 @@ public class MongoVersionGCSupport extends VersionGCSupport { } else { modifiedIdHint = null; } + this.fullGcBin = new MongoFullGcNodeBin(store, fullGcBinEnabled); } @Override @@ -241,9 +249,8 @@ public class MongoVersionGCSupport extends VersionGCSupport { public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, @NotNull final String fromId, @NotNull Set<String> includedPathPrefixes, @NotNull Set<String> excludedPathPrefixes) { - LOG.info("getModifiedDocs fromModified: {}, toModified: {}, limit: {}, fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}", - fromModified, toModified, limit, fromId, includedPathPrefixes, excludedPathPrefixes); - + LOG.info("getModifiedDocs fromModified: {} ({}), toModified: {} ({}), limit: {}, fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}", + fromModified, Utils.timestampToString(fromModified), toModified, Utils.timestampToString(toModified), limit, fromId, includedPathPrefixes, excludedPathPrefixes); final long fromModifiedQuery; if (MIN_ID_VALUE.equals(fromId)) { // If fromId is MIN_ID_VALUE, round fromModified to 5 second resolution @@ -476,6 +483,11 @@ public class MongoVersionGCSupport extends VersionGCSupport { LOG.debug(sb.toString()); } + @Override + public FullGcNodeBin getFullGCBin() { + return fullGcBin; + } + private static String getID(BasicDBObject document) { return 
String.valueOf(document.get(Document.ID)); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java index 18cfb61f60..bff7f0ff17 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java @@ -136,6 +136,18 @@ public class RDBDocumentNodeStoreBuilder return thisBuilder(); } + @Override + public boolean isFullGCAuditLoggingEnabled() { + // fullGC is not supported for RDB + return false; + } + + @Override + public RDBDocumentNodeStoreBuilder setFullGCAuditLoggingEnabled(boolean b) { + // fullGC is not supported for RDB + return thisBuilder(); + } + @Override public Set<String> getFullGCIncludePaths() { return of(); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java index 58a29042f5..2e9429a9ad 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java @@ -104,6 +104,7 @@ public class DocumentNodeStoreServiceConfigurationTest { assertEquals(DEFAULT_EMBEDDED_VERIFICATION_ENABLED, config.embeddedVerificationEnabled()); assertEquals(DocumentNodeStoreService.DEFAULT_FULL_GC_MAX_AGE, config.fullGcMaxAgeInSecs()); assertEquals(CommitQueue.DEFAULT_SUSPEND_TIMEOUT, config.suspendTimeoutMillis()); + assertFalse(config.fullGCAuditLoggingEnabled()); } @Test @@ -170,6 +171,13 @@ public class 
DocumentNodeStoreServiceConfigurationTest { assertEquals(batchSize, config.fullGCBatchSize()); } + @Test + public void fullGCAuditLoggingEnabled() throws Exception { + addConfigurationEntry(preset, "fullGCAuditLoggingEnabled", true); + Configuration config = createConfiguration(); + assertTrue(config.fullGCAuditLoggingEnabled()); + } + @Test public void invisibleForDiscoveryFalse() throws Exception { boolean batchSize = false; diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java index 0c3f82f36b..d900d10f71 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java @@ -132,4 +132,10 @@ public class MongoDocumentNodeStoreBuilderTest { final int fullGcModeNone = 0; assertEquals(builder.getFullGCMode(), fullGcModeNone); } + + @Test + public void isFullGCAuditLoggingEnabled() { + MongoDocumentNodeStoreBuilder builder = new MongoDocumentNodeStoreBuilder(); + assertFalse(builder.isFullGCAuditLoggingEnabled()); + } } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java new file mode 100644 index 0000000000..f2dfdf9d34 --- /dev/null +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jackrabbit.oak.plugins.document.mongo; + +import com.mongodb.BasicDBObject; +import com.mongodb.client.MongoCollection; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin; +import org.apache.jackrabbit.oak.plugins.document.NodeDocument; +import org.apache.jackrabbit.oak.plugins.document.UpdateOp; +import org.junit.After; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import org.junit.Before; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.eq; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; +import org.mockito.MockitoAnnotations; + +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class MongoFullGcNodeBinTest { + + private static final List<NodeDocument> 
FIND_AND_UPDATE_RETURN_VALUE = List.of(); + @Mock + MongoDocumentStore documentStore; + + + MongoFullGcNodeBin fullGcBin; + + @Mock MongoCollection<BasicDBObject> mockBinCollection; + + + @Before + public void setUp() throws Exception { + MockitoAnnotations.openMocks(this); + fullGcBin = new MongoFullGcNodeBin(documentStore, true); + when(documentStore.remove(eq(Collection.NODES), anyMap())).thenAnswer(invocation -> { + Map<String, Long> map = invocation.getArgument(1); + return map.size(); + }); + + when(documentStore.findAndUpdate(eq(Collection.NODES), anyList())).thenAnswer(invocation -> { + return FIND_AND_UPDATE_RETURN_VALUE; + }); + + when(documentStore.getBinCollection()).thenReturn(mockBinCollection); + } + + @After + public void tearDown() { + Mockito.reset(documentStore, mockBinCollection); + } + + @Test + public void defaultDisabled() { + assertFalse(new MongoFullGcNodeBin(this.documentStore).isEnabled()); + } + + @Test + public void enableWithConstructor() { + assertTrue(new MongoFullGcNodeBin(this.documentStore, true).isEnabled()); + } + + @Test + public void remove() { + Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>(); + orphanOrDeletedRemovalMap.put("key1", 1L); + orphanOrDeletedRemovalMap.put("key2", 2L); + + + int removed = fullGcBin.remove(orphanOrDeletedRemovalMap); + + //verify returned value + assertEquals(orphanOrDeletedRemovalMap.size(), removed); + + //verify removed documents are added to bin + ArgumentCaptor<List<BasicDBObject>> argumentCaptor = ArgumentCaptor.forClass(List.class); + verify(mockBinCollection).insertMany(argumentCaptor.capture()); + assertEquals(orphanOrDeletedRemovalMap.size(), argumentCaptor.getValue().size()); + assertTrue(argumentCaptor.getValue().get(0).get(Document.ID).toString().matches("^\\/bin\\/key1\\-\\d+$")); + assertTrue(argumentCaptor.getValue().get(1).get(Document.ID).toString().matches("^\\/bin\\/key2\\-\\d+$")); + + //verify documents are removed + 
verify(documentStore).remove(Collection.NODES, orphanOrDeletedRemovalMap); + } + + @Test + public void removeWhenCopyToBinFails() { + Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>(); + orphanOrDeletedRemovalMap.put("key", 1L); + doThrow(new RuntimeException("Error while adding documents to bin")).when(mockBinCollection).insertMany(anyList()); + + int removed = fullGcBin.remove(orphanOrDeletedRemovalMap); + + assertEquals(0, removed); + verify(documentStore, never()).remove(Collection.NODES, orphanOrDeletedRemovalMap); + } + + @Test + public void removeEmptyMap() { + int removed = fullGcBin.remove(Map.of()); + assertEquals(0, removed); + Mockito.verifyNoInteractions(documentStore); + } + + @Test + public void removeWhenBinDisabled() { + fullGcBin.setEnabled(false); + Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>(); + orphanOrDeletedRemovalMap.put("key", 1L); + + fullGcBin.remove(orphanOrDeletedRemovalMap); + + verify(mockBinCollection, never()).insertMany(anyList()); + } + + @Test + public void findAndUpdate() { + UpdateOp doc1 = new UpdateOp("doc1", false); + doc1.remove("prop1.1"); + doc1.set("prop1.2", "value1.2"); + UpdateOp doc2 = new UpdateOp("doc2", false); + doc2.remove("prop2.1"); + doc2.remove("prop2.2"); + + List<UpdateOp> properties = List.of(doc1, doc2); + List<NodeDocument> modifiedDocs = fullGcBin.findAndUpdate(properties); + + //verify removed properties are added to bin + ArgumentCaptor<List<BasicDBObject>> argumentCaptor = ArgumentCaptor.forClass(List.class); + verify(mockBinCollection).insertMany(argumentCaptor.capture()); + + List<BasicDBObject> binOpList = argumentCaptor.getValue(); + BasicDBObject binDoc1 = binOpList.get(0); + + + assertTrue(binDoc1.get(Document.ID).toString().matches("^\\/bin\\/doc1\\-\\d+$")); + assertTrue(binDoc1.containsField("prop1.1")); + assertFalse(binDoc1.containsField("prop1.2"));//only removed props are saved + assertGcTimestampAdded(binDoc1); + + BasicDBObject binDoc2 = binOpList.get(1); 
+ assertTrue(binDoc2.get(Document.ID).toString().matches("^\\/bin\\/doc2\\-\\d+$")); + assertTrue(binDoc2.containsField("prop2.1")); + assertTrue(binDoc2.containsField("prop2.2")); + + assertGcTimestampAdded(binDoc2); + + + //verify removed properties are removed from the original document + verify(documentStore).findAndUpdate(Collection.NODES, properties); + + //verify returned value + assertEquals(FIND_AND_UPDATE_RETURN_VALUE, modifiedDocs); + } + + private static void assertGcTimestampAdded(BasicDBObject binDoc2) { + assertTrue(binDoc2.containsField("_gcCollectedAt")); + assertTrue(binDoc2.get("_gcCollectedAt") instanceof Date); + } + + @Test + public void findAndUpdateWhenCopyToBinFails() { + doThrow(new RuntimeException("Error while adding documents to bin")).when(mockBinCollection).insertMany(anyList()); + UpdateOp doc1 = new UpdateOp("doc1", false); + doc1.remove("prop1"); + fullGcBin.findAndUpdate(List.of(doc1)); + verify(documentStore, never()).findAndUpdate(eq(Collection.NODES), anyList()); + } + + @Test + public void findAndUpdateWhenBinDisabled() { + fullGcBin.setEnabled(false); + UpdateOp doc1 = new UpdateOp("doc1", false); + doc1.remove("prop1"); + fullGcBin.findAndUpdate(List.of(doc1)); + verify(mockBinCollection, never()).insertMany(anyList()); + } + + @Test + public void findAndUpdateWhenEmptyList() { + fullGcBin.findAndUpdate(List.of()); + verifyNoInteractions(documentStore); + } +} \ No newline at end of file diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java index 1aa9d47c6d..642b05e20e 100755 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java @@ -121,4 
+121,11 @@ public class RDBDocumentNodeStoreBuilderTest { builder.setFullGcMaxAgeMillis(30 * 24 * 60 * 60 * 1000L); assertEquals(0, builder.getFullGcMaxAgeMillis()); } + + @Test + public void fullGcAuditLoggingEnabled() { + RDBDocumentNodeStoreBuilder builder = new RDBDocumentNodeStoreBuilder(); + builder.setFullGCAuditLoggingEnabled(true); + assertFalse(builder.isFullGCAuditLoggingEnabled()); + } }