This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch OAK-10370 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit bdbc8acfd73150795bc1bf848831737c1f6257fa Author: Rishabh Kumar <[email protected]> AuthorDate: Wed Nov 29 14:59:00 2023 +0530 OAK-10370 : added dryRun mode for detailedGC --- .../plugins/document/DocumentNodeStoreHelper.java | 4 +- .../jackrabbit/oak/run/RevisionsCommand.java | 38 +++++++++++++----- .../jackrabbit/oak/run/RevisionsCommandTest.java | 11 ++++++ .../oak/plugins/document/DocumentNodeStore.java | 2 +- .../plugins/document/VersionGarbageCollector.java | 46 ++++++++++++---------- .../oak/plugins/document/VersionGCQueryTest.java | 4 +- .../oak/plugins/document/VersionGCTest.java | 2 +- .../document/VersionGarbageCollectorIT.java | 15 +++---- 8 files changed, 76 insertions(+), 46 deletions(-) diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java index f77aa2c676..f11232b24f 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java @@ -72,8 +72,8 @@ public class DocumentNodeStoreHelper { } public static VersionGarbageCollector createVersionGC( - DocumentNodeStore nodeStore, VersionGCSupport gcSupport, boolean detailedGCEnabled) { - return new VersionGarbageCollector(nodeStore, gcSupport, detailedGCEnabled); + DocumentNodeStore nodeStore, VersionGCSupport gcSupport, boolean detailedGCEnabled, boolean isDetailedGCDryRun) { + return new VersionGarbageCollector(nodeStore, gcSupport, detailedGCEnabled, isDetailedGCDryRun); } private static Iterable<BlobReferences> scan(DocumentNodeStore store, diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java index b995910c57..2ca22e38c9 100644 --- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java +++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java @@ -56,6 +56,8 @@ import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static java.lang.Boolean.FALSE; +import static java.lang.Boolean.TRUE; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreHelper.createVersionGC; import static org.apache.jackrabbit.oak.plugins.document.FormatVersion.versionOf; @@ -77,9 +79,10 @@ public class RevisionsCommand implements Command { "where sub-command is one of", " info give information about the revisions state without performing", " any modifications", - " collect perform garbage collection", - " reset clear all persisted metadata", - " sweep clean up uncommitted changes" + " collect perform garbage collection", + " reset clear all persisted metadata", + " sweep clean up uncommitted changes", + " detailedGC perform detailed garbage collection i.e. remove unmerged branch commits, old revisions, deleted properties etc" ); private static final ImmutableList<String> LOGGER_NAMES = ImmutableList.of( @@ -93,6 +96,7 @@ public class RevisionsCommand implements Command { static final String CMD_COLLECT = "collect"; static final String CMD_RESET = "reset"; static final String CMD_SWEEP = "sweep"; + static final String CMD_DETAILED_GC = "detailedGC"; final OptionSpec<?> once; final OptionSpec<Integer> limit; @@ -101,6 +105,7 @@ public class RevisionsCommand implements Command { final OptionSpec<Double> delay; final OptionSpec<?> continuous; final OptionSpec<?> verbose; + final OptionSpec<Boolean> dryRun; RevisionsOptions(String usage) { super(usage); @@ -117,6 +122,8 @@ public class RevisionsCommand implements Command { timeLimit = parser .accepts("timeLimit", "cancel garbage collection after n seconds").withRequiredArg() .ofType(Long.class).defaultsTo(-1L); + dryRun = parser.accepts("dryRun", "dryRun of detailedGC i.e. only print what needed to be deleted") + .withRequiredArg().ofType(Boolean.class).defaultsTo(TRUE); continuous = parser .accepts("continuous", "run continuously (collect only)"); verbose = parser @@ -144,6 +151,10 @@ public class RevisionsCommand implements Command { return limit.value(options); } + boolean isDryRun() { + return dryRun.value(options); + } + long getOlderThan() { return olderThan.value(options); } @@ -176,11 +187,13 @@ public class RevisionsCommand implements Command { if (RevisionsOptions.CMD_INFO.equals(subCmd)) { info(options, closer); } else if (RevisionsOptions.CMD_COLLECT.equals(subCmd)) { - collect(options, closer); + collect(options, closer, false); } else if (RevisionsOptions.CMD_RESET.equals(subCmd)) { reset(options, closer); } else if (RevisionsOptions.CMD_SWEEP.equals(subCmd)) { sweep(options, closer); + } else if (RevisionsOptions.CMD_DETAILED_GC.equals(subCmd)) { + collect(options, closer, true); } else { System.err.println("unknown revisions command: " + subCmd); } @@ -203,13 +216,16 @@ public class RevisionsCommand implements Command { } private VersionGarbageCollector bootstrapVGC(RevisionsOptions options, - Closer closer) + Closer closer, boolean detailedGCEnabled) throws IOException { DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); if (builder == null) { System.err.println("revisions mode only available for DocumentNodeStore"); System.exit(1); } + // set detailedGC + builder.setDetailedGCEnabled(detailedGCEnabled); + // create a VersionGCSupport while builder is read-write VersionGCSupport gcSupport = builder.createVersionGCSupport(); // check for matching format version @@ -227,7 +243,9 @@ public class RevisionsCommand implements Command { useMemoryBlobStore(builder); // create a version GC that operates on a read-only DocumentNodeStore // and a GC support with a writable DocumentStore - VersionGarbageCollector gc = createVersionGC(builder.build(), gcSupport, isDetailedGCEnabled(builder)); + System.out.println("DryRun is enabled : " + options.isDryRun()); + VersionGarbageCollector gc = createVersionGC(builder.build(), gcSupport, isDetailedGCEnabled(builder), + options.isDryRun()); VersionGCOptions gcOptions = gc.getOptions(); gcOptions = gcOptions.withDelayFactor(options.getDelay()); @@ -243,7 +261,7 @@ public class RevisionsCommand implements Command { private void info(RevisionsOptions options, Closer closer) throws IOException { - VersionGarbageCollector gc = bootstrapVGC(options, closer); + VersionGarbageCollector gc = bootstrapVGC(options, closer, false); System.out.println("retrieving gc info"); VersionGCInfo info = gc.getInfo(options.getOlderThan(), SECONDS); @@ -263,9 +281,9 @@ public class RevisionsCommand implements Command { info.estimatedIterations); } - private void collect(final RevisionsOptions options, Closer closer) + private void collect(final RevisionsOptions options, Closer closer, boolean detailedGCEnabled) throws IOException { - VersionGarbageCollector gc = bootstrapVGC(options, closer); + VersionGarbageCollector gc = bootstrapVGC(options, closer, detailedGCEnabled); ExecutorService executor = Executors.newSingleThreadExecutor(); final Semaphore finished = new Semaphore(0); try { @@ -350,7 +368,7 @@ public class RevisionsCommand implements Command { private void reset(RevisionsOptions options, Closer closer) throws IOException { - VersionGarbageCollector gc = bootstrapVGC(options, closer); + VersionGarbageCollector gc = bootstrapVGC(options, closer, false); System.out.println("resetting recommendations and statistics"); gc.reset(); } diff --git a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java index ebd09b7a5e..087a11818f 100644 --- a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java +++ b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java @@ -34,12 +34,14 @@ import org.apache.jackrabbit.oak.plugins.document.MongoConnectionFactory; import org.apache.jackrabbit.oak.plugins.document.MongoUtils; import org.apache.jackrabbit.oak.plugins.document.Revision; import org.apache.jackrabbit.oak.plugins.document.UpdateOp; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; +import static org.apache.commons.lang3.reflect.FieldUtils.writeField; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath; import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; @@ -105,6 +107,15 @@ public class RevisionsCommandTest { assertTrue(output.contains("starting gc collect")); } + @Test + public void detailedGC() { + ns.dispose(); + + String output = captureSystemOut(new RevisionsCmd("detailedGC")); + assertTrue(output.contains("DryRun is enabled : true")); + assertTrue(output.contains("starting gc collect")); + } + @Test public void sweep() throws Exception { int clusterId = ns.getClusterId(); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java index a22f711f1c..4cb808aa0a 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java @@ -642,7 +642,7 @@ public final class DocumentNodeStore this.branches = new UnmergedBranches(); this.asyncDelay = builder.getAsyncDelay(); this.versionGarbageCollector = new VersionGarbageCollector( - this, builder.createVersionGCSupport(), isDetailedGCEnabled(builder)); + this, builder.createVersionGCSupport(), isDetailedGCEnabled(builder), false); this.versionGarbageCollector.setStatisticsProvider(builder.getStatisticsProvider()); this.versionGarbageCollector.setGCMonitor(builder.getGCMonitor()); this.journalGarbageCollector = new JournalGarbageCollector( diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index cd0de7c88e..2a36d4fdd3 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -131,6 +131,7 @@ public class VersionGarbageCollector { private final DocumentNodeStore nodeStore; private final DocumentStore ds; private final boolean detailedGCEnabled; + private final boolean isDetailedGCDryRun; private final VersionGCSupport versionStore; private final AtomicReference<GCJob> collector = newReference(); private VersionGCOptions options; @@ -140,11 +141,13 @@ public class VersionGarbageCollector { VersionGarbageCollector(DocumentNodeStore nodeStore, VersionGCSupport gcSupport, - final boolean detailedGCEnabled) { + final boolean detailedGCEnabled, + final boolean isDetailedGCDryRun) { this.nodeStore = nodeStore; this.versionStore = gcSupport; this.ds = gcSupport.getDocumentStore(); this.detailedGCEnabled = detailedGCEnabled; + this.isDetailedGCDryRun = isDetailedGCDryRun; this.options = new VersionGCOptions(); } @@ -1161,17 +1164,15 @@ public class VersionGarbageCollector { public void removeGarbage(final VersionGCStats stats) { if (updateOpList.isEmpty()) { - if (log.isDebugEnabled()) { + if (log.isDebugEnabled() || isDetailedGCDryRun) { log.debug("Skipping removal of detailed garbage, cause no garbage detected"); } return; } - int updatedDocs; - monitor.info("Proceeding to update [{}] documents", updateOpList.size()); - if (log.isDebugEnabled()) { + if (log.isDebugEnabled() || isDetailedGCDryRun) { String collect = updateOpList.stream().map(UpdateOp::getId).collect(joining(",")); log.debug("Performing batch update of documents with following id's [{}]", collect); } @@ -1183,24 +1184,27 @@ public class VersionGarbageCollector { timer.reset().start(); try { - List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, updateOpList); - int deletedProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum(); - updatedDocs = (int) oldDocs.stream().filter(Objects::nonNull).count(); - stats.updatedDetailedGCDocsCount += updatedDocs; - stats.deletedPropsCount += deletedProps; - stats.deletedUnmergedBCCount += deletedUnmergedBCSet.size(); + if (!isDetailedGCDryRun) { + // only delete these in case it is not a dryRun + List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, updateOpList); + int deletedProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum(); + int updatedDocs = (int) oldDocs.stream().filter(Objects::nonNull).count(); + stats.updatedDetailedGCDocsCount += updatedDocs; + stats.deletedPropsCount += deletedProps; + stats.deletedUnmergedBCCount += deletedUnmergedBCSet.size(); + + if (log.isDebugEnabled()) { + log.debug("Updated [{}] documents, deleted [{}] properties, deleted [{}] unmergedBranchCommits", + updatedDocs, deletedProps, deletedUnmergedBCSet.size()); + } - if (log.isDebugEnabled()) { - log.debug("Updated [{}] documents, deleted [{}] properties, deleted [{}] unmergedBranchCommits", - updatedDocs, deletedProps, deletedUnmergedBCSet.size()); + // save stats + detailedGCStats.propertiesDeleted(deletedProps); + detailedGCStats.unmergedBranchCommitsDeleted(deletedUnmergedBCSet.size()); + detailedGCStats.documentsUpdated(updatedDocs); + // fix for sonar : converted to long before operation + detailedGCStats.documentsUpdateSkipped((long)oldDocs.size() - updatedDocs); } - - // save stats - detailedGCStats.propertiesDeleted(deletedProps); - detailedGCStats.unmergedBranchCommitsDeleted(deletedUnmergedBCSet.size()); - detailedGCStats.documentsUpdated(updatedDocs); - // fix for sonar : converted to long before operation - detailedGCStats.documentsUpdateSkipped((long)oldDocs.size() - updatedDocs); } finally { // now reset delete metadata updateOpList.clear(); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java index 27333b57cf..ab3cfd30f3 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java @@ -107,7 +107,7 @@ public class VersionGCQueryTest { clock.waitUntil(clock.getTime() + TimeUnit.HOURS.toMillis(1)); VersionGarbageCollector gc = new VersionGarbageCollector( - ns, new VersionGCSupport(store), false); + ns, new VersionGCSupport(store), false, false); prevDocIds.clear(); VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES); assertEquals(11, stats.deletedDocGCCount); @@ -140,7 +140,7 @@ public class VersionGCQueryTest { clock.waitUntil(clock.getTime() + TimeUnit.HOURS.toMillis(1)); VersionGarbageCollector gc = new VersionGarbageCollector( - ns, new VersionGCSupport(store), false); + ns, new VersionGCSupport(store), false, false); prevDocIds.clear(); VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES); assertEquals(1, stats.deletedDocGCCount); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java index 39ca136c8d..14b871a7da 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java @@ -408,7 +408,7 @@ public class VersionGCTest { deletedOnceCountCalls.incrementAndGet(); return Iterables.size(Utils.getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1)); } - }, false); + }, false, false); // run first RGC gc.gc(1, TimeUnit.HOURS); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index d8ff6acf88..2457fd445f 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -94,8 +94,6 @@ import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDBFixture; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; import org.apache.jackrabbit.oak.plugins.document.bundlor.BundlingConfigInitializer; -import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; -import org.apache.jackrabbit.oak.plugins.document.bundlor.BundlingConfigInitializer; import org.apache.jackrabbit.oak.plugins.document.mongo.MongoTestUtils; import org.apache.jackrabbit.oak.plugins.document.rdb.RDBOptions; import org.apache.jackrabbit.oak.plugins.document.util.Utils; @@ -108,7 +106,6 @@ import org.apache.jackrabbit.oak.stats.Clock; import org.jetbrains.annotations.NotNull; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -935,7 +932,7 @@ public class VersionGarbageCollectorIT { } }; - VersionGarbageCollector gc = new VersionGarbageCollector(store, gcSupport, true); + VersionGarbageCollector gc = new VersionGarbageCollector(store, gcSupport, true, false); stats = gc.gc(maxAge*2, HOURS); assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedPropsCount); @@ -1001,7 +998,7 @@ public class VersionGarbageCollectorIT { } }; - gcRef.set(new VersionGarbageCollector(store, gcSupport, true)); + gcRef.set(new VersionGarbageCollector(store, gcSupport, true, false)); //3. Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); @@ -1610,7 +1607,7 @@ public class VersionGarbageCollectorIT { }); } }; - final VersionGarbageCollector gc = new VersionGarbageCollector(store, gcSupport, false); + final VersionGarbageCollector gc = new VersionGarbageCollector(store, gcSupport, false, false); // start GC -> will try to remove /foo and /bar Future<VersionGCStats> f = execService.submit(new Callable<VersionGCStats>() { @Override @@ -1771,7 +1768,7 @@ public class VersionGarbageCollectorIT { return super.getPossiblyDeletedDocs(fromModified, toModified); } }; - gcRef.set(new VersionGarbageCollector(store, gcSupport, false)); + gcRef.set(new VersionGarbageCollector(store, gcSupport, false, false)); VersionGCStats stats = gcRef.get().gc(30, TimeUnit.MINUTES); assertTrue(stats.canceled); assertEquals(0, stats.deletedDocGCCount); @@ -1823,7 +1820,7 @@ public class VersionGarbageCollectorIT { return super.getPossiblyDeletedDocs(prevLastModifiedTime, lastModifiedTime).iterator(); } }; - gcRef.set(new VersionGarbageCollector(store, gcSupport, false)); + gcRef.set(new VersionGarbageCollector(store, gcSupport, false, false)); VersionGCStats stats = gcRef.get().gc(30, TimeUnit.MINUTES); assertTrue(stats.canceled); assertEquals(0, stats.deletedDocGCCount); @@ -1852,7 +1849,7 @@ public class VersionGarbageCollectorIT { }); } }; - final VersionGarbageCollector gc = new VersionGarbageCollector(store, nonReportingGcSupport, false); + final VersionGarbageCollector gc = new VersionGarbageCollector(store, nonReportingGcSupport, false, false); final long maxAgeHours = 1; final long clockDelta = HOURS.toMillis(maxAgeHours) + MINUTES.toMillis(5);
