This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch OAK-10370
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit bdbc8acfd73150795bc1bf848831737c1f6257fa
Author: Rishabh Kumar <[email protected]>
AuthorDate: Wed Nov 29 14:59:00 2023 +0530

    OAK-10370 : added dryRun mode for detailedGC
---
 .../plugins/document/DocumentNodeStoreHelper.java  |  4 +-
 .../jackrabbit/oak/run/RevisionsCommand.java       | 38 +++++++++++++-----
 .../jackrabbit/oak/run/RevisionsCommandTest.java   | 11 ++++++
 .../oak/plugins/document/DocumentNodeStore.java    |  2 +-
 .../plugins/document/VersionGarbageCollector.java  | 46 ++++++++++++----------
 .../oak/plugins/document/VersionGCQueryTest.java   |  4 +-
 .../oak/plugins/document/VersionGCTest.java        |  2 +-
 .../document/VersionGarbageCollectorIT.java        | 15 +++----
 8 files changed, 76 insertions(+), 46 deletions(-)

diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
index f77aa2c676..f11232b24f 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
@@ -72,8 +72,8 @@ public class DocumentNodeStoreHelper {
     }
 
     public static VersionGarbageCollector createVersionGC(
-            DocumentNodeStore nodeStore, VersionGCSupport gcSupport, boolean detailedGCEnabled) {
-        return new VersionGarbageCollector(nodeStore, gcSupport, detailedGCEnabled);
+            DocumentNodeStore nodeStore, VersionGCSupport gcSupport, boolean detailedGCEnabled, boolean isDetailedGCDryRun) {
+        return new VersionGarbageCollector(nodeStore, gcSupport, detailedGCEnabled, isDetailedGCDryRun);
     }
 
     private static Iterable<BlobReferences> scan(DocumentNodeStore store,
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
index b995910c57..2ca22e38c9 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
@@ -56,6 +56,8 @@ import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static java.lang.Boolean.FALSE;
+import static java.lang.Boolean.TRUE;
 import static java.util.concurrent.TimeUnit.SECONDS;
 import static 
org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreHelper.createVersionGC;
 import static 
org.apache.jackrabbit.oak.plugins.document.FormatVersion.versionOf;
@@ -77,9 +79,10 @@ public class RevisionsCommand implements Command {
             "where sub-command is one of",
             "  info     give information about the revisions state without performing",
             "           any modifications",
-            "  collect  perform garbage collection",
-            "  reset    clear all persisted metadata",
-            "  sweep    clean up uncommitted changes"
+            "  collect      perform garbage collection",
+            "  reset        clear all persisted metadata",
+            "  sweep        clean up uncommitted changes",
+            "  detailedGC   perform detailed garbage collection i.e. remove unmerged branch commits, old revisions, deleted properties etc"
     );
 
     private static final ImmutableList<String> LOGGER_NAMES = ImmutableList.of(
@@ -93,6 +96,7 @@ public class RevisionsCommand implements Command {
         static final String CMD_COLLECT = "collect";
         static final String CMD_RESET = "reset";
         static final String CMD_SWEEP = "sweep";
+        static final String CMD_DETAILED_GC = "detailedGC";
 
         final OptionSpec<?> once;
         final OptionSpec<Integer> limit;
@@ -101,6 +105,7 @@ public class RevisionsCommand implements Command {
         final OptionSpec<Double> delay;
         final OptionSpec<?> continuous;
         final OptionSpec<?> verbose;
+        final OptionSpec<Boolean> dryRun;
 
         RevisionsOptions(String usage) {
             super(usage);
@@ -117,6 +122,8 @@ public class RevisionsCommand implements Command {
             timeLimit = parser
                     .accepts("timeLimit", "cancel garbage collection after n 
seconds").withRequiredArg()
                     .ofType(Long.class).defaultsTo(-1L);
+            dryRun = parser.accepts("dryRun", "dryRun of detailedGC i.e. only print what needed to be deleted")
+                    .withRequiredArg().ofType(Boolean.class).defaultsTo(TRUE);
             continuous = parser
                     .accepts("continuous", "run continuously (collect only)");
             verbose = parser
@@ -144,6 +151,10 @@ public class RevisionsCommand implements Command {
             return limit.value(options);
         }
 
+        boolean isDryRun() {
+            return dryRun.value(options);
+        }
+
         long getOlderThan() {
             return olderThan.value(options);
         }
@@ -176,11 +187,13 @@ public class RevisionsCommand implements Command {
             if (RevisionsOptions.CMD_INFO.equals(subCmd)) {
                 info(options, closer);
             } else if (RevisionsOptions.CMD_COLLECT.equals(subCmd)) {
-                collect(options, closer);
+                collect(options, closer, false);
             } else if (RevisionsOptions.CMD_RESET.equals(subCmd)) {
                 reset(options, closer);
             } else if (RevisionsOptions.CMD_SWEEP.equals(subCmd)) {
                 sweep(options, closer);
+            } else if (RevisionsOptions.CMD_DETAILED_GC.equals(subCmd)) {
+                collect(options, closer, true);
             } else {
                 System.err.println("unknown revisions command: " + subCmd);
             }
@@ -203,13 +216,16 @@ public class RevisionsCommand implements Command {
     }
 
     private VersionGarbageCollector bootstrapVGC(RevisionsOptions options,
-                                                 Closer closer)
+                                                 Closer closer, boolean 
detailedGCEnabled)
             throws IOException {
         DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
         if (builder == null) {
             System.err.println("revisions mode only available for 
DocumentNodeStore");
             System.exit(1);
         }
+        // set detailedGC
+        builder.setDetailedGCEnabled(detailedGCEnabled);
+
         // create a VersionGCSupport while builder is read-write
         VersionGCSupport gcSupport = builder.createVersionGCSupport();
         // check for matching format version
@@ -227,7 +243,9 @@ public class RevisionsCommand implements Command {
         useMemoryBlobStore(builder);
         // create a version GC that operates on a read-only DocumentNodeStore
         // and a GC support with a writable DocumentStore
-        VersionGarbageCollector gc = createVersionGC(builder.build(), 
gcSupport, isDetailedGCEnabled(builder));
+        System.out.println("DryRun is enabled : " + options.isDryRun());
+        VersionGarbageCollector gc = createVersionGC(builder.build(), 
gcSupport, isDetailedGCEnabled(builder),
+                options.isDryRun());
 
         VersionGCOptions gcOptions = gc.getOptions();
         gcOptions = gcOptions.withDelayFactor(options.getDelay());
@@ -243,7 +261,7 @@ public class RevisionsCommand implements Command {
 
     private void info(RevisionsOptions options, Closer closer)
             throws IOException {
-        VersionGarbageCollector gc = bootstrapVGC(options, closer);
+        VersionGarbageCollector gc = bootstrapVGC(options, closer, false);
         System.out.println("retrieving gc info");
         VersionGCInfo info = gc.getInfo(options.getOlderThan(), SECONDS);
 
@@ -263,9 +281,9 @@ public class RevisionsCommand implements Command {
                 info.estimatedIterations);
     }
 
-    private void collect(final RevisionsOptions options, Closer closer)
+    private void collect(final RevisionsOptions options, Closer closer, 
boolean detailedGCEnabled)
             throws IOException {
-        VersionGarbageCollector gc = bootstrapVGC(options, closer);
+        VersionGarbageCollector gc = bootstrapVGC(options, closer, 
detailedGCEnabled);
         ExecutorService executor = Executors.newSingleThreadExecutor();
         final Semaphore finished = new Semaphore(0);
         try {
@@ -350,7 +368,7 @@ public class RevisionsCommand implements Command {
 
     private void reset(RevisionsOptions options, Closer closer)
             throws IOException {
-        VersionGarbageCollector gc = bootstrapVGC(options, closer);
+        VersionGarbageCollector gc = bootstrapVGC(options, closer, false);
         System.out.println("resetting recommendations and statistics");
         gc.reset();
     }
diff --git a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java
index ebd09b7a5e..087a11818f 100644
--- a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java
+++ b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/RevisionsCommandTest.java
@@ -34,12 +34,14 @@ import 
org.apache.jackrabbit.oak.plugins.document.MongoConnectionFactory;
 import org.apache.jackrabbit.oak.plugins.document.MongoUtils;
 import org.apache.jackrabbit.oak.plugins.document.Revision;
 import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
 import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
 
+import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.junit.Assert.assertNotNull;
@@ -105,6 +107,15 @@ public class RevisionsCommandTest {
         assertTrue(output.contains("starting gc collect"));
     }
 
+    @Test
+    public void detailedGC() {
+        ns.dispose();
+
+        String output = captureSystemOut(new RevisionsCmd("detailedGC"));
+        assertTrue(output.contains("DryRun is enabled : true"));
+        assertTrue(output.contains("starting gc collect"));
+    }
+
     @Test
     public void sweep() throws Exception {
         int clusterId = ns.getClusterId();
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
index a22f711f1c..4cb808aa0a 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
@@ -642,7 +642,7 @@ public final class DocumentNodeStore
         this.branches = new UnmergedBranches();
         this.asyncDelay = builder.getAsyncDelay();
         this.versionGarbageCollector = new VersionGarbageCollector(
-                this, builder.createVersionGCSupport(), 
isDetailedGCEnabled(builder));
+                this, builder.createVersionGCSupport(), 
isDetailedGCEnabled(builder), false);
         
this.versionGarbageCollector.setStatisticsProvider(builder.getStatisticsProvider());
         this.versionGarbageCollector.setGCMonitor(builder.getGCMonitor());
         this.journalGarbageCollector = new JournalGarbageCollector(
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index cd0de7c88e..2a36d4fdd3 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -131,6 +131,7 @@ public class VersionGarbageCollector {
     private final DocumentNodeStore nodeStore;
     private final DocumentStore ds;
     private final boolean detailedGCEnabled;
+    private final boolean isDetailedGCDryRun;
     private final VersionGCSupport versionStore;
     private final AtomicReference<GCJob> collector = newReference();
     private VersionGCOptions options;
@@ -140,11 +141,13 @@ public class VersionGarbageCollector {
 
     VersionGarbageCollector(DocumentNodeStore nodeStore,
                             VersionGCSupport gcSupport,
-                            final boolean detailedGCEnabled) {
+                            final boolean detailedGCEnabled,
+                            final boolean isDetailedGCDryRun) {
         this.nodeStore = nodeStore;
         this.versionStore = gcSupport;
         this.ds = gcSupport.getDocumentStore();
         this.detailedGCEnabled = detailedGCEnabled;
+        this.isDetailedGCDryRun = isDetailedGCDryRun;
         this.options = new VersionGCOptions();
     }
 
@@ -1161,17 +1164,15 @@ public class VersionGarbageCollector {
         public void removeGarbage(final VersionGCStats stats) {
 
             if (updateOpList.isEmpty()) {
-                if (log.isDebugEnabled()) {
+                if (log.isDebugEnabled() || isDetailedGCDryRun) {
                     log.debug("Skipping removal of detailed garbage, cause no 
garbage detected");
                 }
                 return;
             }
 
-            int updatedDocs;
-
             monitor.info("Proceeding to update [{}] documents", 
updateOpList.size());
 
-            if (log.isDebugEnabled()) {
+            if (log.isDebugEnabled() || isDetailedGCDryRun) {
                 String collect = 
updateOpList.stream().map(UpdateOp::getId).collect(joining(","));
                 log.debug("Performing batch update of documents with following 
id's [{}]", collect);
             }
@@ -1183,24 +1184,27 @@ public class VersionGarbageCollector {
 
             timer.reset().start();
             try {
-                List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, 
updateOpList);
-                int deletedProps = 
oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> 
deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum();
-                updatedDocs = (int) 
oldDocs.stream().filter(Objects::nonNull).count();
-                stats.updatedDetailedGCDocsCount += updatedDocs;
-                stats.deletedPropsCount += deletedProps;
-                stats.deletedUnmergedBCCount += deletedUnmergedBCSet.size();
+                if (!isDetailedGCDryRun) {
+                    // only delete these in case it is not a dryRun
+                    List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, 
updateOpList);
+                    int deletedProps = 
oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> 
deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum();
+                    int updatedDocs = (int) 
oldDocs.stream().filter(Objects::nonNull).count();
+                    stats.updatedDetailedGCDocsCount += updatedDocs;
+                    stats.deletedPropsCount += deletedProps;
+                    stats.deletedUnmergedBCCount += 
deletedUnmergedBCSet.size();
+
+                    if (log.isDebugEnabled()) {
+                        log.debug("Updated [{}] documents, deleted [{}] 
properties, deleted [{}] unmergedBranchCommits",
+                                updatedDocs, deletedProps, 
deletedUnmergedBCSet.size());
+                    }
 
-                if (log.isDebugEnabled()) {
-                    log.debug("Updated [{}] documents, deleted [{}] 
properties, deleted [{}] unmergedBranchCommits",
-                            updatedDocs, deletedProps, 
deletedUnmergedBCSet.size());
+                    // save stats
+                    detailedGCStats.propertiesDeleted(deletedProps);
+                    
detailedGCStats.unmergedBranchCommitsDeleted(deletedUnmergedBCSet.size());
+                    detailedGCStats.documentsUpdated(updatedDocs);
+                    // fix for sonar : converted to long before operation
+                    
detailedGCStats.documentsUpdateSkipped((long)oldDocs.size() - updatedDocs);
                 }
-
-                // save stats
-                detailedGCStats.propertiesDeleted(deletedProps);
-                
detailedGCStats.unmergedBranchCommitsDeleted(deletedUnmergedBCSet.size());
-                detailedGCStats.documentsUpdated(updatedDocs);
-                // fix for sonar : converted to long before operation
-                detailedGCStats.documentsUpdateSkipped((long)oldDocs.size() - 
updatedDocs);
             } finally {
                 // now reset delete metadata
                 updateOpList.clear();
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java
index 27333b57cf..ab3cfd30f3 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java
@@ -107,7 +107,7 @@ public class VersionGCQueryTest {
         clock.waitUntil(clock.getTime() + TimeUnit.HOURS.toMillis(1));
 
         VersionGarbageCollector gc = new VersionGarbageCollector(
-                ns, new VersionGCSupport(store), false);
+                ns, new VersionGCSupport(store), false, false);
         prevDocIds.clear();
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertEquals(11, stats.deletedDocGCCount);
@@ -140,7 +140,7 @@ public class VersionGCQueryTest {
         clock.waitUntil(clock.getTime() + TimeUnit.HOURS.toMillis(1));
 
         VersionGarbageCollector gc = new VersionGarbageCollector(
-                ns, new VersionGCSupport(store), false);
+                ns, new VersionGCSupport(store), false, false);
         prevDocIds.clear();
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertEquals(1, stats.deletedDocGCCount);
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index 39ca136c8d..14b871a7da 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -408,7 +408,7 @@ public class VersionGCTest {
                 deletedOnceCountCalls.incrementAndGet();
                 return Iterables.size(Utils.getSelectedDocuments(store, 
NodeDocument.DELETED_ONCE, 1));
             }
-        }, false);
+        }, false, false);
 
         // run first RGC
         gc.gc(1, TimeUnit.HOURS);
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index d8ff6acf88..2457fd445f 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -94,8 +94,6 @@ import org.apache.jackrabbit.oak.api.Type;
 import 
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDBFixture;
 import 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import 
org.apache.jackrabbit.oak.plugins.document.bundlor.BundlingConfigInitializer;
-import 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
-import 
org.apache.jackrabbit.oak.plugins.document.bundlor.BundlingConfigInitializer;
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoTestUtils;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBOptions;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
@@ -108,7 +106,6 @@ import org.apache.jackrabbit.oak.stats.Clock;
 import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -935,7 +932,7 @@ public class VersionGarbageCollectorIT {
             }
         };
 
-        VersionGarbageCollector gc = new VersionGarbageCollector(store, 
gcSupport, true);
+        VersionGarbageCollector gc = new VersionGarbageCollector(store, 
gcSupport, true, false);
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.updatedDetailedGCDocsCount);
         assertEquals(0, stats.deletedPropsCount);
@@ -1001,7 +998,7 @@ public class VersionGarbageCollectorIT {
             }
         };
 
-        gcRef.set(new VersionGarbageCollector(store, gcSupport, true));
+        gcRef.set(new VersionGarbageCollector(store, gcSupport, true, false));
 
         //3. Check that deleted property does get collected post maxAge
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
@@ -1610,7 +1607,7 @@ public class VersionGarbageCollectorIT {
                         });
             }
         };
-        final VersionGarbageCollector gc = new VersionGarbageCollector(store, 
gcSupport, false);
+        final VersionGarbageCollector gc = new VersionGarbageCollector(store, 
gcSupport, false, false);
         // start GC -> will try to remove /foo and /bar
         Future<VersionGCStats> f = execService.submit(new 
Callable<VersionGCStats>() {
             @Override
@@ -1771,7 +1768,7 @@ public class VersionGarbageCollectorIT {
                 return super.getPossiblyDeletedDocs(fromModified, toModified);
             }
         };
-        gcRef.set(new VersionGarbageCollector(store, gcSupport, false));
+        gcRef.set(new VersionGarbageCollector(store, gcSupport, false, false));
         VersionGCStats stats = gcRef.get().gc(30, TimeUnit.MINUTES);
         assertTrue(stats.canceled);
         assertEquals(0, stats.deletedDocGCCount);
@@ -1823,7 +1820,7 @@ public class VersionGarbageCollectorIT {
                 return super.getPossiblyDeletedDocs(prevLastModifiedTime, 
lastModifiedTime).iterator();
             }
         };
-        gcRef.set(new VersionGarbageCollector(store, gcSupport, false));
+        gcRef.set(new VersionGarbageCollector(store, gcSupport, false, false));
         VersionGCStats stats = gcRef.get().gc(30, TimeUnit.MINUTES);
         assertTrue(stats.canceled);
         assertEquals(0, stats.deletedDocGCCount);
@@ -1852,7 +1849,7 @@ public class VersionGarbageCollectorIT {
                         });
             }
         };
-        final VersionGarbageCollector gc = new VersionGarbageCollector(store, 
nonReportingGcSupport, false);
+        final VersionGarbageCollector gc = new VersionGarbageCollector(store, 
nonReportingGcSupport, false, false);
         final long maxAgeHours = 1;
         final long clockDelta = HOURS.toMillis(maxAgeHours) + 
MINUTES.toMillis(5);
 

Reply via email to