This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 8f0eb886cc96468308aca293a7a9df2db9c87820
Author: Rishabh Kumar <[email protected]>
AuthorDate: Wed Apr 26 21:16:44 2023 +0530

    OAK-10199 : override getModifiedDocs() for RDB and added unit cases for 
deletedProps
---
 .../oak/plugins/document/NodeDocument.java         |  6 ++-
 .../plugins/document/VersionGarbageCollector.java  | 52 +++++++++++++++++++-
 .../document/mongo/MongoVersionGCSupport.java      | 18 +++----
 .../plugins/document/rdb/RDBDocumentStoreJDBC.java |  1 +
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 56 ++++++++++++++++++++++
 .../oak/plugins/document/VersionGCTest.java        |  3 --
 6 files changed, 120 insertions(+), 16 deletions(-)

diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
index 7000ba0055..d3dc518a95 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
@@ -32,7 +32,6 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
-import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Predicate;
@@ -68,7 +67,6 @@ import static 
org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.mergeSorted;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static java.util.Objects.requireNonNull;
-import static java.util.stream.Collectors.toMap;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static 
org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
 import static org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key;
@@ -1664,12 +1662,16 @@ public final class NodeDocument extends Document {
 
     /**
      * Returns name of all the properties on this document
+<<<<<<< HEAD
      * <p>
      * Note: property names returned are escaped
      *
      * @return Set of all property names (escaped)
      * @see Utils#unescapePropertyName(String)
      * @see Utils#escapePropertyName(String)
+=======
+     * @return Set of all property names
+>>>>>>> 710308b51a (OAK-10199 : override getModifiedDocs() for RDB and added 
unit cases for deletedProps)
      */
     @NotNull
     Set<String> getPropertyNames() {
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 8b48bce243..7f04441bcc 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -31,10 +31,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
-import java.util.SortedMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Joiner;
@@ -318,7 +318,6 @@ public class VersionGarbageCollector {
     }
 
     public static class VersionGCStats {
-        public long oldestModifiedGced;
         boolean ignoredGCDueToCheckPoint;
         boolean detailedGCDryRunMode;
         boolean ignoredDetailedGCDueToCheckPoint;
@@ -1316,6 +1315,55 @@ public class VersionGarbageCollector {
             }
         }
     }
+    private void delayOnModifications(final long durationMs, final 
AtomicBoolean cancel) {
+        long delayMs = round(durationMs * options.delayFactor);
+        if (!cancel.get() && delayMs > 0) {
+            try {
+                Clock clock = nodeStore.getClock();
+                clock.waitUntil(clock.getTime() + delayMs);
+            }
+            catch (InterruptedException ex) {
+                /* ignore */
+            }
+        }
+
+        public void removeGarbage(final VersionGCStats stats) {
+
+            if (updateOpList.isEmpty()) {
+                if (log.isDebugEnabled()) {
+                    log.debug("Skipping removal of detailed garbage, cause no 
garbage detected");
+                }
+                return;
+            }
+
+            int updatedDocs;
+
+            monitor.info("Proceeding to update [{}] documents", 
updateOpList.size());
+
+            if (log.isDebugEnabled()) {
+                String collect = 
updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(","));
+                log.trace("Performing batch update of documents with following 
id's. \n" + collect);
+            }
+
+            if (cancel.get()) {
+                log.info("Aborting the removal of detailed garbage since RGC 
had been cancelled");
+                return;
+            }
+
+            timer.reset().start();
+            try {
+                // TODO create an api to bulk update findAndUpdate Ops
+                updatedDocs = (int) updateOpList.stream().map(op -> 
ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count();
+                stats.updatedDetailedGCDocsCount += updatedDocs;
+                log.info("Updated [{}] documents", updatedDocs);
+                // now reset delete metadata
+                updateOpList.clear();
+                garbageDocsCount = 0;
+            } finally {
+                delayOnModifications(timer.stop().elapsed(MILLISECONDS), 
cancel);
+            }
+        }
+    }
     private void delayOnModifications(final long durationMs, final 
AtomicBoolean cancel) {
         long delayMs = round(durationMs * options.delayFactor);
         if (!cancel.get() && delayMs > 0) {
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 37724f821c..b9dc2da855 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -22,7 +22,6 @@ package org.apache.jackrabbit.oak.plugins.document.mongo;
 import static com.mongodb.client.model.Filters.eq;
 import static com.mongodb.client.model.Filters.exists;
 import static com.mongodb.client.model.Filters.gt;
-import static com.mongodb.client.model.Filters.gte;
 import static com.mongodb.client.model.Filters.or;
 import static java.util.Optional.empty;
 import static java.util.Optional.ofNullable;
@@ -50,6 +49,7 @@ import java.util.List;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
 import java.util.regex.Pattern;
 
 import com.mongodb.client.MongoCursor;
@@ -120,10 +120,10 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
     @Override
     public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long 
fromModified, final long toModified) {
         //_deletedOnce == true && _modified >= fromModified && _modified < 
toModified
-        Bson query = and(
+        Bson query = Filters.and(
                 Filters.eq(DELETED_ONCE, true),
-                gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
+                Filters.gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
+                Filters.lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
         );
         FindIterable<BasicDBObject> cursor = getNodeCollection()
                 .find(query).batchSize(batchSize);
@@ -277,9 +277,9 @@ public class MongoVersionGCSupport extends VersionGCSupport 
{
         }
         // OAK-8351: this (last) query only contains SD_TYPE and 
SD_MAX_REV_TIME_IN_SECS
         // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index
-        result.add(and(
+        result.add(Filters.and(
                 Filters.or(orClauses),
-                lt(SD_MAX_REV_TIME_IN_SECS, 
getModifiedInSecs(oldestRevTimeStamp))
+                Filters.lt(SD_MAX_REV_TIME_IN_SECS, 
getModifiedInSecs(oldestRevTimeStamp))
                 ));
 
         return result;
@@ -310,16 +310,16 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
             Bson idPathClause = Filters.or(
                     Filters.regex(ID, Pattern.compile(".*" + idSuffix)),
                     // previous documents with long paths do not have a '-' in 
the id
-                    and(
+                    Filters.and(
                             Filters.regex(ID, Pattern.compile("[^-]*")),
                             Filters.regex(PATH, Pattern.compile(".*" + 
idSuffix))
                     )
             );
 
             long minMaxRevTimeInSecs = Math.min(maxRevTimeInSecs, 
getModifiedInSecs(r.getTimestamp()));
-            result.add(and(
+            result.add(Filters.and(
                     Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()),
-                    lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
+                    Filters.lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
                     idPathClause
                     ));
         }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 5caa65d875..26fc1311fa 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -715,6 +715,7 @@ public class RDBDocumentStoreJDBC {
         }
 
         if (sortBy != null) {
+            // FIXME : order should be determined via sortBy field
             query.append(" order by ID");
         }
 
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index a463499793..f26268bcd3 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -16,7 +16,13 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import static java.util.Collections.emptyList;
+import static java.util.List.of;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import static 
org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -85,6 +91,29 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         }
     }
 
+    /**
+     * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} 
value
+     * within the given range. The two passed modified timestamps are in 
milliseconds
+     * since the epoch and the implementation will convert them to seconds at
+     * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
+     * then perform the comparison.
+     *
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @return matching documents.
+     */
+    @Override
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit) {
+        List<QueryCondition> conditions = of(new 
QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
+                new QueryCondition(MODIFIED_IN_SECS, ">=", 
getModifiedInSecs(fromModified)));
+        if (MODE == 1) {
+            return getIterator(EMPTY_KEY_PATTERN, conditions);
+        } else {
+            return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, 
conditions, limit, MODIFIED_IN_SECS);
+        }
+    }
+
     @Override
     protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> 
gcTypes, final RevisionVector sweepRevs,
             final long oldestRevTimeStamp) {
@@ -239,6 +268,33 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         }
     }
 
+    /**
+     * Retrieve the time of the oldest modified document.
+     *
+     * @param clock System Clock
+     * @return the timestamp of the oldest modified document.
+     */
+    @Override
+    public long getOldestModifiedTimestamp(Clock clock) {
+        long modifiedMs = Long.MIN_VALUE;
+
+        LOG.info("getOldestModifiedTimestamp() <- start");
+        try {
+            long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, 
null, null, EMPTY_KEY_PATTERN, emptyList());
+            modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec);
+        } catch (DocumentStoreException ex) {
+            LOG.error("getOldestModifiedTimestamp()", ex);
+        }
+
+        if (modifiedMs > 0) {
+            LOG.info("getOldestModifiedTimestamp() -> {}", 
Utils.timestampToString(modifiedMs));
+            return modifiedMs;
+        } else {
+            LOG.info("getOldestModifiedTimestamp() -> none found, return 
current time");
+            return clock.getTime();
+        }
+    }
+
     @Override
     public long getDeletedOnceCount() {
         return store.queryCount(Collection.NODES, null, null, 
RDBDocumentStore.EMPTY_KEY_PATTERN,
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index 9f816523ef..985aea7078 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -44,7 +44,6 @@ import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 
@@ -477,7 +476,6 @@ public class VersionGCTest {
 
     // OAK-10199
     @Test
-    @Ignore
     public void testDetailGcDocumentRead_disabled() throws Exception {
         disableDetailGC(gc);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
@@ -486,7 +484,6 @@ public class VersionGCTest {
     }
 
     @Test
-    @Ignore
     public void testDetailGcDocumentRead_enabled() throws Exception {
         enableDetailGC(gc);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);

Reply via email to