This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch DetailedGC/OAK-10199 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 8f0eb886cc96468308aca293a7a9df2db9c87820 Author: Rishabh Kumar <[email protected]> AuthorDate: Wed Apr 26 21:16:44 2023 +0530 OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps --- .../oak/plugins/document/NodeDocument.java | 6 ++- .../plugins/document/VersionGarbageCollector.java | 52 +++++++++++++++++++- .../document/mongo/MongoVersionGCSupport.java | 18 +++---- .../plugins/document/rdb/RDBDocumentStoreJDBC.java | 1 + .../plugins/document/rdb/RDBVersionGCSupport.java | 56 ++++++++++++++++++++++ .../oak/plugins/document/VersionGCTest.java | 3 -- 6 files changed, 120 insertions(+), 16 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java index 7000ba0055..d3dc518a95 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java @@ -32,7 +32,6 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; import org.apache.jackrabbit.guava.common.base.Function; import org.apache.jackrabbit.guava.common.base.Predicate; @@ -68,7 +67,6 @@ import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.guava.common.collect.Iterables.mergeSorted; import static org.apache.jackrabbit.guava.common.collect.Iterables.transform; import static java.util.Objects.requireNonNull; -import static java.util.stream.Collectors.toMap; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE; import static org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key; @@ -1664,12 +1662,16 @@ public final class NodeDocument extends Document { /** * Returns name of all the properties on this document +<<<<<<< HEAD * <p> * Note: property names returned are escaped * * @return Set of all property names (escaped) * @see Utils#unescapePropertyName(String) * @see Utils#escapePropertyName(String) +======= + * @return Set of all property names +>>>>>>> 710308b51a (OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps) */ @NotNull Set<String> getPropertyNames() { diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index 8b48bce243..7f04441bcc 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -31,10 +31,10 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.SortedMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import org.apache.jackrabbit.guava.common.base.Function; import org.apache.jackrabbit.guava.common.base.Joiner; @@ -318,7 +318,6 @@ public class VersionGarbageCollector { } public static class VersionGCStats { - public long oldestModifiedGced; boolean ignoredGCDueToCheckPoint; boolean detailedGCDryRunMode; boolean ignoredDetailedGCDueToCheckPoint; @@ -1316,6 +1315,55 @@ public class VersionGarbageCollector { } } } + private void delayOnModifications(final long durationMs, final AtomicBoolean cancel) { + long delayMs = round(durationMs * options.delayFactor); + if (!cancel.get() && delayMs > 0) { + try { + Clock clock = nodeStore.getClock(); + clock.waitUntil(clock.getTime() + delayMs); + } + catch (InterruptedException ex) { + /* ignore */ + } + } + + public void removeGarbage(final VersionGCStats stats) { + + if (updateOpList.isEmpty()) { + if (log.isDebugEnabled()) { + log.debug("Skipping removal of detailed garbage, cause no garbage detected"); + } + return; + } + + int updatedDocs; + + monitor.info("Proceeding to update [{}] documents", updateOpList.size()); + + if (log.isDebugEnabled()) { + String collect = updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(",")); + log.trace("Performing batch update of documents with following id's. \n" + collect); + } + + if (cancel.get()) { + log.info("Aborting the removal of detailed garbage since RGC had been cancelled"); + return; + } + + timer.reset().start(); + try { + // TODO create an api to bulk update findAndUpdate Ops + updatedDocs = (int) updateOpList.stream().map(op -> ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count(); + stats.updatedDetailedGCDocsCount += updatedDocs; + log.info("Updated [{}] documents", updatedDocs); + // now reset delete metadata + updateOpList.clear(); + garbageDocsCount = 0; + } finally { + delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel); + } + } + } private void delayOnModifications(final long durationMs, final AtomicBoolean cancel) { long delayMs = round(durationMs * options.delayFactor); if (!cancel.get() && delayMs > 0) { diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index 37724f821c..b9dc2da855 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -22,7 +22,6 @@ package org.apache.jackrabbit.oak.plugins.document.mongo; import static com.mongodb.client.model.Filters.eq; import static com.mongodb.client.model.Filters.exists; import static com.mongodb.client.model.Filters.gt; -import static com.mongodb.client.model.Filters.gte; import static com.mongodb.client.model.Filters.or; import static java.util.Optional.empty; import static java.util.Optional.ofNullable; @@ -50,6 +49,7 @@ import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import java.util.regex.Pattern; import com.mongodb.client.MongoCursor; @@ -120,10 +120,10 @@ public class MongoVersionGCSupport extends VersionGCSupport { @Override public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) { //_deletedOnce == true && _modified >= fromModified && _modified < toModified - Bson query = and( + Bson query = Filters.and( Filters.eq(DELETED_ONCE, true), - gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), - lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)) + Filters.gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), + Filters.lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)) ); FindIterable<BasicDBObject> cursor = getNodeCollection() .find(query).batchSize(batchSize); @@ -277,9 +277,9 @@ public class MongoVersionGCSupport extends VersionGCSupport { } // OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index - result.add(and( + result.add(Filters.and( Filters.or(orClauses), - lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp)) + Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp)) )); return result; @@ -310,16 +310,16 @@ public class MongoVersionGCSupport extends VersionGCSupport { Bson idPathClause = Filters.or( Filters.regex(ID, Pattern.compile(".*" + idSuffix)), // previous documents with long paths do not have a '-' in the id - and( + Filters.and( Filters.regex(ID, Pattern.compile("[^-]*")), Filters.regex(PATH, Pattern.compile(".*" + idSuffix)) ) ); long minMaxRevTimeInSecs = Math.min(maxRevTimeInSecs, getModifiedInSecs(r.getTimestamp())); - result.add(and( + result.add(Filters.and( Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()), - lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs), + Filters.lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs), idPathClause )); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java index 5caa65d875..26fc1311fa 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java @@ -715,6 +715,7 @@ public class RDBDocumentStoreJDBC { } if (sortBy != null) { + // FIXME : order should be determined via sortBy field query.append(" order by ID"); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java index a463499793..f26268bcd3 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java @@ -16,7 +16,13 @@ */ package org.apache.jackrabbit.oak.plugins.document.rdb; +import static java.util.Collections.emptyList; +import static java.util.List.of; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; +import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; +import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN; import java.io.Closeable; import java.io.IOException; @@ -85,6 +91,29 @@ public class RDBVersionGCSupport extends VersionGCSupport { } } + /** + * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value + * within the given range .The two passed modified timestamps are in milliseconds + * since the epoch and the implementation will convert them to seconds at + * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and + * then perform the comparison. + * + * @param fromModified the lower bound modified timestamp (inclusive) + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @return matching documents. + */ + @Override + public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) { + List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)), + new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified))); + if (MODE == 1) { + return getIterator(EMPTY_KEY_PATTERN, conditions); + } else { + return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, MODIFIED_IN_SECS); + } + } + @Override protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs, final long oldestRevTimeStamp) { @@ -239,6 +268,33 @@ public class RDBVersionGCSupport extends VersionGCSupport { } } + /** + * Retrieve the time of the oldest modified document. + * + * @param clock System Clock + * @return the timestamp of the oldest modified document. + */ + @Override + public long getOldestModifiedTimestamp(Clock clock) { + long modifiedMs = Long.MIN_VALUE; + + LOG.info("getOldestModifiedTimestamp() <- start"); + try { + long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, null, null, EMPTY_KEY_PATTERN, emptyList()); + modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec); + } catch (DocumentStoreException ex) { + LOG.error("getOldestModifiedTimestamp()", ex); + } + + if (modifiedMs > 0) { + LOG.info("getOldestModifiedTimestamp() -> {}", Utils.timestampToString(modifiedMs)); + return modifiedMs; + } else { + LOG.info("getOldestModifiedTimestamp() -> none found, return current time"); + return clock.getTime(); + } + } + @Override public long getDeletedOnceCount() { return store.queryCount(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN, diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java index 9f816523ef..985aea7078 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java @@ -44,7 +44,6 @@ import org.jetbrains.annotations.NotNull; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; @@ -477,7 +476,6 @@ public class VersionGCTest { // OAK-10199 @Test - @Ignore public void testDetailGcDocumentRead_disabled() throws Exception { disableDetailGC(gc); VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES); @@ -486,7 +484,6 @@ public class VersionGCTest { } @Test - @Ignore public void testDetailGcDocumentRead_enabled() throws Exception { enableDetailGC(gc); VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
