This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch DetailedGC/OAK-10199 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 613708a52bf95be7029f3b1a9ce91bd26467bbb6 Author: Rishabh Kumar <[email protected]> AuthorDate: Mon Jun 19 13:45:20 2023 +0530 OAK-10199 : updated logic to fetch nodes by sorting them on the basis of _modified & _id --- .../plugins/document/VersionGCRecommendations.java | 4 -- .../oak/plugins/document/VersionGCSupport.java | 2 +- .../plugins/document/VersionGarbageCollector.java | 50 ---------------------- .../oak/plugins/document/rdb/RDBDocumentStore.java | 16 +++++-- .../plugins/document/rdb/RDBDocumentStoreJDBC.java | 16 ++++--- .../plugins/document/rdb/RDBVersionGCSupport.java | 41 +++++++++--------- 6 files changed, 44 insertions(+), 85 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java index c8f02bf7d7..0115b07d76 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java @@ -23,7 +23,6 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; -import com.google.common.collect.ImmutableMap; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval; import org.apache.jackrabbit.oak.spi.gc.GCMonitor; @@ -46,9 +45,6 @@ import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString; -import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP; -import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP; - /** * Gives a recommendation about parameters for the next revision garbage collection run. */ diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java index b76803ce23..705ebc9a3c 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java @@ -59,7 +59,7 @@ public class VersionGCSupport { /** * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value - * within the given range and the {@link NodeDocument#DELETED} set to + * within the given range and the {@link NodeDocument# DELETED} set to * {@code true}. The two passed modified timestamps are in milliseconds * since the epoch and the implementation will convert them to seconds at * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index 7f04441bcc..08b05aea91 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -34,7 +34,6 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; import org.apache.jackrabbit.guava.common.base.Function; import org.apache.jackrabbit.guava.common.base.Joiner; @@ -1315,55 +1314,6 @@ public class VersionGarbageCollector { } } } - private void delayOnModifications(final long durationMs, final AtomicBoolean cancel) { - long delayMs = round(durationMs * options.delayFactor); - if (!cancel.get() && delayMs > 0) { - try { - Clock clock = nodeStore.getClock(); - clock.waitUntil(clock.getTime() + delayMs); - } - catch (InterruptedException ex) { - /* ignore */ - } - } - - public void removeGarbage(final VersionGCStats stats) { - - if (updateOpList.isEmpty()) { - if (log.isDebugEnabled()) { - log.debug("Skipping removal of detailed garbage, cause no garbage detected"); - } - return; - } - - int updatedDocs; - - monitor.info("Proceeding to update [{}] documents", updateOpList.size()); - - if (log.isDebugEnabled()) { - String collect = updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(",")); - log.trace("Performing batch update of documents with following id's. \n" + collect); - } - - if (cancel.get()) { - log.info("Aborting the removal of detailed garbage since RGC had been cancelled"); - return; - } - - timer.reset().start(); - try { - // TODO create an api to bulk update findAndUpdate Ops - updatedDocs = (int) updateOpList.stream().map(op -> ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count(); - stats.updatedDetailedGCDocsCount += updatedDocs; - log.info("Updated [{}] documents", updatedDocs); - // now reset delete metadata - updateOpList.clear(); - garbageDocsCount = 0; - } finally { - delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel); - } - } - } private void delayOnModifications(final long durationMs, final AtomicBoolean cancel) { long delayMs = round(durationMs * options.delayFactor); if (!cancel.get() && delayMs > 0) { diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java index 82c09e213d..3a9ef2d95d 100755 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java @@ -971,8 +971,8 @@ public class RDBDocumentStore implements DocumentStore { public static String VERSIONPROP = "__version"; // set of supported indexed properties - private static final Set<String> INDEXEDPROPERTIES = new HashSet<String>(Arrays.asList(new String[] { MODIFIED, - NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP })); + private static final Set<String> INDEXEDPROPERTIES = new HashSet<>(Arrays.asList(MODIFIED, + NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP, ID)); // set of required table columns private static final Set<String> REQUIREDCOLUMNS = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList( @@ -1840,7 +1840,7 @@ public class RDBDocumentStore implements DocumentStore { } protected <T extends Document> Iterable<T> queryAsIterable(final Collection<T> collection, String fromKey, String toKey, - final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final String sortBy) { + final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final List<String> sortBy) { final RDBTableMetaData tmd = getTable(collection); Set<String> allowedProps = Sets.intersection(INDEXEDPROPERTIES, tmd.getColumnProperties()); @@ -1853,6 +1853,16 @@ public class RDBDocumentStore implements DocumentStore { } } + if (sortBy != null && !sortBy.isEmpty()) { + for (String key: sortBy) { + if (!allowedProps.contains(key)) { + final String message = "indexed property " + key + " not supported. supported properties are " + allowedProps; + LOG.error(message); + throw new UnsupportedIndexedPropertyException(message); + } + } + } + final String from = collection == Collection.NODES && NodeDocument.MIN_ID_VALUE.equals(fromKey) ? null : fromKey; final String to = collection == Collection.NODES && NodeDocument.MAX_ID_VALUE.equals(toKey) ? null : toKey; diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java index 26fc1311fa..59dfb968b0 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java @@ -16,6 +16,8 @@ */ package org.apache.jackrabbit.oak.plugins.document.rdb; +import static java.util.List.of; +import static java.util.stream.Collectors.joining; import static org.apache.jackrabbit.guava.common.collect.Iterables.transform; import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet; import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.CHAR2OCTETRATIO; @@ -459,7 +461,7 @@ public class RDBDocumentStoreJDBC { + excludeKeyPatterns + ", conditions=" + conditions + ", limit=" + limit) : null); stmt = prepareQuery(connection, tmd, fields, minId, - maxId, excludeKeyPatterns, conditions, limit, "ID"); + maxId, excludeKeyPatterns, conditions, limit, of("ID")); rs = stmt.executeQuery(); while (rs.next() && result.size() < limit) { int field = 1; @@ -554,7 +556,7 @@ public class RDBDocumentStoreJDBC { @NotNull public Iterator<RDBRow> queryAsIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId, - List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException { + List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException { return new ResultSetIterator(ch, tmd, minId, maxId, excludeKeyPatterns, conditions, limit, sortBy); } @@ -573,7 +575,7 @@ public class RDBDocumentStoreJDBC { private long pstart; public ResultSetIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId, - List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException { + List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException { long start = System.currentTimeMillis(); try { this.ch = ch; @@ -695,7 +697,7 @@ public class RDBDocumentStoreJDBC { @NotNull private PreparedStatement prepareQuery(Connection connection, RDBTableMetaData tmd, String columns, String minId, String maxId, - List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException { + List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException { StringBuilder selectClause = new StringBuilder(); @@ -714,9 +716,8 @@ public class RDBDocumentStoreJDBC { query.append(" where ").append(whereClause); } - if (sortBy != null) { - // FIXME : order should be determined via sortBy field - query.append(" order by ID"); + if (sortBy != null && !sortBy.isEmpty()) { + query.append(" order by ").append(sortBy.stream().map(INDEXED_PROP_MAPPING::get).collect(joining(", "))); } if (limit != Integer.MAX_VALUE) { @@ -967,6 +968,7 @@ public class RDBDocumentStoreJDBC { tmp.put(NodeDocument.SD_TYPE, "SDTYPE"); tmp.put(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "SDMAXREVTIME"); tmp.put(RDBDocumentStore.VERSIONPROP, "VERSION"); + tmp.put(NodeDocument.ID, "ID"); INDEXED_PROP_MAPPING = Collections.unmodifiableMap(tmp); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java index f26268bcd3..0d2f678911 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java @@ -16,11 +16,13 @@ */ package org.apache.jackrabbit.oak.plugins.document.rdb; -import static java.util.Collections.emptyList; import static java.util.List.of; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; +import static org.apache.jackrabbit.oak.plugins.document.Document.ID; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN; @@ -99,18 +101,21 @@ public class RDBVersionGCSupport extends VersionGCSupport { * then perform the comparison. * * @param fromModified the lower bound modified timestamp (inclusive) - * @param toModified the upper bound modified timestamp (exclusive) - * @param limit the limit of documents to return + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @param fromId the lower bound {@link NodeDocument#ID} (exclusive) * @return matching documents. */ @Override - public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) { + public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, + final String fromId) { List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)), - new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified))); + new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)), + new QueryCondition(ID, ">", of(fromId))); if (MODE == 1) { return getIterator(EMPTY_KEY_PATTERN, conditions); } else { - return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, MODIFIED_IN_SECS); + return store.queryAsIterable(NODES, fromId, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID)); } } @@ -275,24 +280,20 @@ public class RDBVersionGCSupport extends VersionGCSupport { * @return the timestamp of the oldest modified document. */ @Override - public long getOldestModifiedTimestamp(Clock clock) { - long modifiedMs = Long.MIN_VALUE; + public NodeDocument getOldestModifiedDoc(Clock clock) { + NodeDocument doc = NULL; - LOG.info("getOldestModifiedTimestamp() <- start"); + LOG.info("getOldestModifiedDoc() <- start"); + Iterable<NodeDocument> modifiedDocs = null; try { - long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, null, null, EMPTY_KEY_PATTERN, emptyList()); - modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec); + modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE); + doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL; } catch (DocumentStoreException ex) { - LOG.error("getOldestModifiedTimestamp()", ex); - } - - if (modifiedMs > 0) { - LOG.info("getOldestModifiedTimestamp() -> {}", Utils.timestampToString(modifiedMs)); - return modifiedMs; - } else { - LOG.info("getOldestModifiedTimestamp() -> none found, return current time"); - return clock.getTime(); + LOG.error("getOldestModifiedDoc()", ex); + } finally { + Utils.closeIfCloseable(modifiedDocs); } + return doc; } @Override
