This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 613708a52bf95be7029f3b1a9ce91bd26467bbb6
Author: Rishabh Kumar <[email protected]>
AuthorDate: Mon Jun 19 13:45:20 2023 +0530

    OAK-10199 : updated logic to fetch nodes by sorting them on the basis of 
_modified & _id
---
 .../plugins/document/VersionGCRecommendations.java |  4 --
 .../oak/plugins/document/VersionGCSupport.java     |  2 +-
 .../plugins/document/VersionGarbageCollector.java  | 50 ----------------------
 .../oak/plugins/document/rdb/RDBDocumentStore.java | 16 +++++--
 .../plugins/document/rdb/RDBDocumentStoreJDBC.java | 16 ++++---
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 41 +++++++++---------
 6 files changed, 44 insertions(+), 85 deletions(-)

diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index c8f02bf7d7..0115b07d76 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -23,7 +23,6 @@ import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
-import com.google.common.collect.ImmutableMap;
 import 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
@@ -46,9 +45,6 @@ import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector
 import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString;
 
-import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP;
-import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP;
-
 /**
  * Gives a recommendation about parameters for the next revision garbage 
collection run.
  */
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index b76803ce23..705ebc9a3c 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -59,7 +59,7 @@ public class VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} 
value
-     * within the given range and the {@link NodeDocument#DELETED} set to
+     * within the given range and the {@link NodeDocument#  DELETED} set to
      * {@code true}. The two passed modified timestamps are in milliseconds
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 7f04441bcc..08b05aea91 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -34,7 +34,6 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
-import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Joiner;
@@ -1315,55 +1314,6 @@ public class VersionGarbageCollector {
             }
         }
     }
-    private void delayOnModifications(final long durationMs, final 
AtomicBoolean cancel) {
-        long delayMs = round(durationMs * options.delayFactor);
-        if (!cancel.get() && delayMs > 0) {
-            try {
-                Clock clock = nodeStore.getClock();
-                clock.waitUntil(clock.getTime() + delayMs);
-            }
-            catch (InterruptedException ex) {
-                /* ignore */
-            }
-        }
-
-        public void removeGarbage(final VersionGCStats stats) {
-
-            if (updateOpList.isEmpty()) {
-                if (log.isDebugEnabled()) {
-                    log.debug("Skipping removal of detailed garbage, cause no 
garbage detected");
-                }
-                return;
-            }
-
-            int updatedDocs;
-
-            monitor.info("Proceeding to update [{}] documents", 
updateOpList.size());
-
-            if (log.isDebugEnabled()) {
-                String collect = 
updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(","));
-                log.trace("Performing batch update of documents with following 
id's. \n" + collect);
-            }
-
-            if (cancel.get()) {
-                log.info("Aborting the removal of detailed garbage since RGC 
had been cancelled");
-                return;
-            }
-
-            timer.reset().start();
-            try {
-                // TODO create an api to bulk update findAndUpdate Ops
-                updatedDocs = (int) updateOpList.stream().map(op -> 
ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count();
-                stats.updatedDetailedGCDocsCount += updatedDocs;
-                log.info("Updated [{}] documents", updatedDocs);
-                // now reset delete metadata
-                updateOpList.clear();
-                garbageDocsCount = 0;
-            } finally {
-                delayOnModifications(timer.stop().elapsed(MILLISECONDS), 
cancel);
-            }
-        }
-    }
     private void delayOnModifications(final long durationMs, final 
AtomicBoolean cancel) {
         long delayMs = round(durationMs * options.delayFactor);
         if (!cancel.get() && delayMs > 0) {
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
index 82c09e213d..3a9ef2d95d 100755
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
@@ -971,8 +971,8 @@ public class RDBDocumentStore implements DocumentStore {
     public static String VERSIONPROP = "__version";
 
     // set of supported indexed properties
-    private static final Set<String> INDEXEDPROPERTIES = new 
HashSet<String>(Arrays.asList(new String[] { MODIFIED,
-            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, 
NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP }));
+    private static final Set<String> INDEXEDPROPERTIES = new 
HashSet<>(Arrays.asList(MODIFIED,
+            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, 
NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP, ID));
 
     // set of required table columns
     private static final Set<String> REQUIREDCOLUMNS = 
Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(
@@ -1840,7 +1840,7 @@ public class RDBDocumentStore implements DocumentStore {
     }
 
     protected <T extends Document> Iterable<T> queryAsIterable(final 
Collection<T> collection, String fromKey, String toKey,
-            final List<String> excludeKeyPatterns, final List<QueryCondition> 
conditions, final int limit, final String sortBy) {
+            final List<String> excludeKeyPatterns, final List<QueryCondition> 
conditions, final int limit, final List<String> sortBy) {
 
         final RDBTableMetaData tmd = getTable(collection);
         Set<String> allowedProps = Sets.intersection(INDEXEDPROPERTIES, 
tmd.getColumnProperties());
@@ -1853,6 +1853,16 @@ public class RDBDocumentStore implements DocumentStore {
             }
         }
 
+        if (sortBy != null && !sortBy.isEmpty()) {
+            for (String key: sortBy) {
+                if (!allowedProps.contains(key)) {
+                    final String message = "indexed property " + key + " not 
supported. supported properties are " + allowedProps;
+                    LOG.error(message);
+                    throw new UnsupportedIndexedPropertyException(message);
+                }
+            }
+        }
+
         final String from = collection == Collection.NODES && 
NodeDocument.MIN_ID_VALUE.equals(fromKey) ? null : fromKey;
         final String to = collection == Collection.NODES && 
NodeDocument.MAX_ID_VALUE.equals(toKey) ? null : toKey;
 
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 26fc1311fa..59dfb968b0 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -16,6 +16,8 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import static java.util.List.of;
+import static java.util.stream.Collectors.joining;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet;
 import static 
org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.CHAR2OCTETRATIO;
@@ -459,7 +461,7 @@ public class RDBDocumentStoreJDBC {
                             + excludeKeyPatterns + ", conditions=" + 
conditions + ", limit=" + limit)
                     : null);
             stmt = prepareQuery(connection, tmd, fields, minId,
-                    maxId, excludeKeyPatterns, conditions, limit, "ID");
+                    maxId, excludeKeyPatterns, conditions, limit, of("ID"));
             rs = stmt.executeQuery();
             while (rs.next() && result.size() < limit) {
                 int field = 1;
@@ -554,7 +556,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     public Iterator<RDBRow> queryAsIterator(RDBConnectionHandler ch, 
RDBTableMetaData tmd, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, String sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, List<String> sortBy) throws SQLException {
         return new ResultSetIterator(ch, tmd, minId, maxId, 
excludeKeyPatterns, conditions, limit, sortBy);
     }
 
@@ -573,7 +575,7 @@ public class RDBDocumentStoreJDBC {
         private long pstart;
 
         public ResultSetIterator(RDBConnectionHandler ch, RDBTableMetaData 
tmd, String minId, String maxId,
-                List<String> excludeKeyPatterns, List<QueryCondition> 
conditions, int limit, String sortBy) throws SQLException {
+                List<String> excludeKeyPatterns, List<QueryCondition> 
conditions, int limit, List<String> sortBy) throws SQLException {
             long start = System.currentTimeMillis();
             try {
                 this.ch = ch;
@@ -695,7 +697,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     private PreparedStatement prepareQuery(Connection connection, 
RDBTableMetaData tmd, String columns, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, String sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, List<String> sortBy) throws SQLException {
 
         StringBuilder selectClause = new StringBuilder();
 
@@ -714,9 +716,8 @@ public class RDBDocumentStoreJDBC {
             query.append(" where ").append(whereClause);
         }
 
-        if (sortBy != null) {
-            // FIXME : order should be determined via sortBy field
-            query.append(" order by ID");
+        if (sortBy != null && !sortBy.isEmpty()) {
+            query.append(" order by 
").append(sortBy.stream().map(INDEXED_PROP_MAPPING::get).collect(joining(", 
")));
         }
 
         if (limit != Integer.MAX_VALUE) {
@@ -967,6 +968,7 @@ public class RDBDocumentStoreJDBC {
         tmp.put(NodeDocument.SD_TYPE, "SDTYPE");
         tmp.put(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "SDMAXREVTIME");
         tmp.put(RDBDocumentStore.VERSIONPROP, "VERSION");
+        tmp.put(NodeDocument.ID, "ID");
         INDEXED_PROP_MAPPING = Collections.unmodifiableMap(tmp);
     }
 
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index f26268bcd3..0d2f678911 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -16,11 +16,13 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
-import static java.util.Collections.emptyList;
 import static java.util.List.of;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static 
org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
 
@@ -99,18 +101,21 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      * then perform the comparison.
      *
      * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified   the upper bound modified timestamp (exclusive)
-     * @param limit        the limit of documents to return
+     * @param toModified the upper bound modified timestamp (exclusive)
+     * @param limit the limit of documents to return
+     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
      * @return matching documents.
      */
     @Override
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit) {
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
+                                                  final String fromId) {
         List<QueryCondition> conditions = of(new 
QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
-                new QueryCondition(MODIFIED_IN_SECS, ">=", 
getModifiedInSecs(fromModified)));
+                new QueryCondition(MODIFIED_IN_SECS, ">=", 
getModifiedInSecs(fromModified)),
+                new QueryCondition(ID, ">", of(fromId)));
         if (MODE == 1) {
             return getIterator(EMPTY_KEY_PATTERN, conditions);
         } else {
-            return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, 
conditions, limit, MODIFIED_IN_SECS);
+            return store.queryAsIterable(NODES, fromId, null, 
EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
         }
     }
 
@@ -275,24 +280,20 @@ public class RDBVersionGCSupport extends VersionGCSupport 
{
      * @return the timestamp of the oldest modified document.
      */
     @Override
-    public long getOldestModifiedTimestamp(Clock clock) {
-        long modifiedMs = Long.MIN_VALUE;
+    public NodeDocument getOldestModifiedDoc(Clock clock) {
+        NodeDocument doc = NULL;
 
-        LOG.info("getOldestModifiedTimestamp() <- start");
+        LOG.info("getOldestModifiedDoc() <- start");
+        Iterable<NodeDocument> modifiedDocs = null;
         try {
-            long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, 
null, null, EMPTY_KEY_PATTERN, emptyList());
-            modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec);
+            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, 
MIN_ID_VALUE);
+            doc = modifiedDocs.iterator().hasNext() ? 
modifiedDocs.iterator().next() : NULL;
         } catch (DocumentStoreException ex) {
-            LOG.error("getOldestModifiedTimestamp()", ex);
-        }
-
-        if (modifiedMs > 0) {
-            LOG.info("getOldestModifiedTimestamp() -> {}", 
Utils.timestampToString(modifiedMs));
-            return modifiedMs;
-        } else {
-            LOG.info("getOldestModifiedTimestamp() -> none found, return 
current time");
-            return clock.getTime();
+            LOG.error("getOldestModifiedDoc()", ex);
+        } finally {
+            Utils.closeIfCloseable(modifiedDocs);
         }
+        return doc;
     }
 
     @Override

Reply via email to