This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-10341
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/OAK-10341 by this push:
     new 8154b2b1f5 OAK-10341 TreeStore
8154b2b1f5 is described below

commit 8154b2b1f5e4904f0a439517b6d178bf87d481c0
Author: Thomas Mueller <[email protected]>
AuthorDate: Thu Jul 13 10:55:26 2023 +0200

    OAK-10341 TreeStore
---
 .../document/tree/store/GarbageCollection.java     | 80 ++++++++++++++++++++++
 .../index/indexer/document/tree/store/Session.java | 67 +++++++++++++++---
 2 files changed, 137 insertions(+), 10 deletions(-)

diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/GarbageCollection.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/GarbageCollection.java
new file mode 100644
index 0000000000..69f2776bed
--- /dev/null
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/GarbageCollection.java
@@ -0,0 +1,80 @@
+package org.apache.jackrabbit.oak.index.indexer.document.tree.store;
+
+import java.util.HashSet;
+import java.util.List;
+
+/**
+ * Remove unreferenced files from the store.
+ */
+public class GarbageCollection {
+
+    private final Store store;
+
+    public GarbageCollection(Store store) {
+        this.store = store;
+    }
+
+    /**
+     * Run garbage collection.
+     *
+     * @param rootFiles
+     * @return the result
+     */
+    public GarbageCollectionResult run(List<String> rootFiles) {
+        HashSet<String> used = mark(rootFiles);
+        return sweep(used);
+    }
+
+    HashSet<String> mark(List<String> rootFiles) {
+        HashSet<String> used = new HashSet<>();
+        for(String root : rootFiles) {
+            mark(root, used);
+        }
+        return used;
+    }
+
+    private void mark(String fileName, HashSet<String> used) {
+        used.add(fileName);
+        if (fileName.startsWith(Session.LEAF_PREFIX)) {
+            return;
+        }
+        // root or inner node
+        PageFile file = store.get(fileName);
+        if (file.isInnerNode()) {
+            for (int i = 0; i < file.getValueCount(); i++) {
+                mark(file.getChildValue(i), used);
+            }
+        }
+    }
+
+    private GarbageCollectionResult sweep(HashSet<String> used) {
+        GarbageCollectionResult result = new GarbageCollectionResult();
+        // TODO keep files that were very recently updated
+        // (don't remove files that are part of a concurrent flush)
+        HashSet<String> removeSet = new HashSet<String>();
+        for (String key: new HashSet<>(store.keySet())) {
+            if (!used.contains(key)) {
+                removeSet.add(key);
+                result.countRemoved++;
+            } else {
+                result.countKept++;
+            }
+        }
+        store.remove(removeSet);
+        return result;
+    }
+
+    /**
+     * Garbage collection results.
+     */
+    public static class GarbageCollectionResult {
+        public long sizeInBytes;
+        public long countRemoved;
+        public long countKept;
+
+        public String toString() {
+            return "removed: " + countRemoved + " kept: " + countKept + " 
size: " + sizeInBytes;
+        }
+    }
+
+}
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
index 3f472406da..783f107287 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.index.indexer.document.tree.store;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -38,11 +39,11 @@ public class Session {
     private static final int DEFAULT_CACHE_SIZE = 128;
     private static final int DEFAULT_MAX_FILE_SIZE = 16 * 1024;
     private static final int DEFAULT_CACHE_SIZE_MB = 16;
-    private static final int DEFAULT_MAX_ROOTS = Integer.MAX_VALUE;
+    private static final int DEFAULT_MAX_ROOTS = 10;
 
-    static final String ROOT_NAME = "root";
+    public static final String ROOT_NAME = "root";
+    public static final String LEAF_PREFIX = "data_";
     static final String INNER_NODE_PREFIX = "node_";
-    static final String LEAF_PREFIX = "data_";
     static final String DELETED = new String("DELETED");
 
     static final boolean MULTI_ROOT = true;
@@ -145,7 +146,7 @@ public class Session {
     private void mergeRootsIfNeeded() {
         List<String> roots = getRootFileNames();
         if (roots.size() > maxRoots) {
-            mergeRoots();
+            mergeRoots(Integer.MAX_VALUE);
         }
     }
 
@@ -438,27 +439,48 @@ public class Session {
     }
 
     /**
-     * Merge all roots.
+     * Merge a number of roots. Merging will create a new root, which contains 
the
+     * data of the previous roots. If there are less roots, this method 
returns without merging.
+     *
+     * @param max the number of roots to merge (Integer.MAX_VALUE for all)
      */
-    public void mergeRoots() {
+    public void mergeRoots(int max) {
+        List<String> list = getRootFileNames();
+        if (list.size() < max) {
+            return;
+        }
         PageFile root = getFile(ROOT_NAME);
         String rootFileCopy = ROOT_NAME + "_" + updateId;
         root = copyPageFile(root);
         root.setModified(true);
         putFile(rootFileCopy, root);
-        Iterator<Entry<String, String>> it = iterator();
+        Iterator<Entry<String, String>> it = iterator(null, max);
         PageFile newRoot = newPageFile(false);
         newRoot.setNextRoot(rootFileCopy);
         putFile(ROOT_NAME, newRoot);
         while(it.hasNext()) {
             Entry<String, String> e = it.next();
             put(e.getKey(), e.getValue());
+            // TODO we can remove files that are processed
         }
         newRoot = getFile(ROOT_NAME);
-        newRoot.setNextRoot(null);
+        if (max < list.size()) {
+            newRoot.setNextRoot(list.get(max));
+        } else {
+            newRoot.setNextRoot(null);
+        }
         flush();
     }
 
+    /**
+     * Get the number of roots.
+     *
+     * @return the number of roots
+     */
+    public int getRootCount() {
+        return getRootFileNames().size();
+    }
+
     /**
      * Make the current tree read-only and switch to a new root.
      * If there are already too many roots, then they will be merged.
@@ -558,11 +580,14 @@ public class Session {
      *                   the beginning
      * @return the result
      */
-
     public Iterator<Entry<String, String>> iterator(String largerThan) {
+        return iterator(largerThan, Integer.MAX_VALUE);
+    }
+
+    private Iterator<Entry<String, String>> iterator(String largerThan, int 
maxRootCount) {
         ArrayList<SortedStream> streams = new ArrayList<>();
         String next = ROOT_NAME;
-        while (true) {
+        for (int i = 0; i < maxRootCount; i++) {
             streams.add(new SortedStream(next, immutableRootIterator(next, 
largerThan)));
             next = getFile(next).getNextRoot();
             if (next == null) {
@@ -818,6 +843,28 @@ public class Session {
         return null;
     }
 
+    // ===============================================================
+    // garbage collection
+
+    public void runGC() {
+        new GarbageCollection(store).run(getRootFileNames());
+    }
+
+    // ===============================================================
+    // info
+
+    public String getInfo() {
+        StringBuilder buff = new StringBuilder();
+        GarbageCollection gc = new GarbageCollection(store);
+        int rootId = 0;
+        for (String r : getRootFileNames()) {
+            buff.append(String.format("root #%d contains %d files (file name 
%s)\n",
+                    rootId, gc.mark(Collections.singletonList(r)).size(), r));
+            rootId++;
+        }
+        return buff.toString();
+    }
+
     // ===============================================================
     // partitioning
 

Reply via email to