This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch OAK-10341
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/OAK-10341 by this push:
new 8154b2b1f5 OAK-10341 TreeStore
8154b2b1f5 is described below
commit 8154b2b1f5e4904f0a439517b6d178bf87d481c0
Author: Thomas Mueller <[email protected]>
AuthorDate: Thu Jul 13 10:55:26 2023 +0200
OAK-10341 TreeStore
---
.../document/tree/store/GarbageCollection.java | 80 ++++++++++++++++++++++
.../index/indexer/document/tree/store/Session.java | 67 +++++++++++++++---
2 files changed, 137 insertions(+), 10 deletions(-)
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/GarbageCollection.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/GarbageCollection.java
new file mode 100644
index 0000000000..69f2776bed
--- /dev/null
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/GarbageCollection.java
@@ -0,0 +1,80 @@
+package org.apache.jackrabbit.oak.index.indexer.document.tree.store;
+
+import java.util.HashSet;
+import java.util.List;
+
+/**
+ * Remove unreferenced files from the store.
+ */
+public class GarbageCollection {
+
+ private final Store store;
+
+ public GarbageCollection(Store store) {
+ this.store = store;
+ }
+
+ /**
+ * Run garbage collection.
+ *
+ * @param rootFiles
+ * @return the result
+ */
+ public GarbageCollectionResult run(List<String> rootFiles) {
+ HashSet<String> used = mark(rootFiles);
+ return sweep(used);
+ }
+
+ HashSet<String> mark(List<String> rootFiles) {
+ HashSet<String> used = new HashSet<>();
+ for(String root : rootFiles) {
+ mark(root, used);
+ }
+ return used;
+ }
+
+ private void mark(String fileName, HashSet<String> used) {
+ used.add(fileName);
+ if (fileName.startsWith(Session.LEAF_PREFIX)) {
+ return;
+ }
+ // root or inner node
+ PageFile file = store.get(fileName);
+ if (file.isInnerNode()) {
+ for (int i = 0; i < file.getValueCount(); i++) {
+ mark(file.getChildValue(i), used);
+ }
+ }
+ }
+
+ private GarbageCollectionResult sweep(HashSet<String> used) {
+ GarbageCollectionResult result = new GarbageCollectionResult();
+ // TODO keep files that were very recently updated
+ // (don't remove files that are part of a concurrent flush)
+ HashSet<String> removeSet = new HashSet<String>();
+ for (String key: new HashSet<>(store.keySet())) {
+ if (!used.contains(key)) {
+ removeSet.add(key);
+ result.countRemoved++;
+ } else {
+ result.countKept++;
+ }
+ }
+ store.remove(removeSet);
+ return result;
+ }
+
+ /**
+ * Garbage collection results.
+ */
+ public static class GarbageCollectionResult {
+ public long sizeInBytes;
+ public long countRemoved;
+ public long countKept;
+
+ public String toString() {
+ return "removed: " + countRemoved + " kept: " + countKept + "
size: " + sizeInBytes;
+ }
+ }
+
+}
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
index 3f472406da..783f107287 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/Session.java
@@ -17,6 +17,7 @@
package org.apache.jackrabbit.oak.index.indexer.document.tree.store;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
@@ -38,11 +39,11 @@ public class Session {
private static final int DEFAULT_CACHE_SIZE = 128;
private static final int DEFAULT_MAX_FILE_SIZE = 16 * 1024;
private static final int DEFAULT_CACHE_SIZE_MB = 16;
- private static final int DEFAULT_MAX_ROOTS = Integer.MAX_VALUE;
+ private static final int DEFAULT_MAX_ROOTS = 10;
- static final String ROOT_NAME = "root";
+ public static final String ROOT_NAME = "root";
+ public static final String LEAF_PREFIX = "data_";
static final String INNER_NODE_PREFIX = "node_";
- static final String LEAF_PREFIX = "data_";
static final String DELETED = new String("DELETED");
static final boolean MULTI_ROOT = true;
@@ -145,7 +146,7 @@ public class Session {
private void mergeRootsIfNeeded() {
List<String> roots = getRootFileNames();
if (roots.size() > maxRoots) {
- mergeRoots();
+ mergeRoots(Integer.MAX_VALUE);
}
}
@@ -438,27 +439,48 @@ public class Session {
}
/**
- * Merge all roots.
+ * Merge a number of roots. Merging will create a new root, which contains
the
+ * data of the previous roots. If there are less roots, this method
returns without merging.
+ *
+ * @param max the number of roots to merge (Integer.MAX_VALUE for all)
*/
- public void mergeRoots() {
+ public void mergeRoots(int max) {
+ List<String> list = getRootFileNames();
+ if (list.size() < max) {
+ return;
+ }
PageFile root = getFile(ROOT_NAME);
String rootFileCopy = ROOT_NAME + "_" + updateId;
root = copyPageFile(root);
root.setModified(true);
putFile(rootFileCopy, root);
- Iterator<Entry<String, String>> it = iterator();
+ Iterator<Entry<String, String>> it = iterator(null, max);
PageFile newRoot = newPageFile(false);
newRoot.setNextRoot(rootFileCopy);
putFile(ROOT_NAME, newRoot);
while(it.hasNext()) {
Entry<String, String> e = it.next();
put(e.getKey(), e.getValue());
+ // TODO we can remove files that are processed
}
newRoot = getFile(ROOT_NAME);
- newRoot.setNextRoot(null);
+ if (max < list.size()) {
+ newRoot.setNextRoot(list.get(max));
+ } else {
+ newRoot.setNextRoot(null);
+ }
flush();
}
+ /**
+ * Get the number of roots.
+ *
+ * @return the number of roots
+ */
+ public int getRootCount() {
+ return getRootFileNames().size();
+ }
+
/**
* Make the current tree read-only and switch to a new root.
* If there are already too many roots, then they will be merged.
@@ -558,11 +580,14 @@ public class Session {
* the beginning
* @return the result
*/
-
public Iterator<Entry<String, String>> iterator(String largerThan) {
+ return iterator(largerThan, Integer.MAX_VALUE);
+ }
+
+ private Iterator<Entry<String, String>> iterator(String largerThan, int
maxRootCount) {
ArrayList<SortedStream> streams = new ArrayList<>();
String next = ROOT_NAME;
- while (true) {
+ for (int i = 0; i < maxRootCount; i++) {
streams.add(new SortedStream(next, immutableRootIterator(next,
largerThan)));
next = getFile(next).getNextRoot();
if (next == null) {
@@ -818,6 +843,28 @@ public class Session {
return null;
}
+ // ===============================================================
+ // garbage collection
+
+ public void runGC() {
+ new GarbageCollection(store).run(getRootFileNames());
+ }
+
+ // ===============================================================
+ // info
+
+ public String getInfo() {
+ StringBuilder buff = new StringBuilder();
+ GarbageCollection gc = new GarbageCollection(store);
+ int rootId = 0;
+ for (String r : getRootFileNames()) {
+ buff.append(String.format("root #%d contains %d files (file name
%s)\n",
+ rootId, gc.mark(Collections.singletonList(r)).size(), r));
+ rootId++;
+ }
+ return buff.toString();
+ }
+
// ===============================================================
// partitioning