This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch OAK-10341
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/OAK-10341 by this push:
new d578e2956c OAK-10341 TreeStore
d578e2956c is described below
commit d578e2956cb6de79cef668b9a4d27299214ad2aa
Author: Thomas Mueller <[email protected]>
AuthorDate: Fri Jul 7 11:47:56 2023 +0200
OAK-10341 TreeStore
---
.../apache/jackrabbit/oak/index/IndexOptions.java | 8 +
.../indexer/document/DocumentStoreIndexerBase.java | 46 +++++
.../oak/index/indexer/document/tree/TreeStore.java | 111 ++++++++++-
.../indexer/document/tree/TreeStoreNodeState.java | 214 +++++++++++++++++++++
.../indexer/document/tree/TreeStoreTest.java} | 25 +--
.../apache/jackrabbit/oak/index/IndexCommand.java | 15 +-
6 files changed, 395 insertions(+), 24 deletions(-)
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
index 90e4b8b7a4..e4b21efd13 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
@@ -56,6 +56,7 @@ public class IndexOptions implements OptionsBean {
private final OptionSpec<Void> docTraversal;
private final OptionSpec<Void> enableCowCor;
private final OptionSpec<Void> buildFlatFileStoreSeparately;
+ private final OptionSpec<Void> useTreeStore;
private final OptionSpec<Integer> consistencyCheck;
private final OptionSpec<Long> asyncDelay;
protected OptionSet options;
@@ -113,6 +114,8 @@ public class IndexOptions implements OptionsBean {
enableCowCor = parser.accepts("enable-cow-cor", "Enables COW/COR
during async indexing using oak-run");
buildFlatFileStoreSeparately =
parser.accepts("build-flatfilestore-separately", "Builds FlatFileStore as a
separate step and then uses it as part of the doc-traversal-mode for
reindexing");
+ useTreeStore = parser.accepts("use-tree-store", "Use a pre-built tree
store");
+
indexImportDir = parser.accepts("index-import-dir", "Directory
containing index files. This " +
"is required when --index-import operation is selected")
.requiredIf(importIndex)
@@ -233,6 +236,10 @@ public class IndexOptions implements OptionsBean {
return options.has(buildFlatFileStoreSeparately);
}
+ /**
+  * Whether the "use-tree-store" command line flag was supplied.
+  *
+  * @return true if the parsed option set contains the use-tree-store option
+  */
+ public boolean useTreeStore() {
+ return options.has(useTreeStore);
+ }
+
public String getCheckpoint(){
return checkpoint.value(options);
}
@@ -272,4 +279,5 @@ public class IndexOptions implements OptionsBean {
}
return result;
}
+
}
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
index df936687ee..a8650c5fbd 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
@@ -30,6 +30,8 @@ import
org.apache.jackrabbit.oak.index.indexer.document.flatfile.DefaultMemoryMa
import
org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.MemoryManager;
+import
org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore;
import org.apache.jackrabbit.oak.plugins.document.Collection;
import org.apache.jackrabbit.oak.plugins.document.DocumentNodeState;
import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
@@ -227,6 +229,50 @@ public abstract class DocumentStoreIndexerBase implements
Closeable {
return flatFileStore;
}
+    /**
+     * Reindexes by iterating over the entries of a {@link TreeStore} rather
+     * than traversing the document store.
+     * <p>
+     * The tree store is registered with the closer (like the composite
+     * indexer further down), so it is no longer left open when this method
+     * returns early or fails.
+     *
+     * @throws CommitFailedException may be thrown by the node store operations
+     *             used here (for example the final merge)
+     * @throws IOException may be thrown by the indexing steps used here
+     */
+    public void reindexUsingTreeStore() throws CommitFailedException, IOException {
+        NodeStateEntryReader reader = new NodeStateEntryReader(indexHelper.getGCBlobStore());
+        // TODO the store directory is hard-coded to the relative path "target";
+        // it should come from the index options
+        TreeStore treeStore = new TreeStore(new File("target"), reader);
+        // register immediately: TreeStore is Closeable and was previously never closed
+        closer.register(treeStore);
+
+        // TODO this is mostly a copy of reindex()
+
+        IndexingProgressReporter progressReporter =
+                new IndexingProgressReporter(IndexUpdateCallback.NOOP, NodeTraversalCallback.NOOP);
+        configureEstimators(progressReporter);
+
+        NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
+        NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
+        indexerSupport.switchIndexLanesAndReindexFlag(copyOnWriteStore);
+
+        NodeBuilder builder = copyOnWriteStore.getRoot().builder();
+        CompositeIndexer indexer = prepareIndexers(copyOnWriteStore, builder, progressReporter);
+        if (indexer.isEmpty()) {
+            // nothing to index
+            return;
+        }
+
+        closer.register(indexer);
+
+        progressReporter.reset();
+
+        progressReporter.reindexingTraversalStart("/");
+
+        preIndexOpertaions(indexer.getIndexers());
+
+        Stopwatch indexerWatch = Stopwatch.createStarted();
+
+        // feed every entry of the tree store to the indexers
+        for (NodeStateEntry entry : treeStore) {
+            reportDocumentRead(entry.getPath(), progressReporter);
+            indexer.index(entry);
+        }
+
+        progressReporter.reindexingTraversalEnd();
+        progressReporter.logReport();
+        log.info("Completed the indexing in {}", indexerWatch);
+
+        copyOnWriteStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        indexerSupport.postIndexWork(copyOnWriteStore);
+    }
+
public void reindex() throws CommitFailedException, IOException {
IndexingProgressReporter progressReporter =
new IndexingProgressReporter(IndexUpdateCallback.NOOP,
NodeTraversalCallback.NOOP);
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
index 750ac749e8..6d63f0e329 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
@@ -19,23 +19,126 @@
package org.apache.jackrabbit.oak.index.indexer.document.tree;
import java.io.Closeable;
+import java.io.File;
import java.io.IOException;
import java.util.Iterator;
+import java.util.Map.Entry;
+import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
+import
org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry.NodeStateEntryBuilder;
+import
org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Session;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Store;
+import
org.apache.jackrabbit.oak.index.indexer.document.tree.store.StoreBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
public class TreeStore implements Iterable<NodeStateEntry>, Closeable {
+ private final Store store;
+ private final Session session;
+ private final NodeStateEntryReader entryReader;
+
+ /**
+  * Builds a store configured with type=file for the given directory, and
+  * creates a session on top of that store.
+  *
+  * @param directory the directory; its absolute path is passed to the store builder
+  * @param entryReader the reader used to parse stored lines into node state entries
+  */
+ public TreeStore(File directory, NodeStateEntryReader entryReader) {
+ this.entryReader = entryReader;
+ this.store = StoreBuilder.build("type=file\n" + "dir=" +
+ directory.getAbsolutePath());
+ this.session = new Session(store);
+ }
+
@Override
public void close() throws IOException {
- // TODO
-
+ session.flush();
+ store.close();
}
@Override
public Iterator<NodeStateEntry> iterator() {
- // TODO
- return null;
+ Iterator<Entry<String, String>> it = session.iterator();
+ return new Iterator<NodeStateEntry>() {
+
+ NodeStateEntry current;
+
+ {
+ fetch();
+ }
+
+            /**
+             * Advances {@code current} to the next entry that has a non-empty
+             * value, or sets it to null once the underlying iterator is
+             * exhausted. Entries with an empty value are skipped (elsewhere
+             * in this package an empty value marks a child node entry).
+             */
+            private void fetch() {
+                current = null;
+                while (it.hasNext()) {
+                    Entry<String, String> e = it.next();
+                    if (!e.getValue().isEmpty()) {
+                        current = getNodeStateEntry(e.getKey(), e.getValue());
+                        // stop at the first data entry; previously the loop ran on
+                        // to the end and then cleared the result again
+                        return;
+                    }
+                }
+            }
+
+ @Override
+ public boolean hasNext() {
+ return current != null;
+ }
+
+            /**
+             * Returns the prefetched entry and advances to the following one.
+             *
+             * @throws java.util.NoSuchElementException if there are no more entries
+             */
+            @Override
+            public NodeStateEntry next() {
+                NodeStateEntry result = current;
+                if (result == null) {
+                    // Iterator contract: signal exhaustion instead of returning null
+                    throw new java.util.NoSuchElementException();
+                }
+                fetch();
+                return result;
+            }
+
+ };
+ }
+
+ NodeStateEntry getNodeStateEntry(String path) {
+ return new NodeStateEntryBuilder(getNodeState(path), path).build();
+ }
+
+ NodeStateEntry getNodeStateEntry(String path, String value) {
+ return new NodeStateEntryBuilder(getNodeState(path, value),
path).build();
+ }
+
+    /**
+     * Reads the value stored for the path and converts it to a node state.
+     *
+     * @param path the key to look up in the session
+     * @return the node state built from the stored value
+     * @throws IllegalArgumentException if no value, or an empty value, is stored
+     */
+    NodeState getNodeState(String path) {
+        String stored = session.get(path);
+        boolean hasData = stored != null && !stored.isEmpty();
+        if (!hasData) {
+            throw new IllegalArgumentException(path);
+        }
+        return getNodeState(path, stored);
+    }
+
+    /**
+     * Parses a stored value into a node state that is bound to this store.
+     * The path and value are joined with "|" and handed to the entry reader;
+     * the resulting node state is wrapped in a {@link TreeStoreNodeState}.
+     *
+     * @param path the path of the node
+     * @param value the stored value for that path
+     * @return the wrapped node state
+     */
+    NodeState getNodeState(String path, String value) {
+        NodeStateEntry parsed = entryReader.read(path + "|" + value);
+        return new TreeStoreNodeState(parsed.getNodeState(), path, this);
+    }
+
+ /**
+ * The child node entry for the given path.
+ *
+ * @param path the path, e.g. /hello/world
+ * @return the child node entry, e.g. /hello<tab>world
+ */
+ public static String toChildNodeEntry(String path) {
+ if (path.equals("/")) {
+ return "\t";
+ }
+ String nodeName = PathUtils.getName(path);
+ String parentPath = PathUtils.getParentPath(path);
+ return parentPath + "\t" + nodeName;
+ }
+
+ /**
+ * The child node entry for the given parent and child: the parent path and
+ * the child name, separated by a tab character.
+ *
+ * @param parentPath the path of the parent node, e.g. /hello
+ * @param childName the name of the child node, e.g. world
+ * @return the child node entry, e.g. /hello&lt;tab&gt;world
+ */
+ public static String toChildNodeEntry(String parentPath, String childName)
{
+ return parentPath + "\t" + childName;
+ }
+
+ public Session getSession() {
+ return session;
}
}
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
new file mode 100644
index 0000000000..b6afccffdb
--- /dev/null
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.index.indexer.document.tree;
+
+import static org.apache.jackrabbit.guava.common.collect.Iterators.transform;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.AbstractNodeState;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateDiff;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+public class TreeStoreNodeState implements NodeState {
+ private final NodeState delegate;
+ private final String path;
+ private final TreeStore treeStore;
+
+ public TreeStoreNodeState(NodeState delegate, String path, TreeStore
treeStore) {
+ this.delegate = delegate;
+ this.path = path;
+ this.treeStore = treeStore;
+ }
+
+ @Override
+ public boolean exists() {
+ return delegate.exists();
+ }
+
+ @Override
+ public boolean hasProperty(@NotNull String name) {
+ return delegate.hasProperty(name);
+ }
+
+ @Nullable
+ @Override
+ public PropertyState getProperty(@NotNull String name) {
+ return delegate.getProperty(name);
+ }
+
+ @Override
+ public boolean getBoolean(@NotNull String name) {
+ return delegate.getBoolean(name);
+ }
+
+ @Override
+ public long getLong(String name) {
+ return delegate.getLong(name);
+ }
+
+ @Nullable
+ @Override
+ public String getString(String name) {
+ return delegate.getString(name);
+ }
+
+ @NotNull
+ @Override
+ public Iterable<String> getStrings(@NotNull String name) {
+ return delegate.getStrings(name);
+ }
+
+ @Nullable
+ @Override
+ public String getName(@NotNull String name) {
+ return delegate.getName(name);
+ }
+
+ @NotNull
+ @Override
+ public Iterable<String> getNames(@NotNull String name) {
+ return delegate.getNames(name);
+ }
+
+ @Override
+ public long getPropertyCount() {
+ return delegate.getPropertyCount();
+ }
+
+ @NotNull
+ @Override
+ public Iterable<? extends PropertyState> getProperties() {
+ return delegate.getProperties();
+ }
+
+ @NotNull
+ @Override
+ public NodeBuilder builder() {
+ return delegate.builder();
+ }
+
+ @Override
+ public boolean compareAgainstBaseState(NodeState base, NodeStateDiff diff)
{
+ return AbstractNodeState.compareAgainstBaseState(this, base, diff);
+ }
+
+ // ~-------------------------------< child node access >
+
+ /**
+  * Checks for a child by looking up its child node entry key (this node's
+  * path, a tab, and the child name) in the session. The child is reported
+  * as present only if the stored value is the empty string.
+  */
+ @Override
+ public boolean hasChildNode(@NotNull String name) {
+ String entry = TreeStore.toChildNodeEntry(path, name);
+ String result = treeStore.getSession().get(entry);
+ return "".equals(result);
+ }
+
+ @NotNull
+ @Override
+ public NodeState getChildNode(@NotNull String name) throws
IllegalArgumentException {
+ // TODO
+ return null;
+// String path = PathUtils.concat(path, name);
+ }
+
+    /**
+     * Counts the child node names, giving up once the count exceeds the limit.
+     *
+     * @param max the largest count the caller wants reported exactly
+     * @return the number of children, or {@code Long.MAX_VALUE} if there are more than max
+     */
+    @Override
+    public long getChildNodeCount(long max) {
+        long count = 0;
+        for (Iterator<String> names = getChildNodeNamesIterator(); names.hasNext(); names.next()) {
+            count++;
+            if (count > max) {
+                return Long.MAX_VALUE;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * The names of the child nodes; each call to {@code iterator()} on the
+     * result creates a fresh child name iterator.
+     */
+    @Override
+    public Iterable<String> getChildNodeNames() {
+        return this::getChildNodeNamesIterator;
+    }
+
+ @NotNull
+ @Override
+ public Iterable<? extends ChildNodeEntry> getChildNodeEntries() {
+ return () -> transform(getChildNodeIterator(),
+ s -> new MemoryChildNodeEntry(PathUtils.getName(s.getPath()),
s.getNodeState()));
+ }
+
+ private Iterator<NodeStateEntry> getChildNodeIterator() {
+ return transform(getChildNodeNamesIterator(),
+ s -> treeStore.getNodeStateEntry(PathUtils.concat(path, s)));
+ }
+
+ /**
+  * Iterates over the names of the child nodes, using the session iterator
+  * that is started at this node's path.
+  * <p>
+  * Each name is the part of an entry key after the last tab character.
+  * Iteration ends at the first entry whose value is not empty.
+  * <p>
+  * NOTE(review): this presumably relies on the session iterator returning
+  * the child node entries of this path first, and not returning the entry
+  * for the path itself; the key prefix is not compared against the path
+  * here. Confirm against Session.iterator(String).
+  */
+ Iterator<String> getChildNodeNamesIterator() {
+ Iterator<Entry<String, String>> it =
treeStore.getSession().iterator(path);
+ return new Iterator<String>() {
+ // the prefetched next name, or null if there are no more
+ String current;
+ {
+ fetch();
+ }
+
+ private void fetch() {
+ if (!it.hasNext()) {
+ current = null;
+ } else {
+ Entry<String, String> e = it.next();
+ if (!e.getValue().isEmpty()) {
+ // a non-empty value ends the iteration
+ current = null;
+ } else {
+ String key = e.getKey();
+ int index = key.lastIndexOf('\t');
+ if (index < 0) {
+ throw new IllegalArgumentException(key);
+ }
+ // the child name follows the last tab
+ current = key.substring(index + 1);
+ }
+ }
+ }
+
+ public boolean hasNext() {
+ return current != null;
+ }
+
+ public String next() {
+ String result = current;
+ if (result == null) {
+ throw new IllegalStateException();
+ }
+ fetch();
+ return result;
+ }
+ };
+ }
+
+}
\ No newline at end of file
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
similarity index 63%
copy from
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
copy to
oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
index 750ac749e8..c452ca05e7 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
+++
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
@@ -18,24 +18,19 @@
*/
package org.apache.jackrabbit.oak.index.indexer.document.tree;
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.Iterator;
+import static org.junit.Assert.assertEquals;
-import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
+import org.junit.Test;
-public class TreeStore implements Iterable<NodeStateEntry>, Closeable {
+public class TreeStoreTest {
- @Override
- public void close() throws IOException {
- // TODO
+ @Test
+ public void test() {
+ assertEquals("\t", TreeStore.toChildNodeEntry("/"));
+ assertEquals("/\tabc", TreeStore.toChildNodeEntry("/abc"));
+ assertEquals("/hello\tworld",
TreeStore.toChildNodeEntry("/hello/world"));
+ assertEquals("/\tabc", TreeStore.toChildNodeEntry("/", "abc"));
+ assertEquals("/hello\tworld", TreeStore.toChildNodeEntry("/hello",
"world"));
}
-
- @Override
- public Iterator<NodeStateEntry> iterator() {
- // TODO
- return null;
- }
-
}
diff --git
a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
index 55f9aa08ce..b76b032ae1 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
@@ -31,6 +31,7 @@ import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.index.async.AsyncIndexerLucene;
import org.apache.jackrabbit.oak.index.indexer.document.DocumentStoreIndexer;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore;
import org.apache.jackrabbit.oak.plugins.index.importer.IndexDefinitionUpdater;
import org.apache.jackrabbit.oak.run.cli.CommonOptions;
import org.apache.jackrabbit.oak.run.cli.DocumentBuilderCustomizer;
@@ -243,12 +244,16 @@ public class IndexCommand implements Command {
if (opts.getCommonOpts().isMongo() && idxOpts.isDocTraversalMode()) {
log.info("Using Document order traversal to perform reindexing");
try (DocumentStoreIndexer indexer = new
DocumentStoreIndexer(extendedIndexHelper, indexerSupport)) {
- if (idxOpts.buildFlatFileStoreSeparately()) {
- FlatFileStore ffs = indexer.buildFlatFileStore();
- String pathToFFS = ffs.getFlatFileStorePath();
- System.setProperty(OAK_INDEXER_SORTED_FILE_PATH,
pathToFFS);
+ if (idxOpts.useTreeStore()) {
+ indexer.reindexUsingTreeStore();
+ } else {
+ if (idxOpts.buildFlatFileStoreSeparately()) {
+ FlatFileStore ffs = indexer.buildFlatFileStore();
+ String pathToFFS = ffs.getFlatFileStorePath();
+ System.setProperty(OAK_INDEXER_SORTED_FILE_PATH,
pathToFFS);
+ }
+ indexer.reindex();
}
- indexer.reindex();
}
} else {
try (OutOfBandIndexer indexer = new
OutOfBandIndexer(extendedIndexHelper, indexerSupport)) {