This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-10341
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/OAK-10341 by this push:
     new d578e2956c OAK-10341 TreeStore
d578e2956c is described below

commit d578e2956cb6de79cef668b9a4d27299214ad2aa
Author: Thomas Mueller <[email protected]>
AuthorDate: Fri Jul 7 11:47:56 2023 +0200

    OAK-10341 TreeStore
---
 .../apache/jackrabbit/oak/index/IndexOptions.java  |   8 +
 .../indexer/document/DocumentStoreIndexerBase.java |  46 +++++
 .../oak/index/indexer/document/tree/TreeStore.java | 111 ++++++++++-
 .../indexer/document/tree/TreeStoreNodeState.java  | 214 +++++++++++++++++++++
 .../indexer/document/tree/TreeStoreTest.java}      |  25 +--
 .../apache/jackrabbit/oak/index/IndexCommand.java  |  15 +-
 6 files changed, 395 insertions(+), 24 deletions(-)

diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
index 90e4b8b7a4..e4b21efd13 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
@@ -56,6 +56,7 @@ public class IndexOptions implements OptionsBean {
     private final OptionSpec<Void> docTraversal;
     private final OptionSpec<Void> enableCowCor;
     private final OptionSpec<Void> buildFlatFileStoreSeparately;
+    private final OptionSpec<Void> useTreeStore;
     private final OptionSpec<Integer> consistencyCheck;
     private final OptionSpec<Long> asyncDelay;
     protected OptionSet options;
@@ -113,6 +114,8 @@ public class IndexOptions implements OptionsBean {
         enableCowCor = parser.accepts("enable-cow-cor", "Enables COW/COR 
during async indexing using oak-run");
         buildFlatFileStoreSeparately = 
parser.accepts("build-flatfilestore-separately", "Builds FlatFileStore as a 
separate step and then uses it as part of the doc-traversal-mode for 
reindexing");
 
+        useTreeStore = parser.accepts("use-tree-store", "Use a pre-built tree 
store");
+
         indexImportDir = parser.accepts("index-import-dir", "Directory 
containing index files. This " +
                 "is required when --index-import operation is selected")
                 .requiredIf(importIndex)
@@ -233,6 +236,10 @@ public class IndexOptions implements OptionsBean {
         return options.has(buildFlatFileStoreSeparately);
     }
 
+    /**
+     * Whether the "use-tree-store" option was given.
+     *
+     * @return true if the parsed options contain the option
+     */
+    public boolean useTreeStore() {
+        return options.has(useTreeStore);
+    }
+
     public String getCheckpoint(){
         return checkpoint.value(options);
     }
@@ -272,4 +279,5 @@ public class IndexOptions implements OptionsBean {
         }
         return result;
     }
+
 }
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
index df936687ee..a8650c5fbd 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
@@ -30,6 +30,8 @@ import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.DefaultMemoryMa
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder;
 import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore;
 import org.apache.jackrabbit.oak.index.indexer.document.flatfile.MemoryManager;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore;
 import org.apache.jackrabbit.oak.plugins.document.Collection;
 import org.apache.jackrabbit.oak.plugins.document.DocumentNodeState;
 import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
@@ -227,6 +229,50 @@ public abstract class DocumentStoreIndexerBase implements 
Closeable {
         return flatFileStore;
     }
 
+    /**
+     * Reindex using a pre-built tree store as the source of the node states.
+     * <p>
+     * All entries of the tree store are passed to the indexers. The resulting
+     * changes are merged into a {@link MemoryNodeStore} that is created from
+     * the checkpointed state.
+     *
+     * @throws CommitFailedException if indexing or merging the changes fails
+     * @throws IOException if reading from the store or writing the index fails
+     */
+    public void reindexUsingTreeStore() throws CommitFailedException, IOException {
+        NodeStateEntryReader reader = new NodeStateEntryReader(indexHelper.getGCBlobStore());
+        // NOTE(review): the store directory is hard-coded; it probably needs to be configurable
+        TreeStore treeStore = new TreeStore(new File("target"), reader);
+        // register the store right away, so that it is closed together with the
+        // other registered resources, also if we return early or indexing fails
+        closer.register(treeStore);
+
+        // TODO this is mostly a copy of reindex()
+
+        IndexingProgressReporter progressReporter =
+                new IndexingProgressReporter(IndexUpdateCallback.NOOP, NodeTraversalCallback.NOOP);
+        configureEstimators(progressReporter);
+
+        NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
+        NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
+        indexerSupport.switchIndexLanesAndReindexFlag(copyOnWriteStore);
+
+        NodeBuilder builder = copyOnWriteStore.getRoot().builder();
+        CompositeIndexer indexer = prepareIndexers(copyOnWriteStore, builder, progressReporter);
+        if (indexer.isEmpty()) {
+            // nothing to index
+            return;
+        }
+
+        closer.register(indexer);
+
+        progressReporter.reset();
+
+        progressReporter.reindexingTraversalStart("/");
+
+        preIndexOpertaions(indexer.getIndexers());
+
+        Stopwatch indexerWatch = Stopwatch.createStarted();
+
+        // the tree store only returns node states (entries with a non-empty value)
+        for (NodeStateEntry entry : treeStore) {
+            reportDocumentRead(entry.getPath(), progressReporter);
+            indexer.index(entry);
+        }
+
+        progressReporter.reindexingTraversalEnd();
+        progressReporter.logReport();
+        log.info("Completed the indexing in {}", indexerWatch);
+
+        copyOnWriteStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        indexerSupport.postIndexWork(copyOnWriteStore);
+    }
+
     public void reindex() throws CommitFailedException, IOException {
         IndexingProgressReporter progressReporter =
                 new IndexingProgressReporter(IndexUpdateCallback.NOOP, 
NodeTraversalCallback.NOOP);
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
index 750ac749e8..6d63f0e329 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
@@ -19,23 +19,126 @@
 package org.apache.jackrabbit.oak.index.indexer.document.tree;
 
 import java.io.Closeable;
+import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.Map.Entry;
 
+import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
+import 
org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry.NodeStateEntryBuilder;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Session;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Store;
+import 
org.apache.jackrabbit.oak.index.indexer.document.tree.store.StoreBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
 
 public class TreeStore implements Iterable<NodeStateEntry>, Closeable {
 
+    private final Store store;
+    private final Session session;
+    private final NodeStateEntryReader entryReader;
+
+    public TreeStore(File directory, NodeStateEntryReader entryReader) {
+        this.entryReader = entryReader;
+        this.store = StoreBuilder.build("type=file\n" + "dir=" + 
directory.getAbsolutePath());
+        this.session = new Session(store);
+    }
+
     @Override
     public void close() throws IOException {
-        // TODO
-        
+        session.flush();
+        store.close();
     }
 
     @Override
     public Iterator<NodeStateEntry> iterator() {
-        // TODO
-        return null;
+        Iterator<Entry<String, String>> it = session.iterator();
+        return new Iterator<NodeStateEntry>() {
+
+            NodeStateEntry current;
+
+            {
+                fetch();
+            }
+
+            private void fetch() {
+                while (it.hasNext()) {
+                    Entry<String, String> e = it.next();
+                    if (e.getValue().isEmpty()) {
+                        continue;
+                    }
+                    current = getNodeStateEntry(e.getKey(), e.getValue());
+                }
+                current = null;
+            }
+
+            @Override
+            public boolean hasNext() {
+                return current != null;
+            }
+
+            @Override
+            public NodeStateEntry next() {
+                NodeStateEntry result = current;
+                fetch();
+                return result;
+            }
+
+        };
+    }
+
+    NodeStateEntry getNodeStateEntry(String path) {
+        return new NodeStateEntryBuilder(getNodeState(path), path).build();
+    }
+
+    NodeStateEntry getNodeStateEntry(String path, String value) {
+        return new NodeStateEntryBuilder(getNodeState(path, value), 
path).build();
+    }
+
+    NodeState getNodeState(String path) {
+        String value = session.get(path);
+        if (value == null || value.isEmpty()) {
+            throw new IllegalArgumentException(path);
+        }
+        return getNodeState(path, value);
+    }
+
+    NodeState getNodeState(String path, String value) {
+        String line = path + "|" + value;
+        NodeStateEntry entry = entryReader.read(line);
+        NodeState wrapped = new TreeStoreNodeState(entry.getNodeState(), path, 
this);
+        return wrapped;
+    }
+
+    /**
+     * The child node entry for the given path.
+     *
+     * @param path the path, e.g. /hello/world
+     * @return the child node entry, e.g. /hello<tab>world
+     */
+    public static String toChildNodeEntry(String path) {
+        if (path.equals("/")) {
+            return "\t";
+        }
+        String nodeName = PathUtils.getName(path);
+        String parentPath = PathUtils.getParentPath(path);
+        return parentPath + "\t" + nodeName;
+    }
+
+    /**
+     * The child node entry for the given parent and child.
+     *
+     * @param path the parentPath, e.g. /hello
+     * @param childName the name of the child node, e.g. world
+     * @return the child node entry, e.g. /hello<tab>world
+     */
+    public static String toChildNodeEntry(String parentPath, String childName) 
{
+        return parentPath + "\t" + childName;
+    }
+
+    public Session getSession() {
+        return session;
     }
 
 }
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
new file mode 100644
index 0000000000..b6afccffdb
--- /dev/null
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreNodeState.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.index.indexer.document.tree;
+
+import static org.apache.jackrabbit.guava.common.collect.Iterators.transform;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
+import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.AbstractNodeState;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateDiff;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * A node state of a {@link TreeStore}.
+ * <p>
+ * Property access is forwarded to the delegate (the node state that was parsed
+ * from the stored value). Child nodes are not read from the delegate, but are
+ * resolved via the child node entries of the tree store.
+ */
+public class TreeStoreNodeState implements NodeState {
+    private final NodeState delegate;
+    private final String path;
+    private final TreeStore treeStore;
+
+    /**
+     * Create a new node state.
+     *
+     * @param delegate the node state that provides the properties
+     * @param path the path of this node
+     * @param treeStore the store used to resolve the child nodes
+     */
+    public TreeStoreNodeState(NodeState delegate, String path, TreeStore treeStore) {
+        this.delegate = delegate;
+        this.path = path;
+        this.treeStore = treeStore;
+    }
+
+    // ~-------------------------------< delegated to the parsed node state >
+
+    @Override
+    public boolean exists() {
+        return delegate.exists();
+    }
+
+    @Override
+    public boolean hasProperty(@NotNull String name) {
+        return delegate.hasProperty(name);
+    }
+
+    @Nullable
+    @Override
+    public PropertyState getProperty(@NotNull String name) {
+        return delegate.getProperty(name);
+    }
+
+    @Override
+    public boolean getBoolean(@NotNull String name) {
+        return delegate.getBoolean(name);
+    }
+
+    @Override
+    public long getLong(String name) {
+        return delegate.getLong(name);
+    }
+
+    @Nullable
+    @Override
+    public String getString(String name) {
+        return delegate.getString(name);
+    }
+
+    @NotNull
+    @Override
+    public Iterable<String> getStrings(@NotNull String name) {
+        return delegate.getStrings(name);
+    }
+
+    @Nullable
+    @Override
+    public String getName(@NotNull String name) {
+        return delegate.getName(name);
+    }
+
+    @NotNull
+    @Override
+    public Iterable<String> getNames(@NotNull String name) {
+        return delegate.getNames(name);
+    }
+
+    @Override
+    public long getPropertyCount() {
+        return delegate.getPropertyCount();
+    }
+
+    @NotNull
+    @Override
+    public Iterable<? extends PropertyState> getProperties() {
+        return delegate.getProperties();
+    }
+
+    @NotNull
+    @Override
+    public NodeBuilder builder() {
+        return delegate.builder();
+    }
+
+    @Override
+    public boolean compareAgainstBaseState(NodeState base, NodeStateDiff diff) {
+        // pass "this" (not the delegate), so that child nodes are read from the tree store
+        return AbstractNodeState.compareAgainstBaseState(this, base, diff);
+    }
+
+    // ~-------------------------------< child node access >
+
+    /**
+     * Whether the store contains a child node entry (an entry with an empty
+     * value) for the given child of this node.
+     *
+     * @param name the name of the child node
+     * @return true if the child node entry exists
+     */
+    @Override
+    public boolean hasChildNode(@NotNull String name) {
+        String entry = TreeStore.toChildNodeEntry(path, name);
+        String result = treeStore.getSession().get(entry);
+        return "".equals(result);
+    }
+
+    /**
+     * Get the child node with the given name.
+     *
+     * @param name the name of the child node
+     * @return the child node state as read from the tree store, or a
+     *         non-existing node state if there is no such child (never null)
+     * @throws IllegalArgumentException if the child node entry exists, but
+     *             the store contains no node state for the child
+     */
+    @NotNull
+    @Override
+    public NodeState getChildNode(@NotNull String name) throws IllegalArgumentException {
+        if (!hasChildNode(name)) {
+            // callers expect a non-existing node state for a missing child, not null
+            return EmptyNodeState.MISSING_NODE;
+        }
+        return treeStore.getNodeState(PathUtils.concat(path, name));
+    }
+
+    /**
+     * Count the child nodes, up to the given limit.
+     *
+     * @param max the maximum number of child nodes to count
+     * @return the number of child nodes, or Long.MAX_VALUE if there are more
+     *         than max
+     */
+    @Override
+    public long getChildNodeCount(long max) {
+        long result = 0;
+        Iterator<String> it = getChildNodeNamesIterator();
+        while (it.hasNext()) {
+            result++;
+            if (result > max) {
+                // stop counting once the limit is exceeded
+                return Long.MAX_VALUE;
+            }
+            it.next();
+        }
+        return result;
+    }
+
+    @Override
+    public Iterable<String> getChildNodeNames() {
+        return new Iterable<String>() {
+            public Iterator<String> iterator() {
+                return getChildNodeNamesIterator();
+            }
+        };
+    }
+
+    @NotNull
+    @Override
+    public Iterable<? extends ChildNodeEntry> getChildNodeEntries() {
+        return () -> transform(getChildNodeIterator(),
+                s -> new MemoryChildNodeEntry(PathUtils.getName(s.getPath()), s.getNodeState()));
+    }
+
+    /**
+     * Iterate over the child nodes. For each child node name, the node state
+     * is read from the tree store.
+     *
+     * @return the iterator
+     */
+    private Iterator<NodeStateEntry> getChildNodeIterator() {
+        return transform(getChildNodeNamesIterator(),
+                s -> treeStore.getNodeStateEntry(PathUtils.concat(path, s)));
+    }
+
+    /**
+     * Iterate over the names of the child nodes. The iteration ends at the
+     * first entry with a non-empty value, or at the end of the store.
+     * <p>
+     * NOTE(review): this assumes that the session iterator, when started at
+     * the path of this node, returns the child node entries of this node
+     * before any other entry - to be confirmed against Session.iterator.
+     *
+     * @return the iterator
+     * @throws IllegalArgumentException (while iterating) if the key of an
+     *             entry with an empty value contains no tab character
+     */
+    Iterator<String> getChildNodeNamesIterator() {
+        Iterator<Entry<String, String>> it = treeStore.getSession().iterator(path);
+        return new Iterator<String>() {
+            // the name returned by the next call to next(), or null at the end
+            String current;
+            {
+                fetch();
+            }
+
+            private void fetch() {
+                if (!it.hasNext()) {
+                    current = null;
+                } else {
+                    Entry<String, String> e = it.next();
+                    if (!e.getValue().isEmpty()) {
+                        // not a child node entry: no more children
+                        current = null;
+                    } else {
+                        // the child name is the part of the key after the last tab
+                        String key = e.getKey();
+                        int index = key.lastIndexOf('\t');
+                        if (index < 0) {
+                            throw new IllegalArgumentException(key);
+                        }
+                        current = key.substring(index + 1);
+                    }
+                }
+            }
+
+            public boolean hasNext() {
+                return current != null;
+            }
+
+            public String next() {
+                String result = current;
+                if (result == null) {
+                    throw new IllegalStateException();
+                }
+                fetch();
+                return result;
+            }
+        };
+    }
+
+}
\ No newline at end of file
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
similarity index 63%
copy from 
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
copy to 
oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
index 750ac749e8..c452ca05e7 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
+++ 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
@@ -18,24 +18,19 @@
  */
 package org.apache.jackrabbit.oak.index.indexer.document.tree;
 
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.Iterator;
+import static org.junit.Assert.assertEquals;
 
-import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
+import org.junit.Test;
 
-public class TreeStore implements Iterable<NodeStateEntry>, Closeable {
+public class TreeStoreTest {
 
-    @Override
-    public void close() throws IOException {
-        // TODO
+    @Test
+    public void test() {
+        // one argument: the path is split into the parent path and the node name
+        assertEquals("\t", TreeStore.toChildNodeEntry("/"));
+        assertEquals("/\tabc", TreeStore.toChildNodeEntry("/abc"));
+        assertEquals("/hello\tworld", 
TreeStore.toChildNodeEntry("/hello/world"));
         
+        // two arguments: the parent path and the child name are joined with a tab
+        assertEquals("/\tabc", TreeStore.toChildNodeEntry("/", "abc"));
+        assertEquals("/hello\tworld", TreeStore.toChildNodeEntry("/hello", 
"world"));
     }
-
-    @Override
-    public Iterator<NodeStateEntry> iterator() {
-        // TODO
-        return null;
-    }
-
 }
diff --git 
a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java 
b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
index 55f9aa08ce..b76b032ae1 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
@@ -31,6 +31,7 @@ import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.index.async.AsyncIndexerLucene;
 import org.apache.jackrabbit.oak.index.indexer.document.DocumentStoreIndexer;
 import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore;
+import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore;
 import org.apache.jackrabbit.oak.plugins.index.importer.IndexDefinitionUpdater;
 import org.apache.jackrabbit.oak.run.cli.CommonOptions;
 import org.apache.jackrabbit.oak.run.cli.DocumentBuilderCustomizer;
@@ -243,12 +244,16 @@ public class IndexCommand implements Command {
         if (opts.getCommonOpts().isMongo() && idxOpts.isDocTraversalMode()) {
             log.info("Using Document order traversal to perform reindexing");
             try (DocumentStoreIndexer indexer = new 
DocumentStoreIndexer(extendedIndexHelper, indexerSupport)) {
-                if (idxOpts.buildFlatFileStoreSeparately()) {
-                    FlatFileStore ffs = indexer.buildFlatFileStore();
-                    String pathToFFS = ffs.getFlatFileStorePath();
-                    System.setProperty(OAK_INDEXER_SORTED_FILE_PATH, 
pathToFFS);
+                if (idxOpts.useTreeStore()) {
+                    indexer.reindexUsingTreeStore();
+                } else {
+                    if (idxOpts.buildFlatFileStoreSeparately()) {
+                        FlatFileStore ffs = indexer.buildFlatFileStore();
+                        String pathToFFS = ffs.getFlatFileStorePath();
+                        System.setProperty(OAK_INDEXER_SORTED_FILE_PATH, 
pathToFFS);
+                    }
+                    indexer.reindex();
                 }
-                indexer.reindex();
             }
         } else {
             try (OutOfBandIndexer indexer = new 
OutOfBandIndexer(extendedIndexHelper, indexerSupport)) {

Reply via email to