This is an automated email from the ASF dual-hosted git repository.

ngupta pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new a316bb3  OAK-9714 | Adding support to be able to build FlatFileStore 
independe… (#514)
a316bb3 is described below

commit a316bb31e648eef28e9e7b700b657a08035cb77f
Author: nit0906 <nitingupta0...@gmail.com>
AuthorDate: Mon Mar 21 18:30:54 2022 +0530

    OAK-9714 | Adding support to be able to build FlatFileStore independe… 
(#514)
    
    * OAK-9714 | Adding support to be able to build FlatFileStore independent 
of the index method in DocumentStoreIndexer
---
 .../oak/plugins/index/importer/IndexImporter.java  | 16 ++++++-
 .../oak/plugins/index/importer/package-info.java   |  2 +-
 oak-run-commons/pom.xml                            |  5 +++
 .../apache/jackrabbit/oak/index/IndexOptions.java  |  6 +++
 .../jackrabbit/oak/index/IndexerSupport.java       |  2 +-
 .../indexer/document/DocumentStoreIndexerBase.java | 52 ++++++++++++++++++++--
 .../flatfile/FlatFileNodeStoreBuilder.java         |  2 +-
 .../indexer/document/flatfile/FlatFileStore.java   |  9 ++++
 .../apache/jackrabbit/oak/index/IndexCommand.java  |  7 +++
 9 files changed, 93 insertions(+), 8 deletions(-)

diff --git 
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java
 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java
index 37bd81b..fb86df7 100644
--- 
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java
+++ 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java
@@ -57,6 +57,11 @@ public class IndexImporter {
      * Symbolic name use to indicate sync indexes
      */
     static final String ASYNC_LANE_SYNC = "sync";
+    /*
+     * Name of the system property that controls checkpoint preservation. If it is set to true, the checkpoint is not released.
+     * Defaults to false.
+     */
+    public static final String OAK_INDEX_IMPORTER_PRESERVE_CHECKPOINT = 
"oak.index.importer.preserveCheckpoint";
 
     private final Logger log = LoggerFactory.getLogger(getClass());
     private final NodeStore nodeStore;
@@ -69,6 +74,7 @@ public class IndexImporter {
     private final IndexEditorProvider indexEditorProvider;
     private final AsyncIndexerLock indexerLock;
     private final IndexDefinitionUpdater indexDefinitionUpdater;
+    private final boolean preserveCheckpoint = 
Boolean.getBoolean(OAK_INDEX_IMPORTER_PRESERVE_CHECKPOINT);
 
     public IndexImporter(NodeStore nodeStore, File indexDir, 
IndexEditorProvider indexEditorProvider,
                          AsyncIndexerLock indexerLock) throws IOException {
@@ -290,8 +296,14 @@ public class IndexImporter {
     }
 
     private void releaseCheckpoint() {
-        nodeStore.release(indexerInfo.checkpoint);
-        log.info("Released the referred checkpoint [{}]", 
indexerInfo.checkpoint);
+        if (preserveCheckpoint) {
+            log.info("Preserving the referred checkpoint [{}]. This could have 
been done in case this checkpoint is needed by a process later on." +
+                    " Please make sure to remove the checkpoint once it's no 
longer needed.", indexerInfo.checkpoint);
+        } else {
+            nodeStore.release(indexerInfo.checkpoint);
+            log.info("Released the referred checkpoint [{}]", 
indexerInfo.checkpoint);
+        }
+
     }
 
     private void incrementReIndexCount(NodeBuilder definition) {
diff --git 
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java
 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java
index 9872f08..2ebba64 100644
--- 
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java
+++ 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-@Version("0.1.0")
+@Version("0.2.0")
 package org.apache.jackrabbit.oak.plugins.index.importer;
 
 import org.osgi.annotation.versioning.Version;
\ No newline at end of file
diff --git a/oak-run-commons/pom.xml b/oak-run-commons/pom.xml
index e8e1d77..d19a44f 100644
--- a/oak-run-commons/pom.xml
+++ b/oak-run-commons/pom.xml
@@ -61,6 +61,11 @@
             <artifactId>oak-segment-tar</artifactId>
             <version>${project.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.jackrabbit</groupId>
+            <artifactId>oak-search</artifactId>
+            <version>${project.version}</version>
+        </dependency>
          <dependency>
             <groupId>org.apache.jackrabbit</groupId>
             <artifactId>oak-segment-remote</artifactId>
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
index ab25f61..4e3f717 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
@@ -54,6 +54,7 @@ public class IndexOptions implements OptionsBean {
     private final OptionSpec<Void> importIndex;
     private final OptionSpec<Void> docTraversal;
     private final OptionSpec<Void> enableCowCor;
+    private final OptionSpec<Void> buildFlatFileStoreSeparately;
     private final OptionSpec<Integer> consistencyCheck;
     private final OptionSpec<Long> asyncDelay;
     protected OptionSet options;
@@ -108,6 +109,7 @@ public class IndexOptions implements OptionsBean {
         docTraversal = parser.accepts("doc-traversal-mode", "Use Document 
traversal mode for reindex in " +
                 "DocumentNodeStore setups. This may provide better performance 
in some cases (experimental)");
         enableCowCor = parser.accepts("enable-cow-cor", "Enables COW/COR 
during async indexing using oak-run");
+        buildFlatFileStoreSeparately = 
parser.accepts("build-flatfilestore-separately", "Builds FlatFileStore as a 
separate step and then uses it as part of the doc-traversal-mode for 
reindexing");
 
         indexImportDir = parser.accepts("index-import-dir", "Directory 
containing index files. This " +
                 "is required when --index-import operation is selected")
@@ -221,6 +223,10 @@ public class IndexOptions implements OptionsBean {
         return options.has(enableCowCor);
     }
 
+    public boolean buildFlatFileStoreSeparately() {
+        return options.has(buildFlatFileStoreSeparately);
+    }
+
     public String getCheckpoint(){
         return checkpoint.value(options);
     }
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java
index ff46f82..88f9fbf 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java
@@ -113,7 +113,7 @@ public class IndexerSupport {
         return checkpointedState;
     }
 
-    private void updateIndexDefinitions(NodeBuilder rootBuilder) throws 
IOException, CommitFailedException {
+    public void updateIndexDefinitions(NodeBuilder rootBuilder) throws 
IOException, CommitFailedException {
         if (indexDefinitions != null) {
             new IndexDefinitionUpdater(indexDefinitions).apply(rootBuilder);
         }
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
index d25d5ef..4dfe938 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Predicate;
 
 import com.codahale.metrics.MetricRegistry;
 import com.google.common.base.Stopwatch;
@@ -51,10 +52,12 @@ import 
org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
 import org.apache.jackrabbit.oak.plugins.index.NodeTraversalCallback;
 import 
org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter;
 import org.apache.jackrabbit.oak.plugins.index.progress.MetricRateEstimator;
+import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
 import org.apache.jackrabbit.oak.plugins.metric.MetricStatisticsProvider;
 import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
 import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.filter.PathFilter;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
@@ -64,6 +67,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static com.google.common.base.Preconditions.checkNotNull;
+import static 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder.OAK_INDEXER_SORTED_FILE_PATH;
 import static 
org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
 
 public abstract class DocumentStoreIndexerBase implements Closeable{
@@ -101,15 +105,23 @@ public abstract class DocumentStoreIndexerBase implements 
Closeable{
         private final MongoDocumentStore documentStore;
         private final Logger traversalLogger;
         private final CompositeIndexer indexer;
+        private final Predicate<String> pathPredicate;
+
 
         private MongoNodeStateEntryTraverserFactory(RevisionVector 
rootRevision, DocumentNodeStore documentNodeStore,
                                                    MongoDocumentStore 
documentStore, Logger traversalLogger,
                                                    CompositeIndexer indexer) {
+            this(rootRevision, documentNodeStore, documentStore, 
traversalLogger, indexer, null);
+        }
+
+        private MongoNodeStateEntryTraverserFactory(RevisionVector 
rootRevision, DocumentNodeStore documentNodeStore,
+                                                    MongoDocumentStore 
documentStore, Logger traversalLogger, CompositeIndexer indexer, 
Predicate<String> pathPredicate) {
             this.rootRevision = rootRevision;
             this.documentNodeStore = documentNodeStore;
             this.documentStore = documentStore;
             this.traversalLogger = traversalLogger;
             this.indexer = indexer;
+            this.pathPredicate = pathPredicate;
         }
 
         @Override
@@ -129,11 +141,15 @@ public abstract class DocumentStoreIndexerBase implements 
Closeable{
                                 }
                                 traversalLogger.trace(id);
                             })
-                            .withPathPredicate(indexer::shouldInclude);
+                            .withPathPredicate((pathPredicate != null) ? 
pathPredicate : indexer::shouldInclude);
         }
     }
 
     private FlatFileStore buildFlatFileStore(NodeState checkpointedState, 
CompositeIndexer indexer) throws IOException {
+        return buildFlatFileStore(checkpointedState, indexer, null, null);
+    }
+
+    private FlatFileStore buildFlatFileStore(NodeState checkpointedState, 
CompositeIndexer indexer, Predicate<String> pathPredicate, Set<String> 
preferredPathElements) throws IOException {
 
         Stopwatch flatFileStoreWatch = Stopwatch.createStarted();
         int executionCount = 1;
@@ -154,10 +170,10 @@ public abstract class DocumentStoreIndexerBase implements 
Closeable{
                 builder = new 
FlatFileNodeStoreBuilder(indexHelper.getWorkDir(), memoryManager)
                         .withLastModifiedBreakPoints(lastModifiedBreakPoints)
                         .withBlobStore(indexHelper.getGCBlobStore())
-                        
.withPreferredPathElements(indexer.getRelativeIndexedNodeNames())
+                        .withPreferredPathElements((preferredPathElements != 
null) ? preferredPathElements : indexer.getRelativeIndexedNodeNames())
                         
.addExistingDataDumpDir(indexerSupport.getExistingDataDumpDir())
                         .withNodeStateEntryTraverserFactory(new 
MongoNodeStateEntryTraverserFactory(rootDocumentState.getRootRevision(),
-                                nodeStore, getMongoDocumentStore(), 
traversalLog, indexer));
+                                nodeStore, getMongoDocumentStore(), 
traversalLog, indexer, pathPredicate));
                 for (File dir : previousDownloadDirs) {
                     builder.addExistingDataDumpDir(dir);
                 }
@@ -188,6 +204,36 @@ public abstract class DocumentStoreIndexerBase implements 
Closeable{
         return flatFileStore;
     }
 
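+    /**
+     * Builds a FlatFileStore restricted to paths that at least one configured index definition does not exclude.
+     * @return an instance of FlatFileStore, whose getFlatFileStorePath() method can be used to get the absolute path to this store.
+     * @throws IOException if updating the index definitions or building the store fails
+     * @throws CommitFailedException if updating the index definitions fails
+     */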
+    public FlatFileStore buildFlatFileStore() throws IOException, 
CommitFailedException {
+        NodeState checkpointedState = 
indexerSupport.retrieveNodeStateForCheckpoint();
+        NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
+        NodeBuilder builder = copyOnWriteStore.getRoot().builder();
+        NodeState root = builder.getNodeState();
+        indexerSupport.updateIndexDefinitions(builder);
+        IndexDefinition.Builder indexDefBuilder = new 
IndexDefinition.Builder();
+
+        Set<String> preferredPathElements = new HashSet<>();
+        Set<IndexDefinition> indexDefinitions = new HashSet<>();
+
+        for (String indexPath : indexHelper.getIndexPaths()) {
+            NodeBuilder idxBuilder = IndexerSupport.childBuilder(builder, 
indexPath, false);
+            IndexDefinition indexDf = 
indexDefBuilder.defn(idxBuilder.getNodeState()).indexPath(indexPath).root(root).build();
+            preferredPathElements.addAll(indexDf.getRelativeNodeNames());
+            indexDefinitions.add(indexDf);
+        }
+        Predicate<String> predicate = s -> 
indexDefinitions.stream().anyMatch(indexDef -> 
indexDef.getPathFilter().filter(s) != PathFilter.Result.EXCLUDE);
+        FlatFileStore flatFileStore = buildFlatFileStore(checkpointedState, 
null, predicate, preferredPathElements);
+        log.info("FlatFileStore built at {}. To use this flatFileStore in a 
reindex step, set System Property-{} with value {}",
+                flatFileStore.getFlatFileStorePath(), 
OAK_INDEXER_SORTED_FILE_PATH, flatFileStore.getFlatFileStorePath());
+        return flatFileStore;
+    }
+
     public void reindex() throws CommitFailedException, IOException {
         IndexingProgressReporter progressReporter =
                 new IndexingProgressReporter(IndexUpdateCallback.NOOP, 
NodeTraversalCallback.NOOP);
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
index 00c1258..a50deb7 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
@@ -53,7 +53,7 @@ public class FlatFileNodeStoreBuilder {
      * Allowed values are the values from enum {@link SortStrategyType}
      */
     static final String OAK_INDEXER_SORT_STRATEGY_TYPE = 
"oak.indexer.sortStrategyType";
-    private static final String OAK_INDEXER_SORTED_FILE_PATH = 
"oak.indexer.sortedFilePath";
+    public static final String OAK_INDEXER_SORTED_FILE_PATH = 
"oak.indexer.sortedFilePath";
 
 
     /**
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java
index 105bfd5..9ec9e68 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java
@@ -45,11 +45,20 @@ public class FlatFileStore implements 
Iterable<NodeStateEntry>, Closeable{
     public FlatFileStore(BlobStore blobStore, File storeFile, 
NodeStateEntryReader entryReader, Set<String> preferredPathElements, boolean 
compressionEnabled) {
         this.blobStore = blobStore;
         this.storeFile = storeFile;
+        if (!(storeFile.exists() && storeFile.isFile() && 
storeFile.canRead())) {
+            String msg = String.format("Cannot read store file at [%s]",
+                    storeFile.getAbsolutePath());
+            throw new IllegalArgumentException(msg);
+        }
         this.entryReader = entryReader;
         this.preferredPathElements = preferredPathElements;
         this.compressionEnabled = compressionEnabled;
     }
 
+    public String getFlatFileStorePath() {
+        return storeFile.getAbsolutePath();
+    }
+
     public long getEntryCount() {
         return entryCount;
     }
diff --git 
a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java 
b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
index 5277888..c0138ed 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
@@ -41,6 +41,7 @@ import org.apache.felix.inventory.Format;
 import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.index.async.AsyncIndexerLucene;
 import org.apache.jackrabbit.oak.index.indexer.document.DocumentStoreIndexer;
+import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore;
 import org.apache.jackrabbit.oak.plugins.index.importer.IndexDefinitionUpdater;
 import org.apache.jackrabbit.oak.run.cli.CommonOptions;
 import org.apache.jackrabbit.oak.run.cli.DocumentBuilderCustomizer;
@@ -57,6 +58,7 @@ import org.slf4j.LoggerFactory;
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 import static java.util.Collections.emptyMap;
+import static 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder.OAK_INDEXER_SORTED_FILE_PATH;
 
 public class IndexCommand implements Command {
     private static final Logger log = 
LoggerFactory.getLogger(IndexCommand.class);
@@ -228,6 +230,11 @@ public class IndexCommand implements Command {
         if (opts.getCommonOpts().isMongo() && idxOpts.isDocTraversalMode()) {
             log.info("Using Document order traversal to perform reindexing");
             try (DocumentStoreIndexer indexer = new 
DocumentStoreIndexer(extendedIndexHelper, indexerSupport)) {
+                if (idxOpts.buildFlatFileStoreSeparately()) {
+                    FlatFileStore ffs = indexer.buildFlatFileStore();
+                    String pathToFFS = ffs.getFlatFileStorePath();
+                    System.setProperty(OAK_INDEXER_SORTED_FILE_PATH, 
pathToFFS);
+                }
                 indexer.reindex();
             }
         } else {

Reply via email to