This is an automated email from the ASF dual-hosted git repository. ngupta pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push: new a316bb3 OAK-9714 | Adding support to be able to build FlatFileStore independe… (#514) a316bb3 is described below commit a316bb31e648eef28e9e7b700b657a08035cb77f Author: nit0906 <nitingupta0...@gmail.com> AuthorDate: Mon Mar 21 18:30:54 2022 +0530 OAK-9714 | Adding support to be able to build FlatFileStore independe… (#514) * OAK-9714 | Adding support to be able to build FlatFileStore independent of the index method in DocumentStoreIndexer --- .../oak/plugins/index/importer/IndexImporter.java | 16 ++++++- .../oak/plugins/index/importer/package-info.java | 2 +- oak-run-commons/pom.xml | 5 +++ .../apache/jackrabbit/oak/index/IndexOptions.java | 6 +++ .../jackrabbit/oak/index/IndexerSupport.java | 2 +- .../indexer/document/DocumentStoreIndexerBase.java | 52 ++++++++++++++++++++-- .../flatfile/FlatFileNodeStoreBuilder.java | 2 +- .../indexer/document/flatfile/FlatFileStore.java | 9 ++++ .../apache/jackrabbit/oak/index/IndexCommand.java | 7 +++ 9 files changed, 93 insertions(+), 8 deletions(-) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java index 37bd81b..fb86df7 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/IndexImporter.java @@ -57,6 +57,11 @@ public class IndexImporter { * Symbolic name use to indicate sync indexes */ static final String ASYNC_LANE_SYNC = "sync"; + /* + * System property name for flag for preserve checkpoint. If this is set to true, then checkpoint cleanup will be skipped. + * Default is set to false. + */ + public static final String OAK_INDEX_IMPORTER_PRESERVE_CHECKPOINT = "oak.index.importer.preserveCheckpoint"; private final Logger log = LoggerFactory.getLogger(getClass()); private final NodeStore nodeStore; @@ -69,6 +74,7 @@ public class IndexImporter { private final IndexEditorProvider indexEditorProvider; private final AsyncIndexerLock indexerLock; private final IndexDefinitionUpdater indexDefinitionUpdater; + private final boolean preserveCheckpoint = Boolean.getBoolean(OAK_INDEX_IMPORTER_PRESERVE_CHECKPOINT); public IndexImporter(NodeStore nodeStore, File indexDir, IndexEditorProvider indexEditorProvider, AsyncIndexerLock indexerLock) throws IOException { @@ -290,8 +296,14 @@ public class IndexImporter { } private void releaseCheckpoint() { - nodeStore.release(indexerInfo.checkpoint); - log.info("Released the referred checkpoint [{}]", indexerInfo.checkpoint); + if (preserveCheckpoint) { + log.info("Preserving the referred checkpoint [{}]. This could have been done in case this checkpoint is needed by a process later on." + + " Please make sure to remove the checkpoint once it's no longer needed.", indexerInfo.checkpoint); + } else { + nodeStore.release(indexerInfo.checkpoint); + log.info("Released the referred checkpoint [{}]", indexerInfo.checkpoint); + } + } private void incrementReIndexCount(NodeBuilder definition) { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java index 9872f08..2ebba64 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/importer/package-info.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -@Version("0.1.0") +@Version("0.2.0") package org.apache.jackrabbit.oak.plugins.index.importer; import org.osgi.annotation.versioning.Version; \ No newline at end of file diff --git a/oak-run-commons/pom.xml b/oak-run-commons/pom.xml index e8e1d77..d19a44f 100644 --- a/oak-run-commons/pom.xml +++ b/oak-run-commons/pom.xml @@ -61,6 +61,11 @@ <artifactId>oak-segment-tar</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.jackrabbit</groupId> + <artifactId>oak-search</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>org.apache.jackrabbit</groupId> <artifactId>oak-segment-remote</artifactId> diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java index ab25f61..4e3f717 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java @@ -54,6 +54,7 @@ public class IndexOptions implements OptionsBean { private final OptionSpec<Void> importIndex; private final OptionSpec<Void> docTraversal; private final OptionSpec<Void> enableCowCor; + private final OptionSpec<Void> buildFlatFileStoreSeparately; private final OptionSpec<Integer> consistencyCheck; private final OptionSpec<Long> asyncDelay; protected OptionSet options; @@ -108,6 +109,7 @@ public class IndexOptions implements OptionsBean { docTraversal = parser.accepts("doc-traversal-mode", "Use Document traversal mode for reindex in " + "DocumentNodeStore setups. This may provide better performance in some cases (experimental)"); enableCowCor = parser.accepts("enable-cow-cor", "Enables COW/COR during async indexing using oak-run"); + buildFlatFileStoreSeparately = parser.accepts("build-flatfilestore-separately", "Builds FlatFileStore as a separate step and then uses it as part of the doc-traversal-mode for reindexing"); indexImportDir = parser.accepts("index-import-dir", "Directory containing index files. This " + "is required when --index-import operation is selected") @@ -221,6 +223,10 @@ public class IndexOptions implements OptionsBean { return options.has(enableCowCor); } + public boolean buildFlatFileStoreSeparately() { + return options.has(buildFlatFileStoreSeparately); + } + public String getCheckpoint(){ return checkpoint.value(options); } diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java index ff46f82..88f9fbf 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/IndexerSupport.java @@ -113,7 +113,7 @@ public class IndexerSupport { return checkpointedState; } - private void updateIndexDefinitions(NodeBuilder rootBuilder) throws IOException, CommitFailedException { + public void updateIndexDefinitions(NodeBuilder rootBuilder) throws IOException, CommitFailedException { if (indexDefinitions != null) { new IndexDefinitionUpdater(indexDefinitions).apply(rootBuilder); } diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java index d25d5ef..4dfe938 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java @@ -27,6 +27,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Predicate; import com.codahale.metrics.MetricRegistry; import com.google.common.base.Stopwatch; @@ -51,10 +52,12 @@ import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback; import org.apache.jackrabbit.oak.plugins.index.NodeTraversalCallback; import org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter; import org.apache.jackrabbit.oak.plugins.index.progress.MetricRateEstimator; +import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore; import org.apache.jackrabbit.oak.plugins.metric.MetricStatisticsProvider; import org.apache.jackrabbit.oak.spi.commit.CommitInfo; import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.filter.PathFilter; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; @@ -64,6 +67,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder.OAK_INDEXER_SORTED_FILE_PATH; import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME; public abstract class DocumentStoreIndexerBase implements Closeable{ @@ -101,15 +105,23 @@ public abstract class DocumentStoreIndexerBase implements Closeable{ private final MongoDocumentStore documentStore; private final Logger traversalLogger; private final CompositeIndexer indexer; + private final Predicate<String> pathPredicate; + private MongoNodeStateEntryTraverserFactory(RevisionVector rootRevision, DocumentNodeStore documentNodeStore, MongoDocumentStore documentStore, Logger traversalLogger, CompositeIndexer indexer) { + this(rootRevision, documentNodeStore, documentStore, traversalLogger, indexer, null); + } + + private MongoNodeStateEntryTraverserFactory(RevisionVector rootRevision, DocumentNodeStore documentNodeStore, + MongoDocumentStore documentStore, Logger traversalLogger, CompositeIndexer indexer, Predicate<String> pathPredicate) { this.rootRevision = rootRevision; this.documentNodeStore = documentNodeStore; this.documentStore = documentStore; this.traversalLogger = traversalLogger; this.indexer = indexer; + this.pathPredicate = pathPredicate; } @Override @@ -129,11 +141,15 @@ public abstract class DocumentStoreIndexerBase implements Closeable{ } traversalLogger.trace(id); }) - .withPathPredicate(indexer::shouldInclude); + .withPathPredicate((pathPredicate != null) ? pathPredicate : indexer::shouldInclude); } } private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeIndexer indexer) throws IOException { + return buildFlatFileStore(checkpointedState, indexer, null, null); + } + + private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeIndexer indexer, Predicate<String> pathPredicate, Set<String> preferredPathElements) throws IOException { Stopwatch flatFileStoreWatch = Stopwatch.createStarted(); int executionCount = 1; @@ -154,10 +170,10 @@ public abstract class DocumentStoreIndexerBase implements Closeable{ builder = new FlatFileNodeStoreBuilder(indexHelper.getWorkDir(), memoryManager) .withLastModifiedBreakPoints(lastModifiedBreakPoints) .withBlobStore(indexHelper.getGCBlobStore()) - .withPreferredPathElements(indexer.getRelativeIndexedNodeNames()) + .withPreferredPathElements((preferredPathElements != null) ? preferredPathElements : indexer.getRelativeIndexedNodeNames()) .addExistingDataDumpDir(indexerSupport.getExistingDataDumpDir()) .withNodeStateEntryTraverserFactory(new MongoNodeStateEntryTraverserFactory(rootDocumentState.getRootRevision(), - nodeStore, getMongoDocumentStore(), traversalLog, indexer)); + nodeStore, getMongoDocumentStore(), traversalLog, indexer, pathPredicate)); for (File dir : previousDownloadDirs) { builder.addExistingDataDumpDir(dir); } @@ -188,6 +204,36 @@ public abstract class DocumentStoreIndexerBase implements Closeable{ return flatFileStore; } + /** + * + * @return an Instance of FlatFileStore, whose getFlatFileStorePath() method can be used to get the absolute path to this store. + * @throws IOException + * @throws CommitFailedException + */ + public FlatFileStore buildFlatFileStore() throws IOException, CommitFailedException { + NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint(); + NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState); + NodeBuilder builder = copyOnWriteStore.getRoot().builder(); + NodeState root = builder.getNodeState(); + indexerSupport.updateIndexDefinitions(builder); + IndexDefinition.Builder indexDefBuilder = new IndexDefinition.Builder(); + + Set<String> preferredPathElements = new HashSet<>(); + Set<IndexDefinition> indexDefinitions = new HashSet<>(); + + for (String indexPath : indexHelper.getIndexPaths()) { + NodeBuilder idxBuilder = IndexerSupport.childBuilder(builder, indexPath, false); + IndexDefinition indexDf = indexDefBuilder.defn(idxBuilder.getNodeState()).indexPath(indexPath).root(root).build(); + preferredPathElements.addAll(indexDf.getRelativeNodeNames()); + indexDefinitions.add(indexDf); + } + Predicate<String> predicate = s -> indexDefinitions.stream().anyMatch(indexDef -> indexDef.getPathFilter().filter(s) != PathFilter.Result.EXCLUDE); + FlatFileStore flatFileStore = buildFlatFileStore(checkpointedState, null, predicate, preferredPathElements); + log.info("FlatFileStore built at {}. To use this flatFileStore in a reindex step, set System Property-{} with value {}", + flatFileStore.getFlatFileStorePath(), OAK_INDEXER_SORTED_FILE_PATH, flatFileStore.getFlatFileStorePath()); + return flatFileStore; + } + public void reindex() throws CommitFailedException, IOException { IndexingProgressReporter progressReporter = new IndexingProgressReporter(IndexUpdateCallback.NOOP, NodeTraversalCallback.NOOP); diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java index 00c1258..a50deb7 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java @@ -53,7 +53,7 @@ public class FlatFileNodeStoreBuilder { * Allowed values are the values from enum {@link SortStrategyType} */ static final String OAK_INDEXER_SORT_STRATEGY_TYPE = "oak.indexer.sortStrategyType"; - private static final String OAK_INDEXER_SORTED_FILE_PATH = "oak.indexer.sortedFilePath"; + public static final String OAK_INDEXER_SORTED_FILE_PATH = "oak.indexer.sortedFilePath"; /** diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java index 105bfd5..9ec9e68 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStore.java @@ -45,11 +45,20 @@ public class FlatFileStore implements Iterable<NodeStateEntry>, Closeable{ public FlatFileStore(BlobStore blobStore, File storeFile, NodeStateEntryReader entryReader, Set<String> preferredPathElements, boolean compressionEnabled) { this.blobStore = blobStore; this.storeFile = storeFile; + if (!(storeFile.exists() && storeFile.isFile() && storeFile.canRead())) { + String msg = String.format("Cannot read store file at [%s]", + storeFile.getAbsolutePath()); + throw new IllegalArgumentException(msg); + } this.entryReader = entryReader; this.preferredPathElements = preferredPathElements; this.compressionEnabled = compressionEnabled; } + public String getFlatFileStorePath() { + return storeFile.getAbsolutePath(); + } + public long getEntryCount() { return entryCount; } diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java index 5277888..c0138ed 100644 --- a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java +++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java @@ -41,6 +41,7 @@ import org.apache.felix.inventory.Format; import org.apache.jackrabbit.oak.api.CommitFailedException; import org.apache.jackrabbit.oak.index.async.AsyncIndexerLucene; import org.apache.jackrabbit.oak.index.indexer.document.DocumentStoreIndexer; +import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore; import org.apache.jackrabbit.oak.plugins.index.importer.IndexDefinitionUpdater; import org.apache.jackrabbit.oak.run.cli.CommonOptions; import org.apache.jackrabbit.oak.run.cli.DocumentBuilderCustomizer; @@ -57,6 +58,7 @@ import org.slf4j.LoggerFactory; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static java.util.Collections.emptyMap; +import static org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder.OAK_INDEXER_SORTED_FILE_PATH; public class IndexCommand implements Command { private static final Logger log = LoggerFactory.getLogger(IndexCommand.class); @@ -228,6 +230,11 @@ public class IndexCommand implements Command { if (opts.getCommonOpts().isMongo() && idxOpts.isDocTraversalMode()) { log.info("Using Document order traversal to perform reindexing"); try (DocumentStoreIndexer indexer = new DocumentStoreIndexer(extendedIndexHelper, indexerSupport)) { + if (idxOpts.buildFlatFileStoreSeparately()) { + FlatFileStore ffs = indexer.buildFlatFileStore(); + String pathToFFS = ffs.getFlatFileStorePath(); + System.setProperty(OAK_INDEXER_SORTED_FILE_PATH, pathToFFS); + } indexer.reindex(); } } else {