Author: chetanm Date: Mon Jun 19 10:22:13 2017 New Revision: 1799173 URL: http://svn.apache.org/viewvc?rev=1799173&view=rev Log: OAK-6248 - Enable use of pre-extracted text cache
Pre extracted text directory can be specified with `--pre-extracted-text-dir` option Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java?rev=1799173&r1=1799172&r2=1799173&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java (original) +++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java Mon Jun 19 10:22:13 2017 @@ -101,6 +101,8 @@ public class IndexCommand implements Com IndexHelper indexHelper = new IndexHelper(store, blobStore, statisticsProvider, indexOpts.getOutDir(), indexOpts.getWorkDir(), indexOpts.getIndexPaths()); + configurePreExtractionSupport(indexOpts, indexHelper); + closer.register(indexHelper); dumpIndexStats(indexOpts, indexHelper); @@ -110,6 +112,14 @@ public class IndexCommand implements Com reindexIndex(indexOpts, indexHelper); } + private void configurePreExtractionSupport(IndexOptions indexOpts, IndexHelper indexHelper) throws IOException { + File preExtractedTextDir = indexOpts.getPreExtractedTextDir(); + if (preExtractedTextDir != null) { + indexHelper.setPreExtractedTextDir(preExtractedTextDir); + log.info("Using pre-extracted text directory {}", getPath(preExtractedTextDir)); + } + } + private void reindexIndex(IndexOptions indexOpts, IndexHelper indexHelper) throws IOException, CommitFailedException { if 
(!indexOpts.isReindex()){ return; Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java?rev=1799173&r1=1799172&r2=1799173&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java (original) +++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java Mon Jun 19 10:22:13 2017 @@ -35,6 +35,7 @@ import javax.annotation.Nonnull; import com.google.common.collect.ImmutableList; import com.google.common.io.Closer; +import org.apache.commons.io.FileUtils; import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser; import org.apache.jackrabbit.oak.plugins.index.AsyncIndexInfoService; import org.apache.jackrabbit.oak.plugins.index.AsyncIndexInfoServiceImpl; @@ -42,8 +43,10 @@ import org.apache.jackrabbit.oak.plugins import org.apache.jackrabbit.oak.plugins.index.IndexInfoServiceImpl; import org.apache.jackrabbit.oak.plugins.index.IndexPathService; import org.apache.jackrabbit.oak.plugins.index.IndexPathServiceImpl; +import org.apache.jackrabbit.oak.plugins.index.datastore.DataStoreTextWriter; import org.apache.jackrabbit.oak.plugins.index.inventory.IndexDefinitionPrinter; import org.apache.jackrabbit.oak.plugins.index.inventory.IndexPrinter; +import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache; import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexInfoProvider; import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexInfoProvider; import org.apache.jackrabbit.oak.spi.blob.BlobStore; @@ -68,6 +71,7 @@ class IndexHelper implements Closeable{ private final Closer closer = Closer.create(); private final BlobStore blobStore; private final StatisticsProvider statisticsProvider; + private 
ExtractedTextCache extractedTextCache; IndexHelper(NodeStore store, BlobStore blobStore, StatisticsProvider statisticsProvider, File outputDir, File workDir, List<String> indexPaths) { @@ -143,6 +147,17 @@ class IndexHelper implements Closeable{ return luceneIndexHelper; } + public ExtractedTextCache getExtractedTextCache() { + if (extractedTextCache == null) { + extractedTextCache = new ExtractedTextCache(FileUtils.ONE_MB * 5, TimeUnit.HOURS.toSeconds(5)); + } + return extractedTextCache; + } + + public void setPreExtractedTextDir(File dir) throws IOException { + getExtractedTextCache().setExtractedTextProvider(new DataStoreTextWriter(dir, true)); + } + @Override public void close() throws IOException { closer.close(); Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java?rev=1799173&r1=1799172&r2=1799173&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java (original) +++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java Mon Jun 19 10:22:13 2017 @@ -39,15 +39,11 @@ import org.apache.jackrabbit.oak.run.cli public class IndexOptions implements OptionsBean { - public static final OptionsBeanFactory FACTORY = new OptionsBeanFactory() { - @Override - public OptionsBean newInstance(OptionParser parser) { - return new IndexOptions(parser); - } - }; + public static final OptionsBeanFactory FACTORY = IndexOptions::new; private final OptionSpec<File> workDirOpt; private final OptionSpec<File> outputDirOpt; + private final OptionSpec<File> preExtractedTextOpt; private final OptionSpec<Void> stats; private final OptionSpec<Void> definitions; private final OptionSpec<Void> dumpIndex; @@ -65,6 +61,8 @@ public class IndexOptions 
implements Opt .withRequiredArg().ofType(File.class).defaultsTo(new File("temp")); outputDirOpt = parser.accepts("index-out-dir", "Directory used for output files") .withRequiredArg().ofType(File.class).defaultsTo(new File("indexing-result")); + preExtractedTextOpt = parser.accepts("pre-extracted-text-dir", "Directory storing pre extracted text") + .withRequiredArg().ofType(File.class); stats = parser.accepts("index-info", "Collects and dumps various statistics related to the indexes"); definitions = parser.accepts("index-definitions", "Collects and dumps index definitions"); @@ -128,6 +126,10 @@ public class IndexOptions implements Opt return outputDirOpt.value(options); } + public File getPreExtractedTextDir() { + return preExtractedTextOpt.value(options); + } + public boolean dumpStats(){ return options.has(stats) || !anyActionSelected(); } Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java?rev=1799173&r1=1799172&r2=1799173&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java (original) +++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java Mon Jun 19 10:22:13 2017 @@ -22,10 +22,7 @@ package org.apache.jackrabbit.oak.index; import java.io.Closeable; import java.io.File; import java.io.IOException; -import java.util.concurrent.TimeUnit; -import org.apache.commons.io.FileUtils; -import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache; import org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier; import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider; import 
org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory.BlobDeletionCallback; @@ -35,9 +32,6 @@ import org.apache.jackrabbit.oak.spi.blo class LuceneIndexHelper implements Closeable { private final IndexHelper indexHelper; private IndexCopier indexCopier; - //TODO Set pre extracted text provider - private final ExtractedTextCache textCache = - new ExtractedTextCache(FileUtils.ONE_MB * 5, TimeUnit.HOURS.toSeconds(5)); private DirectoryFactory directoryFactory; LuceneIndexHelper(IndexHelper indexHelper) { @@ -49,7 +43,7 @@ class LuceneIndexHelper implements Close if (directoryFactory != null) { editor = new LuceneIndexEditorProvider( getIndexCopier(), - textCache, + indexHelper.getExtractedTextCache(), null, indexHelper.getMountInfoProvider() ) { @@ -61,7 +55,7 @@ class LuceneIndexHelper implements Close } else { editor = new LuceneIndexEditorProvider( getIndexCopier(), - textCache, + indexHelper.getExtractedTextCache(), null, indexHelper.getMountInfoProvider() );