Author: chetanm
Date: Thu Dec 21 04:45:16 2017
New Revision: 1818877

URL: http://svn.apache.org/viewvc?rev=1818877&view=rev
Log:
OAK-7102 - Refactor DocumentIndexer logic to enable different sort approaches
Refactor the sorting logic to a strategy class Added: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java - copied, changed from r1818876, jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java?rev=1818877&r1=1818876&r2=1818877&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java (original) +++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java Thu Dec 21 04:45:16 2017 @@ -19,38 +19,32 @@ package org.apache.jackrabbit.oak.index.indexer.document.flatfile; -import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import java.util.Collections; -import com.google.common.base.Stopwatch; import com.google.common.collect.Iterables; import org.apache.commons.io.FileUtils; -import org.apache.jackrabbit.oak.commons.IOUtils; import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry; import org.apache.jackrabbit.oak.spi.blob.BlobStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static com.google.common.base.StandardSystemProperty.LINE_SEPARATOR; import static 
com.google.common.collect.Iterables.size; public class FlatFileNodeStoreBuilder { private static final String OAK_INDEXER_USE_ZIP = "oak.indexer.useZip"; - private static final String OAK_INDEXER_DELETE_ORIGINAL = "oak.indexer.deleteOriginal"; - private static final String OAK_INDEXER_MAX_SORT_MEMORY_IN_GB = "oak.indexer.maxSortMemoryInGB"; private static final String OAK_INDEXER_SORTED_FILE_PATH = "oak.indexer.sortedFilePath"; private final Logger log = LoggerFactory.getLogger(getClass()); private final Iterable<NodeStateEntry> nodeStates; private final File workDir; private Iterable<String> preferredPathElements = Collections.emptySet(); private BlobStore blobStore; + private PathElementComparator comparator; + private NodeStateEntryWriter entryWriter; private long entryCount = 0; private boolean useZip = Boolean.getBoolean(OAK_INDEXER_USE_ZIP); - private boolean deleteOriginal = Boolean.parseBoolean(System.getProperty(OAK_INDEXER_DELETE_ORIGINAL, "true")); - private int maxMemory = Integer.getInteger(OAK_INDEXER_MAX_SORT_MEMORY_IN_GB, 3); public FlatFileNodeStoreBuilder(Iterable<NodeStateEntry> nodeStates, File workDir) { this.nodeStates = nodeStates; @@ -68,7 +62,9 @@ public class FlatFileNodeStoreBuilder { } public FlatFileStore build() throws IOException { - log.info("Preferred path elements are {}", Iterables.toString(preferredPathElements)); + logFlags(); + comparator = new PathElementComparator(preferredPathElements); + entryWriter = new NodeStateEntryWriter(blobStore); FlatFileStore store = new FlatFileStore(createdSortedStoreFile(), new NodeStateEntryReader(blobStore), size(preferredPathElements), false); if (entryCount > 0) { @@ -92,45 +88,16 @@ public class FlatFileNodeStoreBuilder { } } else { File flatFileStoreDir = createStoreDir(); - File storeFile = writeToStore(flatFileStoreDir, "store.json"); - return sortStoreFile(storeFile); + StoreAndSortStrategy strategy = new StoreAndSortStrategy(nodeStates, comparator, + entryWriter, flatFileStoreDir, 
useZip); + entryCount = strategy.getEntryCount(); + return strategy.createSortedStoreFile(); } } - private File sortStoreFile(File storeFile) throws IOException { - File sortWorkDir = new File(storeFile.getParent(), "sort-work-dir"); - FileUtils.forceMkdir(sortWorkDir); - NodeStateEntrySorter sorter = - new NodeStateEntrySorter(new PathElementComparator(preferredPathElements), storeFile, sortWorkDir); - - logFlags(); - - sorter.setUseZip(useZip); - sorter.setMaxMemoryInGB(maxMemory); - sorter.setDeleteOriginal(deleteOriginal); - sorter.sort(); - return sorter.getSortedFile(); - } - private void logFlags() { + log.info("Preferred path elements are {}", Iterables.toString(preferredPathElements)); log.info("Compression enabled while sorting : {} ({})", useZip, OAK_INDEXER_USE_ZIP); - log.info("Delete original dump from traversal : {} ({})", deleteOriginal, OAK_INDEXER_DELETE_ORIGINAL); - log.info("Max heap memory (GB) to be used for merge sort : {} ({})", maxMemory, OAK_INDEXER_MAX_SORT_MEMORY_IN_GB); - } - - private File writeToStore(File dir, String fileName) throws IOException { - File file = new File(dir, fileName); - Stopwatch sw = Stopwatch.createStarted(); - NodeStateEntryWriter entryWriter = new NodeStateEntryWriter(blobStore); - try (BufferedWriter w = FlatFileStoreUtils.createWriter(file, false)) { - for (NodeStateEntry e : nodeStates) { - String line = entryWriter.toString(e); - w.append(line).append(LINE_SEPARATOR.value()); - entryCount++; - } - } - log.info("Dumped {} nodestates in json format in {} ({})",entryCount, sw, IOUtils.humanReadableByteCount(file.length())); - return file; } private File createStoreDir() throws IOException { Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java?rev=1818877&r1=1818876&r2=1818877&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java (original) +++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java Thu Dec 21 04:45:16 2017 @@ -28,7 +28,6 @@ import org.apache.jackrabbit.oak.spi.blo import org.apache.jackrabbit.oak.spi.state.NodeState; import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.StandardSystemProperty.LINE_SEPARATOR; public class NodeStateEntryWriter { private static final String OAK_CHILD_ORDER = ":childOrder"; Copied: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java (from r1818876, jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java) URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java?p2=jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java&p1=jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java&r1=1818876&r2=1818877&rev=1818877&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java (original) +++ 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java Thu Dec 21 04:45:16 2017 @@ -22,106 +22,68 @@ package org.apache.jackrabbit.oak.index. import java.io.BufferedWriter; import java.io.File; import java.io.IOException; -import java.util.Collections; import com.google.common.base.Stopwatch; -import com.google.common.collect.Iterables; import org.apache.commons.io.FileUtils; import org.apache.jackrabbit.oak.commons.IOUtils; import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry; -import org.apache.jackrabbit.oak.spi.blob.BlobStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static com.google.common.base.StandardSystemProperty.LINE_SEPARATOR; -import static com.google.common.collect.Iterables.size; -public class FlatFileNodeStoreBuilder { - private static final String OAK_INDEXER_USE_ZIP = "oak.indexer.useZip"; +class StoreAndSortStrategy { private static final String OAK_INDEXER_DELETE_ORIGINAL = "oak.indexer.deleteOriginal"; private static final String OAK_INDEXER_MAX_SORT_MEMORY_IN_GB = "oak.indexer.maxSortMemoryInGB"; - private static final String OAK_INDEXER_SORTED_FILE_PATH = "oak.indexer.sortedFilePath"; + private final Logger log = LoggerFactory.getLogger(getClass()); private final Iterable<NodeStateEntry> nodeStates; - private final File workDir; - private Iterable<String> preferredPathElements = Collections.emptySet(); - private BlobStore blobStore; - private long entryCount = 0; - - private boolean useZip = Boolean.getBoolean(OAK_INDEXER_USE_ZIP); + private final PathElementComparator comparator; + private final NodeStateEntryWriter entryWriter; + private final File storeDir; + private final boolean compressionEnabled; + private long entryCount; private boolean deleteOriginal = Boolean.parseBoolean(System.getProperty(OAK_INDEXER_DELETE_ORIGINAL, "true")); private int maxMemory = Integer.getInteger(OAK_INDEXER_MAX_SORT_MEMORY_IN_GB, 3); - public 
FlatFileNodeStoreBuilder(Iterable<NodeStateEntry> nodeStates, File workDir) { - this.nodeStates = nodeStates; - this.workDir = workDir; - } - public FlatFileNodeStoreBuilder withBlobStore(BlobStore blobStore) { - this.blobStore = blobStore; - return this; - } - - public FlatFileNodeStoreBuilder withPreferredPathElements(Iterable<String> preferredPathElements) { - this.preferredPathElements = preferredPathElements; - return this; + public StoreAndSortStrategy(Iterable<NodeStateEntry> nodeStates, PathElementComparator comparator, + NodeStateEntryWriter entryWriter, File storeDir, boolean compressionEnabled) { + this.nodeStates = nodeStates; + this.comparator = comparator; + this.entryWriter = entryWriter; + this.storeDir = storeDir; + this.compressionEnabled = compressionEnabled; } - public FlatFileStore build() throws IOException { - log.info("Preferred path elements are {}", Iterables.toString(preferredPathElements)); - FlatFileStore store = new FlatFileStore(createdSortedStoreFile(), new NodeStateEntryReader(blobStore), - size(preferredPathElements), false); - if (entryCount > 0) { - store.setEntryCount(entryCount); - } - return store; + public File createSortedStoreFile() throws IOException { + File storeFile = writeToStore(storeDir, "store.json"); + return sortStoreFile(storeFile); } - private File createdSortedStoreFile() throws IOException { - String sortedFilePath = System.getProperty(OAK_INDEXER_SORTED_FILE_PATH); - if (sortedFilePath != null) { - File sortedFile = new File(sortedFilePath); - if (sortedFile.exists() && sortedFile.isFile() && sortedFile.canRead()) { - log.info("Reading from provided sorted file [{}] (via system property '{}')", - sortedFile.getAbsolutePath(), OAK_INDEXER_SORTED_FILE_PATH); - return sortedFile; - } else { - String msg = String.format("Cannot read sorted file at [%s] configured via system property '%s'", - sortedFile.getAbsolutePath(), OAK_INDEXER_SORTED_FILE_PATH); - throw new IllegalArgumentException(msg); - } - } else { - 
File flatFileStoreDir = createStoreDir(); - File storeFile = writeToStore(flatFileStoreDir, "store.json"); - return sortStoreFile(storeFile); - } + public long getEntryCount() { + return entryCount; } private File sortStoreFile(File storeFile) throws IOException { File sortWorkDir = new File(storeFile.getParent(), "sort-work-dir"); FileUtils.forceMkdir(sortWorkDir); NodeStateEntrySorter sorter = - new NodeStateEntrySorter(new PathElementComparator(preferredPathElements), storeFile, sortWorkDir); + new NodeStateEntrySorter(comparator, storeFile, sortWorkDir); logFlags(); - sorter.setUseZip(useZip); + sorter.setUseZip(compressionEnabled); sorter.setMaxMemoryInGB(maxMemory); sorter.setDeleteOriginal(deleteOriginal); sorter.sort(); return sorter.getSortedFile(); } - private void logFlags() { - log.info("Compression enabled while sorting : {} ({})", useZip, OAK_INDEXER_USE_ZIP); - log.info("Delete original dump from traversal : {} ({})", deleteOriginal, OAK_INDEXER_DELETE_ORIGINAL); - log.info("Max heap memory (GB) to be used for merge sort : {} ({})", maxMemory, OAK_INDEXER_MAX_SORT_MEMORY_IN_GB); - } - private File writeToStore(File dir, String fileName) throws IOException { + entryCount = 0; File file = new File(dir, fileName); Stopwatch sw = Stopwatch.createStarted(); - NodeStateEntryWriter entryWriter = new NodeStateEntryWriter(blobStore); try (BufferedWriter w = FlatFileStoreUtils.createWriter(file, false)) { for (NodeStateEntry e : nodeStates) { String line = entryWriter.toString(e); @@ -133,9 +95,8 @@ public class FlatFileNodeStoreBuilder { return file; } - private File createStoreDir() throws IOException { - File dir = new File(workDir, "flat-file-store"); - FileUtils.forceMkdir(dir); - return dir; + private void logFlags() { + log.info("Delete original dump from traversal : {} ({})", deleteOriginal, OAK_INDEXER_DELETE_ORIGINAL); + log.info("Max heap memory (GB) to be used for merge sort : {} ({})", maxMemory, OAK_INDEXER_MAX_SORT_MEMORY_IN_GB); } }