Author: chetanm
Date: Thu Dec 21 04:45:16 2017
New Revision: 1818877

URL: http://svn.apache.org/viewvc?rev=1818877&view=rev
Log:
OAK-7102 - Refactor DocumentIndexer logic to enable different sort approaches

Refactor the sorting logic into a separate strategy class (StoreAndSortStrategy)

Added:
    
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java
      - copied, changed from r1818876, 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
Modified:
    
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
    
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java

Modified: 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java?rev=1818877&r1=1818876&r2=1818877&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
 Thu Dec 21 04:45:16 2017
@@ -19,38 +19,32 @@
 
 package org.apache.jackrabbit.oak.index.indexer.document.flatfile;
 
-import java.io.BufferedWriter;
 import java.io.File;
 import java.io.IOException;
 import java.util.Collections;
 
-import com.google.common.base.Stopwatch;
 import com.google.common.collect.Iterables;
 import org.apache.commons.io.FileUtils;
-import org.apache.jackrabbit.oak.commons.IOUtils;
 import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
 import org.apache.jackrabbit.oak.spi.blob.BlobStore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static com.google.common.base.StandardSystemProperty.LINE_SEPARATOR;
 import static com.google.common.collect.Iterables.size;
 
 public class FlatFileNodeStoreBuilder {
     private static final String OAK_INDEXER_USE_ZIP = "oak.indexer.useZip";
-    private static final String OAK_INDEXER_DELETE_ORIGINAL = 
"oak.indexer.deleteOriginal";
-    private static final String OAK_INDEXER_MAX_SORT_MEMORY_IN_GB = 
"oak.indexer.maxSortMemoryInGB";
     private static final String OAK_INDEXER_SORTED_FILE_PATH = 
"oak.indexer.sortedFilePath";
     private final Logger log = LoggerFactory.getLogger(getClass());
     private final Iterable<NodeStateEntry> nodeStates;
     private final File workDir;
     private Iterable<String> preferredPathElements = Collections.emptySet();
     private BlobStore blobStore;
+    private PathElementComparator comparator;
+    private NodeStateEntryWriter entryWriter;
     private long entryCount = 0;
 
     private boolean useZip = Boolean.getBoolean(OAK_INDEXER_USE_ZIP);
-    private boolean deleteOriginal = 
Boolean.parseBoolean(System.getProperty(OAK_INDEXER_DELETE_ORIGINAL, "true"));
-    private int maxMemory = 
Integer.getInteger(OAK_INDEXER_MAX_SORT_MEMORY_IN_GB, 3);
 
     public FlatFileNodeStoreBuilder(Iterable<NodeStateEntry> nodeStates, File 
workDir) {
         this.nodeStates = nodeStates;
@@ -68,7 +62,9 @@ public class FlatFileNodeStoreBuilder {
     }
 
     public FlatFileStore build() throws IOException {
-        log.info("Preferred path elements are {}", 
Iterables.toString(preferredPathElements));
+        logFlags();
+        comparator = new PathElementComparator(preferredPathElements);
+        entryWriter = new NodeStateEntryWriter(blobStore);
         FlatFileStore store = new FlatFileStore(createdSortedStoreFile(), new 
NodeStateEntryReader(blobStore),
                 size(preferredPathElements), false);
         if (entryCount > 0) {
@@ -92,45 +88,16 @@ public class FlatFileNodeStoreBuilder {
             }
         } else {
             File flatFileStoreDir = createStoreDir();
-            File storeFile = writeToStore(flatFileStoreDir, "store.json");
-            return sortStoreFile(storeFile);
+            StoreAndSortStrategy strategy = new 
StoreAndSortStrategy(nodeStates, comparator,
+                    entryWriter, flatFileStoreDir, useZip);
+            entryCount = strategy.getEntryCount();
+            return strategy.createSortedStoreFile();
         }
     }
 
-    private File sortStoreFile(File storeFile) throws IOException {
-        File sortWorkDir = new File(storeFile.getParent(), "sort-work-dir");
-        FileUtils.forceMkdir(sortWorkDir);
-        NodeStateEntrySorter sorter =
-                new NodeStateEntrySorter(new 
PathElementComparator(preferredPathElements), storeFile, sortWorkDir);
-
-        logFlags();
-
-        sorter.setUseZip(useZip);
-        sorter.setMaxMemoryInGB(maxMemory);
-        sorter.setDeleteOriginal(deleteOriginal);
-        sorter.sort();
-        return sorter.getSortedFile();
-    }
-
     private void logFlags() {
+        log.info("Preferred path elements are {}", 
Iterables.toString(preferredPathElements));
         log.info("Compression enabled while sorting : {} ({})", useZip, 
OAK_INDEXER_USE_ZIP);
-        log.info("Delete original dump from traversal : {} ({})", 
deleteOriginal, OAK_INDEXER_DELETE_ORIGINAL);
-        log.info("Max heap memory (GB) to be used for merge sort : {} ({})", 
maxMemory, OAK_INDEXER_MAX_SORT_MEMORY_IN_GB);
-    }
-
-    private File writeToStore(File dir, String fileName) throws IOException {
-        File file = new File(dir, fileName);
-        Stopwatch sw = Stopwatch.createStarted();
-        NodeStateEntryWriter entryWriter = new NodeStateEntryWriter(blobStore);
-        try (BufferedWriter w = FlatFileStoreUtils.createWriter(file, false)) {
-            for (NodeStateEntry e : nodeStates) {
-                String line = entryWriter.toString(e);
-                w.append(line).append(LINE_SEPARATOR.value());
-                entryCount++;
-            }
-        }
-        log.info("Dumped {} nodestates in json format in {} ({})",entryCount, 
sw, IOUtils.humanReadableByteCount(file.length()));
-        return file;
     }
 
     private File createStoreDir() throws IOException {

Modified: 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java?rev=1818877&r1=1818876&r2=1818877&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java
 Thu Dec 21 04:45:16 2017
@@ -28,7 +28,6 @@ import org.apache.jackrabbit.oak.spi.blo
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 
 import static com.google.common.base.Preconditions.checkState;
-import static com.google.common.base.StandardSystemProperty.LINE_SEPARATOR;
 
 public class NodeStateEntryWriter {
     private static final String OAK_CHILD_ORDER = ":childOrder";

Copied: 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java
 (from r1818876, 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java)
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java?p2=jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java&p1=jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java&r1=1818876&r2=1818877&rev=1818877&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java
 Thu Dec 21 04:45:16 2017
@@ -22,106 +22,68 @@ package org.apache.jackrabbit.oak.index.
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.IOException;
-import java.util.Collections;
 
 import com.google.common.base.Stopwatch;
-import com.google.common.collect.Iterables;
 import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.commons.IOUtils;
 import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
-import org.apache.jackrabbit.oak.spi.blob.BlobStore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static com.google.common.base.StandardSystemProperty.LINE_SEPARATOR;
-import static com.google.common.collect.Iterables.size;
 
-public class FlatFileNodeStoreBuilder {
-    private static final String OAK_INDEXER_USE_ZIP = "oak.indexer.useZip";
+class StoreAndSortStrategy {
     private static final String OAK_INDEXER_DELETE_ORIGINAL = 
"oak.indexer.deleteOriginal";
     private static final String OAK_INDEXER_MAX_SORT_MEMORY_IN_GB = 
"oak.indexer.maxSortMemoryInGB";
-    private static final String OAK_INDEXER_SORTED_FILE_PATH = 
"oak.indexer.sortedFilePath";
+
     private final Logger log = LoggerFactory.getLogger(getClass());
     private final Iterable<NodeStateEntry> nodeStates;
-    private final File workDir;
-    private Iterable<String> preferredPathElements = Collections.emptySet();
-    private BlobStore blobStore;
-    private long entryCount = 0;
-
-    private boolean useZip = Boolean.getBoolean(OAK_INDEXER_USE_ZIP);
+    private final PathElementComparator comparator;
+    private final NodeStateEntryWriter entryWriter;
+    private final File storeDir;
+    private final boolean compressionEnabled;
+    private long entryCount;
     private boolean deleteOriginal = 
Boolean.parseBoolean(System.getProperty(OAK_INDEXER_DELETE_ORIGINAL, "true"));
     private int maxMemory = 
Integer.getInteger(OAK_INDEXER_MAX_SORT_MEMORY_IN_GB, 3);
 
-    public FlatFileNodeStoreBuilder(Iterable<NodeStateEntry> nodeStates, File 
workDir) {
-        this.nodeStates = nodeStates;
-        this.workDir = workDir;
-    }
 
-    public FlatFileNodeStoreBuilder withBlobStore(BlobStore blobStore) {
-        this.blobStore = blobStore;
-        return this;
-    }
-
-    public FlatFileNodeStoreBuilder withPreferredPathElements(Iterable<String> 
preferredPathElements) {
-        this.preferredPathElements = preferredPathElements;
-        return this;
+    public StoreAndSortStrategy(Iterable<NodeStateEntry> nodeStates, 
PathElementComparator comparator,
+                                NodeStateEntryWriter entryWriter, File 
storeDir, boolean compressionEnabled) {
+        this.nodeStates = nodeStates;
+        this.comparator = comparator;
+        this.entryWriter = entryWriter;
+        this.storeDir = storeDir;
+        this.compressionEnabled = compressionEnabled;
     }
 
-    public FlatFileStore build() throws IOException {
-        log.info("Preferred path elements are {}", 
Iterables.toString(preferredPathElements));
-        FlatFileStore store = new FlatFileStore(createdSortedStoreFile(), new 
NodeStateEntryReader(blobStore),
-                size(preferredPathElements), false);
-        if (entryCount > 0) {
-            store.setEntryCount(entryCount);
-        }
-        return store;
+    public File createSortedStoreFile() throws IOException {
+        File storeFile = writeToStore(storeDir, "store.json");
+        return sortStoreFile(storeFile);
     }
 
-    private File createdSortedStoreFile() throws IOException {
-        String sortedFilePath = 
System.getProperty(OAK_INDEXER_SORTED_FILE_PATH);
-        if (sortedFilePath != null) {
-            File sortedFile = new File(sortedFilePath);
-            if (sortedFile.exists() && sortedFile.isFile() && 
sortedFile.canRead()) {
-                log.info("Reading from provided sorted file [{}] (via system 
property '{}')",
-                        sortedFile.getAbsolutePath(), 
OAK_INDEXER_SORTED_FILE_PATH);
-                return sortedFile;
-            } else {
-                String msg = String.format("Cannot read sorted file at [%s] 
configured via system property '%s'",
-                        sortedFile.getAbsolutePath(), 
OAK_INDEXER_SORTED_FILE_PATH);
-                throw new IllegalArgumentException(msg);
-            }
-        } else {
-            File flatFileStoreDir = createStoreDir();
-            File storeFile = writeToStore(flatFileStoreDir, "store.json");
-            return sortStoreFile(storeFile);
-        }
+    public long getEntryCount() {
+        return entryCount;
     }
 
     private File sortStoreFile(File storeFile) throws IOException {
         File sortWorkDir = new File(storeFile.getParent(), "sort-work-dir");
         FileUtils.forceMkdir(sortWorkDir);
         NodeStateEntrySorter sorter =
-                new NodeStateEntrySorter(new 
PathElementComparator(preferredPathElements), storeFile, sortWorkDir);
+                new NodeStateEntrySorter(comparator, storeFile, sortWorkDir);
 
         logFlags();
 
-        sorter.setUseZip(useZip);
+        sorter.setUseZip(compressionEnabled);
         sorter.setMaxMemoryInGB(maxMemory);
         sorter.setDeleteOriginal(deleteOriginal);
         sorter.sort();
         return sorter.getSortedFile();
     }
 
-    private void logFlags() {
-        log.info("Compression enabled while sorting : {} ({})", useZip, 
OAK_INDEXER_USE_ZIP);
-        log.info("Delete original dump from traversal : {} ({})", 
deleteOriginal, OAK_INDEXER_DELETE_ORIGINAL);
-        log.info("Max heap memory (GB) to be used for merge sort : {} ({})", 
maxMemory, OAK_INDEXER_MAX_SORT_MEMORY_IN_GB);
-    }
-
     private File writeToStore(File dir, String fileName) throws IOException {
+        entryCount = 0;
         File file = new File(dir, fileName);
         Stopwatch sw = Stopwatch.createStarted();
-        NodeStateEntryWriter entryWriter = new NodeStateEntryWriter(blobStore);
         try (BufferedWriter w = FlatFileStoreUtils.createWriter(file, false)) {
             for (NodeStateEntry e : nodeStates) {
                 String line = entryWriter.toString(e);
@@ -133,9 +95,8 @@ public class FlatFileNodeStoreBuilder {
         return file;
     }
 
-    private File createStoreDir() throws IOException {
-        File dir = new File(workDir, "flat-file-store");
-        FileUtils.forceMkdir(dir);
-        return dir;
+    private void logFlags() {
+        log.info("Delete original dump from traversal : {} ({})", 
deleteOriginal, OAK_INDEXER_DELETE_ORIGINAL);
+        log.info("Max heap memory (GB) to be used for merge sort : {} ({})", 
maxMemory, OAK_INDEXER_MAX_SORT_MEMORY_IN_GB);
     }
 }


Reply via email to