Author: amitj
Date: Fri Jul  1 08:03:08 2016
New Revision: 1750887

URL: http://svn.apache.org/viewvc?rev=1750887&view=rev
Log:
OAK-4476: Option to check datastore consistency in oak-run

* New command 'datastorecheck'
* Supports dumping all blob ids as well as all blob references from the blob 
store
* Extracted some common code into FileIOUtils

Added:
    
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
    
jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
Removed:
    
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DumpDataStoreReferencesCommand.java
Modified:
    
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
    
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
    jackrabbit/oak/trunk/oak-run/README.md
    jackrabbit/oak/trunk/oak-run/pom.xml
    
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Mode.java
    
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Utils.java

Modified: 
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
 Fri Jul  1 08:03:08 2016
@@ -18,25 +18,36 @@ package org.apache.jackrabbit.oak.common
 
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
+import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.Comparator;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Set;
 
 import javax.annotation.Nullable;
 
 import com.google.common.base.Charsets;
 import com.google.common.base.Function;
+import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.PeekingIterator;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.LineIterator;
 
 import static com.google.common.collect.Sets.newHashSet;
 import static com.google.common.io.Closeables.close;
+import static com.google.common.io.Files.move;
 import static com.google.common.io.Files.newWriter;
+import static java.io.File.createTempFile;
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static 
org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
 import static 
org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
+import static 
org.apache.jackrabbit.oak.commons.sort.ExternalSort.mergeSortedFiles;
+import static org.apache.jackrabbit.oak.commons.sort.ExternalSort.sortInBatch;
 
 /**
  * Simple File utils
@@ -46,6 +57,49 @@ public final class FileIOUtils {
     private FileIOUtils() {
     }
 
+    public final static Comparator<String> lexComparator = new 
Comparator<String>() {
+        @Override public int compare(String s1, String s2) {
+            return s1.compareTo(s2);
+        }
+    };
+
+    /**
+     * Sorts the given file externally using the {@link #lexComparator} and 
removes duplicates.
+     *
+     * @param file file whose contents needs to be sorted
+     */
+    public static void sort(File file) throws IOException {
+        File sorted = createTempFile("temp", null);
+        merge(sortInBatch(file, lexComparator, true), sorted);
+        move(sorted, file);
+    }
+
+    /**
+     * Sorts the given file externally with the given comparator and removes 
duplicates.
+     *
+     * @param file file whose contents needs to be sorted
+     * @param comparator to compare
+     * @throws IOException
+     */
+    public static void sort(File file, Comparator<String> comparator) throws 
IOException {
+        File sorted = createTempFile("temp", null);
+        merge(sortInBatch(file, comparator, true), sorted);
+        move(sorted, file);
+    }
+
+    /**
+     * Merges a list of files after sorting with the {@link #lexComparator}.
+     *
+     * @param files files to merge
+     * @param output merge output file
+     * @throws IOException
+     */
+    public static void merge(List<File> files, File output) throws IOException 
{
+        mergeSortedFiles(
+            files,
+            output, lexComparator, true);
+    }
+
     /**
      * Writes a string as a new line into the given buffered writer and 
optionally
      * escapes the line for line breaks.
@@ -157,4 +211,94 @@ public final class FileIOUtils {
             return delegate.compare(func.apply(s1), func.apply(s2));
         }
     }
+
+    /**
+     * FileLineDifferenceIterator class which iterates over the difference of 
2 files line by line.
+     *
+     * If there is a scope for lines in files containing line break characters 
it should be
 ensured that both the files are written with
+     * {@link #writeAsLine(BufferedWriter, String, boolean)} with true to 
escape line break
+     * characters.
+     */
+    public static class FileLineDifferenceIterator extends 
AbstractIterator<String> implements Closeable {
+        private final PeekingIterator<String> peekMarked;
+        private final LineIterator marked;
+        private final LineIterator all;
+        private Function<String, String> transformer = new Function<String, 
String>() {
+            @Override
+            public String apply(String input) {
+                return input;
+            }
+        };
+
+        public FileLineDifferenceIterator(LineIterator marked, LineIterator 
available) throws IOException {
+            this(marked, available, null);
+        }
+
+        public FileLineDifferenceIterator(File marked, File available,
+            @Nullable Function<String, String> transformer) throws IOException 
{
+            this(FileUtils.lineIterator(marked), 
FileUtils.lineIterator(available), transformer);
+        }
+
+        public FileLineDifferenceIterator(LineIterator marked, LineIterator 
available,
+            @Nullable Function<String, String> transformer) throws IOException 
{
+            this.marked = marked;
+            this.peekMarked = Iterators.peekingIterator(marked);
+            this.all = available;
+            if (transformer != null) {
+                this.transformer = transformer;
+            }
+        }
+
+        @Override
+        protected String computeNext() {
+            String diff = computeNextDiff();
+            if (diff == null) {
+                close();
+                return endOfData();
+            }
+            return diff;
+        }
+
+        @Override
+        public void close() {
+            LineIterator.closeQuietly(marked);
+            LineIterator.closeQuietly(all);
+        }
+
+        private String computeNextDiff() {
+            if (!all.hasNext()) {
+                return null;
+            }
+
+            //Marked is exhausted; the rest of 'all' is part of the diff
+            if (!peekMarked.hasNext()) {
+                return all.next();
+            }
+
+            String diff = null;
+            while (all.hasNext() && diff == null) {
+                diff = all.next();
+                while (peekMarked.hasNext()) {
+                    String marked = peekMarked.peek();
+                    int comparisonResult = 
transformer.apply(diff).compareTo(transformer.apply((marked)));
+                    if (comparisonResult > 0) {
+                        //Extra entries in marked. Ignore them and move on
+                        peekMarked.next();
+                    } else if (comparisonResult == 0) {
+                        //Matching entry found in marked move past it. Not a
+                        //diff candidate
+                        peekMarked.next();
+                        diff = null;
+                        break;
+                    } else {
+                        //This entry is not found in marked entries
+                        //hence part of diff
+                        return diff;
+                    }
+                }
+            }
+            return diff;
+        }
+    }
 }

Modified: 
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
 Fri Jul  1 08:03:08 2016
@@ -18,13 +18,17 @@
  */
 package org.apache.jackrabbit.oak.commons;
 
+import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.nio.CharBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -32,17 +36,21 @@ import java.util.Random;
 import java.util.Set;
 
 import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import org.junit.Assert;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 
+import static com.google.common.collect.Lists.newArrayList;
 import static com.google.common.collect.Sets.newHashSet;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.lexComparator;
+import static 
org.apache.jackrabbit.oak.commons.FileIOUtils.lineBreakAwareComparator;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.readStringsAsSet;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
 import static 
org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
 import static 
org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
 
 
 /**
@@ -61,11 +69,11 @@ public class FileIOUtilsTest {
         File f = folder.newFile();
 
         int count = writeStrings(added.iterator(), f, false);
-        Assert.assertEquals(added.size(), count);
+        assertEquals(added.size(), count);
 
         Set<String> retrieved = readStringsAsSet(new FileInputStream(f), 
false);
 
-        Assert.assertEquals(added, retrieved);
+        assertEquals(added, retrieved);
     }
 
     @Test
@@ -73,10 +81,10 @@ public class FileIOUtilsTest {
         Set<String> added = newHashSet(getLineBreakStrings());
         File f = folder.newFile();
         int count = writeStrings(added.iterator(), f, true);
-        Assert.assertEquals(added.size(), count);
+        assertEquals(added.size(), count);
 
         Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
-        Assert.assertEquals(added, retrieved);
+        assertEquals(added, retrieved);
     }
 
     @Test
@@ -88,38 +96,71 @@ public class FileIOUtilsTest {
             added.add(getRandomTestString());
         }
         int count = writeStrings(added.iterator(), f, true);
-        Assert.assertEquals(added.size(), count);
+        assertEquals(added.size(), count);
 
         Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
-        Assert.assertEquals(added, retrieved);
+        assertEquals(added, retrieved);
     }
 
     @Test
     public void compareWithLineBreaks() throws Exception {
-        Comparator<String> lexCmp = new Comparator<String>() {
-            @Override public int compare(String s1, String s2) {
-                return s1.compareTo(s2);
-            }
-        };
-        Comparator<String> cmp = FileIOUtils.lineBreakAwareComparator(lexCmp);
+        Comparator<String> cmp = lineBreakAwareComparator(lexComparator);
 
         List<String> strs = getLineBreakStrings();
-        Collections.sort(strs, lexCmp);
+        Collections.sort(strs);
 
         // Escape line breaks and then compare with string sorted
         List<String> escapedStrs = escape(getLineBreakStrings());
         Collections.sort(escapedStrs, cmp);
 
-        Assert.assertEquals(strs, unescape(escapedStrs));
+        assertEquals(strs, unescape(escapedStrs));
+    }
+
+    @Test
+    public void sortTest() throws IOException {
+        List<String> list = newArrayList("a", "z", "e", "b");
+        File f = folder.newFile();
+        writeStrings(list.iterator(), f, false);
+        sort(f);
+
+        BufferedReader reader =
+            new BufferedReader(new InputStreamReader(new FileInputStream(f), 
Charsets.UTF_8));
+        String line = null;
+        List<String> retrieved = newArrayList();
+        while ((line = reader.readLine()) != null) {
+            retrieved.add(line);
+        }
+        IOUtils.closeQuietly(reader);
+        Collections.sort(list);
+        assertArrayEquals(Arrays.toString(list.toArray()), list.toArray(), 
retrieved.toArray());
+    }
+
+    @Test
+    public void sortCustomComparatorTest() throws IOException {
+        List<String> list = getLineBreakStrings();
+        File f = folder.newFile();
+        writeStrings(list.iterator(), f, true);
+        sort(f, lineBreakAwareComparator(lexComparator));
+
+        BufferedReader reader =
+            new BufferedReader(new InputStreamReader(new FileInputStream(f), 
Charsets.UTF_8));
+        String line = null;
+        List<String> retrieved = newArrayList();
+        while ((line = reader.readLine()) != null) {
+            retrieved.add(unescapeLineBreaks(line));
+        }
+        IOUtils.closeQuietly(reader);
+        Collections.sort(list);
+        assertArrayEquals(Arrays.toString(list.toArray()), list.toArray(), 
retrieved.toArray());
     }
 
     private static List<String> getLineBreakStrings() {
-        return Lists.newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc",
+        return newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc",
             "/a", "/a/b\nc", "/a/b\rd", "/a/b\r\ne", "/a/c");
     }
 
     private static List<String> escape(List<String> list) {
-        List<String> escaped = Lists.newArrayList();
+        List<String> escaped = newArrayList();
         for (String s : list) {
             escaped.add(escapeLineBreak(s));
         }
@@ -127,7 +168,7 @@ public class FileIOUtilsTest {
     }
 
     private static List<String> unescape(List<String> list) {
-        List<String> unescaped = Lists.newArrayList();
+        List<String> unescaped = newArrayList();
         for (String s : list) {
             unescaped.add(unescapeLineBreaks(s));
         }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
 Fri Jul  1 08:03:08 2016
@@ -18,7 +18,6 @@ package org.apache.jackrabbit.oak.plugin
 
 import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
-import java.io.Closeable;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileWriter;
@@ -46,11 +45,8 @@ import com.google.common.base.Function;
 import com.google.common.base.Joiner;
 import com.google.common.base.StandardSystemProperty;
 import com.google.common.base.Stopwatch;
-import com.google.common.collect.AbstractIterator;
-import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import com.google.common.collect.PeekingIterator;
 import com.google.common.io.Closeables;
 import com.google.common.io.Files;
 import com.google.common.util.concurrent.ListenableFutureTask;
@@ -59,6 +55,7 @@ import org.apache.commons.io.LineIterato
 import org.apache.jackrabbit.core.data.DataRecord;
 import org.apache.jackrabbit.core.data.DataStoreException;
 import org.apache.jackrabbit.oak.commons.FileIOUtils;
+import 
org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
 import org.apache.jackrabbit.oak.commons.IOUtils;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
 import 
org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType;
@@ -79,14 +76,22 @@ public class MarkSweepGarbageCollector i
 
     public static final Logger LOG = 
LoggerFactory.getLogger(MarkSweepGarbageCollector.class);
 
-    public static final String NEWLINE = 
StandardSystemProperty.LINE_SEPARATOR.value();
-
     public static final String TEMP_DIR = 
StandardSystemProperty.JAVA_IO_TMPDIR.value();
 
     public static final int DEFAULT_BATCH_COUNT = 2048;
     
     public static final String DELIM = ",";
-    
+
+    private static final Function<String, String> transformer = new 
Function<String, String>() {
+        @Nullable
+        @Override
+        public String apply(@Nullable String input) {
+            if (input != null) {
+                return input.split(DELIM)[0];
+            }
+            return "";
+        }};
+
     /** The last modified time before current time of blobs to consider for 
garbage collection. */
     private final long maxLastModifiedInterval;
 
@@ -296,7 +301,8 @@ public class MarkSweepGarbageCollector i
 
         FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
                 fs.getMarkedRefs(),
-                fs.getAvailableRefs());
+                fs.getAvailableRefs(),
+                transformer);
         calculateDifference(fs, iter);
 
         LOG.debug("Ending difference phase of the garbage collector");
@@ -542,12 +548,12 @@ public class MarkSweepGarbageCollector i
                     "Blob garbage collection [{}]", count.get());
             // sort the marked references with the first part of the key
             GarbageCollectorFileState.sort(fs.getMarkedRefs(), 
-                                              new Comparator<String>() {
-                                                    @Override
-                                                    public int compare(String 
s1, String s2) {
-                                                        return 
s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
-                                                    }
-                                                });
+                new Comparator<String>() {
+                    @Override
+                    public int compare(String s1, String s2) {
+                        return 
s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
+                    }
+                });
         } finally {
             IOUtils.closeQuietly(writer);
         }
@@ -566,7 +572,6 @@ public class MarkSweepGarbageCollector i
         long candidates = 0;
         
         try {
-            Stopwatch sw = Stopwatch.createStarted();
             LOG.info("Starting blob consistency check");
     
             // Find all blobs available in the blob store
@@ -585,7 +590,10 @@ public class MarkSweepGarbageCollector i
             }
             
             LOG.trace("Starting difference phase of the consistency check");
-            FileLineDifferenceIterator iter = new 
FileLineDifferenceIterator(fs.getAvailableRefs(), fs.getMarkedRefs());
+            FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
+                fs.getAvailableRefs(),
+                fs.getMarkedRefs(),
+                transformer);
             candidates = calculateDifference(fs, iter);
             LOG.trace("Ending difference phase of the consistency check");
             
@@ -647,81 +655,6 @@ public class MarkSweepGarbageCollector i
         }
     }
 
-
-    /**
-     * FileLineDifferenceIterator class which iterates over the difference of 
2 files line by line.
-     */
-    static class FileLineDifferenceIterator extends AbstractIterator<String> 
implements Closeable {
-        private final PeekingIterator<String> peekMarked;
-        private final LineIterator marked;
-        private final LineIterator all;
-
-        public FileLineDifferenceIterator(File marked, File available) throws 
IOException {
-            this(FileUtils.lineIterator(marked), 
FileUtils.lineIterator(available));
-        }
-
-        public FileLineDifferenceIterator(LineIterator marked, LineIterator 
available) throws IOException {
-            this.marked = marked;
-            this.peekMarked = Iterators.peekingIterator(marked);
-            this.all = available;
-        }
-
-        @Override
-        protected String computeNext() {
-            String diff = computeNextDiff();
-            if (diff == null) {
-                close();
-                return endOfData();
-            }
-            return diff;
-        }
-
-        @Override
-        public void close() {
-            LineIterator.closeQuietly(marked);
-            LineIterator.closeQuietly(all);
-        }
-        
-        private String getKey(String row) {
-            return row.split(DELIM)[0];
-        }
-        
-        private String computeNextDiff() {
-            if (!all.hasNext()) {
-                return null;
-            }
-
-            //Marked finish the rest of all are part of diff
-            if (!peekMarked.hasNext()) {
-                return all.next();
-            }
-            
-            String diff = null;
-            while (all.hasNext() && diff == null) {
-                diff = all.next();
-                while (peekMarked.hasNext()) {
-                    String marked = peekMarked.peek();
-                    int comparisonResult = 
getKey(diff).compareTo(getKey(marked));
-                    if (comparisonResult > 0) {
-                        //Extra entries in marked. Ignore them and move on
-                        peekMarked.next();
-                    } else if (comparisonResult == 0) {
-                        //Matching entry found in marked move past it. Not a
-                        //dif candidate
-                        peekMarked.next();
-                        diff = null;
-                        break;
-                    } else {
-                        //This entry is not found in marked entries
-                        //hence part of diff
-                        return diff;
-                    }
-                }
-            }
-            return diff;
-        }
-    }
-
     /**
      * Provides a readable string for given timestamp
      */

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
 Fri Jul  1 08:03:08 2016
@@ -28,17 +28,20 @@ import java.util.List;
 import java.util.Random;
 import java.util.TreeSet;
 
+import javax.annotation.Nullable;
+
+import com.google.common.base.Function;
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
 import com.google.common.base.StandardSystemProperty;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import org.apache.commons.io.LineIterator;
+import 
org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
 import org.junit.Test;
 
 import static java.util.Arrays.asList;
 import static 
org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
-import static 
org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.FileLineDifferenceIterator;
 import static org.hamcrest.CoreMatchers.is;
 import static org.junit.Assert.assertThat;
 
@@ -105,7 +108,7 @@ public class FileLineDifferenceIteratorT
     public void testDiffLineBreakChars() throws IOException {
         List<String> all = getLineBreakStrings();
         List<String> marked = getLineBreakStrings();
-        List<String> diff = remove(marked, 3, 2);
+        remove(marked, 3, 2);
 
         // without escaping, the line breaks will be resolved
         assertDiff(Joiner.on(",").join(marked), Joiner.on(",").join(all),
@@ -122,6 +125,18 @@ public class FileLineDifferenceIteratorT
         assertDiff(Joiner.on(",").join(marked), Joiner.on(",").join(all), 
diff);
     }
 
+    @Test
+    public void testDiffTransform() throws IOException {
+        assertTransformed("a:x,b:y", "a:1,b:2,c:3,e:4,h", asList("c:3", "e:4", 
"h"));
+        assertTransformed("a,b,d,e", "a,b,c", asList("c"));
+        assertTransformed("a:1,b:2,d:3,e:4,f:5", "a:z,b:y,c:x,f:w", 
asList("c:x"));
+        assertTransformed("a,b,d,e,f", "a,b,c,f,h", asList("c", "h"));
+        assertTransformed("3:1,7:6", "2:0,3:6,5:3,9:1", asList("2:0", "5:3", 
"9:1"));
+        assertTransformed("", "", Collections.<String> emptyList());
+        assertTransformed("", "a, b", asList("a", "b"));
+        assertTransformed("", "a:4, b:1", asList("a:4", "b:1"));
+    }
+
     private static List<String> getLineBreakStrings() {
         return Lists.newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc",
             "/a", "/a/b\nc", "/a/b\rd", "/a/b\r\ne", "/a/c");
@@ -149,7 +164,7 @@ public class FileLineDifferenceIteratorT
         Iterator<String> itr = createItr(all, marked);
         assertThat("marked: " + marked + " all: " + all, 
ImmutableList.copyOf(itr), is(diff));
     }
-    
+
     private static void assertDiff(String marked, String all, List<String> 
diff) throws IOException {
         Iterator<String> itr = createItr(marked, all);
         assertThat("marked: " + marked + " all: " + all, 
ImmutableList.copyOf(itr), is(diff));
@@ -165,4 +180,18 @@ public class FileLineDifferenceIteratorT
         return new LineIterator(new StringReader(lines));
     }
 
+    private static void assertTransformed(String marked, String all, 
List<String> diff) throws IOException {
+        Iterator<String> itr = new FileLineDifferenceIterator(lineItr(marked), 
lineItr(all),
+            new Function<String, String>() {
+                @Nullable @Override
+                public String apply(@Nullable String input) {
+                    if (input != null) {
+                        return input.split(":")[0];
+                    }
+                    return null;
+                }
+            });
+
+        assertThat("marked: " + marked + " all: " + all, 
ImmutableList.copyOf(itr), is(diff));
+    }
 }

Modified: jackrabbit/oak/trunk/oak-run/README.md
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/README.md?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/README.md (original)
+++ jackrabbit/oak/trunk/oak-run/README.md Fri Jul  1 08:03:08 2016
@@ -26,7 +26,7 @@ The following runmodes are currently ava
     * garbage         : Identifies blob garbage on a DocumentMK repository
     * tarmkdiff       : Show changes between revisions on TarMk
     * tarmkrecovery   : Lists candidates for head journal entries
-    * dumpdatastorerefs : Dump all the blob references used to a file 
+    * datastorecheck  : Consistency checker for data store 
     * resetclusterid  : Resets the cluster id   
     * help            : Print a list of available runmodes
     
@@ -975,15 +975,60 @@ The following options are available:
 
     --version-v10           - Uses V10 version repository reading (see 
OAK-2527)
 
-Oak Dump DataStore References
------------------------------
+Oak DataStore Check
+-------------------
 
-Dumps all the DataStore/BlobStore references used. Use the following commmand
+Consistency checker for the DataStore.
+It can also be used to list all the blob references in the node store and all the 
blob ids available in the data store. 
+Use the following command:
+
+    $ java -jar oak-run-*.jar datastorecheck [--id] [--ref] [--consistency] \
+            [--store <path>|<mongo_uri>] \
+            [--s3ds <s3ds_config>|--fds <fds_config>] \
+            [--dump <path>]
 
-    $ java -jar oak-run-*.jar dumpdatastorerefs \
-            { /path/to/oak/repository | mongodb://host:port/database } 
[/path/to/dump]
+The following options are available:
+
+    --id             - List all the ids in the data store
+    --ref            - List all the blob references in the node store
+    --consistency    - Lists all the missing blobs by doing a consistency check
+    At least one of the above should be specified
+    
+    --store          - Path to the segment store or mongo uri (Required for 
--ref & --consistency option above)
+    --dump           - Path where to dump the files (Optional). Otherwise, 
files will be dumped in the user tmp directory.
+    --s3ds           - Path to the S3DataStore configuration file
+    --fds            - Path to the FileDataStore configuration file ('path' 
property is mandatory)
+
+Note:
+For using S3DataStore the following additional jars have to be downloaded
+    - 
[commons-logging-1.1.3.jar](http://central.maven.org/maven2/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3.jar)
+    - 
[httpcore-4.4.4.jar](http://central.maven.org/maven2/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar)
+    - 
[aws-java-sdk-1.10.76.jar](http://central.maven.org/maven2/com/amazonaws/aws-java-sdk/1.10.76/aws-java-sdk-1.10.76.jar)
+    
+The command to be executed for S3DataStore
+
+    java -classpath 
oak-run-*.jar:httpcore-4.4.4.jar:aws-java-sdk-osgi-1.10.76.jar:commons-logging-1.1.3.jar
 \
+        org.apache.jackrabbit.oak.run.Main \
+        datastorecheck --id --ref --consistency \
+        --store <path>|<mongo_uri> \
+        --s3ds <s3ds_config> \
+        --dump <dump_path>
+
+The config files should be formatted according to the OSGi configuration admin 
specification
+
+    E.g.
+    cat > org.apache.jackrabbit.oak.plugins.S3DataStore.config << EOF 
+    accessKey="XXXXXXXXX"
+    secretKey="YYYYYY"
+    s3Bucket="bucket1"
+    s3Region="region1"
+    EOF
+    
+    cat > org.apache.jackrabbit.oak.plugins.FileDataStore.config << EOF 
+    path="/data/datastore"
+    EOF        
+    
 
-This will create a dump file with name starting with 'marked-'.The dump path 
is optional and if not specified the file will be created in the user tmp 
directory.
 
 Reset Cluster Id
 ---------------

Modified: jackrabbit/oak/trunk/oak-run/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/pom.xml?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-run/pom.xml Fri Jul  1 08:03:08 2016
@@ -423,11 +423,24 @@
     </dependency>
 
     <dependency>
+      <groupId>org.apache.jackrabbit</groupId>
+      <artifactId>oak-blob-cloud</artifactId>
+      <version>${project.version}</version>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.sling</groupId>
       <artifactId>org.apache.sling.testing.osgi-mock</artifactId>
       <scope>compile</scope>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.configadmin</artifactId>
+      <version>1.8.8</version>
+    </dependency>
+
     <!-- Findbugs annotations -->
     <dependency>
       <groupId>com.google.code.findbugs</groupId>
@@ -440,7 +453,7 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
-    
+
   </dependencies>
 
   <profiles>

Added: 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java?rev=1750887&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
 Fri Jul  1 08:03:08 2016
@@ -0,0 +1,298 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import java.io.BufferedWriter;
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import javax.annotation.Nullable;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.Maps;
+import com.google.common.io.Closer;
+import com.google.common.io.Files;
+import com.mongodb.MongoClient;
+import com.mongodb.MongoClientURI;
+import com.mongodb.MongoURI;
+import joptsimple.ArgumentAcceptingOptionSpec;
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+import joptsimple.OptionSpecBuilder;
+import 
org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
+import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
+import 
org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.segment.file.FileStore;
+import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
+
+import static com.google.common.base.StandardSystemProperty.JAVA_IO_TMPDIR;
+import static com.google.common.base.Stopwatch.createStarted;
+import static com.google.common.io.Closeables.close;
+import static java.util.Arrays.asList;
+import static org.apache.commons.io.FileUtils.forceDelete;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
+import static 
org.apache.jackrabbit.oak.plugins.segment.FileStoreHelper.openFileStore;
+
+/**
+ * Command to check data store consistency and also optionally retrieve ids & 
references
+ */
+public class DataStoreCheckCommand implements Command {
+    private static final String DELIM = ",";
+
+    @Override
+    public void execute(String... args) throws Exception {
+        OptionParser parser = new OptionParser();
+        parser.allowsUnrecognizedOptions();
+
+        String helpStr =
+            "datastorecheck [--id] [--ref] [--consistency] [--store 
<path>|<mongo_uri>] "
+                + "[--s3ds <s3ds_config>|--fds <fds_config>] [--dump <path>]";
+
+        Closer closer = Closer.create();
+        try {
+            // Options for operations requested
+            OptionSpecBuilder idOp = parser.accepts("id", "Get ids");
+            OptionSpecBuilder refOp = parser.accepts("ref", "Get references");
+            OptionSpecBuilder consistencyOp = parser.accepts("consistency", 
"Check consistency");
+
+            // Node Store - needed for --ref, --consistency
+            ArgumentAcceptingOptionSpec<String> store = 
parser.accepts("store", "Node Store")
+                .requiredIf(refOp, 
consistencyOp).withRequiredArg().ofType(String.class);
+            // Optional argument to specify the dump path
+            ArgumentAcceptingOptionSpec<String> dump = parser.accepts("dump", 
"Dump Path")
+                .withRequiredArg().ofType(String.class);
+            OptionSpec
+                segmentTar = parser.accepts("segment-tar", "Use 
oak-segment-tar instead of oak-segment");
+
+            OptionSpec<?> help = parser.acceptsAll(asList("h", "?", "help"),
+                "show help").forHelp();
+
+            // Required rules (any one of --id, --ref, --consistency)
+            idOp.requiredUnless(refOp, consistencyOp);
+            refOp.requiredUnless(idOp, consistencyOp);
+            consistencyOp.requiredUnless(idOp, refOp);
+
+            OptionSet options = null;
+            try {
+                options = parser.parse(args);
+            } catch (Exception e) {
+                System.err.println(e);
+                parser.printHelpOn(System.err);
+                return;
+            }
+
+            if (options.has(help)) {
+                parser.printHelpOn(System.out);
+                return;
+            }
+
+            String dumpPath = JAVA_IO_TMPDIR.value();
+            if (options.has(dump)) {
+                dumpPath = options.valueOf(dump);
+            }
+
+            GarbageCollectableBlobStore blobStore  = null;
+            BlobReferenceRetriever marker = null;
+            if (options.has(store)) {
+                String source = options.valueOf(store);
+                if (source.startsWith(MongoURI.MONGODB_PREFIX)) {
+                    MongoClientURI uri = new MongoClientURI(source);
+                    MongoClient client = new MongoClient(uri);
+                    DocumentNodeStore nodeStore =
+                        new 
DocumentMK.Builder().setMongoDB(client.getDB(uri.getDatabase())).getNodeStore();
+                    closer.register(Utils.asCloseable(nodeStore));
+                    blobStore = (GarbageCollectableBlobStore) 
nodeStore.getBlobStore();
+                    marker = new DocumentBlobReferenceRetriever(nodeStore);
+                } else if (options.has(segmentTar)) {
+                    marker = SegmentTarUtils.newBlobReferenceRetriever(source, 
closer);
+                } else {
+                    FileStore fileStore = openFileStore(source);
+                    closer.register(Utils.asCloseable(fileStore));
+                    marker = new 
SegmentBlobReferenceRetriever(fileStore.getTracker());
+                }
+            }
+
+            // Initialize S3/FileDataStore if configured
+            GarbageCollectableBlobStore dataStore  = 
Utils.bootstrapDataStore(args, closer);
+            if (dataStore != null) {
+                blobStore = dataStore;
+            }
+
+            // blob store still not initialized means configuration not 
supported
+            if (blobStore == null) {
+                System.err.println("Operation not defined for SegmentNodeStore 
without external datastore");
+                parser.printHelpOn(System.err);
+                return;
+            }
+
+            FileRegister register = new FileRegister(options);
+            closer.register(register);
+
+            if (options.has(idOp) || options.has(consistencyOp)) {
+                retrieveBlobIds(blobStore,
+                    register.createFile(idOp, dumpPath));
+            }
+
+            if (options.has(refOp) || options.has(consistencyOp)) {
+                retrieveBlobReferences(blobStore, marker,
+                    register.createFile(refOp, dumpPath));
+            }
+
+            if (options.has(consistencyOp)) {
+                checkConsistency(register.get(idOp), register.get(refOp),
+                    register.createFile(consistencyOp, dumpPath));
+            }
+        } catch (Throwable t) {
+            t.printStackTrace();
+        } finally {
+            closer.close();
+        }
+    }
+
+    static class FileRegister implements Closeable {
+        Map<OptionSpec, File> opFiles = Maps.newHashMap();
+        String suffix = String.valueOf(System.currentTimeMillis());
+        OptionSet options;
+
+        public FileRegister(OptionSet options) {
+            this.options = options;
+        }
+
+        public File createFile(OptionSpec spec, String path) {
+            File f = new File(path, spec.toString() + suffix);
+            opFiles.put(spec, f);
+            return f;
+        }
+
+        public File get(OptionSpec spec) {
+            return opFiles.get(spec);
+        }
+
+        @Override
+        public void close() throws IOException {
+            Iterator<Map.Entry<OptionSpec, File>> iterator = 
opFiles.entrySet().iterator();
+            while (iterator.hasNext()) {
+                Map.Entry<OptionSpec, File> entry = iterator.next();
+                File f = entry.getValue();
+                if (options.has(entry.getKey()) && f.length() != 0) {
+                    System.out.println(entry.getKey().toString() + " - " + 
f.getAbsolutePath());
+                } else {
+                    if (f.exists()) {
+                        forceDelete(f);
+                    }
+                }
+            }
+        }
+    }
+
+    private static void checkConsistency(File ids, File refs, File missing) 
throws IOException {
+        System.out.println("Starting consistency check");
+        Stopwatch watch = createStarted();
+
+        FileLineDifferenceIterator iter = new FileLineDifferenceIterator(ids, 
refs, new Function<String, String>() {
+            @Nullable
+            @Override
+            public String apply(@Nullable String input) {
+                if (input != null) {
+                    return input.split(DELIM)[0];
+                }
+                return "";
+            }});
+        long candidates = writeStrings(iter, missing, true);
+
+        System.out.println("Consistency check found " + candidates + " missing 
blobs");
+        if (candidates > 0) {
+            System.out.println("Consistency check failure for the data store");
+        }
+        System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + 
" seconds");
+    }
+
+    private static void retrieveBlobReferences(GarbageCollectableBlobStore 
blobStore,
+            BlobReferenceRetriever marker, File marked) throws IOException {
+        final BufferedWriter writer = Files.newWriter(marked, Charsets.UTF_8);
+        final AtomicInteger count = new AtomicInteger();
+        boolean threw = true;
+        try {
+            final Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
+            final GarbageCollectableBlobStore finalBlobStore = blobStore;
+
+            System.out.println("Starting dump of blob references");
+            Stopwatch watch = createStarted();
+
+            marker.collectReferences(
+                new ReferenceCollector() {
+                    @Override
+                    public void addReference(String blobId, String nodeId) {
+                        try {
+                            Iterator<String> idIter = 
finalBlobStore.resolveChunks(blobId);
+
+                            while (idIter.hasNext()) {
+                                String id = delimJoiner.join(idIter.next(), 
nodeId);
+                                count.getAndIncrement();
+                                writeAsLine(writer, id, true);
+                            }
+                        } catch (Exception e) {
+                            throw new RuntimeException("Error in retrieving 
references", e);
+                        }
+                    }
+                }
+            );
+            writer.flush();
+            sort(marked, new Comparator<String>() {
+                @Override
+                public int compare(String s1, String s2) {
+                    return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
+                }
+            });
+            System.out.println(count.get() + " blob references found");
+            System.out.println("Finished in " + 
watch.elapsed(TimeUnit.SECONDS) + " seconds");
+            threw = false;
+        } finally {
+            close(writer, threw);
+        }
+    }
+
+    private static void retrieveBlobIds(GarbageCollectableBlobStore blobStore, 
File blob)
+        throws Exception {
+
+        System.out.println("Starting dump of blob ids");
+        Stopwatch watch = createStarted();
+
+        Iterator<String> blobIter = blobStore.getAllChunkIds(0);
+        int count = writeStrings(blobIter, blob, true);
+
+        sort(blob);
+        System.out.println(count + " blob ids found");
+        System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + 
" seconds");
+    }
+}

Modified: 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Mode.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Mode.java?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Mode.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Mode.java
 Fri Jul  1 08:03:08 2016
@@ -41,7 +41,7 @@ enum Mode {
     GARBAGE("garbage", new GarbageCommand()),
     TARMKDIFF("tarmkdiff", new FileStoreDiffCommand()),
     TARMKRECOVERY("tarmkrecovery", new FileStoreRevisionRecoveryCommand()),
-    DUMPDATASTOREREFS("dumpdatastorerefs", new 
DumpDataStoreReferencesCommand()),
+    DUMPDATASTOREREFS("dumpdatastorerefs", new DataStoreCheckCommand()),
     RESETCLUSTERID("resetclusterid", new ResetClusterIdCommand()),
     PERSISTENTCACHE("persistentcache", new PersistentCacheCommand()),
     HELP("help", new HelpCommand());

Modified: 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Utils.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Utils.java?rev=1750887&r1=1750886&r2=1750887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Utils.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Utils.java
 Fri Jul  1 08:03:08 2016
@@ -18,21 +18,38 @@
 package org.apache.jackrabbit.oak.run;
 
 import static java.util.Arrays.asList;
+import static org.apache.jackrabbit.oak.commons.PropertiesUtil.populate;
 
 import java.io.Closeable;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.util.Dictionary;
+import java.util.Enumeration;
 import java.util.List;
+import java.util.Properties;
 
+import javax.annotation.Nullable;
+import javax.jcr.RepositoryException;
+
+import com.google.common.collect.Maps;
 import com.google.common.io.Closer;
 import com.mongodb.MongoClientURI;
 import com.mongodb.MongoURI;
+import joptsimple.ArgumentAcceptingOptionSpec;
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
+import org.apache.felix.cm.file.ConfigurationHandler;
+import org.apache.jackrabbit.core.data.DataStore;
+import org.apache.jackrabbit.core.data.DataStoreException;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.OakFileDataStore;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedS3DataStore;
 import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
 import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
 import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
 import org.apache.jackrabbit.oak.plugins.segment.file.FileStore;
+import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
 
 class Utils {
@@ -87,6 +104,44 @@ class Utils {
         return SegmentUtils.bootstrapNodeStore(src, closer);
     }
 
+    @Nullable
+    public static GarbageCollectableBlobStore bootstrapDataStore(String[] 
args, Closer closer)
+        throws IOException, RepositoryException {
+        OptionParser parser = new OptionParser();
+        parser.allowsUnrecognizedOptions();
+
+        ArgumentAcceptingOptionSpec<String> s3dsConfig =
+            parser.accepts("s3ds", "S3DataStore 
config").withRequiredArg().ofType(String.class);
+        ArgumentAcceptingOptionSpec<String> fdsConfig =
+            parser.accepts("fds", "FileDataStore 
config").withRequiredArg().ofType(String.class);
+
+        OptionSet options = parser.parse(args);
+
+        if (!options.has(s3dsConfig) && !options.has(fdsConfig)) {
+            return null;
+        }
+
+        DataStore delegate;
+        if (options.has(s3dsConfig)) {
+            SharedS3DataStore s3ds = new SharedS3DataStore();
+            String cfgPath = s3dsConfig.value(options);
+            Properties props = loadAndTransformProps(cfgPath);
+            s3ds.setProperties(props);
+            s3ds.init(null);
+            delegate = s3ds;
+        } else {
+            delegate = new OakFileDataStore();
+            String cfgPath = fdsConfig.value(options);
+            Properties props = loadAndTransformProps(cfgPath);
+            populate(delegate, Maps.fromProperties(props), true);
+            delegate.init(null);
+        }
+        DataStoreBlobStore blobStore = new DataStoreBlobStore(delegate);
+        closer.register(Utils.asCloseable(blobStore));
+
+        return blobStore;
+    }
+
     static Closeable asCloseable(final FileStore fs) {
         return new Closeable() {
 
@@ -116,4 +171,30 @@ class Utils {
             }
         };
     }
+
+    static Closeable asCloseable(final DataStoreBlobStore blobStore) {
+        return new Closeable() {
+
+            @Override
+            public void close() throws IOException {
+                try {
+                    blobStore.close();
+                } catch (DataStoreException e) {
+                    throw new IOException(e);
+                }
+            }
+        };
+    }
+
+
+    private static Properties loadAndTransformProps(String cfgPath) throws 
IOException {
+        Dictionary dict = ConfigurationHandler.read(new 
FileInputStream(cfgPath));
+        Properties props = new Properties();
+        Enumeration keys = dict.keys();
+        while (keys.hasMoreElements()) {
+            String key = (String) keys.nextElement();
+            props.put(key, dict.get(key));
+        }
+        return props;
+    }
 }

Added: 
jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java?rev=1750887&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
 Fri Jul  1 08:03:08 2016
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileDescriptor;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import com.google.common.io.Files;
+import org.apache.commons.io.filefilter.FileFilterUtils;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.OakFileDataStore;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentBlob;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeStore;
+import org.apache.jackrabbit.oak.plugins.segment.file.FileStore;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link DataStoreCheckCommand}
+ */
+public class DataStoreCheckTest {
+    private static final Logger log = 
LoggerFactory.getLogger(DataStoreCheckTest.class);
+
+    @Rule
+    public final TemporaryFolder temporaryFolder = new TemporaryFolder(new 
File("target"));
+
+    private String storePath;
+
+    private Set<String> blobsAdded;
+
+    private String cfgFilePath;
+
+    private String dsPath;
+
+    @Before
+    public void setup() throws Exception {
+        OakFileDataStore delegate = new OakFileDataStore();
+        dsPath = temporaryFolder.newFolder().getAbsolutePath();
+        delegate.setPath(dsPath);
+        delegate.init(null);
+        DataStoreBlobStore blobStore = new DataStoreBlobStore(delegate);
+
+        File storeFile = temporaryFolder.newFolder();
+        storePath = storeFile.getAbsolutePath();
+        FileStore.Builder builder = FileStore.builder(storeFile)
+            .withBlobStore(blobStore).withMaxFileSize(256)
+            .withCacheSize(64).withMemoryMapping(false);
+        FileStore fileStore = builder.build();
+        NodeStore store = SegmentNodeStore.builder(fileStore).build();
+
+        /* Create nodes with blobs stored in DS*/
+        NodeBuilder a = store.getRoot().builder();
+        int numBlobs = 10;
+        blobsAdded = Sets.newHashSet();
+        for (int i = 0; i < numBlobs; i++) {
+            SegmentBlob b = (SegmentBlob) store.createBlob(randomStream(i, 
18342));
+            Iterator<String> idIter = blobStore.resolveChunks(b.getBlobId());
+            while (idIter.hasNext()) {
+                String chunk = idIter.next();
+                blobsAdded.add(chunk);
+            }
+            a.child("c" + i).setProperty("x", b);
+        }
+
+        store.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        log.info("Created blobs : {}", blobsAdded);
+
+        File cfgFile = temporaryFolder.newFile();
+        BufferedWriter writer = Files.newWriter(cfgFile, Charsets.UTF_8);
+        FileIOUtils.writeAsLine(writer, "path=\"" + dsPath + "\"",false);
+        writer.close();
+        cfgFilePath = cfgFile.getAbsolutePath();
+
+        fileStore.close();
+        blobStore.close();
+    }
+
+    @After
+    public void tearDown() {
+        System.setErr(new PrintStream(new 
FileOutputStream(FileDescriptor.out)));
+    }
+
+    @Test
+    public void testCorrect() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        testAllParams(dump);
+    }
+
+    @Test
+    public void testConsistency() throws Exception {
+        File dump = temporaryFolder.newFolder();
+
+        // Delete a random blob from datastore
+        OakFileDataStore delegate = new OakFileDataStore();
+        delegate.setPath(dsPath);
+        delegate.init(null);
+        DataStoreBlobStore blobStore = new DataStoreBlobStore(delegate);
+
+        Random rand = new Random();
+        String deletedBlobId = Iterables.get(blobsAdded, 
rand.nextInt(blobsAdded.size()));
+        blobsAdded.remove(deletedBlobId);
+        long count = 
blobStore.countDeleteChunks(ImmutableList.of(deletedBlobId), 0);
+        assertEquals(1, count);
+
+        testAllParams(dump);
+
+        assertFileEquals(dump, "[id]", blobsAdded);
+        assertFileEquals(dump, "[ref]", Sets.union(blobsAdded, 
Sets.newHashSet(deletedBlobId)));
+        assertFileEquals(dump, "[consistency]", 
Sets.newHashSet(deletedBlobId));
+    }
+
+    public void testAllParams(File dump) throws Exception {
+        DataStoreCheckCommand checkCommand = new DataStoreCheckCommand();
+        List<String> argsList = Lists
+            .newArrayList("--id", "--ref", "--consistency", "--fds", 
cfgFilePath, "--store", storePath,
+                "--dump", dump.getAbsolutePath());
+
+        checkCommand.execute(argsList.toArray(new String[0]));
+    }
+
+    @Test
+    public void testMissingOpParams() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        List<String> argsList = Lists
+            .newArrayList("--fds", cfgFilePath, "--store", storePath,
+                "--dump", dump.getAbsolutePath());
+        testIncorrectParams(argsList, "Missing "
+            + "required option(s) ['id', 'ref', 'consistency']");
+    }
+
+    @Test
+    public void testTarNoDS() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        List<String> argsList = Lists
+            .newArrayList("--id", "--ref", "--consistency", "--store", 
storePath,
+                "--dump", dump.getAbsolutePath());
+        testIncorrectParams(argsList, "Operation not defined for 
SegmentNodeStore without external datastore");
+
+    }
+
+    @Test
+    public void testOpNoStore() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        List<String> argsList = Lists
+            .newArrayList("--consistency", "--fds", cfgFilePath,
+                "--dump", dump.getAbsolutePath());
+        testIncorrectParams(argsList, "Missing required option(s) ['store']");
+
+        argsList = Lists
+            .newArrayList("--ref", "--fds", cfgFilePath,
+                "--dump", dump.getAbsolutePath());
+        testIncorrectParams(argsList, "Missing required option(s) ['store']");
+    }
+
+    public static void testIncorrectParams(List<String> argList, String 
assertMsg) throws Exception {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+        System.setErr(new PrintStream(buffer));
+
+        DataStoreCheckCommand checkCommand = new DataStoreCheckCommand();
+
+        checkCommand.execute(argList.toArray(new String[0]));
+        String message = buffer.toString(Charsets.UTF_8.toString());
+        Assert.assertTrue(message.contains(assertMsg));
+        System.setErr(new PrintStream(new 
FileOutputStream(FileDescriptor.out)));
+    }
+
+    private static void assertFileEquals(File dump, String prefix, Set<String> 
blobsAdded)
+        throws IOException {
+        File files[] =
+            FileFilterUtils.filter(FileFilterUtils.prefixFileFilter(prefix), 
dump.listFiles());
+        Assert.assertNotNull(files);
+        Assert.assertTrue(files.length == 1);
+        Assert.assertTrue(files[0].exists());
+        assertEquals(blobsAdded,
+            FileIOUtils.readStringsAsSet(new FileInputStream(files[0]), 
false));
+    }
+
+    static InputStream randomStream(int seed, int size) {
+        Random r = new Random(seed);
+        byte[] data = new byte[size];
+        r.nextBytes(data);
+        return new ByteArrayInputStream(data);
+    }
+}


Reply via email to