Author: amitj
Date: Wed Jul 20 03:56:47 2016
New Revision: 1753431

URL: http://svn.apache.org/viewvc?rev=1753431&view=rev
Log:
OAK-4200:  [BlobGC] Improve collection times of blobs available

* Renamed FileIOUtils.CloseableFileIterator to 
FileIOUtils.BurnOnCloseFileIterator
* Tests for BurnOnCloseFileIterator
* Enabled UTF_8 encoding for iterator on files in FileLineDifferenceIterator

Modified:
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java

Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java?rev=1753431&r1=1753430&r2=1753431&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java Wed Jul 20 03:56:47 2016
@@ -299,7 +299,8 @@ public final class FileIOUtils {
 
         public FileLineDifferenceIterator(File marked, File available,
             @Nullable Function<String, String> transformer) throws IOException {
-            this(FileUtils.lineIterator(marked), FileUtils.lineIterator(available), transformer);
+            this(FileUtils.lineIterator(marked, UTF_8.toString()),
+                FileUtils.lineIterator(available, UTF_8.toString()), transformer);
         }
 
         public FileLineDifferenceIterator(LineIterator marked, LineIterator available,
@@ -369,21 +370,27 @@ public final class FileIOUtils {
      * Also has a transformer to transform the output. If the underlying file is
      * provide then it deletes the file on {@link #close()}.
      *
+     * If there is a scope for lines in the file containing line break characters it should be
+     * ensured that the files is written with
+     * {@link #writeAsLine(BufferedWriter, String, boolean)} with true to escape line break
+     * characters and should be properly unescaped on read.
+     * A custom transformer can also be provided to unescape.
+     *
      * @param <T> the type of elements in the iterator
      */
-    public static class CloseableFileIterator<T> extends AbstractIterator<T> implements Closeable {
+    public static class BurnOnCloseFileIterator<T> extends AbstractIterator<T> implements Closeable {
         private final Logger log = LoggerFactory.getLogger(getClass());
 
         private final LineIterator iterator;
         private final Function<String, T> transformer;
         private File backingFile;
 
-        public CloseableFileIterator(LineIterator iterator, Function<String, T> transformer) {
+        public BurnOnCloseFileIterator(LineIterator iterator, Function<String, T> transformer) {
             this.iterator = iterator;
             this.transformer = transformer;
         }
 
-        public CloseableFileIterator(LineIterator iterator, File backingFile,
+        public BurnOnCloseFileIterator(LineIterator iterator, File backingFile,
             Function<String, T> transformer) {
             this.iterator = iterator;
             this.transformer = transformer;
@@ -412,16 +419,16 @@ public final class FileIOUtils {
             }
         }
 
-        public static CloseableFileIterator<String> wrap(LineIterator iter) {
-            return new CloseableFileIterator<String>(iter, new Function<String, String>() {
+        public static BurnOnCloseFileIterator<String> wrap(LineIterator iter) {
+            return new BurnOnCloseFileIterator<String>(iter, new Function<String, String>() {
                 public String apply(String s) {
                     return s;
                 }
             });
         }
 
-        public static CloseableFileIterator<String> wrap(LineIterator iter, File backingFile) {
-            return new CloseableFileIterator<String>(iter, backingFile,
+        public static BurnOnCloseFileIterator<String> wrap(LineIterator iter, File backingFile) {
+            return new BurnOnCloseFileIterator<String>(iter, backingFile,
                 new Function<String, String>() {
                     public String apply(String s) {
                         return s;

Modified: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java?rev=1753431&r1=1753430&r2=1753431&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java Wed Jul 20 03:56:47 2016
@@ -32,16 +32,23 @@ import java.nio.charset.CharsetEncoder;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Random;
 import java.util.Set;
 
-import com.google.common.base.Charsets;
+import javax.annotation.Nullable;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Sets;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.commons.FileIOUtils.BurnOnCloseFileIterator;
 import org.junit.Assert;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 
+import static com.google.common.base.Charsets.UTF_8;
 import static com.google.common.collect.Lists.newArrayList;
 import static com.google.common.collect.Sets.newHashSet;
 import static com.google.common.collect.Sets.union;
@@ -73,10 +80,7 @@ public class FileIOUtilsTest {
     @Test
     public void writeReadStrings() throws Exception {
         Set<String> added = newHashSet("a", "z", "e", "b");
-        File f = folder.newFile();
-
-        int count = writeStrings(added.iterator(), f, false);
-        assertEquals(added.size(), count);
+        File f = assertWrite(added.iterator(), false, added.size());
 
         Set<String> retrieved = readStringsAsSet(new FileInputStream(f), false);
 
@@ -86,33 +90,29 @@ public class FileIOUtilsTest {
     @Test
     public void writeReadStringsWithLineBreaks() throws IOException {
         Set<String> added = newHashSet(getLineBreakStrings());
-        File f = folder.newFile();
-        int count = writeStrings(added.iterator(), f, true);
-        assertEquals(added.size(), count);
+        File f = assertWrite(added.iterator(), true, added.size());
 
         Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
+
         assertEquals(added, retrieved);
     }
 
     @Test
     public void writeReadRandomStrings() throws Exception {
         Set<String> added = newHashSet();
-        File f = folder.newFile();
-
         for (int i = 0; i < 100; i++) {
             added.add(getRandomTestString());
         }
-        int count = writeStrings(added.iterator(), f, true);
-        assertEquals(added.size(), count);
+        File f = assertWrite(added.iterator(), true, added.size());
 
         Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
+
         assertEquals(added, retrieved);
     }
 
     @Test
     public void compareWithLineBreaks() throws Exception {
         Comparator<String> cmp = lineBreakAwareComparator(lexComparator);
-
         List<String> strs = getLineBreakStrings();
         Collections.sort(strs);
 
@@ -126,12 +126,12 @@ public class FileIOUtilsTest {
     @Test
     public void sortTest() throws IOException {
         List<String> list = newArrayList("a", "z", "e", "b");
-        File f = folder.newFile();
-        writeStrings(list.iterator(), f, false);
+        File f = assertWrite(list.iterator(), false, list.size());
+
         sort(f);
 
         BufferedReader reader =
-            new BufferedReader(new InputStreamReader(new FileInputStream(f), Charsets.UTF_8));
+            new BufferedReader(new InputStreamReader(new FileInputStream(f), UTF_8));
         String line = null;
         List<String> retrieved = newArrayList();
         while ((line = reader.readLine()) != null) {
@@ -145,12 +145,12 @@ public class FileIOUtilsTest {
     @Test
     public void sortCustomComparatorTest() throws IOException {
         List<String> list = getLineBreakStrings();
-        File f = folder.newFile();
-        writeStrings(list.iterator(), f, true);
+        File f = assertWrite(list.iterator(), true, list.size());
+
         sort(f, lineBreakAwareComparator(lexComparator));
 
         BufferedReader reader =
-            new BufferedReader(new InputStreamReader(new FileInputStream(f), Charsets.UTF_8));
+            new BufferedReader(new InputStreamReader(new FileInputStream(f), UTF_8));
         String line = null;
         List<String> retrieved = newArrayList();
         while ((line = reader.readLine()) != null) {
@@ -173,16 +173,13 @@ public class FileIOUtilsTest {
     @Test
     public void appendTest() throws IOException {
         Set<String> added1 = newHashSet("a", "z", "e", "b");
-        File f1 = folder.newFile();
-        writeStrings(added1.iterator(), f1, false);
+        File f1 = assertWrite(added1.iterator(), false, added1.size());
 
         Set<String> added2 = newHashSet("2", "3", "5", "6");
-        File f2 = folder.newFile();
-        writeStrings(added2.iterator(), f2, false);
+        File f2 = assertWrite(added2.iterator(), false, added2.size());
 
         Set<String> added3 = newHashSet("t", "y", "8", "9");
-        File f3 = folder.newFile();
-        writeStrings(added3.iterator(), f3, false);
+        File f3 = assertWrite(added3.iterator(), false, added3.size());
 
         append(newArrayList(f2, f3), f1, true);
         assertEquals(union(union(added1, added2), added3),
@@ -195,18 +192,16 @@ public class FileIOUtilsTest {
     @Test
     public void appendTestNoDelete() throws IOException {
         Set<String> added1 = newHashSet("a", "z", "e", "b");
-        File f1 = folder.newFile();
-        writeStrings(added1.iterator(), f1, false);
+        File f1 = assertWrite(added1.iterator(), false, added1.size());
 
         Set<String> added2 = newHashSet("2", "3", "5", "6");
-        File f2 = folder.newFile();
-        writeStrings(added2.iterator(), f2, false);
+        File f2 = assertWrite(added2.iterator(), false, added2.size());
 
         Set<String> added3 = newHashSet("t", "y", "8", "9");
-        File f3 = folder.newFile();
-        writeStrings(added3.iterator(), f3, false);
+        File f3 = assertWrite(added3.iterator(), false, added3.size());
 
         append(newArrayList(f2, f3), f1, false);
+
         assertEquals(union(union(added1, added2), added3),
             readStringsAsSet(new FileInputStream(f1), false));
         assertTrue(f2.exists());
@@ -217,19 +212,16 @@ public class FileIOUtilsTest {
     @Test
     public void appendRandomizedTest() throws Exception {
         Set<String> added1 = newHashSet();
-        File f1 = folder.newFile();
-
         for (int i = 0; i < 100; i++) {
             added1.add(getRandomTestString());
         }
-        int count = writeStrings(added1.iterator(), f1, true);
-        assertEquals(added1.size(), count);
+        File f1 = assertWrite(added1.iterator(), true, added1.size());
 
         Set<String> added2 = newHashSet("2", "3", "5", "6");
-        File f2 = folder.newFile();
-        writeStrings(added2.iterator(), f2, true);
+        File f2 = assertWrite(added2.iterator(), true, added2.size());
 
         append(newArrayList(f2), f1, true);
+
         assertEquals(union(added1, added2),
             readStringsAsSet(new FileInputStream(f1), true));
     }
@@ -237,18 +229,70 @@ public class FileIOUtilsTest {
     @Test
     public void appendWithLineBreaksTest() throws IOException {
         Set<String> added1 = newHashSet(getLineBreakStrings());
-        File f1 = folder.newFile();
-        int count = writeStrings(added1.iterator(), f1, true);
-        assertEquals(added1.size(), count);
+        File f1 = assertWrite(added1.iterator(), true, added1.size());
 
         Set<String> added2 = newHashSet("2", "3", "5", "6");
-        File f2 = folder.newFile();
-        writeStrings(added2.iterator(), f2, true);
+        File f2 = assertWrite(added2.iterator(), true, added2.size());
 
         append(newArrayList(f1), f2, true);
+
         assertEquals(union(added1, added2), readStringsAsSet(new FileInputStream(f2), true));
     }
 
+    @Test
+    public void fileIteratorTest() throws Exception {
+        Set<String> added = newHashSet("a", "z", "e", "b");
+        File f = assertWrite(added.iterator(), false, added.size());
+
+        BurnOnCloseFileIterator iterator =
+            BurnOnCloseFileIterator.wrap(FileUtils.lineIterator(f));
+
+        assertEquals(added, Sets.newHashSet(iterator));
+        assertTrue(f.exists());
+    }
+
+    @Test
+    public void fileIteratorBurnTest() throws Exception {
+        Set<String> added = newHashSet("a", "z", "e", "b");
+        File f = assertWrite(added.iterator(), false, added.size());
+
+        BurnOnCloseFileIterator iterator =
+            BurnOnCloseFileIterator.wrap(FileUtils.lineIterator(f), f);
+
+        assertEquals(added, Sets.newHashSet(iterator));
+        assertTrue(!f.exists());
+    }
+
+    @Test
+    public void fileIteratorLineBreakTest() throws IOException {
+        Set<String> added = newHashSet(getLineBreakStrings());
+        File f = assertWrite(added.iterator(), true, added.size());
+
+        BurnOnCloseFileIterator iterator =
+            new BurnOnCloseFileIterator<String>(FileUtils.lineIterator(f),
+                new Function<String, String>() {
+                    @Nullable @Override public String apply(@Nullable String input) {
+                        return unescapeLineBreaks(input);
+                    }
+                });
+
+        assertEquals(added, Sets.newHashSet(iterator));
+    }
+
+    @Test
+    public void fileIteratorRandomizedTest() throws Exception {
+        Set<String> added = newHashSet();
+        for (int i = 0; i < 100; i++) {
+            added.add(getRandomTestString());
+        }
+        File f = assertWrite(added.iterator(), false, added.size());
+
+        BurnOnCloseFileIterator iterator =
+            BurnOnCloseFileIterator.wrap(FileUtils.lineIterator(f, UTF_8.toString()), f);
+
+        assertEquals(added, Sets.newHashSet(iterator));
+        assertTrue(!f.exists());
+    }
 
     private static List<String> getLineBreakStrings() {
         return newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc",
@@ -271,6 +315,14 @@ public class FileIOUtilsTest {
         return unescaped;
     }
 
+    private File assertWrite(Iterator<String> iterator, boolean escape, int size)
+        throws IOException {
+        File f = folder.newFile();
+        int count = writeStrings(iterator, f, escape);
+        assertEquals(size, count);
+        return f;
+    }
+
     private static String getRandomTestString() throws Exception {
         boolean valid = false;
         StringBuilder buffer = new StringBuilder();
@@ -280,7 +332,7 @@ public class FileIOUtilsTest {
                 buffer.append((char) (RANDOM.nextInt(Character.MAX_VALUE)));
             }
             String s = buffer.toString();
-            CharsetEncoder encoder = Charset.forName(Charsets.UTF_8.toString()).newEncoder();
+            CharsetEncoder encoder = Charset.forName(UTF_8.toString()).newEncoder();
             try {
                 encoder.encode(CharBuffer.wrap(s));
                 valid = true;

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java?rev=1753431&r1=1753430&r2=1753431&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java Wed Jul 20 03:56:47 2016
@@ -61,7 +61,7 @@ import static org.apache.commons.io.File
 import static org.apache.commons.io.FilenameUtils.concat;
 import static org.apache.commons.io.FilenameUtils.removeExtension;
 import static org.apache.commons.io.IOUtils.closeQuietly;
-import static org.apache.jackrabbit.oak.commons.FileIOUtils.CloseableFileIterator.wrap;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.BurnOnCloseFileIterator.wrap;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.append;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.copy;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;


Reply via email to