Author: amitj
Date: Wed Jul 20 03:56:47 2016
New Revision: 1753431
URL: http://svn.apache.org/viewvc?rev=1753431&view=rev
Log:
OAK-4200: [BlobGC] Improve collection times of blobs available
* Renamed FileIOUtils.CloseableFileIterator to
FileIOUtils.BurnOnCloseFileIterator
* Tests for BurnOnCloseFileIterator
* Enabled UTF_8 encoding for iterator on files in FileLineDifferenceIterator
Modified:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java
Modified:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java?rev=1753431&r1=1753430&r2=1753431&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
(original)
+++
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
Wed Jul 20 03:56:47 2016
@@ -299,7 +299,8 @@ public final class FileIOUtils {
public FileLineDifferenceIterator(File marked, File available,
@Nullable Function<String, String> transformer) throws IOException
{
- this(FileUtils.lineIterator(marked),
FileUtils.lineIterator(available), transformer);
+ this(FileUtils.lineIterator(marked, UTF_8.toString()),
+ FileUtils.lineIterator(available, UTF_8.toString()),
transformer);
}
public FileLineDifferenceIterator(LineIterator marked, LineIterator
available,
@@ -369,21 +370,27 @@ public final class FileIOUtils {
* Also has a transformer to transform the output. If the underlying file
is
* provided then it deletes the file on {@link #close()}.
*
+ * If lines in the file may contain line break
characters, it should be
+ * ensured that the file is written with
+ * {@link #writeAsLine(BufferedWriter, String, boolean)} with true to
escape line break
+ * characters, and that the lines are properly unescaped on read.
+ * A custom transformer can also be provided to unescape.
+ *
* @param <T> the type of elements in the iterator
*/
- public static class CloseableFileIterator<T> extends AbstractIterator<T>
implements Closeable {
+ public static class BurnOnCloseFileIterator<T> extends AbstractIterator<T>
implements Closeable {
private final Logger log = LoggerFactory.getLogger(getClass());
private final LineIterator iterator;
private final Function<String, T> transformer;
private File backingFile;
- public CloseableFileIterator(LineIterator iterator, Function<String,
T> transformer) {
+ public BurnOnCloseFileIterator(LineIterator iterator, Function<String,
T> transformer) {
this.iterator = iterator;
this.transformer = transformer;
}
- public CloseableFileIterator(LineIterator iterator, File backingFile,
+ public BurnOnCloseFileIterator(LineIterator iterator, File backingFile,
Function<String, T> transformer) {
this.iterator = iterator;
this.transformer = transformer;
@@ -412,16 +419,16 @@ public final class FileIOUtils {
}
}
- public static CloseableFileIterator<String> wrap(LineIterator iter) {
- return new CloseableFileIterator<String>(iter, new
Function<String, String>() {
+ public static BurnOnCloseFileIterator<String> wrap(LineIterator iter) {
+ return new BurnOnCloseFileIterator<String>(iter, new
Function<String, String>() {
public String apply(String s) {
return s;
}
});
}
- public static CloseableFileIterator<String> wrap(LineIterator iter,
File backingFile) {
- return new CloseableFileIterator<String>(iter, backingFile,
+ public static BurnOnCloseFileIterator<String> wrap(LineIterator iter,
File backingFile) {
+ return new BurnOnCloseFileIterator<String>(iter, backingFile,
new Function<String, String>() {
public String apply(String s) {
return s;
Modified:
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java?rev=1753431&r1=1753430&r2=1753431&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
(original)
+++
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
Wed Jul 20 03:56:47 2016
@@ -32,16 +32,23 @@ import java.nio.charset.CharsetEncoder;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
-import com.google.common.base.Charsets;
+import javax.annotation.Nullable;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Sets;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.commons.FileIOUtils.BurnOnCloseFileIterator;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
+import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Sets.newHashSet;
import static com.google.common.collect.Sets.union;
@@ -73,10 +80,7 @@ public class FileIOUtilsTest {
@Test
public void writeReadStrings() throws Exception {
Set<String> added = newHashSet("a", "z", "e", "b");
- File f = folder.newFile();
-
- int count = writeStrings(added.iterator(), f, false);
- assertEquals(added.size(), count);
+ File f = assertWrite(added.iterator(), false, added.size());
Set<String> retrieved = readStringsAsSet(new FileInputStream(f),
false);
@@ -86,33 +90,29 @@ public class FileIOUtilsTest {
@Test
public void writeReadStringsWithLineBreaks() throws IOException {
Set<String> added = newHashSet(getLineBreakStrings());
- File f = folder.newFile();
- int count = writeStrings(added.iterator(), f, true);
- assertEquals(added.size(), count);
+ File f = assertWrite(added.iterator(), true, added.size());
Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
+
assertEquals(added, retrieved);
}
@Test
public void writeReadRandomStrings() throws Exception {
Set<String> added = newHashSet();
- File f = folder.newFile();
-
for (int i = 0; i < 100; i++) {
added.add(getRandomTestString());
}
- int count = writeStrings(added.iterator(), f, true);
- assertEquals(added.size(), count);
+ File f = assertWrite(added.iterator(), true, added.size());
Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
+
assertEquals(added, retrieved);
}
@Test
public void compareWithLineBreaks() throws Exception {
Comparator<String> cmp = lineBreakAwareComparator(lexComparator);
-
List<String> strs = getLineBreakStrings();
Collections.sort(strs);
@@ -126,12 +126,12 @@ public class FileIOUtilsTest {
@Test
public void sortTest() throws IOException {
List<String> list = newArrayList("a", "z", "e", "b");
- File f = folder.newFile();
- writeStrings(list.iterator(), f, false);
+ File f = assertWrite(list.iterator(), false, list.size());
+
sort(f);
BufferedReader reader =
- new BufferedReader(new InputStreamReader(new FileInputStream(f),
Charsets.UTF_8));
+ new BufferedReader(new InputStreamReader(new FileInputStream(f),
UTF_8));
String line = null;
List<String> retrieved = newArrayList();
while ((line = reader.readLine()) != null) {
@@ -145,12 +145,12 @@ public class FileIOUtilsTest {
@Test
public void sortCustomComparatorTest() throws IOException {
List<String> list = getLineBreakStrings();
- File f = folder.newFile();
- writeStrings(list.iterator(), f, true);
+ File f = assertWrite(list.iterator(), true, list.size());
+
sort(f, lineBreakAwareComparator(lexComparator));
BufferedReader reader =
- new BufferedReader(new InputStreamReader(new FileInputStream(f),
Charsets.UTF_8));
+ new BufferedReader(new InputStreamReader(new FileInputStream(f),
UTF_8));
String line = null;
List<String> retrieved = newArrayList();
while ((line = reader.readLine()) != null) {
@@ -173,16 +173,13 @@ public class FileIOUtilsTest {
@Test
public void appendTest() throws IOException {
Set<String> added1 = newHashSet("a", "z", "e", "b");
- File f1 = folder.newFile();
- writeStrings(added1.iterator(), f1, false);
+ File f1 = assertWrite(added1.iterator(), false, added1.size());
Set<String> added2 = newHashSet("2", "3", "5", "6");
- File f2 = folder.newFile();
- writeStrings(added2.iterator(), f2, false);
+ File f2 = assertWrite(added2.iterator(), false, added2.size());
Set<String> added3 = newHashSet("t", "y", "8", "9");
- File f3 = folder.newFile();
- writeStrings(added3.iterator(), f3, false);
+ File f3 = assertWrite(added3.iterator(), false, added3.size());
append(newArrayList(f2, f3), f1, true);
assertEquals(union(union(added1, added2), added3),
@@ -195,18 +192,16 @@ public class FileIOUtilsTest {
@Test
public void appendTestNoDelete() throws IOException {
Set<String> added1 = newHashSet("a", "z", "e", "b");
- File f1 = folder.newFile();
- writeStrings(added1.iterator(), f1, false);
+ File f1 = assertWrite(added1.iterator(), false, added1.size());
Set<String> added2 = newHashSet("2", "3", "5", "6");
- File f2 = folder.newFile();
- writeStrings(added2.iterator(), f2, false);
+ File f2 = assertWrite(added2.iterator(), false, added2.size());
Set<String> added3 = newHashSet("t", "y", "8", "9");
- File f3 = folder.newFile();
- writeStrings(added3.iterator(), f3, false);
+ File f3 = assertWrite(added3.iterator(), false, added3.size());
append(newArrayList(f2, f3), f1, false);
+
assertEquals(union(union(added1, added2), added3),
readStringsAsSet(new FileInputStream(f1), false));
assertTrue(f2.exists());
@@ -217,19 +212,16 @@ public class FileIOUtilsTest {
@Test
public void appendRandomizedTest() throws Exception {
Set<String> added1 = newHashSet();
- File f1 = folder.newFile();
-
for (int i = 0; i < 100; i++) {
added1.add(getRandomTestString());
}
- int count = writeStrings(added1.iterator(), f1, true);
- assertEquals(added1.size(), count);
+ File f1 = assertWrite(added1.iterator(), true, added1.size());
Set<String> added2 = newHashSet("2", "3", "5", "6");
- File f2 = folder.newFile();
- writeStrings(added2.iterator(), f2, true);
+ File f2 = assertWrite(added2.iterator(), true, added2.size());
append(newArrayList(f2), f1, true);
+
assertEquals(union(added1, added2),
readStringsAsSet(new FileInputStream(f1), true));
}
@@ -237,18 +229,70 @@ public class FileIOUtilsTest {
@Test
public void appendWithLineBreaksTest() throws IOException {
Set<String> added1 = newHashSet(getLineBreakStrings());
- File f1 = folder.newFile();
- int count = writeStrings(added1.iterator(), f1, true);
- assertEquals(added1.size(), count);
+ File f1 = assertWrite(added1.iterator(), true, added1.size());
Set<String> added2 = newHashSet("2", "3", "5", "6");
- File f2 = folder.newFile();
- writeStrings(added2.iterator(), f2, true);
+ File f2 = assertWrite(added2.iterator(), true, added2.size());
append(newArrayList(f1), f2, true);
+
assertEquals(union(added1, added2), readStringsAsSet(new
FileInputStream(f2), true));
}
+ @Test
+ public void fileIteratorTest() throws Exception {
+ Set<String> added = newHashSet("a", "z", "e", "b");
+ File f = assertWrite(added.iterator(), false, added.size());
+
+ BurnOnCloseFileIterator iterator =
+ BurnOnCloseFileIterator.wrap(FileUtils.lineIterator(f));
+
+ assertEquals(added, Sets.newHashSet(iterator));
+ assertTrue(f.exists());
+ }
+
+ @Test
+ public void fileIteratorBurnTest() throws Exception {
+ Set<String> added = newHashSet("a", "z", "e", "b");
+ File f = assertWrite(added.iterator(), false, added.size());
+
+ BurnOnCloseFileIterator iterator =
+ BurnOnCloseFileIterator.wrap(FileUtils.lineIterator(f), f);
+
+ assertEquals(added, Sets.newHashSet(iterator));
+ assertTrue(!f.exists());
+ }
+
+ @Test
+ public void fileIteratorLineBreakTest() throws IOException {
+ Set<String> added = newHashSet(getLineBreakStrings());
+ File f = assertWrite(added.iterator(), true, added.size());
+
+ BurnOnCloseFileIterator iterator =
+ new BurnOnCloseFileIterator<String>(FileUtils.lineIterator(f),
+ new Function<String, String>() {
+ @Nullable @Override public String apply(@Nullable String
input) {
+ return unescapeLineBreaks(input);
+ }
+ });
+
+ assertEquals(added, Sets.newHashSet(iterator));
+ }
+
+ @Test
+ public void fileIteratorRandomizedTest() throws Exception {
+ Set<String> added = newHashSet();
+ for (int i = 0; i < 100; i++) {
+ added.add(getRandomTestString());
+ }
+ File f = assertWrite(added.iterator(), false, added.size());
+
+ BurnOnCloseFileIterator iterator =
+ BurnOnCloseFileIterator.wrap(FileUtils.lineIterator(f,
UTF_8.toString()), f);
+
+ assertEquals(added, Sets.newHashSet(iterator));
+ assertTrue(!f.exists());
+ }
private static List<String> getLineBreakStrings() {
return newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc",
@@ -271,6 +315,14 @@ public class FileIOUtilsTest {
return unescaped;
}
+ private File assertWrite(Iterator<String> iterator, boolean escape, int
size)
+ throws IOException {
+ File f = folder.newFile();
+ int count = writeStrings(iterator, f, escape);
+ assertEquals(size, count);
+ return f;
+ }
+
private static String getRandomTestString() throws Exception {
boolean valid = false;
StringBuilder buffer = new StringBuilder();
@@ -280,7 +332,7 @@ public class FileIOUtilsTest {
buffer.append((char) (RANDOM.nextInt(Character.MAX_VALUE)));
}
String s = buffer.toString();
- CharsetEncoder encoder =
Charset.forName(Charsets.UTF_8.toString()).newEncoder();
+ CharsetEncoder encoder =
Charset.forName(UTF_8.toString()).newEncoder();
try {
encoder.encode(CharBuffer.wrap(s));
valid = true;
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java?rev=1753431&r1=1753430&r2=1753431&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/BlobIdTracker.java
Wed Jul 20 03:56:47 2016
@@ -61,7 +61,7 @@ import static org.apache.commons.io.File
import static org.apache.commons.io.FilenameUtils.concat;
import static org.apache.commons.io.FilenameUtils.removeExtension;
import static org.apache.commons.io.IOUtils.closeQuietly;
-import static
org.apache.jackrabbit.oak.commons.FileIOUtils.CloseableFileIterator.wrap;
+import static
org.apache.jackrabbit.oak.commons.FileIOUtils.BurnOnCloseFileIterator.wrap;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.append;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.copy;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;