Author: amitj
Date: Tue Aug 2 05:10:56 2016
New Revision: 1754818

URL: http://svn.apache.org/viewvc?rev=1754818&view=rev
Log:
OAK-4454: Create consistent API in ExternalSort to write and read escaped line breaks

Merging r1750886 from trunk

Added: jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java - copied unchanged from r1750886, jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java jackrabbit/oak/branches/1.2/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java - copied unchanged from r1750886, jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java Modified: jackrabbit/oak/branches/1.2/ (props changed) jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java Propchange: jackrabbit/oak/branches/1.2/ ------------------------------------------------------------------------------ --- svn:mergeinfo (original) +++ svn:mergeinfo Tue Aug 2 05:10:56 2016 @@ -1,4 +1,4 @@ /jackrabbit/oak/branches/1.0:1665962 /jackrabbit/oak/branches/1.4:1745750,1747354,1750078,1750512 -/jackrabbit/oak/trunkjackrabbit/oak/trunkjackrabbit/trunk:1345480 Modified: jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java?rev=1754818&r1=1754817&r2=1754818&view=diff ============================================================================== --- jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java (original) +++ jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java Tue Aug 2 05:10:56 2016 @@ -31,9 +31,9 @@ import static com.google.common.base.Pre * to handle data which contains line break. If left unescaped * then such data interferes with the processing of such utilities */ -abstract class EscapeUtils { +public abstract class EscapeUtils { - static String escapeLineBreak(@Nullable String line) { + public static String escapeLineBreak(@Nullable String line) { if (line == null) { return null; } @@ -43,7 +43,7 @@ abstract class EscapeUtils { return line; } - static String unescapeLineBreaks(@Nullable String line) { + public static String unescapeLineBreaks(@Nullable String line) { if (line == null) { return null; } Modified: jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java?rev=1754818&r1=1754817&r2=1754818&view=diff ============================================================================== --- jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java (original) +++ jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java Tue Aug 2 05:10:56 2016 @@ -201,7 +201,7 @@ public class ExternalSort { // in bytes long currentblocksize = 0; while ((currentblocksize < blocksize) - && ((line = readLine(fbr)) != null)) { + && ((line = 
fbr.readLine()) != null)) { // as long as you have enough memory if (counter < numHeader) { counter++; @@ -296,7 +296,7 @@ public class ExternalSort { for (String r : tmplist) { // Skip duplicate lines if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) { - writeLine(fbw, r); + fbw.write(r); fbw.newLine(); lastLine = r; } @@ -454,7 +454,7 @@ public class ExternalSort { String r = bfb.pop(); // Skip duplicate lines if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) { - writeLine(fbw, r); + fbw.write(r); fbw.newLine(); lastLine = r; } @@ -628,15 +628,6 @@ public class ExternalSort { return r1.compareTo(r2); } }; - - public static String readLine(BufferedReader br) throws IOException { - return EscapeUtils.unescapeLineBreaks(br.readLine()); - } - - public static void writeLine(BufferedWriter wr, String line) throws IOException { - wr.write(EscapeUtils.escapeLineBreak(line)); - } - } class BinaryFileBuffer { @@ -656,7 +647,7 @@ class BinaryFileBuffer { private void reload() throws IOException { try { - if ((this.cache = ExternalSort.readLine(fbr)) == null) { + if ((this.cache = fbr.readLine()) == null) { this.empty = true; this.cache = null; } else { Modified: jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1754818&r1=1754817&r2=1754818&view=diff ============================================================================== --- jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (original) +++ jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Tue Aug 2 05:10:56 2016 @@ -38,6 +38,7 @@ import com.google.common.io.Files; import org.apache.commons.io.FileUtils; import 
org.apache.commons.io.LineIterator; +import org.apache.jackrabbit.oak.commons.FileIOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -176,7 +177,7 @@ public class StringSort implements Itera public PersistentState(Comparator<String> comparator, File workDir) { this.workDir = workDir; - this.comparator = comparator; + this.comparator = FileIOUtils.lineBreakAwareComparator(comparator); } public BufferedWriter getWriter() throws FileNotFoundException { Modified: jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1754818&r1=1754817&r2=1754818&view=diff ============================================================================== --- jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original) +++ jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Tue Aug 2 05:10:56 2016 @@ -39,6 +39,8 @@ import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.Nullable; + import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Joiner; @@ -56,16 +58,14 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.io.LineIterator; import org.apache.jackrabbit.core.data.DataRecord; import org.apache.jackrabbit.core.data.DataStoreException; +import org.apache.jackrabbit.oak.commons.FileIOUtils; import org.apache.jackrabbit.oak.commons.IOUtils; -import org.apache.jackrabbit.oak.commons.sort.ExternalSort; import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils; import 
org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType; import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nullable; - import static com.google.common.collect.Lists.newArrayList; /** @@ -451,8 +451,7 @@ public class MarkSweepGarbageCollector i */ static void saveBatchToFile(List<String> ids, BufferedWriter writer) throws IOException { for (String id : ids) { - ExternalSort.writeLine(writer, id); - writer.append(NEWLINE); + FileIOUtils.writeAsLine(writer, id, true); } ids.clear(); writer.flush(); Modified: jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java?rev=1754818&r1=1754817&r2=1754818&view=diff ============================================================================== --- jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java (original) +++ jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java Tue Aug 2 05:10:56 2016 @@ -32,10 +32,12 @@ import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.base.StandardSystemProperty; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import org.apache.commons.io.LineIterator; import org.junit.Test; import static java.util.Arrays.asList; +import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak; import static org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.FileLineDifferenceIterator; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertThat; @@ -98,7 +100,51 @@ 
public class FileLineDifferenceIteratorT assertReverseDiff("a,0xb,d,e,f", "a,d", asList("0xb", "e", "f")); assertReverseDiff("a,0xb,d,e,f", "a,d,e,f,g", asList("0xb")); } - + + @Test + public void testDiffLineBreakChars() throws IOException { + List<String> all = getLineBreakStrings(); + List<String> marked = getLineBreakStrings(); + List<String> diff = remove(marked, 3, 2); + + // without escaping, the line breaks will be resolved + assertDiff(Joiner.on(",").join(marked), Joiner.on(",").join(all), + asList("/a", "c", "/a/b")); + } + + @Test + public void testDiffEscapedLineBreakChars() throws IOException { + // Escaped characters + List<String> all = escape(getLineBreakStrings()); + List<String> marked = escape(getLineBreakStrings()); + List<String> diff = remove(marked, 3, 2); + + assertDiff(Joiner.on(",").join(marked), Joiner.on(",").join(all), diff); + } + + private static List<String> getLineBreakStrings() { + return Lists.newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc", + "/a", "/a/b\nc", "/a/b\rd", "/a/b\r\ne", "/a/c"); + } + + private static List<String> remove(List<String> list, int idx, int count) { + List<String> diff = Lists.newArrayList(); + int i = 0; + while (i < count) { + diff.add(list.remove(idx)); + i++; + } + return diff; + } + + private static List<String> escape(List<String> list) { + List<String> escaped = Lists.newArrayList(); + for (String s : list) { + escaped.add(escapeLineBreak(s)); + } + return escaped; + } + private static void assertReverseDiff(String marked, String all, List<String> diff) throws IOException { Iterator<String> itr = createItr(all, marked); assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff)); Modified: jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1754818&r1=1754817&r2=1754818&view=diff ============================================================================== --- jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (original) +++ jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java Tue Aug 2 05:10:56 2016 @@ -145,20 +145,21 @@ public class MongoBlobGCTest extends Abs } private HashSet<String> addNodeSpecialChars() throws Exception { + List<String> specialCharSets = + Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" ); + DocumentNodeStore ds = mk.getNodeStore(); HashSet<String> set = new HashSet<String>(); - DocumentNodeStore s = mk.getNodeStore(); - NodeBuilder a = s.getRoot().builder(); - int number = 1; - for (int i = 0; i < number; i++) { - Blob b = s.createBlob(randomStream(i, 18432)); + NodeBuilder a = ds.getRoot().builder(); + for (int i = 0; i < specialCharSets.size(); i++) { + Blob b = ds.createBlob(randomStream(i, 18432)); NodeBuilder n = a.child("cspecial"); - n.child("q\\%22afdg\\%22").setProperty("x", b); + n.child(specialCharSets.get(i)).setProperty("x", b); Iterator<String> idIter = - ((GarbageCollectableBlobStore) s.getBlobStore()) + ((GarbageCollectableBlobStore) ds.getBlobStore()) .resolveChunks(b.toString()); set.addAll(Lists.newArrayList(idIter)); } - s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY); + ds.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY); return set; } Modified: jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java?rev=1754818&r1=1754817&r2=1754818&view=diff 
============================================================================== --- jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java (original) +++ jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java Tue Aug 2 05:10:56 2016 @@ -301,13 +301,14 @@ public class SharedBlobStoreGCTest { } private HashSet<String> addNodeSpecialChars() throws Exception { + List<String> specialCharSets = + Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" ); HashSet<String> set = new HashSet<String>(); NodeBuilder a = ds.getRoot().builder(); - int number = 1; - for (int i = 0; i < number; i++) { + for (int i = 0; i < specialCharSets.size(); i++) { Blob b = ds.createBlob(randomStream(i, 18432)); NodeBuilder n = a.child("cspecial"); - n.child("q\\%22afdg\\%22").setProperty("x", b); + n.child(specialCharSets.get(i)).setProperty("x", b); Iterator<String> idIter = ((GarbageCollectableBlobStore) ds.getBlobStore()) .resolveChunks(b.toString()); Modified: jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java?rev=1754818&r1=1754817&r2=1754818&view=diff ============================================================================== --- jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java (original) +++ jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java Tue Aug 2 05:10:56 2016 @@ -82,9 +82,11 @@ import org.apache.jackrabbit.oak.api.Con import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.benchmark.BenchmarkRunner; import org.apache.jackrabbit.oak.checkpoint.Checkpoints; +import org.apache.jackrabbit.oak.commons.FileIOUtils; import org.apache.jackrabbit.oak.commons.IOUtils; import 
org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.commons.json.JsopBuilder; +import org.apache.jackrabbit.oak.commons.sort.EscapeUtils; import org.apache.jackrabbit.oak.commons.sort.ExternalSort; import org.apache.jackrabbit.oak.console.Console; import org.apache.jackrabbit.oak.explorer.Explorer; @@ -1227,7 +1229,7 @@ public final class Main { count.getAndIncrement(); if (idBatch.size() >= 1024) { for (String rec : idBatch) { - ExternalSort.writeLine(writer, rec); + FileIOUtils.writeAsLine(writer, rec, true); writer.append(StandardSystemProperty.LINE_SEPARATOR.value()); writer.flush(); } @@ -1242,7 +1244,7 @@ public final class Main { ); if (!idBatch.isEmpty()) { for (String rec : idBatch) { - ExternalSort.writeLine(writer, rec); + FileIOUtils.writeAsLine(writer, rec, true); writer.append(StandardSystemProperty.LINE_SEPARATOR.value()); writer.flush(); }