Author: thomasm
Date: Wed Mar 12 10:22:24 2014
New Revision: 1576678

URL: http://svn.apache.org/r1576678
Log:
OAK-377 Data store garbage collection (test cases for external sort)

Added:
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java
    jackrabbit/oak/trunk/oak-commons/src/test/resources/
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.csv
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.txt
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.txt

Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java?rev=1576678&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java Wed Mar 12 10:22:24 2014
@@ -0,0 +1,407 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.commons.sort;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Scanner;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link ExternalSort}.
+ * 
+ * Source copied from a publicly available library.
+ * 
+ * @see <a
+ *      href="https://code.google.com/p/externalsortinginjava/">https://code.google.com/p/externalsortinginjava</a>
+ * 
+ *      Goal: offer a generic external-memory sorting program in Java.
+ * 
+ *      It must be : - hackable (easy to adapt) - scalable to large files -
+ *      sensibly efficient.
+ * 
+ *      This software is in the public domain.
+ * 
+ *      Usage: java org/apache/oak/commons/sort/ExternalSort somefile.txt
+ *      out.txt
+ * 
+ *      You can change the default maximal number of temporary files with the -t
+ *      flag: java org/apache/oak/commons/sort/ExternalSort somefile.txt out.txt
+ *      -t 3
+ * 
+ *      You can change the default maximum memory available with the -m flag:
+ *      java org/apache/oak/commons/sort/ExternalSort somefile.txt out.txt -m
+ *      8192
+ * 
+ *      For very large files, you might want to use an appropriate flag to
+ *      allocate more memory to the Java VM: java -Xms2G
+ *      org/apache/oak/commons/sort/ExternalSort somefile.txt out.txt
+ * 
+ *      By (in alphabetical order) Philippe Beaudoin, Eleftherios Chetzakis, Jon
+ *      Elsas, Christan Grant, Daniel Haran, Daniel Lemire, Sugumaran
+ *      Harikrishnan, Jerry Yang, First published: April 2010 originally posted
+ *      at
+ *      http://lemire.me/blog/archives/2010/04/01/external-memory-sorting-in-java
+ */
+public class ExternalSortTest {
+    private static final String TEST_FILE1_TXT = "test-file-1.txt";
+    private static final String TEST_FILE2_TXT = "test-file-2.txt";
+    private static final String TEST_FILE1_CSV = "test-file-1.csv";
+    private static final String[] EXPECTED_SORT_RESULTS = { "a", "b", "b", "e",
+            "f", "i", "m", "o", "u", "u", "x", "y", "z" };
+    private static final String[] EXPECTED_MERGE_RESULTS = { "a", "a", "b",
+            "c", "c", "d", "e", "e", "f", "g", "g", "h", "i", "j", "k" };
+    private static final String[] EXPECTED_MERGE_DISTINCT_RESULTS = { "a", "b",
+            "c", "d", "e", "f", "g", "h", "i", "j", "k" };
+    private static final String[] EXPECTED_HEADER_RESULTS = { "HEADER, HEADER",
+            "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k" };
+    private static final String[] EXPECTED_DISTINCT_RESULTS = { "a", "b", "e",
+            "f", "i", "m", "o", "u", "x", "y", "z" };
+    private static final String[] SAMPLE = { "f", "m", "b", "e", "i", "o", "u",
+            "x", "a", "y", "z", "b", "u" };
+
+    private File file1;
+    private File file2;
+    private File csvFile;
+    private List<File> fileList;
+
+    /**
+     * @throws Exception
+     */
+    @Before
+    public void setUp() throws Exception {
+        this.fileList = new ArrayList<File>(3);
+        this.file1 = new File(this.getClass().
+                getResource(TEST_FILE1_TXT).toURI());
+        this.file2 = new File(this.getClass().
+                getResource(TEST_FILE2_TXT).toURI());
+        this.csvFile = new File(this.getClass().
+                getResource(TEST_FILE1_CSV).toURI());
+
+        File tmpFile1 = new File(this.file1.getPath().toString() + ".tmp");
+        File tmpFile2 = new File(this.file2.getPath().toString() + ".tmp");
+
+        copyFile(this.file1, tmpFile1);
+        copyFile(this.file2, tmpFile2);
+
+        this.fileList.add(tmpFile1);
+        this.fileList.add(tmpFile2);
+    }
+
+    /**
+     * @throws Exception
+     */
+    @After
+    public void tearDown() throws Exception {
+        this.file1 = null;
+        this.file2 = null;
+        this.csvFile = null;
+        for (File f : this.fileList) {
+            f.delete();
+        }
+        this.fileList.clear();
+        this.fileList = null;
+    }
+
+    private static void copyFile(File sourceFile, File destFile)
+            throws IOException {
+        if (!destFile.exists()) {
+            destFile.createNewFile();
+        }
+
+        FileChannel source = null;
+        FileChannel destination = null;
+
+        try {
+            source = new FileInputStream(sourceFile).getChannel();
+            destination = new FileOutputStream(destFile).getChannel();
+            destination.transferFrom(source, 0, source.size());
+        } finally {
+            if (source != null) {
+                source.close();
+            }
+            if (destination != null) {
+                destination.close();
+            }
+        }
+    }
+
+    @Test
+    public void testEmptyFiles() throws Exception {
+        File f1 = File.createTempFile("tmp", "unit");
+        File f2 = File.createTempFile("tmp", "unit");
+        ExternalSort.mergeSortedFiles(ExternalSort.sortInBatch(f1), f2);
+        if (f2.length() != 0) {
+            throw new RuntimeException("empty files should end up emtpy");
+        }
+    }
+
+    @Test
+    public void testMergeSortedFiles() throws Exception {
+        String line;
+        List<String> result;
+        BufferedReader bf;
+        Comparator<String> cmp = new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                return o1.compareTo(o2);
+            }
+        };
+        File out = File.createTempFile("test_results", ".tmp", null);
+        ExternalSort.mergeSortedFiles(this.fileList, out, cmp,
+                Charset.defaultCharset(), false);
+
+        bf = new BufferedReader(new FileReader(out));
+
+        result = new ArrayList<String>();
+        while ((line = bf.readLine()) != null) {
+            result.add(line);
+        }
+        bf.close();
+        assertArrayEquals(Arrays.toString(result.toArray()),
+                EXPECTED_MERGE_RESULTS, result.toArray());
+    }
+
+    @Test
+    public void testMergeSortedFilesDistinct() throws Exception {
+        String line;
+        List<String> result;
+        BufferedReader bf;
+        Comparator<String> cmp = new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                return o1.compareTo(o2);
+            }
+        };
+        File out = File.createTempFile("test_results", ".tmp", null);
+        ExternalSort.mergeSortedFiles(this.fileList, out, cmp,
+                Charset.defaultCharset(), true);
+
+        bf = new BufferedReader(new FileReader(out));
+
+        result = new ArrayList<String>();
+        while ((line = bf.readLine()) != null) {
+            result.add(line);
+        }
+        bf.close();
+        assertArrayEquals(Arrays.toString(result.toArray()),
+                EXPECTED_MERGE_DISTINCT_RESULTS, result.toArray());
+    }
+
+    @Test
+    public void testMergeSortedFilesAppend() throws Exception {
+        String line;
+        List<String> result;
+        BufferedReader bf;
+        Comparator<String> cmp = new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                return o1.compareTo(o2);
+            }
+        };
+
+        File out = File.createTempFile("test_results", ".tmp", null);
+        writeStringToFile(out, "HEADER, HEADER\n");
+
+        ExternalSort.mergeSortedFiles(this.fileList, out, cmp,
+                Charset.defaultCharset(), true, true, false);
+
+        bf = new BufferedReader(new FileReader(out));
+
+        result = new ArrayList<String>();
+        while ((line = bf.readLine()) != null) {
+            result.add(line);
+        }
+        bf.close();
+        assertArrayEquals(Arrays.toString(result.toArray()),
+                EXPECTED_HEADER_RESULTS, result.toArray());
+    }
+
+    @Test
+    public void testSortAndSave() throws Exception {
+        File f;
+        String line;
+        List<String> result;
+        BufferedReader bf;
+
+        List<String> sample = Arrays.asList(SAMPLE);
+        Comparator<String> cmp = new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                return o1.compareTo(o2);
+            }
+        };
+        f = ExternalSort.sortAndSave(sample, cmp, Charset.defaultCharset(),
+                null, false, false);
+        assertNotNull(f);
+        assertTrue(f.exists());
+        assertTrue(f.length() > 0);
+        bf = new BufferedReader(new FileReader(f));
+
+        result = new ArrayList<String>();
+        while ((line = bf.readLine()) != null) {
+            result.add(line);
+        }
+        bf.close();
+        assertArrayEquals(Arrays.toString(result.toArray()),
+                EXPECTED_SORT_RESULTS, result.toArray());
+    }
+
+    @Test
+    public void testSortAndSaveDistinct() throws Exception {
+        File f;
+        String line;
+        List<String> result;
+        BufferedReader bf;
+        List<String> sample = Arrays.asList(SAMPLE);
+        Comparator<String> cmp = new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                return o1.compareTo(o2);
+            }
+        };
+
+        f = ExternalSort.sortAndSave(sample, cmp, Charset.defaultCharset(),
+                null, true, false);
+        assertNotNull(f);
+        assertTrue(f.exists());
+        assertTrue(f.length() > 0);
+        bf = new BufferedReader(new FileReader(f));
+
+        result = new ArrayList<String>();
+        while ((line = bf.readLine()) != null) {
+            result.add(line);
+        }
+        bf.close();
+        assertArrayEquals(Arrays.toString(result.toArray()),
+                EXPECTED_DISTINCT_RESULTS, result.toArray());
+    }
+
+    @Test
+    public void testSortInBatch() throws Exception {
+        Comparator<String> cmp = new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                return o1.compareTo(o2);
+            }
+        };
+
+        List<File> listOfFiles = ExternalSort.sortInBatch(this.csvFile, cmp,
+                ExternalSort.DEFAULTMAXTEMPFILES, ExternalSort.DEFAULT_MAX_MEM_BYTES,
+                Charset.defaultCharset(),
+                null, false, 1, false);
+        assertEquals(1, listOfFiles.size());
+
+        ArrayList<String> result = readLines(listOfFiles.get(0));
+        assertArrayEquals(Arrays.toString(result.toArray()),
+                EXPECTED_MERGE_DISTINCT_RESULTS, result.toArray());
+    }
+
+    /**
+     * Sample case to sort csv file.
+     * 
+     * @throws Exception
+     * 
+     */
+    @Test
+    public void testCSVSorting() throws Exception {
+        testCSVSortingWithParams(false);
+        testCSVSortingWithParams(true);
+    }
+
+    /**
+     * Sample case to sort csv file.
+     * 
+     * @param usegzip use compression for temporary files
+     * @throws Exception
+     * 
+     */
+    public void testCSVSortingWithParams(boolean usegzip) throws Exception {
+
+        File out = File.createTempFile("test_results", ".tmp", null);
+
+        Comparator<String> cmp = new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                return o1.compareTo(o2);
+            }
+        };
+
+        // read header
+        FileReader fr = new FileReader(this.csvFile);
+        Scanner scan = new Scanner(fr);
+        String head = scan.nextLine();
+
+        // write to the file
+        writeStringToFile(out, head + "\n");
+
+        // omit the first line, which is the header..
+        List<File> listOfFiles = ExternalSort.sortInBatch(this.csvFile, cmp,
+                ExternalSort.DEFAULTMAXTEMPFILES, 
+                ExternalSort.DEFAULT_MAX_MEM_BYTES,
+                Charset.defaultCharset(),
+                null, false, 1, usegzip);
+
+        // now merge with append
+        ExternalSort.mergeSortedFiles(listOfFiles, out, cmp,
+                Charset.defaultCharset(), false, true, usegzip);
+
+        ArrayList<String> result = readLines(out);
+
+        assertEquals(12, result.size());
+        assertArrayEquals(Arrays.toString(result.toArray()),
+                EXPECTED_HEADER_RESULTS, result.toArray());
+
+    }
+
+    public static ArrayList<String> readLines(File f) throws IOException {
+        BufferedReader r = new BufferedReader(new FileReader(f));
+        ArrayList<String> answer = new ArrayList<String>();
+        String line;
+        while ((line = r.readLine()) != null) {
+            answer.add(line);
+        }
+        return answer;
+    }
+
+    public static void writeStringToFile(File f, String s) throws IOException {
+        FileOutputStream out = new FileOutputStream(f);
+        try {
+            out.write(s.getBytes());
+        } finally {
+            out.close();
+        }
+    }
+
+}
\ No newline at end of file

Added: jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.csv
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.csv?rev=1576678&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.csv (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.csv Wed Mar 12 10:22:24 2014
@@ -0,0 +1,12 @@
+HEADER, HEADER
+a
+b
+k
+c
+d
+i
+j
+e
+h
+f
+g

Added: jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.txt
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.txt?rev=1576678&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.txt (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-1.txt Wed Mar 12 10:22:24 2014
@@ -0,0 +1,8 @@
+a
+b
+c
+d
+e
+f
+g
+h

Added: jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.txt
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.txt?rev=1576678&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.txt (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.txt Wed Mar 12 10:22:24 2014
@@ -0,0 +1,7 @@
+a
+c
+e
+g
+i
+j
+k


Reply via email to