Author: thomasm
Date: Wed Mar 12 10:22:41 2014
New Revision: 1576679
URL: http://svn.apache.org/r1576679
Log:
OAK-98 Source code formatting, code conventions, Javadocs
Modified:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSizeEstimator.java
Modified:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java?rev=1576679&r1=1576678&r2=1576679&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java Wed Mar 12 10:22:41 2014
@@ -76,13 +76,13 @@ public class ExternalSort {
public static void sort(File input, File output) throws IOException {
ExternalSort.mergeSortedFiles(ExternalSort.sortInBatch(input), output);
}
-
- static int DEFAULTMAXTEMPFILES = 1024;
+
+ static final int DEFAULTMAXTEMPFILES = 1024;
/**
* Defines the default maximum memory to be used while sorting (8 MB)
*/
- static long DEFAULT_MAX_MEM_BYTES = 8388608L;
+ static final long DEFAULT_MAX_MEM_BYTES = 8388608L;
// we divide the file into small blocks. If the blocks
// are too small, we shall create too many temporary files.
@@ -117,8 +117,6 @@ public class ExternalSort {
*
* @param file
* some flat file
- * @param cmp
- * string comparator
* @return a list of temporary flat files
*/
public static List<File> sortInBatch(File file)
@@ -172,7 +170,7 @@ public class ExternalSort {
* string comparator
* @param maxtmpfiles
* maximal number of temporary files
- * @param Charset
+ * @param cs
* character set to use (can use Charset.defaultCharset())
* @param tmpdirectory
* location of the temporary files (set to null for default location)
@@ -190,8 +188,8 @@ public class ExternalSort {
List<File> files = new ArrayList<File>();
BufferedReader fbr = new BufferedReader(new InputStreamReader(
new FileInputStream(file), cs));
- long blocksize = estimateBestSizeOfBlocks(file, maxtmpfiles, maxMemory);// in
- // bytes
+ // in bytes
+ long blocksize = estimateBestSizeOfBlocks(file, maxtmpfiles, maxMemory);
try {
List<String> tmplist = new ArrayList<String>();
@@ -199,7 +197,8 @@ public class ExternalSort {
try {
int counter = 0;
while (line != null) {
- long currentblocksize = 0;// in bytes
+ // in bytes
+ long currentblocksize = 0;
while ((currentblocksize < blocksize)
&& ((line = fbr.readLine()) != null)) {
// as long as you have enough memory
@@ -243,7 +242,7 @@ public class ExternalSort {
* string comparator
* @param maxtmpfiles
* maximal number of temporary files
- * @param Charset
+ * @param cs
* character set to use (can use Charset.defaultCharset())
* @param tmpdirectory
* location of the temporary files (set to null for default location)
@@ -281,13 +280,14 @@ public class ExternalSort {
"flatfile", tmpdirectory);
newtmpfile.deleteOnExit();
OutputStream out = new FileOutputStream(newtmpfile);
- int ZIPBUFFERSIZE = 2048;
- if (usegzip)
- out = new GZIPOutputStream(out, ZIPBUFFERSIZE) {
+ int zipBufferSize = 2048;
+ if (usegzip) {
+ out = new GZIPOutputStream(out, zipBufferSize) {
{
def.setLevel(Deflater.BEST_SPEED);
}
};
+ }
BufferedWriter fbw = new BufferedWriter(new OutputStreamWriter(
out, cs));
String lastLine = null;
@@ -329,7 +329,7 @@ public class ExternalSort {
* This merges a bunch of temporary flat files
*
* @param files
- * @param output
+ * @param outputfile
* file
* @return The number of lines sorted. (P. Beaudoin)
*/
@@ -342,7 +342,7 @@ public class ExternalSort {
* This merges a bunch of temporary flat files
*
* @param files
- * @param output
+ * @param outputfile
* file
* @return The number of lines sorted. (P. Beaudoin)
*/
@@ -356,7 +356,7 @@ public class ExternalSort {
* This merges a bunch of temporary flat files
*
* @param files
- * @param output
+ * @param outputfile
* file
* @return The number of lines sorted. (P. Beaudoin)
*/
@@ -372,8 +372,6 @@ public class ExternalSort {
*
* @param files
* The {@link List} of sorted {@link File}s to be merged.
- * @param Charset
- * character set to use to load the strings
* @param distinct
* Pass <code>true</code> if duplicate lines should be discarded. ([email protected])
* @param outputfile
@@ -393,22 +391,14 @@ public class ExternalSort {
public static int mergeSortedFiles(List<File> files, File outputfile,
final Comparator<String> cmp, Charset cs, boolean distinct,
boolean append, boolean usegzip) throws IOException {
- PriorityQueue<BinaryFileBuffer> pq = new PriorityQueue<BinaryFileBuffer>(
- 11, new Comparator<BinaryFileBuffer>() {
- @Override
- public int compare(BinaryFileBuffer i,
- BinaryFileBuffer j) {
- return cmp.compare(i.peek(), j.peek());
- }
- });
ArrayList<BinaryFileBuffer> bfbs = new ArrayList<BinaryFileBuffer>();
for (File f : files) {
- final int BUFFERSIZE = 2048;
+ final int bufferSize = 2048;
InputStream in = new FileInputStream(f);
BufferedReader br;
if (usegzip) {
br = new BufferedReader(new InputStreamReader(
- new GZIPInputStream(in, BUFFERSIZE), cs));
+ new GZIPInputStream(in, bufferSize), cs));
} else {
br = new BufferedReader(new InputStreamReader(in,
cs));
@@ -420,15 +410,16 @@ public class ExternalSort {
BufferedWriter fbw = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(outputfile, append), cs));
int rowcounter = merge(fbw, cmp, distinct, bfbs);
- for (File f : files)
+ for (File f : files) {
f.delete();
+ }
return rowcounter;
}
/**
* This merges several BinaryFileBuffer to an output writer.
*
- * @param BufferedWriter
+ * @param fbw
* A buffer where we write the data.
* @param cmp
* A comparator object that tells us how to sort the lines.
@@ -449,9 +440,11 @@ public class ExternalSort {
return cmp.compare(i.peek(), j.peek());
}
});
- for (BinaryFileBuffer bfb : buffers)
- if (!bfb.empty())
+ for (BinaryFileBuffer bfb : buffers) {
+ if (!bfb.empty()) {
pq.add(bfb);
+ }
+ }
int rowcounter = 0;
String lastLine = null;
try {
@@ -473,8 +466,9 @@ public class ExternalSort {
}
} finally {
fbw.close();
- for (BinaryFileBuffer bfb : pq)
+ for (BinaryFileBuffer bfb : pq) {
bfb.close();
+ }
}
return rowcounter;
@@ -485,8 +479,6 @@ public class ExternalSort {
*
* @param files
* The {@link List} of sorted {@link File}s to be merged.
- * @param Charset
- * character set to use to load the strings
* @param distinct
* Pass <code>true</code> if duplicate lines should be discarded. ([email protected])
* @param outputfile
@@ -509,9 +501,9 @@ public class ExternalSort {
* This merges a bunch of temporary flat files
*
* @param files
- * @param output
+ * @param outputfile
* file
- * @param Charset
+ * @param cs
* character set to use to load the strings
* @return The number of lines sorted. (P. Beaudoin)
*/
@@ -555,12 +547,12 @@ public class ExternalSort {
if (args[param].equals("-v")
|| args[param].equals("--verbose")) {
verbose = true;
- } else if ((args[param].equals("-h") || args[param]
- .equals("--help"))) {
+ } else if (args[param].equals("-h") || args[param]
+ .equals("--help")) {
displayUsage();
return;
- } else if ((args[param].equals("-d") || args[param]
- .equals("--distinct"))) {
+ } else if (args[param].equals("-d") || args[param]
+ .equals("--distinct")) {
distinct = true;
} else if ((args[param].equals("-t") || args[param]
.equals("--maxtmpfiles"))
@@ -585,8 +577,8 @@ public class ExternalSort {
&& args.length > param + 1) {
param++;
cs = Charset.forName(args[param]);
- } else if ((args[param].equals("-z") || args[param]
- .equals("--gzip"))) {
+ } else if (args[param].equals("-z") || args[param]
+ .equals("--gzip")) {
usegzip = true;
} else if ((args[param].equals("-H") || args[param]
.equals("--header")) && args.length > param + 1) {
@@ -601,13 +593,14 @@ public class ExternalSort {
param++;
tempFileStore = new File(args[param]);
} else {
- if (inputfile == null)
+ if (inputfile == null) {
inputfile = args[param];
- else if (outputfile == null)
+ } else if (outputfile == null) {
outputfile = args[param];
- else
+ } else {
System.out.println("Unparsed: "
+ args[param]);
+ }
}
}
if (outputfile == null) {
@@ -620,9 +613,10 @@ public class ExternalSort {
List<File> l = sortInBatch(new File(inputfile), comparator,
maxtmpfiles, maxMemory, cs, tempFileStore, distinct,
headersize,
usegzip);
- if (verbose)
+ if (verbose) {
System.out
.println("created " + l.size() + " tmp files");
+ }
mergeSortedFiles(l, new File(outputfile), comparator, cs,
distinct, false, usegzip);
}
@@ -670,8 +664,9 @@ class BinaryFileBuffer {
}
public String peek() {
- if (empty())
+ if (empty()) {
return null;
+ }
return this.cache.toString();
}
Modified:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSizeEstimator.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSizeEstimator.java?rev=1576679&r1=1576678&r2=1576679&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSizeEstimator.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSizeEstimator.java Wed Mar 12 10:22:41 2014
@@ -29,58 +29,59 @@ package org.apache.jackrabbit.oak.common
*/
public final class StringSizeEstimator {
- private static int OBJ_HEADER;
- private static int ARR_HEADER;
- private static int INT_FIELDS = 12;
- private static int OBJ_REF;
- private static int OBJ_OVERHEAD;
- private static boolean IS_64_BIT_JVM;
-
- /**
- * Private constructor to prevent instantiation.
- */
- private StringSizeEstimator() {
- }
-
- /**
- * Class initializations.
- */
- static {
- // By default we assume 64 bit JVM
- // (defensive approach since we will get
- // larger estimations in case we are not sure)
- IS_64_BIT_JVM = true;
- // check the system property "sun.arch.data.model"
- // not very safe, as it might not work for all JVM implementations
- // nevertheless the worst thing that might happen is that the JVM is 32bit
- // but we assume its 64bit, so we will be counting a few extra bytes per string object
- String arch = System.getProperty("sun.arch.data.model");
- if (arch != null) {
- if (arch.indexOf("32") != -1) {
- // If exists and is 32 bit then we assume a 32bit JVM
- IS_64_BIT_JVM = false;
- }
- }
- // The sizes below are a bit rough as we don't take into account
- // advanced JVM options such as compressed oops
- // however if our calculation is not accurate it'll be a bit over
- // so there is no danger of an out of memory error because of this.
- OBJ_HEADER = IS_64_BIT_JVM ? 16 : 8;
- ARR_HEADER = IS_64_BIT_JVM ? 24 : 12;
- OBJ_REF = IS_64_BIT_JVM ? 8 : 4;
- OBJ_OVERHEAD = OBJ_HEADER + INT_FIELDS + OBJ_REF + ARR_HEADER;
-
- }
-
- /**
- * Estimates the size of a {@link String} object in bytes.
- *
- * @param s The string to estimate memory footprint.
- * @return The <strong>estimated</strong> size in bytes.
- */
- public static long estimatedSizeOf(String s) {
- return (s.length() * 2) + OBJ_OVERHEAD;
- }
+ private static final int OBJ_HEADER;
+ private static final int ARR_HEADER;
+ private static final int INT_FIELDS = 12;
+ private static final int OBJ_REF;
+ private static final int OBJ_OVERHEAD;
+ private static final boolean IS_64_BIT_JVM;
+
+ /**
+ * Private constructor to prevent instantiation.
+ */
+ private StringSizeEstimator() {
+ }
+
+ /**
+ * Class initializations.
+ */
+ static {
+ // By default we assume 64 bit JVM
+ // (defensive approach since we will get
+ // larger estimations in case we are not sure)
+ boolean is64Bit = true;
+ // check the system property "sun.arch.data.model"
+ // not very safe, as it might not work for all JVM implementations
+ // nevertheless the worst thing that might happen is that the JVM is 32bit
+ // but we assume its 64bit, so we will be counting a few extra bytes per string object
+ // no harm done here since this is just an approximation.
+ String arch = System.getProperty("sun.arch.data.model");
+ if (arch != null) {
+ if (arch.indexOf("32") != -1) {
+ // If exists and is 32 bit then we assume a 32bit JVM
+ is64Bit = false;
+ }
+ }
+ IS_64_BIT_JVM = is64Bit;
+ // The sizes below are a bit rough as we don't take into account
+ // advanced JVM options such as compressed oops
+ // however if our calculation is not accurate it'll be a bit over
+ // so there is no danger of an out of memory error because of this.
+ OBJ_HEADER = IS_64_BIT_JVM ? 16 : 8;
+ ARR_HEADER = IS_64_BIT_JVM ? 24 : 12;
+ OBJ_REF = IS_64_BIT_JVM ? 8 : 4;
+ OBJ_OVERHEAD = OBJ_HEADER + INT_FIELDS + OBJ_REF + ARR_HEADER;
+
+ }
+
+ /**
+ * Estimates the size of a {@link String} object in bytes.
+ *
+ * @param s The string to estimate memory footprint.
+ * @return The <strong>estimated</strong> size in bytes.
+ */
+ public static long estimatedSizeOf(String s) {
+ return (s.length() * 2) + OBJ_OVERHEAD;
+ }
}