Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java Fri Jan 9 06:34:33 2015 @@ -35,27 +35,26 @@ import java.util.List; /** * A simple class for detecting character encodings. - * + * * <p> * Broadly this encompasses two functions, which are distinctly separate: - * + * * <ol> - * <li>Auto detecting a set of "clues" from input text.</li> - * <li>Taking a set of clues and making a "best guess" as to the - * "real" encoding.</li> + * <li>Auto detecting a set of "clues" from input text.</li> + * <li>Taking a set of clues and making a "best guess" as to the "real" + * encoding.</li> * </ol> * </p> - * + * * <p> - * A caller will often have some extra information about what the - * encoding might be (e.g. from the HTTP header or HTML meta-tags, often - * wrong but still potentially useful clues). The types of clues may differ - * from caller to caller. Thus a typical calling sequence is: + * A caller will often have some extra information about what the encoding might + * be (e.g. from the HTTP header or HTML meta-tags, often wrong but still + * potentially useful clues). The types of clues may differ from caller to + * caller. Thus a typical calling sequence is: * <ul> - * <li>Run step (1) to generate a set of auto-detected clues;</li> - * <li>Combine these clues with the caller-dependent "extra clues" - * available;</li> - * <li>Run step (2) to guess what the most probable answer is.</li> + * <li>Run step (1) to generate a set of auto-detected clues;</li> + * <li>Combine these clues with the caller-dependent "extra clues" available;</li> + * <li>Run step (2) to guess what the most probable answer is.</li> * </p> */ public class EncodingDetector { @@ -90,34 +89,32 @@ public class EncodingDetector { @Override public String toString() { - return value + " (" + source + - ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")"; + return value + " (" + source + + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")"; } public boolean isEmpty() { - return (value==null || "".equals(value)); + return (value == null || "".equals(value)); } public boolean meetsThreshold() { - return (confidence < 0 || - (minConfidence >= 0 && confidence >= minConfidence)); + return (confidence < 0 || (minConfidence >= 0 && confidence >= minConfidence)); } } - public static final Logger LOG = LoggerFactory.getLogger(EncodingDetector.class); + public static final Logger LOG = LoggerFactory + .getLogger(EncodingDetector.class); public static final int NO_THRESHOLD = -1; - public static final String MIN_CONFIDENCE_KEY = - "encodingdetector.charset.min.confidence"; + public static final String MIN_CONFIDENCE_KEY = "encodingdetector.charset.min.confidence"; - private static final HashMap<String, String> ALIASES = - new HashMap<String, String>(); + private static final HashMap<String, String> ALIASES = new HashMap<String, String>(); private static final HashSet<String> DETECTABLES = new HashSet<String>(); // CharsetDetector will die without a minimum amount of data. - private static final int MIN_LENGTH=4; + private static final int MIN_LENGTH = 4; static { DETECTABLES.add("text/html"); @@ -130,23 +127,22 @@ public class EncodingDetector { DETECTABLES.add("application/rss+xml"); DETECTABLES.add("application/xhtml+xml"); /* - * the following map is not an alias mapping table, but - * maps character encodings which are often used in mislabelled - * documents to their correct encodings. For instance, - * there are a lot of documents labelled 'ISO-8859-1' which contain - * characters not covered by ISO-8859-1 but covered by windows-1252. - * Because windows-1252 is a superset of ISO-8859-1 (sharing code points - * for the common part), it's better to treat ISO-8859-1 as - * synonymous with windows-1252 than to reject, as invalid, documents - * labelled as ISO-8859-1 that have characters outside ISO-8859-1. + * the following map is not an alias mapping table, but maps character + * encodings which are often used in mislabelled documents to their correct + * encodings. For instance, there are a lot of documents labelled + * 'ISO-8859-1' which contain characters not covered by ISO-8859-1 but + * covered by windows-1252. Because windows-1252 is a superset of ISO-8859-1 + * (sharing code points for the common part), it's better to treat + * ISO-8859-1 as synonymous with windows-1252 than to reject, as invalid, + * documents labelled as ISO-8859-1 that have characters outside ISO-8859-1. */ ALIASES.put("ISO-8859-1", "windows-1252"); ALIASES.put("EUC-KR", "x-windows-949"); ALIASES.put("x-EUC-CN", "GB18030"); ALIASES.put("GBK", "GB18030"); - //ALIASES.put("Big5", "Big5HKSCS"); - //ALIASES.put("TIS620", "Cp874"); - //ALIASES.put("ISO-8859-11", "Cp874"); + // ALIASES.put("Big5", "Big5HKSCS"); + // ALIASES.put("TIS620", "Cp874"); + // ALIASES.put("ISO-8859-11", "Cp874"); } @@ -164,16 +160,16 @@ public class EncodingDetector { public void autoDetectClues(WebPage page, boolean filter) { autoDetectClues(page.getContent(), page.getContentType(), - parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)), filter); + parseCharacterEncoding(page.getHeaders().get(CONTENT_TYPE_UTF8)), + filter); } private void autoDetectClues(ByteBuffer dataBuffer, CharSequence typeUtf8, - String encoding, boolean filter) { + String encoding, boolean filter) { int length = dataBuffer.remaining(); String type = TableUtil.toString(typeUtf8); - if (minConfidence >= 0 && DETECTABLES.contains(type) - && length > MIN_LENGTH) { + if (minConfidence >= 0 && DETECTABLES.contains(type) && length > MIN_LENGTH) { CharsetMatch[] matches = null; // do all these in a try/catch; setText and detect/detectAll @@ -214,12 +210,14 @@ public class EncodingDetector { /** * Guess the encoding with the previously specified list of clues. - * - * @param row URL's row - * @param defaultValue Default encoding to return if no encoding can be - * detected with enough confidence. Note that this will <b>not</b> be - * normalized with {@link EncodingDetector#resolveEncodingAlias} - * + * + * @param row + * URL's row + * @param defaultValue + * Default encoding to return if no encoding can be detected with + * enough confidence. Note that this will <b>not</b> be normalized + * with {@link EncodingDetector#resolveEncodingAlias} + * * @return Guessed encoding or defaultValue */ public String guessEncoding(WebPage page, String defaultValue) { @@ -230,33 +228,33 @@ public class EncodingDetector { /** * Guess the encoding with the previously specified list of clues. - * - * @param baseUrl Base URL - * @param defaultValue Default encoding to return if no encoding can be - * detected with enough confidence. Note that this will <b>not</b> be - * normalized with {@link EncodingDetector#resolveEncodingAlias} - * + * + * @param baseUrl + * Base URL + * @param defaultValue + * Default encoding to return if no encoding can be detected with + * enough confidence. Note that this will <b>not</b> be normalized + * with {@link EncodingDetector#resolveEncodingAlias} + * * @return Guessed encoding or defaultValue */ private String guessEncoding(String baseUrl, String defaultValue) { /* - * This algorithm could be replaced by something more sophisticated; - * ideally we would gather a bunch of data on where various clues - * (autodetect, HTTP headers, HTML meta tags, etc.) disagree, tag each with - * the correct answer, and use machine learning/some statistical method - * to generate a better heuristic. + * This algorithm could be replaced by something more sophisticated; ideally + * we would gather a bunch of data on where various clues (autodetect, HTTP + * headers, HTML meta tags, etc.) disagree, tag each with the correct + * answer, and use machine learning/some statistical method to generate a + * better heuristic. */ - if (LOG.isTraceEnabled()) { findDisagreements(baseUrl, clues); } /* - * Go down the list of encoding "clues". Use a clue if: - * 1. Has a confidence value which meets our confidence threshold, OR - * 2. Doesn't meet the threshold, but is the best try, - * since nothing else is available. + * Go down the list of encoding "clues". Use a clue if: 1. Has a confidence + * value which meets our confidence threshold, OR 2. Doesn't meet the + * threshold, but is the best try, since nothing else is available. */ EncodingClue defaultClue = new EncodingClue(defaultValue, "default"); EncodingClue bestClue = defaultClue; @@ -268,8 +266,8 @@ public class EncodingDetector { String charset = clue.value; if (minConfidence >= 0 && clue.confidence >= minConfidence) { if (LOG.isTraceEnabled()) { - LOG.trace(baseUrl + ": Choosing encoding: " + charset + - " with confidence " + clue.confidence); + LOG.trace(baseUrl + ": Choosing encoding: " + charset + + " with confidence " + clue.confidence); } return resolveEncodingAlias(charset).toLowerCase(); } else if (clue.confidence == NO_THRESHOLD && bestClue == defaultClue) { @@ -289,10 +287,10 @@ public class EncodingDetector { } /* - * Strictly for analysis, look for "disagreements." The top guess from - * each source is examined; if these meet the threshold and disagree, then - * we log the information -- useful for testing or generating training data - * for a better heuristic. + * Strictly for analysis, look for "disagreements." The top guess from each + * source is examined; if these meet the threshold and disagree, then we log + * the information -- useful for testing or generating training data for a + * better heuristic. */ private void findDisagreements(String url, List<EncodingClue> newClues) { HashSet<String> valsSeen = new HashSet<String>(); @@ -314,9 +312,9 @@ public class EncodingDetector { if (disagreement) { // dump all values in case of disagreement StringBuffer sb = new StringBuffer(); - sb.append("Disagreement: "+url+"; "); + sb.append("Disagreement: " + url + "; "); for (int i = 0; i < newClues.size(); i++) { - if (i>0) { + if (i > 0) { sb.append(", "); } sb.append(newClues.get(i)); @@ -331,7 +329,7 @@ public class EncodingDetector { return null; String canonicalName = new String(Charset.forName(encoding).name()); return ALIASES.containsKey(canonicalName) ? ALIASES.get(canonicalName) - : canonicalName; + : canonicalName; } catch (Exception e) { LOG.warn("Invalid encoding " + encoding + " detected, using default."); return null; @@ -339,13 +337,12 @@ public class EncodingDetector { } /** - * Parse the character encoding from the specified content type header. - * If the content type is null, or there is no explicit character encoding, - * <code>null</code> is returned. - * <br /> - * This method was copied from org.apache.catalina.util.RequestUtil, - * which is licensed under the Apache License, Version 2.0 (the "License"). - * + * Parse the character encoding from the specified content type header. If the + * content type is null, or there is no explicit character encoding, + * <code>null</code> is returned. <br /> + * This method was copied from org.apache.catalina.util.RequestUtil, which is + * licensed under the Apache License, Version 2.0 (the "License"). + * * @param contentTypeUtf8 */ public static String parseCharacterEncoding(CharSequence contentTypeUtf8) { @@ -361,51 +358,36 @@ public class EncodingDetector { encoding = encoding.substring(0, end); encoding = encoding.trim(); if ((encoding.length() > 2) && (encoding.startsWith("\"")) - && (encoding.endsWith("\""))) + && (encoding.endsWith("\""))) encoding = encoding.substring(1, encoding.length() - 1); return (encoding.trim()); } - /*public static void main(String[] args) throws IOException { - if (args.length != 1) { - System.err.println("Usage: EncodingDetector <file>"); - System.exit(1); - } - - Configuration conf = NutchConfiguration.create(); - EncodingDetector detector = - new EncodingDetector(NutchConfiguration.create()); - - // do everything as bytes; don't want any conversion - BufferedInputStream istr = - new BufferedInputStream(new FileInputStream(args[0])); - ByteArrayOutputStream ostr = new ByteArrayOutputStream(); - byte[] bytes = new byte[1000]; - boolean more = true; - while (more) { - int len = istr.read(bytes); - if (len < bytes.length) { - more = false; - if (len > 0) { - ostr.write(bytes, 0, len); - } - } else { - ostr.write(bytes); - } - } - - byte[] data = ostr.toByteArray(); - MimeUtil mimeTypes = new MimeUtil(conf); - - // make a fake Content - Content content = - new Content("", "", data, "text/html", new Metadata(), mimeTypes); - - detector.autoDetectClues(content, true); - String encoding = detector.guessEncoding(content, - conf.get("parser.character.encoding.default")); - System.out.println("Guessed encoding: " + encoding); - }*/ + /* + * public static void main(String[] args) throws IOException { if (args.length + * != 1) { System.err.println("Usage: EncodingDetector <file>"); + * System.exit(1); } + * + * Configuration conf = NutchConfiguration.create(); EncodingDetector detector + * = new EncodingDetector(NutchConfiguration.create()); + * + * // do everything as bytes; don't want any conversion BufferedInputStream + * istr = new BufferedInputStream(new FileInputStream(args[0])); + * ByteArrayOutputStream ostr = new ByteArrayOutputStream(); byte[] bytes = + * new byte[1000]; boolean more = true; while (more) { int len = + * istr.read(bytes); if (len < bytes.length) { more = false; if (len > 0) { + * ostr.write(bytes, 0, len); } } else { ostr.write(bytes); } } + * + * byte[] data = ostr.toByteArray(); MimeUtil mimeTypes = new MimeUtil(conf); + * + * // make a fake Content Content content = new Content("", "", data, + * "text/html", new Metadata(), mimeTypes); + * + * detector.autoDetectClues(content, true); String encoding = + * detector.guessEncoding(content, + * conf.get("parser.character.encoding.default")); + * System.out.println("Guessed encoding: " + encoding); } + */ }
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/FSUtils.java Fri Jan 9 06:34:33 2015 @@ -33,16 +33,20 @@ public class FSUtils { * path. If removeOld is set to false then the old path will be set to the * name current.old. * - * @param fs The FileSystem. - * @param current The end path, the one being replaced. - * @param replacement The path to replace with. - * @param removeOld True if we are removing the current path. + * @param fs + * The FileSystem. + * @param current + * The end path, the one being replaced. + * @param replacement + * The path to replace with. + * @param removeOld + * True if we are removing the current path. * - * @throws IOException If an error occurs during replacement. + * @throws IOException + * If an error occurs during replacement. */ public static void replace(FileSystem fs, Path current, Path replacement, - boolean removeOld) - throws IOException { + boolean removeOld) throws IOException { // rename any current path to old Path old = new Path(current + ".old"); @@ -60,12 +64,14 @@ public class FSUtils { /** * Closes a group of SequenceFile readers. * - * @param readers The SequenceFile readers to close. - * @throws IOException If an error occurs while closing a reader. + * @param readers + * The SequenceFile readers to close. + * @throws IOException + * If an error occurs while closing a reader. */ public static void closeReaders(SequenceFile.Reader[] readers) - throws IOException { - + throws IOException { + // loop through the readers, closing one by one if (readers != null) { for (int i = 0; i < readers.length; i++) { @@ -80,12 +86,13 @@ public class FSUtils { /** * Closes a group of MapFile readers. * - * @param readers The MapFile readers to close. - * @throws IOException If an error occurs while closing a reader. + * @param readers + * The MapFile readers to close. + * @throws IOException + * If an error occurs while closing a reader. */ - public static void closeReaders(MapFile.Reader[] readers) - throws IOException { - + public static void closeReaders(MapFile.Reader[] readers) throws IOException { + // loop through the readers closing one by one if (readers != null) { for (int i = 0; i < readers.length; i++) { Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/GZIPUtils.java Fri Jan 9 06:34:33 2015 @@ -28,19 +28,18 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * A collection of utility methods for working on GZIPed data. + * A collection of utility methods for working on GZIPed data. */ public class GZIPUtils { - + private static final Logger LOG = LoggerFactory.getLogger(GZIPUtils.class); - private static final int EXPECTED_COMPRESSION_RATIO= 5; - private static final int BUF_SIZE= 4096; + private static final int EXPECTED_COMPRESSION_RATIO = 5; + private static final int BUF_SIZE = 4096; /** - * Returns an gunzipped copy of the input array. If the gzipped - * input has been truncated or corrupted, a best-effort attempt is - * made to unzip as much as possible. If no data can be extracted - * <code>null</code> is returned. + * Returns an gunzipped copy of the input array. If the gzipped input has been + * truncated or corrupted, a best-effort attempt is made to unzip as much as + * possible. If no data can be extracted <code>null</code> is returned. */ public static final byte[] unzipBestEffort(byte[] in) { return unzipBestEffort(in, Integer.MAX_VALUE); @@ -48,33 +47,32 @@ public class GZIPUtils { /** * Returns an gunzipped copy of the input array, truncated to - * <code>sizeLimit</code> bytes, if necessary. If the gzipped input - * has been truncated or corrupted, a best-effort attempt is made to - * unzip as much as possible. If no data can be extracted - * <code>null</code> is returned. + * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been + * truncated or corrupted, a best-effort attempt is made to unzip as much as + * possible. If no data can be extracted <code>null</code> is returned. */ public static final byte[] unzipBestEffort(byte[] in, int sizeLimit) { try { - // decompress using GZIPInputStream - ByteArrayOutputStream outStream = - new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length); + // decompress using GZIPInputStream + ByteArrayOutputStream outStream = new ByteArrayOutputStream( + EXPECTED_COMPRESSION_RATIO * in.length); - GZIPInputStream inStream = - new GZIPInputStream ( new ByteArrayInputStream(in) ); + GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream( + in)); byte[] buf = new byte[BUF_SIZE]; int written = 0; while (true) { try { int size = inStream.read(buf); - if (size <= 0) + if (size <= 0) break; if ((written + size) > sizeLimit) { outStream.write(buf, 0, sizeLimit - written); break; } outStream.write(buf, 0, size); - written+= size; + written += size; } catch (Exception e) { break; } @@ -91,23 +89,23 @@ public class GZIPUtils { } } - /** - * Returns an gunzipped copy of the input array. - * @throws IOException if the input cannot be properly decompressed + * Returns an gunzipped copy of the input array. + * + * @throws IOException + * if the input cannot be properly decompressed */ public static final byte[] unzip(byte[] in) throws IOException { - // decompress using GZIPInputStream - ByteArrayOutputStream outStream = - new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length); + // decompress using GZIPInputStream + ByteArrayOutputStream outStream = new ByteArrayOutputStream( + EXPECTED_COMPRESSION_RATIO * in.length); - GZIPInputStream inStream = - new GZIPInputStream ( new ByteArrayInputStream(in) ); + GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in)); byte[] buf = new byte[BUF_SIZE]; while (true) { int size = inStream.read(buf); - if (size <= 0) + if (size <= 0) break; outStream.write(buf, 0, size); } @@ -121,11 +119,11 @@ public class GZIPUtils { */ public static final byte[] zip(byte[] in) { try { - // compress using GZIPOutputStream - ByteArrayOutputStream byteOut= - new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO); + // compress using GZIPOutputStream + ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length + / EXPECTED_COMPRESSION_RATIO); - GZIPOutputStream outStream= new GZIPOutputStream(byteOut); + GZIPOutputStream outStream = new GZIPOutputStream(byteOut); try { outStream.write(in); @@ -142,9 +140,9 @@ public class GZIPUtils { return byteOut.toByteArray(); } catch (IOException e) { - LOG.error("Failed with IOException", e); + LOG.error("Failed with IOException", e); return null; } } - + } Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/GenericWritableConfigurable.java Fri Jan 9 06:34:33 2015 @@ -24,12 +24,15 @@ import org.apache.hadoop.conf.Configurat import org.apache.hadoop.io.GenericWritable; import org.apache.hadoop.io.Writable; -/** A generic Writable wrapper that can inject Configuration to {@link Configurable}s */ -public abstract class GenericWritableConfigurable extends GenericWritable - implements Configurable { +/** + * A generic Writable wrapper that can inject Configuration to + * {@link Configurable}s + */ +public abstract class GenericWritableConfigurable extends GenericWritable + implements Configurable { private Configuration conf; - + public Configuration getConf() { return conf; } @@ -37,7 +40,7 @@ public abstract class GenericWritableCon public void setConf(Configuration conf) { this.conf = conf; } - + @Override public void readFields(DataInput in) throws IOException { byte type = in.readByte(); @@ -50,8 +53,8 @@ public abstract class GenericWritableCon } Writable w = get(); if (w instanceof Configurable) - ((Configurable)w).setConf(conf); + ((Configurable) w).setConf(conf); w.readFields(in); } - + } Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/HadoopFSUtil.java Fri Jan 9 06:34:33 2015 @@ -25,48 +25,48 @@ import org.apache.hadoop.fs.PathFilter; public class HadoopFSUtil { - /** - * Returns PathFilter that passes all paths through. - */ - public static PathFilter getPassAllFilter() { - return new PathFilter() { - public boolean accept(Path arg0) { - return true; - } - }; - } + /** + * Returns PathFilter that passes all paths through. + */ + public static PathFilter getPassAllFilter() { + return new PathFilter() { + public boolean accept(Path arg0) { + return true; + } + }; + } + + /** + * Returns PathFilter that passes directories through. + */ + public static PathFilter getPassDirectoriesFilter(final FileSystem fs) { + return new PathFilter() { + public boolean accept(final Path path) { + try { + return fs.getFileStatus(path).isDir(); + } catch (IOException ioe) { + return false; + } + } - /** - * Returns PathFilter that passes directories through. - */ - public static PathFilter getPassDirectoriesFilter(final FileSystem fs) { - return new PathFilter() { - public boolean accept(final Path path) { - try { - return fs.getFileStatus(path).isDir(); - } catch (IOException ioe) { - return false; - } - } + }; + } - }; + /** + * Turns an array of FileStatus into an array of Paths. + */ + public static Path[] getPaths(FileStatus[] stats) { + if (stats == null) { + return null; } - - /** - * Turns an array of FileStatus into an array of Paths. - */ - public static Path[] getPaths(FileStatus[] stats) { - if (stats == null) { - return null; - } - if (stats.length == 0) { - return new Path[0]; - } - Path[] res = new Path[stats.length]; - for (int i = 0; i < stats.length; i++) { - res[i] = stats[i].getPath(); - } - return res; + if (stats.length == 0) { + return new Path[0]; + } + Path[] res = new Path[stats.length]; + for (int i = 0; i < stats.length; i++) { + res[i] = stats[i].getPath(); } + return res; + } } Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/Histogram.java Fri Jan 9 06:34:33 2015 @@ -72,8 +72,8 @@ public class Histogram<E> { } public List<E> sortInverseByValue() { - List<Map.Entry<E, HistogramEntry>> list = - new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet()); + List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, HistogramEntry>>( + map.entrySet()); // Sort the list using an annonymous inner class implementing Comparator for // the compare method @@ -93,8 +93,8 @@ public class Histogram<E> { } public List<E> sortByValue() { - List<Map.Entry<E, HistogramEntry>> list = - new Vector<Map.Entry<E, HistogramEntry>>(map.entrySet()); + List<Map.Entry<E, HistogramEntry>> list = new Vector<Map.Entry<E, HistogramEntry>>( + map.entrySet()); // Sort the list using an annonymous inner class implementing Comparator for // the compare method Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/IdentityPageReducer.java Fri Jan 9 06:34:33 2015 @@ -21,12 +21,12 @@ import java.io.IOException; import org.apache.nutch.storage.WebPage; import org.apache.gora.mapreduce.GoraReducer; -public class IdentityPageReducer -extends GoraReducer<String, WebPage, String, WebPage> { +public class IdentityPageReducer extends + GoraReducer<String, WebPage, String, WebPage> { @Override - protected void reduce(String key, Iterable<WebPage> values, - Context context) throws IOException, InterruptedException { + protected void reduce(String key, Iterable<WebPage> values, Context context) + throws IOException, InterruptedException { for (WebPage page : values) { context.write(key, page); } Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/LockUtil.java Fri Jan 9 06:34:33 2015 @@ -28,22 +28,29 @@ import org.apache.hadoop.fs.Path; * @author Andrzej Bialecki */ public class LockUtil { - + /** * Create a lock file. - * @param fs filesystem - * @param lockFile name of the lock file - * @param accept if true, and the target file exists, consider it valid. If false - * and the target file exists, throw an IOException. - * @throws IOException if accept is false, and the target file already exists, - * or if it's a directory. + * + * @param fs + * filesystem + * @param lockFile + * name of the lock file + * @param accept + * if true, and the target file exists, consider it valid. If false + * and the target file exists, throw an IOException. + * @throws IOException + * if accept is false, and the target file already exists, or if + * it's a directory. */ - public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) throws IOException { + public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) + throws IOException { if (fs.exists(lockFile)) { - if(!accept) + if (!accept) throw new IOException("lock file " + lockFile + " already exists."); if (fs.getFileStatus(lockFile).isDir()) - throw new IOException("lock file " + lockFile + " already exists and is a directory."); + throw new IOException("lock file " + lockFile + + " already exists and is a directory."); // do nothing - the file already exists. } else { // make sure parents exist @@ -55,16 +62,23 @@ public class LockUtil { /** * Remove lock file. NOTE: applications enforce the semantics of this file - * this method simply removes any file with a given name. - * @param fs filesystem - * @param lockFile lock file name + * + * @param fs + * filesystem + * @param lockFile + * lock file name * @return false, if the lock file doesn't exist. True, if it existed and was - * successfully removed. - * @throws IOException if lock file exists but it is a directory. + * successfully removed. + * @throws IOException + * if lock file exists but it is a directory. */ - public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException { - if (!fs.exists(lockFile)) return false; + public static boolean removeLockFile(FileSystem fs, Path lockFile) + throws IOException { + if (!fs.exists(lockFile)) + return false; if (fs.getFileStatus(lockFile).isDir()) - throw new IOException("lock file " + lockFile + " exists but is a directory!"); + throw new IOException("lock file " + lockFile + + " exists but is a directory!"); return fs.delete(lockFile, false); } } Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java Fri Jan 9 06:34:33 2015 @@ -37,7 +37,7 @@ import org.apache.tika.mime.MimeTypesFac // Slf4j logging imports import org.slf4j.Logger; import org.slf4j.LoggerFactory; - + // imported for Javadoc import org.apache.nutch.protocol.ProtocolOutput; @@ -45,12 +45,12 @@ import org.apache.nutch.protocol.Protoco * @author mattmann * @since NUTCH-608 * - * <p> - * This is a facade class to insulate Nutch from its underlying Mime Type - * substrate library, <a href="http://incubator.apache.org/tika/">Apache Tika</a>. - * Any mime handling code should be placed in this utility class, and hidden - * from the Nutch classes that rely on it. - * </p> + * <p> + * This is a facade class to insulate Nutch from its underlying Mime Type + * substrate library, <a href="http://incubator.apache.org/tika/">Apache + * Tika</a>. Any mime handling code should be placed in this utility + * class, and hidden from the Nutch classes that rely on it. + * </p> */ public final class MimeUtil { @@ -66,7 +66,8 @@ public final class MimeUtil { private boolean mimeMagic; /* our log stream */ - private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName()); + private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class + .getName()); public MimeUtil(Configuration conf) { tika = new Tika(); @@ -75,25 +76,26 @@ public final class MimeUtil { .getName()); if (mimeTypez == null) { try { - String customMimeTypeFile = conf.get("mime.types.file"); - if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){ - try { - mimeTypez = MimeTypesFactory.create(conf - .getConfResourceAsInputStream(customMimeTypeFile)); - } - catch (Exception e){ - LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default"); - } + String customMimeTypeFile = conf.get("mime.types.file"); + if (customMimeTypeFile != null + && customMimeTypeFile.equals("") == false) { + try { + mimeTypez = MimeTypesFactory.create(conf + .getConfResourceAsInputStream(customMimeTypeFile)); + } catch (Exception e) { + LOG.error("Can't load mime.types.file : " + customMimeTypeFile + + " using Tika's default"); } - if (mimeTypez==null) - mimeTypez = MimeTypes.getDefaultMimeTypes(); + } + if (mimeTypez == null) + mimeTypez = MimeTypes.getDefaultMimeTypes(); } catch (Exception e) { - LOG.error("Exception in MimeUtil "+e.getMessage()); + LOG.error("Exception in MimeUtil " + e.getMessage()); throw new RuntimeException(e); } objectCache.setObject(MimeTypes.class.getName(), mimeTypez); } - + this.mimeTypes = mimeTypez; this.mimeMagic = conf.getBoolean("mime.type.magic", true); } @@ -129,14 +131,13 @@ public final class MimeUtil { /** * A facade interface to trying all the possible mime type resolution * strategies available within Tika. First, the mime type provided in - * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. - * Then the cleaned mime type is looked up in the underlying Tika - * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType} - * is found, then that mime type is used, otherwise URL resolution is - * used to try and determine the mime type. However, if - * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration}, - * then mime type magic resolution is used to try and obtain a - * better-than-the-default approximation of the {@link MimeType}. + * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. Then + * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes} + * registry, by its cleaned name. If the {@link MimeType} is found, then that + * mime type is used, otherwise URL resolution is used to try and determine + * the mime type. However, if <code>mime.type.magic</code> is enabled in + * {@link NutchConfiguration}, then mime type magic resolution is used to try + * and obtain a better-than-the-default approximation of the {@link MimeType}. * * @param typeName * The original mime type, returned from a {@link ProtocolOutput}. @@ -177,7 +178,7 @@ public final class MimeUtil { throw new RuntimeException(e); } } else { - retType = type.getName(); + retType = type.getName(); } // if magic is enabled use mime magic to guess if the mime type returned @@ -195,14 +196,15 @@ public final class MimeUtil { InputStream stream = TikaInputStream.get(data); try { magicType = tika.detect(stream, tikaMeta); - } finally { - stream.close(); + } finally { + stream.close(); } - } catch (IOException ignore) {} + } catch (IOException ignore) { + } if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM) - && !magicType.equals(MimeTypes.PLAIN_TEXT) - && retType != null && !retType.equals(magicType)) { + && !magicType.equals(MimeTypes.PLAIN_TEXT) && retType != null + && !retType.equals(magicType)) { // If magic enabled and the current mime type differs from that of the // one returned from the magic, take the magic mimeType @@ -225,12 +227,12 @@ public final class MimeUtil { /** * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(String)} * method. - * + * * @param url * A string representation of the document {@link URL} to sense the * {@link MimeType} for. - * @return An appropriate {@link MimeType}, identified from the given - * Document url in string form. + * @return An appropriate {@link MimeType}, identified from the given Document + * url in string form. */ public String getMimeType(String url) { return tika.detect(url); @@ -239,11 +241,11 @@ public final class MimeUtil { /** * A facade interface to Tika's underlying {@link MimeTypes#forName(String)} * method. - * + * * @param name * The name of a valid {@link MimeType} in the Tika mime registry. - * @return The object representation of the {@link MimeType}, if it exists, - * or null otherwise. + * @return The object representation of the {@link MimeType}, if it exists, or + * null otherwise. */ public String forName(String name) { try { @@ -258,7 +260,7 @@ public final class MimeUtil { /** * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(File)} * method. - * + * * @param f * The {@link File} to sense the {@link MimeType} for. * @return The {@link MimeType} of the given {@link File}, or null if it Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/NodeWalker.java Fri Jan 9 06:34:33 2015 @@ -22,13 +22,17 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** - * <p>A utility class that allows the walking of any DOM tree using a stack - * instead of recursion. As the node tree is walked the next node is popped - * off of the stack and all of its children are automatically added to the - * stack to be called in tree order.</p> + * <p> + * A utility class that allows the walking of any DOM tree using a stack instead + * of recursion. As the node tree is walked the next node is popped off of the + * stack and all of its children are automatically added to the stack to be + * called in tree order. + * </p> * - * <p>Currently this class is not thread safe. It is assumed that only one - * thread will be accessing the <code>NodeWalker</code> at any given time.</p> + * <p> + * Currently this class is not thread safe. It is assumed that only one thread + * will be accessing the <code>NodeWalker</code> at any given time. + * </p> */ public class NodeWalker { @@ -36,7 +40,7 @@ public class NodeWalker { private Node currentNode; private NodeList currentChildren; private Stack<Node> nodes; - + /** * Starts the <code>Node</code> tree from the root node. * @@ -47,69 +51,77 @@ public class NodeWalker { nodes = new Stack<Node>(); nodes.add(rootNode); } - + /** - * <p>Returns the next <code>Node</code> on the stack and pushes all of its - * children onto the stack, allowing us to walk the node tree without the - * use of recursion. If there are no more nodes on the stack then null is - * returned.</p> + * <p> + * Returns the next <code>Node</code> on the stack and pushes all of its + * children onto the stack, allowing us to walk the node tree without the use + * of recursion. If there are no more nodes on the stack then null is + * returned. + * </p> * - * @return Node The next <code>Node</code> on the stack or null if there - * isn't a next node. + * @return Node The next <code>Node</code> on the stack or null if there isn't + * a next node. */ public Node nextNode() { - + // if no next node return null if (!hasNext()) { return null; } - + // pop the next node off of the stack and push all of its children onto // the stack currentNode = nodes.pop(); currentChildren = currentNode.getChildNodes(); int childLen = (currentChildren != null) ? currentChildren.getLength() : 0; - + // put the children node on the stack in first to last order for (int i = childLen - 1; i >= 0; i--) { nodes.add(currentChildren.item(i)); } - + return currentNode; } - + /** - * <p>Skips over and removes from the node stack the children of the last - * node. When getting a next node from the walker, that node's children - * are automatically added to the stack. You can call this method to remove - * those children from the stack.</p> - * - * <p>This is useful when you don't want to process deeper into the - * current path of the node tree but you want to continue processing sibling - * nodes.</p> - * + * <p> + * Skips over and removes from the node stack the children of the last node. + * When getting a next node from the walker, that node's children are + * automatically added to the stack. You can call this method to remove those + * children from the stack. + * </p> + * + * <p> + * This is useful when you don't want to process deeper into the current path + * of the node tree but you want to continue processing sibling nodes. + * </p> + * */ public void skipChildren() { - + int childLen = (currentChildren != null) ? currentChildren.getLength() : 0; - - for (int i = 0 ; i < childLen ; i++) { + + for (int i = 0; i < childLen; i++) { Node child = nodes.peek(); if (child.equals(currentChildren.item(i))) { nodes.pop(); } } } - + /** * Return the current node. + * * @return Node */ public Node getCurrentNode() { return currentNode; } - - /** * Returns true if there are more nodes on the current stack. + + /** + * * Returns true if there are more nodes on the current stack. + * * @return */ public boolean hasNext() { Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchConfiguration.java Fri Jan 9 06:34:33 2015 @@ -23,37 +23,42 @@ import java.util.UUID; import org.apache.hadoop.conf.Configuration; - -/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific - * resources. */ +/** + * Utility to create Hadoop {@link Configuration}s that include Nutch-specific + * resources. + */ public class NutchConfiguration { public static final String UUID_KEY = "nutch.conf.uuid"; - - private NutchConfiguration() {} // singleton - + + private NutchConfiguration() { + } // singleton + /* - * Configuration.hashCode() doesn't return values that - * correspond to a unique set of parameters. This is a workaround - * so that we can track instances of Configuration created by Nutch. + * Configuration.hashCode() doesn't return values that correspond to a unique + * set of parameters. This is a workaround so that we can track instances of + * Configuration created by Nutch. */ private static void setUUID(Configuration conf) { UUID uuid = UUID.randomUUID(); conf.set(UUID_KEY, uuid.toString()); } - + /** - * Retrieve a Nutch UUID of this configuration object, or null - * if the configuration was created elsewhere. - * @param conf configuration instance + * Retrieve a Nutch UUID of this configuration object, or null if the + * configuration was created elsewhere. + * + * @param conf + * configuration instance * @return uuid or null */ public static String getUUID(Configuration conf) { return conf.get(UUID_KEY); } - /** Create a {@link Configuration} for Nutch. This will load the standard - * Nutch resources, <code>nutch-default.xml</code> and - * <code>nutch-site.xml</code> overrides. + /** + * Create a {@link Configuration} for Nutch. This will load the standard Nutch + * resources, <code>nutch-default.xml</code> and <code>nutch-site.xml</code> + * overrides. */ public static Configuration create() { Configuration conf = new Configuration(); @@ -61,14 +66,19 @@ public class NutchConfiguration { addNutchResources(conf); return conf; } - - /** Create a {@link Configuration} from supplied properties. - * @param addNutchResources if true, then first <code>nutch-default.xml</code>, - * and then <code>nutch-site.xml</code> will be loaded prior to applying the - * properties. Otherwise these resources won't be used. - * @param nutchProperties a set of properties to define (or override) + + /** + * Create a {@link Configuration} from supplied properties. + * + * @param addNutchResources + * if true, then first <code>nutch-default.xml</code>, and then + * <code>nutch-site.xml</code> will be loaded prior to applying the + * properties. Otherwise these resources won't be used. + * @param nutchProperties + * a set of properties to define (or override) */ - public static Configuration create(boolean addNutchResources, Properties nutchProperties) { + public static Configuration create(boolean addNutchResources, + Properties nutchProperties) { Configuration conf = new Configuration(); setUUID(conf); if (addNutchResources) { @@ -83,8 +93,8 @@ public class NutchConfiguration { /** * Add the standard Nutch resources to {@link Configuration}. * - * @param conf Configuration object to which - * configuration is to be added. + * @param conf + * Configuration object to which configuration is to be added. */ private static Configuration addNutchResources(Configuration conf) { conf.addResource("nutch-default.xml"); @@ -92,4 +102,3 @@ public class NutchConfiguration { return conf; } } - Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java Fri Jan 9 06:34:33 2015 @@ -35,10 +35,10 @@ public class NutchJob extends Job { public NutchJob(Configuration conf, String jobName) throws IOException { super(conf, jobName); - //prefix jobName with crawlId if not empty + // prefix jobName with crawlId if not empty String crawlId = conf.get("storage.crawl.id"); if (!StringUtils.isEmpty(crawlId)) { - jobName = "["+crawlId+"]"+jobName; + jobName = "[" + crawlId + "]" + jobName; setJobName(jobName); } setJarByClass(this.getClass()); Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJobConf.java Fri Jan 9 06:34:33 2015 @@ -20,7 +20,7 @@ package org.apache.nutch.util; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.JobConf; -/** A {@link JobConf} for Nutch jobs. */ +/** A {@link JobConf} for Nutch jobs. */ public class NutchJobConf extends JobConf { public NutchJobConf(Configuration conf) { @@ -28,4 +28,3 @@ public class NutchJobConf extends JobCon } } - Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchTool.java Fri Jan 9 06:34:33 2015 @@ -26,19 +26,20 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.nutch.metadata.Nutch; public abstract class NutchTool extends Configured { - - protected HashMap<String,Object> results = new HashMap<String,Object>(); - protected Map<String,Object> status = - Collections.synchronizedMap(new HashMap<String,Object>()); + + protected HashMap<String, Object> results = new HashMap<String, Object>(); + protected Map<String, Object> status = Collections + .synchronizedMap(new HashMap<String, Object>()); protected Job currentJob; protected int numJobs; protected int currentJobNum; - - /** Runs the tool, using a map of arguments. - * May return results, or null. + + /** + * Runs the tool, using a map of arguments. May return results, or null. */ - public abstract Map<String,Object> run(Map<String,Object> args) throws Exception; - + public abstract Map<String, Object> run(Map<String, Object> args) + throws Exception; + /** Returns relative progress of the tool, a float in range [0,1]. */ public float getProgress() { float res = 0; @@ -55,29 +56,31 @@ public abstract class NutchTool extends } // take into account multiple jobs if (numJobs > 1) { - res = (currentJobNum + res) / (float)numJobs; + res = (currentJobNum + res) / (float) numJobs; } status.put(Nutch.STAT_PROGRESS, res); return res; } - - + /** Returns current status of the running tool. */ - public Map<String,Object> getStatus() { + public Map<String, Object> getStatus() { return status; } - - /** Stop the job with the possibility to resume. Subclasses should - * override this, since by default it calls {@link #killJob()}. + + /** + * Stop the job with the possibility to resume. Subclasses should override + * this, since by default it calls {@link #killJob()}. + * * @return true if succeeded, false otherwise */ public boolean stopJob() throws Exception { return killJob(); } - + /** - * Kill the job immediately. Clients should assume that any results - * that the job produced so far are in inconsistent state or missing. + * Kill the job immediately. Clients should assume that any results that the + * job produced so far are in inconsistent state or missing. + * * @return true if succeeded, false otherwise. * @throws Exception */ Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/ObjectCache.java Fri Jan 9 06:34:33 2015 @@ -24,35 +24,33 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; public class ObjectCache { - + private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class); - - private static final WeakHashMap<Configuration, ObjectCache> CACHE = - new WeakHashMap<Configuration, ObjectCache>(); + + private static final WeakHashMap<Configuration, ObjectCache> CACHE = new WeakHashMap<Configuration, ObjectCache>(); private final HashMap<String, Object> objectMap; - + private ObjectCache() { objectMap = new HashMap<String, Object>(); } - + public static ObjectCache get(Configuration conf) { ObjectCache objectCache = CACHE.get(conf); if (objectCache == null) { - LOG.debug("No object cache found for conf=" + conf - + ", instantiating a new object cache"); + LOG.debug("No object cache found for conf=" + conf + + ", instantiating a new object cache"); objectCache = new ObjectCache(); CACHE.put(conf, objectCache); } return objectCache; } - + public Object getObject(String key) { return objectMap.get(key); } - + public void setObject(String key, Object value) { objectMap.put(key, value); } } - Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/PrefixStringMatcher.java Fri Jan 9 06:34:33 2015 @@ -21,46 +21,47 @@ import java.util.Collection; import java.util.Iterator; /** - * A class for efficiently matching <code>String</code>s against a set - * of prefixes. + * A class for efficiently matching <code>String</code>s against a set of + * prefixes. */ public class PrefixStringMatcher extends TrieStringMatcher { /** * Creates a new <code>PrefixStringMatcher</code> which will match - * <code>String</code>s with any prefix in the supplied array. - * Zero-length <code>Strings</code> are ignored. + * <code>String</code>s with any prefix in the supplied array. Zero-length + * <code>Strings</code> are ignored. */ public PrefixStringMatcher(String[] prefixes) { super(); - for (int i= 0; i < prefixes.length; i++) + for (int i = 0; i < prefixes.length; i++) addPatternForward(prefixes[i]); } /** * Creates a new <code>PrefixStringMatcher</code> which will match - * <code>String</code>s with any prefix in the supplied + * <code>String</code>s with any prefix in the supplied * <code>Collection</code>. - * - * @throws ClassCastException if any <code>Object</code>s in the - * collection are not <code>String</code>s + * + * @throws ClassCastException + * if any <code>Object</code>s in the collection are not + * <code>String</code>s */ public PrefixStringMatcher(Collection<String> prefixes) { super(); - Iterator<String> iter= prefixes.iterator(); + Iterator<String> iter = prefixes.iterator(); while (iter.hasNext()) addPatternForward(iter.next()); } /** - * Returns true if the given <code>String</code> is matched by a - * prefix in the trie + * Returns true if the given <code>String</code> is matched by a prefix in the + * trie */ public boolean matches(String input) { - TrieNode node= root; - for (int i= 0; i < input.length(); i++) { - node= node.getChild(input.charAt(i)); - if (node == null) + TrieNode node = root; + for (int i = 0; i < input.length(); i++) { + node = node.getChild(input.charAt(i)); + if (node == null) return false; if (node.isTerminal()) return true; @@ -73,13 +74,13 @@ public class PrefixStringMatcher extends * or <code>null<code> if no match exists. */ public String shortestMatch(String input) { - TrieNode node= root; - for (int i= 0; i < input.length(); i++) { - node= node.getChild(input.charAt(i)); - if (node == null) + TrieNode node = root; + for (int i = 0; i < input.length(); i++) { + node = node.getChild(input.charAt(i)); + if (node == null) return null; if (node.isTerminal()) - return input.substring(0, i+1); + return input.substring(0, i + 1); } return null; } @@ -89,29 +90,26 @@ public class PrefixStringMatcher extends * or <code>null<code> if no match exists. */ public String longestMatch(String input) { - TrieNode node= root; - String result= null; - for (int i= 0; i < input.length(); i++) { - node= node.getChild(input.charAt(i)); - if (node == null) + TrieNode node = root; + String result = null; + for (int i = 0; i < input.length(); i++) { + node = node.getChild(input.charAt(i)); + if (node == null) break; if (node.isTerminal()) - result= input.substring(0, i+1); + result = input.substring(0, i + 1); } return result; } public static final void main(String[] argv) { - PrefixStringMatcher matcher= - new PrefixStringMatcher( - new String[] - {"abcd", "abc", "aac", "baz", "foo", "foobar"} ); - - String[] tests= {"a", "ab", "abc", "abcdefg", "apple", "aa", "aac", - "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", - "kite", }; + PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] { + "abcd", "abc", "aac", "baz", "foo", "foobar" }); - for (int i= 0; i < tests.length; i++) { + String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac", + "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", }; + + for (int i = 0; i < tests.length; i++) { System.out.println("testing: " + tests[i]); System.out.println(" matches: " + matcher.matches(tests[i])); System.out.println(" shortest: " + matcher.shortestMatch(tests[i])); Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java Fri Jan 9 06:34:33 2015 @@ -20,42 +20,42 @@ package org.apache.nutch.util; import java.nio.ByteBuffer; /** - * A collection of String processing utility methods. + * A collection of String processing utility methods. */ public class StringUtil { /** - * Returns a copy of <code>s</code> padded with trailing spaces so - * that it's length is <code>length</code>. Strings already - * <code>length</code> characters long or longer are not altered. + * Returns a copy of <code>s</code> padded with trailing spaces so that it's + * length is <code>length</code>. Strings already <code>length</code> + * characters long or longer are not altered. */ public static String rightPad(String s, int length) { - StringBuffer sb= new StringBuffer(s); - for (int i= length - s.length(); i > 0; i--) + StringBuffer sb = new StringBuffer(s); + for (int i = length - s.length(); i > 0; i--) sb.append(" "); return sb.toString(); } /** - * Returns a copy of <code>s</code> padded with leading spaces so - * that it's length is <code>length</code>. Strings already - * <code>length</code> characters long or longer are not altered. + * Returns a copy of <code>s</code> padded with leading spaces so that it's + * length is <code>length</code>. Strings already <code>length</code> + * characters long or longer are not altered. */ public static String leftPad(String s, int length) { - StringBuffer sb= new StringBuffer(); - for (int i= length - s.length(); i > 0; i--) + StringBuffer sb = new StringBuffer(); + for (int i = length - s.length(); i > 0; i--) sb.append(" "); sb.append(s); return sb.toString(); } - - private static final char[] HEX_DIGITS = - {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', + '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; /** * Convenience call for {@link #toHexString(ByteBuffer, String, int)}, where * <code>sep = null; lineLen = Integer.MAX_VALUE</code>. + * * @param buf */ public static String toHexString(ByteBuffer buf) { @@ -65,19 +65,25 @@ public class StringUtil { /** * Get a text representation of a ByteBuffer as hexadecimal String, where each * pair of hexadecimal digits corresponds to consecutive bytes in the array. - * @param buf input data - * @param sep separate every pair of hexadecimal digits with this separator, or - * null if no separation is needed. - * @param lineLen break the output String into lines containing output for lineLen - * bytes. + * + * @param buf + * input data + * @param sep + * separate every pair of hexadecimal digits with this separator, or + * null if no separation is needed. + * @param lineLen + * break the output String into lines containing output for lineLen + * bytes. */ public static String toHexString(ByteBuffer buf, String sep, int lineLen) { - return toHexString(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining(), sep, lineLen); + return toHexString(buf.array(), buf.arrayOffset() + buf.position(), + buf.remaining(), sep, lineLen); } /** * Convenience call for {@link #toHexString(byte[], String, int)}, where * <code>sep = null; lineLen = Integer.MAX_VALUE</code>. + * * @param buf */ public static String toHexString(byte[] buf) { @@ -87,11 +93,15 @@ public class StringUtil { /** * Get a text representation of a byte[] as hexadecimal String, where each * pair of hexadecimal digits corresponds to consecutive bytes in the array. - * @param buf input data - * @param sep separate every pair of hexadecimal digits with this separator, or - * null if no separation is needed. - * @param lineLen break the output String into lines containing output for lineLen - * bytes. + * + * @param buf + * input data + * @param sep + * separate every pair of hexadecimal digits with this separator, or + * null if no separation is needed. + * @param lineLen + * break the output String into lines containing output for lineLen + * bytes. */ public static String toHexString(byte[] buf, String sep, int lineLen) { return toHexString(buf, 0, buf.length, sep, lineLen); @@ -100,39 +110,53 @@ public class StringUtil { /** * Get a text representation of a byte[] as hexadecimal String, where each * pair of hexadecimal digits corresponds to consecutive bytes in the array. - * @param buf input data - * @param of the offset into the byte[] to start reading - * @param cb the number of bytes to read from the byte[] - * @param sep separate every pair of hexadecimal digits with this separator, or - * null if no separation is needed. - * @param lineLen break the output String into lines containing output for lineLen - * bytes. - */ - public static String toHexString(byte[] buf, int of, int cb, String sep, int lineLen) { - if (buf == null) return null; - if (lineLen <= 0) lineLen = Integer.MAX_VALUE; + * + * @param buf + * input data + * @param of + * the offset into the byte[] to start reading + * @param cb + * the number of bytes to read from the byte[] + * @param sep + * separate every pair of hexadecimal digits with this separator, or + * null if no separation is needed. + * @param lineLen + * break the output String into lines containing output for lineLen + * bytes. + */ + public static String toHexString(byte[] buf, int of, int cb, String sep, + int lineLen) { + if (buf == null) + return null; + if (lineLen <= 0) + lineLen = Integer.MAX_VALUE; StringBuffer res = new StringBuffer(cb * 2); for (int c = 0; c < cb; c++) { int b = buf[of++]; res.append(HEX_DIGITS[(b >> 4) & 0xf]); res.append(HEX_DIGITS[b & 0xf]); - if (c > 0 && (c % lineLen) == 0) res.append('\n'); - else if (sep != null && c < lineLen - 1) res.append(sep); + if (c > 0 && (c % lineLen) == 0) + res.append('\n'); + else if (sep != null && c < lineLen - 1) + res.append(sep); } return res.toString(); } - + /** * Convert a String containing consecutive (no inside whitespace) hexadecimal - * digits into a corresponding byte array. If the number of digits is not even, - * a '0' will be appended in the front of the String prior to conversion. - * Leading and trailing whitespace is ignored. - * @param text input text + * digits into a corresponding byte array. If the number of digits is not + * even, a '0' will be appended in the front of the String prior to + * conversion. Leading and trailing whitespace is ignored. + * + * @param text + * input text * @return converted byte array, or null if unable to convert */ public static byte[] fromHexString(String text) { text = text.trim(); - if (text.length() % 2 != 0) text = "0" + text; + if (text.length() % 2 != 0) + text = "0" + text; int resLen = text.length() / 2; int loNibble, hiNibble; byte[] res = new byte[resLen]; @@ -140,12 +164,13 @@ public class StringUtil { int j = i << 1; hiNibble = charToNibble(text.charAt(j)); loNibble = charToNibble(text.charAt(j + 1)); - if (loNibble == -1 || hiNibble == -1) return null; - res[i] = (byte)(hiNibble << 4 | loNibble); + if (loNibble == -1 || hiNibble == -1) + return null; + res[i] = (byte) (hiNibble << 4 | loNibble); } return res; } - + private static final int charToNibble(char c) { if (c >= '0' && c <= '9') { return c - '0'; @@ -164,11 +189,12 @@ public class StringUtil { public static boolean isEmpty(String str) { return (str == null) || (str.equals("")); } - /** * Takes in a String value and cleans out any offending "�" - * @param value the dirty String value. + * + * @param value + * the dirty String value. * @return clean String */ public static String cleanField(String value) { @@ -178,8 +204,8 @@ public class StringUtil { public static void main(String[] args) { if (args.length != 1) System.out.println("Usage: StringUtil <encoding name>"); - else - System.out.println(args[0] + " is resolved to " + - EncodingDetector.resolveEncodingAlias(args[0])); + else + System.out.println(args[0] + " is resolved to " + + EncodingDetector.resolveEncodingAlias(args[0])); } } Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/SuffixStringMatcher.java Fri Jan 9 06:34:33 2015 @@ -21,8 +21,8 @@ import java.util.Collection; import java.util.Iterator; /** - * A class for efficiently matching <code>String</code>s against a set - * of suffixes. Zero-length <code>Strings</code> are ignored. + * A class for efficiently matching <code>String</code>s against a set of + * suffixes. Zero-length <code>Strings</code> are ignored. */ public class SuffixStringMatcher extends TrieStringMatcher { @@ -32,7 +32,7 @@ public class SuffixStringMatcher extends */ public SuffixStringMatcher(String[] suffixes) { super(); - for (int i= 0; i < suffixes.length; i++) + for (int i = 0; i < suffixes.length; i++) addPatternBackward(suffixes[i]); } @@ -49,14 +49,14 @@ public class SuffixStringMatcher extends } /** - * Returns true if the given <code>String</code> is matched by a - * suffix in the trie + * Returns true if the given <code>String</code> is matched by a suffix in the + * trie */ public boolean matches(String input) { - TrieNode node= root; - for (int i= input.length() - 1; i >= 0; i--) { - node= node.getChild(input.charAt(i)); - if (node == null) + TrieNode node = root; + for (int i = input.length() - 1; i >= 0; i--) { + node = node.getChild(input.charAt(i)); + if (node == null) return false; if (node.isTerminal()) return true; @@ -64,16 +64,15 @@ public class SuffixStringMatcher extends return false; } - /** * Returns the shortest suffix of <code>input<code> that is matched, * or <code>null<code> if no match exists. */ public String shortestMatch(String input) { - TrieNode node= root; - for (int i= input.length() - 1; i >= 0; i--) { - node= node.getChild(input.charAt(i)); - if (node == null) + TrieNode node = root; + for (int i = input.length() - 1; i >= 0; i--) { + node = node.getChild(input.charAt(i)); + if (node == null) return null; if (node.isTerminal()) return input.substring(i); @@ -86,29 +85,26 @@ public class SuffixStringMatcher extends * or <code>null<code> if no match exists. */ public String longestMatch(String input) { - TrieNode node= root; - String result= null; - for (int i= input.length() - 1; i >= 0; i--) { - node= node.getChild(input.charAt(i)); - if (node == null) + TrieNode node = root; + String result = null; + for (int i = input.length() - 1; i >= 0; i--) { + node = node.getChild(input.charAt(i)); + if (node == null) break; if (node.isTerminal()) - result= input.substring(i); + result = input.substring(i); } return result; } public static final void main(String[] argv) { - SuffixStringMatcher matcher= - new SuffixStringMatcher( - new String[] - {"a", "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar"} ); - - String[] tests= {"a", "ac", "abcd", "abcdefg", "apple", "aa", "aac", - "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", - "kite", }; + SuffixStringMatcher matcher = new SuffixStringMatcher(new String[] { "a", + "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar" }); + + String[] tests = { "a", "ac", "abcd", "abcdefg", "apple", "aa", "aac", + "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", }; - for (int i= 0; i < tests.length; i++) { + for (int i = 0; i < tests.length; i++) { System.out.println("testing: " + tests[i]); System.out.println(" matches: " + matcher.matches(tests[i])); System.out.println(" shortest: " + matcher.shortestMatch(tests[i])); Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java Fri Jan 9 06:34:33 2015 @@ -33,7 +33,7 @@ public class TableUtil { * <p> * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes * "com.foo.bar:8983:http/to/index.html?a=b". - * + * * @param url * url to be reversed * @return Reversed url @@ -50,7 +50,7 @@ public class TableUtil { * <p> * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes * "com.foo.bar:http:8983/to/index.html?a=b". - * + * * @param url * url to be reversed * @return Reversed url @@ -93,8 +93,11 @@ public class TableUtil { pathBegin = reversedUrl.length(); String sub = reversedUrl.substring(0, pathBegin); - String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed host>, <port>, <protocol>} - + String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed + // host>, + // <port>, + // <protocol>} + buf.append(splits[1]); // add protocol buf.append("://"); reverseAppendSplits(splits[0], buf); // splits[0] is reversed @@ -110,7 +113,7 @@ public class TableUtil { /** * Given a reversed url, returns the reversed host E.g * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar" - * + * * @param reversedUrl * Reversed url * @return Reversed host @@ -120,7 +123,7 @@ public class TableUtil { } private static void reverseAppendSplits(String string, StringBuilder buf) { - String[] splits = StringUtils.split(string,'.'); + String[] splits = StringUtils.split(string, '.'); if (splits.length > 0) { for (int i = splits.length - 1; i > 0; i--) { buf.append(splits[i]); @@ -136,18 +139,18 @@ public class TableUtil { StringBuilder buf = new StringBuilder(); reverseAppendSplits(hostName, buf); return buf.toString(); - + } + public static String unreverseHost(String reversedHostName) { return reverseHost(reversedHostName); // Reversible } - - + /** - * Convert given Utf8 instance to String and and cleans out - * any offending "�" from the String. - * - * + * Convert given Utf8 instance to String and and cleans out any offending "�" + * from the String. + * + * * @param utf8 * Utf8 object * @return string-ifed Utf8 object or null if Utf8 instance is null Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/TimingUtil.java Fri Jan 9 06:34:33 2015 @@ -21,35 +21,39 @@ import java.text.NumberFormat; public class TimingUtil { - private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 }; + private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 }; - /** - * Calculate the elapsed time between two times specified in milliseconds. - * @param start The start of the time period - * @param end The end of the time period - * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y minutes and Z seconds or null if start > end. - */ - public static String elapsedTime(long start, long end){ - if (start > end) { - return null; - } - - long[] elapsedTime = new long[TIME_FACTOR.length]; - - for (int i = 0; i < TIME_FACTOR.length; i++) { - elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i]; - start += TIME_FACTOR[i] * elapsedTime[i]; - } - - NumberFormat nf = NumberFormat.getInstance(); - nf.setMinimumIntegerDigits(2); - StringBuffer buf = new StringBuffer(); - for (int i = 0; i < elapsedTime.length; i++) { - if (i > 0) { - buf.append(":"); - } - buf.append(nf.format(elapsedTime[i])); - } - return buf.toString(); + /** + * Calculate the elapsed time between two times specified in milliseconds. + * + * @param start + * The start of the time period + * @param end + * The end of the time period + * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y + * minutes and Z seconds or null if start > end. + */ + public static String elapsedTime(long start, long end) { + if (start > end) { + return null; } + + long[] elapsedTime = new long[TIME_FACTOR.length]; + + for (int i = 0; i < TIME_FACTOR.length; i++) { + elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i]; + start += TIME_FACTOR[i] * elapsedTime[i]; + } + + NumberFormat nf = NumberFormat.getInstance(); + nf.setMinimumIntegerDigits(2); + StringBuffer buf = new StringBuffer(); + for (int i = 0; i < elapsedTime.length; i++) { + if (i > 0) { + buf.append(":"); + } + buf.append(nf.format(elapsedTime[i])); + } + return buf.toString(); + } } Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/ToolUtil.java Fri Jan 9 06:34:33 2015 @@ -28,14 +28,14 @@ import org.apache.nutch.metadata.Nutch; public class ToolUtil { - public static final Map<String,Object> toArgMap(Object... args) { + public static final Map<String, Object> toArgMap(Object... args) { if (args == null) { return null; } if (args.length % 2 != 0) { throw new RuntimeException("expected pairs of argName argValue"); } - HashMap<String,Object> res = new HashMap<String,Object>(); + HashMap<String, Object> res = new HashMap<String, Object>(); for (int i = 0; i < args.length; i += 2) { if (args[i + 1] != null) { res.put(String.valueOf(args[i]), args[i + 1]); @@ -43,20 +43,22 @@ public class ToolUtil { } return res; } - + @SuppressWarnings("unchecked") - public static final void recordJobStatus(String label, Job job, Map<String,Object> results) { - Map<String,Object> jobs = (Map<String,Object>)results.get(Nutch.STAT_JOBS); + public static final void recordJobStatus(String label, Job job, + Map<String, Object> results) { + Map<String, Object> jobs = (Map<String, Object>) results + .get(Nutch.STAT_JOBS); if (jobs == null) { - jobs = new LinkedHashMap<String,Object>(); + jobs = new LinkedHashMap<String, Object>(); results.put(Nutch.STAT_JOBS, jobs); } - Map<String,Object> stats = new HashMap<String,Object>(); - Map<String,Object> countStats = new HashMap<String,Object>(); + Map<String, Object> stats = new HashMap<String, Object>(); + Map<String, Object> countStats = new HashMap<String, Object>(); try { Counters counters = job.getCounters(); for (CounterGroup cg : counters) { - Map<String,Object> cnts = new HashMap<String,Object>(); + Map<String, Object> cnts = new HashMap<String, Object>(); countStats.put(cg.getDisplayName(), cnts); for (Counter c : cg) { cnts.put(c.getName(), c.getValue());
