Modified: nutch/trunk/src/java/org/apache/nutch/metadata/DublinCore.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/DublinCore.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/metadata/DublinCore.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/DublinCore.java Thu Jan 29 05:38:59 2015 @@ -16,149 +16,146 @@ */ package org.apache.nutch.metadata; - /** * A collection of Dublin Core metadata names. - * - * @see <a href="http://dublincore.org">dublincore.org</a> - * + * + * @see <a href="http://dublincore.org">dublincore.org</a> + * * @author Chris Mattmann * @author Jérôme Charron */ public interface DublinCore { - - + /** - * Typically, Format may include the media-type or dimensions of the - * resource. Format may be used to determine the software, hardware or other - * equipment needed to display or operate the resource. Examples of - * dimensions include size and duration. Recommended best practice is to - * select a value from a controlled vocabulary (for example, the list of - * Internet Media Types [MIME] defining computer media formats). + * Typically, Format may include the media-type or dimensions of the resource. + * Format may be used to determine the software, hardware or other equipment + * needed to display or operate the resource. Examples of dimensions include + * size and duration. Recommended best practice is to select a value from a + * controlled vocabulary (for example, the list of Internet Media Types [MIME] + * defining computer media formats). */ public static final String FORMAT = "format"; - + /** - * Recommended best practice is to identify the resource by means of a - * string or number conforming to a formal identification system. Example - * formal identification systems include the Uniform Resource Identifier - * (URI) (including the Uniform Resource Locator (URL)), the Digital Object + * Recommended best practice is to identify the resource by means of a string + * or number conforming to a formal identification system. Example formal + * identification systems include the Uniform Resource Identifier (URI) + * (including the Uniform Resource Locator (URL)), the Digital Object * Identifier (DOI) and the International Standard Book Number (ISBN). */ public static final String IDENTIFIER = "identifier"; - + /** * Date on which the resource was changed. */ public static final String MODIFIED = "modified"; - + /** * An entity responsible for making contributions to the content of the - * resource. Examples of a Contributor include a person, an organisation, or - * a service. Typically, the name of a Contributor should be used to - * indicate the entity. + * resource. Examples of a Contributor include a person, an organisation, or a + * service. Typically, the name of a Contributor should be used to indicate + * the entity. */ public static final String CONTRIBUTOR = "contributor"; - + /** - * The extent or scope of the content of the resource. Coverage will - * typically include spatial location (a place name or geographic - * coordinates), temporal period (a period label, date, or date range) or - * jurisdiction (such as a named administrative entity). Recommended best - * practice is to select a value from a controlled vocabulary (for example, - * the Thesaurus of Geographic Names [TGN]) and that, where appropriate, - * named places or time periods be used in preference to numeric identifiers - * such as sets of coordinates or date ranges. + * The extent or scope of the content of the resource. Coverage will typically + * include spatial location (a place name or geographic coordinates), temporal + * period (a period label, date, or date range) or jurisdiction (such as a + * named administrative entity). Recommended best practice is to select a + * value from a controlled vocabulary (for example, the Thesaurus of + * Geographic Names [TGN]) and that, where appropriate, named places or time + * periods be used in preference to numeric identifiers such as sets of + * coordinates or date ranges. */ public static final String COVERAGE = "coverage"; - + /** * An entity primarily responsible for making the content of the resource. * Examples of a Creator include a person, an organisation, or a service. * Typically, the name of a Creator should be used to indicate the entity. */ public static final String CREATOR = "creator"; - + /** * A date associated with an event in the life cycle of the resource. - * Typically, Date will be associated with the creation or availability of - * the resource. Recommended best practice for encoding the date value is - * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD - * format. + * Typically, Date will be associated with the creation or availability of the + * resource. Recommended best practice for encoding the date value is defined + * in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD format. */ public static final String DATE = "date"; - + /** * An account of the content of the resource. Description may include but is * not limited to: an abstract, table of contents, reference to a graphical * representation of content or a free-text account of the content. */ public static final String DESCRIPTION = "description"; - + /** * A language of the intellectual content of the resource. Recommended best * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639 - * [ISO639], defines two- and three-letter primary language tags with - * optional subtags. Examples include "en" or "eng" for English, "akk" for - * Akkadian, and "en-GB" for English used in the United Kingdom. + * [ISO639], defines two- and three-letter primary language tags with optional + * subtags. Examples include "en" or "eng" for English, "akk" for Akkadian, + * and "en-GB" for English used in the United Kingdom. */ public static final String LANGUAGE = "language"; - + /** * An entity responsible for making the resource available. Examples of a * Publisher include a person, an organisation, or a service. Typically, the * name of a Publisher should be used to indicate the entity. */ public static final String PUBLISHER = "publisher"; - + /** * A reference to a related resource. Recommended best practice is to * reference the resource by means of a string or number conforming to a * formal identification system. */ public static final String RELATION = "relation"; - + /** - * Information about rights held in and over the resource. Typically, a - * Rights element will contain a rights management statement for the - * resource, or reference a service providing such information. Rights - * information often encompasses Intellectual Property Rights (IPR), - * Copyright, and various Property Rights. If the Rights element is absent, - * no assumptions can be made about the status of these and other rights - * with respect to the resource. + * Information about rights held in and over the resource. Typically, a Rights + * element will contain a rights management statement for the resource, or + * reference a service providing such information. Rights information often + * encompasses Intellectual Property Rights (IPR), Copyright, and various + * Property Rights. If the Rights element is absent, no assumptions can be + * made about the status of these and other rights with respect to the + * resource. */ public static final String RIGHTS = "rights"; - + /** * A reference to a resource from which the present resource is derived. The * present resource may be derived from the Source resource in whole or in - * part. Recommended best practice is to reference the resource by means of - * a string or number conforming to a formal identification system. + * part. Recommended best practice is to reference the resource by means of a + * string or number conforming to a formal identification system. */ public static final String SOURCE = "source"; - + /** * The topic of the content of the resource. Typically, a Subject will be - * expressed as keywords, key phrases or classification codes that describe - * a topic of the resource. Recommended best practice is to select a value - * from a controlled vocabulary or formal classification scheme. + * expressed as keywords, key phrases or classification codes that describe a + * topic of the resource. Recommended best practice is to select a value from + * a controlled vocabulary or formal classification scheme. */ public static final String SUBJECT = "subject"; - + /** * A name given to the resource. Typically, a Title will be a name by which * the resource is formally known. */ public static final String TITLE = "title"; - + /** * The nature or genre of the content of the resource. Type includes terms - * describing general categories, functions, genres, or aggregation levels - * for content. Recommended best practice is to select a value from a - * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). - * To describe the physical or digital manifestation of the resource, use - * the Format element. + * describing general categories, functions, genres, or aggregation levels for + * content. Recommended best practice is to select a value from a controlled + * vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). To describe + * the physical or digital manifestation of the resource, use the Format + * element. */ public static final String TYPE = "type"; - + }
Modified: nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java Thu Jan 29 05:38:59 2015 @@ -20,32 +20,32 @@ import org.apache.hadoop.io.Text; /** * A collection of HTTP header names. - * - * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer - * Protocol -- HTTP/1.1 (RFC 2616)</a> + * + * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer Protocol + * -- HTTP/1.1 (RFC 2616)</a> */ public interface HttpHeaders { public final static String TRANSFER_ENCODING = "Transfer-Encoding"; - + public final static String CONTENT_ENCODING = "Content-Encoding"; - + public final static String CONTENT_LANGUAGE = "Content-Language"; public final static String CONTENT_LENGTH = "Content-Length"; - + public final static String CONTENT_LOCATION = "Content-Location"; - + public static final String CONTENT_DISPOSITION = "Content-Disposition"; public final static String CONTENT_MD5 = "Content-MD5"; - + public final static String CONTENT_TYPE = "Content-Type"; public static final Text WRITABLE_CONTENT_TYPE = new Text(CONTENT_TYPE); - + public final static String LAST_MODIFIED = "Last-Modified"; - + public final static String LOCATION = "Location"; } Modified: nutch/trunk/src/java/org/apache/nutch/metadata/MetaWrapper.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/MetaWrapper.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/metadata/MetaWrapper.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/MetaWrapper.java Thu Jan 29 05:38:59 2015 @@ -28,28 +28,29 @@ import org.apache.nutch.crawl.NutchWrita /** * This is a simple decorator that adds metadata to any Writable-s that can be * serialized by <tt>NutchWritable</tt>. This is useful when data needs to be - * temporarily enriched during processing, but this - * temporary metadata doesn't need to be permanently stored after the job is done. + * temporarily enriched during processing, but this temporary metadata doesn't + * need to be permanently stored after the job is done. * * @author Andrzej Bialecki */ public class MetaWrapper extends NutchWritable { private Metadata metadata; - + public MetaWrapper() { super(); metadata = new Metadata(); } - + public MetaWrapper(Writable instance, Configuration conf) { super(instance); metadata = new Metadata(); setConf(conf); } - + public MetaWrapper(Metadata metadata, Writable instance, Configuration conf) { super(instance); - if (metadata == null) metadata = new Metadata(); + if (metadata == null) + metadata = new Metadata(); this.metadata = metadata; setConf(conf); } @@ -60,43 +61,52 @@ public class MetaWrapper extends NutchWr public Metadata getMetadata() { return metadata; } - + /** - * Add metadata. See {@link Metadata#add(String, String)} for more information. - * @param name metadata name - * @param value metadata value + * Add metadata. See {@link Metadata#add(String, String)} for more + * information. + * + * @param name + * metadata name + * @param value + * metadata value */ public void addMeta(String name, String value) { metadata.add(name, value); } - + /** - * Set metadata. See {@link Metadata#set(String, String)} for more information. + * Set metadata. See {@link Metadata#set(String, String)} for more + * information. + * * @param name * @param value */ public void setMeta(String name, String value) { metadata.set(name, value); } - + /** * Get metadata. See {@link Metadata#get(String)} for more information. + * * @param name * @return metadata value */ public String getMeta(String name) { return metadata.get(name); } - + /** - * Get multiple metadata. See {@link Metadata#getValues(String)} for more information. + * Get multiple metadata. See {@link Metadata#getValues(String)} for more + * information. + * * @param name * @return multiple values */ public String[] getMetaValues(String name) { return metadata.getValues(name); } - + public void readFields(DataInput in) throws IOException { super.readFields(in); metadata = new Metadata(); Modified: nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java Thu Jan 29 05:38:59 2015 @@ -30,15 +30,14 @@ import org.apache.hadoop.io.Writable; /** * A multi-valued metadata container. */ -public class Metadata implements Writable, CreativeCommons, -DublinCore, HttpHeaders, Nutch, Feed { +public class Metadata implements Writable, CreativeCommons, DublinCore, + HttpHeaders, Nutch, Feed { /** * A map of all metadata attributes. */ private Map<String, String[]> metadata = null; - /** * Constructs a new, empty metadata. */ @@ -48,9 +47,10 @@ DublinCore, HttpHeaders, Nutch, Feed { /** * Returns true if named value is multivalued. - * @param name name of metadata - * @return true is named value is multivalued, false if single - * value or null + * + * @param name + * name of metadata + * @return true is named value is multivalued, false if single value or null */ public boolean isMultiValued(final String name) { return metadata.get(name) != null && metadata.get(name).length > 1; @@ -58,6 +58,7 @@ DublinCore, HttpHeaders, Nutch, Feed { /** * Returns an array of the names contained in the metadata. + * * @return Metadata names */ public String[] names() { @@ -65,11 +66,11 @@ DublinCore, HttpHeaders, Nutch, Feed { } /** - * Get the value associated to a metadata name. - * If many values are assiociated to the specified name, then the first - * one is returned. - * - * @param name of the metadata. + * Get the value associated to a metadata name. If many values are assiociated + * to the specified name, then the first one is returned. + * + * @param name + * of the metadata. * @return the value associated to the specified metadata name. */ public String get(final String name) { @@ -83,13 +84,15 @@ DublinCore, HttpHeaders, Nutch, Feed { /** * Get the values associated to a metadata name. - * @param name of the metadata. + * + * @param name + * of the metadata. * @return the values associated to a metadata name. */ public String[] getValues(final String name) { return _getValues(name); } - + private String[] _getValues(final String name) { String[] values = metadata.get(name); if (values == null) { @@ -99,12 +102,13 @@ DublinCore, HttpHeaders, Nutch, Feed { } /** - * Add a metadata name/value mapping. - * Add the specified value to the list of values associated to the - * specified metadata name. - * - * @param name the metadata name. - * @param value the metadata value. + * Add a metadata name/value mapping. Add the specified value to the list of + * values associated to the specified metadata name. + * + * @param name + * the metadata name. + * @param value + * the metadata value. */ public void add(final String name, final String value) { String[] values = metadata.get(name); @@ -120,31 +124,37 @@ DublinCore, HttpHeaders, Nutch, Feed { /** * Copy All key-value pairs from properties. - * @param properties properties to copy from + * + * @param properties + * properties to copy from */ public void setAll(Properties properties) { Enumeration<?> names = properties.propertyNames(); while (names.hasMoreElements()) { String name = (String) names.nextElement(); - metadata.put(name, new String[]{properties.getProperty(name)}); + metadata.put(name, new String[] { properties.getProperty(name) }); } } /** - * Set metadata name/value. - * Associate the specified value to the specified metadata name. If some - * previous values were associated to this name, they are removed. - * - * @param name the metadata name. - * @param value the metadata value. + * Set metadata name/value. Associate the specified value to the specified + * metadata name. If some previous values were associated to this name, they + * are removed. + * + * @param name + * the metadata name. + * @param value + * the metadata value. */ public void set(String name, String value) { - metadata.put(name, new String[]{value}); + metadata.put(name, new String[] { value }); } /** * Remove a metadata and all its associated values. - * @param name metadata name to remove + * + * @param name + * metadata name to remove */ public void remove(String name) { metadata.remove(name); @@ -152,12 +162,13 @@ DublinCore, HttpHeaders, Nutch, Feed { /** * Returns the number of metadata names in this metadata. + * * @return number of metadata names */ public int size() { return metadata.size(); } - + /** Remove all mappings from metadata. */ public void clear() { metadata.clear(); @@ -165,7 +176,9 @@ DublinCore, HttpHeaders, Nutch, Feed { public boolean equals(Object o) { - if (o == null) { return false; } + if (o == null) { + return false; + } Metadata other = null; try { @@ -174,7 +187,9 @@ DublinCore, HttpHeaders, Nutch, Feed { return false; } - if (other.size() != size()) { return false; } + if (other.size() != size()) { + return false; + } String[] names = names(); for (int i = 0; i < names.length; i++) { @@ -198,10 +213,7 @@ DublinCore, HttpHeaders, Nutch, Feed { for (int i = 0; i < names.length; i++) { String[] values = _getValues(names[i]); for (int j = 0; j < values.length; j++) { - buf.append(names[i]) - .append("=") - .append(values[j]) - .append(" "); + buf.append(names[i]).append("=").append(values[j]).append(" "); } } return buf.toString(); Modified: nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/Nutch.java Thu Jan 29 05:38:59 2015 @@ -18,20 +18,17 @@ package org.apache.nutch.metadata; import org.apache.hadoop.io.Text; - /** * A collection of Nutch internal metadata constants. - * + * * @author Chris Mattmann * @author Jérôme Charron */ public interface Nutch { - - public static final String ORIGINAL_CHAR_ENCODING = - "OriginalCharEncoding"; - - public static final String CHAR_ENCODING_FOR_CONVERSION = - "CharEncodingForConversion"; + + public static final String ORIGINAL_CHAR_ENCODING = "OriginalCharEncoding"; + + public static final String CHAR_ENCODING_FOR_CONVERSION = "CharEncodingForConversion"; public static final String SIGNATURE_KEY = "nutch.content.digest"; @@ -41,17 +38,22 @@ public interface Nutch { public static final String GENERATE_TIME_KEY = "_ngt_"; - public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(GENERATE_TIME_KEY); + public static final Text WRITABLE_GENERATE_TIME_KEY = new Text( + GENERATE_TIME_KEY); public static final String PROTO_STATUS_KEY = "_pst_"; - public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(PROTO_STATUS_KEY); - + public static final Text WRITABLE_PROTO_STATUS_KEY = new Text( + PROTO_STATUS_KEY); + public static final String FETCH_TIME_KEY = "_ftk_"; - + public static final String FETCH_STATUS_KEY = "_fst_"; - /** Sites may request that search engines don't provide access to cached documents. */ + /** + * Sites may request that search engines don't provide access to cached + * documents. + */ public static final String CACHING_FORBIDDEN_KEY = "caching.forbidden"; /** Show both original forbidden content and summaries (default). */ @@ -70,5 +72,6 @@ public interface Nutch { /** Used by AdaptiveFetchSchedule to maintain custom fetch interval */ public static final String FIXED_INTERVAL_KEY = "fixedInterval"; - public static final Text WRITABLE_FIXED_INTERVAL_KEY = new Text(FIXED_INTERVAL_KEY); + public static final Text WRITABLE_FIXED_INTERVAL_KEY = new Text( + FIXED_INTERVAL_KEY); } Modified: nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java Thu Jan 29 05:38:59 2015 @@ -33,7 +33,7 @@ public class SpellCheckedMetadata extend /** * Treshold divider. - * + * * <code>threshold = searched.length() / TRESHOLD_DIVIDER;</code> */ private static final int TRESHOLD_DIVIDER = 3; @@ -52,7 +52,7 @@ public class SpellCheckedMetadata extend // Uses following array to fill the metanames index and the // metanames list. - Class<?>[] spellthese = {HttpHeaders.class}; + Class<?>[] spellthese = { HttpHeaders.class }; for (Class<?> spellCheckedNames : spellthese) { for (Field field : spellCheckedNames.getFields()) { @@ -73,7 +73,7 @@ public class SpellCheckedMetadata extend /** * Normalizes String. - * + * * @param str * the string to normalize * @return normalized String @@ -102,7 +102,7 @@ public class SpellCheckedMetadata extend * </ul> * If no matching with a well-known metadata name is found, then the original * name is returned. - * + * * @param name * Name to normalize * @return normalized name Modified: nutch/trunk/src/java/org/apache/nutch/net/URLFilter.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLFilter.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/URLFilter.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/URLFilter.java Thu Jan 29 05:38:59 2015 @@ -23,17 +23,18 @@ import org.apache.hadoop.conf.Configurab // Nutch imports import org.apache.nutch.plugin.Pluggable; - /** - * Interface used to limit which URLs enter Nutch. - * Used by the injector and the db updater. + * Interface used to limit which URLs enter Nutch. Used by the injector and the + * db updater. */ public interface URLFilter extends Pluggable, Configurable { /** The name of the extension point. */ public final static String X_POINT_ID = URLFilter.class.getName(); - /* Interface for a filter that transforms a URL: it can pass the - original URL through or "delete" the URL by returning null */ + /* + * Interface for a filter that transforms a URL: it can pass the original URL + * through or "delete" the URL by returning null + */ public String filter(String urlString); } Modified: nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java Thu Jan 29 05:38:59 2015 @@ -38,23 +38,23 @@ public class URLFilterChecker { private Configuration conf; public URLFilterChecker(Configuration conf) { - this.conf = conf; + this.conf = conf; } private void checkOne(String filterName) throws Exception { URLFilter filter = null; - ExtensionPoint point = - PluginRepository.get(conf).getExtensionPoint(URLFilter.X_POINT_ID); + ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint( + URLFilter.X_POINT_ID); if (point == null) - throw new RuntimeException(URLFilter.X_POINT_ID+" not found."); + throw new RuntimeException(URLFilter.X_POINT_ID + " not found."); Extension[] extensions = point.getExtensions(); for (int i = 0; i < extensions.length; i++) { Extension extension = extensions[i]; - filter = (URLFilter)extension.getExtensionInstance(); + filter = (URLFilter) extension.getExtensionInstance(); if (filter.getClass().getName().equals(filterName)) { break; } else { @@ -63,19 +63,19 @@ public class URLFilterChecker { } if (filter == null) - throw new RuntimeException("Filter "+filterName+" not found."); + throw new RuntimeException("Filter " + filterName + " not found."); // jerome : should we keep this behavior? - //if (LogFormatter.hasLoggedSevere()) - // throw new RuntimeException("Severe error encountered."); + // if (LogFormatter.hasLoggedSevere()) + // throw new RuntimeException("Severe error encountered."); - System.out.println("Checking URLFilter "+filterName); + System.out.println("Checking URLFilter " + filterName); BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); String line; - while((line=in.readLine())!=null) { - String out=filter.filter(line); - if(out!=null) { + while ((line = in.readLine()) != null) { + String out = filter.filter(line); + if (out != null) { System.out.print("+"); System.out.println(out); } else { @@ -90,10 +90,10 @@ public class URLFilterChecker { BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); String line; - while((line=in.readLine())!=null) { + while ((line = in.readLine()) != null) { URLFilters filters = new URLFilters(this.conf); String out = filters.filter(line); - if(out!=null) { + if (out != null) { System.out.print("+"); System.out.println(out); } else { @@ -105,8 +105,8 @@ public class URLFilterChecker { public static void main(String[] args) throws Exception { - String usage = "Usage: URLFilterChecker (-filterName filterName | -allCombined) \n" - + "Tool takes a list of URLs, one per line, passed via STDIN.\n"; + String usage = "Usage: URLFilterChecker (-filterName filterName | -allCombined) \n" + + "Tool takes a list of URLs, one per line, passed via STDIN.\n"; if (args.length == 0) { System.err.println(usage); Modified: nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java Thu Jan 29 05:38:59 2015 @@ -20,16 +20,15 @@ package org.apache.nutch.net; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.plugin.PluginRepository; -/** Creates and caches {@link URLFilter} implementing plugins.*/ +/** Creates and caches {@link URLFilter} implementing plugins. */ public class URLFilters { public static final String URLFILTER_ORDER = "urlfilter.order"; private URLFilter[] filters; public URLFilters(Configuration conf) { - this.filters = (URLFilter[]) PluginRepository.get(conf) - .getOrderedPlugins(URLFilter.class, URLFilter.X_POINT_ID, - URLFILTER_ORDER); + this.filters = (URLFilter[]) PluginRepository.get(conf).getOrderedPlugins( + URLFilter.class, URLFilter.X_POINT_ID, URLFILTER_ORDER); } /** Run all defined filters. Assume logical AND. */ Modified: nutch/trunk/src/java/org/apache/nutch/net/URLNormalizer.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLNormalizer.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/URLNormalizer.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/URLNormalizer.java Thu Jan 29 05:38:59 2015 @@ -21,13 +21,17 @@ import java.net.MalformedURLException; import org.apache.hadoop.conf.Configurable; -/** Interface used to convert URLs to normal form and optionally perform substitutions */ +/** + * Interface used to convert URLs to normal form and optionally perform + * substitutions + */ public interface URLNormalizer extends Configurable { - + /* Extension ID */ public static final String X_POINT_ID = URLNormalizer.class.getName(); - + /* Interface for URL normalization */ - public String normalize(String urlString, String scope) throws MalformedURLException; + public String normalize(String urlString, String scope) + throws MalformedURLException; } Modified: nutch/trunk/src/java/org/apache/nutch/net/URLNormalizerChecker.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLNormalizerChecker.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/URLNormalizerChecker.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/URLNormalizerChecker.java Thu Jan 29 05:38:59 2015 @@ -36,23 +36,23 @@ public class URLNormalizerChecker { private Configuration conf; public URLNormalizerChecker(Configuration conf) { - this.conf = conf; + this.conf = conf; } private void checkOne(String normalizerName, String scope) throws Exception { URLNormalizer normalizer = null; - ExtensionPoint point = - PluginRepository.get(conf).getExtensionPoint(URLNormalizer.X_POINT_ID); + ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint( + URLNormalizer.X_POINT_ID); if (point == null) - throw new RuntimeException(URLNormalizer.X_POINT_ID+" not found."); + throw new RuntimeException(URLNormalizer.X_POINT_ID + " not found."); Extension[] extensions = point.getExtensions(); for (int i = 0; i < extensions.length; i++) { Extension extension = extensions[i]; - normalizer = (URLNormalizer)extension.getExtensionInstance(); + normalizer = (URLNormalizer) extension.getExtensionInstance(); if (normalizer.getClass().getName().equals(normalizerName)) { break; } else { @@ -61,7 +61,8 @@ public class URLNormalizerChecker { } if (normalizer == null) - throw new RuntimeException("URLNormalizer "+normalizerName+" not found."); + throw new RuntimeException("URLNormalizer " + normalizerName + + " not found."); System.out.println("Checking URLNormalizer " + normalizerName); @@ -79,7 +80,7 @@ public class URLNormalizerChecker { BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); String line; URLNormalizers normalizers = new URLNormalizers(conf, scope); - while((line = in.readLine()) != null) { + while ((line = in.readLine()) != null) { String out = normalizers.normalize(line, scope); System.out.println(out); } @@ -88,7 +89,7 @@ public class URLNormalizerChecker { public static void main(String[] args) throws Exception { String usage = "Usage: URLNormalizerChecker [-normalizer <normalizerName>] [-scope <scope>]" - + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink"; + + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink"; String normalizerName = null; String scope = URLNormalizers.SCOPE_DEFAULT; @@ -103,7 +104,8 @@ public class URLNormalizerChecker { } } - URLNormalizerChecker checker = new URLNormalizerChecker(NutchConfiguration.create()); + URLNormalizerChecker checker = new URLNormalizerChecker( + NutchConfiguration.create()); if (normalizerName != null) { checker.checkOne(normalizerName, scope); } else { Modified: nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java Thu Jan 29 05:38:59 2015 @@ -43,47 +43,63 @@ import org.apache.nutch.util.ObjectCache * contexts where they are used (note however that they need to be activated * first through <tt>plugin.include</tt> property). * - * <p>There is one global scope defined by default, which consists of all - * active normalizers. The order in which these normalizers - * are executed may be defined in "urlnormalizer.order" property, which lists - * space-separated implementation classes (if this property is missing normalizers - * will be run in random order). If there are more - * normalizers activated than explicitly named on this list, the remaining ones - * will be run in random order after the ones specified on the list are executed.</p> - * <p>You can define a set of contexts (or scopes) in which normalizers may be + * <p> + * There is one global scope defined by default, which consists of all active + * normalizers. The order in which these normalizers are executed may be defined + * in "urlnormalizer.order" property, which lists space-separated implementation + * classes (if this property is missing normalizers will be run in random + * order). If there are more normalizers activated than explicitly named on this + * list, the remaining ones will be run in random order after the ones specified + * on the list are executed. + * </p> + * <p> + * You can define a set of contexts (or scopes) in which normalizers may be * called. Each scope can have its own list of normalizers (defined in * "urlnormalizer.scope.<scope_name>" property) and its own order (defined in * "urlnormalizer.order.<scope_name>" property). If any of these properties are - * missing, default settings are used for the global scope.</p> - * <p>In case no normalizers are required for any given scope, a - * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should be used.</p> - * <p>Each normalizer may further select among many configurations, depending on - * the scope in which it is called, because the scope name is passed as a parameter - * to each normalizer. You can also use the same normalizer for many scopes.</p> - * <p>Several scopes have been defined, and various Nutch tools will attempt using - * scope-specific normalizers first (and fall back to default config if scope-specific - * configuration is missing).</p> - * <p>Normalizers may be run several times, to ensure that modifications introduced + * missing, default settings are used for the global scope. + * </p> + * <p> + * In case no normalizers are required for any given scope, a + * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should + * be used. + * </p> + * <p> + * Each normalizer may further select among many configurations, depending on + * the scope in which it is called, because the scope name is passed as a + * parameter to each normalizer. You can also use the same normalizer for many + * scopes. + * </p> + * <p> + * Several scopes have been defined, and various Nutch tools will attempt using + * scope-specific normalizers first (and fall back to default config if + * scope-specific configuration is missing). + * </p> + * <p> + * Normalizers may be run several times, to ensure that modifications introduced * by normalizers at the end of the list can be further reduced by normalizers - * executed at the beginning. By default this loop is executed just once - if you want - * to ensure that all possible combinations have been applied you may want to run - * this loop up to the number of activated normalizers. This loop count can be configured - * through <tt>urlnormalizer.loop.count</tt> property. As soon as the url is - * unchanged the loop will stop and return the result.</p> + * executed at the beginning. By default this loop is executed just once - if + * you want to ensure that all possible combinations have been applied you may + * want to run this loop up to the number of activated normalizers. This loop + * count can be configured through <tt>urlnormalizer.loop.count</tt> property. + * As soon as the url is unchanged the loop will stop and return the result. + * </p> * * @author Andrzej Bialecki */ public final class URLNormalizers { - - /** Default scope. If no scope properties are defined then the configuration for - * this scope will be used. + + /** + * Default scope. If no scope properties are defined then the configuration + * for this scope will be used. */ public static final String SCOPE_DEFAULT = "default"; /** Scope used by {@link org.apache.nutch.crawl.URLPartitioner}. */ public static final String SCOPE_PARTITION = "partition"; /** Scope used by {@link org.apache.nutch.crawl.Generator}. */ public static final String SCOPE_GENERATE_HOST_COUNT = "generate_host_count"; - /** Scope used by {@link org.apache.nutch.fetcher.Fetcher} when processing + /** + * Scope used by {@link org.apache.nutch.fetcher.Fetcher} when processing * redirect URLs. */ public static final String SCOPE_FETCHER = "fetcher"; @@ -93,16 +109,21 @@ public final class URLNormalizers { public static final String SCOPE_LINKDB = "linkdb"; /** Scope used by {@link org.apache.nutch.crawl.Injector}. */ public static final String SCOPE_INJECT = "inject"; - /** Scope used when constructing new {@link org.apache.nutch.parse.Outlink} instances. */ + /** + * Scope used when constructing new {@link org.apache.nutch.parse.Outlink} + * instances. + */ public static final String SCOPE_OUTLINK = "outlink"; /** Scope used when indexing URLs. */ public static final String SCOPE_INDEXER = "indexer"; - public static final Logger LOG = LoggerFactory.getLogger(URLNormalizers.class); + public static final Logger LOG = LoggerFactory + .getLogger(URLNormalizers.class); /* Empty extension list for caching purposes. */ - private final List<Extension> EMPTY_EXTENSION_LIST = Collections.<Extension>emptyList(); - + private final List<Extension> EMPTY_EXTENSION_LIST = Collections + .<Extension> emptyList(); + private final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0]; private Configuration conf; @@ -110,37 +131,39 @@ public final class URLNormalizers { private ExtensionPoint extensionPoint; private URLNormalizer[] normalizers; - + private int loopCount; public URLNormalizers(Configuration conf, String scope) { this.conf = conf; this.extensionPoint = PluginRepository.get(conf).getExtensionPoint( - URLNormalizer.X_POINT_ID); + URLNormalizer.X_POINT_ID); ObjectCache objectCache = ObjectCache.get(conf); - + if (this.extensionPoint == null) { throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID - + " not found."); + + " not found."); } - normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + scope); + normalizers = (URLNormalizer[]) objectCache + .getObject(URLNormalizer.X_POINT_ID + "_" + scope); if (normalizers == null) { normalizers = getURLNormalizers(scope); } if (normalizers == EMPTY_NORMALIZERS) { - normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT); + normalizers = (URLNormalizer[]) objectCache + .getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT); if (normalizers == null) { normalizers = getURLNormalizers(SCOPE_DEFAULT); } } - + loopCount = conf.getInt("urlnormalizer.loop.count", 1); } /** - * Function returns an array of {@link URLNormalizer}s for a given scope, - * with a specified order. + * Function returns an array of {@link URLNormalizer}s for a given scope, with + * a specified order. * * @param scope * The scope to return the <code>Array</code> of @@ -152,12 +175,13 @@ public final class URLNormalizers { URLNormalizer[] getURLNormalizers(String scope) { List<Extension> extensions = getExtensions(scope); ObjectCache objectCache = ObjectCache.get(conf); - + if (extensions == EMPTY_EXTENSION_LIST) { return EMPTY_NORMALIZERS; } - - List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size()); + + List<URLNormalizer> normalizers = new Vector<URLNormalizer>( + extensions.size()); Iterator<Extension> it = extensions.iterator(); while (it.hasNext()) { @@ -175,14 +199,13 @@ public final class URLNormalizers { } catch (PluginRuntimeException e) { e.printStackTrace(); LOG.warn("URLNormalizers:PluginRuntimeException when " - + "initializing url normalizer plugin " - + ext.getDescriptor().getPluginId() - + " instance in getURLNormalizers " - + "function: attempting to continue instantiating plugins"); + + "initializing url normalizer plugin " + + ext.getDescriptor().getPluginId() + + " instance in getURLNormalizers " + + "function: attempting to continue instantiating plugins"); } } - return normalizers.toArray(new URLNormalizer[normalizers - .size()]); + return normalizers.toArray(new URLNormalizer[normalizers.size()]); } /** @@ -197,9 +220,8 @@ public final class URLNormalizers { @SuppressWarnings("unchecked") private List<Extension> getExtensions(String scope) { ObjectCache objectCache = ObjectCache.get(conf); - List<Extension> extensions = - (List<Extension>) objectCache.getObject(URLNormalizer.X_POINT_ID + "_x_" - + scope); + List<Extension> extensions = (List<Extension>) objectCache + .getObject(URLNormalizer.X_POINT_ID + "_x_" + scope); // Just compare the reference: // if this is the empty list, we know we will find no extension. @@ -210,11 +232,13 @@ public final class URLNormalizers { if (extensions == null) { extensions = findExtensions(scope); if (extensions != null) { - objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, extensions); + objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, + extensions); } else { // Put the empty extension list into cache // to remember we don't know any related extension. - objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, EMPTY_EXTENSION_LIST); + objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, + EMPTY_EXTENSION_LIST); extensions = EMPTY_EXTENSION_LIST; } } @@ -234,7 +258,8 @@ public final class URLNormalizers { String[] orders = null; String orderlist = conf.get("urlnormalizer.order." + scope); - if (orderlist == null) orderlist = conf.get("urlnormalizer.order"); + if (orderlist == null) + orderlist = conf.get("urlnormalizer.order"); if (orderlist != null && !orderlist.trim().equals("")) { orders = orderlist.trim().split("\\s+"); } @@ -272,13 +297,17 @@ public final class URLNormalizers { /** * Normalize - * @param urlString The URL string to normalize. - * @param scope The given scope. + * + * @param urlString + * The URL string to normalize. + * @param scope + * The given scope. * @return A normalized String, using the given <code>scope</code> - * @throws MalformedURLException If the given URL string is malformed. + * @throws MalformedURLException + * If the given URL string is malformed. */ public String normalize(String urlString, String scope) - throws MalformedURLException { + throws MalformedURLException { // optionally loop several times, and break if no further changes String initialString = urlString; for (int k = 0; k < loopCount; k++) { @@ -287,7 +316,8 @@ public final class URLNormalizers { return null; urlString = this.normalizers[i].normalize(urlString, scope); } - if (initialString.equals(urlString)) break; + if (initialString.equals(urlString)) + break; initialString = urlString; } return urlString; Modified: nutch/trunk/src/java/org/apache/nutch/net/package-info.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/package-info.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/package-info.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/package-info.java Thu Jan 29 05:38:59 2015 @@ -20,3 +20,4 @@ * and {@link org.apache.nutch.net.URLNormalizer normalizers}. */ package org.apache.nutch.net; + Modified: nutch/trunk/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java Thu Jan 29 05:38:59 2015 @@ -26,15 +26,15 @@ import java.text.ParseException; /** * class to handle HTTP dates. - * + * * Modified from FastHttpDateFormat.java in jakarta-tomcat. - * + * * @author John Xing */ public class HttpDateFormat { - protected static SimpleDateFormat format = - new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US); + protected static SimpleDateFormat format = new SimpleDateFormat( + "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US); /** * HTTP date uses TimeZone GMT @@ -43,29 +43,29 @@ public class HttpDateFormat { format.setTimeZone(TimeZone.getTimeZone("GMT")); } - //HttpDate (long t) { - //} + // HttpDate (long t) { + // } - //HttpDate (String s) { - //} + // HttpDate (String s) { + // } -// /** -// * Get the current date in HTTP format. -// */ -// public static String getCurrentDate() { -// -// long now = System.currentTimeMillis(); -// if ((now - currentDateGenerated) > 1000) { -// synchronized (format) { -// if ((now - currentDateGenerated) > 1000) { -// currentDateGenerated = now; -// currentDate = format.format(new Date(now)); -// } -// } -// } -// return currentDate; -// -// } + // /** + // * Get the current date in HTTP format. + // */ + // public static String getCurrentDate() { + // + // long now = System.currentTimeMillis(); + // if ((now - currentDateGenerated) > 1000) { + // synchronized (format) { + // if ((now - currentDateGenerated) > 1000) { + // currentDateGenerated = now; + // currentDate = format.format(new Date(now)); + // } + // } + // } + // return currentDate; + // + // } /** * Get the HTTP format of the specified date. Modified: nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java Thu Jan 29 05:38:59 2015 @@ -21,13 +21,13 @@ import java.io.Serializable; /** * Base exception for all protocol handlers + * * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead. */ @Deprecated @SuppressWarnings("serial") public class ProtocolException extends Exception implements Serializable { - public ProtocolException() { super(); } Modified: nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java Thu Jan 29 05:38:59 2015 @@ -23,12 +23,11 @@ import java.net.URL; import org.apache.nutch.metadata.HttpHeaders; import org.apache.nutch.metadata.Metadata; - /** - * A response interface. Makes all protocols model HTTP. + * A response interface. Makes all protocols model HTTP. */ public interface Response extends HttpHeaders { - + /** Returns the URL used to retrieve this response. */ public URL getUrl(); @@ -40,7 +39,7 @@ public interface Response extends HttpHe /** Returns all the headers. */ public Metadata getHeaders(); - + /** Returns the full content of the response. */ public byte[] getContent(); Modified: nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java Thu Jan 29 05:38:59 2015 @@ -20,3 +20,4 @@ * interface, sea also {@link org.apache.nutch.protocol}. */ package org.apache.nutch.net.protocols; + Modified: nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java Thu Jan 29 05:38:59 2015 @@ -24,8 +24,8 @@ import java.util.Properties; import org.apache.nutch.metadata.Metadata; /** - * This class holds the information about HTML "meta" tags extracted from - * a page. Some special tags have convenience methods for easy checking. + * This class holds the information about HTML "meta" tags extracted from a + * page. Some special tags have convenience methods for easy checking. */ public class HTMLMetaTags { private boolean noIndex = false; @@ -45,7 +45,7 @@ public class HTMLMetaTags { private Metadata generalTags = new Metadata(); private Properties httpEquivTags = new Properties(); - + /** * Sets all boolean values to <code>false</code>. Clears all other tags. */ @@ -156,8 +156,8 @@ public class HTMLMetaTags { } /** - * A convenience method. Returns the current value of <code>refreshTime</code>. - * The value may be invalid if {@link #getRefresh()}returns + * A convenience method. Returns the current value of <code>refreshTime</code> + * . The value may be invalid if {@link #getRefresh()}returns * <code>false</code>. */ public int getRefreshTime() { @@ -179,16 +179,12 @@ public class HTMLMetaTags { public Properties getHttpEquivTags() { return httpEquivTags; } - + public String toString() { StringBuffer sb = new StringBuffer(); - sb.append("base=" + baseHref - + ", noCache=" + noCache - + ", noFollow=" + noFollow - + ", noIndex=" + noIndex - + ", refresh=" + refresh - + ", refreshHref=" + refreshHref + "\n" - ); + sb.append("base=" + baseHref + ", noCache=" + noCache + ", noFollow=" + + noFollow + ", noIndex=" + noIndex + ", refresh=" + refresh + + ", refreshHref=" + refreshHref + "\n"); sb.append(" * general tags:\n"); String[] names = generalTags.names(); for (String name : names) { @@ -199,7 +195,7 @@ public class HTMLMetaTags { Iterator<Object> it = httpEquivTags.keySet().iterator(); it = httpEquivTags.keySet().iterator(); while (it.hasNext()) { - String key = (String)it.next(); + String key = (String) it.next(); sb.append(" - " + key + "\t=\t" + httpEquivTags.get(key) + "\n"); } return sb.toString(); Modified: nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilter.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilter.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilter.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilter.java Thu Jan 29 05:38:59 2015 @@ -27,16 +27,19 @@ import org.apache.hadoop.conf.Configurab import org.apache.nutch.plugin.Pluggable; import org.apache.nutch.protocol.Content; - -/** Extension point for DOM-based HTML parsers. Permits one to add additional - * metadata to HTML parses. All plugins found which implement this extension +/** + * Extension point for DOM-based HTML parsers. Permits one to add additional + * metadata to HTML parses. All plugins found which implement this extension * point are run sequentially on the parse. */ public interface HtmlParseFilter extends Pluggable, Configurable { /** The name of the extension point. */ final static String X_POINT_ID = HtmlParseFilter.class.getName(); - /** Adds metadata or otherwise modifies a parse of HTML content, given - * the DOM tree of a page. */ - ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc); + /** + * Adds metadata or otherwise modifies a parse of HTML content, given the DOM + * tree of a page. + */ + ParseResult filter(Content content, ParseResult parseResult, + HTMLMetaTags metaTags, DocumentFragment doc); } Modified: nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilters.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilters.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilters.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilters.java Thu Jan 29 05:38:59 2015 @@ -23,11 +23,11 @@ import org.apache.hadoop.conf.Configurat import org.w3c.dom.DocumentFragment; -/** Creates and caches {@link HtmlParseFilter} implementing plugins.*/ +/** Creates and caches {@link HtmlParseFilter} implementing plugins. */ public class HtmlParseFilters { private HtmlParseFilter[] htmlParseFilters; - + public static final String HTMLPARSEFILTER_ORDER = "htmlparsefilter.order"; public HtmlParseFilters(Configuration conf) { @@ -37,13 +37,14 @@ public class HtmlParseFilters { } /** Run all defined filters. */ - public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc) { + public ParseResult filter(Content content, ParseResult parseResult, + HTMLMetaTags metaTags, DocumentFragment doc) { // loop on each filter - for (int i = 0 ; i < this.htmlParseFilters.length; i++) { + for (int i = 0; i < this.htmlParseFilters.length; i++) { // call filter interface - parseResult = - htmlParseFilters[i].filter(content, parseResult, metaTags, doc); + parseResult = htmlParseFilters[i].filter(content, parseResult, metaTags, + doc); // any failure on parse obj, return if (!parseResult.isSuccess()) { Modified: nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java Thu Jan 29 05:38:59 2015 @@ -30,103 +30,102 @@ import org.apache.hadoop.io.Writable; /* An outgoing link from a page. */ public class Outlink implements Writable { - private String toUrl; - private String anchor; - private MapWritable md; - - public Outlink() { - } - - public Outlink(String toUrl, String anchor) throws MalformedURLException { - this.toUrl = toUrl; - if (anchor == null) - anchor = ""; - this.anchor = anchor; - md = null; - } - - public void readFields(DataInput in) throws IOException { - toUrl = Text.readString(in); - anchor = Text.readString(in); - boolean hasMD = in.readBoolean(); - if (hasMD) { - md = new org.apache.hadoop.io.MapWritable(); - md.readFields(in); - } else - md = null; - } - - /** Skips over one Outlink in the input. */ - public static void skip(DataInput in) throws IOException { - Text.skip(in); // skip toUrl - Text.skip(in); // skip anchor - boolean hasMD = in.readBoolean(); - if (hasMD) { - MapWritable metadata = new org.apache.hadoop.io.MapWritable(); - metadata.readFields(in); - ; - } - } - - public void write(DataOutput out) throws IOException { - Text.writeString(out, toUrl); - Text.writeString(out, anchor); - if (md != null && md.size() > 0) { - out.writeBoolean(true); - md.write(out); - } else { - out.writeBoolean(false); - } - } - - public static Outlink read(DataInput in) throws IOException { - Outlink outlink = new Outlink(); - outlink.readFields(in); - return outlink; - } - - public String getToUrl() { - return toUrl; - } - - public void setUrl(String toUrl) { - this.toUrl = toUrl; - } - - public String getAnchor() { - return anchor; - } - - public MapWritable getMetadata() { - return md; - } - - public void setMetadata(MapWritable md) { - this.md = md; - } - - public boolean equals(Object o) { - if (!(o instanceof Outlink)) - return false; - Outlink other = (Outlink) o; - return this.toUrl.equals(other.toUrl) - && this.anchor.equals(other.anchor); - } - - public String toString() { - StringBuffer repr = new StringBuffer("toUrl: "); - repr.append(toUrl); - repr.append(" anchor: "); - repr.append(anchor); - if (md != null && !md.isEmpty()) { - for (Entry<Writable, Writable> e : md.entrySet()) { - repr.append(" "); - repr.append(e.getKey()); - repr.append(": "); - repr.append(e.getValue()); - } - } - return repr.toString(); + private String toUrl; + private String anchor; + private MapWritable md; + + public Outlink() { + } + + public Outlink(String toUrl, String anchor) throws MalformedURLException { + this.toUrl = toUrl; + if (anchor == null) + anchor = ""; + this.anchor = anchor; + md = null; + } + + public void readFields(DataInput in) throws IOException { + toUrl = Text.readString(in); + anchor = Text.readString(in); + boolean hasMD = in.readBoolean(); + if (hasMD) { + md = new org.apache.hadoop.io.MapWritable(); + md.readFields(in); + } else + md = null; + } + + /** Skips over one Outlink in the input. */ + public static void skip(DataInput in) throws IOException { + Text.skip(in); // skip toUrl + Text.skip(in); // skip anchor + boolean hasMD = in.readBoolean(); + if (hasMD) { + MapWritable metadata = new org.apache.hadoop.io.MapWritable(); + metadata.readFields(in); + ; + } + } + + public void write(DataOutput out) throws IOException { + Text.writeString(out, toUrl); + Text.writeString(out, anchor); + if (md != null && md.size() > 0) { + out.writeBoolean(true); + md.write(out); + } else { + out.writeBoolean(false); + } + } + + public static Outlink read(DataInput in) throws IOException { + Outlink outlink = new Outlink(); + outlink.readFields(in); + return outlink; + } + + public String getToUrl() { + return toUrl; + } + + public void setUrl(String toUrl) { + this.toUrl = toUrl; + } + + public String getAnchor() { + return anchor; + } + + public MapWritable getMetadata() { + return md; + } + + public void setMetadata(MapWritable md) { + this.md = md; + } + + public boolean equals(Object o) { + if (!(o instanceof Outlink)) + return false; + Outlink other = (Outlink) o; + return this.toUrl.equals(other.toUrl) && this.anchor.equals(other.anchor); + } + + public String toString() { + StringBuffer repr = new StringBuffer("toUrl: "); + repr.append(toUrl); + repr.append(" anchor: "); + repr.append(anchor); + if (md != null && !md.isEmpty()) { + for (Entry<Writable, Writable> e : md.entrySet()) { + repr.append(" "); + repr.append(e.getKey()); + repr.append(": "); + repr.append(e.getValue()); + } } + return repr.toString(); + } } Modified: nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java Thu Jan 29 05:38:59 2015 @@ -34,8 +34,8 @@ import org.apache.oro.text.regex.Perl5Co import org.apache.oro.text.regex.Perl5Matcher; /** - * Extractor to extract {@link org.apache.nutch.parse.Outlink}s - * / URLs from plain text using Regular Expressions. + * Extractor to extract {@link org.apache.nutch.parse.Outlink}s / URLs from + * plain text using Regular Expressions. * * @see <a * href="http://wiki.java.net/bin/view/Javapedia/RegularExpressions">Comparison @@ -48,23 +48,26 @@ import org.apache.oro.text.regex.Perl5Ma * @since 0.7 */ public class OutlinkExtractor { - private static final Logger LOG = LoggerFactory.getLogger(OutlinkExtractor.class); + private static final Logger LOG = LoggerFactory + .getLogger(OutlinkExtractor.class); /** * Regex pattern to get URLs within a plain text. * * @see <a * href="http://www.truerwords.net/articles/ut/urlactivation.html">http://www.truerwords.net/articles/ut/urlactivation.html + * </a> */ - private static final String URL_PATTERN = - "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)"; + private static final String URL_PATTERN = "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)"; /** - * Extracts <code>Outlink</code> from given plain text. - * Applying this method to non-plain-text can result in extremely lengthy - * runtimes for parasitic cases (postscript is a known example). - * @param plainText the plain text from wich URLs should be extracted. + * Extracts <code>Outlink</code> from given plain text. Applying this method + * to non-plain-text can result in extremely lengthy runtimes for parasitic + * cases (postscript is a known example). + * + * @param plainText + * the plain text from wich URLs should be extracted. * * @return Array of <code>Outlink</code>s within found in plainText */ @@ -73,15 +76,18 @@ public class OutlinkExtractor { } /** - * Extracts <code>Outlink</code> from given plain text and adds anchor - * to the extracted <code>Outlink</code>s + * Extracts <code>Outlink</code> from given plain text and adds anchor to the + * extracted <code>Outlink</code>s * - * @param plainText the plain text from wich URLs should be extracted. - * @param anchor the anchor of the url + * @param plainText + * the plain text from wich URLs should be extracted. + * @param anchor + * the anchor of the url * * @return Array of <code>Outlink</code>s within found in plainText */ - public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) { + public static Outlink[] getOutlinks(final String plainText, String anchor, + Configuration conf) { long start = System.currentTimeMillis(); final List<Outlink> outlinks = new ArrayList<Outlink>(); @@ -97,11 +103,11 @@ public class OutlinkExtractor { MatchResult result; String url; - //loop the matches + // loop the matches while (matcher.contains(input, pattern)) { // if this is taking too long, stop matching - // (SHOULD really check cpu time used so that heavily loaded systems - // do not unnecessarily hit this limit.) + // (SHOULD really check cpu time used so that heavily loaded systems + // do not unnecessarily hit this limit.) if (System.currentTimeMillis() - start >= 60000L) { if (LOG.isWarnEnabled()) { LOG.warn("Time limit exceeded for getOutLinks"); @@ -117,13 +123,16 @@ public class OutlinkExtractor { } } } catch (Exception ex) { - // if the matcher fails (perhaps a malformed URL) we just log it and move on - if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); } + // if the matcher fails (perhaps a malformed URL) we just log it and move + // on + if (LOG.isErrorEnabled()) { + LOG.error("getOutlinks", ex); + } } final Outlink[] retval; - //create array of the Outlinks + // create array of the Outlinks if (outlinks != null && outlinks.size() > 0) { retval = outlinks.toArray(new Outlink[0]); } else { @@ -132,7 +141,6 @@ public class OutlinkExtractor { return retval; } - /** * Extracts outlinks from a plain text. <br /> @@ -162,7 +170,7 @@ public class OutlinkExtractor { // url = re.getParen(0); // // if (LOG.isTraceEnabled()) { - // LOG.trace("Extracted url: " + url); + // LOG.trace("Extracted url: " + url); // } // // try { @@ -192,9 +200,8 @@ public class OutlinkExtractor { } /** - * Extracts outlinks from a plain text. - * </p> - * This Method takes the JDK5 Regexp API. + * Extracts outlinks from a plain text. </p> This Method takes the JDK5 Regexp + * API. * * @param plainText * @@ -243,5 +250,5 @@ public class OutlinkExtractor { // // return retval; } - + } Modified: nutch/trunk/src/java/org/apache/nutch/parse/Parse.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/Parse.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/Parse.java Thu Jan 29 05:38:59 2015 @@ -17,18 +17,22 @@ package org.apache.nutch.parse; -/** The result of parsing a page's raw content. +/** + * The result of parsing a page's raw content. + * * @see Parser#getParse(Content) */ public interface Parse { - - /** The textual content of the page. This is indexed, searched, and used when - * generating snippets.*/ + + /** + * The textual content of the page. This is indexed, searched, and used when + * generating snippets. + */ String getText(); /** Other data extracted from the page. */ ParseData getData(); - + /** Indicates if the parse is coming from a url or a sub-url */ boolean isCanonical(); } Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseCallable.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseCallable.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/ParseCallable.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/ParseCallable.java Thu Jan 29 05:38:59 2015 @@ -24,7 +24,7 @@ import org.apache.nutch.protocol.Content class ParseCallable implements Callable<ParseResult> { private Parser p; private Content content; - + public ParseCallable(Parser p, Content content) { this.p = p; this.content = content; @@ -33,5 +33,5 @@ class ParseCallable implements Callable< @Override public ParseResult call() throws Exception { return p.getParse(content); - } + } } \ No newline at end of file Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Thu Jan 29 05:38:59 2015 @@ -30,8 +30,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.nutch.metadata.Metadata; import org.apache.nutch.util.NutchConfiguration; - -/** Data extracted from a page's content. +/** + * Data extracted from a page's content. + * * @see Parse#getData() */ public final class ParseData extends VersionedWritable { @@ -45,19 +46,19 @@ public final class ParseData extends Ver private Metadata parseMeta; private ParseStatus status; private byte version = VERSION; - + public ParseData() { contentMeta = new Metadata(); parseMeta = new Metadata(); } public ParseData(ParseStatus status, String title, Outlink[] outlinks, - Metadata contentMeta) { + Metadata contentMeta) { this(status, title, outlinks, contentMeta, new Metadata()); } - + public ParseData(ParseStatus status, String title, Outlink[] outlinks, - Metadata contentMeta, Metadata parseMeta) { + Metadata contentMeta, Metadata parseMeta) { this.status = status; this.title = title; this.outlinks = outlinks; @@ -70,25 +71,34 @@ public final class ParseData extends Ver // /** The status of parsing the page. */ - public ParseStatus getStatus() { return status; } - + public ParseStatus getStatus() { + return status; + } + /** The title of the page. */ - public String getTitle() { return title; } + public String getTitle() { + return title; + } /** The outlinks of the page. */ - public Outlink[] getOutlinks() { return outlinks; } + public Outlink[] getOutlinks() { + return outlinks; + } /** The original Metadata retrieved from content */ - public Metadata getContentMeta() { return contentMeta; } + public Metadata getContentMeta() { + return contentMeta; + } /** - * Other content properties. - * This is the place to find format-specific properties. - * Different parser implementations for different content types will populate - * this differently. + * Other content properties. This is the place to find format-specific + * properties. Different parser implementations for different content types + * will populate this differently. */ - public Metadata getParseMeta() { return parseMeta; } - + public Metadata getParseMeta() { + return parseMeta; + } + public void setParseMeta(Metadata parseMeta) { this.parseMeta = parseMeta; } @@ -96,11 +106,12 @@ public final class ParseData extends Ver public void setOutlinks(Outlink[] outlinks) { this.outlinks = outlinks; } - + /** - * Get a metadata single value. - * This method first looks for the metadata value in the parse metadata. If no - * value is found it the looks for the metadata in the content metadata. + * Get a metadata single value. This method first looks for the metadata value + * in the parse metadata. If no value is found it the looks for the metadata + * in the content metadata. + * * @see #getContentMeta() * @see #getParseMeta() */ @@ -111,12 +122,14 @@ public final class ParseData extends Ver } return value; } - + // // Writable methods // - public byte getVersion() { return version; } + public byte getVersion() { + return version; + } public final void readFields(DataInput in) throws IOException { @@ -125,16 +138,16 @@ public final class ParseData extends Ver if (version != VERSION) throw new VersionMismatchException(VERSION, version); status = ParseStatus.read(in); - title = Text.readString(in); // read title + title = Text.readString(in); // read title - int numOutlinks = in.readInt(); + int numOutlinks = in.readInt(); outlinks = new Outlink[numOutlinks]; for (int i = 0; i < numOutlinks; i++) { outlinks[i] = Outlink.read(in); } - + if (version < 3) { - int propertyCount = in.readInt(); // read metadata + int propertyCount = in.readInt(); // read metadata contentMeta.clear(); for (int i = 0; i < propertyCount; i++) { contentMeta.add(Text.readString(in), Text.readString(in)); @@ -150,15 +163,15 @@ public final class ParseData extends Ver } public final void write(DataOutput out) throws IOException { - out.writeByte(VERSION); // write version - status.write(out); // write status - Text.writeString(out, title); // write title + out.writeByte(VERSION); // write version + status.write(out); // write status + Text.writeString(out, title); // write title - out.writeInt(outlinks.length); // write outlinks + out.writeInt(outlinks.length); // write outlinks for (int i = 0; i < outlinks.length; i++) { outlinks[i].write(out); } - contentMeta.write(out); // write content metadata + contentMeta.write(out); // write content metadata parseMeta.write(out); } @@ -175,38 +188,36 @@ public final class ParseData extends Ver public boolean equals(Object o) { if (!(o instanceof ParseData)) return false; - ParseData other = (ParseData)o; - return - this.status.equals(other.status) && - this.title.equals(other.title) && - Arrays.equals(this.outlinks, other.outlinks) && - this.contentMeta.equals(other.contentMeta) && - this.parseMeta.equals(other.parseMeta); + ParseData other = (ParseData) o; + return this.status.equals(other.status) && this.title.equals(other.title) + && Arrays.equals(this.outlinks, other.outlinks) + && this.contentMeta.equals(other.contentMeta) + && this.parseMeta.equals(other.parseMeta); } public String toString() { StringBuffer buffer = new StringBuffer(); - buffer.append("Version: " + version + "\n" ); - buffer.append("Status: " + status + "\n" ); - buffer.append("Title: " + title + "\n" ); + buffer.append("Version: " + version + "\n"); + buffer.append("Status: " + status + "\n"); + buffer.append("Title: " + title + "\n"); if (outlinks != null) { - buffer.append("Outlinks: " + outlinks.length + "\n" ); + buffer.append("Outlinks: " + outlinks.length + "\n"); for (int i = 0; i < outlinks.length; i++) { buffer.append(" outlink: " + outlinks[i] + "\n"); } } - buffer.append("Content Metadata: " + contentMeta + "\n" ); - buffer.append("Parse Metadata: " + parseMeta + "\n" ); + buffer.append("Content Metadata: " + contentMeta + "\n"); + buffer.append("Parse Metadata: " + parseMeta + "\n"); return buffer.toString(); } public static void main(String argv[]) throws Exception { String usage = "ParseData (-local | -dfs <namenode:port>) recno segment"; - + if (argv.length < 3) { System.out.println("usage:" + usage); return; @@ -214,13 +225,12 @@ public final class ParseData extends Ver Options opts = new Options(); Configuration conf = NutchConfiguration.create(); - - GenericOptionsParser parser = - new GenericOptionsParser(conf, opts, argv); - + + GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv); + String[] remainingArgs = parser.getRemainingArgs(); FileSystem fs = FileSystem.get(conf); - + try { int recno = Integer.parseInt(remainingArgs[0]); String segment = remainingArgs[1]; Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseImpl.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseImpl.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/parse/ParseImpl.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/ParseImpl.java Thu Jan 29 05:38:59 2015 @@ -20,8 +20,9 @@ package org.apache.nutch.parse; import java.io.*; import org.apache.hadoop.io.*; - -/** The result of parsing a page's raw content. +/** + * The result of parsing a page's raw content. + * * @see Parser#getParse(Content) */ public class ParseImpl implements Parse, Writable { @@ -29,7 +30,8 @@ public class ParseImpl implements Parse, private ParseData data; private boolean isCanonical; - public ParseImpl() {} + public ParseImpl() { + } public ParseImpl(Parse parse) { this(new ParseText(parse.getText()), parse.getData(), true); @@ -38,7 +40,7 @@ public class ParseImpl implements Parse, public ParseImpl(String text, ParseData data) { this(new ParseText(text), data, true); } - + public ParseImpl(ParseText text, ParseData data) { this(text, data, true); } @@ -49,12 +51,18 @@ public class ParseImpl implements Parse, this.isCanonical = isCanonical; } - public String getText() { return text.getText(); } + public String getText() { + return text.getText(); + } - public ParseData getData() { return data; } + public ParseData getData() { + return data; + } + + public boolean isCanonical() { + return isCanonical; + } - public boolean isCanonical() { return isCanonical; } - public final void write(DataOutput out) throws IOException { out.writeBoolean(isCanonical); text.write(out);
