svn commit: r375965 - in /lucene/nutch/trunk: build.xml src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
Author: jerome Date: Wed Feb 8 05:58:08 2006 New Revision: 375965 URL: http://svn.apache.org/viewcvs?rev=375965&view=rev Log: Fix some javadoc issues with lib-http plugin Added: lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html (with props) Modified: lucene/nutch/trunk/build.xml Modified: lucene/nutch/trunk/build.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=375965&r1=375964&r2=375965&view=diff == --- lucene/nutch/trunk/build.xml (original) +++ lucene/nutch/trunk/build.xml Wed Feb 8 05:58:08 2006 @@ -223,6 +223,7 @@ bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"> <packageset dir="${src.dir}"/> + <packageset dir="${plugins.dir}/lib-http/src/java"/> <packageset dir="${plugins.dir}/protocol-file/src/java"/> <packageset dir="${plugins.dir}/protocol-ftp/src/java"/> <packageset dir="${plugins.dir}/protocol-http/src/java"/> Added: lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html?rev=375965&view=auto == --- lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html (added) +++ lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html Wed Feb 8 05:58:08 2006 @@ -0,0 +1,6 @@ +<html> +<body> +<p>Common API used by HTTP plugins ({@link org.apache.nutch.protocol.http http}, {@link org.apache.nutch.protocol.httpclient httpclient})</p> +</body> +</html> Propchange: lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html -- svn:eol-style = native
svn commit: r375984 - in /lucene/nutch/trunk/src: java/org/apache/nutch/parse/ java/org/apache/nutch/util/ plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/
Author: jerome Date: Wed Feb 8 07:42:44 2006 New Revision: 375984 URL: http://svn.apache.org/viewcvs?rev=375984&view=rev Log: Fix some javadoc errors and warnings Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java?rev=375984&r1=375983&r2=375984&view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java Wed Feb 8 07:42:44 2006 @@ -64,9 +64,9 @@ /** * Reads the <code>parse-plugins.xml</code> file and returns the - * {@link ParsePluginPreferenceList} defined by it. + * {@link ParsePluginList} defined by it. * - * @return A {@link ParsePluginPreferenceList} specified by the + * @return A {@link ParsePluginList} specified by the * <code>parse-plugins.xml</code> file. * @throws Exception * If any parsing error occurs. 
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java?rev=375984&r1=375983&r2=375984&view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java Wed Feb 8 07:42:44 2006 @@ -51,7 +51,7 @@ } /** - * Performs a parse by iterating through a List of preferred [EMAIL PROTECTED] + * Performs a parse by iterating through a List of preferred {@link Parser}s * until a successful parse is performed and a {@link Parse} object is * returned. If the parse is unsuccessful, a message is logged to the * <code>WARNING</code> level, and an empty parse is returned. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=375984&r1=375983&r2=375984&view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Wed Feb 8 07:42:44 2006 @@ -106,13 +106,13 @@ * * The function consults the internal {@link ParsePluginList} for the * ParserFactory to determine the list of pluginIds, then gets the - * appropriate extension points to instantiate as {Parser}s. + * appropriate extension points to instantiate as {@link Parser}s. * * @param contentType The contentType to return the <code>Array</code> - *of {Parser}s for. + *of {@link Parser}s for. * @param url The url for the content that may allow us to get the type from *the file suffix. - * @return An <code>Array</code> of [EMAIL PROTECTED] for the given contentType. + * @return An <code>Array</code> of {@link Parser}s for the given contentType. 
* If there were plugins mapped to a contentType via the * <code>parse-plugins.xml</code> file, but never enabled via * the <code>plugin.includes</code> Nutch conf, then those plugins Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java?rev=375984&r1=375983&r2=375984&view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java Wed Feb 8 07:42:44 2006 @@ -57,7 +57,6 @@ * Convenience call for {@link #toHexString(byte[], String, int)}, where * <code>sep = null; lineLen = Integer.MAX_VALUE</code>. * @param buf - * @return */ public static String toHexString(byte[] buf) { return toHexString(buf, null, Integer.MAX_VALUE); Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java URL:
svn commit: r376012 - in /lucene/nutch/trunk: build.xml default.properties
Author: jerome Date: Wed Feb 8 10:03:01 2006 New Revision: 376012 URL: http://svn.apache.org/viewcvs?rev=376012&view=rev Log: Add/Move some plugins javadoc to the Plugins group Modified: lucene/nutch/trunk/build.xml lucene/nutch/trunk/default.properties Modified: lucene/nutch/trunk/build.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=376012&r1=376011&r2=376012&view=diff == --- lucene/nutch/trunk/build.xml (original) +++ lucene/nutch/trunk/build.xml Wed Feb 8 10:03:01 2006 @@ -227,20 +227,28 @@ <packageset dir="${src.dir}"/> <packageset dir="${plugins.dir}/lib-http/src/java"/> + <packageset dir="${plugins.dir}/ontology/src/java"/> <packageset dir="${plugins.dir}/protocol-file/src/java"/> <packageset dir="${plugins.dir}/protocol-ftp/src/java"/> <packageset dir="${plugins.dir}/protocol-http/src/java"/> <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/> + <packageset dir="${plugins.dir}/parse-ext/src/java"/> <packageset dir="${plugins.dir}/parse-html/src/java"/> <packageset dir="${plugins.dir}/parse-js/src/java"/> <packageset dir="${plugins.dir}/parse-text/src/java"/> <packageset dir="${plugins.dir}/parse-pdf/src/java"/> <!-- <packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded from build due to licensing issues--> <!-- <packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded from build due to licensing issues--> + <packageset dir="${plugins.dir}/parse-mspowerpoint/src/java"/> <packageset dir="${plugins.dir}/parse-msword/src/java"/> + <packageset dir="${plugins.dir}/parse-rss/src/java"/> + <packageset dir="${plugins.dir}/parse-swf/src/java"/> + <packageset dir="${plugins.dir}/parse-zip/src/java"/> <packageset dir="${plugins.dir}/index-basic/src/java"/> <packageset dir="${plugins.dir}/index-more/src/java"/> <packageset dir="${plugins.dir}/query-more/src/java"/> + <packageset dir="${plugins.dir}/query-site/src/java"/> + <packageset dir="${plugins.dir}/query-url/src/java"/> <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/> <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/> <packageset 
dir="${plugins.dir}/creativecommons/src/java"/> Modified: lucene/nutch/trunk/default.properties URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/default.properties?rev=376012&r1=376011&r2=376012&view=diff == --- lucene/nutch/trunk/default.properties (original) +++ lucene/nutch/trunk/default.properties Wed Feb 8 10:03:01 2006 @@ -33,8 +33,6 @@ src.webapps = ./src/webapps # Proxy Host and Port to use for building JavaDoc -#javadoc.proxy.host=-J-DproxyHost= -#javadoc.proxy.port=-J-DproxyPort= javadoc.proxy.host=-J-DproxyHost= javadoc.proxy.port=-J-DproxyPort= javadoc.link.java=http://java.sun.com/j2se/1.4.2/docs/api/ @@ -49,21 +47,57 @@ javac.deprecation=off javac.version= 1.4 -plugin.http=org.apache.nutch.protocol.http* -plugin.httpclient=org.apache.nutch.protocol.httpclient* -plugin.ftp=org.apache.nutch.protocol.ftp* +# The list of packages assigned to plugins group in javadoc +# (please keep this list ordered) +plugin.basic=org.apache.nutch.indexer.basic* +plugin.carrot2=org.apache.nutch.clustering.carrot2* +plugin.creative=org.creativecommons.nutch* +plugin.ext=org.apache.nutch.parse.ext* plugin.file=org.apache.nutch.protocol.file* +plugin.ftp=org.apache.nutch.protocol.ftp* plugin.html=org.apache.nutch.parse.html* +plugin.http=org.apache.nutch.protocol.http* +plugin.httpclient=org.apache.nutch.protocol.httpclient* plugin.js=org.apache.nutch.parse.js* +plugin.language=org.apache.nutch.analysis.lang* +plugin.libhttp=org.apache.nutch.protocol.http.api* +plugin.more=org.apache.nutch.indexer.more*:org.apache.nutch.searcher.more* plugin.mp3=org.apache.nutch.parse.mp3* +plugin.mspowerpoint=org.apache.nutch.parse.mspowerpoint* plugin.msword=org.apache.nutch.parse.msword* -plugin.rtf=org.apache.nutch.parse.rtf* +# Unfortunately, ontology on core and plugin uses the same package: +# plugin.ontology=org.apache.nutch.ontology* plugin.pdf=org.apache.nutch.parse.pdf* +plugin.rss=org.apache.nutch.parse.rss* +plugin.rtf=org.apache.nutch.parse.rtf* 
+plugin.site=org.apache.nutch.searcher.site* +plugin.swf=org.apache.nutch.parse.swf* plugin.text=org.apache.nutch.parse.text* -plugin.basic=org.apache.nutch.indexer.basic* -plugin.more=org.apache.nutch.indexer.more* -plugin.language=org.apache.nutch.analysis.lang* -plugin.creative=org.creativecommons.nutch* -plugins.packages=${plugin.http}:${plugin.httpclient}:${plugin.ftp}:${plugin.file}:${plugin.html}:${plugin.js}:${plugin.mp3}:\ - ${plugin.msword}:${plugin.rtf}:${plugin.pdf}:${plugin.text}:${plugin.basic}:${plugin.more}:\ - ${plugin.language}:${plugin.creative} +plugin.url=org.apache.nutch.searcher.url*
svn commit: r376072 - /lucene/nutch/trunk/conf/nutch-default.xml
Author: cutting Date: Wed Feb 8 13:25:30 2006 New Revision: 376072 URL: http://svn.apache.org/viewcvs?rev=376072&view=rev Log: Restore accidentally removed file defaults. Modified: lucene/nutch/trunk/conf/nutch-default.xml Modified: lucene/nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=376072&r1=376071&r2=376072&view=diff == --- lucene/nutch/trunk/conf/nutch-default.xml (original) +++ lucene/nutch/trunk/conf/nutch-default.xml Wed Feb 8 13:25:30 2006 @@ -7,6 +7,28 @@ <configuration> +<!-- file properties --> + +<property> + <name>file.content.limit</name> + <value>65536</value> + <description>The length limit for downloaded content, in bytes. + If this value is larger than zero, content longer than it will be + truncated; otherwise (zero or negative), no truncation at all. + </description> +</property> + +<property> + <name>file.content.ignored</name> + <value>true</value> + <description>If true, no file content will be saved during fetch. + And it is probably what we want to set most of time, since file:// URLs + are meant to be local and we can always use them directly at parsing + and indexing stages. Otherwise file contents will be saved. + !! NO IMPLEMENTED YET !! + </description> +</property> + <!-- HTTP properties --> <property>