svn commit: r375965 - in /lucene/nutch/trunk: build.xml src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html

2006-02-08 Thread jerome
Author: jerome
Date: Wed Feb  8 05:58:08 2006
New Revision: 375965

URL: http://svn.apache.org/viewcvs?rev=375965&view=rev
Log:
Fix some javadoc issues with lib-http plugin

Added:

lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
   (with props)
Modified:
lucene/nutch/trunk/build.xml

Modified: lucene/nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=375965&r1=375964&r2=375965&view=diff
==
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Wed Feb  8 05:58:08 2006
@@ -223,6 +223,7 @@
   bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
   
<packageset dir="${src.dir}"/>
+   <packageset dir="${plugins.dir}/lib-http/src/java"/>
<packageset dir="${plugins.dir}/protocol-file/src/java"/>
<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
<packageset dir="${plugins.dir}/protocol-http/src/java"/>

Added: 
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html?rev=375965&view=auto
==
--- 
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
 (added)
+++ 
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
 Wed Feb  8 05:58:08 2006
@@ -0,0 +1,6 @@
+<html>
+<body>
+<p>Common API used by HTTP plugins ({@link 
org.apache.nutch.protocol.http http},
{@link org.apache.nutch.protocol.httpclient httpclient})</p>
+</body>
+</html>

Propchange: 
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
--
svn:eol-style = native




svn commit: r375984 - in /lucene/nutch/trunk/src: java/org/apache/nutch/parse/ java/org/apache/nutch/util/ plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/

2006-02-08 Thread jerome
Author: jerome
Date: Wed Feb  8 07:42:44 2006
New Revision: 375984

URL: http://svn.apache.org/viewcvs?rev=375984&view=rev
Log:
Fix some javadoc errors and warnings

Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java

lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java?rev=375984&r1=375983&r2=375984&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java 
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java 
Wed Feb  8 07:42:44 2006
@@ -64,9 +64,9 @@
   
   /**
* Reads the <code>parse-plugins.xml</code> file and returns the
-   * {@link ParsePluginPreferenceList} defined by it.
+   * {@link ParsePluginList} defined by it.
*
-   * @return A {@link ParsePluginPreferenceList} specified by the
+   * @return A {@link ParsePluginList} specified by the
* <code>parse-plugins.xml</code> file.
* @throws Exception
* If any parsing error occurs.

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java?rev=375984&r1=375983&r2=375984&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java Wed Feb  
8 07:42:44 2006
@@ -51,7 +51,7 @@
   }
   
   /**
-   * Performs a parse by iterating through a List of preferred [EMAIL 
PROTECTED]
+   * Performs a parse by iterating through a List of preferred {@link 
Parser}s
* until a successful parse is performed and a {@link Parse} 
object is
* returned. If the parse is unsuccessful, a message is logged to the
* <code>WARNING</code> level, and an empty parse is returned.

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=375984&r1=375983&r2=375984&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java 
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Wed 
Feb  8 07:42:44 2006
@@ -106,13 +106,13 @@
*
* The function consults the internal {@link ParsePluginList} for 
the
* ParserFactory to determine the list of pluginIds, then gets the
-   * appropriate extension points to instantiate as {Parser}s.
+   * appropriate extension points to instantiate as {@link Parser}s.
*
* @param contentType The contentType to return the <code>Array</code>
-   *of {Parser}s for.
+   *of {@link Parser}s for.
* @param url The url for the content that may allow us to get the type from
*the file suffix.
-   * @return An <code>Array</code> of [EMAIL PROTECTED] for the given 
contentType.
+   * @return An <code>Array</code> of {@link Parser}s for the given 
contentType.
* If there were plugins mapped to a contentType via the
* <code>parse-plugins.xml</code> file, but never enabled via
* the <code>plugin.includes</code> Nutch conf, then those plugins

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java?rev=375984&r1=375983&r2=375984&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java Wed Feb  
8 07:42:44 2006
@@ -57,7 +57,6 @@
* Convenience call for {@link #toHexString(byte[], String, 
int)}, where
* <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
* @param buf
-   * @return
*/
   public static String toHexString(byte[] buf) {
 return toHexString(buf, null, Integer.MAX_VALUE);

Modified: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
URL: 

svn commit: r376012 - in /lucene/nutch/trunk: build.xml default.properties

2006-02-08 Thread jerome
Author: jerome
Date: Wed Feb  8 10:03:01 2006
New Revision: 376012

URL: http://svn.apache.org/viewcvs?rev=376012&view=rev
Log:
Add/Move some plugins javadoc to the Plugins group

Modified:
lucene/nutch/trunk/build.xml
lucene/nutch/trunk/default.properties

Modified: lucene/nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=376012&r1=376011&r2=376012&view=diff
==
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Wed Feb  8 10:03:01 2006
@@ -227,20 +227,28 @@
 
<packageset dir="${src.dir}"/>
<packageset dir="${plugins.dir}/lib-http/src/java"/>
+   <packageset dir="${plugins.dir}/ontology/src/java"/>
<packageset dir="${plugins.dir}/protocol-file/src/java"/>
<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
<packageset dir="${plugins.dir}/protocol-http/src/java"/>
<packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+   <packageset dir="${plugins.dir}/parse-ext/src/java"/>
<packageset dir="${plugins.dir}/parse-html/src/java"/>
<packageset dir="${plugins.dir}/parse-js/src/java"/>
<packageset dir="${plugins.dir}/parse-text/src/java"/>
<packageset dir="${plugins.dir}/parse-pdf/src/java"/>
 <!--   <packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded 
from build due to licensing issues-->
 <!--   <packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded 
from build due to licensing issues-->
+   <packageset dir="${plugins.dir}/parse-mspowerpoint/src/java"/>
<packageset dir="${plugins.dir}/parse-msword/src/java"/>
+   <packageset dir="${plugins.dir}/parse-rss/src/java"/>
+   <packageset dir="${plugins.dir}/parse-swf/src/java"/>
+   <packageset dir="${plugins.dir}/parse-zip/src/java"/>
<packageset dir="${plugins.dir}/index-basic/src/java"/>
<packageset dir="${plugins.dir}/index-more/src/java"/>
<packageset dir="${plugins.dir}/query-more/src/java"/>
+   <packageset dir="${plugins.dir}/query-site/src/java"/>
+   <packageset dir="${plugins.dir}/query-url/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
<packageset dir="${plugins.dir}/creativecommons/src/java"/>

Modified: lucene/nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/default.properties?rev=376012&r1=376011&r2=376012&view=diff
==
--- lucene/nutch/trunk/default.properties (original)
+++ lucene/nutch/trunk/default.properties Wed Feb  8 10:03:01 2006
@@ -33,8 +33,6 @@
 src.webapps = ./src/webapps
 
 # Proxy Host and Port to use for building JavaDoc
-#javadoc.proxy.host=-J-DproxyHost=
-#javadoc.proxy.port=-J-DproxyPort=
 javadoc.proxy.host=-J-DproxyHost=
 javadoc.proxy.port=-J-DproxyPort=
 javadoc.link.java=http://java.sun.com/j2se/1.4.2/docs/api/
@@ -49,21 +47,57 @@
 javac.deprecation=off
 javac.version= 1.4
 
-plugin.http=org.apache.nutch.protocol.http*
-plugin.httpclient=org.apache.nutch.protocol.httpclient*
-plugin.ftp=org.apache.nutch.protocol.ftp*
+# The list of packages assigned to plugins group in javadoc
+# (please keep this list ordered)
+plugin.basic=org.apache.nutch.indexer.basic*
+plugin.carrot2=org.apache.nutch.clustering.carrot2*
+plugin.creative=org.creativecommons.nutch*
+plugin.ext=org.apache.nutch.parse.ext*
 plugin.file=org.apache.nutch.protocol.file*
+plugin.ftp=org.apache.nutch.protocol.ftp*
 plugin.html=org.apache.nutch.parse.html*
+plugin.http=org.apache.nutch.protocol.http*
+plugin.httpclient=org.apache.nutch.protocol.httpclient*
 plugin.js=org.apache.nutch.parse.js*
+plugin.language=org.apache.nutch.analysis.lang*
+plugin.libhttp=org.apache.nutch.protocol.http.api*
+plugin.more=org.apache.nutch.indexer.more*:org.apache.nutch.searcher.more*
 plugin.mp3=org.apache.nutch.parse.mp3*
+plugin.mspowerpoint=org.apache.nutch.parse.mspowerpoint*
 plugin.msword=org.apache.nutch.parse.msword*
-plugin.rtf=org.apache.nutch.parse.rtf*
+# Unfortunately, ontology on core and plugin uses the same package:
+# plugin.ontology=org.apache.nutch.ontology*
 plugin.pdf=org.apache.nutch.parse.pdf*
+plugin.rss=org.apache.nutch.parse.rss*
+plugin.rtf=org.apache.nutch.parse.rtf*
+plugin.site=org.apache.nutch.searcher.site*
+plugin.swf=org.apache.nutch.parse.swf*
 plugin.text=org.apache.nutch.parse.text*
-plugin.basic=org.apache.nutch.indexer.basic*
-plugin.more=org.apache.nutch.indexer.more*
-plugin.language=org.apache.nutch.analysis.lang*
-plugin.creative=org.creativecommons.nutch*
-plugins.packages=${plugin.http}:${plugin.httpclient}:${plugin.ftp}:${plugin.file}:${plugin.html}:${plugin.js}:${plugin.mp3}:\
-   
${plugin.msword}:${plugin.rtf}:${plugin.pdf}:${plugin.text}:${plugin.basic}:${plugin.more}:\
-   ${plugin.language}:${plugin.creative}
+plugin.url=org.apache.nutch.searcher.url*

svn commit: r376072 - /lucene/nutch/trunk/conf/nutch-default.xml

2006-02-08 Thread cutting
Author: cutting
Date: Wed Feb  8 13:25:30 2006
New Revision: 376072

URL: http://svn.apache.org/viewcvs?rev=376072&view=rev
Log:
Restore accidentally removed file defaults.

Modified:
lucene/nutch/trunk/conf/nutch-default.xml

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=376072&r1=376071&r2=376072&view=diff
==
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Wed Feb  8 13:25:30 2006
@@ -7,6 +7,28 @@
 
 <configuration>
 
+<!-- file properties -->
+
+<property>
+  <name>file.content.limit</name>
+  <value>65536</value>
+  <description>The length limit for downloaded content, in bytes.
+  If this value is larger than zero, content longer than it will be
+  truncated; otherwise (zero or negative), no truncation at all.
+  </description>
+</property>
+
+<property>
+  <name>file.content.ignored</name>
+  <value>true</value>
+  <description>If true, no file content will be saved during fetch.
+  And it is probably what we want to set most of time, since file:// URLs
+  are meant to be local and we can always use them directly at parsing
+  and indexing stages. Otherwise file contents will be saved.
+  !! NO IMPLEMENTED YET !!
+  </description>
+</property>
+
 <!-- HTTP properties -->
 
 <property>