svn commit: r382512 - in /lucene/nutch/trunk/lib: lucene-core-1.9-final.jar lucene-core-1.9.1.jar lucene-misc-1.9-final.jar lucene-misc-1.9.1.jar

2006-03-02 Thread cutting
Author: cutting
Date: Thu Mar  2 12:59:09 2006
New Revision: 382512

URL: http://svn.apache.org/viewcvs?rev=382512&view=rev
Log:
Upgrade to Lucene 1.9.1.

Added:
lucene/nutch/trunk/lib/lucene-core-1.9.1.jar   (with props)
lucene/nutch/trunk/lib/lucene-misc-1.9.1.jar   (with props)
Removed:
lucene/nutch/trunk/lib/lucene-core-1.9-final.jar
lucene/nutch/trunk/lib/lucene-misc-1.9-final.jar

Added: lucene/nutch/trunk/lib/lucene-core-1.9.1.jar
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/lucene-core-1.9.1.jar?rev=382512&view=auto
==
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/lucene-core-1.9.1.jar
--
svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/lib/lucene-misc-1.9.1.jar
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/lucene-misc-1.9.1.jar?rev=382512&view=auto
==
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/lucene-misc-1.9.1.jar
--
svn:mime-type = application/octet-stream




svn commit: r382535 - in /lucene/nutch/trunk: conf/nutch-default.xml src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java src/plugin/protocol-ftp/src/java/org/apache/nutc

2006-03-02 Thread jerome
Author: jerome
Date: Thu Mar  2 14:38:40 2006
New Revision: 382535

URL: http://svn.apache.org/viewcvs?rev=382535&view=rev
Log:
Fix content.limit inconsistency in http, ftp and file

Modified:
lucene/nutch/trunk/conf/nutch-default.xml

lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java

lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=382535&r1=382534&r2=382535&view=diff
==
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Thu Mar  2 14:38:40 2006
@@ -13,8 +13,8 @@
   <name>file.content.limit</name>
   <value>65536</value>
   <description>The length limit for downloaded content, in bytes.
-  If this value is larger than zero, content longer than it will be
-  truncated; otherwise (zero or negative), no truncation at all.
+  If this value is nonnegative (>=0), content longer than it will be truncated;
+  otherwise, no truncation at all.
   </description>
 </property>
 
@@ -150,11 +150,11 @@
   <name>ftp.content.limit</name>
   <value>65536</value> 
   <description>The length limit for downloaded content, in bytes.
-  If this value is larger than zero, content longer than it is truncated;
-  otherwise (zero or negative), no truncation at all. Caution: classical
-  ftp RFCs never defines partial transfer and, in fact, some ftp servers
-  out there do not handle client side forced close-down very well.
-  Our implementation tries its best to handle such situations smoothly.
+  If this value is nonnegative (>=0), content longer than it will be truncated;
+  otherwise, no truncation at all.
+  Caution: classical ftp RFCs never defines partial transfer and, in fact,
+  some ftp servers out there do not handle client side forced close-down very
+  well. Our implementation tries its best to handle such situations smoothly.
   </description>
 </property>
 

Modified: 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?rev=382535&r1=382534&r2=382535&view=diff
==
--- 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
 Thu Mar  2 14:38:40 2006
@@ -167,7 +167,7 @@
 // capture content
 int len = (int) size;
 
-if (this.file.maxContentLength > 0 && len > this.file.maxContentLength)
+if (this.file.maxContentLength >= 0 && len > this.file.maxContentLength)
   len = this.file.maxContentLength;
 
 this.content = new byte[len];

Modified: 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java?rev=382535&r1=382534&r2=382535&view=diff
==
--- 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
 Thu Mar  2 14:38:40 2006
@@ -344,9 +344,9 @@
 }
 entries.add(ftpFile);
 count += line.length();
-// impose download limit if limit > 0, otherwise no limit
+// impose download limit if limit >= 0, otherwise no limit
 // here, cut off is up to the line when total bytes is just over limit
-if (limit > 0 && count > limit) {
+if (limit >= 0 && count > limit) {
   mandatory_close = true;
   break;
 }
@@ -409,9 +409,9 @@
 new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
   while((len=input.read(buf,0,buf.length)) != -1){
 count += len;
-// impose download limit if limit > 0, otherwise no limit
+// impose download limit if limit >= 0, otherwise no limit
 // here, cut off is exactly of limit bytes
-if (limit > 0 && count > limit) {
+if (limit >= 0 && count > limit) {
   os.write(buf,0,len-(count-limit));
   mandatory_close = true;
   break;




svn commit: r382573 - in /lucene/nutch/trunk: conf/hadoop-env.sh.template lib/hadoop-0.1-dev.jar

2006-03-02 Thread cutting
Author: cutting
Date: Thu Mar  2 15:59:24 2006
New Revision: 382573

URL: http://svn.apache.org/viewcvs?rev=382573&view=rev
Log:
Update to latest Hadoop code.

Modified:
lucene/nutch/trunk/conf/hadoop-env.sh.template
lucene/nutch/trunk/lib/hadoop-0.1-dev.jar

Modified: lucene/nutch/trunk/conf/hadoop-env.sh.template
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/hadoop-env.sh.template?rev=382573&r1=382572&r2=382573&view=diff
==
--- lucene/nutch/trunk/conf/hadoop-env.sh.template (original)
+++ lucene/nutch/trunk/conf/hadoop-env.sh.template Thu Mar  2 15:59:24 2006
@@ -1,6 +1,11 @@
 # Set Hadoop-specific environment variables here.
 
-# The java implementation to use.
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
 # export JAVA_HOME=/usr/bin/java
 
 # The maximum amount of heap to use, in MB. Default is 1000.
@@ -8,6 +13,9 @@
 
 # Extra Java runtime options.  Empty by default.
 # export HADOOP_OPTS=-server
+
+# Extra ssh options.  Default: '-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR'.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
 
 # Where log files are stored.  $HADOOP_HOME/logs by default.
 # export HADOOP_LOG_DIR=${HADOOP_HOME}/logs

Modified: lucene/nutch/trunk/lib/hadoop-0.1-dev.jar
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/hadoop-0.1-dev.jar?rev=382573&r1=382572&r2=382573&view=diff
==
Binary files - no diff available.




svn commit: r382579 - /lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java

2006-03-02 Thread cutting
Author: cutting
Date: Thu Mar  2 16:06:59 2006
New Revision: 382579

URL: http://svn.apache.org/viewcvs?rev=382579&view=rev
Log:
Disable speculative execution, since input format has side effects.

Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=382579&r1=382578&r2=382579&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java 
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java 
Thu Mar  2 16:06:59 2006
@@ -307,6 +307,7 @@
 job.setInputKeyClass(HashScore.class);
 job.setInputValueClass(IndexDoc.class);
 job.setInputFormat(InputFormat.class);
+job.setBoolean("mapred.speculative.execution", false);
 
 job.setPartitionerClass(HashPartitioner.class);
 job.setReducerClass(HashReducer.class);




[Nutch Wiki] Update of Support by SupreetSethi

2006-03-02 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on Nutch Wiki for change 
notification.

The following page has been changed by SupreetSethi:
http://wiki.apache.org/nutch/Support

The comment on the change is:
Added my services as developer for hire

--
* IntraFind Software AG
* Michael Rosset mrosset at btmeta.com
* Sami Siren s.siren at sonera.inet.fi
+   * Supreet Sethi [EMAIL PROTECTED] (india preferred)
  


[Nutch Wiki] Update of Support by SupreetSethi

2006-03-02 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on Nutch Wiki for change 
notification.

The following page has been changed by SupreetSethi:
http://wiki.apache.org/nutch/Support

--
* IntraFind Software AG
* Michael Rosset mrosset at btmeta.com
* Sami Siren s.siren at sonera.inet.fi
-   * Supreet Sethi [EMAIL PROTECTED] (india preferred)
+   * Supreet Sethi supreet at linux-delhi.org (india preferred)