This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 13a9a6d  NUTCH-2666 Increase default value for http.content.limit / ftp.content.limit / file.content.limit - increase the default content limit from 64 kB to 1024 kB
     new 190828f  Merge pull request #427 from sebastian-nagel/NUTCH-2666-increase-http-content-limit
13a9a6d is described below

commit 13a9a6daf2ca2f764d052ee338b51dc9f91824d5
Author: Sebastian Nagel <[email protected]>
AuthorDate: Mon Jan 7 12:41:40 2019 +0100

    NUTCH-2666 Increase default value for http.content.limit / ftp.content.limit / file.content.limit
    - increase the default content limit from 64 kB to 1024 kB
---
 conf/nutch-default.xml                                         | 10 +++++-----
 .../src/java/org/apache/nutch/protocol/http/api/HttpBase.java  |  4 ++--
 .../src/java/org/apache/nutch/protocol/file/File.java          |  2 +-
 .../src/java/org/apache/nutch/protocol/ftp/Ftp.java            |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 00cb845..bb53301 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -38,7 +38,7 @@
 
 <property>
   <name>file.content.limit</name>
-  <value>65536</value>
+  <value>1048576</value>
   <description>The length limit for downloaded content using the file://
   protocol, in bytes. If this value is nonnegative (>=0), content longer
   than it will be truncated; otherwise, no truncation at all. Do not
@@ -215,9 +215,9 @@
 
 <property>
   <name>http.content.limit</name>
-  <value>65536</value>
-  <description>The length limit for downloaded content using the http://
-  protocol, in bytes. If this value is nonnegative (>=0), content longer
+  <value>1048576</value>
+  <description>The length limit for downloaded content using the http/https
+  protocols, in bytes. If this value is nonnegative (>=0), content longer
   than it will be truncated; otherwise, no truncation at all. Do not
   confuse this setting with the file.content.limit setting.
   </description>
@@ -440,7 +440,7 @@
 
 <property>
   <name>ftp.content.limit</name>
-  <value>65536</value> 
+  <value>1048576</value>
   <description>The length limit for downloaded content, in bytes.
   If this value is nonnegative (>=0), content longer than it will be truncated;
   otherwise, no truncation at all.
diff --git a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
index a5c0a90..9fa6a71 100644
--- a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
+++ b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
@@ -86,7 +86,7 @@ public abstract class HttpBase implements Protocol {
   protected int timeout = 10000;
 
   /** The length limit for downloaded content, in bytes. */
-  protected int maxContent = 64 * 1024;
+  protected int maxContent = 1024 * 1024;
 
   /** The time limit to download the entire content, in seconds. */
   protected int maxDuration = 300;
@@ -194,7 +194,7 @@ public abstract class HttpBase implements Protocol {
     this.proxyException = arrayToMap(conf.getStrings("http.proxy.exception.list"));
     this.useProxy = (proxyHost != null && proxyHost.length() > 0);
     this.timeout = conf.getInt("http.timeout", 10000);
-    this.maxContent = conf.getInt("http.content.limit", 64 * 1024);
+    this.maxContent = conf.getInt("http.content.limit", 1024 * 1024);
     this.maxDuration = conf.getInt("http.time.limit", -1);
     this.partialAsTruncated = conf
         .getBoolean("http.partial.truncated", false);
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
index 8a415b6..17def96 100644
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
+++ b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
@@ -73,7 +73,7 @@ public class File implements Protocol {
    */
   public void setConf(Configuration conf) {
     this.conf = conf;
-    this.maxContentLength = conf.getInt("file.content.limit", 64 * 1024);
+    this.maxContentLength = conf.getInt("file.content.limit", 1024 * 1024);
     this.crawlParents = conf.getBoolean("file.crawl.parent", true);
     this.symlinksAsRedirects = conf.getBoolean(
         "file.crawl.redirect_noncanonical", true);
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
index 6d21b50..7b70790 100644
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
+++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
@@ -251,7 +251,7 @@ public class Ftp implements Protocol {
    */
   public void setConf(Configuration conf) {
     this.conf = conf;
-    this.maxContentLength = conf.getInt("ftp.content.limit", 64 * 1024);
+    this.maxContentLength = conf.getInt("ftp.content.limit", 1024 * 1024);
     this.timeout = conf.getInt("ftp.timeout", 10000);
     this.userName = conf.get("ftp.username", "anonymous");
     this.passWord = conf.get("ftp.password", "[email protected]");

Reply via email to