This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 13a9a6d NUTCH-2666 Increase default value for http.content.limit /
ftp.content.limit / file.content.limit - increase the default content limit
from 64 kB to 1024 kB
new 190828f Merge pull request #427 from
sebastian-nagel/NUTCH-2666-increase-http-content-limit
13a9a6d is described below
commit 13a9a6daf2ca2f764d052ee338b51dc9f91824d5
Author: Sebastian Nagel <[email protected]>
AuthorDate: Mon Jan 7 12:41:40 2019 +0100
NUTCH-2666 Increase default value for http.content.limit /
ftp.content.limit / file.content.limit
- increase the default content limit from 64 kB to 1024 kB
---
conf/nutch-default.xml | 10 +++++-----
.../src/java/org/apache/nutch/protocol/http/api/HttpBase.java | 4 ++--
.../src/java/org/apache/nutch/protocol/file/File.java | 2 +-
.../src/java/org/apache/nutch/protocol/ftp/Ftp.java | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 00cb845..bb53301 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -38,7 +38,7 @@
<property>
<name>file.content.limit</name>
- <value>65536</value>
+ <value>1048576</value>
<description>The length limit for downloaded content using the file://
protocol, in bytes. If this value is nonnegative (>=0), content longer
than it will be truncated; otherwise, no truncation at all. Do not
@@ -215,9 +215,9 @@
<property>
<name>http.content.limit</name>
- <value>65536</value>
- <description>The length limit for downloaded content using the http://
- protocol, in bytes. If this value is nonnegative (>=0), content longer
+ <value>1048576</value>
+ <description>The length limit for downloaded content using the http/https
+ protocols, in bytes. If this value is nonnegative (>=0), content longer
than it will be truncated; otherwise, no truncation at all. Do not
confuse this setting with the file.content.limit setting.
</description>
@@ -440,7 +440,7 @@
<property>
<name>ftp.content.limit</name>
- <value>65536</value>
+ <value>1048576</value>
<description>The length limit for downloaded content, in bytes.
If this value is nonnegative (>=0), content longer than it will be truncated;
otherwise, no truncation at all.
diff --git a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
index a5c0a90..9fa6a71 100644
--- a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
+++ b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
@@ -86,7 +86,7 @@ public abstract class HttpBase implements Protocol {
protected int timeout = 10000;
/** The length limit for downloaded content, in bytes. */
- protected int maxContent = 64 * 1024;
+ protected int maxContent = 1024 * 1024;
/** The time limit to download the entire content, in seconds. */
protected int maxDuration = 300;
@@ -194,7 +194,7 @@ public abstract class HttpBase implements Protocol {
this.proxyException =
arrayToMap(conf.getStrings("http.proxy.exception.list"));
this.useProxy = (proxyHost != null && proxyHost.length() > 0);
this.timeout = conf.getInt("http.timeout", 10000);
- this.maxContent = conf.getInt("http.content.limit", 64 * 1024);
+ this.maxContent = conf.getInt("http.content.limit", 1024 * 1024);
this.maxDuration = conf.getInt("http.time.limit", -1);
this.partialAsTruncated = conf
.getBoolean("http.partial.truncated", false);
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
index 8a415b6..17def96 100644
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
+++ b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
@@ -73,7 +73,7 @@ public class File implements Protocol {
*/
public void setConf(Configuration conf) {
this.conf = conf;
- this.maxContentLength = conf.getInt("file.content.limit", 64 * 1024);
+ this.maxContentLength = conf.getInt("file.content.limit", 1024 * 1024);
this.crawlParents = conf.getBoolean("file.crawl.parent", true);
this.symlinksAsRedirects = conf.getBoolean(
"file.crawl.redirect_noncanonical", true);
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
index 6d21b50..7b70790 100644
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
+++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
@@ -251,7 +251,7 @@ public class Ftp implements Protocol {
*/
public void setConf(Configuration conf) {
this.conf = conf;
- this.maxContentLength = conf.getInt("ftp.content.limit", 64 * 1024);
+ this.maxContentLength = conf.getInt("ftp.content.limit", 1024 * 1024);
this.timeout = conf.getInt("ftp.timeout", 10000);
this.userName = conf.get("ftp.username", "anonymous");
this.passWord = conf.get("ftp.password", "[email protected]");