This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new e2e3ee7 NUTCH-2709 Remove unused properties and code related to HTTP
protocol - remove `http.verbose` and related code (commented out) - remove
`http.max.delays` which is obsolete since handling of politeness and delays
is handled in the multi-threaded Fetcher (see NUTCH-339 and NUTCH-876)
new 0e44fd1 Merge pull request #451 from
sebastian-nagel/NUTCH-2709-remove-unused-http-properties
e2e3ee7 is described below
commit e2e3ee7158a807f1ec6b92a8f84083b6e1b35783
Author: Sebastian Nagel <[email protected]>
AuthorDate: Tue Apr 16 14:34:14 2019 +0200
NUTCH-2709 Remove unused properties and code related to HTTP protocol
- remove `http.verbose` and related code (commented out)
- remove `http.max.delays` which is obsolete since handling of
politeness and delays is handled in the multi-threaded Fetcher
(see NUTCH-339 and NUTCH-876)
---
conf/nutch-default.xml | 15 ---------------
src/java/org/apache/nutch/protocol/ProtocolStatus.java | 4 +++-
src/plugin/creativecommons/conf/nutch-site.xml | 7 -------
.../src/java/org/apache/nutch/protocol/http/Http.java | 5 -----
4 files changed, 3 insertions(+), 28 deletions(-)
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 951494e..f4a5953 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -205,15 +205,6 @@
</property>
<property>
- <name>http.max.delays</name>
- <value>100</value>
- <description>The number of times a thread will delay when trying to
- fetch a page. Each time it finds that a host is busy, it will wait
- fetcher.server.delay. After http.max.delays attempts, it will give
- up on the page for now.</description>
-</property>
-
-<property>
<name>http.content.limit</name>
<value>1048576</value>
<description>The length limit for downloaded content using the http/https
@@ -332,12 +323,6 @@
</property>
<property>
- <name>http.verbose</name>
- <value>false</value>
- <description>If true, HTTP will log more verbosely.</description>
-</property>
-
-<property>
<name>http.redirect.max</name>
<value>0</value>
<description>The maximum number of redirects the fetcher will follow when
diff --git a/src/java/org/apache/nutch/protocol/ProtocolStatus.java b/src/java/org/apache/nutch/protocol/ProtocolStatus.java
index 0f7e8d0..d9e7e3d 100644
--- a/src/java/org/apache/nutch/protocol/ProtocolStatus.java
+++ b/src/java/org/apache/nutch/protocol/ProtocolStatus.java
@@ -50,7 +50,7 @@ public class ProtocolStatus implements Writable {
/** Temporary failure. Application may retry immediately. */
public static final int RETRY = 15;
/**
- * Unspecified exception occured. Further information may be provided in args.
+ * Unspecified exception occurred. Further information may be provided in args.
*/
public static final int EXCEPTION = 16;
/** Access denied - authorization required, but missing/incorrect. */
@@ -68,8 +68,10 @@ public class ProtocolStatus implements Writable {
* expected number of milliseconds to wait before retry may be provided in
* args.
*/
+ @Deprecated
public static final int WOULDBLOCK = 22;
/** Thread was blocked http.max.delays times during fetching. */
+ @Deprecated
public static final int BLOCKED = 23;
// Useful static instances for status codes that don't usually require any
diff --git a/src/plugin/creativecommons/conf/nutch-site.xml b/src/plugin/creativecommons/conf/nutch-site.xml
index 71e344b..e639746 100644
--- a/src/plugin/creativecommons/conf/nutch-site.xml
+++ b/src/plugin/creativecommons/conf/nutch-site.xml
@@ -26,13 +26,6 @@
</property>
<property>
- <name>http.max.delays</name>
- <value>3</value>
- <description>The CC crawl visits a large number of different
- hosts, so we should not need to delay much.</description>
-</property>
-
-<property>
<name>creativecommons.exclude.unlicensed</name>
<value>true</value>
<description>Exclude HTML content which does not contain a CC license.
diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
index d4a5cd2..6c7a7be 100644
--- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
+++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
@@ -50,11 +50,6 @@ public class Http extends HttpBase {
*/
public void setConf(Configuration conf) {
super.setConf(conf);
- // Level logLevel = Level.WARNING;
- // if (conf.getBoolean("http.verbose", false)) {
- // logLevel = Level.FINE;
- // }
- // LOG.setLevel(logLevel);
}
public static void main(String[] args) throws Exception {