This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new e2e3ee7  NUTCH-2709 Remove unused properties and code related to HTTP 
protocol - remove `http.verbose` and related code (commented out) - remove 
`http.max.delays` which is obsolete since handling of   politeness and delays 
is handled in the multi-threaded Fetcher   (see NUTCH-339 and NUTCH-876)
     new 0e44fd1  Merge pull request #451 from sebastian-nagel/NUTCH-2709-remove-unused-http-properties
e2e3ee7 is described below

commit e2e3ee7158a807f1ec6b92a8f84083b6e1b35783
Author: Sebastian Nagel <[email protected]>
AuthorDate: Tue Apr 16 14:34:14 2019 +0200

    NUTCH-2709 Remove unused properties and code related to HTTP protocol
    - remove `http.verbose` and related code (commented out)
    - remove `http.max.delays` which is obsolete since handling of
      politeness and delays is handled in the multi-threaded Fetcher
      (see NUTCH-339 and NUTCH-876)
---
 conf/nutch-default.xml                                    | 15 ---------------
 src/java/org/apache/nutch/protocol/ProtocolStatus.java    |  4 +++-
 src/plugin/creativecommons/conf/nutch-site.xml            |  7 -------
 .../src/java/org/apache/nutch/protocol/http/Http.java     |  5 -----
 4 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 951494e..f4a5953 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -205,15 +205,6 @@
 </property>
 
 <property>
-  <name>http.max.delays</name>
-  <value>100</value>
-  <description>The number of times a thread will delay when trying to
-  fetch a page.  Each time it finds that a host is busy, it will wait
-  fetcher.server.delay.  After http.max.delays attempts, it will give
-  up on the page for now.</description>
-</property>
-
-<property>
   <name>http.content.limit</name>
   <value>1048576</value>
   <description>The length limit for downloaded content using the http/https
@@ -332,12 +323,6 @@
 </property>
 
 <property>
-  <name>http.verbose</name>
-  <value>false</value>
-  <description>If true, HTTP will log more verbosely.</description>
-</property>
-
-<property>
   <name>http.redirect.max</name>
   <value>0</value>
   <description>The maximum number of redirects the fetcher will follow when
diff --git a/src/java/org/apache/nutch/protocol/ProtocolStatus.java b/src/java/org/apache/nutch/protocol/ProtocolStatus.java
index 0f7e8d0..d9e7e3d 100644
--- a/src/java/org/apache/nutch/protocol/ProtocolStatus.java
+++ b/src/java/org/apache/nutch/protocol/ProtocolStatus.java
@@ -50,7 +50,7 @@ public class ProtocolStatus implements Writable {
   /** Temporary failure. Application may retry immediately. */
   public static final int RETRY = 15;
   /**
-   * Unspecified exception occured. Further information may be provided in args.
+   * Unspecified exception occurred. Further information may be provided in args.
    */
   public static final int EXCEPTION = 16;
   /** Access denied - authorization required, but missing/incorrect. */
@@ -68,8 +68,10 @@ public class ProtocolStatus implements Writable {
    * expected number of milliseconds to wait before retry may be provided in
    * args.
    */
+  @Deprecated
   public static final int WOULDBLOCK = 22;
   /** Thread was blocked http.max.delays times during fetching. */
+  @Deprecated
   public static final int BLOCKED = 23;
 
   // Useful static instances for status codes that don't usually require any
diff --git a/src/plugin/creativecommons/conf/nutch-site.xml b/src/plugin/creativecommons/conf/nutch-site.xml
index 71e344b..e639746 100644
--- a/src/plugin/creativecommons/conf/nutch-site.xml
+++ b/src/plugin/creativecommons/conf/nutch-site.xml
@@ -26,13 +26,6 @@
 </property>
 
 <property>
-  <name>http.max.delays</name>
-  <value>3</value>
-  <description>The CC crawl visits a large number of different
-  hosts, so we should not need to delay much.</description>
-</property>
-
-<property>
   <name>creativecommons.exclude.unlicensed</name>
   <value>true</value>
   <description>Exclude HTML content which does not contain a CC license.
diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
index d4a5cd2..6c7a7be 100644
--- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
+++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
@@ -50,11 +50,6 @@ public class Http extends HttpBase {
    */
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    // Level logLevel = Level.WARNING;
-    // if (conf.getBoolean("http.verbose", false)) {
-    // logLevel = Level.FINE;
-    // }
-    // LOG.setLevel(logLevel);
   }
 
   public static void main(String[] args) throws Exception {

Reply via email to