Author: ab
Date: Tue Jan  3 23:32:04 2006
New Revision: 365850

URL: http://svn.apache.org/viewcvs?rev=365850&view=rev
Log:
Update Commons HTTPClient to v. 3.0.

Add default request headers (Accept, Accept-Charset, Accept-Language) so that
servers prefer to return HTML content, UTF-8 encoded, in English.


Added:
    
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar
   (with props)
Removed:
    
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0-rc2.jar
Modified:
    lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml
    
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
    
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java

Added: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar?rev=365850&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml?rev=365850&r1=365849&r2=365850&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml Tue Jan  3 23:32:04 2006
@@ -10,7 +10,7 @@
          <export name="*"/>
       </library>
       <library name="commons-codec.jar" />
-      <library name="commons-httpclient-3.0-rc2.jar" />
+      <library name="commons-httpclient-3.0.jar" />
    </runtime>
 
    <requires>

Modified: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=365850&r1=365849&r2=365850&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Tue Jan  3 23:32:04 2006
@@ -7,12 +7,14 @@
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.UnknownHostException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.apache.commons.httpclient.Credentials;
+import org.apache.commons.httpclient.Header;
 import org.apache.commons.httpclient.HostConfiguration;
 import org.apache.commons.httpclient.HttpClient;
 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
@@ -358,6 +360,15 @@
     }
 
     HostConfiguration hostConf = client.getHostConfiguration();
+    ArrayList headers = new ArrayList();
+    // prefer English
+    headers.add(new Header("Accept-Language", "en-us,en-gb,en;q=0.7,*;q=0.3"));
+    // prefer UTF-8
+    headers.add(new Header("Accept-Charset", "utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
+    // prefer understandable formats
+    headers.add(new Header("Accept",
+            "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"));
+    hostConf.getParams().setParameter("http.default-headers", headers);
     if (PROXY) {
       hostConf.setProxy(PROXY_HOST, PROXY_PORT);
     }

Modified: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=365850&r1=365849&r2=365850&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Tue Jan  3 23:32:04 2006
@@ -88,7 +88,7 @@
       Header[] heads = get.getResponseHeaders();
 
       for (int i = 0; i < heads.length; i++) {
-        headers.put(heads[i].getName(), heads[i].getValue());
+        headers.setProperty(heads[i].getName(), heads[i].getValue());
       }
       // always read content. Sometimes content is useful to find a cause
       // for error.


Reply via email to