Author: ab
Date: Tue Jan 3 23:32:04 2006
New Revision: 365850
URL: http://svn.apache.org/viewcvs?rev=365850&view=rev
Log:
Update Commons HTTPClient to v. 3.0.
Add some default request headers to prefer HTML content and English-language pages.
Added:
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar
(with props)
Removed:
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0-rc2.jar
Modified:
lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
Added:
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar?rev=365850&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml?rev=365850&r1=365849&r2=365850&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml Tue Jan 3 23:32:04 2006
@@ -10,7 +10,7 @@
<export name="*"/>
</library>
<library name="commons-codec.jar" />
- <library name="commons-httpclient-3.0-rc2.jar" />
+ <library name="commons-httpclient-3.0.jar" />
</runtime>
<requires>
Modified:
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=365850&r1=365849&r2=365850&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Tue Jan 3 23:32:04 2006
@@ -7,12 +7,14 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.Credentials;
+import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
@@ -358,6 +360,15 @@
}
HostConfiguration hostConf = client.getHostConfiguration();
+ ArrayList headers = new ArrayList();
+ // prefer English
+ headers.add(new Header("Accept-Language", "en-us,en-gb,en;q=0.7,*;q=0.3"));
+ // prefer UTF-8
+ headers.add(new Header("Accept-Charset", "utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
+ // prefer understandable formats
+ headers.add(new Header("Accept",
+ "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"));
+ hostConf.getParams().setParameter("http.default-headers", headers);
if (PROXY) {
hostConf.setProxy(PROXY_HOST, PROXY_PORT);
}
Modified:
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=365850&r1=365849&r2=365850&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Tue Jan 3 23:32:04 2006
@@ -88,7 +88,7 @@
Header[] heads = get.getResponseHeaders();
for (int i = 0; i < heads.length; i++) {
- headers.put(heads[i].getName(), heads[i].getValue());
+ headers.setProperty(heads[i].getName(), heads[i].getValue());
}
// always read content. Sometimes content is useful to find a cause
// for error.