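This small patch enables the use of an HTTP proxy in the fetching process.
The proxy is configured with two properties in the config file (it is used only when http.proxy.host is non-empty; http.proxy.port defaults to 8080):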
<property> <name>http.proxy.host</name> <value>[!!your proxy host here!!]</value> <description>The proxy hostname.</description> </property>
<property> <name>http.proxy.port</name> <value>[!!your proxy port here!!]</value> <description>The proxy port.</description> </property>
Possible future enhancements include:
- enabling persistent connections to the proxy
- proxy support for protocols other than HTTP (many proxies also support at least FTP)
br,
Sami Siren
Index: Http.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/net/protocols/http/Http.java,v
retrieving revision 1.4
diff -u -r1.4 Http.java
--- Http.java 23 Apr 2004 19:32:33 -0000 1.4
+++ Http.java 29 Apr 2004 19:01:42 -0000
@@ -36,6 +36,10 @@
/** Always indicates the latest HTTP version we support, currently 1.1 */
public static final int HTTP_VER_LATEST;
+ String proxyHost=NutchConf.get("http.proxy.host");
+ int proxyPort=NutchConf.getInt("http.proxy.port",8080);
+ boolean proxyenabled=(proxyHost!=null && proxyHost.length()>0);
+
int timeout = NutchConf.getInt("http.timeout", 10000);
int maxContentLength= NutchConf.getInt("http.content.limit",64*1024);
Index: HttpResponse.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/net/protocols/http/HttpResponse.java,v
retrieving revision 1.2
diff -u -r1.2 HttpResponse.java
--- HttpResponse.java 21 Apr 2004 22:52:35 -0000 1.2
+++ HttpResponse.java 29 Apr 2004 19:01:43 -0000
@@ -108,20 +108,23 @@
socket.setSoTimeout(this.http.timeout);
if (addr == null) {
- addr= InetAddress.getByName(url.getHost());
+
addr=InetAddress.getByName(this.http.proxyenabled?this.http.proxyHost:url.getHost());
if (httpAccounting != null)
httpAccounting.setAddr(addr);
}
// connect
- InetSocketAddress sockAddr= new InetSocketAddress(addr, port);
+ InetSocketAddress sockAddr= new InetSocketAddress(addr,
this.http.proxyenabled?this.http.proxyPort:port);
socket.connect(sockAddr, this.http.timeout);
OutputStream req = socket.getOutputStream(); // make request
StringBuffer reqStr = new StringBuffer("GET ");
- reqStr.append(path);
-
+ if(this.http.proxyenabled){
+
reqStr.append(url.getProtocol()).append("://").append(url.getHost()).append(portString).append(path);
+ } else {
+ reqStr.append(path);
+ }
if (httpVersion == Http.HTTP_VER_1_1)
reqStr.append(" HTTP/1.1\r\n");
