Author: lewismc
Date: Tue Jul 10 16:29:11 2012
New Revision: 1359760
URL: http://svn.apache.org/viewvc?rev=1359760&view=rev
Log:
revert NUTCH-1360
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml
nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1359760&r1=1359759&r2=1359760&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jul 10 16:29:11 2012
@@ -32,8 +32,6 @@ Nutch Change Log
* NUTCH-1364 Add a counter in Generator for malformed urls (lewismc)
-* NUTCH-1360 Suport the storing of IP address connected to when web crawling (lewismc)
-
* NUTCH-1262 Map `duplicating` content-types to a single type (markus)
* NUTCH-1385 More robust plug-in order properties in nutch-site.xml (Andy Xue via markus)
Modified: nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1359760&r1=1359759&r2=1359760&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Tue Jul 10 16:29:11 2012
@@ -255,13 +255,6 @@
</description>
</property>
-<property>
- <name>http.store.ip.address</name>
- <value>false</value>
- <description>Enables us to capture the specific IP address of the
- host which we connect to to fetch a page.</description>
-</property>
-
<!-- FTP properties -->
<property>
Modified: nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1359760&r1=1359759&r2=1359760&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java Tue Jul 10 16:29:11 2012
@@ -48,7 +48,5 @@ public interface HttpHeaders {
public final static String LAST_MODIFIED = "Last-Modified";
public final static String LOCATION = "Location";
-
- public final static String IP_ADDRESS = "_ip";
}
Modified:
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1359760&r1=1359759&r2=1359760&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Tue Jul 10 16:29:11 2012
@@ -80,9 +80,6 @@ public abstract class HttpBase implement
/** The "Accept" request header value. */
protected String accept =
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
- /** The "_ip" request header value. */
- protected boolean ip_header = false;
-
/** The default logger */
private final static Logger LOGGER = LoggerFactory.getLogger(HttpBase.class);
@@ -123,7 +120,6 @@ public abstract class HttpBase implement
.get("http.agent.description"), conf.get("http.agent.url"),
conf.get("http.agent.email"));
this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
this.accept = conf.get("http.accept", accept);
- this.ip_header = conf.getBoolean("http.store.ip.address", false);
// backward-compatible default setting
this.useHttp11 = conf.getBoolean("http.useHttp11", false);
this.robots.setConf(conf);
@@ -251,10 +247,6 @@ public abstract class HttpBase implement
return useHttp11;
}
- public boolean getIP_Header(){
- return ip_header;
- }
-
private static String getAgentString(String agentName,
String agentVersion,
String agentDesc,
@@ -309,7 +301,6 @@ public abstract class HttpBase implement
logger.info("http.agent = " + userAgent);
logger.info("http.accept.language = " + acceptLanguage);
logger.info("http.accept = " + accept);
- logger.info("http.store.ip.address = " + ip_header);
}
}
Modified:
nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1359760&r1=1359759&r2=1359760&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original)
+++ nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Tue Jul 10 16:29:11 2012
@@ -93,9 +93,7 @@ public class HttpResponse implements Res
int sockPort = http.useProxy() ? http.getProxyPort() : port;
InetSocketAddress sockAddr= new InetSocketAddress(sockHost, sockPort);
socket.connect(sockAddr, http.getTimeout());
-
- headers.set("_ip", socket.getInetAddress().getHostAddress());
-
+
// make request
OutputStream req = socket.getOutputStream();
@@ -112,12 +110,6 @@ public class HttpResponse implements Res
reqStr.append(host);
reqStr.append(portString);
reqStr.append("\r\n");
-
- if(this.http.getConf().getBoolean("http.store.ip.address", true)) {
- reqStr.append("_ip: ");
- reqStr.append(http.getIP_Header());
- reqStr.append("\r\n");
- }
reqStr.append("Accept-Encoding: x-gzip, gzip, deflate\r\n");
@@ -440,5 +432,5 @@ public class HttpResponse implements Res
in.unread(value);
return value;
}
-
+
}