Author: lewismc
Date: Tue Jul 10 16:29:11 2012
New Revision: 1359760

URL: http://svn.apache.org/viewvc?rev=1359760&view=rev
Log:
revert NUTCH-1360

Modified: nutch/trunk/CHANGES.txt nutch/trunk/conf/nutch-default.xml nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1359760&r1=1359759&r2=1359760&view=diff ============================================================================== --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Tue Jul 10 16:29:11 2012 @@ -32,8 +32,6 @@ Nutch Change Log * NUTCH-1364 Add a counter in Generator for malformed urls (lewismc) -* NUTCH-1360 Suport the storing of IP address connected to when web crawling (lewismc) - * NUTCH-1262 Map `duplicating` content-types to a single type (markus) * NUTCH-1385 More robust plug-in order properties in nutch-site.xml (Andy Xue via markus) Modified: nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1359760&r1=1359759&r2=1359760&view=diff ============================================================================== --- nutch/trunk/conf/nutch-default.xml (original) +++ nutch/trunk/conf/nutch-default.xml Tue Jul 10 16:29:11 2012 @@ -255,13 +255,6 @@ </description> </property> -<property> - <name>http.store.ip.address</name> - <value>false</value> - <description>Enables us to capture the specific IP address of the - host which we connect to to fetch a page.</description> -</property> - <!-- FTP properties --> <property> Modified: nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1359760&r1=1359759&r2=1359760&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java 
(original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java Tue Jul 10 16:29:11 2012 @@ -48,7 +48,5 @@ public interface HttpHeaders { public final static String LAST_MODIFIED = "Last-Modified"; public final static String LOCATION = "Location"; - - public final static String IP_ADDRESS = "_ip"; } Modified: nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1359760&r1=1359759&r2=1359760&view=diff ============================================================================== --- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original) +++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Tue Jul 10 16:29:11 2012 @@ -80,9 +80,6 @@ public abstract class HttpBase implement /** The "Accept" request header value. */ protected String accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"; - /** The "_ip" request header value. 
*/ - protected boolean ip_header = false; - /** The default logger */ private final static Logger LOGGER = LoggerFactory.getLogger(HttpBase.class); @@ -123,7 +120,6 @@ public abstract class HttpBase implement .get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email")); this.acceptLanguage = conf.get("http.accept.language", acceptLanguage); this.accept = conf.get("http.accept", accept); - this.ip_header = conf.getBoolean("http.store.ip.address", false); // backward-compatible default setting this.useHttp11 = conf.getBoolean("http.useHttp11", false); this.robots.setConf(conf); @@ -251,10 +247,6 @@ public abstract class HttpBase implement return useHttp11; } - public boolean getIP_Header(){ - return ip_header; - } - private static String getAgentString(String agentName, String agentVersion, String agentDesc, @@ -309,7 +301,6 @@ public abstract class HttpBase implement logger.info("http.agent = " + userAgent); logger.info("http.accept.language = " + acceptLanguage); logger.info("http.accept = " + accept); - logger.info("http.store.ip.address = " + ip_header); } } Modified: nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1359760&r1=1359759&r2=1359760&view=diff ============================================================================== --- nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original) +++ nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Tue Jul 10 16:29:11 2012 @@ -93,9 +93,7 @@ public class HttpResponse implements Res int sockPort = http.useProxy() ? 
http.getProxyPort() : port; InetSocketAddress sockAddr= new InetSocketAddress(sockHost, sockPort); socket.connect(sockAddr, http.getTimeout()); - - headers.set("_ip", socket.getInetAddress().getHostAddress()); - + // make request OutputStream req = socket.getOutputStream(); @@ -112,12 +110,6 @@ public class HttpResponse implements Res reqStr.append(host); reqStr.append(portString); reqStr.append("\r\n"); - - if(this.http.getConf().getBoolean("http.store.ip.address", true)) { - reqStr.append("_ip: "); - reqStr.append(http.getIP_Header()); - reqStr.append("\r\n"); - } reqStr.append("Accept-Encoding: x-gzip, gzip, deflate\r\n"); @@ -440,5 +432,5 @@ public class HttpResponse implements Res in.unread(value); return value; } - + }