Author: cutting
Date: Mon Nov  7 09:53:54 2005
New Revision: 331555

URL: http://svn.apache.org/viewcvs?rev=331555&view=rev
Log:
NUTCH-124: Follow redirects when fetching robots.txt.

Modified:
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java

Modified: 
lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=331555&r1=331554&r2=331555&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Mon Nov  7 09:53:54 2005
@@ -82,10 +82,14 @@
   }
 
   public HttpResponse(URL url) throws IOException {
+    this(url, false);
+  }
+
+  HttpResponse(URL url, boolean followRedirects) throws IOException {
     this.base = url.toString();
     this.orig = url.toString();
     GetMethod get = new GetMethod(this.orig);
-    get.setFollowRedirects(false);
+    get.setFollowRedirects(followRedirects);
     get.setRequestHeader("User-Agent", Http.AGENT_STRING);
     HttpMethodParams params = get.getParams();
     // some servers cannot digest the new protocol

Modified: 
lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java?rev=331555&r1=331554&r2=331555&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java Mon Nov  7 09:53:54 2005
@@ -379,7 +379,8 @@
     if (robotRules == null) {                     // cache miss
       LOG.fine("cache miss " + url);
       try {
-        HttpResponse response = new HttpResponse(new URL(url, "/robots.txt"));
+        HttpResponse response = new HttpResponse(new URL(url, "/robots.txt"),
+                                                 true);
 
         if (response.getCode() == 200)               // found rules: parse them
           robotRules = new RobotRulesParser().parseRules(response.getContent());


Reply via email to