Author: jerome
Date: Mon Jun  5 14:43:42 2006
New Revision: 411926

URL: http://svn.apache.org/viewvc?rev=411926&view=rev
Log:
NUTCH-298: No more NPE when the request for a robots.txt returns a 404; also adds some unit tests

Modified:
    
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
    
lucene/nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java

Modified: 
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java?rev=411926&r1=411925&r2=411926&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
 Mon Jun  5 14:43:42 2006
@@ -70,8 +70,8 @@
    * file, and can test paths against those rules.
    */
   public static class RobotRuleSet {
-    ArrayList tmpEntries;
-    RobotsEntry[] entries;
+    ArrayList tmpEntries = new ArrayList();
+    RobotsEntry[] entries = null;
     long expireTime;
 
     /**

Modified: 
lucene/nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java?rev=411926&r1=411925&r2=411926&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java
 Mon Jun  5 14:43:42 2006
@@ -25,7 +25,29 @@
   private static final String CR= "\r";
   private static final String CRLF= "\r\n";
   
-
+  private static final boolean[] ACCEPT_ALL = {
+    true,   // "/a",         
+    true,   // "/a/",        
+    true,   // "/a/bloh/foo.html"
+    true,   // "/b",         
+    true,   // "/b/a",       
+    true,   // "/b/a/index.html",
+    true,   // "/b/b/foo.html",  
+    true,   // "/c",         
+    true,   // "/c/a",       
+    true,   // "/c/a/index.html",
+    true,   // "/c/b/foo.html",  
+    true,   // "/d",         
+    true,   // "/d/a",       
+    true,   // "/e/a/index.html",
+    true,   // "/e/d",       
+    true,   // "/e/d/foo.html",  
+    true,   // "/e/doh.html",    
+    true,   // "/f/index.html",  
+    true,   // "/foo/bar.html",  
+    true,   // "/f/",
+  };
+  
   private static final String[] ROBOTS_STRINGS= new String[] {
     "User-Agent: Agent1 #foo" + CR 
     + "Disallow: /a" + CR 
@@ -40,6 +62,7 @@
     + "" + CR 
     + "User-Agent: *" + CR 
     + "Disallow: /foo/bar/" + CR,
+    null  // Used to test EMPTY_RULES
   };
 
   private static final String[] AGENT_STRINGS= new String[] {
@@ -57,7 +80,14 @@
       false,
       false,
       true,
-    }
+    },
+    { 
+      false, 
+      false,
+      false,
+      false,
+      true,
+    }    
   };
 
   private static final String[] TEST_PATHS= new String[] {
@@ -195,6 +225,13 @@
        false,  // "/foo/bar.html",  
        true,   // "/f/",  
       }
+    },
+    { // ROBOTS_STRINGS[1]
+      ACCEPT_ALL, // Agent 1
+      ACCEPT_ALL, // Agent 2
+      ACCEPT_ALL, // Agent 3
+      ACCEPT_ALL, // Agent 4
+      ACCEPT_ALL, // Agent 5
     }
   };
  
@@ -233,7 +270,9 @@
     for (int i= 1; i < agents.length; i++)
       agentsString= agentsString + "," + agents[i];
     RobotRulesParser p= new RobotRulesParser(agents);
-    RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString].getBytes());
+    RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString] != null
+                                     ? ROBOTS_STRINGS[robotsString].getBytes()
+                                     : null);
     for (int i= 0; i < paths.length; i++) {
       assertTrue("testing robots file "+robotsString+", on agents ("
                 + agentsString + "), and path " + TEST_PATHS[i] + "; got " 
@@ -243,4 +282,6 @@
     }
   }
 
+
+  
 }


Reply via email to