Author: markus
Date: Mon Jun 25 14:42:05 2012
New Revision: 1353582

URL: http://svn.apache.org/viewvc?rev=1353582&view=rev
Log:
NUTCH-1408 RobotRulesParser main doesn't take URL's

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1353582&r1=1353581&r2=1353582&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Jun 25 14:42:05 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1408 RobotRulesParser main doesn't take URL's (markus)
+
 * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche)
 
 * NUTCH-1404 Nutch script fails to find job file in deploy mode (sidabatra, jnioche)

Modified: nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java?rev=1353582&r1=1353581&r2=1353582&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java (original)
+++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java Mon Jun 25 14:42:05 2012
@@ -182,6 +182,7 @@ public class RobotRulesParser implements
       while (pos < end) {
         if (path.startsWith(entries[pos].prefix))
           return entries[pos].allowed;
+
         pos++;
       }
 
@@ -335,6 +336,12 @@ public class RobotRulesParser implements
         doneAgents= true;
         String path= line.substring(line.indexOf(":") + 1);
         path= path.trim();
+        
+        // Skip if no path was specified
+        if (path.length() == 0) {
+          // Go to the next token
+          continue;
+        }
         try {
           path= URLDecoder.decode(path, CHARACTER_ENCODING);
         } catch (Exception e) {
@@ -560,7 +567,7 @@ public class RobotRulesParser implements
 
       String testPath= testsIn.readLine().trim();
       while (testPath != null) {
-        System.out.println( (rules.isAllowed(testPath) ? 
+        System.out.println( (rules.isAllowed(new URL(testPath)) ? 
                              "allowed" : "not allowed")
                             + ":\t" + testPath);
         testPath= testsIn.readLine();


Reply via email to