svn commit: r1353582 - in /nutch/trunk: CHANGES.txt src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java

2012-06-25 Thread markus
Author: markus
Date: Mon Jun 25 14:42:05 2012
New Revision: 1353582

URL: http://svn.apache.org/viewvc?rev=1353582&view=rev
Log:
NUTCH-1408 RobotRulesParser main doesn't take URL's

Modified:
nutch/trunk/CHANGES.txt

nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1353582&r1=1353581&r2=1353582&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Jun 25 14:42:05 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1408 RobotRulesParser main doesn't take URL's (markus)
+
 * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche)
 
 * NUTCH-1404 Nutch script fails to find job file in deploy mode (sidabatra, 
jnioche)

Modified: 
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java?rev=1353582&r1=1353581&r2=1353582&view=diff
==
--- 
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
 (original)
+++ 
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
 Mon Jun 25 14:42:05 2012
@@ -182,6 +182,7 @@ public class RobotRulesParser implements
   while (pos < end) {
 if (path.startsWith(entries[pos].prefix))
   return entries[pos].allowed;
+
 pos++;
   }
 
@@ -335,6 +336,12 @@ public class RobotRulesParser implements
 doneAgents= true;
 String path= line.substring(line.indexOf(":") + 1);
 path= path.trim();
+
+// Skip if no path was specified
+if (path.length() == 0) {
+  // Go to the next token
+  continue;
+}
 try {
   path= URLDecoder.decode(path, CHARACTER_ENCODING);
 } catch (Exception e) {
@@ -560,7 +567,7 @@ public class RobotRulesParser implements
 
   String testPath= testsIn.readLine().trim();
   while (testPath != null) {
-System.out.println( (rules.isAllowed(testPath) ? 
+System.out.println( (rules.isAllowed(new URL(testPath)) ? 
  "allowed" : "not allowed")
 + ":\t" + testPath);
 testPath= testsIn.readLine();




svn commit: r1353619 - /nutch/branches/branch-1.5.1/pom.xml

2012-06-25 Thread lewismc
Author: lewismc
Date: Mon Jun 25 15:56:23 2012
New Revision: 1353619

URL: http://svn.apache.org/viewvc?rev=1353619&view=rev
Log:
commit to sync pom.xml with Ivy deps

Modified:
nutch/branches/branch-1.5.1/pom.xml

Modified: nutch/branches/branch-1.5.1/pom.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/pom.xml?rev=1353619&r1=1353618&r2=1353619&view=diff
==
--- nutch/branches/branch-1.5.1/pom.xml (original)
+++ nutch/branches/branch-1.5.1/pom.xml Mon Jun 25 15:56:23 2012
@@ -15,286 +15,215 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project xmlns=http://maven.apache.org/POM/4.0.0; 
xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; 
xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd;
+project xmlns=http://maven.apache.org/POM/4.0.0; 
xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance;
+xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd;
 
-   modelVersion4.0.0/modelVersion
-   parent
-   groupIdorg.apache/groupId
-   artifactIdapache/artifactId
-   version9/version
-   relativePath /
-   /parent   
-   groupIdorg.apache.nutch/groupId
-   artifactIdnutch/artifactId
-   packagingjar/packaging
-   version1.6-SNAPSHOT/version
-   nameApache Nutch/name
-   urlhttp://nutch.apache.org/url
-   licenses
-   license
-   nameThe Apache Software License, Version 2.0/name
-   
urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url
-   distributionrepo/distribution
-   /license
-   /licenses
-   scm
-   
connectionscm:svn:http://svn.apache.org/repos/asf/nutch/trunk//connection
-   
developerConnectionscm:svn:https://svn.apache.org/repos/asf/nutch/trunk//developerConnection
-   urlhttp://svn.apache.org/viewvc/nutch/trunk//url
-   /scm
-   developers
+  modelVersion4.0.0/modelVersion
+  groupIdorg.apache.nutch/groupId
+  artifactIdnutch/artifactId
+  packagingjar/packaging
+  version1.5.1/version
+  nameApache Nutch/name
+  urlhttp://nutch.apache.org/url
+  licenses
+ license
+   nameThe Apache Software License, Version 2.0/name
+   urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url
+   distributionrepo/distribution
+ /license
+  /licenses
+  scm
+ urlhttp://svn.apache.org/viewvc/nutch/url
+ connectionhttp://svn.apache.org/viewvc/nutch/connection
+  /scm
+  developers
developer
idab/id
nameAndrzej Bialecki/name
emaila...@apache.org/email
/developer
developer
-   idmattmann/id
-   nameChris A. Mattmann/name
-   emailmattm...@apache.org/email
-   /developer
-   developer
-   idkubes/id
-   nameDennis Kubes/name
-   emailku...@apache.org/email
-   /developer
-   developer
+idalexis/id
+nameAlexis Detlegrode/name
+emailale...@apache.org/email
+/developer
+developer
iddogacan/id
-   nameDogacan Güney/name
+   nameDogacan Güney/name
emaildoga...@apache.org/email
/developer
developer
+idferdy/id
+nameFerdy Galema/name
+emailfe...@apache.org/email
+/developer
+developer
idjnioche/id
nameJulien Nioche/name
emailjnio...@apache.org/email
/developer
developer
-   idsiren/id
-   nameSami Siren/name
-   emailsi...@apache.org/email
+   idkubes/id
+   nameDennis Kubes/name
+   emailku...@apache.org/email
/developer
developer
-   idmarkus/id
-   nameMarkus Jelsma/name
-   emailmar...@apache.org/email
-   /developer
+idlewismc/id
+nameLewis John McGibbney/name
+emaillewi...@apache.org/email
+/developer
+   developer
+idmarkus/id
+nameMarkus Jelsma/name
+emailmar...@apache.org/email
+/developer   

svn commit: r1353615 - in /nutch/branches/branch-1.5.1: CHANGES.txt conf/nutch-default.xml conf/schema.xml default.properties

2012-06-25 Thread lewismc
Author: lewismc
Date: Mon Jun 25 15:52:52 2012
New Revision: 1353615

URL: http://svn.apache.org/viewvc?rev=1353615&view=rev
Log:
commit to set up RC

Modified:
nutch/branches/branch-1.5.1/CHANGES.txt
nutch/branches/branch-1.5.1/conf/nutch-default.xml
nutch/branches/branch-1.5.1/conf/schema.xml
nutch/branches/branch-1.5.1/default.properties

Modified: nutch/branches/branch-1.5.1/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/CHANGES.txt (original)
+++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jun 25 15:52:52 2012
@@ -1,6 +1,6 @@
 Nutch Change Log
 
-(trunk) Current Development:
+Release 1.5.1 - 25/06/2012 - ddmm
 
 * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche)
 

Modified: nutch/branches/branch-1.5.1/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/nutch-default.xml?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/conf/nutch-default.xml (original)
+++ nutch/branches/branch-1.5.1/conf/nutch-default.xml Mon Jun 25 15:52:52 2012
@@ -123,7 +123,7 @@
 
 property
   namehttp.agent.version/name
-  valueNutch-1.6-SNAPSHOT/value
+  valueNutch-1.5.1/value
   descriptionA version string to advertise in the User-Agent 
header./description
 /property

Modified: nutch/branches/branch-1.5.1/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/schema.xml?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/conf/schema.xml (original)
+++ nutch/branches/branch-1.5.1/conf/schema.xml Mon Jun 25 15:52:52 2012
@@ -28,7 +28,7 @@
 example/solr/conf/schema.xml?view=markup
 for more info.
 --
-schema name=nutch version=1.6
+schema name=nutch version=1.5.1
 types
 fieldType name=string class=solr.StrField sortMissingLast=true
 omitNorms=true/ 

Modified: nutch/branches/branch-1.5.1/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/default.properties?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/default.properties (original)
+++ nutch/branches/branch-1.5.1/default.properties Mon Jun 25 15:52:52 2012
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 name=apache-nutch
-version=1.5.1-SNAPSHOT
+version=1.5.1
 final.name=${name}-${version}
 year=2012
 




svn commit: r1353638 - /nutch/tags/release-2.0rc3/

2012-06-25 Thread lewismc
Author: lewismc
Date: Mon Jun 25 16:24:14 2012
New Revision: 1353638

URL: http://svn.apache.org/viewvc?rev=1353638&view=rev
Log:
tagging Nutch 2.0 RC3

Added:
nutch/tags/release-2.0rc3/
  - copied from r1353637, nutch/branches/nutchgora/