svn commit: r1353582 - in /nutch/trunk: CHANGES.txt src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
Author: markus Date: Mon Jun 25 14:42:05 2012 New Revision: 1353582 URL: http://svn.apache.org/viewvc?rev=1353582&view=rev Log: NUTCH-1408 RobotRulesParser main doesn't take URL's Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1353582&r1=1353581&r2=1353582&view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Mon Jun 25 14:42:05 2012 @@ -2,6 +2,8 @@ Nutch Change Log (trunk) Current Development: +* NUTCH-1408 RobotRulesParser main doesn't take URL's (markus) + * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche) * NUTCH-1404 Nutch script fails to find job file in deploy mode (sidabatra, jnioche) Modified: nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java?rev=1353582&r1=1353581&r2=1353582&view=diff == --- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java (original) +++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java Mon Jun 25 14:42:05 2012 @@ -182,6 +182,7 @@ public class RobotRulesParser implements while (pos < end) { if (path.startsWith(entries[pos].prefix)) return entries[pos].allowed; + pos++; } @@ -335,6 +336,12 @@ public class RobotRulesParser implements doneAgents= true; String path= line.substring(line.indexOf(":") + 1); path= path.trim(); + +// Skip if no path was specified +if (path.length() == 0) { + // Go to the next token + continue; +} try { path= URLDecoder.decode(path, CHARACTER_ENCODING); } catch (Exception e) { @@ -560,7 +567,7 @@ public class RobotRulesParser implements String testPath= testsIn.readLine().trim(); while (testPath != null) { -System.out.println( 
(rules.isAllowed(testPath) ? +System.out.println( (rules.isAllowed(new URL(testPath)) ? "allowed" : "not allowed") + ":\t" + testPath); testPath= testsIn.readLine();
svn commit: r1353619 - /nutch/branches/branch-1.5.1/pom.xml
Author: lewismc Date: Mon Jun 25 15:56:23 2012 New Revision: 1353619 URL: http://svn.apache.org/viewvc?rev=1353619&view=rev Log: commit to sync pom.xml with Ivy deps Modified: nutch/branches/branch-1.5.1/pom.xml Modified: nutch/branches/branch-1.5.1/pom.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/pom.xml?rev=1353619&r1=1353618&r2=1353619&view=diff == --- nutch/branches/branch-1.5.1/pom.xml (original) +++ nutch/branches/branch-1.5.1/pom.xml Mon Jun 25 15:56:23 2012 @@ -15,286 +15,215 @@ See the License for the specific language governing permissions and limitations under the License. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>org.apache</groupId> - <artifactId>apache</artifactId> - <version>9</version> - <relativePath /> - </parent> - <groupId>org.apache.nutch</groupId> - <artifactId>nutch</artifactId> - <packaging>jar</packaging> - <version>1.6-SNAPSHOT</version> - <name>Apache Nutch</name> - <url>http://nutch.apache.org</url> - <licenses> - <license> - <name>The Apache Software License, Version 2.0</name> - <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> - <distribution>repo</distribution> - </license> - </licenses> - <scm> - <connection>scm:svn:http://svn.apache.org/repos/asf/nutch/trunk/</connection> - <developerConnection>scm:svn:https://svn.apache.org/repos/asf/nutch/trunk/</developerConnection> - <url>http://svn.apache.org/viewvc/nutch/trunk/</url> - </scm> - <developers> + <modelVersion>4.0.0</modelVersion> + <groupId>org.apache.nutch</groupId> + <artifactId>nutch</artifactId> + <packaging>jar</packaging> + <version>1.5.1</version> + <name>Apache Nutch</name> + <url>http://nutch.apache.org</url> + <licenses> + <license> + <name>The Apache 
Software License, Version 2.0</name> + <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> + <distribution>repo</distribution> + </license> + </licenses> + <scm> + <url>http://svn.apache.org/viewvc/nutch</url> + <connection>http://svn.apache.org/viewvc/nutch</connection> + </scm> + <developers> <developer> <id>ab</id> <name>Andrzej Bialecki</name> <email>a...@apache.org</email> </developer> <developer> - <id>mattmann</id> - <name>Chris A. Mattmann</name> - <email>mattm...@apache.org</email> - </developer> - <developer> - <id>kubes</id> - <name>Dennis Kubes</name> - <email>ku...@apache.org</email> - </developer> - <developer> +<id>alexis</id> +<name>Alexis Detlegrode</name> +<email>ale...@apache.org</email> +</developer> +<developer> <id>dogacan</id> - <name>Dogacan Gâºney</name> + <name>Dogacan Güney</name> <email>doga...@apache.org</email> </developer> <developer> +<id>ferdy</id> +<name>Ferdy Galema</name> +<email>fe...@apache.org</email> +</developer> +<developer> <id>jnioche</id> <name>Julien Nioche</name> <email>jnio...@apache.org</email> </developer> <developer> - <id>siren</id> - <name>Sami Siren</name> - <email>si...@apache.org</email> + <id>kubes</id> + <name>Dennis Kubes</name> + <email>ku...@apache.org</email> </developer> <developer> - <id>markus</id> - <name>Markus Jelsma</name> - <email>mar...@apache.org</email> - </developer> +<id>lewismc</id> +<name>Lewis John McGibbney</name> +<email>lewi...@apache.org</email> +</developer> + <developer> +<id>markus</id> +<name>Markus Jelsma</name> +<email>mar...@apache.org</email> +</developer>
svn commit: r1353615 - in /nutch/branches/branch-1.5.1: CHANGES.txt conf/nutch-default.xml conf/schema.xml default.properties
Author: lewismc Date: Mon Jun 25 15:52:52 2012 New Revision: 1353615 URL: http://svn.apache.org/viewvc?rev=1353615&view=rev Log: commit to set up RC Modified: nutch/branches/branch-1.5.1/CHANGES.txt nutch/branches/branch-1.5.1/conf/nutch-default.xml nutch/branches/branch-1.5.1/conf/schema.xml nutch/branches/branch-1.5.1/default.properties Modified: nutch/branches/branch-1.5.1/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1353615&r1=1353614&r2=1353615&view=diff == --- nutch/branches/branch-1.5.1/CHANGES.txt (original) +++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jun 25 15:52:52 2012 @@ -1,6 +1,6 @@ Nutch Change Log -(trunk) Current Development: +Release 1.5.1 - 25/06/2012 - ddmm * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche) Modified: nutch/branches/branch-1.5.1/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/nutch-default.xml?rev=1353615&r1=1353614&r2=1353615&view=diff == --- nutch/branches/branch-1.5.1/conf/nutch-default.xml (original) +++ nutch/branches/branch-1.5.1/conf/nutch-default.xml Mon Jun 25 15:52:52 2012 @@ -123,7 +123,7 @@ <property> <name>http.agent.version</name> - <value>Nutch-1.6-SNAPSHOT</value> + <value>Nutch-1.5.1</value> <description>A version string to advertise in the User-Agent header.</description> </property> Modified: nutch/branches/branch-1.5.1/conf/schema.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/schema.xml?rev=1353615&r1=1353614&r2=1353615&view=diff == --- nutch/branches/branch-1.5.1/conf/schema.xml (original) +++ nutch/branches/branch-1.5.1/conf/schema.xml Mon Jun 25 15:52:52 2012 @@ -28,7 +28,7 @@ example/solr/conf/schema.xml?view=markup for more info. 
--> -<schema name="nutch" version="1.6"> +<schema name="nutch" version="1.5.1"> <types> <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> Modified: nutch/branches/branch-1.5.1/default.properties URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/default.properties?rev=1353615&r1=1353614&r2=1353615&view=diff == --- nutch/branches/branch-1.5.1/default.properties (original) +++ nutch/branches/branch-1.5.1/default.properties Mon Jun 25 15:52:52 2012 @@ -14,7 +14,7 @@ # limitations under the License. name=apache-nutch -version=1.5.1-SNAPSHOT +version=1.5.1 final.name=${name}-${version} year=2012
svn commit: r1353638 - /nutch/tags/release-2.0rc3/
Author: lewismc Date: Mon Jun 25 16:24:14 2012 New Revision: 1353638 URL: http://svn.apache.org/viewvc?rev=1353638&view=rev Log: tagging Nutch 2.0 RC3 Added: nutch/tags/release-2.0rc3/ - copied from r1353637, nutch/branches/nutchgora/