svn commit: r1366844 - in /nutch/branches/2.x: CHANGES.txt build.xml
Author: lewismc Date: Sun Jul 29 13:03:43 2012 New Revision: 1366844 URL: http://svn.apache.org/viewvc?rev=1366844view=rev Log: NUTCH-1376 Add description parameter to every ant task Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/build.xml Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1366844r1=1366843r2=1366844view=diff == --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Sun Jul 29 13:03:43 2012 @@ -2,6 +2,8 @@ Nutch Change Log Release 2.1 - Current Development +* NUTCH-1376 add ant description parameters (lewismc) + * NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml (shekhar sharma via lewismc) * NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar sharma via lewismc) Modified: nutch/branches/2.x/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/build.xml?rev=1366844r1=1366843r2=1366844view=diff == --- nutch/branches/2.x/build.xml (original) +++ nutch/branches/2.x/build.xml Sun Jul 29 13:03:43 2012 @@ -61,7 +61,7 @@ !-- == -- !-- Stuff needed by all targets -- !-- == -- - target name=init depends=ivy-init + target name=init depends=ivy-init description=-- stuff required by all targets mkdir dir=${build.dir} / mkdir dir=${build.classes} / mkdir dir=${release.dir} / @@ -82,9 +82,9 @@ !-- == -- !-- Compile the Java files -- !-- == -- - target name=compile depends=compile-core, compile-plugins / + target name=compile depends=compile-core, compile-plugins description=-- compile all Java files/ - target name=compile-core depends=init, resolve-default + target name=compile-core depends=init, resolve-default description=-- compile core Java files only javac encoding=${build.encoding} srcdir=${src.dir} @@ -100,7 +100,7 @@ /javac /target - target name=compile-plugins depends=init, resolve-default + target name=compile-plugins depends=init, resolve-default description=-- compile plugins only ant dir=src/plugin 
target=deploy inheritAll=false / /target @@ -109,7 +109,7 @@ !-- == -- !-- -- !-- == -- - target name=jar depends=compile-core + target name=jar depends=compile-core description=-- make nutch.jar copy file=${conf.dir}/nutch-default.xml todir=${build.classes} / copy file=${conf.dir}/nutch-site.xml todir=${build.classes} / jar jarfile=${build.dir}/${final.name}.jar basedir=${build.classes} @@ -262,7 +262,7 @@ !-- == -- !-- -- !-- == -- - target name=job depends=compile + target name=job depends=compile description=-- make nutch.job jar jar jarfile=${build.dir}/${final.name}.job !-- If the build.classes has the nutch config files because the jar command @@ -280,7 +280,7 @@ /jar /target - target name=runtime depends=jar, job + target name=runtime depends=jar, job description=-- default target for running Nutch mkdir dir=${runtime.dir} / mkdir dir=${runtime.local} / mkdir dir=${runtime.deploy} / @@ -321,7 +321,7 @@ !-- == -- !-- Compile test code -- !-- == -- - target name=compile-core-test depends=compile-core, resolve-test + target name=compile-core-test depends=compile-core, resolve-test description=-- compile test code javac encoding=${build.encoding} srcdir=${test.src.dir} @@ -341,7 +341,7 @@ !-- Run Nutch proxy -- !-- == -- - target name=proxy depends=job, compile-core-test + target name=proxy depends=job, compile-core-test description=-- run nutch proxy java classname=org.apache.nutch.tools.proxy.TestbedProxy fork=true classpath refid=test.classpath / arg value=-fake / @@ -357,7 +357,7 @@ !-- Run Nutch benchmarking analysis -- !-- == -- - target name=benchmark + target name=benchmark description=-- run nutch benchmarking analysis java classname=org.apache.nutch.tools.Benchmark fork=true classpath refid=test.classpath / jvmarg line=-Xmx512m -Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl / @@ -374,9 +374,9
svn commit: r1366847 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/metadata/Metadata.java src/java/org/apache/nutch/metadata/Office.java
Author: lewismc Date: Sun Jul 29 13:13:25 2012 New Revision: 1366847 URL: http://svn.apache.org/viewvc?rev=1366847view=rev Log: NUTCH-1416 Remove o.a.n.metadata.Office Removed: nutch/trunk/src/java/org/apache/nutch/metadata/Office.java Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1366847r1=1366846r2=1366847view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Sun Jul 29 13:13:25 2012 @@ -2,6 +2,8 @@ Nutch Change Log (trunk) Current Development: +* NUTCH-1417 Remove o.a.n.metadata.Office (lewismc) + * NUTCH-1376 Add description parameter to every ant task (lewismc) * NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml (shekhar sharma via lewismc) Modified: nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java?rev=1366847r1=1366846r2=1366847view=diff == --- nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java Sun Jul 29 13:13:25 2012 @@ -36,7 +36,7 @@ import org.apache.hadoop.io.Writable; * */ public class Metadata implements Writable, CreativeCommons, -DublinCore, HttpHeaders, Nutch, Office, Feed { +DublinCore, HttpHeaders, Nutch, Feed { /** * A map of all metadata attributes.
svn commit: r1366342 - in /nutch/trunk: CHANGES.txt conf/schema-solr4.xml
Author: lewismc Date: Fri Jul 27 11:38:07 2012 New Revision: 1366342 URL: http://svn.apache.org/viewvc?rev=1366342view=rev Log: NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml Modified: nutch/trunk/CHANGES.txt nutch/trunk/conf/schema-solr4.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1366342r1=1366341r2=1366342view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Fri Jul 27 11:38:07 2012 @@ -2,6 +2,8 @@ Nutch Change Log (trunk) Current Development: +* NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml (shekhar sharma via lewismc) + * NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar sharma via lewismc) * NUTCH-1433 Upgrade to Tika 1.2 (jnioche) Modified: nutch/trunk/conf/schema-solr4.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/schema-solr4.xml?rev=1366342r1=1366341r2=1366342view=diff == --- nutch/trunk/conf/schema-solr4.xml (original) +++ nutch/trunk/conf/schema-solr4.xml Fri Jul 27 11:38:07 2012 @@ -120,7 +120,7 @@ !-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer, removes English stop words - (stopwords_en.txt), down cases, protects words from protwords.txt, and + (stopwords.txt), down cases, protects words from protwords.txt, and finally applies Porter's stemming. The query time analyzer also applies synonyms from synonyms.txt. 
-- fieldType name=text_en class=solr.TextField positionIncrementGap=100 @@ -135,7 +135,7 @@ -- filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.LowerCaseFilterFactory/ @@ -151,7 +151,7 @@ filter class=solr.SynonymFilterFactory synonyms=synonyms.txt ignoreCase=true expand=true/ filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.LowerCaseFilterFactory/ @@ -188,7 +188,7 @@ -- filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.WordDelimiterFilterFactory generateWordParts=1 generateNumberParts=1 catenateWords=1 catenateNumbers=1 catenateAll=0 splitOnCaseChange=1/ @@ -201,7 +201,7 @@ filter class=solr.SynonymFilterFactory synonyms=synonyms.txt ignoreCase=true expand=true/ filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.WordDelimiterFilterFactory generateWordParts=1 generateNumberParts=1 catenateWords=0 catenateNumbers=0 catenateAll=0 splitOnCaseChange=1/ @@ -217,7 +217,7 @@ analyzer tokenizer class=solr.WhitespaceTokenizerFactory/ filter class=solr.SynonymFilterFactory synonyms=synonyms.txt ignoreCase=true expand=false/ -filter class=solr.StopFilterFactory ignoreCase=true words=stopwords_en.txt/ +filter class=solr.StopFilterFactory ignoreCase=true words=stopwords.txt/ filter class=solr.WordDelimiterFilterFactory generateWordParts=0 generateNumberParts=0 catenateWords=1 catenateNumbers=1 catenateAll=0/ filter class=solr.LowerCaseFilterFactory/ filter class=solr.KeywordMarkerFilterFactory protected=protwords.txt/
svn commit: r1366348 - in /nutch/branches/2.x: CHANGES.txt conf/schema-solr4.xml
Author: lewismc Date: Fri Jul 27 11:55:22 2012 New Revision: 1366348 URL: http://svn.apache.org/viewvc?rev=1366348view=rev Log: NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/conf/schema-solr4.xml Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1366348r1=1366347r2=1366348view=diff == --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Fri Jul 27 11:55:22 2012 @@ -1,6 +1,11 @@ Nutch Change Log Release 2.1 - Current Development + +* NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml (shekhar sharma via lewismc) + +* NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar sharma via lewismc) + * NUTCH-1438 ParserJob support for option -reparse (ferdy) * NUTCH-1437 HostInjectorJob to accept lines with or without protocol (ferdy) Modified: nutch/branches/2.x/conf/schema-solr4.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema-solr4.xml?rev=1366348r1=1366347r2=1366348view=diff == --- nutch/branches/2.x/conf/schema-solr4.xml (original) +++ nutch/branches/2.x/conf/schema-solr4.xml Fri Jul 27 11:55:22 2012 @@ -25,7 +25,7 @@ for more info. -- -schema name=nutch version=1.6-SNAPSHOT +schema name=nutch version=2.1-SNAPSHOT types @@ -120,7 +120,7 @@ !-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer, removes English stop words - (stopwords_en.txt), down cases, protects words from protwords.txt, and + (stopwords.txt), down cases, protects words from protwords.txt, and finally applies Porter's stemming. The query time analyzer also applies synonyms from synonyms.txt. 
-- fieldType name=text_en class=solr.TextField positionIncrementGap=100 @@ -135,7 +135,7 @@ -- filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.LowerCaseFilterFactory/ @@ -151,7 +151,7 @@ filter class=solr.SynonymFilterFactory synonyms=synonyms.txt ignoreCase=true expand=true/ filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.LowerCaseFilterFactory/ @@ -188,7 +188,7 @@ -- filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.WordDelimiterFilterFactory generateWordParts=1 generateNumberParts=1 catenateWords=1 catenateNumbers=1 catenateAll=0 splitOnCaseChange=1/ @@ -201,7 +201,7 @@ filter class=solr.SynonymFilterFactory synonyms=synonyms.txt ignoreCase=true expand=true/ filter class=solr.StopFilterFactory ignoreCase=true -words=stopwords_en.txt +words=stopwords.txt enablePositionIncrements=true / filter class=solr.WordDelimiterFilterFactory generateWordParts=1 generateNumberParts=1 catenateWords=0 catenateNumbers=0 catenateAll=0 splitOnCaseChange=1/ @@ -217,7 +217,7 @@ analyzer tokenizer class=solr.WhitespaceTokenizerFactory/ filter class=solr.SynonymFilterFactory synonyms=synonyms.txt ignoreCase=true expand=false/ -filter class=solr.StopFilterFactory ignoreCase=true words=stopwords_en.txt/ +filter class=solr.StopFilterFactory ignoreCase=true words=stopwords.txt/ filter class=solr.WordDelimiterFilterFactory generateWordParts=0 generateNumberParts=0 catenateWords=1 catenateNumbers=1 catenateAll=0/ filter class=solr.LowerCaseFilterFactory/ filter class=solr.KeywordMarkerFilterFactory protected=protwords.txt/
svn commit: r1365973 - /nutch/branches/2.x/doap.rdf
Author: lewismc Date: Thu Jul 26 13:03:54 2012 New Revision: 1365973 URL: http://svn.apache.org/viewvc?rev=1365973&view=rev Log: remove unnecessary doap.rdf Removed: nutch/branches/2.x/doap.rdf
svn commit: r1365972 - /nutch/site/publish/doap.rdf
Author: lewismc Date: Thu Jul 26 13:00:46 2012 New Revision: 1365972 URL: http://svn.apache.org/viewvc?rev=1365972&view=rev Log: trivial commit to update doap.rdf Modified: nutch/site/publish/doap.rdf Modified: nutch/site/publish/doap.rdf URL: http://svn.apache.org/viewvc/nutch/site/publish/doap.rdf?rev=1365972&r1=1365971&r2=1365972&view=diff == --- nutch/site/publish/doap.rdf (original) +++ nutch/site/publish/doap.rdf Thu Jul 26 13:00:46 2012 @@ -40,6 +40,20 @@ The system can be enhanced (eg other doc category rdf:resource=http://projects.apache.org/category/web-framework; / release Version +nameApache Nutch 1.5.1/name +created2012-07-10/created +revision1.5.1/revision + /Version +/release +release + Version +nameApache Nutch 2.0/name +created2012-07-07/created +revision2.0/revision + /Version +/release +release + Version nameApache Nutch 1.5/name created2012-06-07/created revision1.5/revision
svn commit: r1366159 - in /nutch/trunk: CHANGES.txt conf/schema-solr4.xml
Author: lewismc Date: Thu Jul 26 19:20:44 2012 New Revision: 1366159 URL: http://svn.apache.org/viewvc?rev=1366159&view=rev Log: NUTCH-1439 Define boost field as type float in schema-solr4.xml Modified: nutch/trunk/CHANGES.txt nutch/trunk/conf/schema-solr4.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1366159&r1=1366158&r2=1366159&view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Jul 26 19:20:44 2012 @@ -2,6 +2,8 @@ Nutch Change Log (trunk) Current Development: +* NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar sharma via lewismc) + * NUTCH-1433 Upgrade to Tika 1.2 (jnioche) * NUTCH-1388 Optionally maintain custom fetch interval despite AdaptiveFetchSchedule (markus) Modified: nutch/trunk/conf/schema-solr4.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/schema-solr4.xml?rev=1366159&r1=1366158&r2=1366159&view=diff == --- nutch/trunk/conf/schema-solr4.xml (original) +++ nutch/trunk/conf/schema-solr4.xml Thu Jul 26 19:20:44 2012 @@ -306,7 +306,7 @@ !-- core fields -- field name=segment type=string stored=true indexed=false/ field name=digest type=string stored=true indexed=false/ -field name=boost type=string stored=true indexed=false/ +field name=boost type=float stored=true indexed=false/ !-- fields for index-basic plugin -- field name=host type=url stored=false indexed=true/
svn commit: r1366170 - /nutch/branches/2.x/conf/schema-solr4.xml
Author: lewismc Date: Thu Jul 26 19:37:35 2012 New Revision: 1366170 URL: http://svn.apache.org/viewvc?rev=1366170&view=rev Log: copy over solr 4 schema. Added: nutch/branches/2.x/conf/schema-solr4.xml - copied unchanged from r1366169, nutch/trunk/conf/schema-solr4.xml
svn commit: r1364584 - in /nutch/branches/2.x: CHANGES.txt conf/gora-cassandra-mapping.xml conf/gora-sql-mapping.xml
Author: lewismc Date: Mon Jul 23 11:11:59 2012 New Revision: 1364584 URL: http://svn.apache.org/viewvc?rev=1364584view=rev Log: NUTCH-1435 Host jobs throw NullPointerException with MySQL Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/conf/gora-cassandra-mapping.xml nutch/branches/2.x/conf/gora-sql-mapping.xml Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1364584r1=1364583r2=1364584view=diff == --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Mon Jul 23 11:11:59 2012 @@ -2,6 +2,8 @@ Nutch Change Log Release 2.1 - Current Development +* NUTCH-1435 Host jobs throw NullPointerException with MySQL (ferdy via lewismc) + * NUTCH-1428 GeneratorMapper should not initialize filters/normalizers when they are disabled (ferdy) * NUTCH-1427 Reuse SelectorEntry in Generator. (ferdy) Modified: nutch/branches/2.x/conf/gora-cassandra-mapping.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/gora-cassandra-mapping.xml?rev=1364584r1=1364583r2=1364584view=diff == --- nutch/branches/2.x/conf/gora-cassandra-mapping.xml (original) +++ nutch/branches/2.x/conf/gora-cassandra-mapping.xml Mon Jul 23 11:11:59 2012 @@ -46,11 +46,11 @@ field name=score family=f qualifier=s/ !-- super columns -- -field name=markers family=sc qualifier=mk/ +field name=headers family=sc qualifier=h/ field name=inlinks family=sc qualifier=il/ field name=outlinks family=sc qualifier=ol/ field name=metadata family=sc qualifier=mtdt/ -field name=headers family=sc qualifier=h/ +field name=markers family=sc qualifier=mk/ field name=parseStatus family=sc qualifier=pas/ field name=protocolStatus family=sc qualifier=prs/ /class Modified: nutch/branches/2.x/conf/gora-sql-mapping.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/gora-sql-mapping.xml?rev=1364584r1=1364583r2=1364584view=diff == --- nutch/branches/2.x/conf/gora-sql-mapping.xml (original) +++ 
nutch/branches/2.x/conf/gora-sql-mapping.xml Mon Jul 23 11:11:59 2012 @@ -47,4 +47,12 @@ field name=markers column=markers/ /class +class name=org.apache.nutch.storage.Host keyClass=java.lang.String +table=host + primarykey column=id length=512/ + field name=metadata column=metadata/ + field name=inlinks column=inlinks/ + field name=outlinks column=outlinks/ +/class + /gora-orm
svn commit: r1359704 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/
Author: lewismc Date: Tue Jul 10 14:39:10 2012 New Revision: 1359704 URL: http://svn.apache.org/viewvc?rev=1359704view=rev Log: Nutch v1.5.1 announcement Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml nutch/site/forrest/src/documentation/content/xdocs/site.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/faq.html nutch/site/publish/faq.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/old_downloads.html nutch/site/publish/old_downloads.pdf nutch/site/publish/sonar.html nutch/site/publish/sonar.pdf nutch/site/publish/tutorial.html nutch/site/publish/tutorial.pdf nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf nutch/site/publish/wiki.html nutch/site/publish/wiki.pdf Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1359704r1=1359703r2=1359704view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/index.xml Tue Jul 10 14:39:10 2012 @@ -30,6 +30,15 @@ section titleApache Nutch News/title + + section + title10 July 2012 - Apache Nutch v1.5.1 Released/title + pThe Apache Nutch PMC are very pleased to announce the release of Apache Nutch v1.5.1. This release is a maintainence release of the popular 1.5.X mainstream version of Nutch which has been widely adopted within the community. 
+ Please see the a href=http://www.apache.org/dist/nutch/1.5.1/CHANGES.txt;list of changes/a made + in this version for a full breakdown. The release is available + a href=http://www.apache.org/dyn/closer.cgi/nutch/;here/a. +/p + /section section title07 July 2012 - Apache Nutch v2.0 Released/title Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1359704r1=1359703r2=1359704view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Tue Jul 10 14:39:10 2012 @@ -47,7 +47,7 @@ See http://forrest.apache.org/docs/linki tutoriallabel=Tutorial href=tutorial.html / webmasters label=Robothref=bot.html / apidocs label=API Docs (2.0)href=apidocs-2.0/index.html/ -apidocs label=API Docs (1.5)href=apidocs-1.5/index.html/ +apidocs label=API Docs (1.5.1)href=apidocs-1.5/index.html/ apidocs label=API Docs (trunk-nightly) href=ext:nightly-api / apidocslabel=API Docs (2.0-Dev-nightly) href=ext:nightly-2.0-api / /docs Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1359704r1=1359703r2=1359704view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Tue Jul 10 14:39:10 2012 @@ -186,7 +186,7 @@ document.write(Last Published: + docu a href=apidocs-2.0/index.htmlAPI Docs (2.0)/a /div div class=menuitem -a href=apidocs-1.5/index.htmlAPI Docs (1.5)/a +a href=apidocs-1.5/index.htmlAPI Docs (1.5.1)/a /div div class=menuitem a href=https://builds.apache.org/job/Nutch-trunk/javadoc/;API Docs (trunk-nightly)/a Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1359704r1=1359703r2=1359704view=diff == Binary files - no diff available. 
Modified: nutch/site/publish/bot.html URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1359704r1=1359703r2=1359704view=diff == --- nutch/site/publish/bot.html (original) +++ nutch/site/publish/bot.html Tue Jul 10 14:39:10 2012 @@ -186,7 +186,7 @@ document.write(Last Published: + docu a href=apidocs-2.0/index.htmlAPI Docs (2.0)/a /div div class=menuitem -a href=apidocs-1.5/index.htmlAPI Docs (1.5)/a +a href=apidocs-1.5/index.htmlAPI Docs (1.5.1)/a /div div class=menuitem
svn commit: r1359746 - in /nutch/branches/2.x: conf/nutch-default.xml conf/schema.xml default.properties
Author: lewismc Date: Tue Jul 10 16:08:23 2012 New Revision: 1359746 URL: http://svn.apache.org/viewvc?rev=1359746view=rev Log: update all versions to 2.1-SNAPSHOT Modified: nutch/branches/2.x/conf/nutch-default.xml nutch/branches/2.x/conf/schema.xml nutch/branches/2.x/default.properties Modified: nutch/branches/2.x/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1359746r1=1359745r2=1359746view=diff == --- nutch/branches/2.x/conf/nutch-default.xml (original) +++ nutch/branches/2.x/conf/nutch-default.xml Tue Jul 10 16:08:23 2012 @@ -125,7 +125,7 @@ property namehttp.agent.version/name - valueNutch-2.0/value + valueNutch-2.1-SNAPSHOT/value descriptionA version string to advertise in the User-Agent header./description /property Modified: nutch/branches/2.x/conf/schema.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema.xml?rev=1359746r1=1359745r2=1359746view=diff == --- nutch/branches/2.x/conf/schema.xml (original) +++ nutch/branches/2.x/conf/schema.xml Tue Jul 10 16:08:23 2012 @@ -27,7 +27,7 @@ example/solr/conf/schema.xml?view=markup for more info. -- -schema name=nutch version=2.0 +schema name=nutch version=2.1-SNAPSHOT types fieldType name=string class=solr.StrField sortMissingLast=true omitNorms=true/ Modified: nutch/branches/2.x/default.properties URL: http://svn.apache.org/viewvc/nutch/branches/2.x/default.properties?rev=1359746r1=1359745r2=1359746view=diff == --- nutch/branches/2.x/default.properties (original) +++ nutch/branches/2.x/default.properties Tue Jul 10 16:08:23 2012 @@ -15,7 +15,7 @@ name=apache-nutch -version=2.0 +version=2.1-SNAPSHOT final.name=${name}-${version} year=2012
svn commit: r1359752 - in /nutch/trunk: conf/schema-solr4.xml conf/schema.xml default.properties
Author: lewismc Date: Tue Jul 10 16:15:24 2012 New Revision: 1359752 URL: http://svn.apache.org/viewvc?rev=1359752view=rev Log: update all versions to 1.6-SNAPSHOT Modified: nutch/trunk/conf/schema-solr4.xml nutch/trunk/conf/schema.xml nutch/trunk/default.properties Modified: nutch/trunk/conf/schema-solr4.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/schema-solr4.xml?rev=1359752r1=1359751r2=1359752view=diff == --- nutch/trunk/conf/schema-solr4.xml (original) +++ nutch/trunk/conf/schema-solr4.xml Tue Jul 10 16:15:24 2012 @@ -25,7 +25,7 @@ for more info. -- -schema name=nutch version=1.4 +schema name=nutch version=1.6-SNAPSHOT types Modified: nutch/trunk/conf/schema.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/schema.xml?rev=1359752r1=1359751r2=1359752view=diff == --- nutch/trunk/conf/schema.xml (original) +++ nutch/trunk/conf/schema.xml Tue Jul 10 16:15:24 2012 @@ -28,7 +28,7 @@ example/solr/conf/schema.xml?view=markup for more info. -- -schema name=nutch version=1.6 +schema name=nutch version=1.6-SNAPSHOT types fieldType name=string class=solr.StrField sortMissingLast=true omitNorms=true/ Modified: nutch/trunk/default.properties URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1359752r1=1359751r2=1359752view=diff == --- nutch/trunk/default.properties (original) +++ nutch/trunk/default.properties Tue Jul 10 16:15:24 2012 @@ -14,7 +14,7 @@ # limitations under the License. name=apache-nutch -version=1.5.1-SNAPSHOT +version=1.6-SNAPSHOT final.name=${name}-${version} year=2012
svn commit: r1359760 - in /nutch/trunk: ./ conf/ src/java/org/apache/nutch/metadata/ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/ src/plugin/protocol-http/src/java/org/apache/nutch
Author: lewismc Date: Tue Jul 10 16:29:11 2012 New Revision: 1359760 URL: http://svn.apache.org/viewvc?rev=1359760view=rev Log: revert NUTCH-1360 Modified: nutch/trunk/CHANGES.txt nutch/trunk/conf/nutch-default.xml nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1359760r1=1359759r2=1359760view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Tue Jul 10 16:29:11 2012 @@ -32,8 +32,6 @@ Nutch Change Log * NUTCH-1364 Add a counter in Generator for malformed urls (lewismc) -* NUTCH-1360 Suport the storing of IP address connected to when web crawling (lewismc) - * NUTCH-1262 Map `duplicating` content-types to a single type (markus) * NUTCH-1385 More robust plug-in order properties in nutch-site.xml (Andy Xue via markus) Modified: nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1359760r1=1359759r2=1359760view=diff == --- nutch/trunk/conf/nutch-default.xml (original) +++ nutch/trunk/conf/nutch-default.xml Tue Jul 10 16:29:11 2012 @@ -255,13 +255,6 @@ /description /property -property - namehttp.store.ip.address/name - valuefalse/value - descriptionEnables us to capture the specific IP address of the - host which we connect to to fetch a page./description -/property - !-- FTP properties -- property Modified: nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1359760r1=1359759r2=1359760view=diff == --- nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java (original) +++ nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java Tue Jul 10 16:29:11 2012 @@ -48,7 +48,5 @@ 
public interface HttpHeaders { public final static String LAST_MODIFIED = Last-Modified; public final static String LOCATION = Location; - - public final static String IP_ADDRESS = _ip; } Modified: nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1359760r1=1359759r2=1359760view=diff == --- nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original) +++ nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Tue Jul 10 16:29:11 2012 @@ -80,9 +80,6 @@ public abstract class HttpBase implement /** The Accept request header value. */ protected String accept = text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8; - /** The _ip request header value. */ - protected boolean ip_header = false; - /** The default logger */ private final static Logger LOGGER = LoggerFactory.getLogger(HttpBase.class); @@ -123,7 +120,6 @@ public abstract class HttpBase implement .get(http.agent.description), conf.get(http.agent.url), conf.get(http.agent.email)); this.acceptLanguage = conf.get(http.accept.language, acceptLanguage); this.accept = conf.get(http.accept, accept); - this.ip_header = conf.getBoolean(http.store.ip.address, false); // backward-compatible default setting this.useHttp11 = conf.getBoolean(http.useHttp11, false); this.robots.setConf(conf); @@ -251,10 +247,6 @@ public abstract class HttpBase implement return useHttp11; } - public boolean getIP_Header(){ - return ip_header; - } - private static String getAgentString(String agentName, String agentVersion, String agentDesc, @@ -309,7 +301,6 @@ public abstract class HttpBase implement logger.info(http.agent = + userAgent); logger.info(http.accept.language = + acceptLanguage); logger.info(http.accept = + accept); - logger.info(http.store.ip.address = + ip_header); } } Modified: 
nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1359760r1=1359759r2=1359760view=diff == --- nutch/trunk/src/plugin/protocol-http
svn commit: r1358658 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/apidocs-2.0/ publish/apidocs-2.0/org/ publish/apidocs-2.0/org/apache/ publish/apidocs-2.0/org/apache/nu
Author: lewismc Date: Sat Jul 7 22:28:29 2012 New Revision: 1358658 URL: http://svn.apache.org/viewvc?rev=1358658&view=rev Log: commit for 2.0 release [This commit notification would consist of 146 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r1356855 - /nutch/branches/branch-1.5.1/build.xml
Author: lewismc Date: Tue Jul 3 18:15:49 2012 New Revision: 1356855 URL: http://svn.apache.org/viewvc?rev=1356855view=rev Log: NUTCH-1415-v2 Modified: nutch/branches/branch-1.5.1/build.xml Modified: nutch/branches/branch-1.5.1/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/build.xml?rev=1356855r1=1356854r2=1356855view=diff == --- nutch/branches/branch-1.5.1/build.xml (original) +++ nutch/branches/branch-1.5.1/build.xml Tue Jul 3 18:15:49 2012 @@ -703,11 +703,11 @@ target name=tar-bin depends=package-bin tar compression=gzip longfile=gnu destfile=${bin.dist.version.dir}.tar.gz - tarfileset dir=${bin.dist.version.dir} mode=664 + tarfileset dir=${bin.dist.version.dir} mode=664 prefix=${final.name} exclude name=bin/* / include name=** / /tarfileset - tarfileset dir=${bin.dist.version.dir} mode=755 + tarfileset dir=${bin.dist.version.dir} mode=755 prefix=${final.name} include name=bin/* / /tarfileset /tar @@ -735,11 +735,11 @@ target name=zip-bin depends=package-bin zip compress=true casesensitive=yes destfile=${bin.dist.version.dir}.zip - zipfileset dir=${bin.dist.version.dir} filemode=664 + zipfileset dir=${bin.dist.version.dir} filemode=664 prefix=${final.name} exclude name=bin/* / include name=** / /zipfileset - zipfileset dir=${bin.dist.version.dir} filemode=755 + zipfileset dir=${src.dist.version.dir} filemode=755 prefix=${final.name} include name=bin/* / /zipfileset /zip
svn commit: r1356863 - /nutch/tags/release-1.5.1-rc3/
Author: lewismc Date: Tue Jul 3 18:28:37 2012 New Revision: 1356863 URL: http://svn.apache.org/viewvc?rev=1356863&view=rev Log: tag for apache-nutch-1.5.1-rc3 Added: nutch/tags/release-1.5.1-rc3/ - copied from r1356862, nutch/branches/branch-1.5.1/
svn commit: r1356338 - /nutch/branches/branch-1.5.1/
Author: lewismc Date: Mon Jul 2 16:48:49 2012 New Revision: 1356338 URL: http://svn.apache.org/viewvc?rev=1356338&view=rev Log: remove old/incorrect branch-1.5.1 Removed: nutch/branches/branch-1.5.1/
svn commit: r1356343 - in /nutch/branches/branch-1.5.1: CHANGES.txt ivy/ivy.xml
Author: lewismc Date: Mon Jul 2 16:56:21 2012 New Revision: 1356343 URL: http://svn.apache.org/viewvc?rev=1356343view=rev Log: backport of NUTCH-1398 Upgrade to Hadoop 1.0.3 Modified: nutch/branches/branch-1.5.1/CHANGES.txt nutch/branches/branch-1.5.1/ivy/ivy.xml Modified: nutch/branches/branch-1.5.1/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1356343r1=1356342r2=1356343view=diff == --- nutch/branches/branch-1.5.1/CHANGES.txt (original) +++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jul 2 16:56:21 2012 @@ -1,5 +1,9 @@ Nutch Change Log +Release 1.5.1 - 07/02/2012 + +* NUTCH-1398 Upgrade to Hadoop 1.0.3 (jnioche) + Release 1.5 - 04/15/2012 * NUTCH-1208 Don't include KEYS file in bin distribution (jnioche) Modified: nutch/branches/branch-1.5.1/ivy/ivy.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/ivy/ivy.xml?rev=1356343r1=1356342r2=1356343view=diff == --- nutch/branches/branch-1.5.1/ivy/ivy.xml (original) +++ nutch/branches/branch-1.5.1/ivy/ivy.xml Mon Jul 2 16:56:21 2012 @@ -43,7 +43,7 @@ dependency org=commons-codec name=commons-codec rev=1.3 conf=*-default / - dependency org=org.apache.hadoop name=hadoop-core rev=1.0.0 + dependency org=org.apache.hadoop name=hadoop-core rev=1.0.3 conf=*-default exclude org=hsqldb name=hsqldb / exclude org=net.sf.kosmosfs name=kfs / @@ -67,7 +67,7 @@ !--artifacts needed for testing -- dependency org=junit name=junit rev=3.8.1 conf=*-default / - dependency org=org.apache.hadoop name=hadoop-test rev=1.0.0 + dependency org=org.apache.hadoop name=hadoop-test rev=1.0.3 conf=test-default / dependency org=org.mortbay.jetty name=jetty rev=6.1.22
svn commit: r1356363 - in /nutch/branches/branch-1.5.1: CHANGES.txt pom.xml src/bin/nutch
Author: lewismc Date: Mon Jul 2 17:23:10 2012 New Revision: 1356363 URL: http://svn.apache.org/viewvc?rev=1356363view=rev Log: NUTCH-1404 Nutch script fails to find job file in deploy mode Modified: nutch/branches/branch-1.5.1/CHANGES.txt nutch/branches/branch-1.5.1/pom.xml nutch/branches/branch-1.5.1/src/bin/nutch Modified: nutch/branches/branch-1.5.1/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1356363r1=1356362r2=1356363view=diff == --- nutch/branches/branch-1.5.1/CHANGES.txt (original) +++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jul 2 17:23:10 2012 @@ -2,6 +2,8 @@ Nutch Change Log Release 1.5.1 - 07/02/2012 +* NUTCH-1404 Nutch script fails to find job file in deploy mode (sidabatra, jnioche) + * NUTCH-1415 release packages to contain top level folder apache-nutch-x.x (snagel via lewismc) * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche) Modified: nutch/branches/branch-1.5.1/pom.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/pom.xml?rev=1356363r1=1356362r2=1356363view=diff == --- nutch/branches/branch-1.5.1/pom.xml (original) +++ nutch/branches/branch-1.5.1/pom.xml Mon Jul 2 17:23:10 2012 @@ -22,7 +22,7 @@ groupIdorg.apache.nutch/groupId artifactIdnutch/artifactId packagingjar/packaging - version1.5/version + version1.5.1/version nameApache Nutch/name urlhttp://nutch.apache.org/url licenses @@ -149,7 +149,7 @@ dependency groupIdorg.apache.hadoop/groupId artifactIdhadoop-core/artifactId -version1.0.0/version +version1.0.3/version optionaltrue/optional /dependency dependency @@ -203,7 +203,7 @@ dependency groupIdorg.apache.hadoop/groupId artifactIdhadoop-test/artifactId -version1.0.0/version +version1.0.3/version optionaltrue/optional /dependency dependency Modified: nutch/branches/branch-1.5.1/src/bin/nutch URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/src/bin/nutch?rev=1356363r1=1356362r2=1356363view=diff == --- nutch/branches/branch-1.5.1/src/bin/nutch 
(original) +++ nutch/branches/branch-1.5.1/src/bin/nutch Mon Jul 2 17:23:10 2012 @@ -101,9 +101,9 @@ fi local=true # NUTCH_JOB -if [ -f ${NUTCH_HOME}/nutch-*.job ]; then +if [ -f ${NUTCH_HOME}/*nutch*.job ]; then local=false - for f in $NUTCH_HOME/nutch-*.job; do + for f in $NUTCH_HOME/*nutch*.job; do NUTCH_JOB=$f; done fi
svn commit: r1353619 - /nutch/branches/branch-1.5.1/pom.xml
Author: lewismc Date: Mon Jun 25 15:56:23 2012 New Revision: 1353619 URL: http://svn.apache.org/viewvc?rev=1353619view=rev Log: commit to sync pom.xml with Ivy deps Modified: nutch/branches/branch-1.5.1/pom.xml Modified: nutch/branches/branch-1.5.1/pom.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/pom.xml?rev=1353619r1=1353618r2=1353619view=diff == --- nutch/branches/branch-1.5.1/pom.xml (original) +++ nutch/branches/branch-1.5.1/pom.xml Mon Jun 25 15:56:23 2012 @@ -15,286 +15,215 @@ See the License for the specific language governing permissions and limitations under the License. -- -project xmlns=http://maven.apache.org/POM/4.0.0; xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd; +project xmlns=http://maven.apache.org/POM/4.0.0; xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; +xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd; - modelVersion4.0.0/modelVersion - parent - groupIdorg.apache/groupId - artifactIdapache/artifactId - version9/version - relativePath / - /parent - groupIdorg.apache.nutch/groupId - artifactIdnutch/artifactId - packagingjar/packaging - version1.6-SNAPSHOT/version - nameApache Nutch/name - urlhttp://nutch.apache.org/url - licenses - license - nameThe Apache Software License, Version 2.0/name - urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url - distributionrepo/distribution - /license - /licenses - scm - connectionscm:svn:http://svn.apache.org/repos/asf/nutch/trunk//connection - developerConnectionscm:svn:https://svn.apache.org/repos/asf/nutch/trunk//developerConnection - urlhttp://svn.apache.org/viewvc/nutch/trunk//url - /scm - developers + modelVersion4.0.0/modelVersion + groupIdorg.apache.nutch/groupId + artifactIdnutch/artifactId + packagingjar/packaging + version1.5.1/version + nameApache Nutch/name + urlhttp://nutch.apache.org/url + licenses + license + nameThe Apache 
Software License, Version 2.0/name + urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url + distributionrepo/distribution + /license + /licenses + scm + urlhttp://svn.apache.org/viewvc/nutch/url + connectionhttp://svn.apache.org/viewvc/nutch/connection + /scm + developers developer idab/id nameAndrzej Bialecki/name emaila...@apache.org/email /developer developer - idmattmann/id - nameChris A. Mattmann/name - emailmattm...@apache.org/email - /developer - developer - idkubes/id - nameDennis Kubes/name - emailku...@apache.org/email - /developer - developer +idalexis/id +nameAlexis Detlegrode/name +emailale...@apache.org/email +/developer +developer iddogacan/id - nameDogacan Gâºney/name + nameDogacan Güney/name emaildoga...@apache.org/email /developer developer +idferdy/id +nameFerdy Galema/name +emailfe...@apache.org/email +/developer +developer idjnioche/id nameJulien Nioche/name emailjnio...@apache.org/email /developer developer - idsiren/id - nameSami Siren/name - emailsi...@apache.org/email + idkubes/id + nameDennis Kubes/name + emailku...@apache.org/email /developer developer - idmarkus/id - nameMarkus Jelsma/name - emailmar...@apache.org/email - /developer +idlewismc/id +nameLewis John McGibbney/name +emaillewi...@apache.org/email +/developer + developer +idmarkus/id +nameMarkus Jelsma/name +emailmar...@apache.org/email +/developer
svn commit: r1353615 - in /nutch/branches/branch-1.5.1: CHANGES.txt conf/nutch-default.xml conf/schema.xml default.properties
Author: lewismc Date: Mon Jun 25 15:52:52 2012 New Revision: 1353615 URL: http://svn.apache.org/viewvc?rev=1353615view=rev Log: commit to set up RC Modified: nutch/branches/branch-1.5.1/CHANGES.txt nutch/branches/branch-1.5.1/conf/nutch-default.xml nutch/branches/branch-1.5.1/conf/schema.xml nutch/branches/branch-1.5.1/default.properties Modified: nutch/branches/branch-1.5.1/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1353615r1=1353614r2=1353615view=diff == --- nutch/branches/branch-1.5.1/CHANGES.txt (original) +++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jun 25 15:52:52 2012 @@ -1,6 +1,6 @@ Nutch Change Log -(trunk) Current Development: +Release 1.5.1 - 25/06/2012 - ddmm * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche) Modified: nutch/branches/branch-1.5.1/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/nutch-default.xml?rev=1353615r1=1353614r2=1353615view=diff == --- nutch/branches/branch-1.5.1/conf/nutch-default.xml (original) +++ nutch/branches/branch-1.5.1/conf/nutch-default.xml Mon Jun 25 15:52:52 2012 @@ -123,7 +123,7 @@ property namehttp.agent.version/name - valueNutch-1.6-SNAPSHOT/value + valueNutch-1.5.1/value descriptionA version string to advertise in the User-Agent header./description /property Modified: nutch/branches/branch-1.5.1/conf/schema.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/schema.xml?rev=1353615r1=1353614r2=1353615view=diff == --- nutch/branches/branch-1.5.1/conf/schema.xml (original) +++ nutch/branches/branch-1.5.1/conf/schema.xml Mon Jun 25 15:52:52 2012 @@ -28,7 +28,7 @@ example/solr/conf/schema.xml?view=markup for more info. 
-- -schema name=nutch version=1.6 +schema name=nutch version=1.5.1 types fieldType name=string class=solr.StrField sortMissingLast=true omitNorms=true/ Modified: nutch/branches/branch-1.5.1/default.properties URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/default.properties?rev=1353615r1=1353614r2=1353615view=diff == --- nutch/branches/branch-1.5.1/default.properties (original) +++ nutch/branches/branch-1.5.1/default.properties Mon Jun 25 15:52:52 2012 @@ -14,7 +14,7 @@ # limitations under the License. name=apache-nutch -version=1.5.1-SNAPSHOT +version=1.5.1 final.name=${name}-${version} year=2012
svn commit: r1353638 - /nutch/tags/release-2.0rc3/
Author: lewismc Date: Mon Jun 25 16:24:14 2012 New Revision: 1353638 URL: http://svn.apache.org/viewvc?rev=1353638&view=rev Log: tagging Nutch 2.0 RC3 Added: nutch/tags/release-2.0rc3/ - copied from r1353637, nutch/branches/nutchgora/
svn commit: r1350600 - /nutch/tags/release-2.0rc2/
Author: lewismc Date: Fri Jun 15 12:32:13 2012 New Revision: 1350600 URL: http://svn.apache.org/viewvc?rev=1350600&view=rev Log: Nutch 2.0 RC2. Added: nutch/tags/release-2.0rc2/ - copied from r1350599, nutch/branches/nutchgora/
svn commit: r1348070 - in /nutch/trunk: conf/schema.xml default.properties
Author: lewismc Date: Fri Jun 8 13:47:20 2012 New Revision: 1348070 URL: http://svn.apache.org/viewvc?rev=1348070view=rev Log: trivial commit to add license header and update schema number Modified: nutch/trunk/conf/schema.xml nutch/trunk/default.properties Modified: nutch/trunk/conf/schema.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/schema.xml?rev=1348070r1=1348069r2=1348070view=diff == --- nutch/trunk/conf/schema.xml (original) +++ nutch/trunk/conf/schema.xml Fri Jun 8 13:47:20 2012 @@ -28,7 +28,7 @@ example/solr/conf/schema.xml?view=markup for more info. -- -schema name=nutch version=1.4 +schema name=nutch version=1.6 types fieldType name=string class=solr.StrField sortMissingLast=true omitNorms=true/ Modified: nutch/trunk/default.properties URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1348070r1=1348069r2=1348070view=diff == --- nutch/trunk/default.properties (original) +++ nutch/trunk/default.properties Fri Jun 8 13:47:20 2012 @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + name=apache-nutch version=1.6-SNAPSHOT final.name=${name}-${version}
svn commit: r1348074 - in /nutch/branches/nutchgora: CHANGES.txt build.xml conf/schema.xml
Author: lewismc Date: Fri Jun 8 13:56:20 2012 New Revision: 1348074 URL: http://svn.apache.org/viewvc?rev=1348074view=rev Log: trivial commit prior to RC#1 Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml nutch/branches/nutchgora/conf/schema.xml Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1348074r1=1348073r2=1348074view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Fri Jun 8 13:56:20 2012 @@ -1,6 +1,7 @@ Nutch Change Log -Release 2.1 (22/02/2012) +Release 2.0 (08/06/2012) ddmmyyy +Full Jira report - https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=10680version=12314893 * NUTCH-1379 NPE when reprUrl is null in ParseUtil (ferdy) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1348074r1=1348073r2=1348074view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Fri Jun 8 13:56:20 2012 @@ -608,7 +608,6 @@ mkdir dir=${dist.dir}/ mkdir dir=${src.dist.version.dir}/ mkdir dir=${src.dist.version.dir}/lib/ -mkdir dir=${src.dist.version.dir}/runtime/ mkdir dir=${src.dist.version.dir}/docs/ mkdir dir=${src.dist.version.dir}/docs/api/ mkdir dir=${src.dist.version.dir}/ivy/ @@ -616,15 +615,6 @@ copy todir=${src.dist.version.dir}/lib includeEmptyDirs=false fileset dir=lib/ /copy - -copy todir=${src.dist.version.dir}/runtime - fileset dir=runtime/ -/copy - -chmod perm=ugo+x type=file -fileset dir=${src.dist.version.dir}/runtime/deploy/bin/ -fileset dir=${src.dist.version.dir}/runtime/local/bin/ -/chmod copy todir=${src.dist.version.dir}/conf fileset dir=${conf.dir} excludes=**/*.template/ @@ -704,6 +694,7 @@ destfile=${src.dist.version.dir}.tar.gz basedir=${src.dist.version.dir} tarfileset dir=${dist.dir} mode=664 exclude name=${src.dist.version.dir}/bin/* / + exclude 
name=${src.dist.version.dir}/runtime/* / include name=${src.dist.version.dir}/** / /tarfileset tarfileset dir=${dist.dir} mode=755 @@ -736,6 +727,7 @@ destfile=${src.dist.version.dir}.zip basedir=${src.dist.version.dir} zipfileset dir=${dist.dir} filemode=664 exclude name=${src.dist.version.dir}/bin/* / + exclude name=${src.dist.version.dir}/runtime/* / include name=${src.dist.version.dir}/** / /zipfileset zipfileset dir=${dist.dir} filemode=755 Modified: nutch/branches/nutchgora/conf/schema.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/schema.xml?rev=1348074r1=1348073r2=1348074view=diff == --- nutch/branches/nutchgora/conf/schema.xml (original) +++ nutch/branches/nutchgora/conf/schema.xml Fri Jun 8 13:56:20 2012 @@ -27,7 +27,7 @@ example/solr/conf/schema.xml?view=markup for more info. -- -schema name=nutch version=1.4 +schema name=nutch version=2.0 types fieldType name=string class=solr.StrField sortMissingLast=true omitNorms=true/
svn commit: r1348087 - /nutch/tags/release-2.0/nutchgora/
Author: lewismc Date: Fri Jun 8 14:23:13 2012 New Revision: 1348087 URL: http://svn.apache.org/viewvc?rev=1348087&view=rev Log: Nutch 2.0 release. Added: nutch/tags/release-2.0/nutchgora/ - copied from r1348086, nutch/branches/nutchgora/
svn commit: r1348095 - /nutch/branches/nutchgora/KEYS
Author: lewismc Date: Fri Jun 8 14:42:59 2012 New Revision: 1348095 URL: http://svn.apache.org/viewvc?rev=1348095view=rev Log: trivial commit to add my details to KEYS file Modified: nutch/branches/nutchgora/KEYS Modified: nutch/branches/nutchgora/KEYS URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/KEYS?rev=1348095r1=1348094r2=1348095view=diff == --- nutch/branches/nutchgora/KEYS (original) +++ nutch/branches/nutchgora/KEYS Fri Jun 8 14:42:59 2012 @@ -240,3 +240,62 @@ vIxV0MEa+WSBaplFlj0ACfs2Hdh6skErvMAzXHBC oPLz8+OED4Ec4Pjuuic3HX2Ff0myAKDd74+vKoAC78+CStjf1pSSmy4y4w== =mbQZ -END PGP PUBLIC KEY BLOCK- + +pub 4096R/C601BCA7 2012-04-17 +uid Lewis John McGibbney (CODE SIGNING KEY) lewi...@apache.org +sig 3C601BCA7 2012-04-17 Lewis John McGibbney (CODE SIGNING KEY) lewi...@apache.org +sub 4096R/FCD9FF28 2012-04-17 +sig C601BCA7 2012-04-17 Lewis John McGibbney (CODE SIGNING KEY) lewi...@apache.org + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.4.10 (GNU/Linux) + +mQINBE+NSUkBEAC3Qu1mT3x0swS4zXta2NnJtrepOqpsU292U+hzkbjdG8W+W2WA +3oRdd5f/iKkkE1Z3q53qD++PazLQf+g+378Ce+CP4bwhZuz/CgSa8EO2rIXadVUG +M+XBAiSlLWyQhwW8qbipGQvpT1PXp8mjwXlWzt+0+4F9ybepYxStUPaybIFfSn+f +M8YzYLgfKSsHMgPeK6TGRJAqC+u7t+XMYWmfVS9TpoOyfZ3tsn3YmeH4JiqF49/0 +XzkqgM7FW52By64Nm6xCOfqXCaMmVV5JRuZFhLB4VmWlH/Mikv5Tu99gsAdGwFIb +MhMWtWZ/azKarTkQiZjDka09Mxc6skXCBBbxz9lstE4X50d5PMqOgVBtFstmL64h +Km2dSIdVEUyjM9y1HBRZO1+ooNs5xja1DnSAuytstrRnt5Vdnuk/RS8t2qfcm2jP +NWrZNOix9U+pT7qUQ1wbK/ew+qWbNFlvp9i3XyZdfPpyEmYD4CsBvkVbiH+FULwS +F4OJQlJoDJ1vHnSPMNSGtiNRTLSQ2+E6huqktyAY+rcTamCEkCdoZ5NTyMbEgqZ1 +P4fr+h+EpV0h/ACzjhE4sq6MK6KZFv3a3Erlk4oC93BVJpcYyZyQneKQSapbAv9u +oYCTLHyCrBdXItnFEHhy1zN0DvbWoGtsxDvAVjY3D9YP32Yu3WvxeW25bQARAQAB +tDxMZXdpcyBKb2huIE1jR2liYm5leSAoQ09ERSBTSUdOSU5HIEtFWSkgPGxld2lz +bWNAYXBhY2hlLm9yZz6JAjgEEwECACIFAk+NSUkCGwMGCwkIBwMCBhUIAgkKCwQW +AgMBAh4BAheAAAoJEPReeXDGAbynxnQP/1s1e1eDUAvZv1k+OVhG+nDhqtBtmFV6 +sx67atpzZCj6ckKXphkiWAFmYsAH7pujHgASuAIoMY7MLjaRuG2MiEdWINYH5LVB 
+xmZ3M9f1+YBuTSs/0KKBfqVBYm5vbEC+vBkjez54DOJ7OfRQllra98FR5GxEoYhh +bIQDtUtYrLjzd9kbUH5J+cTgSJ08ciIxanscvFRE7+X2sQTopor6f+o7iea7k6KM +b5FJ9mi4Q3RQbkorncyyDp4O7rBsuaGeD2oORdSM1zT5ql3glq7cYUI8havHY696 +jWYLOc951l6fDofGi4ZirX0+Mlxj+d2BNY54rx9dl6pZOmahvD4pveq/vbzwOH9E +vb1uTfRIYLaNW++1nXzPBZ5nzsemDb3K8yVYXnCDrqmzOZMJu5AinvUUusTrRhT/ +4oy2AO1YEIjgwHFzYvv7C7/wYSQC5AxvO0plvyH/kMK/vQk3H7I13isHdyZhEjrR +e+ciNzPWh4R6W8zVbe29MljItmINWniJ/CnYi9/r7ZtkQUBUCmHQZcsCm2DflA83 +ueLozFY3NH2eQ4q9dY8QIJDOpsX1SrP8DUOpuai3PvEiE8stHxGpamFq2DgnS81x +/e/kSbIBD6QGgP1S7Zrkdz4jriCCY4mv9mYMu9De/sObYcpGdg6rE49lz9NWeE8w +Wtt1oexR6DhpuQINBE+NSUkBEADOm92hnYd9ZNSmaVSUegmo0Rx9CMIzRZzHXPXT +SxxMnJScWDKeTWa7U1A0peiNIUKKlgFcnUY176o4wk8y2sNgyYkYO6wQlzmoyQIh +Ft0fqE3LMKBJcW2JONWFVrFZpRPTFvRWnDOSur8IQq3rJkyiqfT5y0E7PAdd8aa3 +l7anp8gfKCf9iIYtgfNsKNphngkwOLNDVsED7G/VRfAezjDKyf0M9HSL0fjQ5YDe +L5MMmgduvYKBtWISM5tqJAunkMpGeWJ6/khJZT+bLK8iLM2073W5uSlNs6oO2AM8 +lDvfmnsFC4178mbU9nJNi+KAXzwZXH4xcqywRKZhuWI5BVPGi50HJ/RIZtDyrkrK +W7NACtmniuFzSy9PxrM2iappUsfY8b7uZBzGoo1BzT7F7VM7sSte+X+zs8TZ0dam +6TbuGMuv5rPQGAwu2JWUNOeBzXvfkg3gzk4qZrBdHtUrQjx33c1NBZddLcoSqzgC +ph2cz4NG4Fs/Mi8SXoKBwJGVeWE+ZCBma8vFP/zctb/XroIaFSE5rAwHydwCB4gu +VB3rNuLCoiiB50lPzAPFjjFxOuZeTZfl4bp1XRE1KKYi+n974At4HDd5g0Az8w37 +5/9G+pARCzjytvIHJTYQDsG0hfnj2Vfb5WWYF6LMib0ZGf739Yp7L602/yE9QAKm +bifPCQARAQABiQIfBBgBAgAJBQJPjUlJAhsMAAoJEPReeXDGAbynzc4P/AomVPfY +bY61TE+QSKAJl8/dyyw+LSddTPFTleVBFHlq1tnQmLWxoNq5t1CRXUJOv3q6haPE +PLKR5pXXtNzAGVP74Jipa5r8FQjBG0j+XriiHmr861xyno0uPG23c0LSRqHrcLi6 +tgN2Q2ihu1Tjaql+ukzPI6u2v97FD0qhJWKvFFo64p7HTNUXHJLQ9N/m1Pien7Nm +KFLRI0Pu0CW95I1w2gAAlS++lIxT3/ANfw6SpK9+lNBaan1g0xM5/P54MIQvZgCQ +gdIcWdAOmXjTyMryconkeNRWpkYjXG4hZj9crP48j3lZPlUYol4pdkQ1CtSq1emv +VDGoUrn5bRWoybOFfx3joOLpUqJA5PDjeN7YMpJNWc3O/lz+S+sW9WZY7vwbK+Mn +E/l4Bz2k9fQDsxm2rPzM2aS/qaBo9v7vj+NE85B2/NE9cXo0WoC8u5o+KEQY6urV +ANW/A0k94wmfoBMbmzNZ5Y5zJ9vceW9d4FE2FXaynRke2awYHBZE2Ty3MSxCQAvp +MREQKzxB1XcR+Frj0nMKMmdEmM55OmIgAqAct1OuGDbOATJMcmVuwHqTZIdynzqh 
+NPgXHx4ASqesjF/9GUrAQfOmXqHdOF6xOb7YYGssl1kgvOQRVJhkWtmTckyk+xu9 +U3Wt+q9F6O+RmemV6a6mrpog+Aq+BkIMWCJ8 +=xHbT +-END PGP PUBLIC KEY BLOCK-
svn commit: r1344886 - /nutch/branches/branch-1.5/build.xml
Author: lewismc Date: Thu May 31 20:08:26 2012 New Revision: 1344886 URL: http://svn.apache.org/viewvc?rev=1344886view=rev Log: commit to finalise ant tar-src and ant zip-src targets for RC4 Modified: nutch/branches/branch-1.5/build.xml Modified: nutch/branches/branch-1.5/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5/build.xml?rev=1344886r1=1344885r2=1344886view=diff == --- nutch/branches/branch-1.5/build.xml (original) +++ nutch/branches/branch-1.5/build.xml Thu May 31 20:08:26 2012 @@ -603,7 +603,6 @@ mkdir dir=${dist.dir}/ mkdir dir=${src.dist.version.dir}/ mkdir dir=${src.dist.version.dir}/lib/ -mkdir dir=${src.dist.version.dir}/runtime/ mkdir dir=${src.dist.version.dir}/docs/ mkdir dir=${src.dist.version.dir}/docs/api/ mkdir dir=${src.dist.version.dir}/ivy/ @@ -612,15 +611,6 @@ fileset dir=lib/ /copy -copy todir=${src.dist.version.dir}/runtime - fileset dir=runtime/ -/copy - -chmod perm=ugo+x type=file -fileset dir=${src.dist.version.dir}/runtime/deploy/bin/ -fileset dir=${src.dist.version.dir}/runtime/local/bin/ -/chmod - copy todir=${src.dist.version.dir}/conf fileset dir=${conf.dir} excludes=**/*.template/ /copy @@ -699,6 +689,7 @@ destfile=${src.dist.version.dir}.tar.gz basedir=${src.dist.version.dir} tarfileset dir=${dist.dir} mode=664 exclude name=${src.dist.version.dir}/bin/* / + exclude name=${src.dist.version.dir}/runtime/* / include name=${src.dist.version.dir}/** / /tarfileset tarfileset dir=${dist.dir} mode=755 @@ -731,6 +722,7 @@ destfile=${src.dist.version.dir}.zip basedir=${src.dist.version.dir} zipfileset dir=${dist.dir} filemode=664 exclude name=${src.dist.version.dir}/bin/* / + exclude name=${src.dist.version.dir}/runtime/* / include name=${src.dist.version.dir}/** / /zipfileset zipfileset dir=${dist.dir} filemode=755
svn commit: r1344451 - in /nutch/branches/branch-1.5: build.xml default.properties
Author: lewismc Date: Wed May 30 20:28:39 2012 New Revision: 1344451 URL: http://svn.apache.org/viewvc?rev=1344451view=rev Log: commit to fix broken ant targets Modified: nutch/branches/branch-1.5/build.xml nutch/branches/branch-1.5/default.properties Modified: nutch/branches/branch-1.5/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5/build.xml?rev=1344451r1=1344450r2=1344451view=diff == --- nutch/branches/branch-1.5/build.xml (original) +++ nutch/branches/branch-1.5/build.xml Wed May 30 20:28:39 2012 @@ -420,7 +420,7 @@ ant dir=src/plugin target=test inheritAll=false/ /target - target name=nightly depends=test, tar + target name=nightly depends=test, tar-src, zip-src /target !-- == -- @@ -599,123 +599,158 @@ !-- == -- !---- !-- == -- - target name=package depends=runtime, javadoc -mkdir dir=${dist.version.dir}/ -mkdir dir=${dist.version.dir}/lib/ -mkdir dir=${dist.version.dir}/runtime/ -mkdir dir=${dist.version.dir}/docs/ -mkdir dir=${dist.version.dir}/docs/api/ -mkdir dir=${dist.version.dir}/ivy/ + target name=package-src depends=runtime, javadoc +mkdir dir=${dist.dir}/ +mkdir dir=${src.dist.version.dir}/ +mkdir dir=${src.dist.version.dir}/lib/ +mkdir dir=${src.dist.version.dir}/runtime/ +mkdir dir=${src.dist.version.dir}/docs/ +mkdir dir=${src.dist.version.dir}/docs/api/ +mkdir dir=${src.dist.version.dir}/ivy/ -copy todir=${dist.version.dir}/lib includeEmptyDirs=false +copy todir=${src.dist.version.dir}/lib includeEmptyDirs=false fileset dir=lib/ /copy -copy todir=${dist.version.dir}/runtime +copy todir=${src.dist.version.dir}/runtime fileset dir=runtime/ /copy chmod perm=ugo+x type=file -fileset dir=${dist.version.dir}/runtime/deploy/bin/ -fileset dir=${dist.version.dir}/runtime/local/bin/ +fileset dir=${src.dist.version.dir}/runtime/deploy/bin/ +fileset dir=${src.dist.version.dir}/runtime/local/bin/ /chmod -copy todir=${dist.version.dir}/conf +copy todir=${src.dist.version.dir}/conf fileset dir=${conf.dir} excludes=**/*.template/ /copy 
-copy todir=${dist.version.dir}/docs/api +copy todir=${src.dist.version.dir}/docs/api fileset dir=${build.javadoc}/ /copy -copy todir=${dist.version.dir} +copy todir=${src.dist.version.dir} fileset dir=. include name=*.txt / !--include name=KEYS /-- /fileset /copy -copy todir=${dist.version.dir}/src includeEmptyDirs=true +copy todir=${src.dist.version.dir}/src includeEmptyDirs=true fileset dir=src/ /copy -copy todir=${dist.version.dir}/ivy includeEmptyDirs=true +copy todir=${src.dist.version.dir}/ivy includeEmptyDirs=true fileset dir=ivy/ /copy -copy todir=${dist.version.dir}/ file=build.xml/ -copy todir=${dist.version.dir}/ file=default.properties/ +copy todir=${src.dist.version.dir}/ file=build.xml/ +copy todir=${src.dist.version.dir}/ file=default.properties/ /target target name=package-bin depends=runtime, javadoc -mkdir dir=${dist.version.dir}-bin/ -mkdir dir=${dist.version.dir}-bin/lib/ -mkdir dir=${dist.version.dir}-bin/bin/ -mkdir dir=${dist.version.dir}-bin/conf/ -mkdir dir=${dist.version.dir}-bin/docs/ -mkdir dir=${dist.version.dir}-bin/docs/api/ -mkdir dir=${dist.version.dir}-bin/plugins/ +mkdir dir=${dist.dir}/ +mkdir dir=${bin.dist.version.dir}/ +mkdir dir=${bin.dist.version.dir}/lib/ +mkdir dir=${bin.dist.version.dir}/bin/ +mkdir dir=${bin.dist.version.dir}/conf/ +mkdir dir=${bin.dist.version.dir}/docs/ +mkdir dir=${bin.dist.version.dir}/docs/api/ +mkdir dir=${bin.dist.version.dir}/plugins/ -copy todir=${dist.version.dir}-bin/lib includeEmptyDirs=false +copy todir=${bin.dist.version.dir}/lib includeEmptyDirs=false fileset dir=runtime/local/lib/ /copy -copy todir=${dist.version.dir}-bin/bin +copy todir=${bin.dist.version.dir}/bin fileset dir=runtime/local/bin/ /copy chmod perm=ugo+x type=file -fileset dir=${dist.version.dir}-bin/bin/ +fileset dir=${bin.dist.version.dir}/bin/ /chmod -copy todir=${dist.version.dir}-bin/conf +copy todir=${bin.dist.version.dir}/conf fileset dir=runtime/local/conf excludes=**/*.template/ /copy -copy 
todir=${dist.version.dir}-bin/docs/api +copy todir=${bin.dist.version.dir}/docs/api fileset dir=${build.javadoc}/ /copy -copy todir=${dist.version.dir}-bin
svn commit: r1344452 - /nutch/tags/release-1.5-rc3/
Author: lewismc Date: Wed May 30 20:31:05 2012 New Revision: 1344452 URL: http://svn.apache.org/viewvc?rev=1344452&view=rev Log: Nutch release-1.5RC3 Added: nutch/tags/release-1.5-rc3/ - copied from r1344451, nutch/branches/branch-1.5/
svn commit: r1344477 - in /nutch/trunk: build.xml conf/nutch-default.xml default.properties
Author: lewismc Date: Wed May 30 21:42:49 2012 New Revision: 1344477 URL: http://svn.apache.org/viewvc?rev=1344477view=rev Log: commit to backport release1.5 changes to trunk Modified: nutch/trunk/build.xml nutch/trunk/conf/nutch-default.xml nutch/trunk/default.properties Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1344477r1=1344476r2=1344477view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Wed May 30 21:42:49 2012 @@ -422,7 +422,7 @@ ant dir=src/plugin target=test inheritAll=false/ /target - target name=nightly depends=test, tar + target name=nightly depends=test, tar-src, zip-src /target !-- == -- @@ -596,96 +596,98 @@ includes=nutch-default.xml style=conf/nutch-conf.xsl/ /target - !-- == -- +!-- == -- !-- D I S T R I B U T I O N-- !-- == -- !---- !-- == -- target name=package-src depends=runtime, javadoc -mkdir dir=${dist.version.dir}-src/ -mkdir dir=${dist.version.dir}-src/lib/ -mkdir dir=${dist.version.dir}-src/runtime/ -mkdir dir=${dist.version.dir}-src/docs/ -mkdir dir=${dist.version.dir}-src/docs/api/ -mkdir dir=${dist.version.dir}-src/ivy/ +mkdir dir=${dist.dir}/ +mkdir dir=${src.dist.version.dir}/ +mkdir dir=${src.dist.version.dir}/lib/ +mkdir dir=${src.dist.version.dir}/runtime/ +mkdir dir=${src.dist.version.dir}/docs/ +mkdir dir=${src.dist.version.dir}/docs/api/ +mkdir dir=${src.dist.version.dir}/ivy/ -copy todir=${dist.version.dir}-src/lib includeEmptyDirs=false +copy todir=${src.dist.version.dir}/lib includeEmptyDirs=false fileset dir=lib/ /copy -copy todir=${dist.version.dir}-src/runtime +copy todir=${src.dist.version.dir}/runtime fileset dir=runtime/ /copy chmod perm=ugo+x type=file -fileset dir=${dist.version.dir}-src/runtime/deploy/bin/ -fileset dir=${dist.version.dir}-src/runtime/local/bin/ +fileset dir=${src.dist.version.dir}/runtime/deploy/bin/ +fileset dir=${src.dist.version.dir}/runtime/local/bin/ /chmod -copy todir=${dist.version.dir}-src/conf +copy 
todir=${src.dist.version.dir}/conf fileset dir=${conf.dir} excludes=**/*.template/ /copy -copy todir=${dist.version.dir}-src/docs/api +copy todir=${src.dist.version.dir}/docs/api fileset dir=${build.javadoc}/ /copy -copy todir=${dist.version.dir}-src +copy todir=${src.dist.version.dir} fileset dir=. include name=*.txt / !--include name=KEYS /-- /fileset /copy -copy todir=${dist.version.dir}-src/src includeEmptyDirs=true +copy todir=${src.dist.version.dir}/src includeEmptyDirs=true fileset dir=src/ /copy -copy todir=${dist.version.dir}-src/ivy includeEmptyDirs=true +copy todir=${src.dist.version.dir}/ivy includeEmptyDirs=true fileset dir=ivy/ /copy -copy todir=${dist.version.dir}-src/ file=build.xml/ -copy todir=${dist.version.dir}-src/ file=default.properties/ +copy todir=${src.dist.version.dir}/ file=build.xml/ +copy todir=${src.dist.version.dir}/ file=default.properties/ /target target name=package-bin depends=runtime, javadoc -mkdir dir=${dist.version.dir}-bin/ -mkdir dir=${dist.version.dir}-bin/lib/ -mkdir dir=${dist.version.dir}-bin/bin/ -mkdir dir=${dist.version.dir}-bin/conf/ -mkdir dir=${dist.version.dir}-bin/docs/ -mkdir dir=${dist.version.dir}-bin/docs/api/ -mkdir dir=${dist.version.dir}-bin/plugins/ +mkdir dir=${dist.dir}/ +mkdir dir=${bin.dist.version.dir}/ +mkdir dir=${bin.dist.version.dir}/lib/ +mkdir dir=${bin.dist.version.dir}/bin/ +mkdir dir=${bin.dist.version.dir}/conf/ +mkdir dir=${bin.dist.version.dir}/docs/ +mkdir dir=${bin.dist.version.dir}/docs/api/ +mkdir dir=${bin.dist.version.dir}/plugins/ -copy todir=${dist.version.dir}-bin/lib includeEmptyDirs=false +copy todir=${bin.dist.version.dir}/lib includeEmptyDirs=false fileset dir=runtime/local/lib/ /copy -copy todir=${dist.version.dir}-bin/bin +copy todir=${bin.dist.version.dir}/bin fileset dir=runtime/local/bin/ /copy chmod perm=ugo+x type=file -fileset dir=${dist.version.dir}-bin/bin/ +fileset dir=${bin.dist.version.dir}/bin/ /chmod -copy todir=${dist.version.dir}-bin/conf +copy
svn commit: r1341425 - in /nutch/branches/nutchgora: ./ ivy/ src/java/org/apache/nutch/storage/ src/plugin/creativecommons/src/web/ src/plugin/protocol-httpclient/src/test/conf/
Author: lewismc Date: Tue May 22 11:59:27 2012 New Revision: 1341425 URL: http://svn.apache.org/viewvc?rev=1341425view=rev Log: commit to bring code up to scratch with trunk w.r.t preparation for the RC Modified: nutch/branches/nutchgora/NOTICE.txt nutch/branches/nutchgora/build.xml nutch/branches/nutchgora/default.properties nutch/branches/nutchgora/ivy/mvn.template nutch/branches/nutchgora/src/java/org/apache/nutch/storage/Host.java nutch/branches/nutchgora/src/plugin/creativecommons/src/web/search.jsp nutch/branches/nutchgora/src/plugin/creativecommons/src/web/web.xml nutch/branches/nutchgora/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml nutch/branches/nutchgora/src/plugin/protocol-httpclient/src/test/conf/nutch-site-test.xml Modified: nutch/branches/nutchgora/NOTICE.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/NOTICE.txt?rev=1341425r1=1341424r2=1341425view=diff == --- nutch/branches/nutchgora/NOTICE.txt (original) +++ nutch/branches/nutchgora/NOTICE.txt Tue May 22 11:59:27 2012 @@ -1,5 +1,5 @@ Apache Nutch -Copyright 2009 The Apache Software Foundation +Copyright 2012 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). 
Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1341425r1=1341424r2=1341425view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Tue May 22 11:59:27 2012 @@ -144,27 +144,40 @@ arg value=${javadoc.proxy.host} / arg value=${javadoc.proxy.port} / - packageset dir=${src.dir} / - packageset dir=${plugins.dir}/lib-http/src/java / - packageset dir=${plugins.dir}/lib-regex-filter/src/java / - packageset dir=${plugins.dir}/microformats-reltag/src/java / - packageset dir=${plugins.dir}/protocol-file/src/java / - packageset dir=${plugins.dir}/protocol-ftp/src/java / - packageset dir=${plugins.dir}/protocol-http/src/java / - packageset dir=${plugins.dir}/protocol-httpclient/src/java / - packageset dir=${plugins.dir}/parse-tika/src/java / - packageset dir=${plugins.dir}/parse-ext/src/java / - packageset dir=${plugins.dir}/parse-js/src/java / - packageset dir=${plugins.dir}/parse-swf/src/java / - packageset dir=${plugins.dir}/parse-zip/src/java / - packageset dir=${plugins.dir}/index-basic/src/java / - packageset dir=${plugins.dir}/index-more/src/java / - packageset dir=${plugins.dir}/scoring-opic/src/java / - packageset dir=${plugins.dir}/urlfilter-automaton/src/java / - packageset dir=${plugins.dir}/urlfilter-regex/src/java / - packageset dir=${plugins.dir}/urlfilter-prefix/src/java / - packageset dir=${plugins.dir}/creativecommons/src/java / - packageset dir=${plugins.dir}/language-identifier/src/java / + packageset dir=${src.dir}/ + packageset dir=${plugins.dir}/creativecommons/src/java/ + packageset dir=${plugins.dir}/feed/src/java/ + packageset dir=${plugins.dir}/index-anchor/src/java/ + packageset dir=${plugins.dir}/index-basic/src/java/ + packageset dir=${plugins.dir}/index-more/src/java/ + packageset dir=${plugins.dir}/language-identifier/src/java/ + packageset dir=${plugins.dir}/lib-http/src/java/ + packageset 
dir=${plugins.dir}/lib-regex-filter/src/java/ + packageset dir=${plugins.dir}/microformats-reltag/src/java/ + packageset dir=${plugins.dir}/parse-ext/src/java/ + packageset dir=${plugins.dir}/parse-html/src/java/ + packageset dir=${plugins.dir}/parse-js/src/java/ + packageset dir=${plugins.dir}/parse-swf/src/java/ + packageset dir=${plugins.dir}/parse-tika/src/java/ + packageset dir=${plugins.dir}/parse-zip/src/java/ + packageset dir=${plugins.dir}/protocol-file/src/java/ + packageset dir=${plugins.dir}/protocol-ftp/src/java/ + packageset dir=${plugins.dir}/protocol-http/src/java/ + packageset dir=${plugins.dir}/protocol-httpclient/src/java/ + packageset dir=${plugins.dir}/protocol-sftp/src/java/ + packageset dir=${plugins.dir}/scoring-link/src/java/ + packageset dir=${plugins.dir}/scoring-opic/src/java/ + packageset dir=${plugins.dir}/subcollection/src/java/ + packageset dir=${plugins.dir}/tld/src/java/ + packageset dir=${plugins.dir}/urlfilter-automaton/src/java/ + packageset dir=${plugins.dir}/urlfilter-domain/src/java/ + packageset dir=${plugins.dir}/urlfilter-prefix/src/java/ + packageset dir=${plugins.dir}/urlfilter-regex/src/java/ + packageset dir=${plugins.dir}/urlfilter-suffix/src/java/ + packageset dir=${plugins.dir}/urlfilter-validator/src/java/ + packageset dir=${plugins.dir}/urlnormalizer-basic/src/java/ + packageset dir
svn commit: r1341570 - in /nutch/branches/branch-1.5: KEYS build.xml ivy/mvn.template
Author: lewismc Date: Tue May 22 17:31:35 2012 New Revision: 1341570 URL: http://svn.apache.org/viewvc?rev=1341570view=rev Log: final commit to before pushing RC2 Modified: nutch/branches/branch-1.5/KEYS nutch/branches/branch-1.5/build.xml nutch/branches/branch-1.5/ivy/mvn.template Modified: nutch/branches/branch-1.5/KEYS URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5/KEYS?rev=1341570r1=1341569r2=1341570view=diff == --- nutch/branches/branch-1.5/KEYS (original) +++ nutch/branches/branch-1.5/KEYS Tue May 22 17:31:35 2012 @@ -240,3 +240,62 @@ vIxV0MEa+WSBaplFlj0ACfs2Hdh6skErvMAzXHBC oPLz8+OED4Ec4Pjuuic3HX2Ff0myAKDd74+vKoAC78+CStjf1pSSmy4y4w== =mbQZ -END PGP PUBLIC KEY BLOCK- + +pub 4096R/C601BCA7 2012-04-17 +uid Lewis John McGibbney (CODE SIGNING KEY) lewi...@apache.org +sig 3C601BCA7 2012-04-17 Lewis John McGibbney (CODE SIGNING KEY) lewi...@apache.org +sub 4096R/FCD9FF28 2012-04-17 +sig C601BCA7 2012-04-17 Lewis John McGibbney (CODE SIGNING KEY) lewi...@apache.org + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.4.10 (GNU/Linux) + +mQINBE+NSUkBEAC3Qu1mT3x0swS4zXta2NnJtrepOqpsU292U+hzkbjdG8W+W2WA +3oRdd5f/iKkkE1Z3q53qD++PazLQf+g+378Ce+CP4bwhZuz/CgSa8EO2rIXadVUG +M+XBAiSlLWyQhwW8qbipGQvpT1PXp8mjwXlWzt+0+4F9ybepYxStUPaybIFfSn+f +M8YzYLgfKSsHMgPeK6TGRJAqC+u7t+XMYWmfVS9TpoOyfZ3tsn3YmeH4JiqF49/0 +XzkqgM7FW52By64Nm6xCOfqXCaMmVV5JRuZFhLB4VmWlH/Mikv5Tu99gsAdGwFIb +MhMWtWZ/azKarTkQiZjDka09Mxc6skXCBBbxz9lstE4X50d5PMqOgVBtFstmL64h +Km2dSIdVEUyjM9y1HBRZO1+ooNs5xja1DnSAuytstrRnt5Vdnuk/RS8t2qfcm2jP +NWrZNOix9U+pT7qUQ1wbK/ew+qWbNFlvp9i3XyZdfPpyEmYD4CsBvkVbiH+FULwS +F4OJQlJoDJ1vHnSPMNSGtiNRTLSQ2+E6huqktyAY+rcTamCEkCdoZ5NTyMbEgqZ1 +P4fr+h+EpV0h/ACzjhE4sq6MK6KZFv3a3Erlk4oC93BVJpcYyZyQneKQSapbAv9u +oYCTLHyCrBdXItnFEHhy1zN0DvbWoGtsxDvAVjY3D9YP32Yu3WvxeW25bQARAQAB +tDxMZXdpcyBKb2huIE1jR2liYm5leSAoQ09ERSBTSUdOSU5HIEtFWSkgPGxld2lz +bWNAYXBhY2hlLm9yZz6JAjgEEwECACIFAk+NSUkCGwMGCwkIBwMCBhUIAgkKCwQW +AgMBAh4BAheAAAoJEPReeXDGAbynxnQP/1s1e1eDUAvZv1k+OVhG+nDhqtBtmFV6 
+sx67atpzZCj6ckKXphkiWAFmYsAH7pujHgASuAIoMY7MLjaRuG2MiEdWINYH5LVB +xmZ3M9f1+YBuTSs/0KKBfqVBYm5vbEC+vBkjez54DOJ7OfRQllra98FR5GxEoYhh +bIQDtUtYrLjzd9kbUH5J+cTgSJ08ciIxanscvFRE7+X2sQTopor6f+o7iea7k6KM +b5FJ9mi4Q3RQbkorncyyDp4O7rBsuaGeD2oORdSM1zT5ql3glq7cYUI8havHY696 +jWYLOc951l6fDofGi4ZirX0+Mlxj+d2BNY54rx9dl6pZOmahvD4pveq/vbzwOH9E +vb1uTfRIYLaNW++1nXzPBZ5nzsemDb3K8yVYXnCDrqmzOZMJu5AinvUUusTrRhT/ +4oy2AO1YEIjgwHFzYvv7C7/wYSQC5AxvO0plvyH/kMK/vQk3H7I13isHdyZhEjrR +e+ciNzPWh4R6W8zVbe29MljItmINWniJ/CnYi9/r7ZtkQUBUCmHQZcsCm2DflA83 +ueLozFY3NH2eQ4q9dY8QIJDOpsX1SrP8DUOpuai3PvEiE8stHxGpamFq2DgnS81x +/e/kSbIBD6QGgP1S7Zrkdz4jriCCY4mv9mYMu9De/sObYcpGdg6rE49lz9NWeE8w +Wtt1oexR6DhpuQINBE+NSUkBEADOm92hnYd9ZNSmaVSUegmo0Rx9CMIzRZzHXPXT +SxxMnJScWDKeTWa7U1A0peiNIUKKlgFcnUY176o4wk8y2sNgyYkYO6wQlzmoyQIh +Ft0fqE3LMKBJcW2JONWFVrFZpRPTFvRWnDOSur8IQq3rJkyiqfT5y0E7PAdd8aa3 +l7anp8gfKCf9iIYtgfNsKNphngkwOLNDVsED7G/VRfAezjDKyf0M9HSL0fjQ5YDe +L5MMmgduvYKBtWISM5tqJAunkMpGeWJ6/khJZT+bLK8iLM2073W5uSlNs6oO2AM8 +lDvfmnsFC4178mbU9nJNi+KAXzwZXH4xcqywRKZhuWI5BVPGi50HJ/RIZtDyrkrK +W7NACtmniuFzSy9PxrM2iappUsfY8b7uZBzGoo1BzT7F7VM7sSte+X+zs8TZ0dam +6TbuGMuv5rPQGAwu2JWUNOeBzXvfkg3gzk4qZrBdHtUrQjx33c1NBZddLcoSqzgC +ph2cz4NG4Fs/Mi8SXoKBwJGVeWE+ZCBma8vFP/zctb/XroIaFSE5rAwHydwCB4gu +VB3rNuLCoiiB50lPzAPFjjFxOuZeTZfl4bp1XRE1KKYi+n974At4HDd5g0Az8w37 +5/9G+pARCzjytvIHJTYQDsG0hfnj2Vfb5WWYF6LMib0ZGf739Yp7L602/yE9QAKm +bifPCQARAQABiQIfBBgBAgAJBQJPjUlJAhsMAAoJEPReeXDGAbynzc4P/AomVPfY +bY61TE+QSKAJl8/dyyw+LSddTPFTleVBFHlq1tnQmLWxoNq5t1CRXUJOv3q6haPE +PLKR5pXXtNzAGVP74Jipa5r8FQjBG0j+XriiHmr861xyno0uPG23c0LSRqHrcLi6 +tgN2Q2ihu1Tjaql+ukzPI6u2v97FD0qhJWKvFFo64p7HTNUXHJLQ9N/m1Pien7Nm +KFLRI0Pu0CW95I1w2gAAlS++lIxT3/ANfw6SpK9+lNBaan1g0xM5/P54MIQvZgCQ +gdIcWdAOmXjTyMryconkeNRWpkYjXG4hZj9crP48j3lZPlUYol4pdkQ1CtSq1emv +VDGoUrn5bRWoybOFfx3joOLpUqJA5PDjeN7YMpJNWc3O/lz+S+sW9WZY7vwbK+Mn +E/l4Bz2k9fQDsxm2rPzM2aS/qaBo9v7vj+NE85B2/NE9cXo0WoC8u5o+KEQY6urV +ANW/A0k94wmfoBMbmzNZ5Y5zJ9vceW9d4FE2FXaynRke2awYHBZE2Ty3MSxCQAvp 
+MREQKzxB1XcR+Frj0nMKMmdEmM55OmIgAqAct1OuGDbOATJMcmVuwHqTZIdynzqh +NPgXHx4ASqesjF/9GUrAQfOmXqHdOF6xOb7YYGssl1kgvOQRVJhkWtmTckyk+xu9 +U3Wt+q9F6O+RmemV6a6mrpog+Aq+BkIMWCJ8 +=xHbT +-END PGP PUBLIC KEY BLOCK- Modified: nutch/branches/branch-1.5/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5/build.xml?rev=1341570r1=1341569r2=1341570view=diff == --- nutch/branches/branch-1.5/build.xml (original) +++ nutch/branches/branch-1.5/build.xml Tue May 22 17:31:35 2012 @@ -168,7 +168,6 @@ packageset dir=${plugins.dir}/parse-swf/src/java/ packageset dir=${plugins.dir}/parse-tika/src/java/ packageset dir=${plugins.dir}/parse-zip/src/java/ - packageset dir=${plugins.dir}/lib-http/src/java/ packageset dir=${plugins.dir}/protocol-file/src/java
svn commit: r1341574 - /nutch/branches/branch-1.5/pom.xml
Author: lewismc Date: Tue May 22 17:55:07 2012 New Revision: 1341574 URL: http://svn.apache.org/viewvc?rev=1341574view=rev Log: commit to bring pom.xml up-tp-date for tag... generated via ant deploy task Modified: nutch/branches/branch-1.5/pom.xml Modified: nutch/branches/branch-1.5/pom.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.5/pom.xml?rev=1341574r1=1341573r2=1341574view=diff == --- nutch/branches/branch-1.5/pom.xml (original) +++ nutch/branches/branch-1.5/pom.xml Tue May 22 17:55:07 2012 @@ -15,34 +15,28 @@ See the License for the specific language governing permissions and limitations under the License. -- -project xmlns=http://maven.apache.org/POM/4.0.0; xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd; +project xmlns=http://maven.apache.org/POM/4.0.0; xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; +xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd; - modelVersion4.0.0/modelVersion - parent - groupIdorg.apache/groupId - artifactIdapache/artifactId - version9/version - relativePath / - /parent - groupIdorg.apache.nutch/groupId - artifactIdnutch/artifactId - packagingjar/packaging - version1.5/version - nameApache Nutch/name - urlhttp://nutch.apache.org/url - licenses - license - nameThe Apache Software License, Version 2.0/name - urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url - distributionrepo/distribution - /license - /licenses - scm - connectionscm:svn:http://svn.apache.org/repos/asf/nutch/trunk//connection - developerConnectionscm:svn:https://svn.apache.org/repos/asf/nutch/trunk//developerConnection - urlhttp://svn.apache.org/viewvc/nutch/trunk//url - /scm - developers + modelVersion4.0.0/modelVersion + groupIdorg.apache.nutch/groupId + artifactIdnutch/artifactId + packagingjar/packaging + version1.5/version + nameApache Nutch/name + urlhttp://nutch.apache.org/url + licenses + license 
+ nameThe Apache Software License, Version 2.0/name + urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url + distributionrepo/distribution + /license + /licenses + scm + urlhttp://svn.apache.org/viewvc/nutch/url + connectionhttp://svn.apache.org/viewvc/nutch/connection + /scm + developers developer idab/id nameAndrzej Bialecki/name @@ -57,13 +51,18 @@ idkubes/id nameDennis Kubes/name emailku...@apache.org/email - /developer + /developer developer iddogacan/id - nameDogacan Gâºney/name + nameDogacan Güney/name emaildoga...@apache.org/email /developer developer +idferdy/id +nameFerdy Galema/name +emailfe...@apache.org/email +/developer + developer idjnioche/id nameJulien Nioche/name emailjnio...@apache.org/email @@ -73,228 +72,152 @@ nameSami Siren/name emailsi...@apache.org/email /developer - developer - idmarkus/id - nameMarkus Jelsma/name - emailmar...@apache.org/email - /developer - developer - idalexis/id - nameAlexis Detlegrode/name - emailale...@apache.org/email - /developer - developer - idlewismc/id - nameLewis John McGibbney/name - emaillewi...@apache.org/email - /developer - developer - idferdy/id - nameFerdy Galema/name - emailfe...@apache.org/email - /developer - /developers - build - testSourceDirectory${basedir}/src/test/testSourceDirectory - sourceDirectory${basedir}/src/java/sourceDirectory - testResources - testResource - directorysrc/testresources/directory - /testResource - testResource - directoryconf//directory
svn commit: r1341603 - in /nutch/trunk: CHANGES.txt build.xml
Author: lewismc Date: Tue May 22 20:07:55 2012 New Revision: 1341603 URL: http://svn.apache.org/viewvc?rev=1341603view=rev Log: commit to add new ant targets to build.xml Modified: nutch/trunk/CHANGES.txt nutch/trunk/build.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1341603r1=1341602r2=1341603view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Tue May 22 20:07:55 2012 @@ -1,5 +1,9 @@ Nutch Change Log +(trunk) Current Development: + +* NUTCH-XX Commit to add configuration for separation of ant distribution targets (lewismc + jnioche) + Release 1.5 - 04/15/2012 * NUTCH-1208 Don't include KEYS file in bin distribution (jnioche) Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1341603r1=1341602r2=1341603view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Tue May 22 20:07:55 2012 @@ -601,52 +601,52 @@ !-- == -- !---- !-- == -- - target name=package depends=runtime, javadoc -mkdir dir=${dist.version.dir}/ -mkdir dir=${dist.version.dir}/lib/ -mkdir dir=${dist.version.dir}/runtime/ -mkdir dir=${dist.version.dir}/docs/ -mkdir dir=${dist.version.dir}/docs/api/ -mkdir dir=${dist.version.dir}/ivy/ + target name=package-src depends=runtime, javadoc +mkdir dir=${dist.version.dir}-src/ +mkdir dir=${dist.version.dir}-src/lib/ +mkdir dir=${dist.version.dir}-src/runtime/ +mkdir dir=${dist.version.dir}-src/docs/ +mkdir dir=${dist.version.dir}-src/docs/api/ +mkdir dir=${dist.version.dir}-src/ivy/ -copy todir=${dist.version.dir}/lib includeEmptyDirs=false +copy todir=${dist.version.dir}-src/lib includeEmptyDirs=false fileset dir=lib/ /copy -copy todir=${dist.version.dir}/runtime +copy todir=${dist.version.dir}-src/runtime fileset dir=runtime/ /copy chmod perm=ugo+x type=file -fileset dir=${dist.version.dir}/runtime/deploy/bin/ -fileset dir=${dist.version.dir}/runtime/local/bin/ +fileset dir=${dist.version.dir}-src/runtime/deploy/bin/ 
+fileset dir=${dist.version.dir}-src/runtime/local/bin/ /chmod -copy todir=${dist.version.dir}/conf +copy todir=${dist.version.dir}-src/conf fileset dir=${conf.dir} excludes=**/*.template/ /copy -copy todir=${dist.version.dir}/docs/api +copy todir=${dist.version.dir}-src/docs/api fileset dir=${build.javadoc}/ /copy -copy todir=${dist.version.dir} +copy todir=${dist.version.dir}-src fileset dir=. include name=*.txt / !--include name=KEYS /-- /fileset /copy -copy todir=${dist.version.dir}/src includeEmptyDirs=true +copy todir=${dist.version.dir}-src/src includeEmptyDirs=true fileset dir=src/ /copy -copy todir=${dist.version.dir}/ivy includeEmptyDirs=true +copy todir=${dist.version.dir}-src/ivy includeEmptyDirs=true fileset dir=ivy/ /copy -copy todir=${dist.version.dir}/ file=build.xml/ -copy todir=${dist.version.dir}/ file=default.properties/ +copy todir=${dist.version.dir}-src/ file=build.xml/ +copy todir=${dist.version.dir}-src/ file=default.properties/ /target @@ -696,7 +696,23 @@ !-- == -- target name=tar depends=package tar compression=gzip longfile=gnu - destfile=${dist.dir}/${final.name}.tar.gz + destfile=${dist.dir}/${final.name}-src.tar.gz + tarfileset dir=${dist.dir} mode=664 + exclude name=${final.name}/bin/* / +include name=${final.name}/** / + /tarfileset + tarfileset dir=${dist.dir} mode=755 +include name=${final.name}/bin/* / + /tarfileset +/tar + /target + + !-- == -- + !-- Make bin release tarball -- + !-- == -- + target name=tar-bin depends=package +tar compression=gzip longfile=gnu + destfile=${dist.dir}/${final.name}-bin.tar.gz tarfileset dir=${dist.dir} mode=664 exclude name=${final.name}/bin/* / include name=${final.name}/** / @@ -710,8 +726,23 @@ !-- == -- !-- Make release zip -- !-- == -- - target name=zip depends=package - zip compress
svn commit: r1341609 - in /nutch/branches/nutchgora: CHANGES.txt build.xml conf/nutch-default.xml default.properties
Author: lewismc Date: Tue May 22 20:18:22 2012 New Revision: 1341609 URL: http://svn.apache.org/viewvc?rev=1341609view=rev Log: final commit before rolling 2.0RC Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml nutch/branches/nutchgora/conf/nutch-default.xml nutch/branches/nutchgora/default.properties Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1341609r1=1341608r2=1341609view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Tue May 22 20:18:22 2012 @@ -1,6 +1,8 @@ Nutch Change Log -Release nutchgora - Current Development +Release 2.1 (22/02/2012) + +* NUTCH-XX Commit to add configuration for separation of ant distribution targets (lewismc + jnioche) * NUTCH-1364 Add a counter for malformed urls (Jason Trost via lewismc) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1341609r1=1341608r2=1341609view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Tue May 22 20:18:22 2012 @@ -600,59 +600,59 @@ /target !-- == -- - !-- D I S T R I B U T I O N -- - !-- == -- - !-- -- - !-- == -- - target name=package depends=runtime, javadoc - mkdir dir=${dist.version.dir} / - mkdir dir=${dist.version.dir}/lib / - mkdir dir=${dist.version.dir}/runtime / - mkdir dir=${dist.version.dir}/docs / - mkdir dir=${dist.version.dir}/docs/api / - mkdir dir=${dist.version.dir}/ivy / - - copy todir=${dist.version.dir}/lib includeEmptyDirs=false - fileset dir=lib / - /copy - - copy todir=${dist.version.dir}/runtime - fileset dir=runtime / - /copy - - chmod perm=ugo+x type=file - fileset dir=${dist.version.dir}/runtime/deploy/bin / - fileset dir=${dist.version.dir}/runtime/local/bin / - /chmod - - copy todir=${dist.version.dir}/conf - fileset dir=${conf.dir} excludes=**/*.template / - /copy - - copy 
todir=${dist.version.dir}/docs/api - fileset dir=${build.javadoc} / - /copy - - copy todir=${dist.version.dir} - fileset dir=. -include name=*.txt / -include name=KEYS / - /fileset - /copy + !-- D I S T R I B U T I O N-- + !-- == -- + !---- + !-- == -- + target name=package-src depends=runtime, javadoc +mkdir dir=${dist.version.dir}-src/ +mkdir dir=${dist.version.dir}-src/lib/ +mkdir dir=${dist.version.dir}-src/runtime/ +mkdir dir=${dist.version.dir}-src/docs/ +mkdir dir=${dist.version.dir}-src/docs/api/ +mkdir dir=${dist.version.dir}-src/ivy/ - copy todir=${dist.version.dir}/src includeEmptyDirs=true - fileset dir=src / - /copy +copy todir=${dist.version.dir}-src/lib includeEmptyDirs=false + fileset dir=lib/ +/copy + +copy todir=${dist.version.dir}-src/runtime + fileset dir=runtime/ +/copy - copy todir=${dist.version.dir}/ivy includeEmptyDirs=true - fileset dir=ivy / - /copy +chmod perm=ugo+x type=file +fileset dir=${dist.version.dir}-src/runtime/deploy/bin/ +fileset dir=${dist.version.dir}-src/runtime/local/bin/ +/chmod - copy todir=${dist.version.dir}/ file=build.xml / - copy todir=${dist.version.dir}/ file=default.properties / +copy todir=${dist.version.dir}-src/conf + fileset dir=${conf.dir} excludes=**/*.template/ +/copy + +copy todir=${dist.version.dir}-src/docs/api + fileset dir=${build.javadoc}/ +/copy + +copy todir=${dist.version.dir}-src + fileset dir=. 
+include name=*.txt / +!--include name=KEYS /-- + /fileset +/copy + +copy todir=${dist.version.dir}-src/src includeEmptyDirs=true + fileset dir=src/ +/copy + +copy todir=${dist.version.dir}-src/ivy includeEmptyDirs=true + fileset dir=ivy/ +/copy + +copy todir=${dist.version.dir}-src/ file=build.xml/ +copy todir=${dist.version.dir}-src/ file=default.properties/ + + /target - /target - target name=package-bin depends=runtime, javadoc mkdir dir=${dist.version.dir}-bin/ mkdir dir=${dist.version.dir}-bin/lib/ @@ -694,36 +694,67 @@ /target - !-- == -- - !-- Make release tarball -- - !-- == -- - target name=tar depends=package
svn commit: r1341100 - in /nutch/branches/nutchgora: ./ conf/ src/java/org/apache/nutch/metadata/ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/ src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/
Author: lewismc Date: Mon May 21 16:40:32 2012 New Revision: 1341100 URL: http://svn.apache.org/viewvc?rev=1341100view=rev Log: commit to address NUTCH-1360 and update to CHANGES.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/conf/nutch-default.xml nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java nutch/branches/nutchgora/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java nutch/branches/nutchgora/src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1341100r1=1341099r2=1341100view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Mon May 21 16:40:32 2012 @@ -1,6 +1,9 @@ Nutch Change Log Release nutchgora - Current Development + +* NUTCH-1360 Support the storing of IP address connected to when web crawling (lewismc) + * NUTCH-1366 speed up indexing by eliminating the indexreducer (ferdy) * NUTCH-1362 Fix error handling of urls with empty fields (lewis, ferdy) Modified: nutch/branches/nutchgora/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1341100r1=1341099r2=1341100view=diff == --- nutch/branches/nutchgora/conf/nutch-default.xml (original) +++ nutch/branches/nutchgora/conf/nutch-default.xml Mon May 21 16:40:32 2012 @@ -257,6 +257,13 @@ /description /property +property + namehttp.store.ip.address/name + valuefalse/value + descriptionEnables us to capture the specific IP address of the + host which we connect to to fetch a page./description +/property + !-- FTP properties -- property Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1341100r1=1341099r2=1341100view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java (original) +++ nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java Mon May 21 16:40:32 2012 @@ -46,5 +46,7 @@ public interface HttpHeaders { public final static String LAST_MODIFIED = Last-Modified; public final static String LOCATION = Location; + + public final static String IP_ADDRESS = _ip; } Modified: nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1341100r1=1341099r2=1341100view=diff == --- nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original) +++ nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Mon May 21 16:40:32 2012 @@ -68,8 +68,8 @@ public abstract class HttpBase implement /** The Nutch 'User-Agent' request header */ protected String userAgent = getAgentString( NutchCVS, null, Nutch, - http://lucene.apache.org/nutch/bot.html;, - nutch-ag...@lucene.apache.org); + http://nutch.apache.org/bot.html;, + ag...@nutch.apache.org); /** The Accept-Language request header value. */ @@ -77,6 +77,9 @@ public abstract class HttpBase implement /** The Accept request header value. */ protected String accept = text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8; + + /** The _ip request header value. 
*/ + protected boolean ip_header = false; /** The default logger */ private final static Logger LOGGER = LoggerFactory.getLogger(HttpBase.class); @@ -117,6 +120,7 @@ public abstract class HttpBase implement .get(http.agent.description), conf.get(http.agent.url), conf.get(http.agent.email)); this.acceptLanguage = conf.get(http.accept.language, acceptLanguage); this.accept = conf.get(http.accept, accept); +this.ip_header = conf.getBoolean(http.store.ip.address, false); this.mimeTypes = new MimeUtil(conf); this.useHttp11 = conf.getBoolean(http.useHttp11, false); this.robots.setConf(conf); @@ -246,6 +250,10 @@ public abstract class HttpBase implement public boolean getUseHttp11() { return useHttp11; } + + public boolean getIP_Header(){ + return ip_header; + } private static String getAgentString(String agentName, String agentVersion
svn commit: r1341128 - /nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java
Author: lewismc Date: Mon May 21 17:46:15 2012 New Revision: 1341128 URL: http://svn.apache.org/viewvc?rev=1341128view=rev Log: trivial commit to make logging configuration for NUTCH-1361 consistent with trunk Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java?rev=1341128r1=1341127r2=1341128view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java (original) +++ nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java Mon May 21 17:46:15 2012 @@ -62,10 +62,15 @@ extends GoraMapperString, WebPage, Sele if (filter filters.filter(url) == null) return; } catch (URLFilterException e) { - GeneratorJob.LOG.warn(Couldn't filter url: + url + ( + e.getMessage() + )); - return; + if (GeneratorJob.LOG.isWarnEnabled()) { +GeneratorJob.LOG.warn(Couldn't filter url: + url + ( + e.getMessage() + )); +return; + } } catch (MalformedURLException e) { - GeneratorJob.LOG.warn(Couldn't filter url: + url + ( + e.getMessage() +)); + if (GeneratorJob.LOG.isWarnEnabled()) { +GeneratorJob.LOG.warn(Couldn't filter url: + url + ( + e.getMessage() +)); +return; + } } // check fetch schedule
svn commit: r1341137 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/crawl/GeneratorReducer.java
Author: lewismc Date: Mon May 21 18:25:09 2012 New Revision: 1341137 URL: http://svn.apache.org/viewvc?rev=1341137view=rev Log: commit to address NUTCH-1364 and update to CHANGES.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1341137r1=1341136r2=1341137view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Mon May 21 18:25:09 2012 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-1364 Add a counter for malformed urls (Jason Trost via lewismc) + * NUTCH-1361 Fix mishandling of malformed urls in generator job (Jason Trost via lewismc) * NUTCH-1360 Support the storing of IP address connected to when web crawling (lewismc) Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java?rev=1341137r1=1341136r2=1341137view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java (original) +++ nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java Mon May 21 18:25:09 2012 @@ -77,6 +77,7 @@ extends GoraReducerSelectorEntry, WebPa try { context.write(TableUtil.reverseUrl(key.url), page); } catch (MalformedURLException e) { + context.getCounter(Generator, MALFORMED_URL).increment(1); continue; } context.getCounter(Generator, GENERATE_MARK).increment(1);
svn commit: r1340546 - in /nutch/trunk: ./ src/java/org/apache/nutch/indexer/ src/plugin/creativecommons/src/web/ src/plugin/protocol-httpclient/src/test/conf/
Author: lewismc Date: Sat May 19 19:27:09 2012 New Revision: 1340546 URL: http://svn.apache.org/viewvc?rev=1340546view=rev Log: trivial commit to address issues with RC Modified: nutch/trunk/NOTICE.txt nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java nutch/trunk/src/plugin/creativecommons/src/web/web.xml nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/nutch-site-test.xml Modified: nutch/trunk/NOTICE.txt URL: http://svn.apache.org/viewvc/nutch/trunk/NOTICE.txt?rev=1340546r1=1340545r2=1340546view=diff == --- nutch/trunk/NOTICE.txt (original) +++ nutch/trunk/NOTICE.txt Sat May 19 19:27:09 2012 @@ -1,5 +1,5 @@ Apache Nutch -Copyright 2009 The Apache Software Foundation +Copyright 2012 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java?rev=1340546r1=1340545r2=1340546view=diff == --- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java (original) +++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java Sat May 19 19:27:09 2012 @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nutch.indexer; import java.util.Arrays; Modified: nutch/trunk/src/plugin/creativecommons/src/web/web.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/creativecommons/src/web/web.xml?rev=1340546r1=1340545r2=1340546view=diff == --- nutch/trunk/src/plugin/creativecommons/src/web/web.xml (original) +++ nutch/trunk/src/plugin/creativecommons/src/web/web.xml Sat May 19 19:27:09 2012 @@ -1,4 +1,20 @@ ?xml version=1.0 encoding=ISO-8859-1? +!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the License); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an AS IS BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+-- !DOCTYPE web-app PUBLIC -//Sun Microsystems, Inc.//DTD Web Application 2.3//EN Modified: nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml?rev=1340546r1=1340545r2=1340546view=diff == --- nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml (original) +++ nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml Sat May 19 19:27:09 2012 @@ -1,4 +1,20 @@ ?xml version=1.0? +!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the License); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an AS IS BASIS
svn commit: r1340553 - /nutch/branches/branch-1.5/
Author: lewismc Date: Sat May 19 20:10:33 2012 New Revision: 1340553 URL: http://svn.apache.org/viewvc?rev=1340553&view=rev Log: Committing a branch of trunk for the 1.5RC2 Added: nutch/branches/branch-1.5/ - copied from r1340552, nutch/trunk/
svn commit: r1340558 - /nutch/trunk/build.xml
Author: lewismc Date: Sat May 19 20:35:36 2012 New Revision: 1340558 URL: http://svn.apache.org/viewvc?rev=1340558view=rev Log: trivial commit to improve Javadoc Modified: nutch/trunk/build.xml Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1340558r1=1340557r2=1340558view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Sat May 19 20:35:36 2012 @@ -149,26 +149,46 @@ arg value=${javadoc.proxy.port}/ packageset dir=${src.dir}/ + packageset dir=${plugins.dir}/creativecommons/src/java/ + packageset dir=${plugins.dir}/feed/src/java/ + packageset dir=${plugins.dir}/headings/src/java/ + packageset dir=${plugins.dir}/index-anchor/src/java/ + packageset dir=${plugins.dir}/index-basic/src/java/ + packageset dir=${plugins.dir}/index-metadata/src/java/ + packageset dir=${plugins.dir}/index-more/src/java/ + packageset dir=${plugins.dir}/index-static/src/java/ + packageset dir=${plugins.dir}/language-identifier/src/java/ packageset dir=${plugins.dir}/lib-http/src/java/ packageset dir=${plugins.dir}/lib-regex-filter/src/java/ packageset dir=${plugins.dir}/microformats-reltag/src/java/ - packageset dir=${plugins.dir}/protocol-file/src/java/ - packageset dir=${plugins.dir}/protocol-ftp/src/java/ - packageset dir=${plugins.dir}/protocol-http/src/java/ - packageset dir=${plugins.dir}/protocol-httpclient/src/java/ - packageset dir=${plugins.dir}/parse-tika/src/java/ packageset dir=${plugins.dir}/parse-ext/src/java/ + packageset dir=${plugins.dir}/parse-html/src/java/ packageset dir=${plugins.dir}/parse-js/src/java/ + packageset dir=${plugins.dir}/parse-metatags/src/java/ packageset dir=${plugins.dir}/parse-swf/src/java/ + packageset dir=${plugins.dir}/parse-tika/src/java/ packageset dir=${plugins.dir}/parse-zip/src/java/ - packageset dir=${plugins.dir}/index-basic/src/java/ - packageset dir=${plugins.dir}/index-more/src/java/ + packageset dir=${plugins.dir}/lib-http/src/java/ + packageset 
dir=${plugins.dir}/protocol-file/src/java/ + packageset dir=${plugins.dir}/protocol-ftp/src/java/ + packageset dir=${plugins.dir}/protocol-http/src/java/ + packageset dir=${plugins.dir}/protocol-httpclient/src/java/ + packageset dir=${plugins.dir}/scoring-link/src/java/ packageset dir=${plugins.dir}/scoring-opic/src/java/ + packageset dir=${plugins.dir}/subcollection/src/java/ + packageset dir=${plugins.dir}/tld/src/java/ packageset dir=${plugins.dir}/urlfilter-automaton/src/java/ + packageset dir=${plugins.dir}/urlfilter-domain/src/java/ + packageset dir=${plugins.dir}/urlfilter-domainblacklist/src/java/ + packageset dir=${plugins.dir}/urlfilter-prefix/src/java/ packageset dir=${plugins.dir}/urlfilter-regex/src/java/ packageset dir=${plugins.dir}/urlfilter-prefix/src/java/ - packageset dir=${plugins.dir}/creativecommons/src/java/ - packageset dir=${plugins.dir}/language-identifier/src/java/ + packageset dir=${plugins.dir}/urlfilter-suffix/src/java/ + packageset dir=${plugins.dir}/urlfilter-validator/src/java/ + packageset dir=${plugins.dir}/urlmeta/src/java/ + packageset dir=${plugins.dir}/urlnormalizer-basic/src/java/ + packageset dir=${plugins.dir}/urlnormalizer-pass/src/java/ + packageset dir=${plugins.dir}/urlnormalizer-regex/src/java/ link href=${javadoc.link.java}/ link href=${javadoc.link.lucene}/ @@ -507,26 +527,46 @@ arg value=${javadoc.proxy.port}/ packageset dir=${src.dir}/ + packageset dir=${plugins.dir}/creativecommons/src/java/ + packageset dir=${plugins.dir}/feed/src/java/ + packageset dir=${plugins.dir}/headings/src/java/ + packageset dir=${plugins.dir}/index-anchor/src/java/ + packageset dir=${plugins.dir}/index-basic/src/java/ + packageset dir=${plugins.dir}/index-metadata/src/java/ + packageset dir=${plugins.dir}/index-more/src/java/ + packageset dir=${plugins.dir}/index-static/src/java/ + packageset dir=${plugins.dir}/language-identifier/src/java/ packageset dir=${plugins.dir}/lib-http/src/java/ packageset 
dir=${plugins.dir}/lib-regex-filter/src/java/ packageset dir=${plugins.dir}/microformats-reltag/src/java/ - packageset dir=${plugins.dir}/protocol-file/src/java/ - packageset dir=${plugins.dir}/protocol-ftp/src/java/ - packageset dir=${plugins.dir}/protocol-http/src/java/ - packageset dir=${plugins.dir}/protocol-httpclient/src/java/ - packageset dir=${plugins.dir}/parse-tika/src/java/ packageset dir=${plugins.dir}/parse-ext/src/java/ + packageset dir=${plugins.dir}/parse-html/src/java/ packageset dir=${plugins.dir}/parse-js/src/java
svn commit: r1302134 - in /nutch/trunk: build.xml default.properties src/java/overview.html
Author: lewismc Date: Sun Mar 18 15:02:50 2012 New Revision: 1302134 URL: http://svn.apache.org/viewvc?rev=1302134view=rev Log: incremental commit to update Javadocs Modified: nutch/trunk/build.xml nutch/trunk/default.properties nutch/trunk/src/java/overview.html Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1302134r1=1302133r2=1302134view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Sun Mar 18 15:02:50 2012 @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. -- -project name=Nutch default=runtime xmlns:ivy=antlib:org.apache.ivy.ant xmlns:artifact=antlib:org.apache.maven.artifact.ant +project name=${name} default=runtime xmlns:ivy=antlib:org.apache.ivy.ant xmlns:artifact=antlib:org.apache.maven.artifact.ant !-- Load all the default properties, and any the user wants-- !-- to contribute (without having to type -D or edit this file -- @@ -125,7 +125,7 @@ !-- == -- !---- !-- == -- - target name=release depends=compile-core description=-- generate the release distribution + target name=release depends=compile-core description=generate the release distribution copy file=${conf.dir}/nutch-default.xml todir=${build.classes}/ copy file=${conf.dir}/nutch-site.xml @@ -141,8 +141,8 @@ author=true version=true use=true - windowtitle=${Name} ${version} API - doctitle=${Name} ${version} API + windowtitle=${name} ${version} API + doctitle=${name} ${version} API bottom=Copyright amp;copy; ${year} The Apache Software Foundation arg value=${javadoc.proxy.host}/ @@ -205,7 +205,7 @@ !-- == -- !---- !-- == -- - target name=deploy depends=release description=-- deploy to Apache Nexus + target name=deploy depends=release description=deploy to Apache Nexus !-- generate a pom file -- ivy:makepom ivyfile=${ivy.file} pomfile=${basedir}/pom.xml templatefile=ivy/mvn.template @@ -460,7 +460,7 @@ /target !-- target: ivy-download -- - target name=ivy-download 
description=-- download ivy + target name=ivy-download description=Download ivy available file=${ivy.jar} property=ivy.jar.found/ antcall target=-ivy-download-unchecked/ /target @@ -499,8 +499,8 @@ author=true version=true use=true - windowtitle=${Name} ${version} API - doctitle=${Name} ${version} API + windowtitle=${name} ${version} API + doctitle=${name} ${version} API bottom=Copyright amp;copy; ${year} The Apache Software Foundation arg value=${javadoc.proxy.host}/ @@ -673,7 +673,7 @@ !-- target: clean-cache = -- target name=clean-cache depends= -description=-- delete ivy cache +description=delete ivy cache ivy:cleancache / /target @@ -693,7 +693,7 @@ /target target name=rat-sources depends=rat-sources-typedef - description=-- runs the tasks over src/java + description=runs the tasks over src/java rat:report xmlns:rat=antlib:org.apache.rat.anttasks fileset dir=src include name=java/**/*/ Modified: nutch/trunk/default.properties URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1302134r1=1302133r2=1302134view=diff == --- nutch/trunk/default.properties (original) +++ nutch/trunk/default.properties Sun Mar 18 15:02:50 2012 @@ -1,7 +1,7 @@ name=nutch version=1.5-SNAPSHOT final.name=${name}-${version} -year=2011 +year=2012 basedir = ./ src.dir = ./src/java @@ -78,31 +78,59 @@ plugins.protocol=\ # plugins.urlfilter=\ org.apache.nutch.urlfilter.automaton*:\ + org.apache.nutch.urlfilter.domain*:\ org.apache.nutch.urlfilter.prefix*:\ - org.apache.nutch.urlfilter.regex* + org.apache.nutch.urlfilter.regex*\ + org.apache.nutch.urlfilter.suffix*:\ + org.apache.nutch.urlfilter.validator* + +# +# URL Normalizer Plugins +# +plugins.urlfilter=\ + org.apache.nutch.net.urlnormalizer.basic*:\ + org.apache.nutch.net.urlnormalizer.pass*:\ + org.apache.nutch.net.urlnormalizer.regex* # # Scoring Plugins # plugins.scoring=\ - org.apache.nutch.scoring.opic* - + org.apache.nutch.scoring.link
svn commit: r1302136 - in /nutch/branches/nutchgora: default.properties src/java/overview.html
Author: lewismc Date: Sun Mar 18 15:03:43 2012 New Revision: 1302136 URL: http://svn.apache.org/viewvc?rev=1302136view=rev Log: incremental update to Javadoc Modified: nutch/branches/nutchgora/default.properties nutch/branches/nutchgora/src/java/overview.html Modified: nutch/branches/nutchgora/default.properties URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/default.properties?rev=1302136r1=1302135r2=1302136view=diff == --- nutch/branches/nutchgora/default.properties (original) +++ nutch/branches/nutchgora/default.properties Sun Mar 18 15:03:43 2012 @@ -112,7 +112,7 @@ plugins.parse=\ org.apache.nutch.parse.swf*:\ org.apache.nutch.parse.tika:\ org.apache.nutch.parse.zip - + # # Indexing Filter Plugins # Modified: nutch/branches/nutchgora/src/java/overview.html URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/overview.html?rev=1302136r1=1302135r2=1302136view=diff == --- nutch/branches/nutchgora/src/java/overview.html (original) +++ nutch/branches/nutchgora/src/java/overview.html Sun Mar 18 15:03:43 2012 @@ -1,9 +1,10 @@ html head - titleNutch/title + titleApache Nutch/title /head body -Nutch is the open-source search engine.p +pApache Nutch is an open source web-search software project. /p +pNutch is a project of the Apache Software Foundation and is part of the larger Apache community of developers and users./p /body /html
svn commit: r1302138 - /nutch/trunk/default.properties
Author: lewismc Date: Sun Mar 18 15:08:41 2012 New Revision: 1302138 URL: http://svn.apache.org/viewvc?rev=1302138&view=rev Log: incremental update to Javadoc Modified: nutch/trunk/default.properties Modified: nutch/trunk/default.properties URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1302138&r1=1302137&r2=1302138&view=diff == --- nutch/trunk/default.properties (original) +++ nutch/trunk/default.properties Sun Mar 18 15:08:41 2012 @@ -79,6 +79,7 @@ plugins.protocol=\ plugins.urlfilter=\ org.apache.nutch.urlfilter.automaton*:\ org.apache.nutch.urlfilter.domain*:\ + org.apache.nutch.urlfilter.domainblacklist*:\ org.apache.nutch.urlfilter.prefix*:\ org.apache.nutch.urlfilter.regex*\ org.apache.nutch.urlfilter.suffix*:\
svn commit: r1302161 - in /nutch/trunk/src: java/org/apache/nutch/crawl/ java/org/apache/nutch/parse/ java/org/apache/nutch/plugin/ java/org/apache/nutch/protocol/ java/org/apache/nutch/segment/ java/
Author: lewismc Date: Sun Mar 18 16:46:33 2012 New Revision: 1302161 URL: http://svn.apache.org/viewvc?rev=1302161view=rev Log: commit to address NUTCH-1273 Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java nutch/trunk/src/java/org/apache/nutch/protocol/Content.java nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=1302161r1=1302160r2=1302161view=diff == --- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Sun Mar 18 16:46:33 2012 @@ -262,7 +262,7 @@ public class CrawlDatum implements Writa if (version 3) { boolean hasMetadata = false; if (version 7) { -MapWritable oldMetaData = new MapWritable(); +org.apache.hadoop.io.MapWritable oldMetaData = new org.apache.hadoop.io.MapWritable(); if (in.readBoolean()) { hasMetadata = true; metaData = new org.apache.hadoop.io.MapWritable(); Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=1302161r1=1302160r2=1302161view=diff == --- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Sun Mar 
18 16:46:33 2012 @@ -19,6 +19,7 @@ package org.apache.nutch.crawl; import java.io.DataOutputStream; import java.io.IOException; +import java.io.Closeable; import java.net.URL; import java.util.Date; import java.util.Iterator; @@ -35,7 +36,6 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Closeable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.MapFile; Modified: nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java?rev=1302161r1=1302160r2=1302161view=diff == --- nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java Sun Mar 18 16:46:33 2012 @@ -37,6 +37,7 @@ import org.apache.nutch.util.TimingUtil; import java.text.SimpleDateFormat; import java.util.Iterator; +import java.io.Closeable; /** . 
*/ public class LinkDbReader extends Configured implements Tool, Closeable { Modified: nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java?rev=1302161r1=1302160r2=1302161view=diff == --- nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java Sun Mar 18 16:46:33 2012 @@ -31,12 +31,12 @@ public class NutchWritable extends Gener org.apache.hadoop.io.BytesWritable.class, org.apache.hadoop.io.FloatWritable.class, org.apache.hadoop.io.IntWritable.class, + org.apache.hadoop.io.MapWritable.class, org.apache.hadoop.io.Text.class, org.apache.hadoop.io.MD5Hash.class, org.apache.nutch.crawl.CrawlDatum.class, org.apache.nutch.crawl.Inlink.class, org.apache.nutch.crawl.Inlinks.class, - org.apache.nutch.crawl.MapWritable.class, org.apache.nutch.fetcher.FetcherOutput.class, org.apache.nutch.metadata.Metadata.class, org.apache.nutch.parse.Outlink.class, Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=1302161r1=1302160r2=1302161view=diff == --- nutch/trunk/src/java/org/apache
svn commit: r1302172 - in /nutch/branches/nutchgora/src: java/org/apache/nutch/plugin/ java/org/apache/nutch/protocol/ plugin/protocol-file/src/java/org/apache/nutch/protocol/file/
Author: lewismc Date: Sun Mar 18 17:23:13 2012 New Revision: 1302172 URL: http://svn.apache.org/viewvc?rev=1302172view=rev Log: rollback to -r1302136 Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java nutch/branches/nutchgora/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java?rev=1302172r1=1302171r2=1302172view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java (original) +++ nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java Sun Mar 18 17:23:13 2012 @@ -20,7 +20,6 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; -import java.net.URI; import java.util.ArrayList; import java.util.HashMap; import java.util.Locale; @@ -215,16 +214,12 @@ public class PluginDescriptor { /** * Adds a exported library with a relative path to the plugin directory. - * We wish to automatically escape characters that are illegal in URLs. 
- * It is recommended that new code convert an abstract pathname into a URL - * by first converting it into a URI, via the toURI method, and then - * converting the URI into a URL via the URI.toURL method * * @param pLibPath */ public void addExportedLibRelative(String pLibPath) throws MalformedURLException { -URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL(); +URL url = new File(getPluginPath() + File.separator + pLibPath).toURL(); fExportedLibs.add(url); } @@ -247,17 +242,13 @@ public class PluginDescriptor { } /** - * Adds a exported library with a relative path to the plugin directory. - * We wish to automatically escape characters that are illegal in URLs. - * It is recommended that new code convert an abstract pathname into a URL - * by first converting it into a URI, via the toURI method, and then - * converting the URI into a URL via the URI.toURL method + * Adds a not exported library with a plugin directory relative path. * * @param pLibPath */ public void addNotExportedLibRelative(String pLibPath) throws MalformedURLException { -URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL(); +URL url = new File(getPluginPath() + File.separator + pLibPath).toURL(); fNotExportedLibs.add(url); } @@ -288,7 +279,7 @@ public class PluginDescriptor { try { for (File file2 : file.listFiles()) { if (file2.getAbsolutePath().endsWith(properties)) - arrayList.add(file2.getParentFile().toURI().toURL()); + arrayList.add(file2.getParentFile().toURL()); } } catch (MalformedURLException e) { LOG.debug(getPluginId() + + e.toString()); Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java?rev=1302172r1=1302171r2=1302172view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java (original) +++ 
nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java Sun Mar 18 17:23:13 2012 @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; -import java.net.URI; import java.net.URLDecoder; import java.util.HashMap; import java.util.Map; @@ -148,7 +147,7 @@ public class PluginManifestParser { private PluginDescriptor parseManifestFile(String pManifestPath) throws MalformedURLException, SAXException, IOException, ParserConfigurationException { -Document document = parseXML(new File(pManifestPath).toURI().toURL()); +Document document = parseXML(new File(pManifestPath).toURL()); String pPath = new File(pManifestPath).getParent(); return parsePlugin(document, pPath); } Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java?rev=1302172r1=1302171r2=1302172view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java (original) +++ nutch/branches/nutchgora
svn commit: r1298437 - in /nutch/branches/nutchgora: CHANGES.txt build.xml
Author: lewismc Date: Thu Mar 8 15:47:37 2012 New Revision: 1298437 URL: http://svn.apache.org/viewvc?rev=1298437view=rev Log: commit to address NUTCH-1307 and update to CHANGES.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1298437r1=1298436r2=1298437view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Thu Mar 8 15:47:37 2012 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-1307 Improve formatting of ant targets for clearer project help (lewismc) + * NUTCH-1302 nutchgora job failures should be noticed by submitter (ferdy) * NUTCH-1298 Pass numTasks to FetcherJob (Dan Rosher via ferdy) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1298437r1=1298436r2=1298437view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Thu Mar 8 15:47:37 2012 @@ -124,7 +124,7 @@ !-- -- !-- == -- target name=release depends=compile-core - description=generate the release distribution + description=-- generate the release distribution copy file=${conf.dir}/nutch-default.xml todir=${build.classes} / copy file=${conf.dir}/nutch-site.xml todir=${build.classes} / @@ -201,7 +201,7 @@ !-- == -- !-- -- !-- == -- - target name=deploy depends=release description=deploy to Apache Nexus + target name=deploy depends=release description=-- deploy to Apache Nexus !-- generate a pom file -- ivy:makepom ivyfile=${ivy.file} pomfile=${basedir}/pom.xml @@ -459,7 +459,7 @@ /target !-- target: ivy-download -- - target name=ivy-download description=Download ivy + target name=ivy-download description=-- Download ivy available file=${ivy.jar} property=ivy.jar.found / antcall target=-ivy-download-unchecked / /target @@ -669,7 +669,7 @@ /target !-- target: 
clean-cache = -- - target name=clean-cache depends= description=delete ivy cache + target name=clean-cache depends= description=-- delete ivy cache ivy:cleancache / /target @@ -689,7 +689,7 @@ /target target name=rat-sources depends=rat-sources-typedef - description=runs the tasks over src/java + description=-- runs the tasks over src/java rat:report xmlns:rat=antlib:org.apache.rat.anttasks fileset dir=src include name=java/**/* /
svn commit: r1298444 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/crawl/GeneratorMapper.java
Author: lewismc Date: Thu Mar 8 15:53:37 2012 New Revision: 1298444 URL: http://svn.apache.org/viewvc?rev=1298444&view=rev Log: commit to address NUTCH-1304 and update to CHANGES.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1298444&r1=1298443&r2=1298444&view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Thu Mar 8 15:53:37 2012 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-1304 GeneratorMapper.java dosen't return when skipping and already generated mark (Dan Rosher via lewismc) + * NUTCH-1307 Improve formatting of ant targets for clearer project help (lewismc) * NUTCH-1302 nutchgora job failures should be noticed by submitter (ferdy) Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java?rev=1298444&r1=1298443&r2=1298444&view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java (original) +++ nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java Thu Mar 8 15:53:37 2012 @@ -50,6 +50,7 @@ extends GoraMapper<String, WebPage, Sele if (GeneratorJob.LOG.isDebugEnabled()) { GeneratorJob.LOG.debug("Skipping " + url + "; already generated"); } + return; } // If filtering is on don't generate URLs that don't pass URLFilters
svn commit: r1291030 - in /nutch/trunk/src: java/org/apache/nutch/crawl/MapWritable.java java/org/apache/nutch/net/protocols/ProtocolException.java java/org/apache/nutch/parse/OutlinkExtractor.java te
Author: lewismc Date: Sun Feb 19 18:19:36 2012 New Revision: 1291030 URL: http://svn.apache.org/viewvc?rev=1291030view=rev Log: trivial commit to address NUTCH-1276 Modified: nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java Modified: nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java?rev=1291030r1=1291029r2=1291030view=diff == --- nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java Sun Feb 19 18:19:36 2012 @@ -59,6 +59,8 @@ import org.apache.nutch.protocol.Protoco * @author Stefan Groschupf * @deprecated Use org.apache.hadoop.io.MapWritable instead. */ + +@Deprecated public class MapWritable implements Writable { public static final Logger LOG = LoggerFactory.getLogger(MapWritable.class); Modified: nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java?rev=1291030r1=1291029r2=1291030view=diff == --- nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java (original) +++ nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java Sun Feb 19 18:19:36 2012 @@ -23,6 +23,7 @@ import java.io.Serializable; * Base exception for all protocol handlers * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead. 
*/ +@Deprecated @SuppressWarnings(serial) public class ProtocolException extends Exception implements Serializable { Modified: nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java?rev=1291030r1=1291029r2=1291030view=diff == --- nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java Sun Feb 19 18:19:36 2012 @@ -143,6 +143,7 @@ public class OutlinkExtractor { * @return Array of codeOutlink/code s within found in plainText * @deprecated only for tests */ + @Deprecated private Outlink[] getOutlinksJakartaRegexpImpl(final String plainText) { throw new UnsupportedOperationException( @@ -200,6 +201,7 @@ public class OutlinkExtractor { * @return Array of codeOutlink/code s within found in plainText * @deprecated only for tests */ + @Deprecated private Outlink[] getOutlinksJDK5Impl(final String plainText) { throw new UnsupportedOperationException( Modified: nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java?rev=1291030r1=1291029r2=1291030view=diff == --- nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java Sun Feb 19 18:19:36 2012 @@ -74,6 +74,7 @@ public class CrawlDBTestUtil { * @return * @deprecated Use {@link #createConfiguration()} instead */ + @Deprecated public static Configuration create(){ return createConfiguration(); }
svn commit: r1231517 - in /nutch/trunk/src: java/org/apache/nutch/crawl/ java/org/apache/nutch/indexer/solr/ java/org/apache/nutch/tools/arc/ java/org/apache/nutch/util/ java/org/apache/nutch/util/dom
Author: lewismc Date: Sat Jan 14 15:45:46 2012 New Revision: 1231517 URL: http://svn.apache.org/viewvc?rev=1231517view=rev Log: commit to try and resolve NUTCH-1176, I expect this not to work 1st time, N.B. This doesn't change or even touch syntax of code. Modified: nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffix.java nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixes.java nutch/trunk/src/java/org/apache/nutch/util/domain/TopLevelDomain.java nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java Modified: nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java?rev=1231517r1=1231516r2=1231517view=diff == --- nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java Sat Jan 14 15:45:46 2012 @@ -64,6 +64,7 @@ public abstract class AbstractFetchSched * default codefetchInterval/code. * * @param url URL of the page. + * * @param datum datum instance to be initialized (modified in place). */ public CrawlDatum initializeSchedule(Text url, CrawlDatum datum) { @@ -91,12 +92,15 @@ public abstract class AbstractFetchSched * marked as GONE. Default implementation increases fetchInterval by 50%, * and if it exceeds the codemaxInterval/code it calls * {@link #forceRefetch(Text, CrawlDatum, boolean)}. 
- * @param url URL of the page - * @param datum datum instance to be adjusted + * + * @param url URL of the page. + * + * @param datum datum instance to be adjusted. + * * @return adjusted page information, including all original information. - * NOTE: this may be a different instance than {@param datum}, but + * NOTE: this may be a different instance than {@see datum}, but * implementations should make sure that it contains at least all - * information from {@param datum}. + * information from {@see datum}. */ public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum, long prevFetchTime, long prevModifiedTime, long fetchTime) { @@ -113,15 +117,21 @@ public abstract class AbstractFetchSched * re-tried due to transient errors. The default implementation * sets the next fetch time 1 day in the future and increases * the retry counter. - * @param url URL of the page - * @param datum page information - * @param prevFetchTime previous fetch time - * @param prevModifiedTime previous modified time - * @param fetchTime current fetch time + * + * @param url URL of the page. + * + * @param datum page information. + * + * @param prevFetchTime previous fetch time. + * + * @param prevModifiedTime previous modified time. + * + * @param fetchTime current fetch time. + * * @return adjusted page information, including all original information. - * NOTE: this may be a different instance than {@param datum}, but + * NOTE: this may be a different instance than {@see datum}, but * implementations should make sure that it contains at least all - * information from {@param datum}. + * information from {@see datum}. */ public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum, long prevFetchTime, long prevModifiedTime, long fetchTime) { @@ -147,10 +157,14 @@ public abstract class AbstractFetchSched * {@param curTime} it returns false, and true otherwise. 
It will also * check that fetchTime is not too remote (more than codemaxInterval/code, * in which case it lowers the interval and returns true. - * @param url URL of the page - * @param datum datum instance + * + * @param url URL of the page. + * + * @param datum datum instance. + * * @param curTime reference time (usually set to the time when the * fetchlist generation process was started). + * * @return true, if the page should be considered for inclusion in the current * fetchlist, otherwise false. */ @@ -173,8 +187,11 @@ public abstract class AbstractFetchSched /** * This method resets fetchTime, fetchInterval, modifiedTime, * retriesSinceFetch and page signature, so that it forces refetching. - * @param url URL of the page - * @param datum datum instance + * + * @param url URL of the page. + * + * @param datum datum instance
svn commit: r1227620 - in /nutch/trunk: CHANGES.txt build.xml default.properties
Author: lewismc Date: Thu Jan 5 15:00:42 2012 New Revision: 1227620 URL: http://svn.apache.org/viewvc?rev=1227620view=rev Log: commit to address NUTCH-1237 update to CHANGES.txt Modified: nutch/trunk/CHANGES.txt nutch/trunk/build.xml nutch/trunk/default.properties Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1227620r1=1227619r2=1227620view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Jan 5 15:00:42 2012 @@ -1,5 +1,9 @@ Nutch Change Log +* NUTCH-1237 Improve javac arguments for more verbose output (lewismc) + +* NUTCH-1236 Add link to site documentation to download older versions of Nutch (lewismc) + * NUTCH-1146 Prevent generation of _SUCCESS files in output (jnioche) * NUTCH-1232 Remove site field from index-basic (markus) Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1227620r1=1227619r2=1227620view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Thu Jan 5 15:00:42 2012 @@ -94,6 +94,7 @@ target=${javac.version} source=${javac.version} deprecation=${javac.deprecation} + compilerarg value=-Xlint/ classpath refid=classpath/ /javac /target @@ -318,6 +319,7 @@ target=${javac.version} source=${javac.version} deprecation=${javac.deprecation} + compilerarg value=-Xlint/ classpath refid=test.classpath/ /javac /target Modified: nutch/trunk/default.properties URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1227620r1=1227619r2=1227620view=diff == --- nutch/trunk/default.properties (original) +++ nutch/trunk/default.properties Thu Jan 5 15:00:42 2012 @@ -36,7 +36,7 @@ dist.version.dir=${dist.dir}/${final.nam javac.debug=on javac.optimize=on -javac.deprecation=off +javac.deprecation=on javac.version= 1.6 runtime.dir=./runtime
svn commit: r1226800 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/images/ publish/skin/images/
Author: lewismc Date: Tue Jan 3 15:04:30 2012 New Revision: 1226800 URL: http://svn.apache.org/viewvc?rev=1226800view=rev Log: commit to add an old downloads page, a sonar analysis page and update to site.xml to accomodate the changes. Added: nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml nutch/site/forrest/src/documentation/content/xdocs/sonar.xml Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/faq.html nutch/site/publish/faq.pdf nutch/site/publish/images/built-with-forrest-button.png nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/skin/images/built-with-forrest-button.png nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/tutorial.html nutch/site/publish/tutorial.pdf nutch/site/publish/version_control.html 
nutch/site/publish/version_control.pdf nutch/site/publish/wiki.html nutch/site/publish/wiki.pdf Added: nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml?rev=1226800view=auto == --- nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml (added) +++ nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml Tue Jan 3 15:04:30 2012 @@ -0,0 +1,35 @@ +?xml version=1.0? +!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the License); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an AS IS BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +-- +!DOCTYPE document PUBLIC -//APACHE//DTD Documentation V2.0//EN + http://forrest.apache.org/dtd/document-v20.dtd; + +document + +header + titleOlder Downloads/title +/header + +body + +section + titleOld Downloads/title pAll old Nutch downloads can be found a href=http://archive.apache.org/dist/nutch/;here/a. 
Please note that the versions available here for download are now not configured for general release and are unsupported; in addition, it is always recommended by the Nutch development team to use the most recent stable release./p +/section + +/body + +/document Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1226800r1=1226799r2=1226800view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Tue Jan 3 15:04:30 2012 @@ -54,9 +54,11 @@ See http://forrest.apache.org/docs/linki resources label=Resources downloadlabel=Download href=ext:release / nightly label=Nightly builds href=nightly.html / +sonar label=Sonar Analysis href=sonar.html / contact label=Mailing Listshref=mailing_lists.html / issues label=Issue Tracking href=issue_tracking.html
svn commit: r1224750 - in /nutch/branches/nutchgora: CHANGES.txt NOTICE.txt
Author: lewismc Date: Mon Dec 26 16:17:42 2011 New Revision: 1224750 URL: http://svn.apache.org/viewvc?rev=1224750view=rev Log: commit to address NUTCH-1217 and update to CHANGES.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/NOTICE.txt Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1224750r1=1224749r2=1224750view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Mon Dec 26 16:17:42 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-1217 Update NOTICE.txt to drop some copyrights (lewismc) + * NUTCH-1216 Add trivial comment to lib/native/README.txt (lewismc) * NUTCH-1198 Less verbose logging when unmapped mimetypes are trying to be parsed. (ferdy) Modified: nutch/branches/nutchgora/NOTICE.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/NOTICE.txt?rev=1224750r1=1224749r2=1224750view=diff == --- nutch/branches/nutchgora/NOTICE.txt (original) +++ nutch/branches/nutchgora/NOTICE.txt Mon Dec 26 16:17:42 2011 @@ -6,70 +6,10 @@ Foundation (http://www.apache.org/). This product includes software developed by the following copyright owners: -Nutch includes icu4j: -Copyright (c) 1995-2006 International Business Machines Corporation and -others - -Nutch includes Carrot2: -Copyright (C) 2002-2006, Dawid Weiss, Stanis�aw Osi�ski. 
-Dawid Weiss; Project administrator, various components, core; 2002; Poland -Stanisław, Osiński; Lingo clustering component, ODP Input; 2003; Poland -Karol Gołembniak, Irmina Masłowska; HAOG clustering component; 2006; Poznan University of Technology; Poland -Michał, Wróblewski [*]; AHC clustering components; 2003; Poznan University of Technology, Poland -Paweł, Kowalik [*]; Inductive search engine wrapper; 2003; Poznan University of Technology, Poland -Steven, Schockaert [*]; Fuzzy Ants clustering component; 2004; University of Gent, Belgium -Lang, Ngo Chi [*]; Fuzzy Rough set clustering component; 2004; Warsaw University, Poland - -Nutch includes Saxpath: -Copyright (C) 2000-2002 werken digital. All rights reserved. - -Nutch includes jaxen: -Copyright 2003-2006 The Werken Company. All Rights Reserved. - -Nutch includes Jdom: -Copyright (C) 2000-2004 Jason Hunter Brett McLaughlin. -All rights reserved - -Nutch includes SaxPath: -Copyright (C) 2000-2002 werken digital. All rights reserved. - -Nutch includes Snowball: -Copyright (c) 2001, Dr Martin Porter -(for the Java developments) Copyright (c) 2002, Richard Boulton. - -Nutch includes ViolinStrings: -Copyright (c) Michael Schmeling 1998, 2000 - All Rights Reserve - -Nutch includes Cyperneko: -(C) Copyright 2002,2003, Andy Clark. All rights reserved. - -Nutch includes Jena: -(c) Copyright 2000, 2001, 2002, 2003, 2004 Hewlett-Packard Development Company, LP -All rights reserved. - -Nutch includes BouncyCastle: -Copyright (c) 2000 - 2008 The Legion Of The Bouncy Castle (http://www.bouncycastle.org) - -Nutch includes FontBox: -Copyright (c) 2003-2005, www.fontbox.org - -Nutch includes JempBox: -Copyright (c) 2006-2007, www.jempbox.org -All rights reserved. - -Nutch includes PDFBox: -Copyright (c) 2003-2005, www.pdfbox.org -All rights reserved. - Nutch includes JavaSWF: Copyright (c) 2001-2005, David N. Main, All rights reserved. 
-Nutch includes Json Lib: -This product includes software developed by Douglas Crockford -(http://www.crockford.com). - Nutch includes Automaton: This package is Copyright © 2001-2008 Anders Møller. All rights reserved. -Nutch includes Rome: -Copyright 2004 Sun Microsystems, Inc. +
svn commit: r1199860 - in /nutch/branches/nutchgora: build.xml doap.rdf
Author: lewismc Date: Wed Nov 9 17:06:46 2011 New Revision: 1199860 URL: http://svn.apache.org/viewvc?rev=1199860view=rev Log: commit to update the doap file and to configure a unique key for Sonar ant task. Modified: nutch/branches/nutchgora/build.xml nutch/branches/nutchgora/doap.rdf Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1199860r1=1199859r2=1199860view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Wed Nov 9 17:06:46 2011 @@ -695,7 +695,7 @@ property name=sonar.binaries value=${build.dir}/plugins / property name=sonar.tests value=${test.src.dir} / - sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch + sonar:sonar workDir=${base.dir} key=org.apache.nutch:branch version=2.0-SNAPSHOT xmlns:sonar=antlib:org.sonar.ant / /target /project Modified: nutch/branches/nutchgora/doap.rdf URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/doap.rdf?rev=1199860r1=1199859r2=1199860view=diff == --- nutch/branches/nutchgora/doap.rdf (original) +++ nutch/branches/nutchgora/doap.rdf Wed Nov 9 17:06:46 2011 @@ -27,11 +27,11 @@ nameApache Nutch/name homepage rdf:resource=http://nutch.apache.org; / asfext:pmc rdf:resource=http://nutch.apache.org; / -shortdescNutch 2.0 is a beeding edge (trunk) development of the Apache Nutch web search software./shortdesc +shortdescNutch 2.0 is a branch development of the Apache Nutch web search software./shortdesc descriptionApache Nutch 2.0 maintains a refined architecture by delegating searching, parsing, and data storage to other software projects. In particular the storage layer has been delegated to the object relational mapping framework Gora (Apache Incubator) enabling the focus of Nutch 2.0 to be entirely on web crawling. This logic promotes Nutch 2.0 as a simpler, focussed web crawler enabling easy integration with other resources. 
/description bug-database rdf:resource=http://issues.apache.org/jira/browse/NUTCH; / mailing-list rdf:resource=http://www.mail-archive.com/dev%40nutch.apache.org/; / -download-page rdf:resource=http://svn.apache.org/repos/asf/nutch/trunk/; / +download-page rdf:resource=http://svn.apache.org/repos/asf/nutch/branches/nutchgora; / programming-languageJava/programming-language category rdf:resource=http://projects.apache.org/category/web-framework; / release @@ -43,8 +43,8 @@ /release repository SVNRepository -location rdf:resource=https://svn.apache.org/repos/asf/nutch/trunk// -browse rdf:resource=http://svn.apache.org/viewvc/nutch/trunk// +location rdf:resource=https://svn.apache.org/repos/asf/nutch/branches/nutchgora/ +browse rdf:resource=http://svn.apache.org/viewvc/nutch/branches/nutchgora/ /SVNRepository /repository maintainer
svn commit: r1199863 - /nutch/trunk/build.xml
Author: lewismc Date: Wed Nov 9 17:07:32 2011 New Revision: 1199863 URL: http://svn.apache.org/viewvc?rev=1199863view=rev Log: commit to assign a unique key to build.xml Ant Sonar task. Modified: nutch/trunk/build.xml Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1199863r1=1199862r2=1199863view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Wed Nov 9 17:07:32 2011 @@ -722,7 +722,7 @@ property name=sonar.binaries value=${build.dir}/plugins / property name=sonar.tests value=${test.src.dir} / -sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch +sonar:sonar workDir=${base.dir} key=org.apache.nutch:trunk version=1.4-SNAPSHOT xmlns:sonar=antlib:org.sonar.ant/ /target
svn commit: r1197624 - /nutch/trunk/build.xml
Author: lewismc Date: Fri Nov 4 16:15:24 2011 New Revision: 1197624 URL: http://svn.apache.org/viewvc?rev=1197624view=rev Log: commit to reconfigure ant sonar task, again. Modified: nutch/trunk/build.xml Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1197624r1=1197623r2=1197624view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Fri Nov 4 16:15:24 2011 @@ -712,26 +712,18 @@ !-- Add the target -- target name=sonar -sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch version=1.4 xmlns:sonar=antlib:org.sonar.ant/ - -!-- source directories (required) -- -sources - path location=${src.dir} / -/sources + +!-- list of mandatory source directories (required) -- +property name=sonar.sources value=${src.dir}/ !-- list of properties (optional) -- -property key=sonar.projectName value=Nutch Trunk 1.4 Sonar Analysis / -property key=sonar.dynamicAnalysis value=false / - -!-- test source directories (optional) -- -tests - path location=${test.src.dir} / -/tests +property name=sonar.projectName value=Nutch Trunk 1.4 Sonar Analysis / +property name=sonar.binaries value=${build.dir}/classes / +property name=sonar.binaries value=${build.dir}/plugins / +property name=sonar.tests value=${test.src.dir} / -!-- binaries directories, which contain for example the compiled Java bytecode -- - binaries - path location=${build.dir}/classes/ - /binaries +sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch + version=1.4-SNAPSHOT xmlns:sonar=antlib:org.sonar.ant/ /target /project
svn commit: r1196537 - /nutch/trunk/build.xml
Author: lewismc Date: Wed Nov 2 10:58:20 2011 New Revision: 1196537 URL: http://svn.apache.org/viewvc?rev=1196537view=rev Log: commit to add (hopefully) final configuration parameters to ant sonar target Modified: nutch/trunk/build.xml Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1196537r1=1196536r2=1196537view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Wed Nov 2 10:58:20 2011 @@ -706,7 +706,7 @@ !-- Define the Sonar task if this hasn't been done in a common script -- taskdef uri=antlib:org.sonar.ant resource=org/sonar/ant/antlib.xml -classpath path=/ +classpath path=${ant.library.dir}/ /taskdef !-- Add the target --
svn commit: r1196800 - /nutch/trunk/build.xml
Author: lewismc Date: Wed Nov 2 21:08:44 2011 New Revision: 1196800 URL: http://svn.apache.org/viewvc?rev=1196800view=rev Log: commit to add MySQL driver classpath to sonar analysis target Modified: nutch/trunk/build.xml Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1196800r1=1196799r2=1196800view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Wed Nov 2 21:08:44 2011 @@ -707,6 +707,7 @@ !-- Define the Sonar task if this hasn't been done in a common script -- taskdef uri=antlib:org.sonar.ant resource=org/sonar/ant/antlib.xml classpath path=${ant.library.dir}/ +classpath path=${mysql.library.dir}/ /taskdef !-- Add the target --
svn commit: r1196823 - /nutch/trunk/build.xml
Author: lewismc Date: Wed Nov 2 22:00:32 2011 New Revision: 1196823 URL: http://svn.apache.org/viewvc?rev=1196823view=rev Log: commit to make trivial change to working directory configuration for ant sonar target... Modified: nutch/trunk/build.xml Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1196823r1=1196822r2=1196823view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Wed Nov 2 22:00:32 2011 @@ -712,7 +712,7 @@ !-- Add the target -- target name=sonar -sonar:sonar workDir=${build.dir} key=org.apache.nutch:nutch version=1.4 xmlns:sonar=antlib:org.sonar.ant/ +sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch version=1.4 xmlns:sonar=antlib:org.sonar.ant/ !-- source directories (required) -- sources
svn commit: r1195403 - in /nutch/branches/nutchgora: CHANGES.txt build.xml conf/gora-cassandra-mapping.xml conf/gora-hbase-mapping.xml conf/gora-sql-mapping.xml conf/nutch-default.xml ivy/ivy.xml
Author: lewismc Date: Mon Oct 31 10:49:17 2011 New Revision: 1195403 URL: http://svn.apache.org/viewvc?rev=1195403view=rev Log: commit to address NUTCH-902 and update to changes.txt Added: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml nutch/branches/nutchgora/conf/gora-sql-mapping.xml nutch/branches/nutchgora/conf/nutch-default.xml nutch/branches/nutchgora/ivy/ivy.xml Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1195403r1=1195402r2=1195403view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Mon Oct 31 10:49:17 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-902 Add all necessary files and configuration so that nutch can be used with different backends out-of-the-box (lewismc) + * NUTCH-1081 1135 ant tests fail Fix TestGoraStorage for Nutchgora (Ferdy via lewismc) * NUTCH-1156 building errors with gora-hbase as a backend; update ivy.xml to use correct dependancies (Ferdy via lewismc) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1195403r1=1195402r2=1195403view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Mon Oct 31 10:49:17 2011 @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
-- -project name=Nutch default=runtime xmlns:ivy=antlib:org.apache.ivy.ant xmlns:artifact=antlib:org.apache.maven.artifact.ant +project name=Nutchgora default=runtime xmlns:ivy=antlib:org.apache.ivy.ant xmlns:artifact=antlib:org.apache.maven.artifact.ant !-- Load all the default properties, and any the user wants-- !-- to contribute (without having to type -D or edit this file -- Added: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml?rev=1195403view=auto == --- nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml (added) +++ nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml Mon Oct 31 10:49:17 2011 @@ -0,0 +1,43 @@ +?xml version=1.0 encoding=UTF-8? + +gora-orm + +keyspace name=webpage cluster=Test Cluster host=localhost +family name=p/ +family name=f/ +family name=sc type=super/ +/keyspace +class keyClass=java.lang.String name=org.apache.nutch.storage.WebPage + +!-- fetch fields -- +field name=baseUrl family=f qualifier=bas/ +field name=status family=f qualifier=st/ +field name=prevFetchTime family=f qualifier=pts/ +field name=fetchTime family=f qualifier=ts/ +field name=fetchInterval family=f qualifier=fi/ +field name=retriesSinceFetch family=f qualifier=rsf/ +field name=reprUrl family=f qualifier=rpr/ +field name=content family=f qualifier=cnt/ +field name=contentType family=f qualifier=typ/ +field name=modifiedTime family=f qualifier=mod/ + +!-- parse fields -- +field name=title family=p qualifier=t/ +field name=text family=p qualifier=c/ +field name=signature family=p qualifier=sig/ +field name=prevSignature family=p qualifier=psig/ + +!-- score fields -- +field name=score family=f qualifier=s/ + +!-- super columns -- +field name=markers family=sc qualifier=mk/ +field name=inlinks family=sc qualifier=il/ +field name=outlinks family=sc qualifier=ol/ +field name=metadata family=sc qualifier=mtdt/ +field name=headers family=sc qualifier=h/ +field 
name=parseStatus family=sc qualifier=pas/ +field name=protocolStatus family=sc qualifier=prs/ +/class + +/gora-orm \ No newline at end of file Added: nutch/branches/nutchgora/conf/gora-hbase-mapping.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-hbase-mapping.xml?rev=1195403view=auto == --- nutch/branches/nutchgora/conf/gora-hbase-mapping.xml (added) +++ nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Mon Oct 31 10:49:17 2011 @@ -0,0 +1,46 @@ +?xml version=1.0 encoding=UTF-8? + +gora-orm + +table name=webpage +family name=p/ !-- This can also have params like compression, bloom filters -- +family name=f/ +family name=s/ +family name=il/ +family name=ol
svn commit: r1186985 - /nutch/branches/nutchgora/build.xml
Author: lewismc Date: Thu Oct 20 18:34:25 2011 New Revision: 1186985 URL: http://svn.apache.org/viewvc?rev=1186985view=rev Log: commit to fix upgrade to language-identifier targets which I missed when addressing a previous commit Modified: nutch/branches/nutchgora/build.xml Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1186985r1=1186984r2=1186985view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Thu Oct 20 18:34:25 2011 @@ -167,7 +167,7 @@ packageset dir=${plugins.dir}/urlfilter-regex/src/java/ packageset dir=${plugins.dir}/urlfilter-prefix/src/java/ packageset dir=${plugins.dir}/creativecommons/src/java/ - packageset dir=${plugins.dir}/languageidentifier/src/java/ + packageset dir=${plugins.dir}/language-identifier/src/java/ link href=${javadoc.link.java}/ link href=${javadoc.link.lucene}/ @@ -530,7 +530,7 @@ packageset dir=${plugins.dir}/urlfilter-regex/src/java/ packageset dir=${plugins.dir}/urlfilter-prefix/src/java/ packageset dir=${plugins.dir}/creativecommons/src/java/ - packageset dir=${plugins.dir}/languageidentifier/src/java/ + packageset dir=${plugins.dir}/language-identifier/src/java/ link href=${javadoc.link.java}/ link href=${javadoc.link.lucene}/
svn commit: r1185868 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/images/ publish/skin/images/
Author: lewismc Date: Tue Oct 18 21:10:55 2011 New Revision: 1185868 URL: http://svn.apache.org/viewvc?rev=1185868view=rev Log: commit to make trivial update to credits.html Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml nutch/site/forrest/src/documentation/content/xdocs/credits.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/broken-links.xml nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/faq.html nutch/site/publish/faq.pdf nutch/site/publish/images/built-with-forrest-button.png nutch/site/publish/index.html nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/skin/images/built-with-forrest-button.png nutch/site/publish/tutorial.html nutch/site/publish/tutorial.pdf nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf nutch/site/publish/wiki.html nutch/site/publish/wiki.pdf Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/about.xml?rev=1185868r1=1185867r2=1185868view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/about.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/about.xml Tue Oct 18 21:10:55 2011 @@ -40,7 +40,7 @@ plugin infrastructure./p pFor more information about Apache Nutch, please see the a - href=wiki.htmlNutch wiki./a/p + href=http://wiki.apache.org/nutch/;Nutch wiki./a/p Modified: nutch/site/forrest/src/documentation/content/xdocs/credits.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/credits.xml?rev=1185868r1=1185867r2=1185868view=diff == --- 
nutch/site/forrest/src/documentation/content/xdocs/credits.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/credits.xml Tue Oct 18 21:10:55 2011 @@ -37,7 +37,7 @@ lia href=http://openindex.io/;Markus Jelsma/a/li lia href=http://people.apache.org/~siren;Sami Siren/a/li lia href=http://techvineyard.blogspot.com/;Alexis de Tréglodé/a/li - liLewis John McGibbney/li + lia href=http://www.linkedin.com/pub/lewis-john-mcgibbney/26/a92/a39;Lewis John McGibbney/a/li /ul /section Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1185868r1=1185867r2=1185868view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Tue Oct 18 21:10:55 2011 @@ -3,7 +3,7 @@ head META http-equiv=Content-Type content=text/html; charset=UTF-8 meta content=Apache Forrest name=Generator -meta name=Forrest-version content=0.9 +meta name=Forrest-version content=0.10-dev meta name=Forrest-skin-name content=nutch titleAbout Apache Nutch/title link type=text/css href=skin/basic.css rel=stylesheet @@ -271,7 +271,7 @@ document.write(Last Published: + docu pThe system can be enhanced (eg other document formats can be parsed) using a highly flexible, easily extensible and thoroughly maintained plugin infrastructure./p -pFor more information about Apache Nutch, please see the a href=wiki.htmlNutch wiki./a +pFor more information about Apache Nutch, please see the a href=http://wiki.apache.org/nutch/;Nutch wiki./a /p /div Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1185868r1=1185867r2=1185868view=diff == Binary files - no diff available. 
Modified: nutch/site/publish/bot.html URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1185868r1=1185867r2=1185868view=diff == --- nutch/site/publish/bot.html (original) +++ nutch/site/publish/bot.html Tue Oct 18 21:10:55 2011 @@ -3,7 +3,7 @@ head META http-equiv=Content-Type content=text/html; charset=UTF-8 meta content=Apache Forrest name=Generator -meta name=Forrest-version content=0.9 +meta name=Forrest-version content=0.10-dev meta name=Forrest-skin-name content=nutch titleApache Nutch robot/title link type=text/css href=skin/basic.css rel=stylesheet Modified: nutch/site/publish/bot.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1185868r1=1185867r2=1185868view=diff
svn commit: r1182504 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/parse/ParserFactory.java src/plugin/parse-html/plugin.xml
Author: lewismc Date: Wed Oct 12 18:18:48 2011 New Revision: 1182504 URL: http://svn.apache.org/viewvc?rev=1182504view=rev Log: commit to address NUTCH-1097 and update to changes.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1182504r1=1182503r2=1182504view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Wed Oct 12 18:18:48 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-1097 application/xhtml+xml should be enabled in plugin.xml of parse-html; allow multiple mimetypes for plugin.xml (Ferdy via lewismc) + * Change plugin source directory languageidentifier to language-identifier (lewismc) * NUTCH-1132, 1133 1134 Fix TestGenerator, TestInjector TestFetcher respectively (lewismc) Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java?rev=1182504r1=1182503r2=1182504view=diff == --- nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java (original) +++ nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java Wed Oct 12 18:18:48 2011 @@ -356,14 +356,13 @@ public final class ParserFactory { // NotMappedParserException for (int i=0; iextensions.length; i++) { -if (extensions[i].getAttribute(contentType) != null - extensions[i].getAttribute(contentType).equals( -contentType)) { - extList.add(extensions[i]); -} -else if (*.equals(extensions[i].getAttribute(contentType))){ + if (*.equals(extensions[i].getAttribute(contentType))){ extList.add(0, extensions[i]); } + else if (extensions[i].getAttribute(contentType) != null + 
contentType.matches(escapeContentType(extensions[i].getAttribute(contentType { + extList.add(extensions[i]); +} } if (extList.size() 0) { @@ -391,10 +390,19 @@ public final class ParserFactory { return (extList.size() 0) ? extList : null; } - private boolean match(Extension extension, String id, String type) { -return ((id.equals(extension.getId())) -(type.equals(extension.getAttribute(contentType)) || extension.getAttribute(contentType).equals(*) || - type.equals(DEFAULT_PLUGIN))); + private String escapeContentType(String contentType) { + // Escapes contentType in order to use as a regex + // (and keep backwards compatibility). + // This enables to accept multiple types for a single parser. + return contentType.replace(+, \\+).replace(., \\.); + } + + + private boolean match(Extension extension, String id, String type) { +return (id.equals(extension.getId())) +(extension.getAttribute(contentType).equals(*) || + type.matches(escapeContentType(extension.getAttribute(contentType))) || + type.equals(DEFAULT_PLUGIN)); } /** Get an extension from its id and supported content-type. */ Modified: nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml?rev=1182504r1=1182503r2=1182504view=diff == --- nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml (original) +++ nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml Wed Oct 12 18:18:48 2011 @@ -39,7 +39,7 @@ implementation id=org.apache.nutch.parse.html.HtmlParser class=org.apache.nutch.parse.html.HtmlParser -parameter name=contentType value=text/html/ +parameter name=contentType value=text/html|application/xhtml+xml/ parameter name=pathSuffix value=/ /implementation
svn commit: r1182506 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/parse/ParserFactory.java src/plugin/parse-html/plugin.xml
Author: lewismc Date: Wed Oct 12 18:22:20 2011 New Revision: 1182506 URL: http://svn.apache.org/viewvc?rev=1182506view=rev Log: commit to address NUTCH-1097 and update to changes.txt Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java nutch/trunk/src/plugin/parse-html/plugin.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1182506r1=1182505r2=1182506view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Wed Oct 12 18:22:20 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release 1.4 - Current development +* NUTCH-1097 application/xhtml+xml should be enabled in plugin.xml of parse-html; allow multiple mimetypes for plugin.xml (Ferdy via lewismc) + * NUTCH-797 Fix parse-tika and parse-html to use relative URL resolution per RFC-3986 (Robert Hohman, ab) Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=1182506r1=1182505r2=1182506view=diff == --- nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java (original) +++ nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Wed Oct 12 18:22:20 2011 @@ -343,14 +343,13 @@ public final class ParserFactory { // NotMappedParserException for (int i=0; iextensions.length; i++) { -if (extensions[i].getAttribute(contentType) != null - extensions[i].getAttribute(contentType).equals( -contentType)) { - extList.add(extensions[i]); -} -else if (*.equals(extensions[i].getAttribute(contentType))){ + if (*.equals(extensions[i].getAttribute(contentType))){ extList.add(0, extensions[i]); } +else if (extensions[i].getAttribute(contentType) != null + contentType.matches(escapeContentType(extensions[i].getAttribute(contentType { + extList.add(extensions[i]); +} } if (extList.size() 0) { @@ -377,10 +376,18 @@ public final class ParserFactory { return (extList.size() 0) ? 
extList : null; } + + private String escapeContentType(String contentType) { + // Escapes contentType in order to use as a regex + // (and keep backwards compatibility). + // This enables to accept multiple types for a single parser. + return contentType.replace(+, \\+).replace(., \\.); + } private boolean match(Extension extension, String id, String type) { return ((id.equals(extension.getId())) -(type.equals(extension.getAttribute(contentType)) || extension.getAttribute(contentType).equals(*) || +(extension.getAttribute(contentType).equals(*) || + type.matches(escapeContentType(extension.getAttribute(contentType))) || type.equals(DEFAULT_PLUGIN))); } Modified: nutch/trunk/src/plugin/parse-html/plugin.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-html/plugin.xml?rev=1182506r1=1182505r2=1182506view=diff == --- nutch/trunk/src/plugin/parse-html/plugin.xml (original) +++ nutch/trunk/src/plugin/parse-html/plugin.xml Wed Oct 12 18:22:20 2011 @@ -39,7 +39,7 @@ implementation id=org.apache.nutch.parse.html.HtmlParser class=org.apache.nutch.parse.html.HtmlParser -parameter name=contentType value=text/html/ +parameter name=contentType value=text/html|application/xhtml+xml/ parameter name=pathSuffix value=/ /implementation
svn commit: r1182511 - in /nutch/branches/nutchgora: CHANGES.txt build.xml
Author: lewismc Date: Wed Oct 12 18:27:42 2011 New Revision: 1182511 URL: http://svn.apache.org/viewvc?rev=1182511view=rev Log: commit to address NUTCH-1109 and update to changes.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1182511r1=1182510r2=1182511view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Wed Oct 12 18:27:42 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-1109 Add Sonar targets to Ant build.xml (lewismc) + * NUTCH-1097 application/xhtml+xml should be enabled in plugin.xml of parse-html; allow multiple mimetypes for plugin.xml (Ferdy via lewismc) * Change plugin source directory languageidentifier to language-identifier (lewismc) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1182511r1=1182510r2=1182511view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Wed Oct 12 18:27:42 2011 @@ -705,5 +705,37 @@ /fileset /rat:report /target - + + !-- == -- + !-- SONAR targets -- + !-- == -- + + !-- Define the Sonar task if this hasn't been done in a common script -- + taskdef uri=antlib:org.sonar.ant resource=org/sonar/ant/antlib.xml +classpath path=/ + /taskdef + + !-- Add the target -- + target name=sonar +sonar:sonar workDir=${build.dir} key=org.apache.nutch:nutch version=2.0 xmlns:sonar=antlib:org.sonar.ant/ + +!-- source directories (required) -- +sources + path location=${src.dir} / +/sources + +!-- list of properties (optional) -- +property key=sonar.projectName value=Nutchgora 2.0 Sonar Analysis / +property key=sonar.dynamicAnalysis value=false / + +!-- test source directories (optional) -- +tests + path location=${test.src.dir} / +/tests + +!-- binaries directories, which contain for 
example the compiled Java bytecode -- + binaries + path location=${build.dir}/classes/ + /binaries + /target /project
svn commit: r1179603 - in /nutch/trunk: CHANGES.txt build.xml
Author: lewismc Date: Thu Oct 6 12:49:31 2011 New Revision: 1179603 URL: http://svn.apache.org/viewvc?rev=1179603view=rev Log: commit to address NUTCH-1136 and update to changes.txt Modified: nutch/trunk/CHANGES.txt nutch/trunk/build.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1179603r1=1179602r2=1179603view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Oct 6 12:49:31 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release 1.4 - Current development +* NUTCH-1136 Ant pmd target is broken + * NUTCH-1058 Upgrade Solr schema to version 1.4 (markus) * NUTCH-1137 LinkDB invertlinks other options ignored when using -dir option (Sebastian Nagel, markus) Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1179603r1=1179602r2=1179603view=diff == --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Thu Oct 6 12:49:31 2011 @@ -323,40 +323,8 @@ /target !-- == -- - !-- Run code checks (PMD) -- + !-- Run Nutch proxy-- !-- == -- - target name=pmd depends=compile - property name=pmd.report location=${build.dir}/pmd-report.html / - taskdef name=pmd classname=net.sourceforge.pmd.ant.PMDTask - classpath - fileset dir=${lib.dir} -include name=pmd-ext/*.jar / -include name=xerces*.jar / - /fileset - /classpath - /taskdef - pmd shortFilenames=true failonerror=true failOnRuleViolation=false -encoding=${build.encoding} failuresPropertyName=pmd.failures - rulesetunusedcode/ruleset - !--rulesetbasic/ruleset-- - !--rulesetoptimizations/ruleset-- - formatter type=html toFile=${pmd.report} / - !-- formatter type=xml toFile=${tempbuild}/$report_pmd.xml/ -- - fileset dir=${basedir}/src - include name=java/**/*.java/ - include name=plugin/**/*.java/ - /fileset -/pmd - condition property=pmd.stop value=true - and -isset property=pmd.failures / - not -equals arg1=0 arg2=${pmd.failures} trim=true / - /not - /and - /condition - fail if=pmd.stopFAILURE: PMD shows 
${pmd.failures} rule violations. See ${pmd.report} for details./fail - /target target name=proxy depends=job, compile-core-test java classname=org.apache.nutch.tools.proxy.TestbedProxy fork=true @@ -370,6 +338,10 @@ /java /target + !-- == -- + !-- Run Nutch benchmarking analysis-- + !-- == -- + target name=benchmark java classname=org.apache.nutch.tools.Benchmark fork=true classpath refid=test.classpath/
svn commit: r1179605 - in /nutch/branches/nutchgora: CHANGES.txt build.xml
Author: lewismc Date: Thu Oct 6 12:55:08 2011 New Revision: 1179605 URL: http://svn.apache.org/viewvc?rev=1179605view=rev Log: commit to address NUTCH-1136 and update to changes.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1179605r1=1179604r2=1179605view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Thu Oct 6 12:55:08 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-1136 Ant pmd target is broken + * NUTCH-1058 Upgrade Solr schema version to 1.4 (markus) * NUTCH-672 allow unit tests to be run from bin/nutch (Todd Lipton via lewismc) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1179605r1=1179604r2=1179605view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Thu Oct 6 12:55:08 2011 @@ -328,50 +328,13 @@ /target !-- == -- - !-- Run code checks (PMD) -- + !-- Run Nutch proxy-- !-- == -- - target name=pmd depends=compile - property name=pmd.report location=${build.dir}/pmd-report.html / - taskdef name=pmd classname=net.sourceforge.pmd.ant.PMDTask - classpath - fileset dir=${lib.dir} -include name=pmd-ext/*.jar / -include name=xerces*.jar / - /fileset - /classpath - /taskdef - pmd shortFilenames=true failonerror=true failOnRuleViolation=false -encoding=${build.encoding} failuresPropertyName=pmd.failures - rulesetunusedcode/ruleset - !--rulesetbasic/ruleset-- - !--rulesetoptimizations/ruleset-- - formatter type=html toFile=${pmd.report} / - !-- formatter type=xml toFile=${tempbuild}/$report_pmd.xml/ -- - fileset dir=${basedir}/src - include name=java/**/*.java/ - include name=plugin/**/*.java/ - /fileset -/pmd - condition property=pmd.stop value=true - and -isset property=pmd.failures / - not -equals 
arg1=0 arg2=${pmd.failures} trim=true / - /not - /and - /condition - fail if=pmd.stopFAILURE: PMD shows ${pmd.failures} rule violations. See ${pmd.report} for details./fail - /target - target name=proxy depends=job + target name=proxy depends=job, compile-core-test java classname=org.apache.nutch.tools.proxy.TestbedProxy fork=true classpath refid=test.classpath/ arg value=-fake/ - arg value=-hostMode/ - arg value=u/ - arg value=-pageMode/ - arg value=u/ - arg value=-debug/ !-- arg value=-delay/ arg value=-200/ @@ -380,15 +343,9 @@ /java /target - target name=rundb -java classname=org.hsqldb.server.Server fork=true - classpath refid=test.classpath/ - arg value=--database.0/ - arg value=file:data/benchmark/ - arg value=--dbname.0/ - arg value=nutchtest/ -/java - /target + !-- == -- + !-- Run Nutch benchmarking analysis-- + !-- == -- target name=benchmark java classname=org.apache.nutch.tools.Benchmark fork=true
svn commit: r1177269 - in /nutch/branches/nutchgora: CHANGES.txt build.xml src/bin/nutch
Author: lewismc Date: Thu Sep 29 11:38:30 2011 New Revision: 1177269 URL: http://svn.apache.org/viewvc?rev=1177269view=rev Log: commit to address NUTCH-672 to update to changes.txt Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml nutch/branches/nutchgora/src/bin/nutch Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1177269r1=1177268r2=1177269view=diff == --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Thu Sep 29 11:38:30 2011 @@ -1,12 +1,14 @@ Nutch Change Log -Release 2.0 - Current Development +Release nutchgora - Current Development + +* NUTCH-672 allow unit tests to be run from bin/nutch (Todd Lipton via lewismc) * NUTCH-937 Put plugins in classes/plugins in job file (Claudio Martella, Ferdy Galema, jnioche) * NUTCH-1131 Rely on published artefacts for GORA (jnioche) -* NUTCH-1099 Adds HBase and Cassandra storage properties to nutch-default.xml +* NUTCH-1099 Adds HBase and Cassandra storage properties to nutch-default.xml (lewismc) * NUTCH-1096 Empty (not null) ContentLength results in failure of fetch (Ferdy Galema via jnioche) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1177269r1=1177268r2=1177269view=diff == --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Thu Sep 29 11:38:30 2011 @@ -304,6 +304,9 @@ copy todir=${runtime.local}/plugins fileset dir=${build.dir}/plugins/ /copy +copy todir=${runtime.local}/test + fileset dir=${build.dir}/test/ +/copy /target !-- == -- Modified: nutch/branches/nutchgora/src/bin/nutch URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/bin/nutch?rev=1177269r1=1177268r2=1177269view=diff == --- nutch/branches/nutchgora/src/bin/nutch (original) +++ nutch/branches/nutchgora/src/bin/nutch Thu Sep 29 11:38:30 2011 @@ -48,17 +48,18 @@ if [ $# = 0 ]; then echo Usage: 
nutch [-core] COMMAND echo where COMMAND is one of: # echo crawl one-step crawler for intranets - echo inject inject new urls into the database - echo generate generate new segments to fetch from crawl db - echo fetch fetch URLs marked during generate - echo parse parse URLs marked during fetch - echo updatedb update web table after parsing - echo readdb read/dump records from page database - echo solrindex run the solr indexer on parsed segments and linkdb - echo solrdedup remove duplicates from solr - echo plugin load a plugin and run one of its classes main() + echo injectinject new urls into the database + echo generate generate new segments to fetch from crawl db + echo fetch fetch URLs marked during generate + echo parse parse URLs marked during fetch + echo updatedb update web table after parsing + echo readdbread/dump records from page database + echo solrindex run the solr indexer on parsed segments and linkdb + echo solrdedup remove duplicates from solr + echo pluginload a plugin and run one of its classes main() + echo junit runs the given JUnit test echo or - echo CLASSNAME run the class named CLASSNAME + echo CLASSNAME run the class named CLASSNAME echo Most commands print help when invoked w/o parameters. echo echo Expert: -core option is for developers only. It avoids building the job jar, @@ -199,6 +200,9 @@ elif [ $COMMAND = solrdedup ] ; then CLASS=org.apache.nutch.indexer.solr.SolrDeleteDuplicates elif [ $COMMAND = plugin ] ; then CLASS=org.apache.nutch.plugin.PluginRepository +elif [ $COMMAND = junit ] ; then + CLASSPATH=$CLASSPATH:test/classes/ + CLASS=junit.textui.TestRunner else CLASS=$COMMAND fi
svn commit: r1177290 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/util/LogUtil.java
Author: lewismc Date: Thu Sep 29 12:47:39 2011 New Revision: 1177290 URL: http://svn.apache.org/viewvc?rev=1177290view=rev Log: commit to address NUTCH-1078 and update to changes.txt Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1177290r1=1177289r2=1177290view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Sep 29 12:47:39 2011 @@ -6,7 +6,7 @@ Release 1.4 - Current development * NUTCH-937 Put plugins in classes/plugins in job file (Claudio Martella, Ferdy Galema, jnioche) -* NUTCH-623 Change plugin source directory languageidentifier to language-identifier +* NUTCH-623 Change plugin source directory languageidentifier to language-identifier (lewismc) * NUTCH-1074 topN is ignored with maxNumSegments and generate.max.count (Robert Thomson via markus) Modified: nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java?rev=1177290r1=1177289r2=1177290view=diff == --- nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java (original) +++ nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java Thu Sep 29 12:47:39 2011 @@ -22,7 +22,7 @@ import java.io.IOException; import java.io.PrintStream; import java.lang.reflect.Method; -// Commons Logging imports +// slf4j Logging imports import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,12 +45,12 @@ public class LogUtil { static { try { - TRACE = Logger.class.getMethod(trace, new Class[] { Object.class }); - DEBUG = Logger.class.getMethod(debug, new Class[] { Object.class }); - INFO = Logger.class.getMethod(info, new Class[] { Object.class }); - WARN = Logger.class.getMethod(warn, new Class[] { Object.class }); - ERROR = Logger.class.getMethod(error, new Class[] { Object.class }); - FATAL = Logger.class.getMethod(error, new Class[] { Object.class }); + TRACE = 
Logger.class.getMethod(trace, new Class[] { String.class }); + DEBUG = Logger.class.getMethod(debug, new Class[] { String.class }); + INFO = Logger.class.getMethod(info, new Class[] { String.class }); + WARN = Logger.class.getMethod(warn, new Class[] { String.class }); + ERROR = Logger.class.getMethod(error, new Class[] { String.class }); + FATAL = Logger.class.getMethod(error, new Class[] { String.class }); } catch(Exception e) { if (LOG.isErrorEnabled()) { LOG.error(Cannot init log methods, e); @@ -100,7 +100,7 @@ public class LogUtil { if (!hasNewline()) return; try { -method.invoke(logger, new Object[] { toString().trim() }); +method.invoke(logger, new String[] { toString().trim() }); } catch (Exception e) { if (LOG.isErrorEnabled()) { LOG.error(Cannot log with method [ + method + ], e);
svn commit: r1172043 - in /nutch/site: forrest/src/documentation/ forrest/src/documentation/content/xdocs/ forrest/src/documentation/resources/images/ publish/ publish/images/ publish/skin/images/
Author: lewismc Date: Sat Sep 17 18:41:15 2011 New Revision: 1172043 URL: http://svn.apache.org/viewvc?rev=1172043view=rev Log: commit to update site to address NUTCH-1092, as well as other trivial site update, of which I suspect there will be some more once voting has finished. Added: nutch/site/forrest/src/documentation/content/xdocs/faq.xml nutch/site/forrest/src/documentation/content/xdocs/tutorial.xml nutch/site/forrest/src/documentation/content/xdocs/wiki.xml nutch/site/forrest/src/documentation/resources/images/feather-small.gif (with props) Removed: nutch/site/forrest/src/documentation/content/xdocs/i18n.xml Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml nutch/site/forrest/src/documentation/content/xdocs/index.xml nutch/site/forrest/src/documentation/content/xdocs/site.xml nutch/site/forrest/src/documentation/content/xdocs/tabs.xml nutch/site/forrest/src/documentation/skinconf.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/broken-links.xml nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/images/built-with-forrest-button.png nutch/site/publish/index.html nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/skin/images/built-with-forrest-button.png nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/about.xml?rev=1172043r1=1172042r2=1172043view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/about.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/about.xml Sat 
Sep 17 18:41:15 2011 @@ -40,7 +40,7 @@ plugin infrastructure./p pFor more information about Apache Nutch, please see the a - href=ext:wikiNutch wiki./a/p + href=wiki.htmlNutch wiki./a/p Added: nutch/site/forrest/src/documentation/content/xdocs/faq.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/faq.xml?rev=1172043view=auto == --- nutch/site/forrest/src/documentation/content/xdocs/faq.xml (added) +++ nutch/site/forrest/src/documentation/content/xdocs/faq.xml Sat Sep 17 18:41:15 2011 @@ -0,0 +1,35 @@ +?xml version=1.0? +!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the License); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an AS IS BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+-- +!DOCTYPE document PUBLIC -//APACHE//DTD Documentation V2.0//EN + http://forrest.apache.org/dtd/document-v20.dtd; + +document + +header + titleApache Nutch FAQ's/title +/header + +body + +section + titleFrequently Asked Questions/title pWelcome to the Apache Nutch FAQ's which can be found a href=http://wiki.apache.org/nutch/FAQ;here/a./p +/section + +/body + +/document Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1172043r1=1172042r2=1172043view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/index.xml Sat Sep 17 18:41:15 2011 @@ -1,6 +1,4 @@ ?xml version=1.0? -!DOCTYPE document PUBLIC -//APACHE//DTD Documentation V2.0//EN - http://forrest.apache.org/dtd/document-v20.dtd; !-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -17,12 +15,13 @@ See the License for the specific language governing permissions and limitations under the License. -- +!DOCTYPE document
svn commit: r1169475 - in /nutch/trunk: CHANGES.txt conf/nutch-default.xml
Author: lewismc Date: Sun Sep 11 16:40:22 2011 New Revision: 1169475 URL: http://svn.apache.org/viewvc?rev=1169475view=rev Log: commit to address NUTCH-1099 and update to changes.txt Modified: nutch/trunk/CHANGES.txt nutch/trunk/conf/nutch-default.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1169475r1=1169474r2=1169475view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Sun Sep 11 16:40:22 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release 2.0 - Current Development +* NUTCH-1099 Adds HBase and Cassandra storage properties to nutch-default.xml + * NUTCH-1096 Empty (not null) ContentLength results in failure of fetch (Ferdy Galema via jnioche) * NUTCH-1089 Short compressed pages caused exception in protocol-httpclient (Simone Frenzel via jnioche) Modified: nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1169475r1=1169474r2=1169475view=diff == --- nutch/trunk/conf/nutch-default.xml (original) +++ nutch/trunk/conf/nutch-default.xml Sun Sep 11 16:40:22 2011 @@ -1068,6 +1068,22 @@ descriptionDefault class for storing data/description /property +!-- +property + namestorage.data.store.class/name + valueorg.apache.gora.cassandra.store.CassandraStore/value + descriptionClass for storing data in Apache Cassandra/description +/property +-- + +!-- +property + namestorage.data.store.class/name + valueorg.apache.gora.hbase.store.HBaseStore/value + descriptionClass for storing data in Apache HBase/description +/property +-- + property namestorage.schema/name valuewebpage/value
svn commit: r1169502 - in /nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield: StaticFieldIndexer.java package.html
Author: lewismc Date: Sun Sep 11 19:13:38 2011 New Revision: 1169502 URL: http://svn.apache.org/viewvc?rev=1169502&view=rev Log: commit to address final patch for NUTCH-940 1.4 branch. Added: nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html Modified: nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java Modified: nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java?rev=1169502&r1=1169501&r2=1169502&view=diff == --- nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java (original) +++ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java Sun Sep 11 19:13:38 2011 @@ -29,6 +29,11 @@ import org.apache.nutch.parse.Parse; import org.apache.hadoop.io.Text; import org.apache.hadoop.conf.Configuration; +/** A simple plugin called at indexing that adds fields with static data. + * You can specify a list of fieldname:fieldcontent per nutch job. + * It can be useful when collections can't be created by urlpatterns, + * like in subcollection, but on a job-basis. 
*/ + public class StaticFieldIndexer implements IndexingFilter { private Configuration conf; private HashMap<String, String[]> fields; Added: nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html?rev=1169502&view=auto == --- nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html (added) +++ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html Sun Sep 11 19:13:38 2011 @@ -0,0 +1,5 @@ +<html> +<body> +<p>A simple plugin called at indexing that adds fields with static data. You can specify a list of fieldname:fieldcontent per nutch job. It can be useful when collections can't be created by urlpatterns, like in subcollection, but on a job-basis.</p><p></p> +</body> +</html>
svn commit: r1167651 - in /nutch/branches/branch-1.4: ./ conf/ src/plugin/ src/plugin/index-static/ src/plugin/index-static/src/ src/plugin/index-static/src/java/ src/plugin/index-static/src/java/org/
Author: lewismc Date: Sat Sep 10 23:46:00 2011 New Revision: 1167651 URL: http://svn.apache.org/viewvc?rev=1167651view=rev Log: commit to address NUTCH-940 and update to changes.txt Added: nutch/branches/branch-1.4/src/plugin/index-static/ nutch/branches/branch-1.4/src/plugin/index-static/build.xml nutch/branches/branch-1.4/src/plugin/index-static/ivy.xml nutch/branches/branch-1.4/src/plugin/index-static/plugin.xml nutch/branches/branch-1.4/src/plugin/index-static/src/ nutch/branches/branch-1.4/src/plugin/index-static/src/java/ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/ nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java Modified: nutch/branches/branch-1.4/CHANGES.txt nutch/branches/branch-1.4/conf/nutch-default.xml nutch/branches/branch-1.4/src/plugin/build.xml Modified: nutch/branches/branch-1.4/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1167651r1=1167650r2=1167651view=diff == --- nutch/branches/branch-1.4/CHANGES.txt (original) +++ nutch/branches/branch-1.4/CHANGES.txt Sat Sep 10 23:46:00 2011 @@ -2,6 +2,12 @@ Nutch Change Log Release 1.4 - Current development +* NUTCH-940 static field plugin (Claudio Martella via lewismc) + +* NUTCH-914 Implement Apache Project Branding Requirements (lewismc) + +* NUTCH-1095 remove i18n from Nutch site to archive and legacy secton of wiki (lewismc) + * NUTCH-1101 Option to purge db_gone records with updatedb (markus) * NUTCH-1096 Empty (not null) ContentLength results in failure of fetch (Ferdy Galema via jnioche) Modified: nutch/branches/branch-1.4/conf/nutch-default.xml 
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/conf/nutch-default.xml?rev=1167651r1=1167650r2=1167651view=diff == --- nutch/branches/branch-1.4/conf/nutch-default.xml (original) +++ nutch/branches/branch-1.4/conf/nutch-default.xml Sat Sep 10 23:46:00 2011 @@ -1050,6 +1050,19 @@ /description /property +!-- index-static plugin properties -- + +property + nameindex-static/name + value/value + description + A simple plugin called at indexing that adds fields with static data. + You can specify a list of fieldname:fieldcontent per nutch job. + It can be useful when collections can't be created by urlpatterns, + like in subcollection, but on a job-basis. + /description +/property + !-- Temporary Hadoop 0.17.x workaround. -- property Modified: nutch/branches/branch-1.4/src/plugin/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/build.xml?rev=1167651r1=1167650r2=1167651view=diff == --- nutch/branches/branch-1.4/src/plugin/build.xml (original) +++ nutch/branches/branch-1.4/src/plugin/build.xml Sat Sep 10 23:46:00 2011 @@ -31,6 +31,7 @@ ant dir=index-basic target=deploy/ ant dir=index-anchor target=deploy/ ant dir=index-more target=deploy/ + ant dir=index-static target=deploy/ ant dir=languageidentifier target=deploy/ ant dir=lib-http target=deploy/ ant dir=lib-nekohtml target=deploy/ @@ -101,6 +102,7 @@ ant dir=index-basic target=clean/ ant dir=index-anchor target=clean/ ant dir=index-more target=clean/ +ant dir=index-static target=clean/ ant dir=languageidentifier target=clean/ ant dir=lib-commons-httpclient target=clean/ ant dir=lib-http target=clean/ Added: nutch/branches/branch-1.4/src/plugin/index-static/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/index-static/build.xml?rev=1167651view=auto == --- nutch/branches/branch-1.4/src/plugin/index-static/build.xml (added) +++ nutch/branches/branch-1.4/src/plugin/index-static/build.xml Sat Sep 10 23:46:00 2011 @@ -0,0 +1,22 @@ +?xml version=1.0? 
+!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the License); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed
svn commit: r1161287 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/
Author: lewismc Date: Wed Aug 24 21:31:07 2011 New Revision: 1161287 URL: http://svn.apache.org/viewvc?rev=1161287view=rev Log: commit to address NUTCH-1095 Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1161287r1=1161286r2=1161287view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Wed Aug 24 21:31:07 2011 @@ -46,7 +46,6 @@ See http://forrest.apache.org/docs/linki wikilabel=Wiki href=ext:wiki / tutoriallabel=Tutorial href=ext:tutorial / webmasters label=Robothref=bot.html / -i18nlabel=i18n href=i18n.html / apidocs label=API Docs (1.3)href=apidocs-1.3/index.html/ apidocs label=API Docs (1.2)href=apidocs-1.2/index.html/ apidocs label=API Docs (nightly)href=ext:nightly-api / Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1161287r1=1161286r2=1161287view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Wed Aug 24 21:31:07 2011 @@ -183,9 +183,6 @@ document.write(Last Published: + docu a href=bot.htmlRobot /a /div div class=menuitem -a href=i18n.htmli18n/a -/div -div class=menuitem a 
href=apidocs-1.3/index.htmlAPI Docs (1.3)/a /div div class=menuitem Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1161287r1=1161286r2=1161287view=diff == Binary files - no diff available. Modified: nutch/site/publish/bot.html URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1161287r1=1161286r2=1161287view=diff == --- nutch/site/publish/bot.html (original) +++ nutch/site/publish/bot.html Wed Aug 24 21:31:07 2011 @@ -183,9 +183,6 @@ document.write(Last Published: + docu div class=menupagetitleRobot /div /div div class=menuitem -a href=i18n.htmli18n/a -/div -div class=menuitem a href=apidocs-1.3/index.htmlAPI Docs (1.3)/a /div div class=menuitem Modified: nutch/site/publish/bot.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1161287r1=1161286r2=1161287view=diff == Binary files - no diff available. Modified: nutch/site/publish/credits.html URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1161287r1=1161286r2=1161287view=diff == --- nutch/site/publish/credits.html (original) +++ nutch/site/publish/credits.html Wed Aug 24 21:31:07 2011 @@ -183,9 +183,6 @@ document.write(Last Published: + docu a href=bot.htmlRobot /a /div div class=menuitem -a href=i18n.htmli18n/a -/div -div class=menuitem a href=apidocs-1.3/index.htmlAPI Docs (1.3)/a /div div class=menuitem Modified: nutch/site/publish/credits.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1161287r1=1161286r2=1161287view=diff == Binary files - no diff available. 
Modified: nutch/site/publish/index.html URL: http://svn.apache.org/viewvc/nutch/site/publish/index.html?rev=1161287r1=1161286r2=1161287view=diff == --- nutch/site/publish/index.html (original) +++ nutch/site/publish/index.html Wed Aug 24 21:31:07 2011 @@ -183,9 +183,6 @@ document.write(Last Published: + docu a href=bot.htmlRobot /a /div div class=menuitem -a href=i18n.htmli18n/a -/div -div class=menuitem a href=apidocs-1.3/index.htmlAPI Docs (1.3)/a /div div class=menuitem Modified: nutch/site/publish/index.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish
svn commit: r1156683 - in /nutch/branches/branch-1.4: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml
Author: lewismc Date: Thu Aug 11 17:20:06 2011 New Revision: 1156683 URL: http://svn.apache.org/viewvc?rev=1156683view=rev Log: commit to address NUTCH-623 and NUTCH-914 as well as update to changes.txt Modified: nutch/branches/branch-1.4/CHANGES.txt nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml Modified: nutch/branches/branch-1.4/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1156683r1=1156682r2=1156683view=diff == --- nutch/branches/branch-1.4/CHANGES.txt (original) +++ nutch/branches/branch-1.4/CHANGES.txt Thu Aug 11 17:20:06 2011 @@ -2,6 +2,10 @@ Nutch Change Log Release 1.4 - Current development +* NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche) + +*NUTCH-623 Change plugin source directory languageidentifier to language-identifier (lewismc) + * NUTCH-1069 Readlinkdb broken on Hadoop 0.20 (markus) * NUTCH-1044 Redirected URLs and possibly all of their outlinked URLs have invalid scores (jnioche) Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml?rev=1156683r1=1156682r2=1156683view=diff == --- nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml (original) +++ nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml Thu Aug 11 17:20:06 2011 @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
-- -project name=language-identifier default=jar-core +project name=languageidentifier default=jar-core import file=../build-plugin.xml/ Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml?rev=1156683r1=1156682r2=1156683view=diff == --- nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml (original) +++ nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml Thu Aug 11 17:20:06 2011 @@ -16,7 +16,7 @@ limitations under the License. -- plugin - id=language-identifier + id=languageidentifier name=Language Identification Parser/Filter version=1.0.0 provider-name=nutch.org
svn commit: r1156692 - in /nutch/trunk: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml
Author: lewismc Date: Thu Aug 11 17:25:51 2011 New Revision: 1156692 URL: http://svn.apache.org/viewvc?rev=1156692view=rev Log: commit to address NUTCH-623 and changes.txt Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/plugin/languageidentifier/build.xml nutch/trunk/src/plugin/languageidentifier/plugin.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1156692r1=1156691r2=1156692view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Aug 11 17:25:51 2011 @@ -2,6 +2,10 @@ Nutch Change Log Release 2.0 - Current Development +* NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche) + +* NUTCH-623 Change plugin source directory languageidentifier to language-identifier (lewismc) + * NUTCH-1065 New mvn.template (lewismc) * NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche) Modified: nutch/trunk/src/plugin/languageidentifier/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/build.xml?rev=1156692r1=1156691r2=1156692view=diff == --- nutch/trunk/src/plugin/languageidentifier/build.xml (original) +++ nutch/trunk/src/plugin/languageidentifier/build.xml Thu Aug 11 17:25:51 2011 @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. -- -project name=language-identifier default=jar-core +project name=languageidentifier default=jar-core import file=../build-plugin.xml/ Modified: nutch/trunk/src/plugin/languageidentifier/plugin.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/plugin.xml?rev=1156692r1=1156691r2=1156692view=diff == --- nutch/trunk/src/plugin/languageidentifier/plugin.xml (original) +++ nutch/trunk/src/plugin/languageidentifier/plugin.xml Thu Aug 11 17:25:51 2011 @@ -16,7 +16,7 @@ limitations under the License. 
-- plugin - id=language-identifier + id=languageidentifier name=Language Identification Parser/Filter version=1.0.0 provider-name=nutch.org
svn commit: r1156711 - in /nutch/branches/branch-1.4: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml
Author: lewismc Date: Thu Aug 11 18:16:31 2011 New Revision: 1156711 URL: http://svn.apache.org/viewvc?rev=1156711view=rev Log: reverting changes made by commit of NUTCH-623 as the patch breaks tests Modified: nutch/branches/branch-1.4/CHANGES.txt nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml Modified: nutch/branches/branch-1.4/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1156711r1=1156710r2=1156711view=diff == --- nutch/branches/branch-1.4/CHANGES.txt (original) +++ nutch/branches/branch-1.4/CHANGES.txt Thu Aug 11 18:16:31 2011 @@ -4,8 +4,6 @@ Release 1.4 - Current development * NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche) -*NUTCH-623 Change plugin source directory languageidentifier to language-identifier (lewismc) - * NUTCH-1069 Readlinkdb broken on Hadoop 0.20 (markus) * NUTCH-1044 Redirected URLs and possibly all of their outlinked URLs have invalid scores (jnioche) Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml?rev=1156711r1=1156710r2=1156711view=diff == --- nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml (original) +++ nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml Thu Aug 11 18:16:31 2011 @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
-- -project name=languageidentifier default=jar-core +project name=language-identifier default=jar-core import file=../build-plugin.xml/ Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml?rev=1156711r1=1156710r2=1156711view=diff == --- nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml (original) +++ nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml Thu Aug 11 18:16:31 2011 @@ -16,7 +16,7 @@ limitations under the License. -- plugin - id=languageidentifier + id=language-identifier name=Language Identification Parser/Filter version=1.0.0 provider-name=nutch.org
svn commit: r1156712 - in /nutch/trunk: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml
Author: lewismc Date: Thu Aug 11 18:18:27 2011 New Revision: 1156712 URL: http://svn.apache.org/viewvc?rev=1156712view=rev Log: commit to revert changes by NUTCH-623 which broke tests. Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/plugin/languageidentifier/build.xml nutch/trunk/src/plugin/languageidentifier/plugin.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1156712r1=1156711r2=1156712view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Aug 11 18:18:27 2011 @@ -4,8 +4,6 @@ Release 2.0 - Current Development * NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche) -* NUTCH-623 Change plugin source directory languageidentifier to language-identifier (lewismc) - * NUTCH-1065 New mvn.template (lewismc) * NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche) Modified: nutch/trunk/src/plugin/languageidentifier/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/build.xml?rev=1156712r1=1156711r2=1156712view=diff == --- nutch/trunk/src/plugin/languageidentifier/build.xml (original) +++ nutch/trunk/src/plugin/languageidentifier/build.xml Thu Aug 11 18:18:27 2011 @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. -- -project name=languageidentifier default=jar-core +project name=language-identifier default=jar-core import file=../build-plugin.xml/ Modified: nutch/trunk/src/plugin/languageidentifier/plugin.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/plugin.xml?rev=1156712r1=1156711r2=1156712view=diff == --- nutch/trunk/src/plugin/languageidentifier/plugin.xml (original) +++ nutch/trunk/src/plugin/languageidentifier/plugin.xml Thu Aug 11 18:18:27 2011 @@ -16,7 +16,7 @@ limitations under the License. 
-- plugin - id=languageidentifier + id=language-identifier name=Language Identification Parser/Filter version=1.0.0 provider-name=nutch.org
svn commit: r1156101 - /nutch/trunk/doap.rdf
Author: lewismc Date: Wed Aug 10 10:47:24 2011 New Revision: 1156101 URL: http://svn.apache.org/viewvc?rev=1156101view=rev Log: commit to address NUTCH-920 adding trunk 2.0 DOAP file to svn. Added: nutch/trunk/doap.rdf Added: nutch/trunk/doap.rdf URL: http://svn.apache.org/viewvc/nutch/trunk/doap.rdf?rev=1156101view=auto == --- nutch/trunk/doap.rdf (added) +++ nutch/trunk/doap.rdf Wed Aug 10 10:47:24 2011 @@ -0,0 +1,57 @@ +?xml version=1.0? +?xml-stylesheet type=text/xsl? +rdf:RDF xml:lang=en + xmlns=http://usefulinc.com/ns/doap#; + xmlns:rdf=http://www.w3.org/1999/02/22-rdf-syntax-ns#; + xmlns:asfext=http://projects.apache.org/ns/asfext#; + xmlns:foaf=http://xmlns.com/foaf/0.1/; +!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the License); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an AS IS BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +-- + Project rdf:about=http://nutch.apache.org; +created2011-07-21/created +license rdf:resource=http://usefulinc.com/doap/licenses/asl20; / +nameApache Nutch/name +homepage rdf:resource=http://nutch.apache.org; / +asfext:pmc rdf:resource=http://nutch.apache.org; / +shortdescNutch 2.0 is a bleeding edge (trunk) development of the Apache Nutch web search software./shortdesc +descriptionApache Nutch 2.0 maintains a refined architecture by delegating searching, parsing, and data storage to other software projects. 
In particular the storage layer has been delegated to the object relational mapping framework Gora (Apache Incubator) enabling the focus of Nutch 2.0 to be entirely on web crawling. This logic promotes Nutch 2.0 as a simpler, focussed web crawler enabling easy integration with other resources. /description +bug-database rdf:resource=http://issues.apache.org/jira/browse/NUTCH; / +mailing-list rdf:resource=http://www.mail-archive.com/dev%40nutch.apache.org/; / +download-page rdf:resource=http://svn.apache.org/repos/asf/nutch/trunk/; / +programming-languageJava/programming-language +category rdf:resource=http://projects.apache.org/category/web-framework; / +release + Version +nameNutch 2.0 Trunk/name +createdtbc/created +revision2.0/revision + /Version +/release +repository + SVNRepository +location rdf:resource=https://svn.apache.org/repos/asf/nutch/trunk// +browse rdf:resource=http://svn.apache.org/viewvc/nutch/trunk// + /SVNRepository +/repository +maintainer + foaf:Person +foaf:nameNutch PMC/foaf:name + foaf:mbox rdf:resource=mailto:d...@nutch.apache.org/ + /foaf:Person +/maintainer + /Project +/rdf:RDF
svn commit: r1153833 - in /nutch/trunk: CHANGES.txt conf/domain-urlfilter.txt ivy/mvn.template
Author: lewismc Date: Thu Aug 4 10:24:04 2011 New Revision: 1153833 URL: http://svn.apache.org/viewvc?rev=1153833view=rev Log: commit to address NUTCH-1065 - New mvn.template and update of changes.txt Modified: nutch/trunk/CHANGES.txt nutch/trunk/conf/domain-urlfilter.txt nutch/trunk/ivy/mvn.template Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1153833r1=1153832r2=1153833view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Aug 4 10:24:04 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release 2.0 - Current Development +* NUTCH-1065 New mvn.template (lewismc) + * NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche) * NUTCH-1037 Option to deduplicate anchors prior to indexing (markus) Modified: nutch/trunk/conf/domain-urlfilter.txt URL: http://svn.apache.org/viewvc/nutch/trunk/conf/domain-urlfilter.txt?rev=1153833r1=1153832r2=1153833view=diff == --- nutch/trunk/conf/domain-urlfilter.txt (original) +++ nutch/trunk/conf/domain-urlfilter.txt Thu Aug 4 10:24:04 2011 @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# config file for urlfilter-domsin plugin \ No newline at end of file +# config file for urlfilter-domain plugin Modified: nutch/trunk/ivy/mvn.template URL: http://svn.apache.org/viewvc/nutch/trunk/ivy/mvn.template?rev=1153833r1=1153832r2=1153833view=diff == --- nutch/trunk/ivy/mvn.template (original) +++ nutch/trunk/ivy/mvn.template Thu Aug 4 10:24:04 2011 @@ -64,11 +64,6 @@ emailjnio...@apache.org/email /developer developer - idotis/id - nameOtis Gospodnetić/name - emailo...@apache.org/email - /developer - developer idsiren/id nameSami Siren/name emailsi...@apache.org/email @@ -83,6 +78,11 @@ nameAlexis Detlegrode/name emailale...@apache.org/email /developer + developer + idlewismc/id + nameLewis John McGibbney/name + emaillewi...@apache.org/email + /developer /developers build sourceDirectorysrc/java/sourceDirectory
svn commit: r1153108 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/skin/images/
Author: lewismc Date: Tue Aug 2 12:43:01 2011 New Revision: 1153108 URL: http://svn.apache.org/viewvc?rev=1153108view=rev Log: commit to address NUTCH-917 Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml nutch/site/forrest/src/documentation/content/xdocs/site.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/i18n.html nutch/site/publish/i18n.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1153108r1=1153107r2=1153108view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original) +++ 
nutch/site/forrest/src/documentation/content/xdocs/index.xml Tue Aug 2 12:43:01 2011 @@ -22,7 +22,7 @@ header titleWelcome to Apache Nutch#174;/title abstractApache Nutch is an open source web-search software project. Nutch is a project of the a href=http://www.apache.org/;Apache Software Foundation/a -and is part of the larger Apache community of developers and users. More about Nutch can be found a href=about.htmlhere/a. +and is part of the larger Apache community of developers and users. More about Nutch can be found a href=./about.htmlhere./a /abstract /header Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1153108r1=1153107r2=1153108view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Tue Aug 2 12:43:01 2011 @@ -34,9 +34,11 @@ See http://forrest.apache.org/docs/linki newslabel=News href=index.html / about label=Abouthref=about.html / credits label=Credits href=credits.html / +thanks label=Thanks href=ext:thanks / store label=Buy Stuffhref=ext:store / sponsorlabel=Sponsorship href=ext:sponsor / licenselabel=License href=ext:license / +security label=Security href=ext:security / /project docs label=Documentation @@ -79,6 +81,8 @@ See http://forrest.apache.org/docs/linki release href=http://www.apache.org/dyn/closer.cgi/nutch// license href=http://www.apache.org/licenses// sponsor href=http://www.apache.org/foundation/sponsorship.html; / +thanks href=http://www.apache.org/foundation/thanks.html; / +security href=http://www.apache.org/security/; / /external-refs /site Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1153108r1=1153107r2=1153108view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Tue Aug 2 12:43:01 2011 @@ -153,6 +153,9 @@ 
document.write(Last Published: + docu a href=credits.htmlCredits/a /div div class=menuitem +a href=http://www.apache.org/foundation/thanks.html;Thanks/a +/div +div class=menuitem a href=http://www.cafepress.com/nutch/;Buy Stuff/a /div div class=menuitem @@ -161,6 +164,9 @@ document.write(Last Published: + docu div class=menuitem a href=http://www.apache.org/licenses/;License/a /div
svn commit: r1149508 - in /nutch/site: forrest/src/documentation/resources/images/nutch_logo_tm.gif publish/images/nutch_logo_tm.gif
Author: lewismc Date: Fri Jul 22 09:18:11 2011 New Revision: 1149508 URL: http://svn.apache.org/viewvc?rev=1149508view=rev Log: new Nutch 'tm' logo commit Added: nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif (with props) nutch/site/publish/images/nutch_logo_tm.gif (with props) Added: nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif?rev=1149508view=auto == Binary file - no diff available. Propchange: nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif -- svn:mime-type = application/octet-stream Added: nutch/site/publish/images/nutch_logo_tm.gif URL: http://svn.apache.org/viewvc/nutch/site/publish/images/nutch_logo_tm.gif?rev=1149508view=auto == Binary file - no diff available. Propchange: nutch/site/publish/images/nutch_logo_tm.gif -- svn:mime-type = application/octet-stream
svn commit: r1149641 - /nutch/branches/branch-1.4/conf/domain-urlfilter.txt
Author: lewismc Date: Fri Jul 22 15:50:06 2011 New Revision: 1149641 URL: http://svn.apache.org/viewvc?rev=1149641view=rev Log: commit to address NUTCH-1066 - very trivial update of domain-urlfilter.txt Modified: nutch/branches/branch-1.4/conf/domain-urlfilter.txt Modified: nutch/branches/branch-1.4/conf/domain-urlfilter.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/conf/domain-urlfilter.txt?rev=1149641r1=1149640r2=1149641view=diff == --- nutch/branches/branch-1.4/conf/domain-urlfilter.txt (original) +++ nutch/branches/branch-1.4/conf/domain-urlfilter.txt Fri Jul 22 15:50:06 2011 @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -# config file for urlfilter-domsin plugin \ No newline at end of file +# config file for urlfilter-domain plugin
svn commit: r1149263 - in /nutch/site: forrest/src/documentation/skinconf.xml publish/doap.rdf
Author: lewismc Date: Thu Jul 21 16:22:58 2011 New Revision: 1149263 URL: http://svn.apache.org/viewvc?rev=1149263view=rev Log: commit to address NUTCH-919 and NUTCH-920, hopefully this resolves NUTCH-914 for the time being. Modified: nutch/site/forrest/src/documentation/skinconf.xml nutch/site/publish/doap.rdf Modified: nutch/site/forrest/src/documentation/skinconf.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/skinconf.xml?rev=1149263r1=1149262r2=1149263view=diff == --- nutch/site/forrest/src/documentation/skinconf.xml (original) +++ nutch/site/forrest/src/documentation/skinconf.xml Thu Jul 21 16:22:58 2011 @@ -68,7 +68,7 @@ which will be used to configure the chos project-nameNutch/project-name project-descriptionOpen Source Web Search Software/project-description project-urlhttp://nutch.apache.org//project-url - project-logoimages/nutch-logo.gif/project-logo + project-logoimages/nutch-logo.png/project-logo !-- group logo -- group-nameApache/group-name Modified: nutch/site/publish/doap.rdf URL: http://svn.apache.org/viewvc/nutch/site/publish/doap.rdf?rev=1149263r1=1149262r2=1149263view=diff == --- nutch/site/publish/doap.rdf (original) +++ nutch/site/publish/doap.rdf Thu Jul 21 16:22:58 2011 @@ -6,31 +6,45 @@ xmlns:asfext=http://projects.apache.org/ns/asfext#; xmlns:foaf=http://xmlns.com/foaf/0.1/; !-- - === - - Copyright (c) 2006 The Apache Software Foundation. - All rights reserved. - - === +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the License); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an AS IS BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. -- - Project rdf:about=http://nutch.apache.org/; -created2006-02-28/created + Project rdf:about=http://nutch.apache.org; +created2011-07-21/created license rdf:resource=http://usefulinc.com/doap/licenses/asl20; / nameApache Nutch/name -homepage rdf:resource=http://nutch.apache.org/; / +homepage rdf:resource=http://nutch.apache.org; / asfext:pmc rdf:resource=http://nutch.apache.org; / -shortdescNutch is the open-source search engine./shortdesc -descriptionNutch is open source web-search software. It builds -on Lucene Java and Hadoop, adding web-specifics, such as a -crawler, a link-graph database, parsers for HTML and other -document formats, etc. -/description +shortdescApache Nutch is an open source web-search software project./shortdesc +descriptionApache Nutch is an open source web-search software project. Stemming from Apache Lucene, it now builds on Apache Solr adding web-specifics, such as a crawler, a link-graph database and parsing support handled by Apache Tika for HTML and and array other document formats. 
+ +Apache Nutch can run on a single machine, but gains a lot of its strength from running in a Hadoop cluster + +The system can be enhanced (eg other document formats can be parsed) using a highly flexible, easily extensible and thoroughly maintained plugin infrastructure./description bug-database rdf:resource=http://issues.apache.org/jira/browse/NUTCH; / mailing-list rdf:resource=http://nutch.apache.org/mailing_lists.html; / -download-page rdf:resource=http://www.apache.org/dyn/closer.cgi/lucene/nutch/; / +download-page rdf:resource=http://www.apache.org/dyn/closer.cgi/nutch/; / programming-languageJava/programming-language category rdf:resource=http://projects.apache.org/category/web-framework; / -wiki rdf:resource=http://wiki.apache.org/nutch// +release + Version +nameApache Nutch 1.3/name +created2011-06-07/created +revision1.3/revision + /Version +/release release Version branchbranch-1.0/branch @@ -73,14 +87,14 @@ /release repository SVNRepository -location rdf:resource=http://svn.apache.org/repos/asf/nutch// -browse rdf:resource=http://svn.apache.org/viewcvs.cgi/nutch// +location rdf:resource=https
svn commit: r1149267 - in /nutch/site/publish: ./ skin/images/
Author: lewismc Date: Thu Jul 21 16:29:04 2011 New Revision: 1149267 URL: http://svn.apache.org/viewvc?rev=1149267view=rev Log: to incorporate new logo within site Modified: nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/i18n.html nutch/site/publish/i18n.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1149267r1=1149266r2=1149267view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Thu Jul 21 16:29:04 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.gif title=Open Source Web 
Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.png title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1149267r1=1149266r2=1149267view=diff == Binary files - no diff available. Modified: nutch/site/publish/bot.html URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1149267r1=1149266r2=1149267view=diff == --- nutch/site/publish/bot.html (original) +++ nutch/site/publish/bot.html Thu Jul 21 16:29:04 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.gif title=Open Source Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.png title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/bot.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1149267r1=1149266r2=1149267view=diff == Binary files - no diff available. Modified: nutch/site/publish/credits.html URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1149267r1=1149266r2=1149267view=diff == --- nutch/site/publish/credits.html (original) +++ nutch/site/publish/credits.html Thu Jul 21 16:29:04 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.gif title=Open Source Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.png title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/credits.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1149267r1=1149266r2=1149267view=diff == Binary files - no diff available. 
Modified: nutch/site/publish/i18n.html URL: http://svn.apache.org/viewvc/nutch/site/publish/i18n.html?rev=1149267r1=1149266r2=1149267view=diff == --- nutch/site/publish/i18n.html (original) +++ nutch/site/publish/i18n.html Thu Jul 21 16:29:04 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http
svn commit: r1149269 - in /nutch/site: forrest/src/documentation/ publish/
Author: lewismc Date: Thu Jul 21 16:40:09 2011 New Revision: 1149269 URL: http://svn.apache.org/viewvc?rev=1149269view=rev Log: changed logo to .gif file as .png is not accepted or liked by forrest I don't think. Modified: nutch/site/forrest/src/documentation/skinconf.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/i18n.html nutch/site/publish/i18n.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf Modified: nutch/site/forrest/src/documentation/skinconf.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/skinconf.xml?rev=1149269r1=1149268r2=1149269view=diff == --- nutch/site/forrest/src/documentation/skinconf.xml (original) +++ nutch/site/forrest/src/documentation/skinconf.xml Thu Jul 21 16:40:09 2011 @@ -68,7 +68,7 @@ which will be used to configure the chos project-nameNutch/project-name project-descriptionOpen Source Web Search Software/project-description project-urlhttp://nutch.apache.org//project-url - project-logoimages/nutch-logo.png/project-logo + project-logoimages/nutch-logo-tm.gif/project-logo !-- group logo -- group-nameApache/group-name Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1149269r1=1149268r2=1149269view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Thu Jul 21 16:40:09 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.png title=Open Source Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1149269r1=1149268r2=1149269view=diff == Binary files - no diff available. Modified: nutch/site/publish/bot.html URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1149269r1=1149268r2=1149269view=diff == --- nutch/site/publish/bot.html (original) +++ nutch/site/publish/bot.html Thu Jul 21 16:40:09 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.png title=Open Source Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/bot.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1149269r1=1149268r2=1149269view=diff == Binary files - no diff available. Modified: nutch/site/publish/credits.html URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1149269r1=1149268r2=1149269view=diff == --- nutch/site/publish/credits.html (original) +++ nutch/site/publish/credits.html Thu Jul 21 16:40:09 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo.png title=Open Source Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/credits.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1149269r1=1149268r2=1149269view=diff == Binary files - no diff available. 
Modified: nutch/site/publish/i18n.html URL: http://svn.apache.org/viewvc/nutch/site/publish/i18n.html?rev=1149269r1=1149268r2=1149269view=diff == --- nutch/site/publish/i18n.html (original) +++ nutch/site/publish/i18n.html Thu Jul 21 16:40:09
svn commit: r1149280 - in /nutch/site: forrest/src/documentation/ publish/
Author: lewismc Date: Thu Jul 21 17:51:40 2011 New Revision: 1149280 URL: http://svn.apache.org/viewvc?rev=1149280view=rev Log: rebuild to try and incorporate new logo... Modified: nutch/site/forrest/src/documentation/skinconf.xml nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/i18n.html nutch/site/publish/i18n.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf Modified: nutch/site/forrest/src/documentation/skinconf.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/skinconf.xml?rev=1149280r1=1149279r2=1149280view=diff == --- nutch/site/forrest/src/documentation/skinconf.xml (original) +++ nutch/site/forrest/src/documentation/skinconf.xml Thu Jul 21 17:51:40 2011 @@ -68,7 +68,7 @@ which will be used to configure the chos project-nameNutch/project-name project-descriptionOpen Source Web Search Software/project-description project-urlhttp://nutch.apache.org//project-url - project-logoimages/nutch-logo-tm.gif/project-logo + project-logoimages/nutch_logo_tm.gif/project-logo !-- group logo -- group-nameApache/group-name Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1149280r1=1149279r2=1149280view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Thu Jul 21 17:51:40 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo-tm.gif title=Open Source 
Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch_logo_tm.gif title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1149280r1=1149279r2=1149280view=diff == Binary files - no diff available. Modified: nutch/site/publish/bot.html URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1149280r1=1149279r2=1149280view=diff == --- nutch/site/publish/bot.html (original) +++ nutch/site/publish/bot.html Thu Jul 21 17:51:40 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch_logo_tm.gif title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/bot.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1149280r1=1149279r2=1149280view=diff == Binary files - no diff available. Modified: nutch/site/publish/credits.html URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1149280r1=1149279r2=1149280view=diff == --- nutch/site/publish/credits.html (original) +++ nutch/site/publish/credits.html Thu Jul 21 17:51:40 2011 @@ -39,7 +39,7 @@ |start Project Logo +-- div class=projectlogo -a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a +a href=http://nutch.apache.org/;img class=logoImage alt=Nutch src=images/nutch_logo_tm.gif title=Open Source Web Search Software/a /div !--+ |end Project Logo Modified: nutch/site/publish/credits.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1149280r1=1149279r2=1149280view=diff == Binary files - no diff available. 
Modified: nutch/site/publish/i18n.html URL: http://svn.apache.org/viewvc/nutch/site/publish/i18n.html?rev=1149280r1=1149279r2=1149280view=diff == --- nutch/site/publish/i18n.html (original) +++ nutch/site/publish/i18n.html Thu Jul 21 17:51:40 2011 @@ -39,7 +39,7
svn commit: r1148482 - in /nutch/site/forrest/src/documentation/content/xdocs: about.xml bot.xml credits.xml index.xml mailing_lists.xml site.xml
Author: lewismc Date: Tue Jul 19 18:42:33 2011 New Revision: 1148482 URL: http://svn.apache.org/viewvc?rev=1148482view=rev Log: commit to ensure site src has been committed alongside publish Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml nutch/site/forrest/src/documentation/content/xdocs/bot.xml nutch/site/forrest/src/documentation/content/xdocs/credits.xml nutch/site/forrest/src/documentation/content/xdocs/index.xml nutch/site/forrest/src/documentation/content/xdocs/mailing_lists.xml nutch/site/forrest/src/documentation/content/xdocs/site.xml Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/about.xml?rev=1148482r1=1148481r2=1148482view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/about.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/about.xml Tue Jul 19 18:42:33 2011 @@ -20,24 +20,26 @@ document header -titleAbout Nutch/title +titleAbout Apache Nutch/title /header body section - titleOverview/title pNutch is open source web-search - software. It builds on a href=ext:luceneLucene and Solr/a, - adding web-specifics, such as a crawler, a link-graph database, - parsers for HTML and other document formats, etc./p + titleOverview/title pApache Nutch is an open source web-search + software project. 
Stemming from a href=ext:luceneApache Lucene/a, it now builds + on a href=ext:solrApache Solr/a adding web-specifics, such as a crawler, + a link-graph database and parsing support handled by a href=ext:tikaApache Tika/a + for HTML and and array other document formats./p - pNutch can run on a single machine, but gains a lot of its + pApache Nutch can run on a single machine, but gains a lot of its strength from running in a a href=ext:hadoopHadoop cluster/a/p pThe system can be enhanced (eg other document formats can be - parsed) using a plugin mechanism./p + parsed) using a highly flexible, easily extensible and thoroughly maintained + plugin infrastructure./p - pFor more information about Nutch, please see the a + pFor more information about Apache Nutch, please see the a href=ext:wikiNutch wiki./a/p Modified: nutch/site/forrest/src/documentation/content/xdocs/bot.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/bot.xml?rev=1148482r1=1148481r2=1148482view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/bot.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/bot.xml Tue Jul 19 18:42:33 2011 @@ -18,7 +18,7 @@ document header -titleNutch robot/title +titleApache Nutch robot/title /header body Modified: nutch/site/forrest/src/documentation/content/xdocs/credits.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/credits.xml?rev=1148482r1=1148481r2=1148482view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/credits.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/credits.xml Tue Jul 19 18:42:33 2011 @@ -21,7 +21,7 @@ document header - titleNutch credits/title + titleApache Nutch Credits/title /header body @@ -35,9 +35,9 @@ liDogacan Güney/li lia href=http://www.digitalpebble.com/;Julien Nioche/a/li lia href=http://openindex.io/;Markus Jelsma/a/li - lia href=http://www.sematext.com/;Otis Gospodnetić/a/li lia 
href=http://people.apache.org/~siren;Sami Siren/a/li lia href=http://techvineyard.blogspot.com/;Alexis de Tréglodé/a/li + liLewis John McGibbney/li /ul /section @@ -49,6 +49,7 @@ liJohn Xing/li lia href=http://www.eecs.umich.edu/~michjc/;Mike Cafarella/a/li liPiotr Kosiorowski/li + lia href=http://www.sematext.com/;Otis Gospodnetić/a/li /ul /section @@ -76,6 +77,9 @@ lia href=http://www.archive.org/;The Internet Archive/a hosts some Nutch work./li + + liWe would also like to acknowledge the a href=http://www.apache.org/foundation/thanks.html;Apache +Software Foundation Sponsors/a./li /ul /section Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml URL: http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1148482r1=1148481r2=1148482view=diff == --- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original) +++ nutch/site/forrest/src/documentation/content/xdocs/index.xml Tue Jul 19 18:42:33 2011 @@ -20,13 +20,17 @@ document header -titleWelcome to Nutch
svn commit: r1147813 - in /nutch/branches/branch-1.4: CHANGES.txt src/bin/nutch
Author: lewismc Date: Mon Jul 18 11:22:52 2011 New Revision: 1147813 URL: http://svn.apache.org/viewvc?rev=1147813view=rev Log: commit to resolve and close NUTCH-1059 and changes.txt Modified: nutch/branches/branch-1.4/CHANGES.txt nutch/branches/branch-1.4/src/bin/nutch Modified: nutch/branches/branch-1.4/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1147813r1=1147812r2=1147813view=diff == --- nutch/branches/branch-1.4/CHANGES.txt (original) +++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 18 11:22:52 2011 @@ -2,6 +2,12 @@ Nutch Change Log Release 1.4 - Current development +* NUTCH-1059 Remove convdb command from /bin/nutch (lewismc) + +* NUTCH-1019 Edit comment in org.apache.nutch.crawl.Crawl to reflect removal of legacy (lewismc) + +* NUTCH-1023 Trivial error in error message for org.apache.nutch.crawl.LinkDbReader (lewismc) + * NUTCH-1043 Add pattern for filtering .js in default url filters (jnioche) * NUTCH-1054 LinkDB optional during indexing (jnioche) Modified: nutch/branches/branch-1.4/src/bin/nutch URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/bin/nutch?rev=1147813r1=1147812r2=1147813view=diff == --- nutch/branches/branch-1.4/src/bin/nutch (original) +++ nutch/branches/branch-1.4/src/bin/nutch Mon Jul 18 11:22:52 2011 @@ -49,7 +49,6 @@ if [ $# = 0 ]; then echo where COMMAND is one of: echo crawl one-step crawler for intranets echo readdbread / dump crawl db - echo convdbconvert crawl db from pre-0.9 format echo mergedb merge crawldb-s, with optional filtering echo readlinkdbread / dump link db echo injectinject new urls into the database @@ -206,8 +205,6 @@ elif [ $COMMAND = parse ] ; then CLASS=org.apache.nutch.parse.ParseSegment elif [ $COMMAND = readdb ] ; then CLASS=org.apache.nutch.crawl.CrawlDbReader -elif [ $COMMAND = convdb ] ; then - CLASS=org.apache.nutch.tools.compat.CrawlDbConverter elif [ $COMMAND = mergedb ] ; then CLASS=org.apache.nutch.crawl.CrawlDbMerger elif [ $COMMAND = 
readlinkdb ] ; then
svn commit: r1147815 - in /nutch/branches/branch-1.4: CHANGES.txt src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
Author: lewismc Date: Mon Jul 18 11:34:47 2011 New Revision: 1147815 URL: http://svn.apache.org/viewvc?rev=1147815view=rev Log: commit and close for NUTCH-1055 and changes.txt Modified: nutch/branches/branch-1.4/CHANGES.txt nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html Modified: nutch/branches/branch-1.4/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1147815r1=1147814r2=1147815view=diff == --- nutch/branches/branch-1.4/CHANGES.txt (original) +++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 18 11:34:47 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release 1.4 - Current development +* NUTCH-1055 upgrade package.html file in language identifier plugin (lewismc) + * NUTCH-1059 Remove convdb command from /bin/nutch (lewismc) * NUTCH-1019 Edit comment in org.apache.nutch.crawl.Crawl to reflect removal of legacy (lewismc) Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html?rev=1147815r1=1147814r2=1147815view=diff == --- nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html (original) +++ nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html Mon Jul 18 11:34:47 2011 @@ -1,6 +1,6 @@ html body pText document language identifier./ppLanguage profiles are based on material from -a href=http://www.isi.edu/~koehn/europarl/;http://www.isi.edu/~koehn/europarl//a./p +a href=http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps/;http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps//a./p /body /html
svn commit: r1147817 - in /nutch/trunk: CHANGES.txt src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
Author: lewismc Date: Mon Jul 18 11:43:26 2011 New Revision: 1147817 URL: http://svn.apache.org/viewvc?rev=1147817view=rev Log: commit and close of NUTCH-1055 and changes.txt, this commit does not affect functionality it is merely a hyperlink reference to the document used as the basis for the language identifier plugin Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1147817r1=1147816r2=1147817view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Mon Jul 18 11:43:26 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release 2.0 - Current Development +* NUTCH-1055 upgrade package.html file in language identifier plugin (lewismc) + * NUTCH-1043 Add pattern for filtering .js in default url filters (jnioche) * NUTCH-1027 Degrade log level of `can't find rules for scope` (markus) Modified: nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html?rev=1147817r1=1147816r2=1147817view=diff == --- nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html (original) +++ nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html Mon Jul 18 11:43:26 2011 @@ -1,6 +1,6 @@ html body pText document language identifier./ppLanguage profiles are based on material from -a href=http://www.isi.edu/~koehn/europarl/;http://www.isi.edu/~koehn/europarl//a./p +a href=http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps/;http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps//a./p /body /html
svn commit: r1147969 - in /nutch/site/publish: about.pdf bot.pdf credits.html credits.pdf i18n.pdf index.pdf issue_tracking.pdf linkmap.pdf mailing_lists.html mailing_lists.pdf nightly.pdf version_con
Author: lewismc Date: Mon Jul 18 16:59:18 2011 New Revision: 1147969 URL: http://svn.apache.org/viewvc?rev=1147969view=rev Log: update to commit and close NUTCH-1048 and to move Otis from committer to former committer Modified: nutch/site/publish/about.pdf nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/i18n.pdf nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.pdf nutch/site/publish/version_control.pdf Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1147969r1=1147968r2=1147969view=diff == Binary files - no diff available. Modified: nutch/site/publish/bot.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1147969r1=1147968r2=1147969view=diff == Binary files - no diff available. Modified: nutch/site/publish/credits.html URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1147969r1=1147968r2=1147969view=diff == --- nutch/site/publish/credits.html (original) +++ nutch/site/publish/credits.html Mon Jul 18 16:59:18 2011 @@ -284,10 +284,6 @@ document.write(Last Published: + docu /li li -a href=http://www.sematext.com/;Otis Gospodnetić/a -/li - -li a href=http://people.apache.org/~siren;Sami Siren/a /li @@ -301,7 +297,7 @@ document.write(Last Published: + docu /div -a name=N10042/aa name=Former+Committers/a +a name=N1003D/aa name=Former+Committers/a h2 class=h3Former Committers/h2 div class=section ul @@ -321,6 +317,10 @@ document.write(Last Published: + docu /li liPiotr Kosiorowski/li + +li +a href=http://www.sematext.com/;Otis Gospodnetić/a +/li /ul /div Modified: nutch/site/publish/credits.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1147969r1=1147968r2=1147969view=diff == Binary files - no diff available. 
Modified: nutch/site/publish/i18n.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/i18n.pdf?rev=1147969r1=1147968r2=1147969view=diff == Binary files - no diff available. Modified: nutch/site/publish/index.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/index.pdf?rev=1147969r1=1147968r2=1147969view=diff == Binary files - no diff available. Modified: nutch/site/publish/issue_tracking.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/issue_tracking.pdf?rev=1147969r1=1147968r2=1147969view=diff == Binary files - no diff available. Modified: nutch/site/publish/linkmap.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/linkmap.pdf?rev=1147969r1=1147968r2=1147969view=diff == Binary files - no diff available. Modified: nutch/site/publish/mailing_lists.html URL: http://svn.apache.org/viewvc/nutch/site/publish/mailing_lists.html?rev=1147969r1=1147968r2=1147969view=diff == --- nutch/site/publish/mailing_lists.html (original) +++ nutch/site/publish/mailing_lists.html Mon Jul 18 16:59:18 2011 @@ -283,11 +283,11 @@ document.write(Last Published: + docu /li li -a href=http://www.mail-archive.com/nutch-user%40lucene.apache.org/;Search Old List Archive/a +a href=http://www.mail-archive.com/nutch-user%40lucene.apache.org/;Search Old List Archive (nu...@lucene.apache.org)/a /li li -a href=http://nutch.apache.org/mail/user/;View List Archive/a +a href=http://mail-archives.apache.org/mod_mbox/nutch-user/;View List Archive/a /li /ul @@ -322,11 +322,11 @@ document.write(Last Published: + docu /li li -a href=http://www.mail-archive.com/nutch-dev%40lucene.apache.org/;Search Old List Archive/a +a href=http://www.mail-archive.com/nutch-dev%40lucene.apache.org/;Search Old List Archive (nu...@lucene.apache.org)/a /li li -a href=http://nutch.apache.org/mail/dev/;View List Archive/a +a href=http://mail-archives.apache.org/mod_mbox/nutch-dev/;View List Archive/a /li /ul @@ -357,11 +357,11 @@ document.write(Last Published: + docu /li li -a 
href=http://www.mail-archive.com/nutch-commits%40lucene.apache.org/;Search Old List Archive/a +a href=http://www.mail-archive.com/nutch-commits%40lucene.apache.org/;Search Old List
svn commit: r1147712 - in /nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl: Crawl.java NUTCH-1019-crawl-comment.patch
Author: lewismc Date: Sun Jul 17 20:39:10 2011 New Revision: 1147712 URL: http://svn.apache.org/viewvc?rev=1147712view=rev Log: commit to resolve and close NUTCH-1019, this commit does not affect any functionality but instead adds further minor comments to the code for the crawl class. Added: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java?rev=1147712r1=1147711r2=1147712view=diff == --- nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java (original) +++ nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java Sun Jul 17 20:39:10 2011 @@ -48,7 +48,8 @@ public class Crawl extends Configured im } - /* Perform complete crawling and indexing given a set of root urls. */ + /* Perform complete crawling and indexing (to Solr) given a set of root urls and the -solr + parameter respectively. More information and Usage parameters can be found below. 
*/ public static void main(String args[]) throws Exception { Configuration conf = NutchConfiguration.create(); int res = ToolRunner.run(conf, new Crawl(), args); Added: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch?rev=1147712view=auto == --- nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch (added) +++ nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch Sun Jul 17 20:39:10 2011 @@ -0,0 +1,14 @@ +Index: Crawl.java +=== +--- Crawl.java (revision 1147708) Crawl.java (working copy) +@@ -48,7 +48,8 @@ + } + + +- /* Perform complete crawling and indexing given a set of root urls. */ ++ /* Perform complete crawling and indexing (to Solr) given a set of root urls and the -solr ++ parameter respectively. More information and Usage parameters can be found below. */ + public static void main(String args[]) throws Exception { + Configuration conf = NutchConfiguration.create(); + int res = ToolRunner.run(conf, new Crawl(), args);
svn commit: r1147268 - in /nutch/site/publish: ./ skin/images/
Author: lewismc Date: Fri Jul 15 18:16:55 2011 New Revision: 1147268 URL: http://svn.apache.org/viewvc?rev=1147268view=rev Log: First commit action, rebuilding site to address various issues covered within NUTCH-914 Modified: nutch/site/publish/about.html nutch/site/publish/about.pdf nutch/site/publish/bot.html nutch/site/publish/bot.pdf nutch/site/publish/credits.html nutch/site/publish/credits.pdf nutch/site/publish/i18n.html nutch/site/publish/i18n.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.html nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.html nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.html nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.html nutch/site/publish/nightly.pdf nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png nutch/site/publish/version_control.html nutch/site/publish/version_control.pdf Modified: nutch/site/publish/about.html URL: http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1147268r1=1147267r2=1147268view=diff == --- nutch/site/publish/about.html (original) +++ nutch/site/publish/about.html Fri Jul 15 18:16:55 2011 @@ -3,9 +3,9 @@ head META http-equiv=Content-Type content=text/html; charset=UTF-8 meta content=Apache Forrest name=Generator 
-meta name=Forrest-version content=0.9 +meta name=Forrest-version content=0.10-dev meta name=Forrest-skin-name content=nutch -titleAbout Nutch/title +titleAbout Apache Nutch/title link type=text/css href=skin/basic.css rel=stylesheet link media=screen type=text/css href=skin/screen.css rel=stylesheet link media=print type=text/css href=skin/print.css rel=stylesheet @@ -155,6 +155,12 @@ document.write(Last Published: + docu div class=menuitem a href=http://www.cafepress.com/nutch/;Buy Stuff/a /div +div class=menuitem +a href=http://www.apache.org/foundation/sponsorship.html;Sponsorship/a +/div +div class=menuitem +a href=http://www.apache.org/licenses/;License/a +/div /div div onclick=SwitchMenu('menu_1.2', 'skin/') id=menu_1.2Title class=menutitleDocumentation/div div id=menu_1.2 class=menuitemgroup @@ -165,7 +171,7 @@ document.write(Last Published: + docu a href=http://wiki.apache.org/nutch/;Wiki/a /div div class=menuitem -a href=http://wiki.apache.org/nutch/NutchTutorial;Tutorial/a +a href=http://wiki.apache.org/nutch/RunningNutchAndSolr;Tutorial/a /div div class=menuitem a href=bot.htmlRobot /a @@ -235,7 +241,7 @@ document.write(Last Published: + docu a class=dida href=about.pdfimg alt=PDF -icon src=skin/images/pdfdoc.gif class=skinbr PDF/a /div -h1About Nutch/h1 +h1About Apache Nutch/h1 div id=minitoc-area ul class=minitoc li @@ -248,16 +254,18 @@ document.write(Last Published: + docu a name=N1000E/aa name=Overview/a h2 class=h3Overview/h2 div class=section -pNutch is open source web-search - software. It builds on a href=http://lucene.apache.org/java/;Lucene and Solr/a, - adding web-specifics, such as a crawler, a link-graph database, - parsers for HTML and other document formats, etc./p -pNutch can run on a single machine, but gains a lot of its +pApache Nutch is an open source web-search + software project. 
Stemming from a href=http://lucene.apache.org/java/;Apache Lucene/a, it now builds + on a href=http://lucene.apache.org/solr/;Apache Solr/a adding web-specifics, such as a crawler, + a link-graph database and parsing support handled by a href=http://tika.apache.org/;Apache Tika/a + for HTML and an array of other document formats./p +pApache Nutch can run on a single machine, but gains a lot of its strength from running in a a href=http://hadoop.apache.org/;Hadoop cluster/a /p pThe system can be enhanced (eg other document formats can be - parsed) using a plugin mechanism./p -pFor more information about Nutch, please see the a href=http://wiki.apache.org/nutch/;Nutch wiki./a + parsed) using a highly flexible, easily extensible and thoroughly maintained + plugin infrastructure./p +pFor more information about Apache Nutch
svn commit: r1147276 - in /nutch/site/publish: about.pdf bot.pdf credits.pdf i18n.pdf index.html index.pdf issue_tracking.pdf linkmap.pdf mailing_lists.pdf nightly.pdf version_control.pdf
Author: lewismc Date: Fri Jul 15 18:43:17 2011 New Revision: 1147276 URL: http://svn.apache.org/viewvc?rev=1147276view=rev Log: Trivial aesthetic improvement Modified: nutch/site/publish/about.pdf nutch/site/publish/bot.pdf nutch/site/publish/credits.pdf nutch/site/publish/i18n.pdf nutch/site/publish/index.html nutch/site/publish/index.pdf nutch/site/publish/issue_tracking.pdf nutch/site/publish/linkmap.pdf nutch/site/publish/mailing_lists.pdf nutch/site/publish/nightly.pdf nutch/site/publish/version_control.pdf Modified: nutch/site/publish/about.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/bot.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/credits.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/i18n.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/i18n.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/index.html URL: http://svn.apache.org/viewvc/nutch/site/publish/index.html?rev=1147276r1=1147275r2=1147276view=diff == --- nutch/site/publish/index.html (original) +++ nutch/site/publish/index.html Fri Jul 15 18:43:17 2011 @@ -246,7 +246,7 @@ document.write(Last Published: + docu /div h1Welcome to Apache Nutchreg;/h1 div class=abstractApache Nutch is an open source web-search software project. Nutch is a project of the Apache Software Foundation -and is part of the larger Apache community of developers and users. Apache NutchMore about Nutch can be found here. +and is part of the larger Apache community of developers and users. More about Nutch can be found here. 
/div div id=minitoc-area ul class=minitoc Modified: nutch/site/publish/index.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/index.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/issue_tracking.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/issue_tracking.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/linkmap.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/linkmap.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/mailing_lists.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/mailing_lists.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/nightly.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/nightly.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available. Modified: nutch/site/publish/version_control.pdf URL: http://svn.apache.org/viewvc/nutch/site/publish/version_control.pdf?rev=1147276r1=1147275r2=1147276view=diff == Binary files - no diff available.